FreeCalypso > hg > freecalypso-tools
changeset 802:1c599681fd60
pcm-sms-decode & sms-pdu-decode: revamp bad char decoding
author | Mychaela Falconia <falcon@freecalypso.org> |
---|---|
date | Thu, 25 Mar 2021 02:58:30 +0000 |
parents | da724c67159d |
children | 5637794913a8 |
files | uptools/libcoding/decode_helpers.c uptools/libcoding/gsm7_decode.c uptools/libcoding/ucs2_decode.c uptools/sms-pdu-decode/pdu-common.c |
diffstat | 4 files changed, 73 insertions(+), 30 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/uptools/libcoding/decode_helpers.c	Thu Mar 25 01:40:36 2021 +0000
+++ b/uptools/libcoding/decode_helpers.c	Thu Mar 25 02:58:30 2021 +0000
@@ -48,8 +48,15 @@
 		outp[1] = 0x80 | (uni & 0x3F);
 		return(2);
 	}
-	outp[0] = 0xE0 | (uni >> 12);
-	outp[1] = 0x80 | ((uni >> 6) & 0x3F);
-	outp[2] = 0x80 | (uni & 0x3F);
-	return(3);
+	if (uni < 0x10000) {
+		outp[0] = 0xE0 | (uni >> 12);
+		outp[1] = 0x80 | ((uni >> 6) & 0x3F);
+		outp[2] = 0x80 | (uni & 0x3F);
+		return(3);
+	}
+	outp[0] = 0xF0 | (uni >> 18);
+	outp[1] = 0x80 | ((uni >> 12) & 0x3F);
+	outp[2] = 0x80 | ((uni >> 6) & 0x3F);
+	outp[3] = 0x80 | (uni & 0x3F);
+	return(4);
 }
```
```diff
--- a/uptools/libcoding/gsm7_decode.c	Thu Mar 25 01:40:36 2021 +0000
+++ b/uptools/libcoding/gsm7_decode.c	Thu Mar 25 02:58:30 2021 +0000
@@ -4,6 +4,7 @@
  */
 
 #include <sys/types.h>
+#include <stdio.h>
 
 extern u_short gsm7_decode_table[128];
 extern u_short gsm7ext_decode_table[128];
@@ -13,33 +14,53 @@
 	unsigned inlen, *outlenp, *errp;
 {
 	u_char *inp, *endp, *outp;
-	unsigned errcnt = 0;
 	unsigned gsm, uni;
+	int is_ext;
 
 	inp = inbuf;
 	endp = inbuf + inlen;
 	outp = outbuf;
 	while (inp < endp) {
 		gsm = *inp++;
-		if (gsm == 0x1B && inp < endp)
-			uni = gsm7ext_decode_table[*inp++];
-		else
+		if (gsm == 0x1B && inp < endp && *inp != 0x1B && *inp != '\n'
+		    && *inp != '\r') {
+			gsm = *inp++;
+			uni = gsm7ext_decode_table[gsm];
+			if (uni == '\\') {
+				*outp++ = '\\';
+				*outp++ = '\\';
+				continue;
+			}
+			is_ext = 1;
+		} else {
+			switch (gsm) {
+			case 0x1B:
+				*outp++ = '\\';
+				*outp++ = 'e';
+				continue;
+			case '\n':
+				if (newline_ok)
+					*outp++ = '\n';
+				else {
+					*outp++ = '\\';
+					*outp++ = 'n';
+				}
+				continue;
+			case '\r':
+				*outp++ = '\\';
+				*outp++ = 'r';
+				continue;
+			}
 			uni = gsm7_decode_table[gsm];
-		if (uni == '\r') {
-			*outp++ = '\\';
-			*outp++ = 'r';
-			errcnt++;
-		} else if (uni == '\n') {
-			if (newline_ok)
-				*outp++ = '\n';
-			else {
+			is_ext = 0;
+		}
+		if (!uni || !is_decoded_char_ok(uni, ascii_ext)) {
+			if (is_ext) {
 				*outp++ = '\\';
-				*outp++ = 'n';
-				errcnt++;
+				*outp++ = 'e';
 			}
-		} else if (!uni || !is_decoded_char_ok(uni, ascii_ext)) {
-			*outp++ = '?';
-			errcnt++;
+			sprintf(outp, "\\%02X", gsm);
+			outp += 3;
 		} else if (ascii_ext == 2)
 			outp += emit_utf8_char(uni, outp);
 		else
@@ -49,5 +70,5 @@
 	if (outlenp)
 		*outlenp = outp - outbuf;
 	if (errp)
-		*errp = errcnt;
+		*errp = 0;
 }
```
```diff
--- a/uptools/libcoding/ucs2_decode.c	Thu Mar 25 01:40:36 2021 +0000
+++ b/uptools/libcoding/ucs2_decode.c	Thu Mar 25 02:58:30 2021 +0000
@@ -6,36 +6,51 @@
  */
 
 #include <sys/types.h>
+#include <stdio.h>
 
 ucs2_to_ascii_or_ext(inbuf, inlen, outbuf, outlenp, ascii_ext, newline_ok, errp)
 	u_char *inbuf, *outbuf;
 	unsigned inlen, *outlenp, *errp;
 {
 	u_char *inp, *endp, *outp;
-	unsigned errcnt = 0;
 	unsigned uni;
 
 	inp = inbuf;
 	endp = inbuf + (inlen & ~1);
 	outp = outbuf;
 	while (inp < endp) {
+		if ((endp - inp) >= 4 && (inp[0] & 0xFC) == 0xD8 &&
+		    (inp[2] & 0xFC) == 0xDC) {
+			uni = ((inp[0] & 3) << 18) | (inp[1] << 10) |
+				((inp[2] & 3) << 8) | inp[3];
+			inp += 4;
+			uni += 0x10000;
+			if (ascii_ext == 2)
+				outp += emit_utf8_char(uni, outp);
+			else {
+				sprintf(outp, "\\U%06X", uni);
+				outp += 8;
+			}
+			continue;
+		}
 		uni = (inp[0] << 8) | inp[1];
 		inp += 2;
-		if (uni == '\r') {
+		if (uni == '\\') {
+			*outp++ = '\\';
+			*outp++ = '\\';
+		} else if (uni == '\r') {
 			*outp++ = '\\';
 			*outp++ = 'r';
-			errcnt++;
 		} else if (uni == '\n') {
 			if (newline_ok)
 				*outp++ = '\n';
 			else {
 				*outp++ = '\\';
 				*outp++ = 'n';
-				errcnt++;
 			}
 		} else if (!is_decoded_char_ok(uni, ascii_ext)) {
-			*outp++ = '?';
-			errcnt++;
+			sprintf(outp, "\\u%04X", uni);
+			outp += 6;
 		} else if (ascii_ext == 2)
 			outp += emit_utf8_char(uni, outp);
 		else
@@ -45,5 +60,5 @@
 	if (outlenp)
 		*outlenp = outp - outbuf;
 	if (errp)
-		*errp = errcnt;
+		*errp = 0;
 }
```
```diff
--- a/uptools/sms-pdu-decode/pdu-common.c	Thu Mar 25 01:40:36 2021 +0000
+++ b/uptools/sms-pdu-decode/pdu-common.c	Thu Mar 25 02:58:30 2021 +0000
@@ -250,7 +250,7 @@
 {
 	unsigned udl, udl_octets;
 	unsigned udhl, udh_octets, udh_chars, ud_chars;
-	u_char ud7[160], decode_buf[321];
+	u_char ud7[160], decode_buf[481];
 	int do_hexdump;
 	unsigned decoded_len, badchars;
 
```