FreeCalypso > hg > freecalypso-tools
changeset 328:978571e23318
uptools started with libcoding
author | Mychaela Falconia <falcon@freecalypso.org> |
---|---|
date | Sat, 03 Feb 2018 20:07:05 +0000 |
parents | 973d885a68a0 |
children | 18c692984549 |
files | uptools/libcoding/Makefile uptools/libcoding/decode_helpers.c uptools/libcoding/gsm7_decode.c uptools/libcoding/gsm7_decode_tables.c uptools/libcoding/ucs2_decode.c |
diffstat | 5 files changed, 206 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# uptools/libcoding/Makefile
#
# Builds libcoding.a, a static library made of the character set decoding
# modules listed in OBJS.  The .c -> .o step relies on make's built-in
# rule together with CC and CFLAGS.

CC=	gcc
CFLAGS=	-O2
OBJS=	decode_helpers.o gsm7_decode.o gsm7_decode_tables.o ucs2_decode.o
LIB=	libcoding.a

# Neither target produces a file of its own name; declaring them phony
# keeps a stray file called "all" or "clean" from masking the target.
.PHONY:	all clean

all:	${LIB}

${LIB}:	${OBJS}
	ar rcu $@ ${OBJS}
	ranlib $@

clean:
	rm -f *.[oa] errs
/*
 * uptools/libcoding/decode_helpers.c
 *
 * This library module implements the is_decoded_char_ok() and emit_utf8_char()
 * functions used by gsm7_to_ascii_or_ext() and ucs2_to_ascii_or_ext().
 */

#include <sys/types.h>

/*
 * Decide whether a decoded Unicode code point may be written to the output
 * in the selected output encoding.
 *
 *	uni		code point to check
 *	ascii_ext	output encoding selector:
 *			0 = plain ASCII (code points up to 0x7F),
 *			1 = 8-bit output (code points up to 0xFF),
 *			2 = UTF-8 (code points up to 0xFFFF);
 *			any other value accepts nothing
 *
 * Returns 1 if the character is acceptable, 0 otherwise.  C0 control
 * characters, DEL and the 0x80-0x9F control range are rejected in every
 * mode, and so are UTF-16 surrogate code units.
 */
int
is_decoded_char_ok(unsigned uni, int ascii_ext)
{
	unsigned upper_limit;

	/* weed out control chars first */
	if (uni < 0x20)
		return(0);
	if (uni >= 0x7F && uni <= 0x9F)
		return(0);
	/*
	 * Surrogate code units are not characters: running one through
	 * emit_utf8_char() would produce a byte sequence that is not
	 * valid UTF-8, so they are never acceptable output.
	 */
	if (uni >= 0xD800 && uni <= 0xDFFF)
		return(0);
	/* see what range our output encoding allows */
	switch (ascii_ext) {
	case 0:
		upper_limit = 0x7F;
		break;
	case 1:
		upper_limit = 0xFF;
		break;
	case 2:
		upper_limit = 0xFFFF;
		break;
	default:
		upper_limit = 0;
		break;
	}
	if (uni <= upper_limit)
		return(1);
	else
		return(0);
}

/*
 * Write the UTF-8 encoding of one code point to outp.
 *
 *	uni	code point to encode; only values up to 0xFFFF are
 *		supported, as there is no 4-byte form here
 *	outp	destination, must have room for up to 3 bytes;
 *		no terminating NUL is written
 *
 * Returns the number of bytes stored (1, 2 or 3).
 */
int
emit_utf8_char(unsigned uni, unsigned char *outp)
{
	if (uni < 0x80) {
		*outp = uni;
		return(1);
	}
	if (uni < 0x800) {
		outp[0] = 0xC0 | (uni >> 6);
		outp[1] = 0x80 | (uni & 0x3F);
		return(2);
	}
	outp[0] = 0xE0 | (uni >> 12);
	outp[1] = 0x80 | ((uni >> 6) & 0x3F);
	outp[2] = 0x80 | (uni & 0x3F);
	return(3);
}
/*
 * uptools/libcoding/gsm7_decode.c
 *
 * This library module implements the decoding of GSM7-encoded data
 * into ASCII, ISO 8859-1 or UTF-8.
 */

#include <sys/types.h>

#define	GSM7_TABLE_SIZE	128	/* entries in each decoding table */
#define	GSM7_ESCAPE	0x1B	/* selects the extension table for next char */

extern unsigned short gsm7_decode_table[GSM7_TABLE_SIZE];
extern unsigned short gsm7ext_decode_table[GSM7_TABLE_SIZE];

extern int is_decoded_char_ok(unsigned uni, int ascii_ext);
extern int emit_utf8_char(unsigned uni, unsigned char *outp);

/*
 * Convert a buffer of GSM7 characters into a NUL-terminated string.
 *
 *	inbuf		input, one GSM7 character per byte
 *	inlen		number of input bytes
 *	outbuf		output buffer; no bounds checking is done, the caller
 *			must make it large enough for the converted text plus
 *			the terminating NUL
 *	outlenp		if not NULL, receives the output length in bytes,
 *			not counting the terminating NUL
 *	ascii_ext	output encoding selector passed to
 *			is_decoded_char_ok(); 2 selects UTF-8 output, other
 *			values emit one byte per character
 *	newline_ok	if nonzero, a decoded newline is copied through;
 *			otherwise it is written as the two characters \n
 *			and counted as an error
 *	errp		if not NULL, receives the error count
 *
 * A decoded carriage return is always written as \r and counted as an
 * error.  Characters that have no table entry, that are rejected by
 * is_decoded_char_ok() or whose input byte is not a valid 7-bit value are
 * replaced with '?' and counted as errors.
 *
 * Always returns 0; the results are delivered via outbuf, outlenp and errp.
 */
int
gsm7_to_ascii_or_ext(unsigned char *inbuf, unsigned inlen,
		     unsigned char *outbuf, unsigned *outlenp,
		     int ascii_ext, int newline_ok, unsigned *errp)
{
	unsigned char *inp, *endp, *outp;
	unsigned errcnt = 0;
	unsigned gsm, uni;

	inp = inbuf;
	endp = inbuf + inlen;
	outp = outbuf;
	while (inp < endp) {
		gsm = *inp++;
		/*
		 * An escape as the very last input byte has nothing to
		 * apply to; it is then looked up in the main table, where
		 * its entry is 0 and thus yields '?'.
		 *
		 * Bytes with the high bit set are outside of both tables:
		 * map them to 0 (undecodable) instead of indexing past the
		 * end of the array.
		 */
		if (gsm == GSM7_ESCAPE && inp < endp) {
			gsm = *inp++;
			if (gsm < GSM7_TABLE_SIZE)
				uni = gsm7ext_decode_table[gsm];
			else
				uni = 0;
		} else if (gsm < GSM7_TABLE_SIZE)
			uni = gsm7_decode_table[gsm];
		else
			uni = 0;
		if (uni == '\r') {
			*outp++ = '\\';
			*outp++ = 'r';
			errcnt++;
		} else if (uni == '\n') {
			if (newline_ok)
				*outp++ = '\n';
			else {
				*outp++ = '\\';
				*outp++ = 'n';
				errcnt++;
			}
		} else if (!uni || !is_decoded_char_ok(uni, ascii_ext)) {
			*outp++ = '?';
			errcnt++;
		} else if (ascii_ext == 2)
			outp += emit_utf8_char(uni, outp);
		else
			*outp++ = uni;
	}
	*outp = '\0';
	if (outlenp)
		*outlenp = outp - outbuf;
	if (errp)
		*errp = errcnt;
	return(0);
}
/*
 * uptools/libcoding/gsm7_decode_tables.c
 *
 * This library module contains the tables for decoding the GSM 7-bit
 * default alphabet (03.38 or 23.038) into Unicode.
 *
 * Both tables are indexed by the 7-bit GSM character code and hold the
 * corresponding Unicode code point; a 0 entry means there is no mapping.
 */

#include <sys/types.h>

/* main table */
unsigned short gsm7_decode_table[128] = {
	/* 0x00 */ '@',   0xA3,  '$',   0xA5,  0xE8,  0xE9,  0xF9,  0xEC,
	/* 0x08 */ 0xF2,  0xC7,  '\n',  0xD8,  0xF8,  '\r',  0xC5,  0xE5,
	/* 0x10 */ 0x394, '_',   0x3A6, 0x393, 0x39B, 0x3A9, 0x3A0, 0x3A8,
	/* 0x18 */ 0x3A3, 0x398, 0x39E, 0,     0xC6,  0xE6,  0xDF,  0xC9,
	/* 0x20 */ ' ',   '!',   '"',   '#',   0xA4,  '%',   '&',   '\'',
	/* 0x28 */ '(',   ')',   '*',   '+',   ',',   '-',   '.',   '/',
	/* 0x30 */ '0',   '1',   '2',   '3',   '4',   '5',   '6',   '7',
	/* 0x38 */ '8',   '9',   ':',   ';',   '<',   '=',   '>',   '?',
	/* 0x40 */ 0xA1,  'A',   'B',   'C',   'D',   'E',   'F',   'G',
	/* 0x48 */ 'H',   'I',   'J',   'K',   'L',   'M',   'N',   'O',
	/* 0x50 */ 'P',   'Q',   'R',   'S',   'T',   'U',   'V',   'W',
	/* 0x58 */ 'X',   'Y',   'Z',   0xC4,  0xD6,  0xD1,  0xDC,  0xA7,
	/* 0x60 */ 0xBF,  'a',   'b',   'c',   'd',   'e',   'f',   'g',
	/* 0x68 */ 'h',   'i',   'j',   'k',   'l',   'm',   'n',   'o',
	/* 0x70 */ 'p',   'q',   'r',   's',   't',   'u',   'v',   'w',
	/* 0x78 */ 'x',   'y',   'z',   0xE4,  0xF6,  0xF1,  0xFC,  0xE0
};

/*
 * extension table, used for the character following an 0x1B escape;
 * every entry not listed here is 0
 */
unsigned short gsm7ext_decode_table[128] = {
	[0x0A] = '\n',
	[0x0D] = '\r',
	[0x14] = '^',
	[0x28] = '{',
	[0x29] = '}',
	[0x2F] = '\\',
	[0x3C] = '[',
	[0x3D] = '~',
	[0x3E] = ']',
	[0x40] = '|',
	[0x65] = 0x20AC
};
/*
 * uptools/libcoding/ucs2_decode.c
 *
 * This library module implements the conversion of UCS2-encoded data
 * (typically received in SMS) into ASCII, ISO 8859-1 or UTF-8,
 * maintaining parallelism with the corresponding function for decoding
 * GSM7-encoded data.
 */

#include <sys/types.h>

extern int is_decoded_char_ok(unsigned uni, int ascii_ext);
extern int emit_utf8_char(unsigned uni, unsigned char *outp);

/*
 * Convert a buffer of UCS2 characters into a NUL-terminated string.
 *
 *	inbuf		input, 2 bytes per character, high byte first
 *	inlen		number of input bytes; a trailing odd byte is ignored
 *	outbuf		output buffer; no bounds checking is done, the caller
 *			must make it large enough for the converted text plus
 *			the terminating NUL
 *	outlenp		if not NULL, receives the output length in bytes,
 *			not counting the terminating NUL
 *	ascii_ext	output encoding selector passed to
 *			is_decoded_char_ok(); 2 selects UTF-8 output, other
 *			values emit one byte per character
 *	newline_ok	if nonzero, a newline is copied through; otherwise
 *			it is written as the two characters \n and counted
 *			as an error
 *	errp		if not NULL, receives the error count
 *
 * A carriage return is always written as \r and counted as an error.
 * Characters rejected by is_decoded_char_ok() are replaced with '?' and
 * counted as errors.  Each 16-bit unit is handled on its own; pairs of
 * units are never combined into a single character.
 *
 * Always returns 0; the results are delivered via outbuf, outlenp and errp.
 */
int
ucs2_to_ascii_or_ext(unsigned char *inbuf, unsigned inlen,
		     unsigned char *outbuf, unsigned *outlenp,
		     int ascii_ext, int newline_ok, unsigned *errp)
{
	unsigned char *inp, *endp, *outp;
	unsigned errcnt = 0;
	unsigned uni;

	inp = inbuf;
	/* round the length down to a whole number of 16-bit units */
	endp = inbuf + (inlen & ~1);
	outp = outbuf;
	while (inp < endp) {
		uni = (inp[0] << 8) | inp[1];
		inp += 2;
		if (uni == '\r') {
			*outp++ = '\\';
			*outp++ = 'r';
			errcnt++;
		} else if (uni == '\n') {
			if (newline_ok)
				*outp++ = '\n';
			else {
				*outp++ = '\\';
				*outp++ = 'n';
				errcnt++;
			}
		} else if (!is_decoded_char_ok(uni, ascii_ext)) {
			*outp++ = '?';
			errcnt++;
		} else if (ascii_ext == 2)
			outp += emit_utf8_char(uni, outp);
		else
			*outp++ = uni;
	}
	*outp = '\0';
	if (outlenp)
		*outlenp = outp - outbuf;
	if (errp)
		*errp = errcnt;
	return(0);
}