FreeCalypso > hg > sms-coding-utils
changeset 27:7418ca2e9949
libcoding: add functions from freecalypso-tools/uptools/libcoding
that are needed for sms-pdu-decode & pcm-sms-decode
author | Mychaela Falconia <falcon@freecalypso.org> |
---|---|
date | Thu, 13 Jun 2024 02:29:29 +0000 |
parents | c8cb05b69118 |
children | 6e925aa54727 |
files | libcoding/Makefile libcoding/decode_helpers.c libcoding/grokdcs.c libcoding/gsm7_decode.c libcoding/gsm7_decode_qstring.c libcoding/gsm7_decode_tables.c libcoding/gsm7_unpack.c libcoding/gsmtime.c libcoding/hexdecode.c libcoding/hexdump.c libcoding/number_decode.c libcoding/scaddr.c libcoding/ucs2_decode.c |
diffstat | 13 files changed, 542 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/libcoding/Makefile Thu Jun 13 01:55:19 2024 +0000 +++ b/libcoding/Makefile Thu Jun 13 02:29:29 2024 +0000 @@ -1,6 +1,9 @@ -OBJS= alpha_addr.o alpha_addr_enc.o check_high_bit.o gsm7_encode.o \ - gsm7_encode2.o gsm7_encode_table.o gsm7_pack.o hexdigits.o hexout.o \ - number_encode.o timestamp.o ucs2_bigend.o utf8_decode.o utf8_decode2.o +OBJS= alpha_addr.o alpha_addr_enc.o check_high_bit.o decode_helpers.o \ + grokdcs.o gsm7_decode.o gsm7_decode_qstring.o gsm7_decode_tables.o \ + gsm7_encode.o gsm7_encode2.o gsm7_encode_table.o gsm7_pack.o \ + gsm7_unpack.o gsmtime.o hexdecode.o hexdigits.o hexdump.o hexout.o \ + number_decode.o number_encode.o scaddr.o timestamp.o ucs2_bigend.o \ + ucs2_decode.o utf8_decode.o utf8_decode2.o LIB= libcoding.a include ../config.defs
/*
 * libcoding/decode_helpers.c
 *
 * This library module implements the is_decoded_char_ok() and emit_utf8_char()
 * functions used by gsm7_to_ascii_or_ext() and ucs2_to_ascii_or_ext().
 */

#include <sys/types.h>

/*
 * Decide whether a decoded Unicode code point may be written to the output
 * literally, or whether the caller has to fall back to an escape sequence.
 *
 * uni		the decoded code point
 * ascii_ext	output repertoire: 0 = plain ASCII, 1 = up to 0xFF
 *		(ISO 8859-1), 2 = up to 0xFFFF (emitted as UTF-8 by the
 *		callers); any other value accepts nothing
 *
 * Returns 1 if the character may be emitted as-is, 0 otherwise.
 */
int
is_decoded_char_ok(unsigned uni, int ascii_ext)
{
	unsigned upper_limit;

	/* weed out control chars first: C0 range, DEL and the C1 range */
	if (uni < 0x20)
		return(0);
	if (uni >= 0x7F && uni <= 0x9F)
		return(0);
	/*
	 * UTF-16 surrogate code points are not characters; encoding one
	 * of them with emit_utf8_char() would produce invalid UTF-8.
	 */
	if (uni >= 0xD800 && uni <= 0xDFFF)
		return(0);
	/* see what range our output encoding allows */
	switch (ascii_ext) {
	case 0:
		upper_limit = 0x7F;
		break;
	case 1:
		upper_limit = 0xFF;
		break;
	case 2:
		upper_limit = 0xFFFF;
		break;
	default:
		upper_limit = 0;
		break;
	}
	return(uni <= upper_limit ? 1 : 0);
}

/*
 * Store the UTF-8 encoding of code point uni at outp.
 *
 * The caller must provide room for up to 4 bytes; no terminating NUL
 * is written.  Returns the number of bytes stored (1 to 4).
 */
int
emit_utf8_char(unsigned uni, unsigned char *outp)
{
	if (uni < 0x80) {
		*outp = uni;
		return(1);
	}
	if (uni < 0x800) {
		outp[0] = 0xC0 | (uni >> 6);
		outp[1] = 0x80 | (uni & 0x3F);
		return(2);
	}
	if (uni < 0x10000) {
		outp[0] = 0xE0 | (uni >> 12);
		outp[1] = 0x80 | ((uni >> 6) & 0x3F);
		outp[2] = 0x80 | (uni & 0x3F);
		return(3);
	}
	outp[0] = 0xF0 | (uni >> 18);
	outp[1] = 0x80 | ((uni >> 12) & 0x3F);
	outp[2] = 0x80 | ((uni >> 6) & 0x3F);
	outp[3] = 0x80 | (uni & 0x3F);
	return(4);
}
/*
 * libcoding/grokdcs.c
 *
 * This library module implements the function that distills the complex
 * set of possible SMS DCS octet values to just one of 4 possibilities:
 * 7-bit text (7), 8-bit data octets (8), UCS-2 text (16) or compressed
 * data (9).
 *
 * The decoding is based on the 3GPP TS 23.038 V11.0.0 spec;
 * reserved encodings are treated as 7-bit text as the spec instructs.
 */

/*
 * Classify a DCS octet.
 *
 * dcs	the Data Coding Scheme octet; only bits 7..2 are examined
 *
 * Returns 7, 8, 16 or 9 as described in the module comment above.
 */
int
sms_dcs_classify(int dcs)
{
	if (!(dcs & 0x80)) {
		/* bit 7 clear: bit 5 flags compressed text */
		if (dcs & 0x20)
			return(9);
		/* otherwise bits 3..2 select the alphabet */
		switch (dcs & 0xC) {
		case 0:
			return(7);
		case 4:
			return(8);
		case 8:
			return(16);
		default:
			/* reserved, treating as 7-bit per the spec */
			return(7);
		}
	}
	/* bit 7 set: dispatch on the upper nibble */
	switch (dcs & 0xF0) {
	case 0xE0:
		return(16);
	case 0xF0:
		/* bit 2 chooses between 8-bit data and 7-bit text */
		if (dcs & 4)
			return(8);
		return(7);
	case 0xC0:
	case 0xD0:
		return(7);
	default:
		/* 0x80 through 0xB0: reserved, treating as 7-bit per the spec */
		return(7);
	}
}
/*
 * libcoding/gsm7_decode.c
 *
 * This library module implements the decoding of GSM7-encoded data
 * into ASCII, ISO 8859-1 or UTF-8.
 */

#include <sys/types.h>
#include <stdio.h>

extern unsigned short gsm7_decode_table[128];
extern unsigned short gsm7ext_decode_table[128];

extern int is_decoded_char_ok(unsigned uni, int ascii_ext);
extern int emit_utf8_char(unsigned uni, unsigned char *outp);

/*
 * Convert a buffer of unpacked GSM7 characters (one per octet) into
 * printable text with backslash escapes.
 *
 * inbuf, inlen	input characters and their count
 * outbuf	receives the NUL-terminated result; the caller must size
 *		it for the expanded text, no bounds checking is done here
 * outlenp	if non-NULL, receives the output length (NUL not counted)
 * ascii_ext	0 = ASCII only, 1 = ISO 8859-1, 2 = UTF-8
 * newline_ok	nonzero: emit a literal newline for GSM7 LF;
 *		zero: emit it as \n
 *
 * Escapes produced: \\ for backslash, \E for the Euro sign when the output
 * is not UTF-8, \e for an ESC that does not introduce a usable extension
 * character, \n and \r, and \XX (two hex digits) for any character that
 * has no acceptable representation (preceded by \e if it came from the
 * extension table).
 *
 * Always returns 0.
 */
int
gsm7_to_ascii_or_ext(unsigned char *inbuf, unsigned inlen,
		     unsigned char *outbuf, unsigned *outlenp,
		     int ascii_ext, int newline_ok)
{
	unsigned char *inp, *endp, *outp;
	unsigned gsm, uni;
	int is_ext;

	inp = inbuf;
	endp = inbuf + inlen;
	outp = outbuf;
	while (inp < endp) {
		gsm = *inp++;
		if (gsm == 0x1B && inp < endp && *inp != 0x1B && *inp != '\n'
		    && *inp != '\r') {
			/* ESC followed by a candidate extension character */
			gsm = *inp++;
			/*
			 * The tables have 128 entries; an octet with its
			 * high bit set is not a valid GSM7 character and
			 * must not be used as an index.
			 */
			uni = gsm < 128 ? gsm7ext_decode_table[gsm] : 0;
			if (uni == '\\') {
				*outp++ = '\\';
				*outp++ = '\\';
				continue;
			}
			if (uni == 0x20AC && ascii_ext < 2) {
				*outp++ = '\\';
				*outp++ = 'E';
				continue;
			}
			is_ext = 1;
		} else {
			switch (gsm) {
			case 0x1B:
				*outp++ = '\\';
				*outp++ = 'e';
				continue;
			case '\n':
				if (newline_ok)
					*outp++ = '\n';
				else {
					*outp++ = '\\';
					*outp++ = 'n';
				}
				continue;
			case '\r':
				*outp++ = '\\';
				*outp++ = 'r';
				continue;
			}
			uni = gsm < 128 ? gsm7_decode_table[gsm] : 0;
			is_ext = 0;
		}
		if (!uni || !is_decoded_char_ok(uni, ascii_ext)) {
			/* no usable mapping: emit the raw code as hex */
			if (is_ext) {
				*outp++ = '\\';
				*outp++ = 'e';
			}
			sprintf((char *) outp, "\\%02X", gsm);
			outp += 3;
		} else if (ascii_ext == 2)
			outp += emit_utf8_char(uni, outp);
		else
			*outp++ = uni;
	}
	*outp = '\0';
	if (outlenp)
		*outlenp = outp - outbuf;
	return(0);
}
/*
 * libcoding/gsm7_decode_qstring.c
 *
 * This module implements a function for decoding GSM7 strings
 * to ASCII with output to a stdio file; it is an implementation
 * of lossless conversion per our SIM-data-formats spec
 * in freecalypso-docs.
 */

#include <sys/types.h>
#include <stdio.h>

/*
 * Translation tables, indexed by GSM7 code.  An entry of 0 means there
 * is no ASCII representation; an entry with bit 7 set means the low
 * 7 bits are to be printed preceded by a backslash.  The element type is
 * unsigned so that flagged entries are representable regardless of the
 * signedness of plain char.
 */
static const unsigned char basic_table[128] = {
	'@', 0,   '$', 0,   0,   0,   0,   0,
	0,   0,   'n'|0x80, 0, 0, 'r'|0x80, 0, 0,
	0,   '_', 0,   0,   0,   0,   0,   0,
	0,   0,   0,   0,   0,   0,   0,   0,
	' ', '!', '"'|0x80, '#', 0, '%', '&', 0x27,
	'(', ')', '*', '+', ',', '-', '.', '/',
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', ':', ';', '<', '=', '>', '?',
	0,   'A', 'B', 'C', 'D', 'E', 'F', 'G',
	'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
	'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
	'X', 'Y', 'Z', 0,   0,   0,   0,   0,
	0,   'a', 'b', 'c', 'd', 'e', 'f', 'g',
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	'x', 'y', 'z', 0,   0,   0,   0,   0
};

static const unsigned char escape_table[128] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, '^', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, '{', '}', 0, 0, 0, 0, 0, '\\'|0x80,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '[', '~', ']', 0,
	'|', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 'E'|0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/*
 * Print a GSM7 string (one character per octet) to outf as a
 * double-quoted ASCII string.
 *
 * data, nbytes	input characters and their count
 * outf		stdio stream to write to
 *
 * Characters without an ASCII mapping come out as \XX (two hex digits),
 * or \e\XX if they followed an ESC; an ESC that is last in the input or
 * is followed by another ESC, LF or CR comes out as \e by itself.
 */
void
print_gsm7_string_to_file(unsigned char *data, unsigned nbytes, FILE *outf)
{
	unsigned char *dp, *endp;
	int b, c;

	dp = data;
	endp = data + nbytes;
	putc('"', outf);
	while (dp < endp) {
		b = *dp++;
		if (b == 0x1B) {
			if (dp >= endp || *dp == 0x1B || *dp == '\n' ||
			    *dp == '\r') {
				putc('\\', outf);
				putc('e', outf);
				continue;
			}
			b = *dp++;
			/* octets >= 0x80 fall outside the 128-entry table */
			c = b < 128 ? escape_table[b] : 0;
			if (!c) {
				fprintf(outf, "\\e\\%02X", b);
				continue;
			}
		} else {
			c = b < 128 ? basic_table[b] : 0;
			if (!c) {
				fprintf(outf, "\\%02X", b);
				continue;
			}
		}
		if (c & 0x80) {
			putc('\\', outf);
			c &= 0x7F;
		}
		putc(c, outf);
	}
	putc('"', outf);
}
/*
 * libcoding/gsm7_decode_tables.c
 *
 * Lookup tables that map GSM 7-bit default alphabet codes (03.38 or 23.038)
 * to Unicode code points.  Each table is indexed by the 7-bit code;
 * an entry of 0 marks a code with no mapping in that table.
 */

#include <sys/types.h>

/* basic character set */
unsigned short gsm7_decode_table[128] = {
	/* 0x00 */ '@',    0x00A3, '$',    0x00A5, 0x00E8, 0x00E9, 0x00F9, 0x00EC,
	/* 0x08 */ 0x00F2, 0x00C7, '\n',   0x00D8, 0x00F8, '\r',   0x00C5, 0x00E5,
	/* 0x10 */ 0x0394, '_',    0x03A6, 0x0393, 0x039B, 0x03A9, 0x03A0, 0x03A8,
	/* 0x18 */ 0x03A3, 0x0398, 0x039E, 0,      0x00C6, 0x00E6, 0x00DF, 0x00C9,
	/* 0x20 */ ' ',    '!',    '"',    '#',    0x00A4, '%',    '&',    '\'',
	/* 0x28 */ '(',    ')',    '*',    '+',    ',',    '-',    '.',    '/',
	/* 0x30 */ '0',    '1',    '2',    '3',    '4',    '5',    '6',    '7',
	/* 0x38 */ '8',    '9',    ':',    ';',    '<',    '=',    '>',    '?',
	/* 0x40 */ 0x00A1, 'A',    'B',    'C',    'D',    'E',    'F',    'G',
	/* 0x48 */ 'H',    'I',    'J',    'K',    'L',    'M',    'N',    'O',
	/* 0x50 */ 'P',    'Q',    'R',    'S',    'T',    'U',    'V',    'W',
	/* 0x58 */ 'X',    'Y',    'Z',    0x00C4, 0x00D6, 0x00D1, 0x00DC, 0x00A7,
	/* 0x60 */ 0x00BF, 'a',    'b',    'c',    'd',    'e',    'f',    'g',
	/* 0x68 */ 'h',    'i',    'j',    'k',    'l',    'm',    'n',    'o',
	/* 0x70 */ 'p',    'q',    'r',    's',    't',    'u',    'v',    'w',
	/* 0x78 */ 'x',    'y',    'z',    0x00E4, 0x00F6, 0x00F1, 0x00FC, 0x00E0
};

/* extension set, selected by a preceding ESC (0x1B) */
unsigned short gsm7ext_decode_table[128] = {
	/* 0x00 */ 0,   0,   0,    0, 0,   0,      0,   0,
	/* 0x08 */ 0,   0,   '\n', 0, 0,   '\r',   0,   0,
	/* 0x10 */ 0,   0,   0,    0, '^', 0,      0,   0,
	/* 0x18 */ 0,   0,   0,    0, 0,   0,      0,   0,
	/* 0x20 */ 0,   0,   0,    0, 0,   0,      0,   0,
	/* 0x28 */ '{', '}', 0,    0, 0,   0,      0,   '\\',
	/* 0x30 */ 0,   0,   0,    0, 0,   0,      0,   0,
	/* 0x38 */ 0,   0,   0,    0, '[', '~',    ']', 0,
	/* 0x40 */ '|', 0,   0,    0, 0,   0,      0,   0,
	/* 0x48 */ 0,   0,   0,    0, 0,   0,      0,   0,
	/* 0x50 */ 0,   0,   0,    0, 0,   0,      0,   0,
	/* 0x58 */ 0,   0,   0,    0, 0,   0,      0,   0,
	/* 0x60 */ 0,   0,   0,    0, 0,   0x20AC, 0,   0,
	/* 0x68 */ 0,   0,   0,    0, 0,   0,      0,   0,
	/* 0x70 */ 0,   0,   0,    0, 0,   0,      0,   0,
	/* 0x78 */ 0,   0,   0,    0, 0,   0,      0,   0
};
/*
 * libcoding/gsm7_unpack.c
 *
 * This library module implements unpacking of GSM 7-bit data
 * from packed octets.
 */

#include <sys/types.h>

/* right shift applied to the 16-bit window for each septet position mod 8 */
static const unsigned char shift[8] = {0, 7, 6, 5, 4, 3, 2, 1};

/*
 * Unpack nseptets 7-bit characters from the packed octets at inbuf,
 * storing one character per octet at outbuf.
 *
 * inbuf	packed input
 * outbuf	receives nseptets octets, each in the range 0-0x7F
 * nseptets	number of characters to extract
 *
 * Only the octets that actually contain bits of the requested septets
 * are read, so inbuf may be sized exactly for the packed data.
 * Always returns 0.
 */
int
gsm7_unpack(unsigned char *inbuf, unsigned char *outbuf, unsigned nseptets)
{
	unsigned char *inp = inbuf, *outp = outbuf;
	unsigned n, pos, window;

	for (n = 0; n < nseptets; n++) {
		pos = n & 7;
		window = inp[0];
		/*
		 * Positions 1 through 6 straddle two octets.  Positions 0
		 * and 7 lie entirely within the current octet, so the next
		 * one must not be touched: it may be past the end of input.
		 */
		if (pos >= 1 && pos <= 6)
			window |= (unsigned) inp[1] << 8;
		*outp++ = (window >> shift[pos]) & 0x7F;
		if (pos)
			inp++;
	}
	return(0);
}
/*
 * libcoding/gsmtime.c
 *
 * This library module implements decoding of GSM timestamps.
 */

#include <sys/types.h>
#include <stdio.h>

/*
 * Format a 7-octet GSM timestamp as text.
 *
 * inbuf	7 octets, each holding two digits with the nibbles swapped
 * outbuf	receives the NUL-terminated string; the caller must
 *		provide at least 21 bytes (20 characters plus NUL)
 *
 * The nibbles of every octet are swapped back and printed as two hex
 * digits, in the layout XX/XX/XX,XX:XX:XX+XX.  The top bit of the last
 * (swapped) octet selects the sign character: '-' when set, '+' when clear;
 * that bit is removed before the octet is printed.
 *
 * Always returns 0.
 */
int
gsm_timestamp_decode(unsigned char *inbuf, char *outbuf)
{
	unsigned char rev[7];
	int i, lo, hi, tzsign;

	for (i = 0; i < 7; i++) {
		lo = inbuf[i] & 0xF;
		hi = inbuf[i] >> 4;
		rev[i] = (lo << 4) | hi;
	}
	tzsign = '+';
	if (rev[6] & 0x80) {
		rev[6] &= 0x7F;
		tzsign = '-';
	}
	sprintf(outbuf, "%02X/%02X/%02X,%02X:%02X:%02X%c%02X", rev[0], rev[1],
		rev[2], rev[3], rev[4], rev[5], tzsign, rev[6]);
	return(0);
}
/*
 * libcoding/hexdecode.c
 *
 * This library module implements decoding of long hex strings,
 * such as SMS PDUs.
 */

#include <sys/types.h>
#include <ctype.h>

/*
 * Decode a NUL-terminated string of hex digit pairs into binary octets.
 *
 * inbuf	the hex string; upper- and lowercase digits are accepted
 * outbuf	receives the decoded octets
 * outmax	capacity of outbuf in octets
 *
 * Returns the number of octets stored, or -1 if a character pair that
 * is not two hex digits is encountered (this includes a trailing odd
 * digit).  Decoding stops quietly once outmax octets have been stored,
 * provided the next pair is itself valid; later input is not examined.
 */
int
decode_hex_line(char *inbuf, unsigned char *outbuf, unsigned outmax)
{
	char *inp = inbuf;
	unsigned char *outp = outbuf;
	unsigned outcnt = 0;
	int c, d[2], i;

	while (*inp) {
		/*
		 * <ctype.h> functions require an argument representable as
		 * unsigned char; a plain char with the high bit set may be
		 * negative, hence the casts.
		 */
		if (!isxdigit((unsigned char) inp[0]) ||
		    !isxdigit((unsigned char) inp[1]))
			return(-1);
		if (outcnt >= outmax)
			break;
		for (i = 0; i < 2; i++) {
			c = (unsigned char) *inp++;
			if (isdigit(c))
				d[i] = c - '0';
			else if (isupper(c))
				d[i] = c - 'A' + 10;
			else
				d[i] = c - 'a' + 10;
		}
		*outp++ = (d[0] << 4) | d[1];
		outcnt++;
	}
	return outcnt;
}
/*
 * libcoding/hexdump.c
 *
 * This library module implements a simple hex dump facility.
 */

#include <sys/types.h>
#include <stdio.h>

/*
 * Print a hex + ASCII dump of a buffer on stdout, 16 bytes per line.
 *
 * dumpbuf	the bytes to dump
 * dumplen	how many bytes to dump; nothing is printed for 0
 *
 * Each line holds the offset of its first byte, the bytes in hex with
 * an extra space after the 8th and 16th column, and then the same
 * bytes as characters, with anything outside ' ' through '~' shown as '.'.
 *
 * Always returns 0.
 */
int
msg_bits_hexdump(unsigned char *dumpbuf, unsigned dumplen)
{
	unsigned char *buf = dumpbuf;
	unsigned lineoff, linelen, i, c;

	for (lineoff = 0; lineoff < dumplen; ) {
		linelen = dumplen - lineoff;
		if (linelen > 16)
			linelen = 16;
		printf("%02X: ", lineoff);
		for (i = 0; i < 16; i++) {
			if (i < linelen)
				printf("%02X ", buf[i]);
			else
				/*
				 * pad with the same width a printed byte
				 * takes, so that the character column of a
				 * short last line stays aligned
				 */
				fputs("   ", stdout);
			if (i == 7 || i == 15)
				putchar(' ');
		}
		for (i = 0; i < linelen; i++) {
			c = buf[i];
			if (c < ' ' || c > '~')
				c = '.';
			putchar(c);
		}
		putchar('\n');
		buf += linelen;
		lineoff += linelen;
	}
	return(0);
}
/*
 * libcoding/number_decode.c
 *
 * This library module implements the decoding of number (address) digits.
 */

#include <sys/types.h>

/* character printed for each 4-bit digit code */
char gsm_address_digits[16] =
	{'0','1','2','3','4','5','6','7','8','9','*','#','a','b','c','?'};

/*
 * Convert packed address digits into a NUL-terminated string.
 *
 * inbuf	packed digits, two per octet, low nibble first
 * outbuf	receives ndigits characters plus a terminating NUL
 * ndigits	number of digits to decode; when odd, the high nibble
 *		of the last octet is ignored
 *
 * Always returns 0.
 */
int
decode_address_digits(unsigned char *inbuf, char *outbuf, unsigned ndigits)
{
	unsigned char *inp = inbuf;
	char *outp = outbuf;
	unsigned n, octet;

	for (n = 0; n < ndigits; n += 2) {
		octet = *inp++;
		*outp++ = gsm_address_digits[octet & 0xF];
		/* an odd digit count ends on a low nibble */
		if (n + 1 >= ndigits)
			break;
		*outp++ = gsm_address_digits[octet >> 4];
	}
	*outp = '\0';
	return(0);
}
/*
 * libcoding/scaddr.c
 *
 * This library module contains the code that figures out how many digits
 * are there in a PDU-encoded Service Centre address.
 */

#include <sys/types.h>

/*
 * Count the digits in a Service Centre address field.
 *
 * sca	points at the length octet of the field; that octet gives the
 *	number of octets that follow it, the first of which is not a
 *	digit octet
 *
 * Returns the digit count: two per digit octet, less one if the high
 * nibble of the last octet is the 0xF filler.  A length octet of 0 or 1
 * leaves no digit octets, so 0 is returned without looking any further.
 */
int
sc_addr_ndigits(unsigned char *sca)
{
	unsigned nb, nd;

	nb = sca[0];
	/*
	 * Without this guard (nb - 1) * 2 would wrap around for nb == 0,
	 * and the filler test would be applied to a non-digit octet.
	 */
	if (nb < 2)
		return(0);
	nd = (nb - 1) * 2;
	if ((sca[nb] & 0xF0) == 0xF0)
		nd--;
	return nd;
}
/*
 * libcoding/ucs2_decode.c
 *
 * This library module implements the conversion of UCS2-encoded data
 * (typically received in SMS) into ASCII, ISO 8859-1 or UTF-8,
 * maintaining parallelism with the corresponding function for decoding
 * GSM7-encoded data.
 */

#include <sys/types.h>
#include <stdio.h>

extern int is_decoded_char_ok(unsigned uni, int ascii_ext);
extern int emit_utf8_char(unsigned uni, unsigned char *outp);

/*
 * Convert big-endian 16-bit character data into printable text with
 * backslash escapes.
 *
 * inbuf, inlen	input octets and their count; a trailing odd octet
 *		is ignored
 * outbuf	receives the NUL-terminated result; the caller must size
 *		it for the expanded text, no bounds checking is done here
 * outlenp	if non-NULL, receives the output length (NUL not counted)
 * ascii_ext	0 = ASCII only, 1 = ISO 8859-1, 2 = UTF-8
 * newline_ok	nonzero: emit a literal newline for U+000A;
 *		zero: emit it as \n
 *
 * A valid surrogate pair is combined into one code point, which is
 * emitted as UTF-8 when ascii_ext is 2 and as \UXXXXXX otherwise.
 * Backslash, CR and LF get the escapes \\, \r and \n.  Any other
 * character that cannot be emitted literally, including a surrogate
 * without its partner, comes out as \uXXXX.
 *
 * Always returns 0.
 */
int
ucs2_to_ascii_or_ext(unsigned char *inbuf, unsigned inlen,
		     unsigned char *outbuf, unsigned *outlenp,
		     int ascii_ext, int newline_ok)
{
	unsigned char *inp, *endp, *outp;
	unsigned uni;

	inp = inbuf;
	endp = inbuf + (inlen & ~1u);
	outp = outbuf;
	while (inp < endp) {
		/* high surrogate immediately followed by a low surrogate? */
		if ((endp - inp) >= 4 && (inp[0] & 0xFC) == 0xD8 &&
		    (inp[2] & 0xFC) == 0xDC) {
			uni = ((inp[0] & 3) << 18) | (inp[1] << 10) |
				((inp[2] & 3) << 8) | inp[3];
			inp += 4;
			uni += 0x10000;
			if (ascii_ext == 2)
				outp += emit_utf8_char(uni, outp);
			else {
				sprintf((char *) outp, "\\U%06X", uni);
				outp += 8;
			}
			continue;
		}
		uni = (inp[0] << 8) | inp[1];
		inp += 2;
		if (uni == '\\') {
			*outp++ = '\\';
			*outp++ = '\\';
		} else if (uni == '\r') {
			*outp++ = '\\';
			*outp++ = 'r';
		} else if (uni == '\n') {
			if (newline_ok)
				*outp++ = '\n';
			else {
				*outp++ = '\\';
				*outp++ = 'n';
			}
		} else if ((uni >= 0xD800 && uni <= 0xDFFF) ||
			   !is_decoded_char_ok(uni, ascii_ext)) {
			/*
			 * An unpaired surrogate reaching this point must be
			 * escaped: encoding it as UTF-8 would yield an
			 * invalid byte sequence.
			 */
			sprintf((char *) outp, "\\u%04X", uni);
			outp += 6;
		} else if (ascii_ext == 2)
			outp += emit_utf8_char(uni, outp);
		else
			*outp++ = uni;
	}
	*outp = '\0';
	if (outlenp)
		*outlenp = outp - outbuf;
	return(0);
}