changeset 27:7418ca2e9949

libcoding: add functions from freecalypso-tools/uptools/libcoding that are needed for sms-pdu-decode & pcm-sms-decode
author Mychaela Falconia <falcon@freecalypso.org>
date Thu, 13 Jun 2024 02:29:29 +0000
parents c8cb05b69118
children 6e925aa54727
files libcoding/Makefile libcoding/decode_helpers.c libcoding/grokdcs.c libcoding/gsm7_decode.c libcoding/gsm7_decode_qstring.c libcoding/gsm7_decode_tables.c libcoding/gsm7_unpack.c libcoding/gsmtime.c libcoding/hexdecode.c libcoding/hexdump.c libcoding/number_decode.c libcoding/scaddr.c libcoding/ucs2_decode.c
diffstat 13 files changed, 542 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/libcoding/Makefile	Thu Jun 13 01:55:19 2024 +0000
+++ b/libcoding/Makefile	Thu Jun 13 02:29:29 2024 +0000
@@ -1,6 +1,9 @@
-OBJS=	alpha_addr.o alpha_addr_enc.o check_high_bit.o gsm7_encode.o \
-	gsm7_encode2.o gsm7_encode_table.o gsm7_pack.o hexdigits.o hexout.o \
-	number_encode.o timestamp.o ucs2_bigend.o utf8_decode.o utf8_decode2.o
+OBJS=	alpha_addr.o alpha_addr_enc.o check_high_bit.o decode_helpers.o \
+	grokdcs.o gsm7_decode.o gsm7_decode_qstring.o gsm7_decode_tables.o \
+	gsm7_encode.o gsm7_encode2.o gsm7_encode_table.o gsm7_pack.o \
+	gsm7_unpack.o gsmtime.o hexdecode.o hexdigits.o hexdump.o hexout.o \
+	number_decode.o number_encode.o scaddr.o timestamp.o ucs2_bigend.o \
+	ucs2_decode.o utf8_decode.o utf8_decode2.o
 LIB=	libcoding.a
 
 include ../config.defs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/decode_helpers.c	Thu Jun 13 02:29:29 2024 +0000
@@ -0,0 +1,62 @@
+/*
+ * This library module implements the is_decoded_char_ok() and emit_utf8_char()
+ * functions used by gsm7_to_ascii_or_ext() and ucs2_to_ascii_or_ext().
+ */
+
+#include <sys/types.h>
+
+/*
+ * Tell whether the decoded Unicode code point uni may be emitted
+ * literally in the selected output encoding (return 1), or has to be
+ * written as an escape sequence by the caller (return 0).
+ *
+ * ascii_ext selects the output repertoire: 0 allows code points up to
+ * 0x7F, 1 up to 0xFF, 2 up to 0xFFFF; any other value allows nothing.
+ */
+is_decoded_char_ok(uni, ascii_ext)
+	unsigned uni;
+{
+	unsigned upper_limit;
+
+	/* weed out control chars first: C0, then DEL and C1 */
+	if (uni < 0x20)
+		return(0);
+	if (uni >= 0x7F && uni <= 0x9F)
+		return(0);
+	/*
+	 * UTF-16 surrogate code points are not characters and have no
+	 * well-formed UTF-8 encoding, so a stray one must be escaped
+	 * rather than emitted literally.
+	 */
+	if (uni >= 0xD800 && uni <= 0xDFFF)
+		return(0);
+	/* see what range our output encoding allows */
+	switch (ascii_ext) {
+	case 0:
+		upper_limit = 0x7F;
+		break;
+	case 1:
+		upper_limit = 0xFF;
+		break;
+	case 2:
+		upper_limit = 0xFFFF;
+		break;
+	default:
+		upper_limit = 0;
+	}
+	return(uni <= upper_limit);
+}
+
+/*
+ * Store the UTF-8 encoding of code point uni at outp and return the
+ * number of bytes stored (1 to 4).  No terminator is written and no
+ * bounds are checked: the caller provides room for up to 4 bytes.
+ */
+emit_utf8_char(uni, outp)
+	unsigned uni;
+	u_char *outp;
+{
+	u_char *p = outp;
+
+	if (uni < 0x80) {
+		/* plain ASCII: one byte, stored as is */
+		*p++ = uni;
+	} else if (uni < 0x800) {
+		/* 2-byte form: 110xxxxx 10xxxxxx */
+		*p++ = 0xC0 | (uni >> 6);
+		*p++ = 0x80 | (uni & 0x3F);
+	} else if (uni < 0x10000) {
+		/* 3-byte form: 1110xxxx 10xxxxxx 10xxxxxx */
+		*p++ = 0xE0 | (uni >> 12);
+		*p++ = 0x80 | ((uni >> 6) & 0x3F);
+		*p++ = 0x80 | (uni & 0x3F);
+	} else {
+		/* 4-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+		*p++ = 0xF0 | (uni >> 18);
+		*p++ = 0x80 | ((uni >> 12) & 0x3F);
+		*p++ = 0x80 | ((uni >> 6) & 0x3F);
+		*p++ = 0x80 | (uni & 0x3F);
+	}
+	return(p - outp);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/grokdcs.c	Thu Jun 13 02:29:29 2024 +0000
@@ -0,0 +1,46 @@
+/*
+ * This library module implements the function that distills the complex
+ * set of possible SMS DCS octet values to just one of 4 possibilities:
+ * 7-bit text (7), 8-bit data octets (8), UCS-2 text (16) or compressed
+ * data (9).
+ *
+ * The decoding is based on the 3GPP TS 23.038 V11.0.0 spec;
+ * reserved encodings are treated as 7-bit text as the spec instructs.
+ */
+
+/*
+ * Reduce an SMS DCS octet to one of four answers: 7 (7-bit text),
+ * 8 (8-bit data), 16 (UCS-2 text) or 9 (compressed data).
+ * Reserved codings come out as 7.
+ */
+sms_dcs_classify(dcs)
+{
+	int group = dcs & 0xF0;
+	int charset = dcs & 0xC;
+
+	if (group < 0x80) {
+		/* bit 7 clear: bit 5 flags compression, bits 3-2 the charset */
+		if (dcs & 0x20)
+			return(9);
+		if (charset == 4)
+			return(8);
+		if (charset == 8)
+			return(16);
+		/* charset 0, or the reserved value treated as 7-bit */
+		return(7);
+	}
+	/* bit 7 set: the high nibble selects the coding group */
+	if (group == 0xE0)
+		return(16);
+	if (group == 0xF0 && (dcs & 4))
+		return(8);
+	/* 0x80-0xB0 are reserved (7-bit per the spec); 0xC0, 0xD0 are 7-bit */
+	return(7);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/gsm7_decode.c	Thu Jun 13 02:29:29 2024 +0000
@@ -0,0 +1,77 @@
+/*
+ * This library module implements the decoding of GSM7-encoded data
+ * into ASCII, ISO 8859-1 or UTF-8.
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+
+extern u_short gsm7_decode_table[128];
+extern u_short gsm7ext_decode_table[128];
+
+/*
+ * Convert inlen unpacked GSM7 characters (one per byte) at inbuf into
+ * a NUL-terminated string at outbuf.
+ *
+ * ascii_ext is handed to is_decoded_char_ok() to decide which decoded
+ * characters may appear literally; when it equals 2 they are written
+ * as UTF-8, otherwise as single bytes.  newline_ok nonzero lets LF
+ * through literally instead of as "\n".  If outlenp is not NULL, the
+ * output length (without the NUL) is stored there.
+ *
+ * Escapes produced: "\\" for the extension-table backslash, "\E" for
+ * the Euro sign when ascii_ext < 2, "\e" for an ESC that does not
+ * start an extension sequence, "\n", "\r", and "\XX" (hex GSM7 code,
+ * preceded by "\e" for extension-table codes) for anything that
+ * cannot be shown literally.
+ *
+ * No output bounds checking is done; no input byte expands to more
+ * than 3 output bytes, so 3 * inlen + 1 bytes always suffice.
+ */
+gsm7_to_ascii_or_ext(inbuf, inlen, outbuf, outlenp, ascii_ext, newline_ok)
+	u_char *inbuf, *outbuf;
+	unsigned inlen, *outlenp;
+{
+	u_char *inp, *endp, *outp;
+	unsigned gsm, uni;
+	int is_ext;
+
+	inp = inbuf;
+	endp = inbuf + inlen;
+	outp = outbuf;
+	while (inp < endp) {
+		gsm = *inp++;
+		if (gsm == 0x1B && inp < endp && *inp != 0x1B && *inp != '\n'
+		    && *inp != '\r') {
+			gsm = *inp++;
+			/*
+			 * The decode tables have only 128 entries: a byte
+			 * with bit 7 set is not a GSM7 code and must not
+			 * be used as an index; treat it as undecodable.
+			 */
+			uni = (gsm & 0x80) ? 0 : gsm7ext_decode_table[gsm];
+			if (uni == '\\') {
+				*outp++ = '\\';
+				*outp++ = '\\';
+				continue;
+			}
+			if (uni == 0x20AC && ascii_ext < 2) {
+				*outp++ = '\\';
+				*outp++ = 'E';
+				continue;
+			}
+			is_ext = 1;
+		} else {
+			switch (gsm) {
+			case 0x1B:
+				*outp++ = '\\';
+				*outp++ = 'e';
+				continue;
+			case '\n':
+				if (newline_ok)
+					*outp++ = '\n';
+				else {
+					*outp++ = '\\';
+					*outp++ = 'n';
+				}
+				continue;
+			case '\r':
+				*outp++ = '\\';
+				*outp++ = 'r';
+				continue;
+			}
+			/* same 128-entry limit as for the extension table */
+			uni = (gsm & 0x80) ? 0 : gsm7_decode_table[gsm];
+			is_ext = 0;
+		}
+		if (!uni || !is_decoded_char_ok(uni, ascii_ext)) {
+			if (is_ext) {
+				*outp++ = '\\';
+				*outp++ = 'e';
+			}
+			/* gsm fits in one byte, so this is always 3 chars */
+			sprintf((char *) outp, "\\%02X", gsm);
+			outp += 3;
+		} else if (ascii_ext == 2)
+			outp += emit_utf8_char(uni, outp);
+		else
+			*outp++ = uni;
+	}
+	*outp = '\0';
+	if (outlenp)
+		*outlenp = outp - outbuf;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/gsm7_decode_qstring.c	Thu Jun 13 02:29:29 2024 +0000
@@ -0,0 +1,82 @@
+/*
+ * This module implements a function for decoding GSM7 strings
+ * to ASCII with output to a stdio file; it is an implementation
+ * of lossless conversion per our SIM-data-formats spec
+ * in freecalypso-docs.
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+
+static char basic_table[128] = {	/* GSM7 code -> ASCII; 0 = hex escape, 0x80 flag = backslash prefix */
+	'@', 0,   '$',      0,   0,   0,        0,   0,	/* 0x00-0x07 */
+	0,   0,   'n'|0x80, 0,   0,   'r'|0x80, 0,   0,	/* 0x08-0x0F: LF and CR print as \n and \r */
+	0,   '_', 0,        0,   0,   0,        0,   0,	/* 0x10-0x17 */
+	0,   0,   0,        0,   0,   0,        0,   0,	/* 0x18-0x1F */
+	' ', '!', '"'|0x80, '#', 0,   '%',      '&', 0x27,	/* 0x20-0x27: the double quote prints as \" */
+	'(', ')', '*',      '+', ',', '-',      '.', '/',	/* 0x28-0x2F */
+	'0', '1', '2',      '3', '4', '5',      '6', '7',	/* 0x30-0x37 */
+	'8', '9', ':',      ';', '<', '=',      '>', '?',	/* 0x38-0x3F */
+	0,   'A', 'B',      'C', 'D', 'E',      'F', 'G',	/* 0x40-0x47 */
+	'H', 'I', 'J',      'K', 'L', 'M',      'N', 'O',	/* 0x48-0x4F */
+	'P', 'Q', 'R',      'S', 'T', 'U',      'V', 'W',	/* 0x50-0x57 */
+	'X', 'Y', 'Z',      0,   0,   0,        0,   0,	/* 0x58-0x5F */
+	0,   'a', 'b',      'c', 'd', 'e',      'f', 'g',	/* 0x60-0x67 */
+	'h', 'i', 'j',      'k', 'l', 'm',      'n', 'o',	/* 0x68-0x6F */
+	'p', 'q', 'r',      's', 't', 'u',      'v', 'w',	/* 0x70-0x77 */
+	'x', 'y', 'z',      0,   0,   0,        0,   0	/* 0x78-0x7F */
+};
+
+static char escape_table[128] = {	/* code following ESC (0x1B) -> ASCII; same 0 / 0x80 conventions as basic_table */
+	0,   0, 0, 0, 0,   0,    0, 0, 0,   0,   0, 0, 0,   0,   0,   0,	/* 0x00-0x0F */
+	0,   0, 0, 0, '^', 0,    0, 0, 0,   0,   0, 0, 0,   0,   0,   0,	/* 0x10-0x1F */
+	0,   0, 0, 0, 0,   0,    0, 0, '{', '}', 0, 0, 0,   0,   0,   '\\'|0x80,	/* 0x20-0x2F: backslash prints as \\ */
+	0,   0, 0, 0, 0,   0,    0, 0, 0,   0,   0, 0, '[', '~', ']', 0,	/* 0x30-0x3F */
+	'|', 0, 0, 0, 0,   0,    0, 0, 0,   0,   0, 0, 0,   0,   0,   0,	/* 0x40-0x4F */
+	0,   0, 0, 0, 0,   0,    0, 0, 0,   0,   0, 0, 0,   0,   0,   0,	/* 0x50-0x5F */
+	0,   0, 0, 0, 0,'E'|0x80,0, 0, 0,   0,   0, 0, 0,   0,   0,   0,	/* 0x60-0x6F: 0x65 prints as \E */
+	0,   0, 0, 0, 0,   0,    0, 0, 0,   0,   0, 0, 0,   0,   0,   0	/* 0x70-0x7F */
+};
+
+/*
+ * Write the nbytes GSM7 characters at data to outf as a double-quoted
+ * ASCII string.
+ *
+ * Characters with an ASCII equivalent in basic_table or escape_table
+ * are printed directly, with a backslash in front when the table
+ * entry carries the 0x80 flag.  An ESC (0x1B) that is last in the
+ * input or is followed by another ESC, LF or CR prints as "\e".
+ * Every other byte prints as a "\XX" hex escape ("\e\XX" after ESC).
+ * That includes bytes with bit 7 set: they lie outside the 128-entry
+ * tables and must never be used to index them.
+ */
+void
+print_gsm7_string_to_file(data, nbytes, outf)
+	u_char *data;
+	unsigned nbytes;
+	FILE *outf;
+{
+	u_char *dp, *endp;
+	int b, c;
+
+	dp = data;
+	endp = data + nbytes;
+	putc('"', outf);
+	while (dp < endp) {
+		b = *dp++;
+		if (b == 0x1B) {
+			if (dp >= endp || *dp == 0x1B || *dp == '\n' ||
+			    *dp == '\r') {
+				putc('\\', outf);
+				putc('e', outf);
+				continue;
+			}
+			b = *dp++;
+			/* out-of-table bytes fall through to the hex escape */
+			c = (b & 0x80) ? 0 : escape_table[b];
+			if (!c) {
+				fprintf(outf, "\\e\\%02X", b);
+				continue;
+			}
+		} else {
+			c = (b & 0x80) ? 0 : basic_table[b];
+			if (!c) {
+				fprintf(outf, "\\%02X", b);
+				continue;
+			}
+		}
+		/* 0x80 in a table entry asks for a backslash prefix */
+		if (c & 0x80) {
+			putc('\\', outf);
+			c &= 0x7F;
+		}
+		putc(c, outf);
+	}
+	putc('"', outf);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/gsm7_decode_tables.c	Thu Jun 13 02:29:29 2024 +0000
@@ -0,0 +1,36 @@
+/*
+ * This library module contains the tables for decoding the GSM 7-bit
+ * default alphabet (03.38 or 23.038) into Unicode.
+ */
+
+#include <sys/types.h>
+
+u_short gsm7_decode_table[128] = {	/* GSM7 default alphabet code -> Unicode code point */
+	'@',  0xA3, '$',  0xA5, 0xE8, 0xE9, 0xF9, 0xEC,	/* 0x00-0x07 */
+	0xF2, 0xC7, '\n', 0xD8, 0xF8, '\r', 0xC5, 0xE5,	/* 0x08-0x0F */
+	0x394,'_',  0x3A6,0x393,0x39B,0x3A9,0x3A0,0x3A8,	/* 0x10-0x17: Greek capitals */
+	0x3A3,0x398,0x39E, 0,   0xC6, 0xE6, 0xDF, 0xC9,	/* 0x18-0x1F: 0x1B (ESC) has no mapping */
+	' ',  '!',  '"',  '#',  0xA4, '%',  '&',  0x27,	/* 0x20-0x27 */
+	'(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',	/* 0x28-0x2F */
+	'0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',	/* 0x30-0x37 */
+	'8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',	/* 0x38-0x3F */
+	0xA1, 'A',  'B',  'C',  'D',  'E',  'F',  'G',	/* 0x40-0x47 */
+	'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',	/* 0x48-0x4F */
+	'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',	/* 0x50-0x57 */
+	'X',  'Y',  'Z',  0xC4, 0xD6, 0xD1, 0xDC, 0xA7,	/* 0x58-0x5F */
+	0xBF, 'a',  'b',  'c',  'd',  'e',  'f',  'g',	/* 0x60-0x67 */
+	'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',	/* 0x68-0x6F */
+	'p',  'q',  'r',  's',  't',  'u',  'v',  'w',	/* 0x70-0x77 */
+	'x',  'y',  'z',  0xE4, 0xF6, 0xF1, 0xFC, 0xE0	/* 0x78-0x7F */
+};
+
+u_short gsm7ext_decode_table[128] = {	/* code following ESC (0x1B) -> Unicode code point; 0 = no mapping */
+	0,   0, 0, 0, 0,   0,   0, 0, 0,   0,   '\n', 0, 0,   '\r', 0,   0,	/* 0x00-0x0F */
+	0,   0, 0, 0, '^', 0,   0, 0, 0,   0,   0,    0, 0,   0,    0,   0,	/* 0x10-0x1F */
+	0,   0, 0, 0, 0,   0,   0, 0, '{', '}', 0,    0, 0,   0,    0,   '\\',	/* 0x20-0x2F */
+	0,   0, 0, 0, 0,   0,   0, 0, 0,   0,   0,    0, '[', '~',  ']', 0,	/* 0x30-0x3F */
+	'|', 0, 0, 0, 0,   0,   0, 0, 0,   0,   0,    0, 0,   0,    0,   0,	/* 0x40-0x4F */
+	0,   0, 0, 0, 0,   0,   0, 0, 0,   0,   0,    0, 0,   0,    0,   0,	/* 0x50-0x5F */
+	0,   0, 0, 0, 0,0x20AC, 0, 0, 0,   0,   0,    0, 0,   0,    0,   0,	/* 0x60-0x6F: 0x65 is the Euro sign */
+	0,   0, 0, 0, 0,   0,   0, 0, 0,   0,   0,    0, 0,   0,    0,   0	/* 0x70-0x7F */
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/gsm7_unpack.c	Thu Jun 13 02:29:29 2024 +0000
@@ -0,0 +1,22 @@
+/*
+ * This library module implements unpacking of GSM 7-bit data
+ * from packed octets.
+ */
+
+#include <sys/types.h>
+
+/*
+ * Right shift that brings septet number n (taken mod 8) down to bit 0
+ * of the 16-bit window starting at the octet where that septet begins.
+ */
+static u_char shift[8] = {0, 7, 6, 5, 4, 3, 2, 1};
+
+/*
+ * Unpack nseptets GSM 7-bit characters from the packed octets at inbuf
+ * into outbuf, one septet (0x00-0x7F) per output byte.
+ *
+ * Only octets that actually carry bits of the requested septets are
+ * read, i.e. the first (nseptets * 7 + 7) / 8 octets of inbuf.
+ */
+gsm7_unpack(inbuf, outbuf, nseptets)
+	u_char *inbuf, *outbuf;
+	unsigned nseptets;
+{
+	u_char *inp = inbuf, *outp = outbuf;
+	unsigned n, sh, bits;
+
+	for (n = 0; n < nseptets; n++) {
+		sh = shift[n&7];
+		bits = inp[0] >> sh;
+		/*
+		 * With a shift of 0 or 1 the whole septet lies in inp[0].
+		 * The next octet is fetched only when it contributes bits,
+		 * so the last septet never causes a read past the input.
+		 */
+		if (sh > 1)
+			bits |= inp[1] << (8 - sh);
+		*outp++ = bits & 0x7F;
+		/* every septet but the first of each group of 8 ends an octet */
+		if (n & 7)
+			inp++;
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/gsmtime.c	Thu Jun 13 02:29:29 2024 +0000
@@ -0,0 +1,27 @@
+/*
+ * This library module implements decoding of GSM timestamps.
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+
+/*
+ * Format the 7-octet GSM timestamp at inbuf as text in outbuf.
+ *
+ * Each octet has its two nibbles swapped and is printed as two hex
+ * digits, giving "XX/XX/XX,XX:XX:XX" followed by a sign and a final
+ * two-digit field.  The sign is '-' when bit 7 of the swapped last
+ * octet is set, and that bit is removed before printing.  The output
+ * is always 20 characters plus the terminating NUL.
+ */
+gsm_timestamp_decode(inbuf, outbuf)
+	u_char *inbuf;
+	char *outbuf;
+{
+	u_char bcd[7];
+	int n, sign;
+
+	/* swap the nibbles so that each octet reads naturally in hex */
+	for (n = 0; n < 7; n++)
+		bcd[n] = ((inbuf[n] & 0xF) << 4) | (inbuf[n] >> 4);
+	/* the top bit of the last field is the sign, not part of the number */
+	sign = (bcd[6] & 0x80) ? '-' : '+';
+	bcd[6] &= 0x7F;
+	sprintf(outbuf, "%02X/%02X/%02X,%02X:%02X:%02X%c%02X", bcd[0], bcd[1],
+		bcd[2], bcd[3], bcd[4], bcd[5], sign, bcd[6]);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/hexdecode.c	Thu Jun 13 02:29:29 2024 +0000
@@ -0,0 +1,37 @@
+/*
+ * This library module implements decoding of long hex strings,
+ * such as SMS PDUs.
+ */
+
+#include <sys/types.h>
+#include <ctype.h>
+
+/*
+ * Decode the NUL-terminated string of hex digit pairs at inbuf into
+ * binary octets at outbuf, storing at most outmax of them.
+ *
+ * Returns the number of octets stored, or -1 if a character that is
+ * not a hex digit (or a trailing unpaired digit) is met before the
+ * end of the string or before outmax octets have been stored.
+ * Once outmax octets are stored, decoding stops as soon as the next
+ * pair has been validated; the rest of the input is not examined.
+ */
+decode_hex_line(inbuf, outbuf, outmax)
+	char *inbuf;
+	u_char *outbuf;
+	unsigned outmax;
+{
+	char *inp = inbuf;
+	u_char *outp = outbuf;
+	unsigned outcnt = 0;
+	int c, d[2], i;
+
+	while (*inp) {
+		/*
+		 * <ctype.h> functions take an unsigned char value (or EOF);
+		 * a plain char of 0x80 or above would arrive negative on
+		 * signed-char platforms, which is undefined behavior.
+		 */
+		if (!isxdigit((u_char) inp[0]) || !isxdigit((u_char) inp[1]))
+			return(-1);
+		if (outcnt >= outmax)
+			break;
+		for (i = 0; i < 2; i++) {
+			c = (u_char) *inp++;
+			if (isdigit(c))
+				d[i] = c - '0';
+			else if (isupper(c))
+				d[i] = c - 'A' + 10;
+			else
+				d[i] = c - 'a' + 10;
+		}
+		*outp++ = (d[0] << 4) | d[1];
+		outcnt++;
+	}
+	return outcnt;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/hexdump.c	Thu Jun 13 02:29:29 2024 +0000
@@ -0,0 +1,38 @@
+/*
+ * This library module implements a simple hex dump facility.
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+
+/*
+ * Print dumplen bytes starting at dumpbuf to stdout as a hex dump,
+ * 16 bytes per line: the line's offset in hex, the bytes in hex with
+ * an extra space after the 8th and 16th columns (short lines padded
+ * with blanks), then the same bytes as text with everything outside
+ * ' '..'~' shown as '.'.
+ */
+msg_bits_hexdump(dumpbuf, dumplen)
+	u_char *dumpbuf;
+	unsigned dumplen;
+{
+	u_char *row;
+	unsigned off, cnt, col, ch;
+
+	for (off = 0; off < dumplen; off += cnt) {
+		row = dumpbuf + off;
+		/* a line holds whatever is left, capped at 16 bytes */
+		cnt = dumplen - off;
+		if (cnt > 16)
+			cnt = 16;
+		printf("%02X:  ", off);
+		/* hex columns; all 16 are always laid out for alignment */
+		for (col = 0; col < 16; col++) {
+			if (col < cnt)
+				printf("%02X ", row[col]);
+			else
+				fputs("   ", stdout);
+			if ((col & 7) == 7)
+				putchar(' ');
+		}
+		/* text column */
+		for (col = 0; col < cnt; col++) {
+			ch = row[col];
+			putchar((ch >= ' ' && ch <= '~') ? ch : '.');
+		}
+		putchar('\n');
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/number_decode.c	Thu Jun 13 02:29:29 2024 +0000
@@ -0,0 +1,29 @@
+/*
+ * This library module implements the decoding of number (address) digits.
+ */
+
+#include <sys/types.h>
+
+/* printable character for each possible 4-bit address digit value */
+char gsm_address_digits[16] =
+	{'0','1','2','3','4','5','6','7','8','9','*','#','a','b','c','?'};
+
+/*
+ * Decode ndigits semi-octet address digits from inbuf into outbuf as
+ * a NUL-terminated string.  Each input octet carries two digits, the
+ * low nibble first; when ndigits is odd, the high nibble of the last
+ * octet is ignored.  outbuf must have room for ndigits + 1 bytes.
+ */
+decode_address_digits(inbuf, outbuf, ndigits)
+	u_char *inbuf;
+	char *outbuf;
+	unsigned ndigits;
+{
+	unsigned pos, octet, nibble;
+
+	for (pos = 0; pos < ndigits; pos++) {
+		octet = inbuf[pos >> 1];
+		/* even positions take the low nibble, odd ones the high */
+		nibble = (pos & 1) ? (octet >> 4) : (octet & 0xF);
+		outbuf[pos] = gsm_address_digits[nibble];
+	}
+	outbuf[ndigits] = '\0';
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/scaddr.c	Thu Jun 13 02:29:29 2024 +0000
@@ -0,0 +1,18 @@
+/*
+ * This library module contains the code that figures out how many digits
+ * there are in a PDU-encoded Service Centre address.
+ */
+
+#include <sys/types.h>
+
+/*
+ * Return the number of digits in the PDU-encoded Service Centre
+ * address at sca.
+ *
+ * sca[0] is the count of octets that follow it; the first of those is
+ * not a digit octet, and each remaining octet carries two digits.
+ * A last octet whose high nibble is 0xF holds only one digit.
+ *
+ * A length octet of 0 or 1 leaves no digit octets at all, so 0 is
+ * returned.  Without this check the unsigned arithmetic below would
+ * wrap around and the type octet could be mistaken for a digit octet.
+ */
+sc_addr_ndigits(sca)
+	u_char *sca;
+{
+	unsigned nb, nd;
+
+	nb = sca[0];
+	if (nb < 2)
+		return(0);
+	nd = (nb - 1) * 2;
+	/* 0xF in the high nibble of the last octet is filler, not a digit */
+	if ((sca[nb] & 0xF0) == 0xF0)
+		nd--;
+	return nd;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libcoding/ucs2_decode.c	Thu Jun 13 02:29:29 2024 +0000
@@ -0,0 +1,62 @@
+/*
+ * This library module implements the conversion of UCS2-encoded data
+ * (typically received in SMS) into ASCII, ISO 8859-1 or UTF-8,
+ * maintaining parallelism with the corresponding function for decoding
+ * GSM7-encoded data.
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+
+/*
+ * Convert inlen bytes of big-endian UCS-2 at inbuf into a
+ * NUL-terminated string at outbuf; an odd trailing byte is ignored.
+ *
+ * ascii_ext is handed to is_decoded_char_ok() to decide which code
+ * points may appear literally; when it equals 2 they are written as
+ * UTF-8, otherwise as single bytes.  newline_ok nonzero lets LF
+ * through literally instead of as "\n".  If outlenp is not NULL, the
+ * output length (without the NUL) is stored there.
+ *
+ * A high surrogate immediately followed by a low surrogate is
+ * combined into one code point, written as UTF-8 when ascii_ext is 2
+ * and as "\UXXXXXX" otherwise.  Backslash and CR are always escaped
+ * ("\\", "\r"); code points that may not appear literally are written
+ * as "\uXXXX".  No output bounds checking is done.
+ */
+ucs2_to_ascii_or_ext(inbuf, inlen, outbuf, outlenp, ascii_ext, newline_ok)
+	u_char *inbuf, *outbuf;
+	unsigned inlen, *outlenp;
+{
+	u_char *src, *limit, *dst;
+	unsigned cp, hi, lo;
+
+	src = inbuf;
+	limit = inbuf + (inlen & ~1);
+	dst = outbuf;
+	while (src < limit) {
+		if ((limit - src) >= 4 && (src[0] & 0xFC) == 0xD8 &&
+		    (src[2] & 0xFC) == 0xDC) {
+			/* 10 payload bits from each half of the pair */
+			hi = ((src[0] & 3) << 8) | src[1];
+			lo = ((src[2] & 3) << 8) | src[3];
+			cp = ((hi << 10) | lo) + 0x10000;
+			src += 4;
+			if (ascii_ext == 2)
+				dst += emit_utf8_char(cp, dst);
+			else	/* always 8 chars: cp needs at most 6 digits */
+				dst += sprintf((char *) dst, "\\U%06X", cp);
+			continue;
+		}
+		cp = (src[0] << 8) | src[1];
+		src += 2;
+		switch (cp) {
+		case '\\':
+			*dst++ = '\\';
+			*dst++ = '\\';
+			continue;
+		case '\r':
+			*dst++ = '\\';
+			*dst++ = 'r';
+			continue;
+		case '\n':
+			if (!newline_ok) {
+				*dst++ = '\\';
+				*dst++ = 'n';
+			} else
+				*dst++ = '\n';
+			continue;
+		}
+		if (!is_decoded_char_ok(cp, ascii_ext))
+			/* always 6 chars: cp is a 16-bit value here */
+			dst += sprintf((char *) dst, "\\u%04X", cp);
+		else if (ascii_ext == 2)
+			dst += emit_utf8_char(cp, dst);
+		else
+			*dst++ = cp;
+	}
+	*dst = '\0';
+	if (outlenp)
+		*outlenp = dst - outbuf;
+}