annotate uptools/libcoding/utf8_decode2.c @ 988:5a6019ed7e72

pln-ppb-test: implement read-id
author Mychaela Falconia <falcon@freecalypso.org>
date Sun, 03 Dec 2023 00:04:18 +0000
parents 6bf473f77fc4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
376
83c755829e31 uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff changeset
1 /*
83c755829e31 uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff changeset
2 * This library module implements the function for converting UTF-8 input
83c755829e31 uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff changeset
3 * to UCS-2 in outgoing SMS composition.
83c755829e31 uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff changeset
4 */
83c755829e31 uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff changeset
5
83c755829e31 uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff changeset
6 #include <sys/types.h>
967
6bf473f77fc4 fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents: 376
diff changeset
7 #include <ctype.h>
376
83c755829e31 uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff changeset
8
967
6bf473f77fc4 fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents: 376
diff changeset
/*
 * Return the numeric value (0-15) of the hexadecimal digit c,
 * or -1 if c is not a hexadecimal digit.
 */
static int
escape_hex_value(unsigned c)
{
	if (c >= '0' && c <= '9')
		return (int)(c - '0');
	if (c >= 'a' && c <= 'f')
		return (int)(c - 'a') + 10;
	if (c >= 'A' && c <= 'F')
		return (int)(c - 'A') + 10;
	return(-1);
}

/*
 * Decode one backslash escape.  The caller has already consumed the
 * introducing '\'; on entry *ipp addresses the character that follows it.
 *
 * ipp  - in/out: address of the caller's read pointer.  It is advanced
 *        past every input character examined here, including the one
 *        that causes a failure.
 * outp - out: receives the code unit produced by the escape; written
 *        only on success.
 *
 * Recognized escapes:
 *	\"  \\	the character itself
 *	\n	newline
 *	\r	carriage return
 *	\uXXXX	exactly four hexadecimal digits (either case)
 *
 * Returns 0 on success, or -3 if the escape letter is not recognized or
 * \u is not followed by four hex digits.  A terminating NUL is neither a
 * valid escape letter nor a hex digit, so a truncated escape fails with
 * -3 as soon as the NUL is read.
 *
 * The input pointer is spelled unsigned char, which is the type behind
 * the BSD u_char typedef; the hex digits are converted locally so that
 * no undeclared external function is called.
 */
static int
handle_escape(unsigned char **ipp, unsigned *outp)
{
	unsigned c, n, acc;
	int digit;

	c = *(*ipp)++;
	switch (c) {
	case '"':
	case '\\':
		*outp = c;
		return(0);
	case 'n':
		*outp = '\n';
		return(0);
	case 'r':
		*outp = '\r';
		return(0);
	case 'u':
		acc = 0;
		for (n = 0; n < 4; n++) {
			c = *(*ipp)++;
			digit = escape_hex_value(c);
			if (digit < 0)
				return(-3);
			acc = (acc << 4) | (unsigned) digit;
		}
		*outp = acc;
		return(0);
	default:
		return(-3);
	}
}
6bf473f77fc4 fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents: 376
diff changeset
43
6bf473f77fc4 fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents: 376
diff changeset
/*
 * Convert a NUL-terminated UTF-8 string into an array of 16-bit UCS-2
 * code units.
 *
 * inbuf	- NUL-terminated input bytes.
 * outbuf	- destination array of 16-bit units.
 * outmax	- capacity of outbuf, counted in 16-bit units.
 * outlenp	- receives the number of units stored; written only when
 *		  the function returns 0.
 * allow_escape	- when nonzero, a backslash in the input introduces an
 *		  escape that is decoded by handle_escape(); when zero,
 *		  a backslash is an ordinary character.
 *
 * Only 1-, 2- and 3-byte UTF-8 sequences are accepted, because anything
 * longer encodes a code point that does not fit in one 16-bit unit.
 *
 * Returns:
 *	 0	success
 *	-1	invalid UTF-8: stray continuation byte, lead byte above
 *		0xEF, missing or malformed continuation byte, or an
 *		overlong encoding
 *	-2	the output would exceed outmax units
 *	<0	any error code returned by handle_escape()
 *
 * The parameter types are spelled with the standard types underlying the
 * BSD u_char and u_short typedefs, and the return type and the type of
 * allow_escape are given explicitly as int instead of relying on the
 * implicit-int rule, which ISO C removed in C99.
 */
int
utf8_to_ucs2(unsigned char *inbuf, unsigned short *outbuf, unsigned outmax,
	     unsigned *outlenp, int allow_escape)
{
	unsigned char *ip = inbuf;
	unsigned short *op = outbuf;
	unsigned outcnt = 0, c, n, uni, minval;
	int rc;

	while ((c = *ip++) != 0) {
		if (c == '\\' && allow_escape) {
			rc = handle_escape(&ip, &uni);
			if (rc < 0)
				return(rc);
			goto gotuni;
		}
		if (c < 0x80) {
			uni = c;
			goto gotuni;
		}
		/* 0x80-0xBF cannot start a sequence; 0xF0 and up exceed UCS-2 */
		if (c < 0xC0 || c > 0xEF)
			return(-1);
		if (c >= 0xE0) {
			n = 2;
			minval = 0x800;
			uni = c & 0x0F;
		} else {
			n = 1;
			minval = 0x80;
			uni = c & 0x1F;
		}
		for (; n; n--) {
			/*
			 * A NUL terminator fails the range check below,
			 * so a truncated sequence never reads past the
			 * end of the string.
			 */
			c = *ip++;
			if (c < 0x80 || c > 0xBF)
				return(-1);
			uni = (uni << 6) | (c & 0x3F);
		}
		/*
		 * Reject overlong forms: a value that would have fit in a
		 * shorter sequence is not valid UTF-8.
		 */
		if (uni < minval)
			return(-1);
gotuni:		if (outcnt >= outmax)
			return(-2);
		*op++ = uni;
		outcnt++;
	}
	*outlenp = outcnt;
	return(0);
}