FreeCalypso > hg > freecalypso-tools
annotate uptools/libcoding/utf8_decode2.c @ 988:5a6019ed7e72
pln-ppb-test: implement read-id
author | Mychaela Falconia <falcon@freecalypso.org> |
---|---|
date | Sun, 03 Dec 2023 00:04:18 +0000 |
parents | 6bf473f77fc4 |
children |
rev | line source |
---|---|
376
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
1 /* |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
2 * This library module implements the function for converting UTF-8 input |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
3 * to UCS-2 in outgoing SMS composition. |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
4 */ |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
5 |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
6 #include <sys/types.h> |
967
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
7 #include <ctype.h> |
376
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
8 |
967
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
9 static int |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
10 handle_escape(ipp, outp) |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
11 u_char **ipp; |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
12 unsigned *outp; |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
13 { |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
14 unsigned c, n, acc; |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
15 |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
16 c = *(*ipp)++; |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
17 switch (c) { |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
18 case '"': |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
19 case '\\': |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
20 *outp = c; |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
21 return(0); |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
22 case 'n': |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
23 *outp = '\n'; |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
24 return(0); |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
25 case 'r': |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
26 *outp = '\r'; |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
27 return(0); |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
28 case 'u': |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
29 acc = 0; |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
30 for (n = 0; n < 4; n++) { |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
31 c = *(*ipp)++; |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
32 if (!isxdigit(c)) |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
33 return(-3); |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
34 acc <<= 4; |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
35 acc |= decode_hex_digit(c); |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
36 } |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
37 *outp = acc; |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
38 return(0); |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
39 default: |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
40 return(-3); |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
41 } |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
42 } |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
43 |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
44 utf8_to_ucs2(inbuf, outbuf, outmax, outlenp, allow_escape) |
376
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
45 u_char *inbuf; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
46 u_short *outbuf; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
47 unsigned outmax, *outlenp; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
48 { |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
49 u_char *ip = inbuf; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
50 u_short *op = outbuf; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
51 unsigned outcnt = 0, c, n, uni; |
967
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
52 int rc; |
376
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
53 |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
54 while (c = *ip++) { |
967
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
55 if (c == '\\' && allow_escape) { |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
56 rc = handle_escape(&ip, &uni); |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
57 if (rc < 0) |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
58 return(rc); |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
59 goto gotuni; |
6bf473f77fc4
fcup-smsend: support backslash escapes in UCS-2 mode too
Mychaela Falconia <falcon@freecalypso.org>
parents:
376
diff
changeset
|
60 } |
376
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
61 if (c < 0x80) { |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
62 uni = c; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
63 goto gotuni; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
64 } |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
65 if (c < 0xC0 || c > 0xEF) |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
66 return(-1); |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
67 uni = c & 0x1F; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
68 if (c >= 0xE0) |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
69 n = 2; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
70 else |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
71 n = 1; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
72 for (; n; n--) { |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
73 c = *ip++; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
74 if (c < 0x80 || c > 0xBF) |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
75 return(-1); |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
76 uni <<= 6; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
77 uni |= c & 0x3F; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
78 } |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
79 gotuni: if (outcnt >= outmax) |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
80 return(-2); |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
81 *op++ = uni; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
82 outcnt++; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
83 } |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
84 *outlenp = outcnt; |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
85 return(0); |
83c755829e31
uptools/libcoding: added function for turning UTF-8 into UCS-2
Mychaela Falconia <falcon@freecalypso.org>
parents:
diff
changeset
|
86 } |