FreeCalypso > hg > freecalypso-reveng
comparison fluid-mnf/lzencode.c @ 311:9cecc930d78f
fluid-mnf: original source from TI,
defenestrated line endings and rearranged directory structure,
but no *.[ch] source file content changes yet
author | Mychaela Falconia <falcon@freecalypso.org> |
---|---|
date | Sat, 29 Feb 2020 05:36:07 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
310:ae39d76d5b7a | 311:9cecc930d78f |
---|---|
1 /******************************************************************************* | |
2 * | |
3 * Simple LZ77 compressor (c) Texas Instruments, Jesper Pedersem TI-DK | |
4 * | |
5 * File format: | |
6 * --------------------------------------------------- | |
7 * | Tag | Data | Tag | Data | Tag | Data etc... | |
8 * --------------------------------------------------- | |
9 * | |
10 * Where TAG is 1 bit indicating how to interpret the following data | |
11 * (0 = uncompressed, 1 = compressed) | |
12 * | |
13 * Depending on TAG the Data is: | |
14 * 0 : One uncompressed char of raw data | |
15 * 1 : A length of and a pointer to a match in the previous data | |
16 * | |
17 * ---------------------------------------- | |
18 * Length + pointer = | Length (3 bits) | Position (12 bits) | | |
19 * ---------------------------------------- | |
20 * MSB LSB MSB LSB | |
21 * | |
22 * Compression threshold is 1 i.e. add 1 to Length to get the actual Length. | |
23 * | |
24 * It has been found that 5+14 bits is the most optimum for our kind of | |
25 * data, but we use only 3+12 bits for improoved speed. Compression is | |
26 * almost as good because we can set the threshold at 1 instead of 2 (2 char | |
27 * matches can be compressed to 12+3 bits) | |
28 * | |
29 * With a 12 bit position we can use 4K for the sliding window. Position | |
30 * indicates where the match exists as an absolute address into the | |
31 * window. This way we have a very simple "pseudo sliding window" which | |
32 * automatically enables RLE (Run Length Encoding). | |
33 * | |
34 * The code performs a "lazy" string search i.e. a match will be dropped if | |
35 * it prevents an even greater match for the very next string. This yeild | |
36 * slightly better compression (approx. 1%) at minor cost of compression | |
37 * speed - decompression time is the same! A second order lazy match could | |
38 * be implemented, but it gives an insignificant improovement of about 0.1% | |
39 * when compressing executeables | |
40 * | |
41 * Searching in the window is done with help from an index and we maintain | |
42 * the window as a single linked list to further speed up the compression. | |
43 * | |
44 ******************************************************************************/ | |
45 | |
46 #include "fluid.h" | |
47 #include "lz.h" | |
48 | |
49 #include <stdlib.h> | |
50 #include <stdio.h> | |
51 #include <string.h> | |
52 #include <ctype.h> | |
53 | |
54 | |
55 /****************************************************************************** | |
56 * | |
57 ******************************************************************************/ | |
58 | |
59 #define NIL -1 // Used for indexing purposes | |
60 | |
61 // When we find a match put it into a struct like this. | |
62 typedef struct { | |
63 int16 pos; | |
64 int8 len; | |
65 } match_def_t; | |
66 | |
67 | |
68 /****************************************************************************** | |
69 * Globals | |
70 ******************************************************************************/ | |
71 | |
72 static uint8 window[WINDOW_SIZE]; // Our sliding window buffer | |
73 static int16 window_pos; // And the current position in it | |
74 static uint8 lookahead; // number of bytes in the lookahead area | |
75 | |
76 // Global input/output buffer pointers | |
77 static uint8 *buffer_out; | |
78 static uint8 *buffer_in; | |
79 static uint32 buffer_out_size; | |
80 static uint32 buffer_in_size; | |
81 | |
82 static uint32 total_in_size = 0; | |
83 static uint32 total_out_size = 0; | |
84 | |
85 static uint8 *p_tag_byte = 0; | |
86 static uint8 tag_pos = 0; | |
87 | |
88 static int16 lookup[256]; // Table for looking up a link to any given character | |
89 static int16 lookup_end[256]; // The last link in the window. | |
90 static int16 next[WINDOW_SIZE]; // Contain links to next similar chars | |
91 | |
92 | |
93 | |
94 /****************************************************************************** | |
95 * getchar/putchar | |
96 ******************************************************************************/ | |
97 | |
98 static int lze_getchar(void) | |
99 { | |
100 total_in_size++; | |
101 | |
102 if (buffer_in_size-- > 0) | |
103 return *buffer_in++; | |
104 | |
105 buffer_in_size++; | |
106 | |
107 return EOF; | |
108 } | |
109 | |
110 static int lze_putchar(unsigned char data) | |
111 { | |
112 total_out_size++; | |
113 | |
114 *buffer_out++ = data; | |
115 buffer_out_size++; | |
116 | |
117 return 0; | |
118 } | |
119 | |
120 static int lze_putword(uint16 data) | |
121 { | |
122 total_out_size += 2; | |
123 | |
124 *buffer_out++ = (uint8)((data >> 8) & 0x00FF); | |
125 *buffer_out++ = (uint8)(data & 0x00FF); | |
126 buffer_out_size += 2; | |
127 | |
128 return 0; | |
129 } | |
130 | |
131 /****************************************************************************** | |
132 * Code | |
133 ******************************************************************************/ | |
134 | |
135 | |
136 // Searches for the best possible match to the string at 'source_pos' within | |
137 // the window. Lazy searches is made faster by specifying a match to beat | |
138 // 'match_len' so we don't vaste time by testing too small matches. Will | |
139 // stop at any match 'max_len' in size. | |
140 match_def_t find_best_match(int16 source_pos, int16 match_len, int16 max_len) | |
141 { | |
142 match_def_t current, best; | |
143 int16 start_pos; | |
144 | |
145 best.len = match_len; | |
146 | |
147 if ((start_pos = lookup[window[source_pos]]) != NIL) // Get the first entry | |
148 { | |
149 current.pos = start_pos; | |
150 do | |
151 { | |
152 if (window[WINDOW_MASK & (current.pos + best.len)] == | |
153 window[WINDOW_MASK & (source_pos + best.len)]) // Possible match? | |
154 { | |
155 current.len = 1; | |
156 | |
157 while((window[WINDOW_MASK & (current.pos + current.len)] == | |
158 window[WINDOW_MASK & (source_pos + current.len)]) && | |
159 current.len < max_len) | |
160 current.len++; | |
161 if (current.len > best.len) | |
162 best = current; | |
163 } | |
164 if (best.len >= max_len) | |
165 break; // no need to search for better matches when we have the best | |
166 | |
167 current.pos = next[current.pos]; // jump to next matching char | |
168 } while(current.pos != NIL); // All. possibilities tested? | |
169 } | |
170 if (best.len <= match_len) | |
171 best.len = 0; | |
172 return best; | |
173 } | |
174 | |
175 // Will remove 'count' links from 'begin' to make room for new data. Will be | |
176 // called when we need to overwrite some of the window by new lookahead | |
177 // chars. | |
178 void clear_index_positions(uint16 begin, uint16 count) | |
179 { | |
180 uint16 i, pos; | |
181 | |
182 for (i = 0; i < count; i++) | |
183 { | |
184 pos = WINDOW_MASK & (begin +i ); | |
185 | |
186 if (lookup[window[pos]] == pos) { | |
187 if (lookup_end[window[pos]] == pos) | |
188 lookup[window[pos]] = lookup_end[window[pos]] = NIL; | |
189 else | |
190 lookup[window[pos]] = next[pos]; | |
191 } | |
192 } | |
193 } | |
194 | |
195 | |
196 // Generate indexes for a string, so the match routines can find the | |
197 // characters in it very fast! We need to clean up obsolete indexes after | |
198 // this routine has been called | |
199 void index_string(int16 source_pos, int16 source_len) | |
200 { | |
201 int16 i, j; | |
202 uint8 c; | |
203 | |
204 for (i=0; i<source_len; i++) | |
205 { | |
206 j = WINDOW_MASK & (source_pos + i); | |
207 c = window[j]; | |
208 | |
209 next[j] = NIL; | |
210 if (lookup[c] == NIL) // first time we encounter this char? | |
211 lookup[c] = j; // generate index for this new char | |
212 else | |
213 next[lookup_end[c]] = j; | |
214 lookup_end[c] = j; | |
215 } | |
216 } | |
217 | |
218 | |
219 void update_tag_byte(uint8 compressed) | |
220 { | |
221 if(tag_pos == 0x00) | |
222 { | |
223 p_tag_byte = buffer_out++; // Grap a byte in the output stream | |
224 buffer_out_size++; | |
225 | |
226 compressed ? (*p_tag_byte = 0x80) : (*p_tag_byte = 0x00); | |
227 tag_pos = 0x40; | |
228 } | |
229 else | |
230 { | |
231 if(compressed) | |
232 *p_tag_byte |= tag_pos; | |
233 | |
234 tag_pos >>= 0x01; | |
235 } | |
236 } | |
237 | |
238 | |
239 | |
240 // Puts an uncompressed char into the file and update the window | |
241 // position. We cannot just putc(data) because the offset might be any | |
242 // number of bits off. | |
243 void save_raw(uint8 data) | |
244 { | |
245 update_tag_byte(0); | |
246 lze_putchar(data); | |
247 | |
248 index_string(window_pos, 1); | |
249 window_pos = WINDOW_MASK & (window_pos + 1); | |
250 lookahead--; | |
251 } | |
252 | |
253 // Puts a compressed match (length and position) into the file and update | |
254 // the window position. | |
255 void save_compressed(uint8 len, uint16 pos) | |
256 { | |
257 uint16 tmp = 0x0000; | |
258 | |
259 update_tag_byte(1); | |
260 | |
261 tmp = ((uint8)(len - (THRESHOLD + 1))) << POS_BITS; | |
262 tmp |= ((uint16)(pos & POS_MASK)); | |
263 | |
264 lze_putword(tmp); | |
265 | |
266 index_string(window_pos, len); | |
267 window_pos = WINDOW_MASK & (window_pos + len); | |
268 lookahead -= len; | |
269 } | |
270 | |
271 void compress_init(void) | |
272 { | |
273 int i; | |
274 | |
275 // Initialize - whole window is unindexed at beginning | |
276 for (i = 0; i < 256; i++) | |
277 lookup[i] = lookup_end[i] = NIL; | |
278 for (i = 0; i < WINDOW_SIZE; i++) | |
279 next[i] = window[i] = NIL; | |
280 | |
281 buffer_out_size = 0; | |
282 window_pos = 0; | |
283 } | |
284 | |
285 int compress(char *outbuf, char *inbuf, int size) | |
286 { | |
287 int ch, eof; | |
288 match_def_t match, lazy_match; | |
289 | |
290 buffer_out = outbuf; | |
291 buffer_out_size = 0; | |
292 buffer_in = inbuf; | |
293 buffer_in_size = size; | |
294 if (size <= 0) | |
295 return 0; | |
296 | |
297 p_tag_byte = NULL; | |
298 tag_pos = 0x00; | |
299 | |
300 do | |
301 { | |
302 clear_index_positions(WINDOW_MASK & (window_pos + lookahead), MAX_MATCH); | |
303 | |
304 // (re-)fill the lookahead buffer from the bit-stream | |
305 while (lookahead < MAX_MATCH) { | |
306 eof = ((ch = lze_getchar()) == EOF); | |
307 window[WINDOW_MASK & (window_pos + lookahead++)] = (uint8) ch; | |
308 if (eof) { | |
309 window[WINDOW_MASK & (window_pos + --lookahead)] = 0; | |
310 break; | |
311 } | |
312 } | |
313 // End of compression? | |
314 if (lookahead == 0) | |
315 break; | |
316 | |
317 // Search for a match so we can compress the data | |
318 match = find_best_match(window_pos, THRESHOLD, lookahead); | |
319 | |
320 if (match.len > THRESHOLD) { | |
321 | |
322 lazy_match.len = 0; | |
323 | |
324 // check if the lazy match is better | |
325 if (match.len < lookahead - 1) { | |
326 lazy_match = find_best_match(WINDOW_MASK & (window_pos + 1), | |
327 match.len, lookahead-1); | |
328 } | |
329 if (lazy_match.len > match.len) { | |
330 // skip the current and use the lazy | |
331 save_raw(window[window_pos]); | |
332 save_compressed(lazy_match.len, lazy_match.pos); | |
333 } | |
334 else { | |
335 save_compressed(match.len, match.pos); | |
336 } | |
337 } | |
338 else { | |
339 // Compression failed - just stuff the raw data | |
340 save_raw(window[window_pos]); | |
341 } | |
342 | |
343 } while (!eof || lookahead > 0); | |
344 | |
345 // put_bits(-1, 0); // No more data, so flush the buffer and exit | |
346 | |
347 return buffer_out_size; | |
348 } | |
349 |