comparison fluid-mnf/lzencode.c @ 311:9cecc930d78f

fluid-mnf: original source from TI, defenestrated line endings and rearranged directory structure, but no *.[ch] source file content changes yet
author Mychaela Falconia <falcon@freecalypso.org>
date Sat, 29 Feb 2020 05:36:07 +0000
parents
children
comparison
equal deleted inserted replaced
310:ae39d76d5b7a 311:9cecc930d78f
1 /*******************************************************************************
2 *
3 * Simple LZ77 compressor (c) Texas Instruments, Jesper Pedersem TI-DK
4 *
5 * File format:
6 * ---------------------------------------------------
7 * | Tag | Data | Tag | Data | Tag | Data etc...
8 * ---------------------------------------------------
9 *
10 * Where TAG is 1 bit indicating how to interpret the following data
11 * (0 = uncompressed, 1 = compressed)
12 *
13 * Depending on TAG the Data is:
14 * 0 : One uncompressed char of raw data
15 * 1 : A length of and a pointer to a match in the previous data
16 *
17 * ----------------------------------------
18 * Length + pointer = | Length (3 bits) | Position (12 bits) |
19 * ----------------------------------------
20 * MSB LSB MSB LSB
21 *
22 * Compression threshold is 1 i.e. add 1 to Length to get the actual Length.
23 *
24 * It has been found that 5+14 bits is the most optimum for our kind of
25 * data, but we use only 3+12 bits for improoved speed. Compression is
26 * almost as good because we can set the threshold at 1 instead of 2 (2 char
27 * matches can be compressed to 12+3 bits)
28 *
29 * With a 12 bit position we can use 4K for the sliding window. Position
30 * indicates where the match exists as an absolute address into the
31 * window. This way we have a very simple "pseudo sliding window" which
32 * automatically enables RLE (Run Length Encoding).
33 *
34 * The code performs a "lazy" string search i.e. a match will be dropped if
35 * it prevents an even greater match for the very next string. This yeild
36 * slightly better compression (approx. 1%) at minor cost of compression
37 * speed - decompression time is the same! A second order lazy match could
38 * be implemented, but it gives an insignificant improovement of about 0.1%
39 * when compressing executeables
40 *
41 * Searching in the window is done with help from an index and we maintain
42 * the window as a single linked list to further speed up the compression.
43 *
44 ******************************************************************************/
45
46 #include "fluid.h"
47 #include "lz.h"
48
49 #include <stdlib.h>
50 #include <stdio.h>
51 #include <string.h>
52 #include <ctype.h>
53
54
55 /******************************************************************************
56 *
57 ******************************************************************************/
58
59 #define NIL -1 // Used for indexing purposes
60
61 // When we find a match put it into a struct like this.
62 typedef struct {
63 int16 pos;
64 int8 len;
65 } match_def_t;
66
67
68 /******************************************************************************
69 * Globals
70 ******************************************************************************/
71
72 static uint8 window[WINDOW_SIZE]; // Our sliding window buffer
73 static int16 window_pos; // And the current position in it
74 static uint8 lookahead; // number of bytes in the lookahead area
75
76 // Global input/output buffer pointers
77 static uint8 *buffer_out;
78 static uint8 *buffer_in;
79 static uint32 buffer_out_size;
80 static uint32 buffer_in_size;
81
82 static uint32 total_in_size = 0;
83 static uint32 total_out_size = 0;
84
85 static uint8 *p_tag_byte = 0;
86 static uint8 tag_pos = 0;
87
88 static int16 lookup[256]; // Table for looking up a link to any given character
89 static int16 lookup_end[256]; // The last link in the window.
90 static int16 next[WINDOW_SIZE]; // Contain links to next similar chars
91
92
93
94 /******************************************************************************
95 * getchar/putchar
96 ******************************************************************************/
97
98 static int lze_getchar(void)
99 {
100 total_in_size++;
101
102 if (buffer_in_size-- > 0)
103 return *buffer_in++;
104
105 buffer_in_size++;
106
107 return EOF;
108 }
109
110 static int lze_putchar(unsigned char data)
111 {
112 total_out_size++;
113
114 *buffer_out++ = data;
115 buffer_out_size++;
116
117 return 0;
118 }
119
120 static int lze_putword(uint16 data)
121 {
122 total_out_size += 2;
123
124 *buffer_out++ = (uint8)((data >> 8) & 0x00FF);
125 *buffer_out++ = (uint8)(data & 0x00FF);
126 buffer_out_size += 2;
127
128 return 0;
129 }
130
131 /******************************************************************************
132 * Code
133 ******************************************************************************/
134
135
136 // Searches for the best possible match to the string at 'source_pos' within
137 // the window. Lazy searches is made faster by specifying a match to beat
138 // 'match_len' so we don't vaste time by testing too small matches. Will
139 // stop at any match 'max_len' in size.
140 match_def_t find_best_match(int16 source_pos, int16 match_len, int16 max_len)
141 {
142 match_def_t current, best;
143 int16 start_pos;
144
145 best.len = match_len;
146
147 if ((start_pos = lookup[window[source_pos]]) != NIL) // Get the first entry
148 {
149 current.pos = start_pos;
150 do
151 {
152 if (window[WINDOW_MASK & (current.pos + best.len)] ==
153 window[WINDOW_MASK & (source_pos + best.len)]) // Possible match?
154 {
155 current.len = 1;
156
157 while((window[WINDOW_MASK & (current.pos + current.len)] ==
158 window[WINDOW_MASK & (source_pos + current.len)]) &&
159 current.len < max_len)
160 current.len++;
161 if (current.len > best.len)
162 best = current;
163 }
164 if (best.len >= max_len)
165 break; // no need to search for better matches when we have the best
166
167 current.pos = next[current.pos]; // jump to next matching char
168 } while(current.pos != NIL); // All. possibilities tested?
169 }
170 if (best.len <= match_len)
171 best.len = 0;
172 return best;
173 }
174
175 // Will remove 'count' links from 'begin' to make room for new data. Will be
176 // called when we need to overwrite some of the window by new lookahead
177 // chars.
178 void clear_index_positions(uint16 begin, uint16 count)
179 {
180 uint16 i, pos;
181
182 for (i = 0; i < count; i++)
183 {
184 pos = WINDOW_MASK & (begin +i );
185
186 if (lookup[window[pos]] == pos) {
187 if (lookup_end[window[pos]] == pos)
188 lookup[window[pos]] = lookup_end[window[pos]] = NIL;
189 else
190 lookup[window[pos]] = next[pos];
191 }
192 }
193 }
194
195
196 // Generate indexes for a string, so the match routines can find the
197 // characters in it very fast! We need to clean up obsolete indexes after
198 // this routine has been called
199 void index_string(int16 source_pos, int16 source_len)
200 {
201 int16 i, j;
202 uint8 c;
203
204 for (i=0; i<source_len; i++)
205 {
206 j = WINDOW_MASK & (source_pos + i);
207 c = window[j];
208
209 next[j] = NIL;
210 if (lookup[c] == NIL) // first time we encounter this char?
211 lookup[c] = j; // generate index for this new char
212 else
213 next[lookup_end[c]] = j;
214 lookup_end[c] = j;
215 }
216 }
217
218
219 void update_tag_byte(uint8 compressed)
220 {
221 if(tag_pos == 0x00)
222 {
223 p_tag_byte = buffer_out++; // Grap a byte in the output stream
224 buffer_out_size++;
225
226 compressed ? (*p_tag_byte = 0x80) : (*p_tag_byte = 0x00);
227 tag_pos = 0x40;
228 }
229 else
230 {
231 if(compressed)
232 *p_tag_byte |= tag_pos;
233
234 tag_pos >>= 0x01;
235 }
236 }
237
238
239
240 // Puts an uncompressed char into the file and update the window
241 // position. We cannot just putc(data) because the offset might be any
242 // number of bits off.
243 void save_raw(uint8 data)
244 {
245 update_tag_byte(0);
246 lze_putchar(data);
247
248 index_string(window_pos, 1);
249 window_pos = WINDOW_MASK & (window_pos + 1);
250 lookahead--;
251 }
252
253 // Puts a compressed match (length and position) into the file and update
254 // the window position.
255 void save_compressed(uint8 len, uint16 pos)
256 {
257 uint16 tmp = 0x0000;
258
259 update_tag_byte(1);
260
261 tmp = ((uint8)(len - (THRESHOLD + 1))) << POS_BITS;
262 tmp |= ((uint16)(pos & POS_MASK));
263
264 lze_putword(tmp);
265
266 index_string(window_pos, len);
267 window_pos = WINDOW_MASK & (window_pos + len);
268 lookahead -= len;
269 }
270
271 void compress_init(void)
272 {
273 int i;
274
275 // Initialize - whole window is unindexed at beginning
276 for (i = 0; i < 256; i++)
277 lookup[i] = lookup_end[i] = NIL;
278 for (i = 0; i < WINDOW_SIZE; i++)
279 next[i] = window[i] = NIL;
280
281 buffer_out_size = 0;
282 window_pos = 0;
283 }
284
285 int compress(char *outbuf, char *inbuf, int size)
286 {
287 int ch, eof;
288 match_def_t match, lazy_match;
289
290 buffer_out = outbuf;
291 buffer_out_size = 0;
292 buffer_in = inbuf;
293 buffer_in_size = size;
294 if (size <= 0)
295 return 0;
296
297 p_tag_byte = NULL;
298 tag_pos = 0x00;
299
300 do
301 {
302 clear_index_positions(WINDOW_MASK & (window_pos + lookahead), MAX_MATCH);
303
304 // (re-)fill the lookahead buffer from the bit-stream
305 while (lookahead < MAX_MATCH) {
306 eof = ((ch = lze_getchar()) == EOF);
307 window[WINDOW_MASK & (window_pos + lookahead++)] = (uint8) ch;
308 if (eof) {
309 window[WINDOW_MASK & (window_pos + --lookahead)] = 0;
310 break;
311 }
312 }
313 // End of compression?
314 if (lookahead == 0)
315 break;
316
317 // Search for a match so we can compress the data
318 match = find_best_match(window_pos, THRESHOLD, lookahead);
319
320 if (match.len > THRESHOLD) {
321
322 lazy_match.len = 0;
323
324 // check if the lazy match is better
325 if (match.len < lookahead - 1) {
326 lazy_match = find_best_match(WINDOW_MASK & (window_pos + 1),
327 match.len, lookahead-1);
328 }
329 if (lazy_match.len > match.len) {
330 // skip the current and use the lazy
331 save_raw(window[window_pos]);
332 save_compressed(lazy_match.len, lazy_match.pos);
333 }
334 else {
335 save_compressed(match.len, match.pos);
336 }
337 }
338 else {
339 // Compression failed - just stuff the raw data
340 save_raw(window[window_pos]);
341 }
342
343 } while (!eof || lookahead > 0);
344
345 // put_bits(-1, 0); // No more data, so flush the buffer and exit
346
347 return buffer_out_size;
348 }
349