FreeCalypso > hg > freecalypso-reveng
diff fluid-mnf/lzencode.c @ 311:9cecc930d78f
fluid-mnf: original source from TI,
defenestrated line endings and rearranged directory structure,
but no *.[ch] source file content changes yet
author | Mychaela Falconia <falcon@freecalypso.org> |
---|---|
date | Sat, 29 Feb 2020 05:36:07 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fluid-mnf/lzencode.c Sat Feb 29 05:36:07 2020 +0000 @@ -0,0 +1,349 @@ +/******************************************************************************* + * + * Simple LZ77 compressor (c) Texas Instruments, Jesper Pedersem TI-DK + * + * File format: + * --------------------------------------------------- + * | Tag | Data | Tag | Data | Tag | Data etc... + * --------------------------------------------------- + * + * Where TAG is 1 bit indicating how to interpret the following data + * (0 = uncompressed, 1 = compressed) + * + * Depending on TAG the Data is: + * 0 : One uncompressed char of raw data + * 1 : A length of and a pointer to a match in the previous data + * + * ---------------------------------------- + * Length + pointer = | Length (3 bits) | Position (12 bits) | + * ---------------------------------------- + * MSB LSB MSB LSB + * + * Compression threshold is 1 i.e. add 1 to Length to get the actual Length. + * + * It has been found that 5+14 bits is the most optimum for our kind of + * data, but we use only 3+12 bits for improoved speed. Compression is + * almost as good because we can set the threshold at 1 instead of 2 (2 char + * matches can be compressed to 12+3 bits) + * + * With a 12 bit position we can use 4K for the sliding window. Position + * indicates where the match exists as an absolute address into the + * window. This way we have a very simple "pseudo sliding window" which + * automatically enables RLE (Run Length Encoding). + * + * The code performs a "lazy" string search i.e. a match will be dropped if + * it prevents an even greater match for the very next string. This yeild + * slightly better compression (approx. 1%) at minor cost of compression + * speed - decompression time is the same! A second order lazy match could + * be implemented, but it gives an insignificant improovement of about 0.1% + * when compressing executeables + * + * Searching in the window is done with help from an index and we maintain + * the window as a single linked list to further speed up the compression. + * + ******************************************************************************/ + +#include "fluid.h" +#include "lz.h" + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> + + +/****************************************************************************** + * + ******************************************************************************/ + +#define NIL -1 // Used for indexing purposes + +// When we find a match put it into a struct like this. +typedef struct { + int16 pos; + int8 len; +} match_def_t; + + +/****************************************************************************** + * Globals + ******************************************************************************/ + +static uint8 window[WINDOW_SIZE]; // Our sliding window buffer +static int16 window_pos; // And the current position in it +static uint8 lookahead; // number of bytes in the lookahead area + +// Global input/output buffer pointers +static uint8 *buffer_out; +static uint8 *buffer_in; +static uint32 buffer_out_size; +static uint32 buffer_in_size; + +static uint32 total_in_size = 0; +static uint32 total_out_size = 0; + +static uint8 *p_tag_byte = 0; +static uint8 tag_pos = 0; + +static int16 lookup[256]; // Table for looking up a link to any given character +static int16 lookup_end[256]; // The last link in the window. +static int16 next[WINDOW_SIZE]; // Contain links to next similar chars + + + +/****************************************************************************** + * getchar/putchar + ******************************************************************************/ + +static int lze_getchar(void) +{ + total_in_size++; + + if (buffer_in_size-- > 0) + return *buffer_in++; + + buffer_in_size++; + + return EOF; +} + +static int lze_putchar(unsigned char data) +{ + total_out_size++; + + *buffer_out++ = data; + buffer_out_size++; + + return 0; +} + +static int lze_putword(uint16 data) +{ + total_out_size += 2; + + *buffer_out++ = (uint8)((data >> 8) & 0x00FF); + *buffer_out++ = (uint8)(data & 0x00FF); + buffer_out_size += 2; + + return 0; +} + +/****************************************************************************** + * Code + ******************************************************************************/ + + +// Searches for the best possible match to the string at 'source_pos' within +// the window. Lazy searches is made faster by specifying a match to beat +// 'match_len' so we don't vaste time by testing too small matches. Will +// stop at any match 'max_len' in size. +match_def_t find_best_match(int16 source_pos, int16 match_len, int16 max_len) +{ + match_def_t current, best; + int16 start_pos; + + best.len = match_len; + + if ((start_pos = lookup[window[source_pos]]) != NIL) // Get the first entry + { + current.pos = start_pos; + do + { + if (window[WINDOW_MASK & (current.pos + best.len)] == + window[WINDOW_MASK & (source_pos + best.len)]) // Possible match? + { + current.len = 1; + + while((window[WINDOW_MASK & (current.pos + current.len)] == + window[WINDOW_MASK & (source_pos + current.len)]) && + current.len < max_len) + current.len++; + if (current.len > best.len) + best = current; + } + if (best.len >= max_len) + break; // no need to search for better matches when we have the best + + current.pos = next[current.pos]; // jump to next matching char + } while(current.pos != NIL); // All. possibilities tested? + } + if (best.len <= match_len) + best.len = 0; + return best; +} + +// Will remove 'count' links from 'begin' to make room for new data. Will be +// called when we need to overwrite some of the window by new lookahead +// chars. +void clear_index_positions(uint16 begin, uint16 count) +{ + uint16 i, pos; + + for (i = 0; i < count; i++) + { + pos = WINDOW_MASK & (begin +i ); + + if (lookup[window[pos]] == pos) { + if (lookup_end[window[pos]] == pos) + lookup[window[pos]] = lookup_end[window[pos]] = NIL; + else + lookup[window[pos]] = next[pos]; + } + } +} + + +// Generate indexes for a string, so the match routines can find the +// characters in it very fast! We need to clean up obsolete indexes after +// this routine has been called +void index_string(int16 source_pos, int16 source_len) +{ + int16 i, j; + uint8 c; + + for (i=0; i<source_len; i++) + { + j = WINDOW_MASK & (source_pos + i); + c = window[j]; + + next[j] = NIL; + if (lookup[c] == NIL) // first time we encounter this char? + lookup[c] = j; // generate index for this new char + else + next[lookup_end[c]] = j; + lookup_end[c] = j; + } +} + + +void update_tag_byte(uint8 compressed) +{ + if(tag_pos == 0x00) + { + p_tag_byte = buffer_out++; // Grap a byte in the output stream + buffer_out_size++; + + compressed ? (*p_tag_byte = 0x80) : (*p_tag_byte = 0x00); + tag_pos = 0x40; + } + else + { + if(compressed) + *p_tag_byte |= tag_pos; + + tag_pos >>= 0x01; + } +} + + + +// Puts an uncompressed char into the file and update the window +// position. We cannot just putc(data) because the offset might be any +// number of bits off. +void save_raw(uint8 data) +{ + update_tag_byte(0); + lze_putchar(data); + + index_string(window_pos, 1); + window_pos = WINDOW_MASK & (window_pos + 1); + lookahead--; +} + +// Puts a compressed match (length and position) into the file and update +// the window position. +void save_compressed(uint8 len, uint16 pos) +{ + uint16 tmp = 0x0000; + + update_tag_byte(1); + + tmp = ((uint8)(len - (THRESHOLD + 1))) << POS_BITS; + tmp |= ((uint16)(pos & POS_MASK)); + + lze_putword(tmp); + + index_string(window_pos, len); + window_pos = WINDOW_MASK & (window_pos + len); + lookahead -= len; +} + +void compress_init(void) +{ + int i; + + // Initialize - whole window is unindexed at beginning + for (i = 0; i < 256; i++) + lookup[i] = lookup_end[i] = NIL; + for (i = 0; i < WINDOW_SIZE; i++) + next[i] = window[i] = NIL; + + buffer_out_size = 0; + window_pos = 0; +} + +int compress(char *outbuf, char *inbuf, int size) +{ + int ch, eof; + match_def_t match, lazy_match; + + buffer_out = outbuf; + buffer_out_size = 0; + buffer_in = inbuf; + buffer_in_size = size; + if (size <= 0) + return 0; + + p_tag_byte = NULL; + tag_pos = 0x00; + + do + { + clear_index_positions(WINDOW_MASK & (window_pos + lookahead), MAX_MATCH); + + // (re-)fill the lookahead buffer from the bit-stream + while (lookahead < MAX_MATCH) { + eof = ((ch = lze_getchar()) == EOF); + window[WINDOW_MASK & (window_pos + lookahead++)] = (uint8) ch; + if (eof) { + window[WINDOW_MASK & (window_pos + --lookahead)] = 0; + break; + } + } + // End of compression? + if (lookahead == 0) + break; + + // Search for a match so we can compress the data + match = find_best_match(window_pos, THRESHOLD, lookahead); + + if (match.len > THRESHOLD) { + + lazy_match.len = 0; + + // check if the lazy match is better + if (match.len < lookahead - 1) { + lazy_match = find_best_match(WINDOW_MASK & (window_pos + 1), + match.len, lookahead-1); + } + if (lazy_match.len > match.len) { + // skip the current and use the lazy + save_raw(window[window_pos]); + save_compressed(lazy_match.len, lazy_match.pos); + } + else { + save_compressed(match.len, match.pos); + } + } + else { + // Compression failed - just stuff the raw data + save_raw(window[window_pos]); + } + + } while (!eof || lookahead > 0); + +// put_bits(-1, 0); // No more data, so flush the buffer and exit + + return buffer_out_size; +} +