FreeCalypso > hg > freecalypso-reveng
view fluid-mnf/lzencode.c @ 313:9b5079989681
frbl/reconst/inc/command.h: tab fixes
author | Mychaela Falconia <falcon@freecalypso.org> |
---|---|
date | Wed, 04 Mar 2020 23:10:48 +0000 |
parents | 9cecc930d78f |
children |
line wrap: on
line source
/******************************************************************************* * * Simple LZ77 compressor (c) Texas Instruments, Jesper Pedersem TI-DK * * File format: * --------------------------------------------------- * | Tag | Data | Tag | Data | Tag | Data etc... * --------------------------------------------------- * * Where TAG is 1 bit indicating how to interpret the following data * (0 = uncompressed, 1 = compressed) * * Depending on TAG the Data is: * 0 : One uncompressed char of raw data * 1 : A length of and a pointer to a match in the previous data * * ---------------------------------------- * Length + pointer = | Length (3 bits) | Position (12 bits) | * ---------------------------------------- * MSB LSB MSB LSB * * Compression threshold is 1 i.e. add 1 to Length to get the actual Length. * * It has been found that 5+14 bits is the most optimum for our kind of * data, but we use only 3+12 bits for improoved speed. Compression is * almost as good because we can set the threshold at 1 instead of 2 (2 char * matches can be compressed to 12+3 bits) * * With a 12 bit position we can use 4K for the sliding window. Position * indicates where the match exists as an absolute address into the * window. This way we have a very simple "pseudo sliding window" which * automatically enables RLE (Run Length Encoding). * * The code performs a "lazy" string search i.e. a match will be dropped if * it prevents an even greater match for the very next string. This yeild * slightly better compression (approx. 1%) at minor cost of compression * speed - decompression time is the same! A second order lazy match could * be implemented, but it gives an insignificant improovement of about 0.1% * when compressing executeables * * Searching in the window is done with help from an index and we maintain * the window as a single linked list to further speed up the compression. * ******************************************************************************/ #include "fluid.h" #include "lz.h" #include <stdlib.h> #include <stdio.h> #include <string.h> #include <ctype.h> /****************************************************************************** * ******************************************************************************/ #define NIL -1 // Used for indexing purposes // When we find a match put it into a struct like this. typedef struct { int16 pos; int8 len; } match_def_t; /****************************************************************************** * Globals ******************************************************************************/ static uint8 window[WINDOW_SIZE]; // Our sliding window buffer static int16 window_pos; // And the current position in it static uint8 lookahead; // number of bytes in the lookahead area // Global input/output buffer pointers static uint8 *buffer_out; static uint8 *buffer_in; static uint32 buffer_out_size; static uint32 buffer_in_size; static uint32 total_in_size = 0; static uint32 total_out_size = 0; static uint8 *p_tag_byte = 0; static uint8 tag_pos = 0; static int16 lookup[256]; // Table for looking up a link to any given character static int16 lookup_end[256]; // The last link in the window. static int16 next[WINDOW_SIZE]; // Contain links to next similar chars /****************************************************************************** * getchar/putchar ******************************************************************************/ static int lze_getchar(void) { total_in_size++; if (buffer_in_size-- > 0) return *buffer_in++; buffer_in_size++; return EOF; } static int lze_putchar(unsigned char data) { total_out_size++; *buffer_out++ = data; buffer_out_size++; return 0; } static int lze_putword(uint16 data) { total_out_size += 2; *buffer_out++ = (uint8)((data >> 8) & 0x00FF); *buffer_out++ = (uint8)(data & 0x00FF); buffer_out_size += 2; return 0; } /****************************************************************************** * Code ******************************************************************************/ // Searches for the best possible match to the string at 'source_pos' within // the window. Lazy searches is made faster by specifying a match to beat // 'match_len' so we don't vaste time by testing too small matches. Will // stop at any match 'max_len' in size. match_def_t find_best_match(int16 source_pos, int16 match_len, int16 max_len) { match_def_t current, best; int16 start_pos; best.len = match_len; if ((start_pos = lookup[window[source_pos]]) != NIL) // Get the first entry { current.pos = start_pos; do { if (window[WINDOW_MASK & (current.pos + best.len)] == window[WINDOW_MASK & (source_pos + best.len)]) // Possible match? { current.len = 1; while((window[WINDOW_MASK & (current.pos + current.len)] == window[WINDOW_MASK & (source_pos + current.len)]) && current.len < max_len) current.len++; if (current.len > best.len) best = current; } if (best.len >= max_len) break; // no need to search for better matches when we have the best current.pos = next[current.pos]; // jump to next matching char } while(current.pos != NIL); // All. possibilities tested? } if (best.len <= match_len) best.len = 0; return best; } // Will remove 'count' links from 'begin' to make room for new data. Will be // called when we need to overwrite some of the window by new lookahead // chars. void clear_index_positions(uint16 begin, uint16 count) { uint16 i, pos; for (i = 0; i < count; i++) { pos = WINDOW_MASK & (begin +i ); if (lookup[window[pos]] == pos) { if (lookup_end[window[pos]] == pos) lookup[window[pos]] = lookup_end[window[pos]] = NIL; else lookup[window[pos]] = next[pos]; } } } // Generate indexes for a string, so the match routines can find the // characters in it very fast! We need to clean up obsolete indexes after // this routine has been called void index_string(int16 source_pos, int16 source_len) { int16 i, j; uint8 c; for (i=0; i<source_len; i++) { j = WINDOW_MASK & (source_pos + i); c = window[j]; next[j] = NIL; if (lookup[c] == NIL) // first time we encounter this char? lookup[c] = j; // generate index for this new char else next[lookup_end[c]] = j; lookup_end[c] = j; } } void update_tag_byte(uint8 compressed) { if(tag_pos == 0x00) { p_tag_byte = buffer_out++; // Grap a byte in the output stream buffer_out_size++; compressed ? (*p_tag_byte = 0x80) : (*p_tag_byte = 0x00); tag_pos = 0x40; } else { if(compressed) *p_tag_byte |= tag_pos; tag_pos >>= 0x01; } } // Puts an uncompressed char into the file and update the window // position. We cannot just putc(data) because the offset might be any // number of bits off. void save_raw(uint8 data) { update_tag_byte(0); lze_putchar(data); index_string(window_pos, 1); window_pos = WINDOW_MASK & (window_pos + 1); lookahead--; } // Puts a compressed match (length and position) into the file and update // the window position. void save_compressed(uint8 len, uint16 pos) { uint16 tmp = 0x0000; update_tag_byte(1); tmp = ((uint8)(len - (THRESHOLD + 1))) << POS_BITS; tmp |= ((uint16)(pos & POS_MASK)); lze_putword(tmp); index_string(window_pos, len); window_pos = WINDOW_MASK & (window_pos + len); lookahead -= len; } void compress_init(void) { int i; // Initialize - whole window is unindexed at beginning for (i = 0; i < 256; i++) lookup[i] = lookup_end[i] = NIL; for (i = 0; i < WINDOW_SIZE; i++) next[i] = window[i] = NIL; buffer_out_size = 0; window_pos = 0; } int compress(char *outbuf, char *inbuf, int size) { int ch, eof; match_def_t match, lazy_match; buffer_out = outbuf; buffer_out_size = 0; buffer_in = inbuf; buffer_in_size = size; if (size <= 0) return 0; p_tag_byte = NULL; tag_pos = 0x00; do { clear_index_positions(WINDOW_MASK & (window_pos + lookahead), MAX_MATCH); // (re-)fill the lookahead buffer from the bit-stream while (lookahead < MAX_MATCH) { eof = ((ch = lze_getchar()) == EOF); window[WINDOW_MASK & (window_pos + lookahead++)] = (uint8) ch; if (eof) { window[WINDOW_MASK & (window_pos + --lookahead)] = 0; break; } } // End of compression? if (lookahead == 0) break; // Search for a match so we can compress the data match = find_best_match(window_pos, THRESHOLD, lookahead); if (match.len > THRESHOLD) { lazy_match.len = 0; // check if the lazy match is better if (match.len < lookahead - 1) { lazy_match = find_best_match(WINDOW_MASK & (window_pos + 1), match.len, lookahead-1); } if (lazy_match.len > match.len) { // skip the current and use the lazy save_raw(window[window_pos]); save_compressed(lazy_match.len, lazy_match.pos); } else { save_compressed(match.len, match.pos); } } else { // Compression failed - just stuff the raw data save_raw(window[window_pos]); } } while (!eof || lookahead > 0); // put_bits(-1, 0); // No more data, so flush the buffer and exit return buffer_out_size; }