FreeCalypso > hg > freecalypso-reveng

diff fluid-mnf/lzencode.c @ 311:9cecc930d78f
fluid-mnf: original source from TI, defenestrated line endings and rearranged directory structure, but no *.[ch] source file content changes yet
author: Mychaela Falconia <falcon@freecalypso.org>
date: Sat, 29 Feb 2020 05:36:07 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fluid-mnf/lzencode.c	Sat Feb 29 05:36:07 2020 +0000
@@ -0,0 +1,349 @@
+/*******************************************************************************
+ *
+ * Simple LZ77 compressor (c) Texas Instruments, Jesper Pedersem TI-DK    
+ *                                                                        
+ * File format:                                                          
+ * ---------------------------------------------------                   
+ * | Tag |   Data   | Tag |   Data   | Tag |   Data    etc...            
+ * ---------------------------------------------------                   
+ *                                                                        
+ * Where TAG is 1 bit indicating how to interpret the following data     
+ * (0 = uncompressed, 1 = compressed)
+ *                                                                        
+ * Depending on TAG the Data is:                                         
+ *   0 : One uncompressed char of raw data                                
+ *   1 : A length of and a pointer to a match in the previous data        
+ *
+ *                    ----------------------------------------        
+ * Length + pointer = | Length (3 bits) | Position (12 bits) |        
+ *                    ----------------------------------------        
+ *                     MSB           LSB MSB              LSB         
+ *                                                                        
+ * Compression threshold is 1 i.e. add 1 to Length to get the actual Length.
+ *                                                                        
+ * It has been found that 5+14 bits is the most optimum for our kind of
+ * data, but we use only 3+12 bits for improoved speed.  Compression is
+ * almost as good because we can set the threshold at 1 instead of 2 (2 char
+ * matches can be compressed to 12+3 bits)
+ *
+ * With a 12 bit position we can use 4K for the sliding window.  Position
+ * indicates where the match exists as an absolute address into the
+ * window. This way we have a very simple "pseudo sliding window" which
+ * automatically enables RLE (Run Length Encoding).
+ *                                                                        
+ * The code performs a "lazy" string search i.e. a match will be dropped if
+ * it prevents an even greater match for the very next string. This yeild
+ * slightly better compression (approx. 1%) at minor cost of compression
+ * speed - decompression time is the same!  A second order lazy match could
+ * be implemented, but it gives an insignificant improovement of about 0.1%
+ * when compressing executeables
+ *                                                                        
+ * Searching in the window is done with help from an index and we maintain
+ * the window as a single linked list to further speed up the compression.
+ *                                                                        
+ ******************************************************************************/
+
+#include "fluid.h"
+#include "lz.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+
+/******************************************************************************
+ *
+ ******************************************************************************/
+
+#define NIL -1 // Used for indexing purposes
+
+// When we find a match put it into a struct like this.
+typedef struct {
+    int16 pos;
+    int8  len;
+} match_def_t;
+
+
+/******************************************************************************
+ * Globals
+ ******************************************************************************/
+
+static uint8 window[WINDOW_SIZE]; // Our sliding window buffer
+static int16 window_pos;          // And the current position in it
+static uint8 lookahead;          // number of bytes in the lookahead area
+
+// Global input/output buffer pointers
+static uint8  *buffer_out;
+static uint8  *buffer_in;
+static uint32 buffer_out_size;
+static uint32 buffer_in_size;
+
+static uint32 total_in_size = 0;
+static uint32 total_out_size = 0;
+
+static uint8  *p_tag_byte = 0;
+static uint8  tag_pos = 0;
+
+static int16 lookup[256]; // Table for looking up a link to any given character
+static int16 lookup_end[256]; // The last link in the window.
+static int16 next[WINDOW_SIZE]; // Contain links to next similar chars
+
+
+
+/******************************************************************************
+ * getchar/putchar
+ ******************************************************************************/
+
+static int lze_getchar(void)
+{
+    total_in_size++;
+
+    if (buffer_in_size-- > 0)
+        return *buffer_in++;
+
+    buffer_in_size++;
+
+    return EOF;
+}
+
+static int lze_putchar(unsigned char data)
+{
+    total_out_size++;
+
+    *buffer_out++ = data;
+    buffer_out_size++;
+
+    return 0;
+}
+
+static int lze_putword(uint16 data)
+{
+    total_out_size += 2;
+
+    *buffer_out++ = (uint8)((data >> 8) & 0x00FF);
+    *buffer_out++ = (uint8)(data  & 0x00FF);
+    buffer_out_size += 2;
+
+    return 0;
+}
+
+/******************************************************************************
+ * Code
+ ******************************************************************************/
+
+
+// Searches for the best possible match to the string at 'source_pos' within
+// the window. Lazy searches is made faster by specifying a match to beat
+// 'match_len' so we don't vaste time by testing too small matches. Will
+// stop at any match 'max_len' in size.
+match_def_t find_best_match(int16 source_pos, int16 match_len, int16 max_len)
+{
+    match_def_t current, best;
+    int16 start_pos;
+
+    best.len = match_len;
+
+    if ((start_pos = lookup[window[source_pos]]) != NIL) // Get the first entry
+    {
+        current.pos = start_pos;
+        do
+        {
+            if (window[WINDOW_MASK & (current.pos + best.len)] ==
+                window[WINDOW_MASK & (source_pos + best.len)]) // Possible match?
+            {
+                current.len = 1;
+
+                while((window[WINDOW_MASK & (current.pos + current.len)] ==
+                       window[WINDOW_MASK & (source_pos + current.len)]) &&
+                      current.len < max_len)
+                    current.len++;
+                if (current.len > best.len)
+                    best = current;
+            }
+            if (best.len >= max_len)
+                break; // no need to search for better matches when we have the best
+
+            current.pos = next[current.pos]; // jump to next matching char
+        } while(current.pos != NIL); // All. possibilities tested?
+    }
+    if (best.len <= match_len)
+        best.len = 0;
+    return best;
+}
+
+// Will remove 'count' links from 'begin' to make room for new data. Will be
+// called when we need to overwrite some of the window by new lookahead
+// chars.
+void clear_index_positions(uint16 begin, uint16 count)
+{
+    uint16 i, pos;
+
+    for (i = 0; i < count; i++)
+    {
+        pos = WINDOW_MASK & (begin +i );
+
+        if (lookup[window[pos]] == pos) {
+            if (lookup_end[window[pos]] == pos)
+                lookup[window[pos]] = lookup_end[window[pos]] = NIL;
+            else
+                lookup[window[pos]] = next[pos];
+        }
+    }
+}
+
+
+// Generate indexes for a string, so the match routines can find the
+// characters in it very fast! We need to clean up obsolete indexes after
+// this routine has been called
+void index_string(int16 source_pos, int16 source_len)
+{
+    int16 i, j;
+    uint8 c;
+
+    for (i=0; i<source_len; i++)
+    {
+        j = WINDOW_MASK & (source_pos + i);
+        c = window[j];
+
+        next[j] = NIL;
+        if (lookup[c] == NIL) // first time we encounter this char?
+            lookup[c] = j; // generate index for this new char
+        else
+            next[lookup_end[c]] = j;
+        lookup_end[c] = j;
+    }
+}
+
+
+void update_tag_byte(uint8 compressed)
+{
+  if(tag_pos == 0x00)
+  {
+    p_tag_byte = buffer_out++; // Grap a byte in the output stream
+    buffer_out_size++;
+    
+    compressed ? (*p_tag_byte = 0x80) : (*p_tag_byte = 0x00); 
+    tag_pos = 0x40;
+  }
+  else
+  {
+    if(compressed)
+      *p_tag_byte |= tag_pos;
+
+    tag_pos >>= 0x01;
+  }
+}
+
+
+
+// Puts an uncompressed char into the file and update the window
+// position. We cannot just putc(data) because the offset might be any
+// number of bits off.
+void save_raw(uint8 data)
+{
+    update_tag_byte(0);
+    lze_putchar(data);
+
+    index_string(window_pos, 1);
+    window_pos = WINDOW_MASK & (window_pos + 1);
+    lookahead--;
+}
+
+// Puts a compressed match (length and position) into the file and update
+// the window position.
+void save_compressed(uint8 len, uint16 pos)
+{
+    uint16 tmp = 0x0000;
+
+    update_tag_byte(1);
+    
+    tmp =  ((uint8)(len - (THRESHOLD + 1))) << POS_BITS;
+    tmp |= ((uint16)(pos & POS_MASK));
+
+    lze_putword(tmp);
+
+    index_string(window_pos, len);
+    window_pos = WINDOW_MASK & (window_pos + len);
+    lookahead -= len;
+}
+
+void compress_init(void)
+{
+    int i;
+
+    // Initialize - whole window is unindexed at beginning
+    for (i = 0; i < 256; i++)
+        lookup[i] = lookup_end[i] = NIL;
+    for (i = 0; i < WINDOW_SIZE; i++)
+        next[i] = window[i] =  NIL;
+    
+    buffer_out_size = 0;
+    window_pos = 0;
+}
+
+int compress(char *outbuf, char *inbuf, int size)
+{
+    int ch, eof;
+    match_def_t match, lazy_match;
+
+    buffer_out      = outbuf;
+    buffer_out_size = 0;
+    buffer_in       = inbuf;
+    buffer_in_size  = size;
+    if (size <= 0)
+        return 0;
+
+    p_tag_byte = NULL;
+    tag_pos    = 0x00;  
+
+    do
+    {
+        clear_index_positions(WINDOW_MASK & (window_pos + lookahead), MAX_MATCH);
+
+        // (re-)fill the lookahead buffer from the bit-stream
+        while (lookahead < MAX_MATCH) {
+            eof = ((ch = lze_getchar()) == EOF);
+            window[WINDOW_MASK & (window_pos + lookahead++)] = (uint8) ch;
+            if (eof) {
+                window[WINDOW_MASK & (window_pos + --lookahead)] = 0;
+                break;
+            }
+        }
+        // End of compression?
+        if (lookahead == 0)
+            break;
+
+        // Search for a match so we can compress the data
+        match = find_best_match(window_pos, THRESHOLD, lookahead);
+
+        if (match.len > THRESHOLD) {
+
+            lazy_match.len = 0;
+
+            // check if the lazy match is better
+            if (match.len < lookahead - 1) {
+                lazy_match = find_best_match(WINDOW_MASK & (window_pos + 1),
+                                             match.len, lookahead-1);
+            }
+            if (lazy_match.len > match.len) {
+                // skip the current and use the lazy
+                save_raw(window[window_pos]);
+                save_compressed(lazy_match.len, lazy_match.pos);
+            }
+            else {
+                save_compressed(match.len, match.pos);
+            }
+        }
+        else {
+            // Compression failed - just stuff the raw data
+            save_raw(window[window_pos]);
+        }
+
+    } while (!eof || lookahead > 0);
+
+//    put_bits(-1, 0); // No more data, so flush the buffer and exit
+
+    return buffer_out_size;
+}
+
author	Mychaela Falconia <falcon@freecalypso.org>
date	Sat, 29 Feb 2020 05:36:07 +0000
parents
children