view fluid-mnf/lzencode.c @ 311:9cecc930d78f

fluid-mnf: original source from TI, defenestrated line endings and rearranged directory structure, but no *.[ch] source file content changes yet
author Mychaela Falconia <falcon@freecalypso.org>
date Sat, 29 Feb 2020 05:36:07 +0000
parents
children
line wrap: on
line source

/*******************************************************************************
 *
 * Simple LZ77 compressor (c) Texas Instruments, Jesper Pedersem TI-DK    
 *                                                                        
 * File format:                                                          
 * ---------------------------------------------------                   
 * | Tag |   Data   | Tag |   Data   | Tag |   Data    etc...            
 * ---------------------------------------------------                   
 *                                                                        
 * Where TAG is 1 bit indicating how to interpret the following data     
 * (0 = uncompressed, 1 = compressed)
 *                                                                        
 * Depending on TAG the Data is:                                         
 *   0 : One uncompressed char of raw data                                
 *   1 : A length of and a pointer to a match in the previous data        
 *
 *                    ----------------------------------------        
 * Length + pointer = | Length (3 bits) | Position (12 bits) |        
 *                    ----------------------------------------        
 *                     MSB           LSB MSB              LSB         
 *                                                                        
 * Compression threshold is 1 i.e. add 1 to Length to get the actual Length.
 *                                                                        
 * It has been found that 5+14 bits is the most optimum for our kind of
 * data, but we use only 3+12 bits for improoved speed.  Compression is
 * almost as good because we can set the threshold at 1 instead of 2 (2 char
 * matches can be compressed to 12+3 bits)
 *
 * With a 12 bit position we can use 4K for the sliding window.  Position
 * indicates where the match exists as an absolute address into the
 * window. This way we have a very simple "pseudo sliding window" which
 * automatically enables RLE (Run Length Encoding).
 *                                                                        
 * The code performs a "lazy" string search i.e. a match will be dropped if
 * it prevents an even greater match for the very next string. This yeild
 * slightly better compression (approx. 1%) at minor cost of compression
 * speed - decompression time is the same!  A second order lazy match could
 * be implemented, but it gives an insignificant improovement of about 0.1%
 * when compressing executeables
 *                                                                        
 * Searching in the window is done with help from an index and we maintain
 * the window as a single linked list to further speed up the compression.
 *                                                                        
 ******************************************************************************/

#include "fluid.h"
#include "lz.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>


/******************************************************************************
 *
 ******************************************************************************/

#define NIL -1 // Used for indexing purposes

// When we find a match put it into a struct like this.
typedef struct {
    int16 pos;
    int8  len;
} match_def_t;


/******************************************************************************
 * Globals
 ******************************************************************************/

static uint8 window[WINDOW_SIZE]; // Our sliding window buffer
static int16 window_pos;          // And the current position in it
static uint8 lookahead;          // number of bytes in the lookahead area

// Global input/output buffer pointers
static uint8  *buffer_out;
static uint8  *buffer_in;
static uint32 buffer_out_size;
static uint32 buffer_in_size;

static uint32 total_in_size = 0;
static uint32 total_out_size = 0;

static uint8  *p_tag_byte = 0;
static uint8  tag_pos = 0;

static int16 lookup[256]; // Table for looking up a link to any given character
static int16 lookup_end[256]; // The last link in the window.
static int16 next[WINDOW_SIZE]; // Contain links to next similar chars



/******************************************************************************
 * getchar/putchar
 ******************************************************************************/

static int lze_getchar(void)
{
    total_in_size++;

    if (buffer_in_size-- > 0)
        return *buffer_in++;

    buffer_in_size++;

    return EOF;
}

static int lze_putchar(unsigned char data)
{
    total_out_size++;

    *buffer_out++ = data;
    buffer_out_size++;

    return 0;
}

static int lze_putword(uint16 data)
{
    total_out_size += 2;

    *buffer_out++ = (uint8)((data >> 8) & 0x00FF);
    *buffer_out++ = (uint8)(data  & 0x00FF);
    buffer_out_size += 2;

    return 0;
}

/******************************************************************************
 * Code
 ******************************************************************************/


// Searches for the best possible match to the string at 'source_pos' within
// the window. Lazy searches is made faster by specifying a match to beat
// 'match_len' so we don't vaste time by testing too small matches. Will
// stop at any match 'max_len' in size.
match_def_t find_best_match(int16 source_pos, int16 match_len, int16 max_len)
{
    match_def_t current, best;
    int16 start_pos;

    best.len = match_len;

    if ((start_pos = lookup[window[source_pos]]) != NIL) // Get the first entry
    {
        current.pos = start_pos;
        do
        {
            if (window[WINDOW_MASK & (current.pos + best.len)] ==
                window[WINDOW_MASK & (source_pos + best.len)]) // Possible match?
            {
                current.len = 1;

                while((window[WINDOW_MASK & (current.pos + current.len)] ==
                       window[WINDOW_MASK & (source_pos + current.len)]) &&
                      current.len < max_len)
                    current.len++;
                if (current.len > best.len)
                    best = current;
            }
            if (best.len >= max_len)
                break; // no need to search for better matches when we have the best

            current.pos = next[current.pos]; // jump to next matching char
        } while(current.pos != NIL); // All. possibilities tested?
    }
    if (best.len <= match_len)
        best.len = 0;
    return best;
}

// Will remove 'count' links from 'begin' to make room for new data. Will be
// called when we need to overwrite some of the window by new lookahead
// chars.
void clear_index_positions(uint16 begin, uint16 count)
{
    uint16 i, pos;

    for (i = 0; i < count; i++)
    {
        pos = WINDOW_MASK & (begin +i );

        if (lookup[window[pos]] == pos) {
            if (lookup_end[window[pos]] == pos)
                lookup[window[pos]] = lookup_end[window[pos]] = NIL;
            else
                lookup[window[pos]] = next[pos];
        }
    }
}


// Generate indexes for a string, so the match routines can find the
// characters in it very fast! We need to clean up obsolete indexes after
// this routine has been called
void index_string(int16 source_pos, int16 source_len)
{
    int16 i, j;
    uint8 c;

    for (i=0; i<source_len; i++)
    {
        j = WINDOW_MASK & (source_pos + i);
        c = window[j];

        next[j] = NIL;
        if (lookup[c] == NIL) // first time we encounter this char?
            lookup[c] = j; // generate index for this new char
        else
            next[lookup_end[c]] = j;
        lookup_end[c] = j;
    }
}


void update_tag_byte(uint8 compressed)
{
  if(tag_pos == 0x00)
  {
    p_tag_byte = buffer_out++; // Grap a byte in the output stream
    buffer_out_size++;
    
    compressed ? (*p_tag_byte = 0x80) : (*p_tag_byte = 0x00); 
    tag_pos = 0x40;
  }
  else
  {
    if(compressed)
      *p_tag_byte |= tag_pos;

    tag_pos >>= 0x01;
  }
}



// Puts an uncompressed char into the file and update the window
// position. We cannot just putc(data) because the offset might be any
// number of bits off.
void save_raw(uint8 data)
{
    update_tag_byte(0);
    lze_putchar(data);

    index_string(window_pos, 1);
    window_pos = WINDOW_MASK & (window_pos + 1);
    lookahead--;
}

// Puts a compressed match (length and position) into the file and update
// the window position.
void save_compressed(uint8 len, uint16 pos)
{
    uint16 tmp = 0x0000;

    update_tag_byte(1);
    
    tmp =  ((uint8)(len - (THRESHOLD + 1))) << POS_BITS;
    tmp |= ((uint16)(pos & POS_MASK));

    lze_putword(tmp);

    index_string(window_pos, len);
    window_pos = WINDOW_MASK & (window_pos + len);
    lookahead -= len;
}

void compress_init(void)
{
    int i;

    // Initialize - whole window is unindexed at beginning
    for (i = 0; i < 256; i++)
        lookup[i] = lookup_end[i] = NIL;
    for (i = 0; i < WINDOW_SIZE; i++)
        next[i] = window[i] =  NIL;
    
    buffer_out_size = 0;
    window_pos = 0;
}

int compress(char *outbuf, char *inbuf, int size)
{
    int ch, eof;
    match_def_t match, lazy_match;

    buffer_out      = outbuf;
    buffer_out_size = 0;
    buffer_in       = inbuf;
    buffer_in_size  = size;
    if (size <= 0)
        return 0;

    p_tag_byte = NULL;
    tag_pos    = 0x00;  

    do
    {
        clear_index_positions(WINDOW_MASK & (window_pos + lookahead), MAX_MATCH);

        // (re-)fill the lookahead buffer from the bit-stream
        while (lookahead < MAX_MATCH) {
            eof = ((ch = lze_getchar()) == EOF);
            window[WINDOW_MASK & (window_pos + lookahead++)] = (uint8) ch;
            if (eof) {
                window[WINDOW_MASK & (window_pos + --lookahead)] = 0;
                break;
            }
        }
        // End of compression?
        if (lookahead == 0)
            break;

        // Search for a match so we can compress the data
        match = find_best_match(window_pos, THRESHOLD, lookahead);

        if (match.len > THRESHOLD) {

            lazy_match.len = 0;

            // check if the lazy match is better
            if (match.len < lookahead - 1) {
                lazy_match = find_best_match(WINDOW_MASK & (window_pos + 1),
                                             match.len, lookahead-1);
            }
            if (lazy_match.len > match.len) {
                // skip the current and use the lazy
                save_raw(window[window_pos]);
                save_compressed(lazy_match.len, lazy_match.pos);
            }
            else {
                save_compressed(match.len, match.pos);
            }
        }
        else {
            // Compression failed - just stuff the raw data
            save_raw(window[window_pos]);
        }

    } while (!eof || lookahead > 0);

//    put_bits(-1, 0); // No more data, so flush the buffer and exit

    return buffer_out_size;
}