view libgsmefr/cod_12k2.c @ 122:b33f2168fdec

doc/EFR-rationale article written
author Mychaela Falconia <falcon@freecalypso.org>
date Sat, 10 Dec 2022 08:51:01 +0000
parents d9ad0f5121e8
children
line wrap: on
line source

/***************************************************************************
 *
 *  FILE NAME:    cod_12k2.c
 *
 *  FUNCTIONS DEFINED IN THIS FILE:
 *                   Coder_12k2  and  Init_Coder_12k2
 *
 *
 *  Init_Coder_12k2(struct EFR_encoder_state *st):
 *      Initialization of variables for the coder section.
 *
 *  Coder_12k2(struct EFR_encoder_state *st, Word16 ana[], Word16 synth[]):
 *      Speech encoder routine operating on a frame basis.
 *

***************************************************************************/

#include "gsm_efr.h"
#include "typedef.h"
#include "namespace.h"
#include "basic_op.h"
#include "sig_proc.h"
#include "memops.h"
#include "no_count.h"
#include "codec.h"
#include "cnst.h"
#include "enc_state.h"

#include "window2.tab"

#include "vad.h"
#include "dtx.h"

/*-----------------------------------------------------------*
 *    Coder constant parameters (defined in "cnst.h")        *
 *-----------------------------------------------------------*
 *   L_WINDOW    : LPC analysis window size                  *
 *   L_FRAME     : Frame size                                *
 *   L_FRAME_BY2 : Half the frame size                       *
 *   L_SUBFR     : Sub-frame size                            *
 *   M           : LPC order                                 *
 *   MP1         : LPC order+1                               *
 *   L_TOTAL     : Total size of speech buffer               *
 *   PIT_MIN     : Minimum pitch lag                         *
 *   PIT_MAX     : Maximum pitch lag                         *
 *   L_INTERPOL  : Length of filter for interpolation        *
 *-----------------------------------------------------------*/

 /* Spectral expansion factors */

/*
 * Both tables hold M weights in Q15 (value * 32768):
 *   F_gamma1[i] is approximately 0.9^(i+1)
 *   F_gamma2[i] is approximately 0.6^(i+1)
 * (the entries agree with those powers to within one LSB).
 * Coder_12k2 passes each table to Weight_Ai() together with a set of
 * A(z) coefficients to obtain the spectrally expanded sets Ap1[] and Ap2[].
 */
static const Word16 F_gamma1[M] =
{
    29491, 26542, 23888, 21499, 19349,
    17414, 15672, 14105, 12694, 11425
};
static const Word16 F_gamma2[M] =
{
    19661, 11797, 7078, 4247, 2548,
    1529, 917, 550, 330, 198
};

/***************************************************************************
 *  FUNCTION:   Init_Coder_12k2
 *
 *  PURPOSE:   Initialization of variables for the coder section.
 *
 *  DESCRIPTION:
 *       - initilize pointers to speech buffer
 *       - initialize static  pointers
 *       - set static vectors to zero
 *
 ***************************************************************************/

void Init_Coder_12k2 (struct EFR_encoder_state *st)
{
    /* Static vectors to zero */

    Set_zero (st->old_speech, L_TOTAL);
    Set_zero (st->old_exc, PIT_MAX + L_INTERPOL);
    Set_zero (st->old_wsp, PIT_MAX);
    Set_zero (st->mem_syn, M);
    Set_zero (st->mem_w, M);
    Set_zero (st->mem_w0, M);
    Set_zero (st->mem_err, M);
    Set_zero (st->ai_zero + MP1, L_SUBFR);
    Set_zero (st->hvec, L_SUBFR);   /* set to zero "h1[-L_SUBFR..-1]" */

    /* Initialize lsp_old [] */

    st->lsp_old[0] = 30000;
    st->lsp_old[1] = 26000;
    st->lsp_old[2] = 21000;
    st->lsp_old[3] = 15000;
    st->lsp_old[4] = 8000;
    st->lsp_old[5] = 0;
    st->lsp_old[6] = -8000;
    st->lsp_old[7] = -15000;
    st->lsp_old[8] = -21000;
    st->lsp_old[9] = -26000;

    /* Initialize lsp_old_q[] */

    Copy (st->lsp_old, st->lsp_old_q, M);

    return;
}

/***************************************************************************
 *   FUNCTION:   Coder_12k2
 *
 *   PURPOSE:  Principal encoder routine.
 *
 *   DESCRIPTION: This function is called every 20 ms speech frame,
 *       operating on the newly read 160 speech samples. It performs the
 *       principal encoding functions to produce the set of encoded parameters
 *       which include the LSP, adaptive codebook, and fixed codebook
 *       quantization indices (addresses and gains).
 *
 *   INPUTS:
 *       st:        encoder state.  The frame to be encoded is read from the
 *                  last L_FRAME entries of st->old_speech[] (i.e. starting
 *                  at st->old_speech + L_TOTAL - L_FRAME), so the caller
 *                  must have stored the new speech samples there before
 *                  calling this function.  st->dtx_mode selects whether
 *                  the VAD/DTX handlers are run.
 *
 *   OUTPUTS:
 *
 *       ana[]:     vector of analysis parameters.  The first 5 entries are
 *                  reserved for the output of Q_plsf_5(); then each
 *                  subframe occupies 13 entries: pitch lag index, pitch
 *                  gain index, 10 entries for the fixed codebook search and
 *                  the codebook gain index.  When TX_SP_FLAG is not set in
 *                  st->txdtx_ctrl, the lag, pitch gain and codebook entries
 *                  are skipped over without being written by this function.
 *       synth[]:   Local synthesis speech (for debugging purposes),
 *                  L_FRAME samples.
 *
 *   STATE UPDATED:
 *       lsp_old[], lsp_old_q[], mem_w[], mem_err[], mem_w0[], mem_syn[],
 *       txdtx_ctrl (when DTX is disabled), CN_excitation_gain (when
 *       TX_SP_FLAG is clear), and the old_speech[], old_wsp[] and old_exc[]
 *       buffers, which are shifted left by L_FRAME at the end.  Functions
 *       that receive st (Levinson, vad_computation, tx_dtx, Q_plsf_5,
 *       periodicity_update, q_gain_code) may update further members.
 *
 ***************************************************************************/

void Coder_12k2 (
    struct EFR_encoder_state *st,
    Word16 ana[],    /* output  : Analysis parameters */
    Word16 synth[]   /* output  : Local synthesis     */
)
{
    /* handy pointers that were static vars in the original code */
    /* speech   : start of the frame being encoded (last L_FRAME samples)  */
    /* p_window : start of the LPC analysis window (last L_WINDOW samples) */
    /* wsp, exc : current-frame positions, preceded by the history kept    */
    /*            from previous frames                                     */
    /* zero     : the zeroed part of ai_zero[] that follows MP1 entries    */
    /* h1       : impulse response, preceded by L_SUBFR zeroed entries     */
    /* error    : placed right after mem_err[]; NOTE(review): assumes the  */
    /*            state struct provides at least L_SUBFR writable entries  */
    /*            past mem_err[M] - confirm against enc_state.h            */
    Word16 *speech = st->old_speech + L_TOTAL - L_FRAME;
    Word16 *p_window = st->old_speech + L_TOTAL - L_WINDOW;
    Word16 *wsp = st->old_wsp + PIT_MAX;
    Word16 *exc = st->old_exc + PIT_MAX + L_INTERPOL;
    Word16 *zero = st->ai_zero + MP1;
    Word16 *h1 = st->hvec + L_SUBFR;
    Word16 *error = st->mem_err + M;

    /* LPC coefficients */

    Word16 r_l[MP1], r_h[MP1];      /* Autocorrelations lo and hi           */
    Word16 A_t[(MP1) * 4];          /* A(z) unquantized for the 4 subframes */
    Word16 Aq_t[(MP1) * 4];         /* A(z)   quantized for the 4 subframes */
    Word16 Ap1[MP1];                /* A(z) with spectral expansion         */
    Word16 Ap2[MP1];                /* A(z) with spectral expansion         */
    Word16 *A, *Aq;                 /* Pointer on A_t and Aq_t              */
    Word16 lsp_new[M], lsp_new_q[M];/* LSPs at 4th subframe                 */
    Word16 lsp_mid[M], lsp_mid_q[M];/* LSPs at 2nd subframe                 */

    /* Other vectors */

    Word16 xn[L_SUBFR];            /* Target vector for pitch search        */
    Word16 xn2[L_SUBFR];           /* Target vector for codebook search     */
    Word16 res2[L_SUBFR];          /* Long term prediction residual         */
    Word16 code[L_SUBFR];          /* Fixed codebook excitation             */
    Word16 y1[L_SUBFR];            /* Filtered adaptive excitation          */
    Word16 y2[L_SUBFR];            /* Filtered fixed codebook excitation    */

    /* Scalars */

    Word16 i, j, k, i_subfr;
    Word16 T_op, T0, T0_min, T0_max, T0_frac;
    Word16 gain_pit, gain_code, pit_flag, pit_sharp;
    Word16 temp;
    Word32 L_temp;

    Word16 scal_acf, VAD_flag, lags[2], rc[4];

    /*----------------------------------------------------------------------*
     *  - Perform LPC analysis: (twice per frame)                           *
     *       * autocorrelation + lag windowing                              *
     *       * Levinson-Durbin algorithm to find a[]                        *
     *       * convert a[] to lsp[]                                         *
     *       * quantize and code the LSPs                                   *
     *       * find the interpolated LSPs and convert to a[] for all        *
     *         subframes (both quantized and unquantized)                   *
     *----------------------------------------------------------------------*/

    /* LP analysis centered at 2nd subframe */
    /* (result goes into the 2nd subframe slot of A_t[]) */

    scal_acf = Autocorr (p_window, M, r_h, r_l, window_160_80);
                                /* Autocorrelations */

    Lag_window (M, r_h, r_l);   /* Lag windowing    */

    Levinson (st, r_h, r_l, &A_t[MP1], rc); /* Levinson-Durbin  */

    Az_lsp (&A_t[MP1], lsp_mid, st->lsp_old); /* From A(z) to lsp */

    /* LP analysis centered at 4th subframe */
    /* (result goes into the 4th subframe slot of A_t[]; scal_acf, r_h[],  */
    /* r_l[] and rc[] from this second pass overwrite those of the first   */
    /* and are the ones handed to the VAD below)                           */

    /* Autocorrelations */
    scal_acf = Autocorr (p_window, M, r_h, r_l, window_232_8);

    Lag_window (M, r_h, r_l);   /* Lag windowing    */

    Levinson (st, r_h, r_l, &A_t[MP1 * 3], rc); /* Levinson-Durbin  */

    Az_lsp (&A_t[MP1 * 3], lsp_new, lsp_mid); /* From A(z) to lsp */

    if (st->dtx_mode)
    {
        /* DTX enabled, make voice activity decision */
        VAD_flag = vad_computation (st, r_h, r_l, scal_acf, rc, st->ptch);
        tx_dtx (st, VAD_flag); /* TX DTX handler */
    }
    else
    {
        /* DTX disabled, active speech in every frame */
        /* (VAD_flag is not read again in this function after this point) */
        VAD_flag = 1;
        st->txdtx_ctrl = TX_VAD_FLAG | TX_SP_FLAG;
    }

    /* LSP quantization (lsp_mid[] and lsp_new[] jointly quantized) */

    Q_plsf_5 (st, lsp_mid, lsp_new, lsp_mid_q, lsp_new_q, ana, st->txdtx_ctrl);

    /* skip over the 5 entries reserved for the LSP parameters */
    ana += 5;

    /*--------------------------------------------------------------------*
     * Find interpolated LPC parameters in all subframes (both quantized  *
     * and unquantized).                                                  *
     * The interpolated parameters are in array A_t[] of size (M+1)*4     *
     * and the quantized interpolated parameters are in array Aq_t[]      *
     *--------------------------------------------------------------------*/

    /* NOTE(review): Int_lpc2 presumably fills only the 1st and 3rd      */
    /* subframe slots of A_t[], keeping the Levinson results stored      */
    /* above in the 2nd and 4th slots - confirm against Int_lpc2         */
    Int_lpc2 (st->lsp_old, lsp_mid, lsp_new, A_t);

    if ((st->txdtx_ctrl & TX_SP_FLAG) != 0)
    {
        Int_lpc (st->lsp_old_q, lsp_mid_q, lsp_new_q, Aq_t);

        /* update the LSPs for the next frame */
        Copy (lsp_new, st->lsp_old, M);
        Copy (lsp_new_q, st->lsp_old_q, M);
    }
    else
    {
        /* Use unquantized LPC parameters in case of no speech activity */
        for (i = 0; i < MP1; i++)
        {
            Aq_t[i] = A_t[i];                                   move16 (); 
            Aq_t[i + MP1] = A_t[i + MP1];                       move16 (); 
            Aq_t[i + MP1 * 2] = A_t[i + MP1 * 2];               move16 (); 
            Aq_t[i + MP1 * 3] = A_t[i + MP1 * 3];               move16 (); 
        }

        /* update the LSPs for the next frame */
        /* (the quantized history also receives the unquantized LSPs) */
        Copy (lsp_new, st->lsp_old, M);
        Copy (lsp_new, st->lsp_old_q, M);
    }

    /*----------------------------------------------------------------------*
     * - Find the weighted input speech wsp[] for the whole speech frame    *
     * - Find the open-loop pitch delay for first 2 subframes               *
     * - Set the range for searching closed-loop pitch in 1st subframe      *
     * - Find the open-loop pitch delay for last 2 subframes                *
     *----------------------------------------------------------------------*/

    A = A_t;                                                    move16 (); 
    for (i = 0; i < L_FRAME; i += L_SUBFR)
    {
        Weight_Ai (A, F_gamma1, Ap1);

        Weight_Ai (A, F_gamma2, Ap2);

        Residu (Ap1, &speech[i], &wsp[i], L_SUBFR);

        /* filtered in place; st->mem_w carries over between subframes */
        Syn_filt (Ap2, &wsp[i], &wsp[i], L_SUBFR, st->mem_w, 1);

        A += MP1;                                               move16 (); 
    }

    /* Find open loop pitch lag for first two subframes */

    T_op = Pitch_ol (wsp, PIT_MIN, PIT_MAX, L_FRAME_BY2);       move16 (); 

    lags[0] = T_op;                                             move16 (); 

    if ((st->txdtx_ctrl & TX_SP_FLAG) != 0)
    {
        /* Range for closed loop pitch search in 1st subframe */
        /* [T_op - 3, T_op + 3], moved as needed to stay a 7-lag window */
        /* inside [PIT_MIN, PIT_MAX]                                    */

        T0_min = sub (T_op, 3);
        if (T0_min < PIT_MIN)
        {
            T0_min = PIT_MIN;                                   move16 (); 
        }
        T0_max = add (T0_min, 6);
        if (T0_max > PIT_MAX)
        {
            T0_max = PIT_MAX;                                   move16 (); 
            T0_min = sub (T0_max, 6);
        }
    }
    /* Find open loop pitch lag for last two subframes */
    /* (this T_op is used further below when i_subfr == L_FRAME_BY2) */

    T_op = Pitch_ol (&wsp[L_FRAME_BY2], PIT_MIN, PIT_MAX, L_FRAME_BY2);

    if (st->dtx_mode)
    {
        /* hand both open-loop lags of this frame to the DTX/VAD side */
        lags[1] = T_op;                                         move16 (); 
        periodicity_update (st, lags);
    }
    /*----------------------------------------------------------------------*
     *          Loop for every subframe in the analysis frame               *
     *----------------------------------------------------------------------*
     *  To find the pitch and innovation parameters. The subframe size is   *
     *  L_SUBFR and the loop is repeated L_FRAME/L_SUBFR times.             *
     *     - find the weighted LPC coefficients                             *
     *     - find the LPC residual signal res[]                             *
     *     - compute the target signal for pitch search                     *
     *     - compute impulse response of weighted synthesis filter (h1[])   *
     *     - find the closed-loop pitch parameters                          *
     *     - encode the pitch delay                                         *
     *     - update the impulse response h1[] by including pitch            *
     *     - find target vector for codebook search                         *
     *     - codebook search                                                *
     *     - encode codebook address                                        *
     *     - VQ of pitch and codebook gains                                 *
     *     - find synthesis speech                                          *
     *     - update states of weighting filter                              *
     *----------------------------------------------------------------------*
     *  Every speech-only step below is guarded by TX_SP_FLAG; ana is       *
     *  advanced by the same amount in both cases so that the parameter     *
     *  layout does not depend on speech activity.                          *
     *----------------------------------------------------------------------*/

    /* pointer to interpolated LPC parameters          */
    A = A_t;                                                    move16 ();
    /* pointer to interpolated quantized LPC parameters */    
    Aq = Aq_t;                                                  move16 (); 

    for (i_subfr = 0; i_subfr < L_FRAME; i_subfr += L_SUBFR)
    {
        if ((st->txdtx_ctrl & TX_SP_FLAG) != 0)
        {

            /*---------------------------------------------------------------*
             * Find the weighted LPC coefficients for the weighting filter.  *
             *---------------------------------------------------------------*/

            Weight_Ai (A, F_gamma1, Ap1);

            Weight_Ai (A, F_gamma2, Ap2);

            /*---------------------------------------------------------------*
             * Compute impulse response, h1[], of weighted synthesis filter  *
             *---------------------------------------------------------------*/

            /* ai_zero[] = Ap1 followed by the zeros set up at init time;  */
            /* it is run through Aq and then Ap2, with the zeroed vector   */
            /* `zero' passed in the memory argument position               */
            Copy (Ap1, st->ai_zero, M+1);

            Syn_filt (Aq, st->ai_zero, h1, L_SUBFR, zero, 0);

            Syn_filt (Ap2, h1, h1, L_SUBFR, zero, 0);

        }
        /*---------------------------------------------------------------*
         *          Find the target vector for pitch search:             *
         *---------------------------------------------------------------*/

        Residu (Aq, &speech[i_subfr], res2, L_SUBFR);   /* LPC residual */

        if ((st->txdtx_ctrl & TX_SP_FLAG) == 0)
        {
            /* Compute comfort noise excitation gain based on
            LP residual energy */

            st->CN_excitation_gain = compute_CN_excitation_gain (res2);
        }
        else
        {
            Copy (res2, &exc[i_subfr], L_SUBFR);

            /* `error' sits directly after mem_err[], which is passed as */
            /* the filter memory                                         */
            Syn_filt (Aq, &exc[i_subfr], error, L_SUBFR, st->mem_err, 0);

            Residu (Ap1, error, xn, L_SUBFR);

            /* target signal xn[] */
            Syn_filt (Ap2, xn, xn, L_SUBFR, st->mem_w0, 0);

            /*--------------------------------------------------------------*
             *                 Closed-loop fractional pitch search          *
             *--------------------------------------------------------------*/

            /* flag for first and 3rd subframe */            
            /* (pit_flag is 0 in the 1st subframe, forced to 0 in the 3rd, */
            /* and non-zero in the 2nd and 4th)                            */
            pit_flag = i_subfr;                                 move16 (); 
            /* set t0_min and t0_max for 3rd subf.*/
            /* (same 7-lag window rule as for the 1st subframe, now around */
            /* the open-loop lag of the second half frame)                 */
            if (i_subfr == L_FRAME_BY2)
            {
                T0_min = sub (T_op, 3);

                if (T0_min < PIT_MIN)
                {
                    T0_min = PIT_MIN;                           move16 (); 
                }
                T0_max = add (T0_min, 6);
                if (T0_max > PIT_MAX)
                {
                    T0_max = PIT_MAX;                           move16 (); 
                    T0_min = sub (T0_max, 6);
                }
                pit_flag = 0;                                   move16 (); 
            }

            T0 = Pitch_fr6 (&exc[i_subfr], xn, h1, L_SUBFR, T0_min, T0_max,
                            pit_flag, &T0_frac);                move16 (); 

            /* Enc_lag6 receives T0_min/T0_max by address and may update */
            /* them for the following subframe                           */
            *ana = Enc_lag6 (T0, &T0_frac, &T0_min, &T0_max, PIT_MIN,
                             PIT_MAX, pit_flag);
        }
        ana++;
        /* Incrementation of ana is done here to work also
        when no speech activity is present */

        if ((st->txdtx_ctrl & TX_SP_FLAG) != 0)
        {

            /*---------------------------------------------------------------*
             * - find unity gain pitch excitation (adaptive codebook entry)  *
             *   with fractional interpolation.                              *
             * - find filtered pitch exc. y1[]=exc[] convolved with h1[]     *
             * - compute pitch gain and limit between 0 and 1.2              *
             * - update target vector for codebook search                    *
             * - find LTP residual.                                          *
             *---------------------------------------------------------------*/

            Pred_lt_6 (&exc[i_subfr], T0, T0_frac, L_SUBFR);

            Convolve (&exc[i_subfr], h1, y1, L_SUBFR);

            gain_pit = G_pitch (xn, y1, L_SUBFR);      move16 (); 

            /* q_gain_pitch gets gain_pit by address and may replace it */
            /* with the quantized value                                 */
            *ana = q_gain_pitch (&gain_pit);                    move16 (); 

        }
        else
        {
            /* no adaptive codebook contribution in non-speech frames */
            gain_pit = 0;                                       move16 (); 
        }

        ana++;                  /* Incrementation of ana is done here to work
                                   also when no speech activity is present */

        if ((st->txdtx_ctrl & TX_SP_FLAG) != 0)
        {
            /* xn2[i]   = xn[i] - y1[i] * gain_pit  */
            /* res2[i] -= exc[i+i_subfr] * gain_pit */

            for (i = 0; i < L_SUBFR; i++)
            {
                L_temp = L_mult (y1[i], gain_pit);
                L_temp = L_shl (L_temp, 3);
                xn2[i] = sub (xn[i], extract_h (L_temp));       move16 (); 

                L_temp = L_mult (exc[i + i_subfr], gain_pit);
                L_temp = L_shl (L_temp, 3);
                res2[i] = sub (res2[i], extract_h (L_temp));    move16 (); 
            }

            /*-------------------------------------------------------------*
             * - include pitch contribution into impulse resp. h1[]        *
             *-------------------------------------------------------------*/

            /* pit_sharp = gain_pit;                   */
            /* if (pit_sharp > 1.0) pit_sharp = 1.0;   */
            /* NOTE(review): the clamp described above presumably comes   */
            /* from saturation inside shl() - confirm in basic_op         */

            pit_sharp = shl (gain_pit, 3);

            for (i = T0; i < L_SUBFR; i++)
            {
                temp = mult (h1[i - T0], pit_sharp);
                h1[i] = add (h1[i], temp);                      move16 (); 
            }

            /*--------------------------------------------------------------*
             * - Innovative codebook search (find index and gain)           *
             *--------------------------------------------------------------*/

            code_10i40_35bits (xn2, res2, h1, code, y2, ana);
        }
        else
        {
            /* comfort noise: code[] is generated from the TX seed */
            build_CN_code (code, &st->L_pn_seed_tx);
        }
        /* skip the 10 entries reserved for the fixed codebook parameters */
        ana += 10;                                              move16 (); 

        if ((st->txdtx_ctrl & TX_SP_FLAG) != 0)
        {

            /*-------------------------------------------------------*
             * - Add the pitch contribution to code[].               *
             *-------------------------------------------------------*/

            for (i = T0; i < L_SUBFR; i++)
            {
                temp = mult (code[i - T0], pit_sharp);
                code[i] = add (code[i], temp);                  move16 (); 
            }

            /*------------------------------------------------------*
             * - Quantization of fixed codebook gain.               *
             *------------------------------------------------------*/

            gain_code = G_code (xn2, y2);                       move16 (); 
        }
        /* NOTE(review): in non-speech frames gain_code has not been     */
        /* assigned when its address is passed here; q_gain_code is      */
        /* presumably responsible for setting it in that case - confirm  */
        *ana++ = q_gain_code (st, code, L_SUBFR, &gain_code, st->txdtx_ctrl,
                              i_subfr);

        /*------------------------------------------------------*
         * - Find the total excitation                          *
         * - find synthesis speech corresponding to exc[]       *
         * - update filter memories for finding the target      *
         *   vector in the next subframe                        *
         *   (update mem_err[] and mem_w0[])                    *
         *------------------------------------------------------*/

        for (i = 0; i < L_SUBFR; i++)
        {
            /* exc[i] = gain_pit*exc[i] + gain_code*code[i]; */
            /* (gain_pit is 0 in non-speech frames, leaving only the */
            /* code[] term)                                          */

            L_temp = L_mult (exc[i + i_subfr], gain_pit);
            L_temp = L_mac (L_temp, code[i], gain_code);
            L_temp = L_shl (L_temp, 3);
            exc[i + i_subfr] = round (L_temp);                  move16 (); 
        }

        Syn_filt (Aq, &exc[i_subfr], &synth[i_subfr], L_SUBFR, st->mem_syn, 1);

        if ((st->txdtx_ctrl & TX_SP_FLAG) != 0)
        {

            /* Keep the last M samples of this subframe as memories:      */
            /*   mem_err[j] = speech - synth                              */
            /*   mem_w0[j]  = xn - (gain_pit * y1 + gain_code * y2)       */
            /* (k is used here as a plain temporary)                      */
            for (i = L_SUBFR - M, j = 0; i < L_SUBFR; i++, j++)
            {
                st->mem_err[j] = sub (speech[i_subfr + i], synth[i_subfr + i]);
                temp = extract_h (L_shl (L_mult (y1[i], gain_pit), 3));
                k = extract_h (L_shl (L_mult (y2[i], gain_code), 5));
                st->mem_w0[j] = sub (xn[i], add (temp, k));
            }
        }
        else
        {
            /* non-speech frame: both memories are cleared */
            Set_zero (st->mem_err, M);
            Set_zero (st->mem_w0, M);
        }

        /* interpolated LPC parameters for next subframe */
        A += MP1;                                               move16 (); 
        Aq += MP1;                                              move16 (); 
    }

    /*--------------------------------------------------*
     * Update signal for next frame.                    *
     * -> shift to the left by L_FRAME:                 *
     *     speech[], wsp[] and  exc[]                   *
     *--------------------------------------------------*/

    /* Source and destination overlap within the same buffer.           */
    /* NOTE(review): this relies on Copy() proceeding from low to high  */
    /* indices - confirm                                                */
    Copy (&st->old_speech[L_FRAME], &st->old_speech[0], L_TOTAL - L_FRAME);

    Copy (&st->old_wsp[L_FRAME], &st->old_wsp[0], PIT_MAX);

    Copy (&st->old_exc[L_FRAME], &st->old_exc[0], PIT_MAX + L_INTERPOL);

    return;
}