FreeCalypso > hg > gsm-codec-lib
view libgsmefr/dec_12k2.c @ 242:f081a6850fb5
libgsmfrp: new refined implementation
The previous implementation exhibited the following defects,
which are now fixed:
1) The last received valid SID was cached forever for the purpose of
handling future invalid SIDs - we could have received some valid
SID ages ago, then lots of speech or NO_DATA, and if we then get
an invalid SID, we would resurrect the last valid SID from ancient
history - a bad design. In our new design, we handle invalid SID
based on the current state, much like BFI.
2) GSM 06.11 spec says clearly that after the second lost SID
(received BFI=1 && TAF=1 in CN state) we need to gradually decrease
the output level, rather than jump directly to emitting silence
frames - we previously failed to implement such logic.
3) Per GSM 06.12 section 5.2, Xmaxc should be the same in all 4 subframes
in a SID frame. What should we do if we receive an otherwise valid
SID frame with different Xmaxc? Our previous approach would
replicate this Xmaxc oddity in every subsequent generated CN frame,
which is rather bad. In our new design, the very first CN frame
(which can be seen as a transformation of the SID frame itself)
retains the original 4 distinct Xmaxc, but all subsequent CN frames
are based on the Xmaxc from the last subframe of the most recent SID.
author | Mychaela Falconia <falcon@freecalypso.org> |
---|---|
date | Tue, 09 May 2023 05:16:31 +0000 |
parents | 7dd6336e15b2 |
children |
line wrap: on
line source
/*************************************************************************** * * FILE NAME: dec_12k2.c * * FUNCTIONS DEFINED IN THIS FILE: * Init_Decoder_12k2 and Decoder_12k2 * * * Init_Decoder_12k2(): * Initialization of variables for the decoder section. * * Decoder_12k2(): * Speech decoder routine operating on a frame basis. * ***************************************************************************/ #include "gsm_efr.h" #include "typedef.h" #include "namespace.h" #include "basic_op.h" #include "sig_proc.h" #include "memops.h" #include "no_count.h" #include "codec.h" #include "cnst.h" #include "dec_state.h" #include "dtx.h" /*---------------------------------------------------------------* * Decoder constant parameters (defined in "cnst.h") * *---------------------------------------------------------------* * L_FRAME : Frame size. * * L_FRAME_BY2 : Half the frame size. * * L_SUBFR : Sub-frame size. * * M : LPC order. * * MP1 : LPC order+1 * * PIT_MIN : Minimum pitch lag. * * PIT_MAX : Maximum pitch lag. * * L_INTERPOL : Length of filter for interpolation * * PRM_SIZE : size of vector containing analysis parameters * *---------------------------------------------------------------*/ /*--------------------------------------------------------* * Static memory allocation. * *--------------------------------------------------------*/ /*************************************************************************** * * FUNCTION: Init_Decoder_12k2 * * PURPOSE: Initialization of variables for the decoder section. * ***************************************************************************/ void Init_Decoder_12k2 (struct EFR_decoder_state *st) { /* Static vectors to zero */ Set_zero (st->old_exc, PIT_MAX + L_INTERPOL); Set_zero (st->mem_syn, M); /* Initialize lsp_old [] */ st->lsp_old[0] = 30000; st->lsp_old[1] = 26000; st->lsp_old[2] = 21000; st->lsp_old[3] = 15000; st->lsp_old[4] = 8000; st->lsp_old[5] = 0; st->lsp_old[6] = -8000; st->lsp_old[7] = -15000; st->lsp_old[8] = -21000; st->lsp_old[9] = -26000; /* Initialize memories of bad frame handling */ st->prev_bf = 0; st->bf_state = 0; return; } /*************************************************************************** * * FUNCTION: Decoder_12k2 * * PURPOSE: Speech decoder routine. * ***************************************************************************/ void Decoder_12k2 ( struct EFR_decoder_state *st, Word16 bfi, /* input : Bad Frame Indication */ const Word16 parm[], /* input : vector of synthesis parameters */ Word16 synth[], /* output: synthesis speech */ Word16 A_t[], /* output: decoded LP filter in 4 subframes */ Word16 TAF, Word16 SID_flag ) { Word16 *exc = st->old_exc + PIT_MAX + L_INTERPOL; /* LPC coefficients */ Word16 *Az; /* Pointer on A_t */ /* LSPs */ Word16 lsp_new[M]; Word16 lsp_mid[M]; /* Algebraic codevector */ Word16 code[L_SUBFR]; /* excitation */ Word16 excp[L_SUBFR]; /* Scalars */ Word16 i, i_subfr; Word16 T0, T0_frac, index; Word16 gain_pit, gain_code, pit_sharp; Word16 temp; Word32 L_temp; /* Set state machine */ if (bfi != 0) { st->bf_state++; } else if (st->bf_state == 6) { st->bf_state = 5; } else { st->bf_state = 0; } if (st->bf_state > 6) { st->bf_state = 6; } rx_dtx (st, TAF, bfi, SID_flag); /* If this frame is the first speech frame after CNI period, */ /* set the BFH state machine to an appropriate state depending */ /* on whether there was DTX muting before start of speech or not */ /* If there was DTX muting, the first speech frame is muted. */ /* If there was no DTX muting, the first speech frame is not */ /* muted. The BFH state machine starts from state 5, however, to */ /* keep the audible noise resulting from a SID frame which is */ /* erroneously interpreted as a good speech frame as small as */ /* possible (the decoder output in this case is quickly muted) */ if ((st->rxdtx_ctrl & RX_FIRST_SP_FLAG) != 0) { if ((st->rxdtx_ctrl & RX_PREV_DTX_MUTING) != 0) { st->bf_state = 5; st->prev_bf = 1; } else { st->bf_state = 5; st->prev_bf = 0; } } D_plsf_5 (st, parm, lsp_mid, lsp_new, bfi, st->rxdtx_ctrl, st->rx_dtx_state); /* Advance synthesis parameters pointer */ parm += 5; move16 (); if ((st->rxdtx_ctrl & RX_SP_FLAG) != 0) { /* Interpolation of LPC for the 4 subframes */ Int_lpc (st->lsp_old, lsp_mid, lsp_new, A_t); } else { /* Comfort noise: use the same parameters in each subframe */ Lsp_Az (lsp_new, A_t); for (i = 0; i < MP1; i++) { A_t[i + MP1] = A_t[i]; move16 (); A_t[i + 2 * MP1] = A_t[i]; move16 (); A_t[i + 3 * MP1] = A_t[i]; move16 (); } } /* update the LSPs for the next frame */ for (i = 0; i < M; i++) { st->lsp_old[i] = lsp_new[i]; } /*---------------------------------------------------------------------* * Loop for every subframe in the analysis frame * *---------------------------------------------------------------------* * The subframe size is L_SUBFR and the loop is repeated * * L_FRAME/L_SUBFR times * * - decode the pitch delay * * - decode algebraic code * * - decode pitch and codebook gains * * - find the excitation and compute synthesis speech * *---------------------------------------------------------------------*/ /* pointer to interpolated LPC parameters */ Az = A_t; move16 (); for (i_subfr = 0; i_subfr < L_FRAME; i_subfr += L_SUBFR) { Word16 T0_min, T0_max; /* previously static inside Dec_lag6() */ index = *parm++; move16 (); /* pitch index */ if ((st->rxdtx_ctrl & RX_SP_FLAG) != 0) { T0 = Dec_lag6 (st, index, PIT_MIN, PIT_MAX, i_subfr, L_FRAME_BY2, &T0_frac, bfi, &T0_min, &T0_max); /*-------------------------------------------------* * - Find the adaptive codebook vector. * *-------------------------------------------------*/ Pred_lt_6 (&exc[i_subfr], T0, T0_frac, L_SUBFR); } else { T0 = L_SUBFR; move16 (); } /*-------------------------------------------------------* * - Decode pitch gain. * *-------------------------------------------------------*/ index = *parm++; move16 (); gain_pit = d_gain_pitch (st, index, bfi, st->bf_state, st->prev_bf, st->rxdtx_ctrl); /*-------------------------------------------------------* * - Decode innovative codebook. * *-------------------------------------------------------*/ if ((st->rxdtx_ctrl & RX_SP_FLAG) != 0) { dec_10i40_35bits (parm, code); } else { /* Use pseudo noise for excitation when SP_flag == 0 */ build_CN_code (code, &st->L_pn_seed_rx); } parm += 10; move16 (); /*-------------------------------------------------------* * - Add the pitch contribution to code[]. * *-------------------------------------------------------*/ /* pit_sharp = gain_pit; */ /* if (pit_sharp > 1.0) pit_sharp = 1.0; */ pit_sharp = shl (gain_pit, 3); /* This loop is not entered when SP_FLAG is 0 */ for (i = T0; i < L_SUBFR; i++) { temp = mult (code[i - T0], pit_sharp); code[i] = add (code[i], temp); move16 (); } /* post processing of excitation elements */ test (); /* This test is not passed when SP_FLAG is 0 */ if (pit_sharp > 16384) { for (i = 0; i < L_SUBFR; i++) { temp = mult (exc[i + i_subfr], pit_sharp); L_temp = L_mult (temp, gain_pit); L_temp = L_shl (L_temp, 1); excp[i] = round (L_temp); move16 (); } } /*-------------------------------------------------* * - Decode codebook gain. * *-------------------------------------------------*/ index = *parm++; move16 (); /* index of energy VQ */ d_gain_code (st, index, code, L_SUBFR, &gain_code, bfi, st->bf_state, st->prev_bf, st->rxdtx_ctrl, i_subfr, st->rx_dtx_state); /*-------------------------------------------------------* * - Find the total excitation. * * - Find synthesis speech corresponding to exc[]. * *-------------------------------------------------------*/ for (i = 0; i < L_SUBFR; i++) { /* exc[i] = gain_pit*exc[i] + gain_code*code[i]; */ L_temp = L_mult (exc[i + i_subfr], gain_pit); L_temp = L_mac (L_temp, code[i], gain_code); L_temp = L_shl (L_temp, 3); exc[i + i_subfr] = round (L_temp); move16 (); } if (pit_sharp > 16384) { for (i = 0; i < L_SUBFR; i++) { excp[i] = add (excp[i], exc[i + i_subfr]); } agc2 (&exc[i_subfr], excp, L_SUBFR); Syn_filt (Az, excp, &synth[i_subfr], L_SUBFR, st->mem_syn, 1); } else { Syn_filt (Az, &exc[i_subfr], &synth[i_subfr], L_SUBFR, st->mem_syn, 1); } /* interpolated LPC parameters for next subframe */ Az += MP1; move16 (); } /*--------------------------------------------------* * Update signal for next frame. * * -> shift to the left by L_FRAME exc[] * *--------------------------------------------------*/ Copy (&st->old_exc[L_FRAME], &st->old_exc[0], PIT_MAX + L_INTERPOL); st->prev_bf = bfi; return; }