# HG changeset patch # User Mychaela Falconia # Date 1715043370 0 # Node ID 8847c1740e78eaa8c9a68d1da2ba41b2a70bccff # Parent 5a1d18542f8a819de9a120085c428aab9c9e4423 libtwamr: integrate VAD1 diff -r 5a1d18542f8a -r 8847c1740e78 libtwamr/Makefile --- a/libtwamr/Makefile Tue May 07 00:05:12 2024 +0000 +++ b/libtwamr/Makefile Tue May 07 00:56:10 2024 +0000 @@ -15,7 +15,7 @@ q_gain_c.o q_gain_p.o q_plsf.o q_plsf3_tab.o q_plsf5_tab.o q_plsf_3.o \ q_plsf_5.o qgain475.o qgain795.o qua_gain.o qua_gain_tab.o reorder.o \ residu.o s10_8pf.o set_sign.o sid_sync.o spreproc.o spstproc.o sqrt_l.o\ - syn_filt.o tls_flags.o ton_stab.o weight_a.o window.o + syn_filt.o tls_flags.o ton_stab.o vad1.o weight_a.o window.o HDRS= namespace.h LIB= libtwamr.a diff -r 5a1d18542f8a -r 8847c1740e78 libtwamr/cnst_vad.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libtwamr/cnst_vad.h Tue May 07 00:56:10 2024 +0000 @@ -0,0 +1,105 @@ +/* +******************************************************************************** +**-------------------------------------------------------------------------** +** ** +** GSM AMR-NB speech codec R98 Version 7.6.0 December 12, 2001 ** +** R99 Version 3.3.0 ** +** REL-4 Version 4.1.0 ** +** ** +**-------------------------------------------------------------------------** +******************************************************************************** +* +* File : cnst_vad.h +* Purpose : Constants and definitions for VAD +* +******************************************************************************** +*/ +#ifndef cnst_vad_h +#define cnst_vad_h "$Id $" + +#define FRAME_LEN 160 /* Length (samples) of the input frame */ +#define COMPLEN 9 /* Number of sub-bands used by VAD */ +#define INV_COMPLEN 3641 /* 1.0/COMPLEN*2^15 */ +#define LOOKAHEAD 40 /* length of the lookahead used by speech coder */ + +#define UNITY 512 /* Scaling used with SNR calculation */ +#define UNIRSHFT 6 /* = log2(MAX_16/UNITY) */ + +#define TONE_THR (Word16)(0.65*MAX_16) /* Threshold for tone detection */ + +/* Constants for background spectrum update */ +#define ALPHA_UP1 (Word16)((1.0 - 0.95)*MAX_16) /* Normal update, upwards: */ +#define ALPHA_DOWN1 (Word16)((1.0 - 0.936)*MAX_16) /* Normal update, downwards */ +#define ALPHA_UP2 (Word16)((1.0 - 0.985)*MAX_16) /* Forced update, upwards */ +#define ALPHA_DOWN2 (Word16)((1.0 - 0.943)*MAX_16) /* Forced update, downwards */ +#define ALPHA3 (Word16)((1.0 - 0.95)*MAX_16) /* Update downwards */ +#define ALPHA4 (Word16)((1.0 - 0.9)*MAX_16) /* For stationary estimation */ +#define ALPHA5 (Word16)((1.0 - 0.5)*MAX_16) /* For stationary estimation */ + +/* Constants for VAD threshold */ +#define VAD_THR_HIGH 1260 /* Highest threshold */ +#define VAD_THR_LOW 720 /* Lowest threshold */ +#define VAD_P1 0 /* Noise level for highest threshold */ +#define VAD_P2 6300 /* Noise level for lowest threshold */ +#define VAD_SLOPE (Word16)(MAX_16*(float)(VAD_THR_LOW-VAD_THR_HIGH)/(float)(VAD_P2-VAD_P1)) + +/* Parameters for background spectrum recovery function */ +#define STAT_COUNT 20 /* threshold of stationary detection counter */ +#define STAT_COUNT_BY_2 10 /* threshold of stationary detection counter */ +#define CAD_MIN_STAT_COUNT 5 /* threshold of stationary detection counter */ + +#define STAT_THR_LEVEL 184 /* Threshold level for stationarity detection */ +#define STAT_THR 1000 /* Threshold for stationarity detection */ + +/* Limits for background noise estimate */ +#define NOISE_MIN 40 /* minimum */ +#define NOISE_MAX 16000 /* maximum */ +#define NOISE_INIT 150 /* initial */ + +/* Constants for VAD hangover addition */ +#define HANG_NOISE_THR 100 +#define BURST_LEN_HIGH_NOISE 4 +#define HANG_LEN_HIGH_NOISE 7 +#define BURST_LEN_LOW_NOISE 5 +#define HANG_LEN_LOW_NOISE 4 + +/* Thresholds for signal power */ +#define VAD_POW_LOW (Word32)15000 /* If input power is lower, */ + /* VAD is set to 0 */ +#define POW_PITCH_THR (Word32)343040 /* If input power is lower, pitch */ + /* detection is ignored */ + +#define POW_COMPLEX_THR (Word32)15000 /* If input power is lower, complex */ + /* flags value for previous frame is un-set */ + + +/* Constants for the filter bank */ +#define LEVEL_SHIFT 0 /* scaling */ +#define COEFF3 13363 /* coefficient for the 3rd order filter */ +#define COEFF5_1 21955 /* 1st coefficient the for 5th order filter */ +#define COEFF5_2 6390 /* 2nd coefficient the for 5th order filter */ + +/* Constants for pitch detection */ +#define LTHRESH 4 +#define NTHRESH 4 + +/* Constants for complex signal VAD */ +#define CVAD_THRESH_ADAPT_HIGH (Word16)(0.6 * MAX_16) /* threshold for adapt stopping high */ +#define CVAD_THRESH_ADAPT_LOW (Word16)(0.5 * MAX_16) /* threshold for adapt stopping low */ +#define CVAD_THRESH_IN_NOISE (Word16)(0.65 * MAX_16) /* threshold going into speech on + a short term basis */ + +#define CVAD_THRESH_HANG (Word16)(0.70 * MAX_16) /* threshold */ +#define CVAD_HANG_LIMIT (Word16)(100) /* 2 second estimation time */ +#define CVAD_HANG_LENGTH (Word16)(250) /* 5 second hangover */ + +#define CVAD_LOWPOW_RESET (Word16) (0.40 * MAX_16) /* init in low power segment */ +#define CVAD_MIN_CORR (Word16) (0.40 * MAX_16) /* lowest adaptation value */ + +#define CVAD_BURST 20 /* speech burst length for speech reset */ +#define CVAD_ADAPT_SLOW (Word16)(( 1.0 - 0.98) * MAX_16) /* threshold for slow adaption */ +#define CVAD_ADAPT_FAST (Word16)((1.0 - 0.92) * MAX_16) /* threshold for fast adaption */ +#define CVAD_ADAPT_REALLY_FAST (Word16)((1.0 - 0.80) * MAX_16) /* threshold for really fast + adaption */ + +#endif diff -r 5a1d18542f8a -r 8847c1740e78 libtwamr/namespace.list --- a/libtwamr/namespace.list Tue May 07 00:05:12 2024 +0000 +++ b/libtwamr/namespace.list Tue May 07 00:56:10 2024 +0000 @@ -53,6 +53,8 @@ dtx_dec dtx_dec_reset dtx_dec_activity_update rx_dtx_handler dtx_enc dtx_enc_reset dtx_buffer tx_dtx_handler +vad1 vad1_reset vad_complex_detection_update vad_tone_detection +vad_tone_detection_update vad_pitch_detection Bits2prm Prm2bits diff -r 5a1d18542f8a -r 8847c1740e78 libtwamr/vad1.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libtwamr/vad1.c Tue May 07 00:56:10 2024 +0000 @@ -0,0 +1,1008 @@ +/* +***************************************************************************** +**-------------------------------------------------------------------------** +** ** +** GSM AMR-NB speech codec R98 Version 7.6.0 December 12, 2001 ** +** R99 Version 3.3.0 ** +** REL-4 Version 4.1.0 ** +** ** +**-------------------------------------------------------------------------** +***************************************************************************** +* +* File : vad1.c +* Purpose : Voice Activity Detection (VAD) for AMR (option 1) +* +***************************************************************************** +*/ + +/* +***************************************************************************** +* MODULE INCLUDE FILE AND VERSION ID +***************************************************************************** +*/ +#include "namespace.h" +#include "vad1.h" + +/* +***************************************************************************** +* INCLUDE FILES +***************************************************************************** +*/ +#include "typedef.h" +#include "basic_op.h" +#include "no_count.h" +#include "oper_32b.h" +#include "cnst_vad.h" + +/* +***************************************************************************** +* LOCAL VARIABLES AND TABLES +***************************************************************************** +*/ + +/* +******************************************************************************** +* PRIVATE PROGRAM CODE +******************************************************************************** +*/ +/**************************************************************************** + * + * Function : first_filter_stage + * Purpose : Scale input down by one bit. Calculate 5th order + * half-band lowpass/highpass filter pair with + * decimation. + * + ***************************************************************************/ +static void first_filter_stage(Word16 in[], /* i : input signal */ + Word16 out[], /* o : output values, every other */ + /* output is low-pass part and */ + /* other is high-pass part every */ + Word16 data[] /* i/o : filter memory */ + ) +{ + Word16 temp0, temp1, temp2, temp3, i; + Word16 data0, data1; + + data0 = data[0]; move16 (); + data1 = data[1]; move16 (); + + for (i = 0; i < FRAME_LEN/4; i++) + { + temp0 = sub(shr(in[4*i+0], 2), mult(COEFF5_1, data0)); + temp1 = add(data0, mult(COEFF5_1, temp0)); + + temp3 = sub(shr(in[4*i+1], 2), mult(COEFF5_2, data1)); + temp2 = add(data1, mult(COEFF5_2, temp3)); + + out[4*i+0] = add(temp1, temp2); move16 (); + out[4*i+1] = sub(temp1, temp2); move16 (); + + data0 = sub(shr(in[4*i+2], 2), mult(COEFF5_1, temp0)); + temp1 = add(temp0, mult(COEFF5_1, data0)); + + data1 = sub(shr(in[4*i+3], 2), mult(COEFF5_2, temp3)); + temp2 = add(temp3, mult(COEFF5_2, data1)); + + out[4*i+2] = add(temp1, temp2); move16 (); + out[4*i+3] = sub(temp1, temp2); move16 (); + } + + data[0] = data0; move16 (); + data[1] = data1; move16 (); +} + +/**************************************************************************** + * + * Function : filter5 + * Purpose : Fifth-order half-band lowpass/highpass filter pair with + * decimation. + * + ***************************************************************************/ +static void filter5(Word16 *in0, /* i/o : input values; output low-pass part */ + Word16 *in1, /* i/o : input values; output high-pass part */ + Word16 data[] /* i/o : updated filter memory */ + ) +{ + Word16 temp0, temp1, temp2; + + temp0 = sub(*in0, mult(COEFF5_1, data[0])); + temp1 = add(data[0], mult(COEFF5_1, temp0)); + data[0] = temp0; move16 (); + + temp0 = sub(*in1, mult(COEFF5_2, data[1])); + temp2 = add(data[1], mult(COEFF5_2, temp0)); + data[1] = temp0; move16 (); + + *in0 = shr(add(temp1, temp2), 1); move16 (); + *in1 = shr(sub(temp1, temp2), 1); move16 (); +} + +/**************************************************************************** + * + * Function : filter3 + * Purpose : Third-order half-band lowpass/highpass filter pair with + * decimation. + * Return value : + * + ***************************************************************************/ +static void filter3(Word16 *in0, /* i/o : input values; output low-pass part */ + Word16 *in1, /* i/o : input values; output high-pass part */ + Word16 *data /* i/o : updated filter memory */ + ) +{ + Word16 temp1, temp2; + + temp1 = sub(*in1, mult(COEFF3, *data)); + temp2 = add(*data, mult(COEFF3, temp1)); + *data = temp1; move16 (); + + *in1 = shr(sub(*in0, temp2), 1); move16 (); + *in0 = shr(add(*in0, temp2), 1); move16 (); +} + +/**************************************************************************** + * + * Function : level_calculation + * Purpose : Calculate signal level in a sub-band. Level is calculated + * by summing absolute values of the input data. + * Return value : signal level + * + ***************************************************************************/ +static Word16 level_calculation( + Word16 data[], /* i : signal buffer */ + Word16 *sub_level, /* i : level calculate at the end of the previous frame */ + /* o : level of signal calculated from the last */ + /* (count2 - count1) samples */ + Word16 count1, /* i : number of samples to be counted */ + Word16 count2, /* i : number of samples to be counted */ + Word16 ind_m, /* i : step size for the index of the data buffer */ + Word16 ind_a, /* i : starting index of the data buffer */ + Word16 scale /* i : scaling for the level calculation */ + ) +{ + Word32 l_temp1, l_temp2; + Word16 level, i; + + l_temp1 = 0L; move32 (); + for (i = count1; i < count2; i++) + { + l_temp1 = L_mac(l_temp1, 1, abs_s(data[ind_m*i+ind_a])); + } + + l_temp2 = L_add(l_temp1, L_shl(*sub_level, sub(16, scale))); + *sub_level = extract_h(L_shl(l_temp1, scale)); + + for (i = 0; i < count1; i++) + { + l_temp2 = L_mac(l_temp2, 1, abs_s(data[ind_m*i+ind_a])); + } + level = extract_h(L_shl(l_temp2, scale)); + + return level; +} + +/**************************************************************************** + * + * Function : filter_bank + * Purpose : Divides input signal into 9-bands and calculas level of + * the signal in each band + * + ***************************************************************************/ +static void filter_bank(vadState1 *st, /* i/o : State struct */ + Word16 in[], /* i : input frame */ + Word16 level[] /* 0 : signal levels at each band */ + ) +{ + Word16 i; + Word16 tmp_buf[FRAME_LEN]; + + /* calculate the filter bank */ + + first_filter_stage(in, tmp_buf, st->a_data5[0]); + + for (i = 0; i < FRAME_LEN/4; i++) + { + filter5(&tmp_buf[4*i], &tmp_buf[4*i+2], st->a_data5[1]); + filter5(&tmp_buf[4*i+1], &tmp_buf[4*i+3], st->a_data5[2]); + } + for (i = 0; i < FRAME_LEN/8; i++) + { + filter3(&tmp_buf[8*i+0], &tmp_buf[8*i+4], &st->a_data3[0]); + filter3(&tmp_buf[8*i+2], &tmp_buf[8*i+6], &st->a_data3[1]); + filter3(&tmp_buf[8*i+3], &tmp_buf[8*i+7], &st->a_data3[4]); + } + + for (i = 0; i < FRAME_LEN/16; i++) + { + filter3(&tmp_buf[16*i+0], &tmp_buf[16*i+8], &st->a_data3[2]); + filter3(&tmp_buf[16*i+4], &tmp_buf[16*i+12], &st->a_data3[3]); + } + + /* calculate levels in each frequency band */ + + /* 3000 - 4000 Hz*/ + level[8] = level_calculation(tmp_buf, &st->sub_level[8], FRAME_LEN/4-8, + FRAME_LEN/4, 4, 1, 15); + move16 (); + /* 2500 - 3000 Hz*/ + level[7] = level_calculation(tmp_buf, &st->sub_level[7], FRAME_LEN/8-4, + FRAME_LEN/8, 8, 7, 16); + move16 (); + /* 2000 - 2500 Hz*/ + level[6] = level_calculation(tmp_buf, &st->sub_level[6], FRAME_LEN/8-4, + FRAME_LEN/8, 8, 3, 16); + move16 (); + /* 1500 - 2000 Hz*/ + level[5] = level_calculation(tmp_buf, &st->sub_level[5], FRAME_LEN/8-4, + FRAME_LEN/8, 8, 2, 16); + move16 (); + /* 1000 - 1500 Hz*/ + level[4] = level_calculation(tmp_buf, &st->sub_level[4], FRAME_LEN/8-4, + FRAME_LEN/8, 8, 6, 16); + move16 (); + /* 750 - 1000 Hz*/ + level[3] = level_calculation(tmp_buf, &st->sub_level[3], FRAME_LEN/16-2, + FRAME_LEN/16, 16, 4, 16); + move16 (); + /* 500 - 750 Hz*/ + level[2] = level_calculation(tmp_buf, &st->sub_level[2], FRAME_LEN/16-2, + FRAME_LEN/16, 16, 12, 16); + move16 (); + /* 250 - 500 Hz*/ + level[1] = level_calculation(tmp_buf, &st->sub_level[1], FRAME_LEN/16-2, + FRAME_LEN/16, 16, 8, 16); + move16 (); + /* 0 - 250 Hz*/ + level[0] = level_calculation(tmp_buf, &st->sub_level[0], FRAME_LEN/16-2, + FRAME_LEN/16, 16, 0, 16); + move16 (); +} + +/**************************************************************************** + * + * Function : update_cntrl + * Purpose : Control update of the background noise estimate. + * Inputs : pitch: flags for pitch detection + * stat_count: stationary counter + * tone: flags indicating presence of a tone + * complex: flags for complex detection + * vadreg: intermediate VAD flags + * Output : stat_count: stationary counter + * + ***************************************************************************/ +static void update_cntrl(vadState1 *st, /* i/o : State struct */ + Word16 level[] /* i : sub-band levels of the input frame */ + ) +{ + Word16 i, temp, stat_rat, exp; + Word16 num, denom; + Word16 alpha; + + /* handle highband complex signal input separately */ + /* if ther has been highband correlation for some time */ + /* make sure that the VAD update speed is low for a while */ + test (); + if (st->complex_warning != 0) + { + test (); + if (sub(st->stat_count, CAD_MIN_STAT_COUNT) < 0) + { + st->stat_count = CAD_MIN_STAT_COUNT; move16 (); + } + } + /* NB stat_count is allowed to be decreased by one below again */ + /* deadlock in speech is not possible unless the signal is very */ + /* complex and need a high rate */ + + /* if fullband pitch or tone have been detected for a while, initialize stat_count */ + logic16 (); test (); logic16 (); test (); + if ((sub((st->pitch & 0x6000), 0x6000) == 0) || + (sub((st->tone & 0x7c00), 0x7c00) == 0)) + { + st->stat_count = STAT_COUNT; move16 (); + } + else + { + /* if 8 last vad-decisions have been "0", reinitialize stat_count */ + logic16 (); test (); + if ((st->vadreg & 0x7f80) == 0) + { + st->stat_count = STAT_COUNT; move16 (); + } + else + { + stat_rat = 0; move16 (); + for (i = 0; i < COMPLEN; i++) + { + test (); + if (sub(level[i], st->ave_level[i]) > 0) + { + num = level[i]; move16 (); + denom = st->ave_level[i]; move16 (); + } + else + { + num = st->ave_level[i]; move16 (); + denom = level[i]; move16 (); + } + /* Limit nimimum value of num and denom to STAT_THR_LEVEL */ + test (); + if (sub(num, STAT_THR_LEVEL) < 0) + { + num = STAT_THR_LEVEL; move16 (); + } + test (); + if (sub(denom, STAT_THR_LEVEL) < 0) + { + denom = STAT_THR_LEVEL; move16 (); + } + + exp = norm_s(denom); + denom = shl(denom, exp); + + /* stat_rat = num/denom * 64 */ + temp = div_s(shr(num, 1), denom); + stat_rat = add(stat_rat, shr(temp, sub(8, exp))); + } + + /* compare stat_rat with a threshold and update stat_count */ + test (); + if (sub(stat_rat, STAT_THR) > 0) + { + st->stat_count = STAT_COUNT; move16 (); + } + else + { + logic16 ();test (); + if ((st->vadreg & 0x4000) != 0) + { + test (); + if (st->stat_count != 0) + { + st->stat_count = sub(st->stat_count, 1); move16 (); + } + } + } + } + } + + /* Update average amplitude estimate for stationarity estimation */ + alpha = ALPHA4; move16 (); + test (); + if (sub(st->stat_count, STAT_COUNT) == 0) + { + alpha = 32767; move16 (); + } + else if ((st->vadreg & 0x4000) == 0) + { + logic16 (); test (); + alpha = ALPHA5; move16 (); + } + + for (i = 0; i < COMPLEN; i++) + { + st->ave_level[i] = add(st->ave_level[i], + mult_r(alpha, sub(level[i], st->ave_level[i]))); + move16 (); + } +} + +/**************************************************************************** + * + * Function : hangover_addition + * Purpose : Add hangover for complex signal or after speech bursts + * Inputs : burst_count: counter for the length of speech bursts + * hang_count: hangover counter + * vadreg: intermediate VAD decision + * Outputs : burst_count: counter for the length of speech bursts + * hang_count: hangover counter + * Return value : VAD_flag indicating final VAD decision + * + ***************************************************************************/ +static Word16 hangover_addition( + vadState1 *st, /* i/o : State struct */ + Word16 noise_level, /* i : average level of the noise */ + /* estimates */ + Word16 low_power /* i : flag power of the input frame */ + ) +{ + Word16 hang_len, burst_len; + + /* + Calculate burst_len and hang_len + burst_len: number of consecutive intermediate vad flags with "1"-decision + required for hangover addition + hang_len: length of the hangover + */ + + test (); + if (sub(noise_level, HANG_NOISE_THR) > 0) + { + burst_len = BURST_LEN_HIGH_NOISE; move16 (); + hang_len = HANG_LEN_HIGH_NOISE; move16 (); + } + else + { + burst_len = BURST_LEN_LOW_NOISE; move16 (); + hang_len = HANG_LEN_LOW_NOISE; move16 (); + } + + /* if the input power (pow_sum) is lower than a threshold, clear + counters and set VAD_flag to "0" "fast exit" */ + test (); + if (low_power != 0) + { + st->burst_count = 0; move16 (); + st->hang_count = 0; move16 (); + st->complex_hang_count = 0; move16 (); + st->complex_hang_timer = 0; move16 (); + return 0; + } + + test (); + if (sub(st->complex_hang_timer, CVAD_HANG_LIMIT) > 0) + { + test (); + if (sub(st->complex_hang_count, CVAD_HANG_LENGTH) < 0) + { + st->complex_hang_count = CVAD_HANG_LENGTH; move16 (); + } + } + + /* long time very complex signal override VAD output function */ + test (); + if (st->complex_hang_count != 0) + { + st->burst_count = BURST_LEN_HIGH_NOISE; move16 (); + st->complex_hang_count = sub(st->complex_hang_count, 1); move16 (); + return 1; + } + else + { + /* let hp_corr work in from a noise_period indicated by the VAD */ + test (); test (); logic16 (); + if (((st->vadreg & 0x3ff0) == 0) && + (sub(st->corr_hp_fast, CVAD_THRESH_IN_NOISE) > 0)) + { + return 1; + } + } + + /* update the counters (hang_count, burst_count) */ + logic16 (); test (); + if ((st->vadreg & 0x4000) != 0) + { + st->burst_count = add(st->burst_count, 1); move16 (); + test (); + if (sub(st->burst_count, burst_len) >= 0) + { + st->hang_count = hang_len; move16 (); + } + return 1; + } + else + { + st->burst_count = 0; move16 (); + test (); + if (st->hang_count > 0) + { + st->hang_count = sub(st->hang_count, 1); move16 (); + return 1; + } + } + return 0; +} + +/**************************************************************************** + * + * Function : noise_estimate_update + * Purpose : Update of background noise estimate + * Inputs : bckr_est: background noise estimate + * pitch: flags for pitch detection + * stat_count: stationary counter + * Outputs : bckr_est: background noise estimate + * + ***************************************************************************/ +static void noise_estimate_update( + vadState1 *st, /* i/o : State struct */ + Word16 level[] /* i : sub-band levels of the input frame */ + ) +{ + Word16 i, alpha_up, alpha_down, bckr_add; + + /* Control update of bckr_est[] */ + update_cntrl(st, level); + + /* Choose update speed */ + bckr_add = 2; move16 (); + + logic16 (); test (); logic16 (); test (); test (); + if (((0x7800 & st->vadreg) == 0) && + ((st->pitch & 0x7800) == 0) + && (st->complex_hang_count == 0)) + { + alpha_up = ALPHA_UP1; move16 (); + alpha_down = ALPHA_DOWN1; move16 (); + } + else + { + test (); test (); + if ((st->stat_count == 0) + && (st->complex_hang_count == 0)) + { + alpha_up = ALPHA_UP2; move16 (); + alpha_down = ALPHA_DOWN2; move16 (); + } + else + { + alpha_up = 0; move16 (); + alpha_down = ALPHA3; move16 (); + bckr_add = 0; move16 (); + } + } + + /* Update noise estimate (bckr_est) */ + for (i = 0; i < COMPLEN; i++) + { + Word16 temp; + temp = sub(st->old_level[i], st->bckr_est[i]); + + test (); + if (temp < 0) + { /* update downwards*/ + st->bckr_est[i] = add(-2, add(st->bckr_est[i], mult_r(alpha_down, temp))); + move16 (); + + /* limit minimum value of the noise estimate to NOISE_MIN */ + test (); + if (sub(st->bckr_est[i], NOISE_MIN) < 0) + { + st->bckr_est[i] = NOISE_MIN; move16 (); + } + } + else + { /* update upwards */ + st->bckr_est[i] = add(bckr_add, add(st->bckr_est[i], mult_r(alpha_up, temp))); + move16 (); + + /* limit maximum value of the noise estimate to NOISE_MAX */ + test (); + if (sub(st->bckr_est[i], NOISE_MAX) > 0) + { + st->bckr_est[i] = NOISE_MAX; move16 (); + } + } + } + + /* Update signal levels of the previous frame (old_level) */ + for(i = 0; i < COMPLEN; i++) + { + st->old_level[i] = level[i]; move16 (); + } +} + +/**************************************************************************** + * + * Function : complex_estimate_adapt + * Purpose : Update/adapt of complex signal estimate + * Inputs : low_power: low signal power flag + * Outputs : st->corr_hp_fast: long term complex signal estimate + * + ***************************************************************************/ +static void complex_estimate_adapt( + vadState1 *st, /* i/o : VAD state struct */ + Word16 low_power /* i : very low level flag of the input frame */ + ) +{ + Word16 alpha; /* Q15 */ + Word32 L_tmp; /* Q31 */ + + + /* adapt speed on own state */ + test (); + if (sub(st->best_corr_hp, st->corr_hp_fast) < 0) /* decrease */ + { + test (); + if (sub(st->corr_hp_fast, CVAD_THRESH_ADAPT_HIGH) < 0) + { /* low state */ + alpha = CVAD_ADAPT_FAST; move16(); + } + else + { /* high state */ + alpha = CVAD_ADAPT_REALLY_FAST; move16(); + } + } + else /* increase */ + { + test (); + if (sub(st->corr_hp_fast, CVAD_THRESH_ADAPT_HIGH) < 0) + { + alpha = CVAD_ADAPT_FAST; move16(); + } + else + { + alpha = CVAD_ADAPT_SLOW; move16(); + } + } + + L_tmp = L_deposit_h(st->corr_hp_fast); + L_tmp = L_msu(L_tmp, alpha, st->corr_hp_fast); + L_tmp = L_mac(L_tmp, alpha, st->best_corr_hp); + st->corr_hp_fast = round(L_tmp); /* Q15 */ move16(); + + test (); + if (sub(st->corr_hp_fast, CVAD_MIN_CORR) < 0) + { + st->corr_hp_fast = CVAD_MIN_CORR; move16(); + } + + test (); + if (low_power != 0) + { + st->corr_hp_fast = CVAD_MIN_CORR; move16(); + } +} + +/**************************************************************************** + * + * Function : complex_vad + * Purpose : complex background decision + * Return value : the complex background decision + * + ***************************************************************************/ +static Word16 complex_vad(vadState1 *st, /* i/o : VAD state struct */ + Word16 low_power /* i : flag power of the input frame */ + ) +{ + st->complex_high = shr(st->complex_high, 1); move16 (); + st->complex_low = shr(st->complex_low, 1); move16 (); + + test (); + if (low_power == 0) + { + test (); + if (sub(st->corr_hp_fast, CVAD_THRESH_ADAPT_HIGH) > 0) + { + st->complex_high = st->complex_high | 0x4000; logic16 (); move16 (); + } + + test (); + if (sub(st->corr_hp_fast, CVAD_THRESH_ADAPT_LOW) > 0 ) + { + st->complex_low = st->complex_low | 0x4000; logic16 (); move16 (); + } + } + + test (); + if (sub(st->corr_hp_fast, CVAD_THRESH_HANG) > 0) + { + st->complex_hang_timer = add(st->complex_hang_timer, 1); move16 (); + } + else + { + st->complex_hang_timer = 0; move16 (); + } + + test (); logic16 (); test (); logic16 (); + return ((sub((st->complex_high & 0x7f80), 0x7f80) == 0) || + (sub((st->complex_low & 0x7fff), 0x7fff) == 0)); +} + +/**************************************************************************** + * + * Function : vad_decision + * Purpose : Calculates VAD_flag + * Inputs : bckr_est: background noise estimate + * vadreg: intermediate VAD flags + * Outputs : noise_level: average level of the noise estimates + * vadreg: intermediate VAD flags + * Return value : VAD_flag + * + ***************************************************************************/ +static Word16 vad_decision( + vadState1 *st, /* i/o : State struct */ + Word16 level[COMPLEN], /* i : sub-band levels of the input frame */ + Word32 pow_sum /* i : power of the input frame */ + ) +{ + Word16 i; + Word16 snr_sum; + Word32 L_temp; + Word16 vad_thr, temp, noise_level; + Word16 low_power_flag; + + /* + Calculate squared sum of the input levels (level) + divided by the background noise components (bckr_est). + */ + L_temp = 0; move32(); + for (i = 0; i < COMPLEN; i++) + { + Word16 exp; + + exp = norm_s(st->bckr_est[i]); + temp = shl(st->bckr_est[i], exp); + temp = div_s(shr(level[i], 1), temp); + temp = shl(temp, sub(exp, UNIRSHFT-1)); + L_temp = L_mac(L_temp, temp, temp); + } + snr_sum = extract_h(L_shl(L_temp, 6)); + snr_sum = mult(snr_sum, INV_COMPLEN); + + /* Calculate average level of estimated background noise */ + L_temp = 0; move32(); + for (i = 0; i < COMPLEN; i++) + { + L_temp = L_add(L_temp, st->bckr_est[i]); + } + + noise_level = extract_h(L_shl(L_temp, 13)); + + /* Calculate VAD threshold */ + vad_thr = add(mult(VAD_SLOPE, sub(noise_level, VAD_P1)), VAD_THR_HIGH); + + test (); + if (sub(vad_thr, VAD_THR_LOW) < 0) + { + vad_thr = VAD_THR_LOW; move16 (); + } + + /* Shift VAD decision register */ + st->vadreg = shr(st->vadreg, 1); move16 (); + + /* Make intermediate VAD decision */ + test (); + if (sub(snr_sum, vad_thr) > 0) + { + st->vadreg = st->vadreg | 0x4000; logic16 (); move16 (); + } + /* primary vad decsion made */ + + /* check if the input power (pow_sum) is lower than a threshold" */ + test (); + if (L_sub(pow_sum, VAD_POW_LOW) < 0) + { + low_power_flag = 1; move16 (); + } + else + { + low_power_flag = 0; move16 (); + } + + /* update complex signal estimate st->corr_hp_fast and hangover reset timer using */ + /* low_power_flag and corr_hp_fast and various adaptation speeds */ + complex_estimate_adapt(st, low_power_flag); + + /* check multiple thresholds of the st->corr_hp_fast value */ + st->complex_warning = complex_vad(st, low_power_flag); move16(); + + /* Update speech subband vad background noise estimates */ + noise_estimate_update(st, level); + + /* Add speech and complex hangover and return speech VAD_flag */ + /* long term complex hangover may be added */ + st->speech_vad_decision = hangover_addition(st, noise_level, low_power_flag); + move16 (); + + return (st->speech_vad_decision); +} + +/* +***************************************************************************** +* PUBLIC PROGRAM CODE +***************************************************************************** +*/ + +/************************************************************************* +* +* Function: vad1_reset +* Purpose: Initializes state memory to zero +* +************************************************************************** +*/ +void vad1_reset (vadState1 *state) +{ + Word16 i, j; + + /* Initialize pitch detection variables */ + state->oldlag_count = 0; + state->oldlag = 0; + state->pitch = 0; + state->tone = 0; + + state->complex_high = 0; + state->complex_low = 0; + state->complex_hang_timer = 0; + + state->vadreg = 0; + + state->stat_count = 0; + state->burst_count = 0; + state->hang_count = 0; + state->complex_hang_count = 0; + + /* initialize memory used by the filter bank */ + for (i = 0; i < 3; i++) + { + for (j = 0; j < 2; j++) + { + state->a_data5[i][j] = 0; + } + } + + for (i = 0; i < 5; i++) + { + state->a_data3[i] = 0; + } + + /* initialize the rest of the memory */ + for (i = 0; i < COMPLEN; i++) + { + state->bckr_est[i] = NOISE_INIT; + state->old_level[i] = NOISE_INIT; + state->ave_level[i] = NOISE_INIT; + state->sub_level[i] = 0; + } + + state->best_corr_hp = CVAD_LOWPOW_RESET; + + state->speech_vad_decision = 0; + state->complex_warning = 0; + state->sp_burst_count = 0; + + state->corr_hp_fast = CVAD_LOWPOW_RESET; +} + +/**************************************************************************** + * + * Function : vad_complex_detection_update + * Purpose : update vad->bestCorr_hp complex signal feature state + * + ***************************************************************************/ +void vad_complex_detection_update (vadState1 *st, /* i/o : State struct */ + Word16 best_corr_hp /* i : best Corr */ + ) +{ + st->best_corr_hp = best_corr_hp; move16(); +} + +/**************************************************************************** + * + * Function : vad_tone_detection + * Purpose : Set tone flag if pitch gain is high. This is used to detect + * signaling tones and other signals with high pitch gain. + * Inputs : tone: flags indicating presence of a tone + * Outputs : tone: flags indicating presence of a tone + * + ***************************************************************************/ +void vad_tone_detection (vadState1 *st, /* i/o : State struct */ + Word32 t0, /* i : autocorrelation maxima */ + Word32 t1 /* i : energy */ + ) +{ + Word16 temp; + /* + if (t0 > TONE_THR * t1) + set tone flag + */ + temp = round(t1); + + test (); test (); + if ((temp > 0) && (L_msu(t0, temp, TONE_THR) > 0)) + { + st->tone = st->tone | 0x4000; logic16 (); move16 (); + } +} + +/**************************************************************************** + * + * Function : vad_tone_detection_update + * Purpose : Update the tone flag register. Tone flags are shifted right + * by one bit. This function should be called from the speech + * encoder before call to Vad_tone_detection() function. + * + ***************************************************************************/ +void vad_tone_detection_update ( + vadState1 *st, /* i/o : State struct */ + Word16 one_lag_per_frame /* i : 1 if one open-loop lag is + calculated per each frame, + otherwise 0 */ + ) +{ + /* Shift tone flags right by one bit */ + st->tone = shr(st->tone, 1); move16 (); + + /* If open-loop lag is calculated only once in each frame, do extra update + and assume that the other tone flag of the frame is one. */ + if (one_lag_per_frame != 0) + { + st->tone = shr(st->tone, 1); + st->tone = st->tone | 0x2000; logic16 (); move16 (); + } +} + +/**************************************************************************** + * + * Function : vad_pitch_detection + * Purpose : Test whether signal contains pitch or other periodic + * component. + * Return value : Boolean voiced / unvoiced decision in state variable + * + ***************************************************************************/ +void vad_pitch_detection (vadState1 *st, /* i/o : State struct */ + Word16 T_op[] /* i : speech encoder open loop lags */ + ) +{ + Word16 lagcount, i; + + lagcount = 0; move16 (); + + for (i = 0; i < 2; i++) + { + test (); + if (sub (abs_s (sub (st->oldlag, T_op[i])), LTHRESH) < 0) + { + lagcount = add (lagcount, 1); + } + + /* Save the current LTP lag */ + st->oldlag = T_op[i]; move16 (); + } + + /* Make pitch decision. + Save flag of the pitch detection to the variable pitch. + */ + st->pitch = shr(st->pitch, 1); move16(); + + test (); + if (sub ( add (st->oldlag_count, lagcount), NTHRESH) >= 0) + { + st->pitch = st->pitch | 0x4000; logic16(); move16(); + } + + /* Update oldlagcount */ + st->oldlag_count = lagcount; move16 (); +} + +/**************************************************************************** + * + * Function : vad + * Purpose : Main program for Voice Activity Detection (VAD) for AMR + * Return value : VAD Decision, 1 = speech, 0 = noise + * + ***************************************************************************/ +Word16 vad1(vadState1 *st, /* i/o : State struct */ + Word16 in_buf[] /* i : samples of the input frame */ + ) +{ + Word16 level[COMPLEN]; + Word32 pow_sum; + Word16 i; + + /* Calculate power of the input frame. */ + pow_sum = 0L; move32 (); + + for (i = 0; i < FRAME_LEN; i++) + { + pow_sum = L_mac(pow_sum, in_buf[i-LOOKAHEAD], in_buf[i-LOOKAHEAD]); + } + + /* If input power is very low, clear pitch flag of the current frame */ + test (); + if (L_sub(pow_sum, POW_PITCH_THR) < 0) + { + st->pitch = st->pitch & 0x3fff; logic16 (); move16 (); + } + + /* If input power is very low, clear complex flag of the "current" frame */ + test (); + if (L_sub(pow_sum, POW_COMPLEX_THR) < 0) + { + st->complex_low = st->complex_low & 0x3fff; logic16 (); move16 (); + } + + /* Run the filter bank which calculates signal levels at each band */ + filter_bank(st, in_buf, level); + + return (vad_decision(st, level, pow_sum)); +} diff -r 5a1d18542f8a -r 8847c1740e78 libtwamr/vad1.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libtwamr/vad1.h Tue May 07 00:56:10 2024 +0000 @@ -0,0 +1,116 @@ +/* +******************************************************************************** +**-------------------------------------------------------------------------** +** ** +** GSM AMR-NB speech codec R98 Version 7.6.0 December 12, 2001 ** +** R99 Version 3.3.0 ** +** REL-4 Version 4.1.0 ** +** ** +**-------------------------------------------------------------------------** +******************************************************************************** +* +* File : vad1.h +* Purpose : Voice Activity Detection (VAD) for AMR (option 1) +* +******************************************************************************** +*/ +#ifndef vad1_h +#define vad1_h "$Id $" + +/* +******************************************************************************** +* INCLUDE FILES +******************************************************************************** +*/ +#include "typedef.h" +#include "cnst_vad.h" + +/* +******************************************************************************** +* LOCAL VARIABLES AND TABLES +******************************************************************************** +*/ + +/* +******************************************************************************** +* DEFINITION OF DATA TYPES +******************************************************************************** +*/ + +/* state variable */ +typedef struct { + + Word16 bckr_est[COMPLEN]; /* background noise estimate */ + Word16 ave_level[COMPLEN]; /* averaged input components for stationary */ + /* estimation */ + Word16 old_level[COMPLEN]; /* input levels of the previous frame */ + Word16 sub_level[COMPLEN]; /* input levels calculated at the end of + a frame (lookahead) */ + Word16 a_data5[3][2]; /* memory for the filter bank */ + Word16 a_data3[5]; /* memory for the filter bank */ + + Word16 burst_count; /* counts length of a speech burst */ + Word16 hang_count; /* hangover counter */ + Word16 stat_count; /* stationary counter */ + + /* Note that each of the following three variables (vadreg, pitch and tone) + holds 15 flags. Each flag reserves 1 bit of the variable. The newest + flag is in the bit 15 (assuming that LSB is bit 1 and MSB is bit 16). */ + Word16 vadreg; /* flags for intermediate VAD decisions */ + Word16 pitch; /* flags for pitch detection */ + Word16 tone; /* flags for tone detection */ + Word16 complex_high; /* flags for complex detection */ + Word16 complex_low; /* flags for complex detection */ + + Word16 oldlag_count, oldlag; /* variables for pitch detection */ + + Word16 complex_hang_count; /* complex hangover counter, used by VAD */ + Word16 complex_hang_timer; /* hangover initiator, used by CAD */ + + Word16 best_corr_hp; /* FIP filtered value Q15 */ + + Word16 speech_vad_decision; /* final decision */ + Word16 complex_warning; /* complex background warning */ + + Word16 sp_burst_count; /* counts length of a speech burst incl + HO addition */ + Word16 corr_hp_fast; /* filtered value */ +} vadState1; + +/* +******************************************************************************** +* DECLARATION OF PROTOTYPES +******************************************************************************** +*/ + +void vad1_reset (vadState1 *st); +/* reset of pre processing state (i.e. set state memory to zero) + returns 0 on success + */ + +void vad_complex_detection_update (vadState1 *st, /* i/o : State struct */ + Word16 best_corr_hp /* i : best Corr Q15 */ + ); + +void vad_tone_detection (vadState1 *st, /* i/o : State struct */ + Word32 t0, /* i : autocorrelation maxima */ + Word32 t1 /* i : energy */ + ); + +void vad_tone_detection_update ( + vadState1 *st, /* i/o : State struct */ + Word16 one_lag_per_frame /* i : 1 if one open-loop lag is + calculated per each frame, + otherwise 0 */ + ); + +void vad_pitch_detection (vadState1 *st, /* i/o : State struct */ + Word16 lags[] /* i : speech encoder open loop lags */ + ); + +Word16 vad1 (vadState1 *st, /* i/o : State struct */ + Word16 in_buf[] /* i : samples of the input frame + inbuf[159] is the very last sample, + incl lookahead */ + ); +#endif