FreeCalypso > hg > gsm-codec-lib
comparison libtwamr/vad2.c @ 410:0152c069d01f
libtwamr: integrate VAD2 main body
author | Mychaela Falconia <falcon@freecalypso.org> |
---|---|
date | Tue, 07 May 2024 01:14:14 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
409:4184ccc136a3 | 410:0152c069d01f |
---|---|
1 /* | |
2 ***************************************************************************** | |
3 * | |
4 * GSM AMR-NB speech codec R98 Version 7.6.0 December 12, 2001 | |
5 * R99 Version 3.3.0 | |
6 * REL-4 Version 4.1.0 | |
7 * | |
8 ***************************************************************************** | |
9 * | |
10 * File : vad2.c | |
11 * Purpose : Voice Activity Detection (VAD) for AMR (option 2) | |
12 * | |
13 ***************************************************************************** | |
14 */ | |
15 | |
16 /*************************************************************************** | |
17 * | |
18 * FUNCTION NAME: vad2() | |
19 * | |
20 * PURPOSE: | |
21 * This function provides the Voice Activity Detection function option 2 | |
22 * for the Adaptive Multi-rate (AMR) codec. | |
23 * | |
24 * INPUTS: | |
25 * | |
26 * farray_ptr | |
27 * pointer to Word16[80] input array | |
28 * vadState2 | |
29 * pointer to vadState2 state structure | |
30 * | |
31 * OUTPUTS: | |
32 * | |
33 * state variables are updated | |
34 * | |
35 * RETURN VALUE: | |
36 * | |
37 * Word16 | |
38 * VAD(m) - two successive calls to vad2() yield | |
39 * the VAD decision for the 20 ms frame: | |
40 * VAD_flag = VAD(m-1) || VAD(m) | |
41 * | |
42 * | |
43 *************************************************************************/ | |
44 | |
45 /* Includes */ | |
46 | |
47 #include <stdint.h> | |
48 #include <string.h> | |
49 #include "tw_amr.h" | |
50 #include "namespace.h" | |
51 #include "typedef.h" | |
52 #include "cnst.h" | |
53 #include "basic_op.h" | |
54 #include "oper_32b.h" | |
55 #include "no_count.h" | |
56 #include "log2.h" | |
57 #include "pow2.h" | |
58 #include "vad2.h" | |
59 | |
60 | |
61 /* Local functions */ | |
62 | |
63 /*************************************************************************** | |
64 * | |
65 * FUNCTION NAME: fn10Log10 | |
66 * | |
67 * PURPOSE: | |
68 * The purpose of this function is to take the 10*log base 10 of input and | |
69 * divide by 128 and return; i.e. output = 10*log10(input)/128 (scaled as 7,8) | |
70 * | |
71 * INPUTS: | |
72 * | |
73 * L_Input | |
74 * input (scaled as 31-fbits,fbits) | |
75 * fbits | |
76 * number of fractional bits on input | |
77 * | |
78 * OUTPUTS: | |
79 * | |
80 * none | |
81 * | |
82 * RETURN VALUE: | |
83 * | |
84 * Word16 | |
85 * output (scaled as 7,8) | |
86 * | |
87 * DESCRIPTION: | |
88 * | |
89 * 10*log10(x)/128 = (10*log10(2) * (log2(x<<fbits) - log2(1<<fbits))) >> 7 | |
90 * = (3.0103 * (log2(x<<fbits) - fbits)) >> 7 | |
91 * = ((3.0103/4.0 * (log2(x<<fbits) - fbits)) << 2) >> 7 | |
92 * = (3.0103/4.0 * (log2(x<<fbits) - fbits)) >> 5 | |
93 * | |
94 *************************************************************************/ | |
95 | |
96 static Word16 fn10Log10 (Word32 L_Input, Word16 fbits) | |
97 { | |
98 | |
99 Word16 integer; /* Integer part of Log2. (range: 0<=val<=30) */ | |
100 Word16 fraction; /* Fractional part of Log2. (range: 0<=val<1) */ | |
101 | |
102 Word32 Ltmp; | |
103 Word16 tmp; | |
104 | |
105 Log2(L_Input, &integer, &fraction); | |
106 | |
107 integer = sub(integer, fbits); | |
108 Ltmp = Mpy_32_16 (integer, fraction, 24660); /* 24660 = 10*log10(2)/4 scaled 0,15 */ | |
109 Ltmp = L_shr_r(Ltmp, 5+1); /* extra shift for 30,1 => 15,0 extract correction */ | |
110 tmp = extract_l(Ltmp); | |
111 | |
112 return (tmp); | |
113 } | |
114 | |
115 | |
116 /*************************************************************************** | |
117 * | |
118 * FUNCTION NAME: block_norm | |
119 * | |
120 * PURPOSE: | |
121 * The purpose of this function is block normalise the input data sequence | |
122 * | |
123 * INPUTS: | |
124 * | |
125 * &in[0] | |
126 * pointer to data sequence to be normalised | |
127 * length | |
128 * number of elements in data sequence | |
129 * headroom | |
130 * number of headroom bits to leave (subtracted from the normalisation shift count) | |
131 * | |
132 * OUTPUTS: | |
133 * | |
134 * &out[0] | |
135 * normalised output data sequence pointed to by &out[0] | |
136 * | |
137 * RETURN VALUE: | |
138 * | |
139 * Word16 | |
140 * number of bits sequence was left shifted | |
141 * | |
142 * DESCRIPTION: | |
143 * | |
144 * 1) Search for maximum absolute valued data element | |
145 * 2) Normalise the max element with "headroom" | |
146 * 3) Transfer/shift the input sequence to the output buffer | |
147 * 4) Return the number of left shifts | |
148 * | |
149 * CAVEATS: | |
150 * An input sequence of all zeros will return the maximum | |
151 * number of left shifts allowed, NOT the value returned | |
152 * by a norm_s(0) call, since it desired to associate an | |
153 * all zeros sequence with low energy. | |
154 * | |
155 *************************************************************************/ | |
156 | |
157 static | |
158 Word16 block_norm (Word16 * in, Word16 * out, Word16 length, Word16 headroom) | |
159 { | |
160 | |
161 Word16 i, max, scnt, adata; | |
162 | |
163 max = abs_s(in[0]); | |
164 for (i = 1; i < length; i++) | |
165 { | |
166 adata = abs_s(in[i]); test(); | |
167 if (sub(adata, max) > 0) | |
168 { | |
169 max = adata; move16(); | |
170 } | |
171 } | |
172 test(); | |
173 if (max != 0) | |
174 { | |
175 scnt = sub(norm_s(max), headroom); | |
176 for (i = 0; i < length; i++) | |
177 { | |
178 out[i] = shl(in[i], scnt); move16(); | |
179 } | |
180 } | |
181 else | |
182 { | |
183 scnt = sub(16, headroom); | |
184 for (i = 0; i < length; i++) | |
185 { | |
186 out[i] = 0; move16(); | |
187 } | |
188 } | |
189 return (scnt); | |
190 } | |
191 | |
192 | |
193 /********************************************* The VAD function ***************************************************/ | |
194 | |
/*
 * vad2: run one pass of the option-2 voice activity detector.
 *
 * farray_ptr - input samples; FRM_LEN of them are read (via block_norm).
 * st         - detector state, read and updated in place.
 *
 * Returns ivad, which is set to either 0 or 1 below.
 *
 * Processing order: block-normalise and pre-emphasise the input, FFT,
 * per-channel energy smoothing, channel SNRs and voice-metric sum,
 * long-term SNR tracking, threshold/hangover decision, and finally the
 * conditional background-noise estimate update.
 */
195 Word16 vad2 (Word16 * farray_ptr, vadState2 * st) | |
196 { | |
197 | |
198 /* | |
199 * The channel table is defined below. In this table, the | |
200 * lower and higher frequency coefficients for each of the 16 | |
201 * channels are specified. The table excludes the coefficients | |
202 * with numbers 0 (DC), 1, and 64 (Foldover frequency). | |
203 */ | |
204 | |
205 static const Word16 ch_tbl[NUM_CHAN][2] = | |
206 { | |
207 | |
208 {2, 3}, | |
209 {4, 5}, | |
210 {6, 7}, | |
211 {8, 9}, | |
212 {10, 11}, | |
213 {12, 13}, | |
214 {14, 16}, | |
215 {17, 19}, | |
216 {20, 22}, | |
217 {23, 26}, | |
218 {27, 30}, | |
219 {31, 35}, | |
220 {36, 41}, | |
221 {42, 48}, | |
222 {49, 55}, | |
223 {56, 63} | |
224 | |
225 }; | |
226 | |
227 /* channel energy scaling table - allows efficient division by number | |
228 * of DFT bins in the channel: 1/2, 1/3, 1/4, etc. | |
229 */ | |
230 | |
231 static const Word16 ch_tbl_sh[NUM_CHAN] = | |
232 { | |
233 16384, 16384, 16384, 16384, 16384, 16384, 10923, 10923, | |
234 10923, 8192, 8192, 6554, 5461, 4681, 4681, 4096 | |
235 }; | |
236 | |
237 /* | |
238 * The voice metric table is defined below. It is a non- | |
239 * linear table with a deadband near zero. It maps the SNR | |
240 * index (quantized SNR value) to a number that is a measure | |
241 * of voice quality. | |
242 */ | |
243 | |
244 static const Word16 vm_tbl[90] = | |
245 { | |
246 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
247 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, | |
248 8, 8, 9, 9, 10, 10, 11, 12, 12, 13, 13, 14, 15, | |
249 15, 16, 17, 17, 18, 19, 20, 20, 21, 22, 23, 24, | |
250 24, 25, 26, 27, 28, 28, 29, 30, 31, 32, 33, 34, | |
251 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, | |
252 46, 47, 48, 49, 50, 50, 50, 50, 50, 50, 50, 50, | |
253 50, 50 | |
254 }; | |
255 | |
256 /* hangover as a function of peak SNR (3 dB steps) */ | |
257 static const Word16 hangover_table[20] = | |
258 { | |
259 30, 30, 30, 30, 30, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 8, 8, 8 | |
260 }; | |
261 | |
262 /* burst sensitivity as a function of peak SNR (3 dB steps) */ | |
263 static const Word16 burstcount_table[20] = | |
264 { | |
265 8, 8, 8, 8, 8, 8, 8, 8, 7, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4 | |
266 }; | |
267 | |
268 /* voice metric sensitivity as a function of peak SNR (3 dB steps) */ | |
269 static const Word16 vm_threshold_table[20] = | |
270 { | |
271 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 40, 51, 71, 100, 139, 191, 257, 337, 432 | |
272 }; | |
273 | |
274 | |
275 /* State tables that use 22,9 or 27,4 scaling for ch_enrg[] */ | |
276 | |
/* Each two-entry table below is indexed by st->shift_state (0 or 1). */
277 static const Word16 noise_floor_chan[2] = {NOISE_FLOOR_CHAN_0, NOISE_FLOOR_CHAN_1}; | |
278 static const Word16 min_chan_enrg[2] = {MIN_CHAN_ENRG_0, MIN_CHAN_ENRG_1}; | |
279 static const Word16 ine_noise[2] = {INE_NOISE_0, INE_NOISE_1}; | |
280 static const Word16 fbits[2] = {FRACTIONAL_BITS_0, FRACTIONAL_BITS_1}; | |
281 static const Word16 state_change_shift_r[2] = {STATE_1_TO_0_SHIFT_R, STATE_0_TO_1_SHIFT_R}; | |
282 | |
283 /* Energy scale table given 30,1 input scaling (also account for -6 dB shift on input) */ | |
284 static const Word16 enrg_norm_shift[2] = {(FRACTIONAL_BITS_0-1+2), (FRACTIONAL_BITS_1-1+2)}; | |
285 | |
286 | |
287 /* Automatic variables */ | |
288 | |
289 Word32 Lenrg; /* scaled as 30,1 */ | |
290 Word32 Ltne; /* scaled as 22,9 */ | |
291 Word32 Ltce; /* scaled as 22,9 or 27,4 */ | |
292 | |
293 Word16 tne_db; /* scaled as 7,8 */ | |
294 Word16 tce_db; /* scaled as 7,8 */ | |
295 | |
296 Word16 input_buffer[FRM_LEN]; /* used for block normalising input data */ | |
297 Word16 data_buffer[FFT_LEN]; /* used for in-place FFT */ | |
298 | |
299 Word16 ch_snr[NUM_CHAN]; /* scaled as 7,8 */ | |
300 Word16 ch_snrq; /* scaled as 15,0 (in 0.375 dB steps) */ | |
301 Word16 vm_sum; /* scaled as 15,0 */ | |
302 Word16 ch_enrg_dev; /* scaled as 7,8 */ | |
303 | |
304 Word32 Lpeak; /* maximum channel energy */ | |
305 Word16 p2a_flag; /* flag to indicate spectral peak-to-average ratio > 10 dB */ | |
306 | |
307 Word16 ch_enrg_db[NUM_CHAN]; /* scaled as 7,8 */ | |
308 Word16 ch_noise_db; /* scaled as 7,8 */ | |
309 | |
310 Word16 alpha; /* scaled as 0,15 */ | |
311 Word16 one_m_alpha; /* scaled as 0,15 */ | |
312 Word16 update_flag; /* set to indicate a background noise estimate update */ | |
313 | |
314 Word16 i, j, j1, j2; /* Scratch variables */ | |
315 Word16 hi1, lo1; | |
316 | |
317 Word32 Ltmp, Ltmp1, Ltmp2; | |
318 Word16 tmp; | |
319 | |
320 Word16 normb_shift; /* block norm shift count */ | |
321 | |
322 Word16 ivad; /* intermediate VAD decision (return value) */ | |
323 Word16 tsnrq; /* total signal-to-noise ratio (quantized 3 dB steps) scaled as 15,0 */ | |
324 Word16 xt; /* instantaneous frame SNR in dB, scaled as 7,8 */ | |
325 | |
326 Word16 state_change; | |
327 | |
328 | |
329 /* Increment frame counter */ | |
330 st->Lframe_cnt = L_add(st->Lframe_cnt, 1); | |
331 | |
332 /* Block normalize the input */ | |
333 normb_shift = block_norm(farray_ptr, input_buffer, FRM_LEN, FFT_HEADROOM); | |
334 | |
335 /* Pre-emphasize the input data and store in the data buffer with the appropriate offset */ | |
336 for (i = 0; i < DELAY; i++) | |
337 { | |
338 data_buffer[i] = 0; move16(); | |
339 } | |
340 | |
/* The saved pre-emphasis sample was stored at the previous call's block-norm
 * shift; rescale it by the shift difference so it matches this call's scaling. */
341 st->pre_emp_mem = shr_r(st->pre_emp_mem, sub(st->last_normb_shift, normb_shift)); | |
342 st->last_normb_shift = normb_shift; move16(); | |
343 | |
344 data_buffer[DELAY] = add(input_buffer[0], mult(PRE_EMP_FAC, st->pre_emp_mem)); move16(); | |
345 | |
346 for (i = DELAY + 1, j = 1; i < DELAY + FRM_LEN; i++, j++) | |
347 { | |
348 data_buffer[i] = add(input_buffer[j], mult(PRE_EMP_FAC, input_buffer[j-1])); move16(); | |
349 } | |
/* Keep the last (normalised) input sample as pre-emphasis memory for the next call. */
350 st->pre_emp_mem = input_buffer[FRM_LEN-1]; move16(); | |
351 | |
/* Zero-fill the remainder of the FFT buffer after the FRM_LEN samples. */
352 for (i = DELAY + FRM_LEN; i < FFT_LEN; i++) | |
353 { | |
354 data_buffer[i] = 0; move16(); | |
355 } | |
356 | |
357 | |
358 /* Perform FFT on the data buffer */ | |
359 r_fft(data_buffer); | |
360 | |
361 | |
362 /* Use normb_shift factor to determine the scaling of the energy estimates */ | |
/* The two thresholds differ, giving hysteresis: state 0 -> 1 when
 * normb_shift <= -FFT_HEADROOM+2, state 1 -> 0 when normb_shift >= -FFT_HEADROOM+5. */
363 state_change = 0; move16(); | |
364 test(); | |
365 if (st->shift_state == 0) | |
366 { test(); | |
367 if (sub(normb_shift, -FFT_HEADROOM+2) <= 0) | |
368 { | |
369 state_change = 1; move16(); | |
370 st->shift_state = 1; move16(); | |
371 } | |
372 } | |
373 else | |
374 { test(); | |
375 if (sub(normb_shift, -FFT_HEADROOM+5) >= 0) | |
376 { | |
377 state_change = 1; move16(); | |
378 st->shift_state = 0; move16(); | |
379 } | |
380 } | |
381 | |
382 /* Scale channel energy estimate */ test(); | |
/* On a state change the stored channel energies are re-scaled; the shift
 * amount is looked up with the NEW shift_state value. */
383 if (state_change) | |
384 { | |
385 for (i = LO_CHAN; i <= HI_CHAN; i++) | |
386 { | |
387 st->Lch_enrg[i] = L_shr(st->Lch_enrg[i], state_change_shift_r[st->shift_state]); move32(); | |
388 } | |
389 } | |
390 | |
391 | |
392 /* Estimate the energy in each channel */ | |
/* When Lframe_cnt is 1 (the first call if the state was zeroed by vad2_reset),
 * one_m_alpha is 0 so the previous channel energy contributes nothing. */
393 test(); | |
394 if (L_sub(st->Lframe_cnt, 1) == 0) | |
395 { | |
396 alpha = 32767; move16(); | |
397 one_m_alpha = 0; move16(); | |
398 } | |
399 else | |
400 { | |
401 alpha = CEE_SM_FAC; move16(); | |
402 one_m_alpha = ONE_MINUS_CEE_SM_FAC; move16(); | |
403 } | |
404 | |
405 for (i = LO_CHAN; i <= HI_CHAN; i++) | |
406 { | |
407 Lenrg = 0; move16(); | |
408 j1 = ch_tbl[i][0]; move16(); | |
409 j2 = ch_tbl[i][1]; move16(); | |
410 | |
/* Sum the squares of data_buffer[2*j] and data_buffer[2*j+1] over the
 * bins j1..j2 belonging to this channel. */
411 for (j = j1; j <= j2; j++) | |
412 { | |
413 Lenrg = L_mac(Lenrg, data_buffer[2 * j], data_buffer[2 * j]); | |
414 Lenrg = L_mac(Lenrg, data_buffer[2 * j + 1], data_buffer[2 * j + 1]); | |
415 } | |
416 | |
417 /* Denorm energy & scale 30,1 according to the state */ | |
418 Lenrg = L_shr_r(Lenrg, sub(shl(normb_shift, 1), enrg_norm_shift[st->shift_state])); | |
419 | |
420 /* integrate over time: e[i] = (1-alpha)*e[i] + alpha*enrg/num_bins_in_chan */ | |
421 tmp = mult(alpha, ch_tbl_sh[i]); | |
422 L_Extract (Lenrg, &hi1, &lo1); | |
423 Ltmp = Mpy_32_16(hi1, lo1, tmp); | |
424 | |
425 L_Extract (st->Lch_enrg[i], &hi1, &lo1); | |
426 st->Lch_enrg[i] = L_add(Ltmp, Mpy_32_16(hi1, lo1, one_m_alpha)); move32(); | |
/* Clamp the smoothed channel energy from below to the per-state minimum. */
427 test(); | |
428 if (L_sub(st->Lch_enrg[i], min_chan_enrg[st->shift_state]) < 0) | |
429 { | |
430 st->Lch_enrg[i] = min_chan_enrg[st->shift_state]; move32(); | |
431 } | |
432 | |
433 } | |
434 | |
435 | |
436 /* Compute the total channel energy estimate (Ltce) */ | |
437 Ltce = 0; move16(); | |
438 for (i = LO_CHAN; i <= HI_CHAN; i++) | |
439 { | |
440 Ltce = L_add(Ltce, st->Lch_enrg[i]); | |
441 } | |
442 | |
443 | |
444 /* Calculate spectral peak-to-average ratio, set flag if p2a > 10 dB */ | |
445 Lpeak = 0; move32(); | |
446 for (i = LO_CHAN+2; i <= HI_CHAN; i++) /* Sine waves not valid for low frequencies */ | |
447 { test(); | |
448 if (L_sub(st->Lch_enrg [i], Lpeak) > 0) | |
449 { | |
450 Lpeak = st->Lch_enrg [i]; move32(); | |
451 } | |
452 } | |
453 | |
454 /* Set p2a_flag if peak (dB) > average channel energy (dB) + 10 dB */ | |
455 /* Lpeak > Ltce/num_channels * 10^(10/10) */ | |
456 /* Lpeak > (10/16)*Ltce */ | |
457 | |
/* 20480 is 10/16 in 0,15 scaling (20480/32768 = 0.625). */
458 L_Extract (Ltce, &hi1, &lo1); | |
459 Ltmp = Mpy_32_16(hi1, lo1, 20480); | |
460 test(); | |
461 if (L_sub(Lpeak, Ltmp) > 0) | |
462 { | |
463 p2a_flag = TRUE; move16(); | |
464 } | |
465 else | |
466 { | |
467 p2a_flag = FALSE; move16(); | |
468 } | |
469 | |
470 | |
471 /* Initialize channel noise estimate to either the channel energy or fixed level */ | |
472 /* Scale the energy appropriately to yield state 0 (22,9) scaling for noise */ | |
/* This initialisation only runs while Lframe_cnt <= 4. */
473 test(); | |
474 if (L_sub(st->Lframe_cnt, 4) <= 0) | |
475 { test(); | |
476 if (p2a_flag == TRUE) | |
477 { | |
478 for (i = LO_CHAN; i <= HI_CHAN; i++) | |
479 { | |
480 st->Lch_noise[i] = INE_NOISE_0; move32(); | |
481 } | |
482 } | |
483 else | |
484 { | |
485 for (i = LO_CHAN; i <= HI_CHAN; i++) | |
486 { test(); | |
487 if (L_sub(st->Lch_enrg[i], ine_noise[st->shift_state]) < 0) | |
488 { | |
489 st->Lch_noise[i] = INE_NOISE_0; move32(); | |
490 } | |
491 else | |
492 { test(); | |
493 if (st->shift_state == 1) | |
494 { | |
495 st->Lch_noise[i] = L_shr(st->Lch_enrg[i], state_change_shift_r[0]); | |
496 move32(); | |
497 } | |
498 else | |
499 { | |
500 st->Lch_noise[i] = st->Lch_enrg[i]; move32(); | |
501 } | |
502 } | |
503 } | |
504 } | |
505 } | |
506 | |
507 | |
508 /* Compute the channel energy (in dB), the channel SNRs, and the sum of voice metrics */ | |
509 vm_sum = 0; move16(); | |
510 for (i = LO_CHAN; i <= HI_CHAN; i++) | |
511 { | |
/* Channel energy uses the fractional-bit count of the current shift state;
 * the noise estimate is always converted with FRACTIONAL_BITS_0. */
512 ch_enrg_db[i] = fn10Log10(st->Lch_enrg[i], fbits[st->shift_state]); move16(); | |
513 ch_noise_db = fn10Log10(st->Lch_noise[i], FRACTIONAL_BITS_0); | |
514 | |
515 ch_snr[i] = sub(ch_enrg_db[i], ch_noise_db); move16(); | |
516 | |
517 /* quantize channel SNR in 3/8 dB steps (scaled 7,8 => 15,0) */ | |
518 /* ch_snr = round((snr/(3/8))>>8) */ | |
519 /* = round(((0.6667*snr)<<2)>>8) */ | |
520 /* = round((0.6667*snr)>>6) */ | |
521 | |
522 ch_snrq = shr_r(mult(21845, ch_snr[i]), 6); | |
523 | |
524 /* Accumulate the sum of voice metrics */ test(); | |
/* j is ch_snrq clamped to 0..89 so that it is a valid index into vm_tbl[90]. */
525 if (sub(ch_snrq, 89) < 0) | |
526 { test(); | |
527 if (ch_snrq > 0) | |
528 { | |
529 j = ch_snrq; move16(); | |
530 } | |
531 else | |
532 { | |
533 j = 0; move16(); | |
534 } | |
535 } | |
536 else | |
537 { | |
538 j = 89; move16(); | |
539 } | |
540 vm_sum = add(vm_sum, vm_tbl[j]); | |
541 } | |
542 | |
543 | |
544 /* Initialize NOMINAL peak voice energy and average noise energy, calculate instantaneous SNR */ | |
/* Taken while Lframe_cnt <= 4, or when the previous call left fupdate_flag set
 * (fupdate_flag is cleared and possibly re-set further below). */
545 test(),test(),logic16(); | |
546 if (L_sub(st->Lframe_cnt, 4) <= 0 || st->fupdate_flag == TRUE) | |
547 { | |
548 /* tce_db = (96 - 22 - 10*log10(64) (due to FFT)) scaled as 7,8 */ | |
549 tce_db = 14320; move16(); | |
550 st->negSNRvar = 0; move16(); | |
551 st->negSNRbias = 0; move16(); | |
552 | |
553 /* Compute the total noise estimate (Ltne) */ | |
554 Ltne = 0; move32(); | |
555 for (i = LO_CHAN; i <= HI_CHAN; i++) | |
556 { | |
557 Ltne = L_add(Ltne, st->Lch_noise[i]); | |
558 } | |
559 | |
560 /* Get total noise in dB */ | |
561 tne_db = fn10Log10(Ltne, FRACTIONAL_BITS_0); | |
562 | |
563 /* Initialise instantaneous and long-term peak signal-to-noise ratios */ | |
564 xt = sub(tce_db, tne_db); | |
565 st->tsnr = xt; move16(); | |
566 } | |
567 else | |
568 { | |
569 /* Calculate instantaneous frame signal-to-noise ratio */ | |
570 /* xt = 10*log10( sum(2.^(ch_snr*0.1*log2(10)))/length(ch_snr) ) */ | |
571 Ltmp1 = 0; move32(); | |
572 for (i=LO_CHAN; i<=HI_CHAN; i++) { | |
573 /* Ltmp2 = ch_snr[i] * 0.1 * log2(10); (ch_snr scaled as 7,8) */ | |
574 Ltmp2 = L_shr(L_mult(ch_snr[i], 10885), 8); | |
575 L_Extract(Ltmp2, &hi1, &lo1); | |
576 hi1 = add(hi1, 3); /* 2^3 to compensate for negative SNR */ | |
577 Ltmp1 = L_add(Ltmp1, Pow2(hi1, lo1)); | |
578 } | |
579 xt = fn10Log10(Ltmp1, 4+3); /* average by 16, inverse compensation 2^3 */ | |
580 | |
581 /* Estimate long-term "peak" SNR */ test(),test(); | |
/* tsnr rises quickly when xt exceeds it, drifts slowly when xt is above
 * 0.625*tsnr, and is left unchanged otherwise. */
582 if (sub(xt, st->tsnr) > 0) | |
583 { | |
584 /* tsnr = 0.9*tsnr + 0.1*xt; */ | |
585 st->tsnr = round(L_add(L_mult(29491, st->tsnr), L_mult(3277, xt))); | |
586 } | |
587 /* else if (xt > 0.625*tsnr) */ | |
588 else if (sub(xt, mult(20480, st->tsnr)) > 0) | |
589 { | |
590 /* tsnr = 0.998*tsnr + 0.002*xt; */ | |
591 st->tsnr = round(L_add(L_mult(32702, st->tsnr), L_mult(66, xt))); | |
592 } | |
593 } | |
594 | |
595 /* Quantize the long-term SNR in 3 dB steps, limit to 0 <= tsnrq <= 19 */ | |
596 tsnrq = shr(mult(st->tsnr, 10923), 8); | |
597 | |
598 /* tsnrq = min(19, max(0, tsnrq)); */ test(),test(); | |
/* The clamp keeps tsnrq a valid index into the 20-entry hangover_table,
 * burstcount_table and vm_threshold_table. */
599 if (sub(tsnrq, 19) > 0) | |
600 { | |
601 tsnrq = 19; move16(); | |
602 } | |
603 else if (tsnrq < 0) | |
604 { | |
605 tsnrq = 0; move16(); | |
606 } | |
607 | |
608 /* Calculate the negative SNR sensitivity bias */ | |
/* negSNRvar and negSNRbias are only recomputed when xt is negative;
 * otherwise the values already in the state are reused below. */
609 test(); | |
610 if (xt < 0) | |
611 { | |
612 /* negSNRvar = 0.99*negSNRvar + 0.01*xt*xt; */ | |
613 /* xt scaled as 7,8 => xt*xt scaled as 14,17, shift to 7,8 and round */ | |
614 tmp = round(L_shl(L_mult(xt, xt), 7)); | |
615 st->negSNRvar = round(L_add(L_mult(32440, st->negSNRvar), L_mult(328, tmp))); | |
616 | |
617 /* if (negSNRvar > 4.0) negSNRvar = 4.0; */ test(); | |
618 if (sub(st->negSNRvar, 1024) > 0) | |
619 { | |
620 st->negSNRvar = 1024; move16(); | |
621 } | |
622 | |
623 /* negSNRbias = max(12.0*(negSNRvar - 0.65), 0.0); */ | |
624 tmp = mult_r(shl(sub(st->negSNRvar, 166), 4), 24576); test(); | |
625 | |
626 if (tmp < 0) | |
627 { | |
628 st->negSNRbias = 0; move16(); | |
629 } | |
630 else | |
631 { | |
632 st->negSNRbias = shr(tmp, 8); | |
633 } | |
634 } | |
635 | |
636 | |
637 /* Determine VAD as a function of the voice metric sum and quantized SNR */ | |
638 | |
/* Above threshold: ivad = 1, and once burstcount exceeds the table value the
 * hangover counter is reloaded. Below threshold: burstcount is reset and the
 * hangover counter is decremented; ivad stays 1 until it reaches 0. */
639 tmp = add(vm_threshold_table[tsnrq], st->negSNRbias); test(); | |
640 if (sub(vm_sum, tmp) > 0) | |
641 { | |
642 ivad = 1; move16(); | |
643 st->burstcount = add(st->burstcount, 1); test(); | |
644 if (sub(st->burstcount, burstcount_table[tsnrq]) > 0) | |
645 { | |
646 st->hangover = hangover_table[tsnrq]; move16(); | |
647 } | |
648 } | |
649 else | |
650 { | |
651 st->burstcount = 0; move16(); | |
652 st->hangover = sub(st->hangover, 1); test(); | |
653 if (st->hangover <= 0) | |
654 { | |
655 ivad = 0; move16(); | |
656 st->hangover = 0; move16(); | |
657 } | |
658 else | |
659 { | |
660 ivad = 1; move16(); | |
661 } | |
662 } | |
663 | |
664 | |
665 /* Calculate log spectral deviation */ | |
/* When Lframe_cnt is 1 the long-term dB spectrum is seeded from the current
 * one and ch_enrg_dev stays 0; otherwise ch_enrg_dev is the sum of absolute
 * per-channel differences between long-term and current dB energies. */
666 ch_enrg_dev = 0; move16(); | |
667 test(); | |
668 if (L_sub(st->Lframe_cnt, 1) == 0) | |
669 { | |
670 for (i = LO_CHAN; i <= HI_CHAN; i++) | |
671 { | |
672 st->ch_enrg_long_db[i] = ch_enrg_db[i]; move16(); | |
673 } | |
674 } | |
675 else | |
676 { | |
677 for (i = LO_CHAN; i <= HI_CHAN; i++) | |
678 { | |
679 tmp = abs_s(sub(st->ch_enrg_long_db[i], ch_enrg_db[i])); | |
680 ch_enrg_dev = add(ch_enrg_dev, tmp); | |
681 } | |
682 } | |
683 | |
684 /* | |
685 * Calculate long term integration constant as a function of instantaneous SNR | |
686 * (i.e., high SNR (tsnr dB) -> slower integration (alpha = HIGH_ALPHA), | |
687 * low SNR (0 dB) -> faster integration (alpha = LOW_ALPHA) | |
688 */ | |
689 | |
690 /* alpha = HIGH_ALPHA - ALPHA_RANGE * (tsnr - xt) / tsnr, low <= alpha <= high */ | |
/* The first two branches handle the cases where the ratio (tsnr - xt)/tsnr
 * would be <= 0 or > 1, so div_s() is only called with 0 < tmp <= tsnr. */
691 tmp = sub(st->tsnr, xt); test(),logic16(),test(),test(); | |
692 if (tmp <= 0 || st->tsnr <= 0) | |
693 { | |
694 alpha = HIGH_ALPHA; move16(); | |
695 one_m_alpha = 32768L-HIGH_ALPHA; move16(); | |
696 } | |
697 else if (sub(tmp, st->tsnr) > 0) | |
698 { | |
699 alpha = LOW_ALPHA; move16(); | |
700 one_m_alpha = 32768L-LOW_ALPHA; move16(); | |
701 } | |
702 else | |
703 { | |
704 tmp = div_s(tmp, st->tsnr); | |
705 alpha = sub(HIGH_ALPHA, mult(ALPHA_RANGE, tmp)); | |
706 one_m_alpha = sub(32767, alpha); | |
707 } | |
708 | |
709 /* Calc long term log spectral energy */ | |
710 for (i = LO_CHAN; i <= HI_CHAN; i++) | |
711 { | |
712 Ltmp1 = L_mult(one_m_alpha, ch_enrg_db[i]); | |
713 Ltmp2 = L_mult(alpha, st->ch_enrg_long_db[i]); | |
714 st->ch_enrg_long_db[i] = round(L_add(Ltmp1, Ltmp2)); | |
715 } | |
716 | |
717 | |
718 /* Set or clear the noise update flags */ | |
/* Normal update: vm_sum <= UPDATE_THLD and burstcount == 0.
 * Forced update (also sets fupdate_flag): total energy above the noise floor,
 * spectral deviation below DEV_THLD, no p2a flag, no LTP flag, and update_cnt
 * has reached UPDATE_CNT_THLD. */
719 update_flag = FALSE; move16(); | |
720 st->fupdate_flag = FALSE; move16(); | |
721 test(),test(); | |
722 if (sub(vm_sum, UPDATE_THLD) <= 0) | |
723 { test(); | |
724 if (st->burstcount == 0) | |
725 { | |
726 update_flag = TRUE; move16(); | |
727 st->update_cnt = 0; move16(); | |
728 } | |
729 } | |
730 else if (L_sub(Ltce, noise_floor_chan[st->shift_state]) > 0) | |
731 { test(); | |
732 if (sub(ch_enrg_dev, DEV_THLD) < 0) | |
733 { test(); | |
734 if (p2a_flag == FALSE) | |
735 { test(); | |
736 if (st->LTP_flag == FALSE) | |
737 { | |
738 st->update_cnt = add(st->update_cnt, 1); test(); | |
739 if (sub(st->update_cnt, UPDATE_CNT_THLD) >= 0) | |
740 { | |
741 update_flag = TRUE; move16(); | |
742 st->fupdate_flag = TRUE; move16(); | |
743 } | |
744 } | |
745 } | |
746 } | |
747 } | |
/* hyster_cnt counts consecutive calls in which update_cnt did not change;
 * once it exceeds HYSTER_CNT_THLD, update_cnt is reset to 0. */
748 test(); | |
749 if (sub(st->update_cnt, st->last_update_cnt) == 0) | |
750 { | |
751 st->hyster_cnt = add(st->hyster_cnt, 1); | |
752 } | |
753 else | |
754 { | |
755 st->hyster_cnt = 0; move16(); | |
756 } | |
757 | |
758 st->last_update_cnt = st->update_cnt; move16(); | |
759 test(); | |
760 if (sub(st->hyster_cnt, HYSTER_CNT_THLD) > 0) | |
761 { | |
762 st->update_cnt = 0; move16(); | |
763 } | |
764 | |
765 | |
766 /* Conditionally update the channel noise estimates */ | |
767 test(); | |
768 if (update_flag == TRUE) | |
769 { | |
770 /* Check shift state */ test(); | |
771 if (st->shift_state == 1) | |
772 { | |
773 /* get factor to shift ch_enrg[] from state 1 to 0 (noise always state 0) */ | |
774 tmp = state_change_shift_r[0]; move16(); | |
775 } | |
776 else | |
777 { | |
778 /* No shift if already state 0 */ | |
779 tmp = 0; move16(); | |
780 } | |
781 | |
782 /* Update noise energy estimate */ | |
783 for (i = LO_CHAN; i <= HI_CHAN; i++) | |
784 { test(); | |
785 /* integrate over time: en[i] = (1-alpha)*en[i] + alpha*e[n] */ | |
786 /* (extract with shift compensation for state 1) */ | |
787 L_Extract (L_shr(st->Lch_enrg[i], tmp), &hi1, &lo1); | |
788 Ltmp = Mpy_32_16(hi1, lo1, CNE_SM_FAC); | |
789 | |
790 L_Extract (st->Lch_noise[i], &hi1, &lo1); | |
791 st->Lch_noise[i] = L_add(Ltmp, Mpy_32_16(hi1, lo1, ONE_MINUS_CNE_SM_FAC)); move32(); | |
792 | |
793 /* Limit low level noise */ test(); | |
794 if (L_sub(st->Lch_noise[i], MIN_NOISE_ENRG_0) < 0) | |
795 { | |
796 st->Lch_noise[i] = MIN_NOISE_ENRG_0; move32(); | |
797 } | |
798 } | |
799 } | |
800 | |
801 return(ivad); | |
802 } /* end of vad2 () */ | |
803 | |
804 | |
805 /**** Other related functions *****/ | |
806 | |
807 /*************************************************************************** | |
808 * | |
809 * FUNCTION NAME: vad2_reset() | |
810 * | |
811 * PURPOSE: | |
812 * The purpose of this function is to initialise the vad2() state | |
813 * variables. | |
814 * | |
815 * INPUTS: | |
816 * | |
817 * &st | |
818 * pointer to data structure of vad2 state variables | |
819 * | |
820 * OUTPUTS: | |
821 * | |
822 * none | |
823 * | |
824 * RETURN VALUE: | |
825 * | |
826 * none | |
827 * | |
828 * DESCRIPTION: | |
829 * | |
830 * Set all values in vad2 state to zero. Since it is | |
831 * known that all elements in the structure contain | |
832 * 16 and 32 bit fixed point elements, the initialisation | |
833 * is performed by clearing every byte of the structure | |
834 * with memset(). | |
835 * | |
836 *************************************************************************/ | |
837 | |
838 void vad2_reset (vadState2 * st) | |
839 { | |
840 memset(st, 0, sizeof(vadState2)); | |
841 } /* end of vad2_reset () */ |