53 #include "sphinxbase/byteorder.h"
54 #include "sphinxbase/fixpoint.h"
60 #include "fe_internal.h"
/* Argument definition table (arg_t[]).  Its entries come from
 * waveform_to_cepstral_command_line_macro(), followed by an all-NULL/zero
 * entry (presumably the end-of-table sentinel -- confirm).
 * NOTE(review): the closing of this initializer is not visible in this
 * listing. */
63 static const arg_t fe_args[] = {
64 waveform_to_cepstral_command_line_macro(),
65 { NULL, 0, NULL, NULL }
/* Fragment that copies general front-end options from `config` into `fe`.
 * NOTE(review): the enclosing function header is not visible in this
 * listing; the fe_parse_general_params(...) call elsewhere in this file
 * suggests these lines are its body -- confirm. */
/* Sampling rate and frame rate are read from -samprate and -frate. */
74 fe->sampling_rate = cmd_ln_float32_r(config,
"-samprate");
75 frate = cmd_ln_int32_r(config,
"-frate");
/* Reject a frame rate above MAX_INT16, above the sampling rate, or below 1;
 * the value is narrowed to int16 when stored in fe->frame_rate below. */
76 if (frate > MAX_INT16 || frate > fe->sampling_rate || frate < 1) {
78 (
"Frame rate %d can not be bigger than sample rate %.02f\n",
79 frate, fe->sampling_rate);
83 fe->frame_rate = (int16)frate;
86 fe->seed = cmd_ln_int32_r(config,
"-seed");
/* fe->swap is 0 when -input_endian equals the compared string ("big" in the
 * assignment under WORDS_BIGENDIAN, "little" in the other one) and 1
 * otherwise.  NOTE(review): the preprocessor lines separating the two
 * assignments are not visible in this listing. */
88 #ifdef WORDS_BIGENDIAN
89 fe->swap = strcmp(
"big",
cmd_ln_str_r(config,
"-input_endian")) == 0 ? 0 : 1;
91 fe->swap = strcmp(
"little",
cmd_ln_str_r(config,
"-input_endian")) == 0 ? 0 : 1;
93 fe->window_length = cmd_ln_float32_r(config,
"-wlen");
94 fe->pre_emphasis_alpha = cmd_ln_float32_r(config,
"-alpha");
/* -ncep is narrowed to uint8 and -nfft to int16 by the casts below; no range
 * check precedes the casts in the visible lines. */
96 fe->num_cepstra = (uint8)cmd_ln_int32_r(config,
"-ncep");
97 fe->fft_size = (int16)cmd_ln_int32_r(config,
"-nfft");
/* Count the halvings of fft_size into fft_order; an odd intermediate value
 * of j is reported as "not a power of 2".
 * NOTE(review): the body only runs while j > 1, so the `fft_size <= 0` half
 * of the test cannot fire for a non-positive fft_size (the loop is never
 * entered in that case). */
100 for (j = fe->fft_size, fe->fft_order = 0; j > 1; j >>= 1, fe->fft_order++) {
101 if (((j % 2) != 0) || (fe->fft_size <= 0)) {
102 E_ERROR(
"fft: number of points must be a power of 2 (is %d)\n",
/* The FFT must be at least as long as window_length * sampling_rate samples
 * (product truncated to int). */
108 if (fe->fft_size < (
int)(fe->window_length * fe->sampling_rate)) {
109 E_ERROR(
"FFT: Number of points must be greater or equal to frame size (%d samples)\n",
110 (
int)(fe->window_length * fe->sampling_rate));
/* Map the -transform string to fe->transform: "dct" -> DCT_II,
 * "legacy" -> LEGACY_DCT, "htk" -> DCT_HTK.  The error message below lists
 * these as the only accepted values. */
116 if (0 == strcmp(
cmd_ln_str_r(config,
"-transform"),
"dct"))
117 fe->transform = DCT_II;
118 else if (0 == strcmp(
cmd_ln_str_r(config,
"-transform"),
"legacy"))
119 fe->transform = LEGACY_DCT;
120 else if (0 == strcmp(
cmd_ln_str_r(config,
"-transform"),
"htk"))
121 fe->transform = DCT_HTK;
123 E_ERROR(
"Invalid transform type (values are 'dct', 'legacy', 'htk')\n");
/* NOTE(review): the conditions that choose between RAW_LOG_SPEC and
 * SMOOTH_LOG_SPEC are not visible in this listing. */
128 fe->log_spec = RAW_LOG_SPEC;
130 fe->log_spec = SMOOTH_LOG_SPEC;
/* Fragment that fills the mel filter bank description `mel` from `fe` and
 * `config`.
 * NOTE(review): the enclosing function header is not visible in this
 * listing; the fe_parse_melfb_params(config, fe, fe->mel_fb) call elsewhere
 * in this file suggests these lines are its body -- confirm. */
/* Values already held in fe are copied; the filter count comes from -nfilt. */
138 mel->sampling_rate = fe->sampling_rate;
139 mel->fft_size = fe->fft_size;
140 mel->num_cepstra = fe->num_cepstra;
141 mel->num_filters = cmd_ln_int32_r(config,
"-nfilt");
/* feature_dimension is assigned either the filter count or the cepstrum
 * count.  NOTE(review): the selecting condition is not visible here. */
144 fe->feature_dimension = mel->num_filters;
146 fe->feature_dimension = fe->num_cepstra;
148 mel->upper_filt_freq = cmd_ln_float32_r(config,
"-upperf");
149 mel->lower_filt_freq = cmd_ln_float32_r(config,
"-lowerf");
154 mel->warp_params =
cmd_ln_str_r(config,
"-warp_params");
155 mel->lifter_val = cmd_ln_int32_r(config,
"-lifter");
/* Select the warping function from mel->warp_type (its assignment is not
 * visible in this listing); anything other than FE_SUCCESS is reported as an
 * error.  The parameter string and sampling rate are then handed to
 * fe_warp_set_parameters. */
160 if (fe_warp_set(mel, mel->warp_type) != FE_SUCCESS) {
161 E_ERROR(
"Failed to initialize the warping function.\n");
164 fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate);
/* Log the current front-end settings through E_INFO.  `fe` is taken as a
 * pointer to const and is only read here.
 * NOTE(review): the return type, braces and some branch conditions of this
 * function are not visible in this listing. */
169 fe_print_current(
fe_t const *fe)
171 E_INFO(
"Current FE Parameters:\n");
172 E_INFO(
"\tSampling Rate: %f\n", fe->sampling_rate);
173 E_INFO(
"\tFrame Size: %d\n", fe->frame_size);
174 E_INFO(
"\tFrame Shift: %d\n", fe->frame_shift);
175 E_INFO(
"\tFFT Size: %d\n", fe->fft_size);
/* Filter bank settings are read through fe->mel_fb. */
176 E_INFO(
"\tLower Frequency: %g\n",
177 fe->mel_fb->lower_filt_freq);
178 E_INFO(
"\tUpper Frequency: %g\n",
179 fe->mel_fb->upper_filt_freq);
180 E_INFO(
"\tNumber of filters: %d\n", fe->mel_fb->num_filters);
181 E_INFO(
"\tNumber of Overflow Samps: %d\n", fe->num_overflow_samps);
182 E_INFO(
"\tStart Utt Status: %d\n", fe->start_flag);
/* Boolean options are reported by inserting either "" or "not " into the
 * message. */
183 E_INFO(
"Will %sremove DC offset at frame level\n",
184 fe->remove_dc ?
"" :
"not ");
/* Dither messages.  NOTE(review): the condition choosing between the
 * "will add" and "will not add" messages is not visible in this listing. */
186 E_INFO(
"Will add dither to audio\n");
187 E_INFO(
"Dither seeded with %d\n", fe->seed);
190 E_INFO(
"Will not add dither to audio\n");
/* The liftering message is only emitted when lifter_val is non-zero. */
192 if (fe->mel_fb->lifter_val) {
193 E_INFO(
"Will apply sine-curve liftering, period %d\n",
194 fe->mel_fb->lifter_val);
196 E_INFO(
"Will %snormalize filters to unit area\n",
197 fe->mel_fb->unit_area ?
"" :
"not ");
198 E_INFO(
"Will %sround filter frequencies to DFT points\n",
199 fe->mel_fb->round_filters ?
"" :
"not ");
200 E_INFO(
"Will %suse double bandwidth in mel filter\n",
201 fe->mel_fb->doublewide ?
"" :
"not ");
/* Fragment that builds a front-end object from `config`: parses the
 * parameters, derives frame geometry, allocates the work buffers and the mel
 * filter bank, then logs the result.
 * NOTE(review): the enclosing function header, the allocation of `fe` itself
 * and all error-path code are not visible in this listing. */
/* The parser is handed the value returned by cmd_ln_retain(config); a
 * negative result is the failure case. */
219 if (fe_parse_general_params(
cmd_ln_retain(config), fe) < 0) {
/* Frame shift and frame size in samples: 0.5 is added before the conversion
 * to int32 so the results are rounded rather than truncated. */
228 fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5);
229 fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5);
231 fe->frame_counter = 0;
233 assert (fe->frame_shift > 1);
/* A frame may not be longer than the FFT. */
235 if (fe->frame_size > (fe->fft_size)) {
237 (
"Number of FFT points has to be a power of 2 higher than %d\n",
244 fe_init_dither(fe->seed);
/* overflow_samps has room for frame_size int16 samples.  hamming_window is
 * allocated with frame_size/2 entries and then filled by fe_create_hamming,
 * which is passed the full frame_size.
 * NOTE(review): presumably only half of a symmetric window is stored --
 * confirm against fe_create_hamming. */
247 fe->overflow_samps =
ckd_calloc(fe->frame_size,
sizeof(int16));
248 fe->hamming_window =
ckd_calloc(fe->frame_size/2,
sizeof(window_t));
251 fe_create_hamming(fe->hamming_window, fe->frame_size);
/* Allocate a zeroed mel filter bank structure, fill it from config, then run
 * fe_build_melfilters and fe_compute_melcosine on it. */
254 fe->mel_fb =
ckd_calloc(1,
sizeof(*fe->mel_fb));
257 fe_parse_melfb_params(config, fe, fe->mel_fb);
258 fe_build_melfilters(fe->mel_fb);
259 fe_compute_melcosine(fe->mel_fb);
/* Work buffers: spch has frame_size entries, frame and spec have fft_size
 * entries each, and mfspec has one entry per mel filter. */
263 fe->spch =
ckd_calloc(fe->frame_size,
sizeof(*fe->spch));
264 fe->frame =
ckd_calloc(fe->fft_size,
sizeof(*fe->frame));
265 fe->spec =
ckd_calloc(fe->fft_size,
sizeof(*fe->spec));
266 fe->mfspec =
ckd_calloc(fe->mel_fb->num_filters,
sizeof(*fe->mfspec));
/* ccc and sss get fft_size / 4 entries each; fe_create_twiddle(fe) is called
 * immediately after they are allocated. */
269 fe->ccc =
ckd_calloc(fe->fft_size / 4,
sizeof(*fe->ccc));
270 fe->sss =
ckd_calloc(fe->fft_size / 4,
sizeof(*fe->sss));
271 fe_create_twiddle(fe);
274 fe_print_current(fe);
/* NOTE(review): only the name and the `fe_t *fe` parameter of this function
 * are visible in this listing; its return type and body cannot be documented
 * from here. */
290 fe_get_config(
fe_t *fe)
/* Reports which dither seed is in use: one message for the internally
 * generated seed, another that echoes the caller-supplied `seed`.
 * NOTE(review): the test on `seed` and the call that actually seeds the
 * generator are not visible in this listing. */
296 fe_init_dither(int32 seed)
299 E_INFO(
"You are using the internal mechanism to generate the seed.\n");
307 E_INFO(
"You are using %d as the seed.\n", seed);
/* Reset the carried-over sample state: the overflow count is set to zero and
 * frame_size int16 entries of overflow_samps are cleared.
 * NOTE(review): the return type and any further statements are not visible
 * in this listing. */
313 fe_start_utt(
fe_t * fe)
315 fe->num_overflow_samps = 0;
316 memset(fe->overflow_samps, 0, fe->frame_size *
sizeof(int16));
/* Returns fe->feature_dimension converted to int. */
323 fe_get_output_size(
fe_t *fe)
325 return (
int)fe->feature_dimension;
/* Stores fe->frame_shift in *out_frame_shift and fe->frame_size in
 * *out_frame_size.
 * NOTE(review): the declaration of out_frame_size and any guards around the
 * two stores are not visible in this listing. */
329 fe_get_input_size(
fe_t *fe,
int *out_frame_shift,
333 *out_frame_shift = fe->frame_shift;
335 *out_frame_size = fe->frame_size;
/* Process a single frame: hand `nsamps` samples from `spch` to
 * fe_read_frame, then return whatever fe_write_frame returns when writing
 * into `fr_cep`. */
339 fe_process_frame(
fe_t * fe, int16
const *spch, int32 nsamps, mfcc_t * fr_cep)
341 fe_read_frame(fe, spch, nsamps);
342 return fe_write_frame(fe, fr_cep);
/* Streaming feature extraction.  Consumes samples from *inout_spch (count in
 * *inout_nsamps), writes one feature vector per frame into buf_cep[], and
 * stores the number of frames written in *inout_nframes.  The input pointer
 * and the remaining-sample count are advanced as samples are consumed;
 * samples that cannot be used yet are kept in fe->overflow_samps.
 * NOTE(review): many lines of this function are not visible in this listing,
 * including the buf_cep parameter declaration, the frame_count computation
 * and most return statements. */
346 fe_process_frames(
fe_t *fe,
347 int16
const **inout_spch,
348 size_t *inout_nsamps,
350 int32 *inout_nframes)
353 int outidx, i, n, n_overflow, orig_n_overflow;
354 int16
const *orig_spch;
/* buf_cep == NULL: no output buffer was supplied; the visible code returns
 * *inout_nframes.  NOTE(review): presumably this branch only computes how
 * many frames are available -- the assignment itself is not fully visible. */
358 if (buf_cep == NULL) {
359 if (*inout_nsamps + fe->num_overflow_samps < (
size_t)fe->frame_size)
363 + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size)
365 return *inout_nframes;
/* Input plus carried-over samples is shorter than one frame: append the
 * input to overflow_samps and mark it as consumed. */
369 if (*inout_nsamps + fe->num_overflow_samps < (
size_t)fe->frame_size) {
370 if (*inout_nsamps > 0) {
372 memcpy(fe->overflow_samps + fe->num_overflow_samps,
373 *inout_spch, *inout_nsamps * (
sizeof(int16)));
374 fe->num_overflow_samps += *inout_nsamps;
376 *inout_spch += *inout_nsamps;
/* NOTE(review): the handling of *inout_nframes < 1 is not visible here. */
385 if (*inout_nframes < 1) {
/* Remember where the input started and how many samples were buffered, for
 * the carry-over bookkeeping at the end. */
391 orig_spch = *inout_spch;
392 orig_n_overflow = fe->num_overflow_samps;
395 + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size)
/* Never produce more frames than the caller allowed. */
398 if (frame_count > *inout_nframes)
399 frame_count = *inout_nframes;
/* First frame.  When samples were carried over, overflow_samps is topped up
 * to a full frame from the input and the frame is read from there. */
404 if (fe->num_overflow_samps) {
405 int offset = fe->frame_size - fe->num_overflow_samps;
408 memcpy(fe->overflow_samps + fe->num_overflow_samps,
409 *inout_spch, offset *
sizeof(**inout_spch));
410 fe_read_frame(fe, fe->overflow_samps, fe->frame_size);
411 assert(outidx < frame_count);
412 if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
416 *inout_spch += offset;
417 *inout_nsamps -= offset;
418 fe->num_overflow_samps -= fe->frame_shift;
/* First frame read directly from the input.  NOTE(review): presumably the
 * alternative to the carried-over case above; the branch marker is not
 * visible in this listing. */
421 fe_read_frame(fe, *inout_spch, fe->frame_size);
422 assert(outidx < frame_count);
423 if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
427 *inout_spch += fe->frame_size;
428 *inout_nsamps -= fe->frame_size;
/* Remaining frames: each iteration shifts frame_shift new samples in and
 * advances the input by the same amount. */
432 for (i = 1; i < frame_count; ++i) {
433 assert(*inout_nsamps >= (
size_t)fe->frame_shift);
435 fe_shift_frame(fe, *inout_spch, fe->frame_shift);
436 assert(outidx < frame_count);
437 if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
441 *inout_spch += fe->frame_shift;
442 *inout_nsamps -= fe->frame_shift;
444 if (fe->num_overflow_samps > 0)
445 fe->num_overflow_samps -= fe->frame_shift;
/* Carry-over for the next call, case 1: no previously buffered samples
 * remain (num_overflow_samps <= 0).  Up to frame_shift unread input samples
 * are taken, plus the last frame_size - frame_shift samples already consumed
 * (limited to what this call actually consumed from the input). */
449 if (fe->num_overflow_samps <= 0) {
451 n_overflow = *inout_nsamps;
452 if (n_overflow > fe->frame_shift)
453 n_overflow = fe->frame_shift;
454 fe->num_overflow_samps = fe->frame_size - fe->frame_shift;
456 if (fe->num_overflow_samps > *inout_spch - orig_spch)
457 fe->num_overflow_samps = *inout_spch - orig_spch;
458 fe->num_overflow_samps += n_overflow;
459 if (fe->num_overflow_samps > 0) {
460 memcpy(fe->overflow_samps,
461 *inout_spch - (fe->frame_size - fe->frame_shift),
462 fe->num_overflow_samps *
sizeof(**inout_spch));
464 *inout_spch += n_overflow;
465 *inout_nsamps -= n_overflow;
/* Case 2 (NOTE(review): presumably the alternative branch; its marker is not
 * visible): the still-unused buffered samples are moved to the front of
 * overflow_samps (memmove, since source and destination are the same
 * buffer), then samples from the start of this call's input are appended up
 * to a full frame. */
471 memmove(fe->overflow_samps,
472 fe->overflow_samps + orig_n_overflow - fe->num_overflow_samps,
473 fe->num_overflow_samps *
sizeof(*fe->overflow_samps));
475 n_overflow = *inout_spch - orig_spch + *inout_nsamps;
476 if (n_overflow > fe->frame_size - fe->num_overflow_samps)
477 n_overflow = fe->frame_size - fe->num_overflow_samps;
478 memcpy(fe->overflow_samps + fe->num_overflow_samps,
479 orig_spch, n_overflow *
sizeof(*orig_spch));
480 fe->num_overflow_samps += n_overflow;
/* If more was appended than had been consumed so far, advance the input past
 * the extra samples. */
482 if (n_overflow > *inout_spch - orig_spch) {
483 n_overflow -= (*inout_spch - orig_spch);
484 *inout_spch += n_overflow;
485 *inout_nsamps -= n_overflow;
/* Report the number of frames written. */
490 *inout_nframes = outidx;
/* Process a whole sample buffer in one call.
 * fe_process_frames is first called with NULL input and output buffers,
 * passing nframes through; a 2-D array is then allocated and
 * fe_process_frames is called again with the real buffers.
 * NOTE(review): the condition choosing between the two allocations, the
 * store into *cep_block and the return statement are not visible in this
 * listing. */
495 fe_process_utt(
fe_t * fe, int16
const * spch,
size_t nsamps,
496 mfcc_t *** cep_block, int32 * nframes)
502 fe_process_frames(fe, NULL, &nsamps, NULL, nframes);
/* Either *nframes rows or a single row, each feature_dimension wide. */
505 cep = (mfcc_t **)
ckd_calloc_2d(*nframes, fe->feature_dimension,
sizeof(**cep));
507 cep = (mfcc_t **)
ckd_calloc_2d(1, fe->feature_dimension,
sizeof(**cep));
509 rv = fe_process_frames(fe, &spch, &nsamps, cep, nframes);
/* Flush the samples still held in overflow_samps at the end of an utterance.
 * When any remain, they are read as one frame of num_overflow_samps samples
 * and the return value of fe_write_frame (writing into `cepvector`) is
 * stored in *nframes.  The overflow count is also reset to zero.
 * NOTE(review): the branch taken when no samples remain and the return
 * statement are not visible in this listing. */
517 fe_end_utt(
fe_t * fe, mfcc_t * cepvector, int32 * nframes)
520 if (fe->num_overflow_samps > 0) {
521 fe_read_frame(fe, fe->overflow_samps, fe->num_overflow_samps);
522 *nframes = fe_write_frame(fe, cepvector);
529 fe->num_overflow_samps = 0;
/* Fragment of a release path for `fe`.
 * NOTE(review): the enclosing function header is not visible in this
 * listing (presumably fe_free -- confirm). */
/* The reference count is decremented first; what happens while it is still
 * positive is not visible here. */
547 if (--fe->refcount > 0)
/* mel_cosine is released through fe_free_2d only when it is set. */
552 if (fe->mel_fb->mel_cosine)
553 fe_free_2d((
void *) fe->mel_fb->mel_cosine);
/* Convert nframes * feature_dimension values from mfcc_t to float32 with
 * MFCC2FLOAT.
 * When `input` and `output` are the same pointer, the element count is
 * returned without converting anything.
 * The loop indexes row 0 with a flat index, so it assumes that all rows are
 * stored contiguously behind input[0] / output[0] -- TODO confirm against
 * the allocator used by callers.
 * NOTE(review): the return after the loop and any preprocessor conditions
 * are not visible in this listing. */
579 fe_mfcc_to_float(
fe_t * fe,
580 mfcc_t ** input, float32 ** output, int32 nframes)
585 if ((
void *) input == (
void *) output)
586 return nframes * fe->feature_dimension;
588 for (i = 0; i < nframes * fe->feature_dimension; ++i)
589 output[0][i] = MFCC2FLOAT(input[0][i]);
/* Convert nframes * feature_dimension values from float32 to mfcc_t with
 * FLOAT2MFCC.
 * When `input` and `output` are the same pointer, the element count is
 * returned without converting anything.
 * The loop indexes row 0 with a flat index, so it assumes that all rows are
 * stored contiguously behind input[0] / output[0] -- TODO confirm against
 * the allocator used by callers.
 * NOTE(review): the return after the loop and any preprocessor conditions
 * are not visible in this listing. */
598 fe_float_to_mfcc(
fe_t * fe,
599 float32 ** input, mfcc_t ** output, int32 nframes)
604 if ((
void *) input == (
void *) output)
605 return nframes * fe->feature_dimension;
607 for (i = 0; i < nframes * fe->feature_dimension; ++i)
608 output[0][i] = FLOAT2MFCC(input[0][i]);
/* Run fe_spec2cep on the values in `fr_spec`, writing the result to
 * `fr_cep`.
 * Two variants are visible: fr_spec is passed to fe_spec2cep directly, or
 * its num_filters values are first copied, with a cast, into a temporary
 * powspec_t array.
 * NOTE(review): the condition selecting between the variants, the release of
 * `powspec` and the return statement are not visible in this listing. */
614 fe_logspec_to_mfcc(
fe_t * fe,
const mfcc_t * fr_spec, mfcc_t * fr_cep)
617 fe_spec2cep(fe, fr_spec, fr_cep);
/* Variant using a temporary with one powspec_t per mel filter. */
622 powspec =
ckd_malloc(fe->mel_fb->num_filters *
sizeof(powspec_t));
623 for (i = 0; i < fe->mel_fb->num_filters; ++i)
624 powspec[i] = (powspec_t) fr_spec[i];
625 fe_spec2cep(fe, powspec, fr_cep);
/* Run fe_dct2 (last argument 0) on the values in `fr_spec`, writing the
 * result to `fr_cep`.
 * Two variants are visible: fr_spec is passed to fe_dct2 directly, or its
 * num_filters values are first copied, with a cast, into a temporary
 * powspec_t array.
 * NOTE(review): the condition selecting between the variants, the release of
 * `powspec` and the return statement are not visible in this listing. */
632 fe_logspec_dct2(
fe_t * fe,
const mfcc_t * fr_spec, mfcc_t * fr_cep)
635 fe_dct2(fe, fr_spec, fr_cep, 0);
/* Variant using a temporary with one powspec_t per mel filter. */
640 powspec =
ckd_malloc(fe->mel_fb->num_filters *
sizeof(powspec_t));
641 for (i = 0; i < fe->mel_fb->num_filters; ++i)
642 powspec[i] = (powspec_t) fr_spec[i];
643 fe_dct2(fe, powspec, fr_cep, 0);
/* Run fe_dct3 on the values in `fr_cep`, producing `fr_spec`.
 * Two variants are visible: fe_dct3 writes into fr_spec directly, or it
 * writes into a temporary powspec_t array whose num_filters values are then
 * copied, with a cast to mfcc_t, into fr_spec.
 * NOTE(review): the condition selecting between the variants, the release of
 * `powspec` and the return statement are not visible in this listing. */
650 fe_mfcc_dct3(
fe_t * fe,
const mfcc_t * fr_cep, mfcc_t * fr_spec)
653 fe_dct3(fe, fr_cep, fr_spec);
/* Variant using a temporary with one powspec_t per mel filter. */
658 powspec =
ckd_malloc(fe->mel_fb->num_filters *
sizeof(powspec_t));
659 fe_dct3(fe, fr_cep, powspec);
660 for (i = 0; i < fe->mel_fb->num_filters; ++i)
661 fr_spec[i] = (mfcc_t) powspec[i];