• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libsphinxbase/feat/agc.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * agc.c -- Various forms of automatic gain control (AGC)
00039  * 
00040  * **********************************************
00041  * CMU ARPA Speech Project
00042  *
00043  * Copyright (c) 1996 Carnegie Mellon University.
00044  * ALL RIGHTS RESERVED.
00045  * **********************************************
00046  * 
00047  * HISTORY
00048  * $Log$
00049  * Revision 1.5  2005/06/21  19:25:41  arthchan2003
00050  * 1, Fixed doxygen documentation. 2, Added $ keyword.
00051  * 
00052  * Revision 1.3  2005/03/30 01:22:46  archan
00053  * Fixed mistakes in last updates. Add
00054  *
00055  * 
00056  * 04-Nov-95    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00057  *              Created.
00058  */
00059 
00060 #include <string.h>
00061 #ifdef HAVE_CONFIG_H
00062 #include <config.h>
00063 #endif
00064 
00065 #include "agc.h"
00066 #include "err.h"
00067 #include "ckd_alloc.h"
00068 
/*
 * Printable names of the AGC types.
 * NOTE!  The order of these entries must match the enum in agc.h,
 * because agc_type_from_str() casts an index into this table to agc_type_t.
 */
const char *agc_type_str[] = {
    "none", "max", "emax", "noise"
};

/* Number of entries in agc_type_str. */
static const int n_agc_type_str = sizeof agc_type_str / sizeof *agc_type_str;
00077 
00078 agc_type_t
00079 agc_type_from_str(const char *str)
00080 {
00081     int i;
00082 
00083     for (i = 0; i < n_agc_type_str; ++i) {
00084         if (0 == strcmp(str, agc_type_str[i]))
00085             return (agc_type_t)i;
00086     }
00087     E_FATAL("Unknown AGC type '%s'\n", str);
00088     return AGC_NONE;
00089 }
00090 
00091 agc_t *agc_init(void)
00092 {
00093     agc_t *agc;
00094     agc = ckd_calloc(1, sizeof(*agc));
00095     agc->noise_thresh = FLOAT2MFCC(2.0);
00096     
00097     return agc;
00098 }
00099 
00100 void agc_free(agc_t *agc)
00101 {
00102     ckd_free(agc);
00103 }
00104 
00108 void
00109 agc_max(agc_t *agc, mfcc_t **mfc, int32 n_frame)
00110 {
00111     int32 i;
00112 
00113     if (n_frame <= 0)
00114         return;
00115     agc->obs_max = mfc[0][0];
00116     for (i = 1; i < n_frame; i++) {
00117         if (mfc[i][0] > agc->obs_max) {
00118             agc->obs_max = mfc[i][0];
00119             agc->obs_frame = 1;
00120         }
00121     }
00122 
00123     E_INFO("AGCMax: obs=max= %.2f\n", agc->obs_max);
00124     for (i = 0; i < n_frame; i++)
00125         mfc[i][0] -= agc->obs_max;
00126 }
00127 
00128 void
00129 agc_emax_set(agc_t *agc, float32 m)
00130 {
00131     agc->max = FLOAT2MFCC(m);
00132     E_INFO("AGCEMax: max= %.2f\n", m);
00133 }
00134 
00135 float32
00136 agc_emax_get(agc_t *agc)
00137 {
00138     return MFCC2FLOAT(agc->max);
00139 }
00140 
00141 void
00142 agc_emax(agc_t *agc, mfcc_t **mfc, int32 n_frame)
00143 {
00144     int i;
00145 
00146     if (n_frame <= 0)
00147         return;
00148     for (i = 1; i < n_frame; ++i) {
00149         if (mfc[i][0] > agc->obs_max) {
00150             agc->obs_max = mfc[i][0];
00151             agc->obs_frame = 1;
00152         }
00153         mfc[i][0] -= agc->max;
00154     }
00155 }
00156 
00157 /* Update estimated max for next utterance */
00158 void
00159 agc_emax_update(agc_t *agc)
00160 {
00161     if (agc->obs_frame) {            /* Update only if some data observed */
00162         agc->obs_max_sum += agc->obs_max;
00163         agc->obs_utt++;
00164 
00165         /* Re-estimate max over past history; decay the history */
00166         agc->max = agc->obs_max_sum / agc->obs_utt;
00167         if (agc->obs_utt == 8) {
00168             agc->obs_max_sum /= 2;
00169             agc->obs_utt = 4;
00170         }
00171     }
00172     E_INFO("AGCEMax: obs= %.2f, new= %.2f\n", agc->obs_max, agc->max);
00173 
00174     /* Reset the accumulators for the next utterance. */
00175     agc->obs_frame = 0;
00176     agc->obs_max = FLOAT2MFCC(-1000.0); /* Less than any real C0 value (hopefully!!) */
00177 }
00178 
00179 void
00180 agc_noise(agc_t *agc,
00181           mfcc_t **cep,
00182           int32 nfr)
00183 {
00184     mfcc_t min_energy; /* Minimum log-energy */
00185     mfcc_t noise_level;        /* Average noise_level */
00186     int32 i;           /* frame index */
00187     int32 noise_frames;        /* Number of noise frames */
00188 
00189     /* Determine minimum log-energy in utterance */
00190     min_energy = cep[0][0];
00191     for (i = 0; i < nfr; ++i) {
00192         if (cep[i][0] < min_energy)
00193             min_energy = cep[i][0];
00194     }
00195 
00196     /* Average all frames between min_energy and min_energy + agc->noise_thresh */
00197     noise_frames = 0;
00198     noise_level = 0;
00199     min_energy += agc->noise_thresh;
00200     for (i = 0; i < nfr; ++i) {
00201         if (cep[i][0] < min_energy) {
00202             noise_level += cep[i][0];
00203             noise_frames++;
00204         }
00205     }
00206     noise_level /= noise_frames;
00207 
00208     E_INFO("AGC NOISE: max= %6.3f\n", MFCC2FLOAT(noise_level));
00209 
00210     /* Subtract noise_level from all log_energy values */
00211     for (i = 0; i < nfr; ++i)
00212         cep[i][0] -= noise_level;
00213 }
00214 
00215 void
00216 agc_set_threshold(agc_t *agc, float32 threshold)
00217 {
00218     agc->noise_thresh = FLOAT2MFCC(threshold);
00219 }
00220 
00221 float32
00222 agc_get_threshold(agc_t *agc)
00223 {
00224     return FLOAT2MFCC(agc->noise_thresh);
00225 }

Generated on Tue Aug 17 2010 for SphinxBase by  doxygen 1.7.1