• Main Page
  • Data Structures
  • Files
  • File List
  • Globals

src/libpocketsphinx/pocketsphinx.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /* System headers. */
00039 #include <stdio.h>
00040 #include <assert.h>
00041 
00042 /* SphinxBase headers. */
00043 #include <err.h>
00044 #include <strfuncs.h>
00045 #include <filename.h>
00046 #include <pio.h>
00047 
00048 /* Local headers. */
00049 #include "cmdln_macro.h"
00050 #include "pocketsphinx_internal.h"
00051 #include "ps_lattice_internal.h"
00052 #include "phone_loop_search.h"
00053 #include "fsg_search_internal.h"
00054 #include "ngram_search.h"
00055 #include "ngram_search_fwdtree.h"
00056 #include "ngram_search_fwdflat.h"
00057 
00058 static const arg_t ps_args_def[] = {
00059     POCKETSPHINX_OPTIONS,
00060     CMDLN_EMPTY_OPTION
00061 };
00062 
/* Probe for a file by trying to open it for reading (no obviously more
 * portable way is known).  Returns 1 if the open succeeded, 0 otherwise. */
static int
file_exists(const char *path)
{
    FILE *fh = fopen(path, "rb");

    if (fh == NULL)
        return 0;
    fclose(fh);
    return 1;
}
00073 
/* Report whether "<path>/mdef" can be opened for reading, which is how
 * this file decides that `path` is an acoustic model directory.
 * Returns 1 if it can, 0 otherwise. */
static int
hmmdir_exists(const char *path)
{
    char *mdef_path = string_join(path, "/mdef", NULL);
    int found = file_exists(mdef_path);

    ckd_free(mdef_path);
    return found;
}
00085 
00086 static void
00087 ps_add_file(ps_decoder_t *ps, const char *arg,
00088             const char *hmmdir, const char *file)
00089 {
00090     char *tmp = string_join(hmmdir, "/", file, NULL);
00091 
00092     if (cmd_ln_str_r(ps->config, arg) == NULL && file_exists(tmp))
00093         cmd_ln_set_str_r(ps->config, arg, tmp);
00094     ckd_free(tmp);
00095 }
00096 
00097 static void
00098 ps_init_defaults(ps_decoder_t *ps)
00099 {
00100     char const *hmmdir, *lmfile, *dictfile;
00101 
00102     /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */
00103 #ifdef __ADSPBLACKFIN__
00104     E_INFO("Will not use mmap() on uClinux/Blackfin.");
00105     cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE);
00106 #endif
00107 
00108 #ifdef MODELDIR
00109     /* Set default acoustic and language models. */
00110     hmmdir = cmd_ln_str_r(ps->config, "-hmm");
00111     lmfile = cmd_ln_str_r(ps->config, "-lm");
00112     dictfile = cmd_ln_str_r(ps->config, "-dict");
00113     if (hmmdir == NULL && hmmdir_exists(MODELDIR "/hmm/en_US/hub4wsj_sc_8k")) {
00114         hmmdir = MODELDIR "/hmm/en_US/hub4wsj_sc_8k";
00115         cmd_ln_set_str_r(ps->config, "-hmm", hmmdir);
00116     }
00117     if (lmfile == NULL && !cmd_ln_str_r(ps->config, "-fsg")
00118         && !cmd_ln_str_r(ps->config, "-jsgf")
00119         && file_exists(MODELDIR "/lm/en_US/hub4.5000.DMP")) {
00120         lmfile = MODELDIR "/lm/en_US/hub4.5000.DMP";
00121         cmd_ln_set_str_r(ps->config, "-lm", lmfile);
00122     }
00123     if (dictfile == NULL && file_exists(MODELDIR "/lm/en_US/cmu07a.dic")) {
00124         dictfile = MODELDIR "/lm/en_US/cmu07a.dic";
00125         cmd_ln_set_str_r(ps->config, "-dict", dictfile);
00126     }
00127 
00128     /* Expand acoustic and language model filenames relative to installation path. */
00129     if (hmmdir && !path_is_absolute(hmmdir) && !hmmdir_exists(hmmdir)) {
00130         char *tmphmm = string_join(MODELDIR "/hmm/", hmmdir, NULL);
00131         cmd_ln_set_str_r(ps->config, "-hmm", tmphmm);
00132         ckd_free(tmphmm);
00133     }
00134     if (lmfile && !path_is_absolute(lmfile) && !file_exists(lmfile)) {
00135         char *tmplm = string_join(MODELDIR "/lm/", lmfile, NULL);
00136         cmd_ln_set_str_r(ps->config, "-lm", tmplm);
00137         ckd_free(tmplm);
00138     }
00139     if (dictfile && !path_is_absolute(dictfile) && !file_exists(dictfile)) {
00140         char *tmpdict = string_join(MODELDIR "/lm/", dictfile, NULL);
00141         cmd_ln_set_str_r(ps->config, "-dict", tmpdict);
00142         ckd_free(tmpdict);
00143     }
00144 #endif
00145 
00146     /* Get acoustic model filenames and add them to the command-line */
00147     if ((hmmdir = cmd_ln_str_r(ps->config, "-hmm")) != NULL) {
00148         ps_add_file(ps, "-mdef", hmmdir, "mdef");
00149         ps_add_file(ps, "-mean", hmmdir, "means");
00150         ps_add_file(ps, "-var", hmmdir, "variances");
00151         ps_add_file(ps, "-tmat", hmmdir, "transition_matrices");
00152         ps_add_file(ps, "-mixw", hmmdir, "mixture_weights");
00153         ps_add_file(ps, "-sendump", hmmdir, "sendump");
00154         ps_add_file(ps, "-fdict", hmmdir, "noisedict");
00155         ps_add_file(ps, "-lda", hmmdir, "feature_transform");
00156         ps_add_file(ps, "-featparams", hmmdir, "feat.params");
00157         ps_add_file(ps, "-senmgau", hmmdir, "senmgau");
00158     }
00159 }
00160 
00161 static void
00162 ps_free_searches(ps_decoder_t *ps)
00163 {
00164     gnode_t *gn;
00165 
00166     if (ps->searches == NULL)
00167         return;
00168 
00169     for (gn = ps->searches; gn; gn = gnode_next(gn))
00170         ps_search_free(gnode_ptr(gn));
00171     glist_free(ps->searches);
00172     ps->searches = NULL;
00173     ps->search = NULL;
00174 }
00175 
00176 static ps_search_t *
00177 ps_find_search(ps_decoder_t *ps, char const *name)
00178 {
00179     gnode_t *gn;
00180 
00181     for (gn = ps->searches; gn; gn = gnode_next(gn)) {
00182         if (0 == strcmp(ps_search_name(gnode_ptr(gn)), name))
00183             return (ps_search_t *)gnode_ptr(gn);
00184     }
00185     return NULL;
00186 }
00187 
00188 int
00189 ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
00190 {
00191     char const *lmfile, *lmctl = NULL;
00192 
00193     if (config && config != ps->config) {
00194         cmd_ln_free_r(ps->config);
00195         ps->config = config;
00196     }
00197 #ifndef _WIN32_WCE
00198     /* Set up logging. */
00199     if (cmd_ln_str_r(ps->config, "-logfn"))
00200         err_set_logfile(cmd_ln_str_r(ps->config, "-logfn"));
00201 #endif
00202     err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
00203     ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
00204     ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
00205 
00206     /* Fill in some default arguments. */
00207     ps_init_defaults(ps);
00208 
00209     /* Free old searches (do this before other reinit) */
00210     ps_free_searches(ps);
00211 
00212     /* Free old acmod. */
00213     acmod_free(ps->acmod);
00214     ps->acmod = NULL;
00215 
00216     /* Free old dictionary (must be done after the two things above) */
00217     dict_free(ps->dict);
00218     ps->dict = NULL;
00219 
00220 
00221     /* Logmath computation (used in acmod and search) */
00222     if (ps->lmath == NULL
00223         || (logmath_get_base(ps->lmath) != 
00224             (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
00225         if (ps->lmath)
00226             logmath_free(ps->lmath);
00227         ps->lmath = logmath_init
00228             ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
00229              cmd_ln_boolean_r(ps->config, "-bestpath"));
00230     }
00231 
00232     /* Acoustic model (this is basically everything that
00233      * uttproc.c, senscr.c, and others used to do) */
00234     if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
00235         return -1;
00236     /* Make the acmod's feature buffer growable if we are doing two-pass search. */
00237     if (cmd_ln_boolean_r(ps->config, "-fwdflat")
00238         && cmd_ln_boolean_r(ps->config, "-fwdtree"))
00239         acmod_set_grow(ps->acmod, TRUE);
00240 
00241     if ((ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window"))) {
00242         /* Initialize an auxiliary phone loop search, which will run in
00243          * "parallel" with FSG or N-Gram search. */
00244         if ((ps->phone_loop = phone_loop_search_init(ps->config,
00245                                                      ps->acmod, ps->dict)) == NULL)
00246             return -1;
00247         ps->searches = glist_add_ptr(ps->searches, ps->phone_loop);
00248     }
00249 
00250     /* Dictionary and triphone mappings (depends on acmod). */
00251     /* FIXME: pass config, change arguments, implement LTS, etc. */
00252     if ((ps->dict = dict_init(ps->config, ps->acmod->mdef)) == NULL)
00253         return -1;
00254 
00255     /* Determine whether we are starting out in FSG or N-Gram search mode. */
00256     if (cmd_ln_str_r(ps->config, "-fsg") || cmd_ln_str_r(ps->config, "-jsgf")) {
00257         ps_search_t *fsgs;
00258 
00259         if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
00260             return -1;
00261         if ((fsgs = fsg_search_init(ps->config, ps->acmod, ps->dict, ps->d2p)) == NULL)
00262             return -1;
00263         fsgs->pls = ps->phone_loop;
00264         ps->searches = glist_add_ptr(ps->searches, fsgs);
00265         ps->search = fsgs;
00266     }
00267     else if ((lmfile = cmd_ln_str_r(ps->config, "-lm"))
00268              || (lmctl = cmd_ln_str_r(ps->config, "-lmctl"))) {
00269         ps_search_t *ngs;
00270 
00271         if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
00272             return -1;
00273         if ((ngs = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p)) == NULL)
00274             return -1;
00275         ngs->pls = ps->phone_loop;
00276         ps->searches = glist_add_ptr(ps->searches, ngs);
00277         ps->search = ngs;
00278     }
00279     /* Otherwise, we will initialize the search whenever the user
00280      * decides to load an FSG or a language model. */
00281     else {
00282         if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
00283             return -1;
00284     }
00285 
00286     /* Initialize performance timer. */
00287     ps->perf.name = "decode";
00288     ptmr_init(&ps->perf);
00289 
00290     return 0;
00291 }
00292 
00293 ps_decoder_t *
00294 ps_init(cmd_ln_t *config)
00295 {
00296     ps_decoder_t *ps;
00297 
00298     ps = ckd_calloc(1, sizeof(*ps));
00299     ps->refcount = 1;
00300     if (ps_reinit(ps, config) < 0) {
00301         ps_free(ps);
00302         return NULL;
00303     }
00304     return ps;
00305 }
00306 
00307 arg_t const *
00308 ps_args(void)
00309 {
00310     return ps_args_def;
00311 }
00312 
00313 ps_decoder_t *
00314 ps_retain(ps_decoder_t *ps)
00315 {
00316     ++ps->refcount;
00317     return ps;
00318 }
00319 
00320 int
00321 ps_free(ps_decoder_t *ps)
00322 {
00323     gnode_t *gn;
00324 
00325     if (ps == NULL)
00326         return 0;
00327     if (--ps->refcount > 0)
00328         return ps->refcount;
00329     for (gn = ps->searches; gn; gn = gnode_next(gn))
00330         ps_search_free(gnode_ptr(gn));
00331     glist_free(ps->searches);
00332     dict_free(ps->dict);
00333     dict2pid_free(ps->d2p);
00334     acmod_free(ps->acmod);
00335     logmath_free(ps->lmath);
00336     cmd_ln_free_r(ps->config);
00337     ckd_free(ps->uttid);
00338     ckd_free(ps);
00339     return 0;
00340 }
00341 
00342 char const *
00343 ps_get_uttid(ps_decoder_t *ps)
00344 {
00345     return ps->uttid;
00346 }
00347 
00348 cmd_ln_t *
00349 ps_get_config(ps_decoder_t *ps)
00350 {
00351     return ps->config;
00352 }
00353 
00354 logmath_t *
00355 ps_get_logmath(ps_decoder_t *ps)
00356 {
00357     return ps->lmath;
00358 }
00359 
00360 fe_t *
00361 ps_get_fe(ps_decoder_t *ps)
00362 {
00363     return ps->acmod->fe;
00364 }
00365 
00366 feat_t *
00367 ps_get_feat(ps_decoder_t *ps)
00368 {
00369     return ps->acmod->fcb;
00370 }
00371 
00372 ps_mllr_t *
00373 ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr)
00374 {
00375     return acmod_update_mllr(ps->acmod, mllr);
00376 }
00377 
00378 ngram_model_t *
00379 ps_get_lmset(ps_decoder_t *ps)
00380 {
00381     if (ps->search == NULL
00382         || 0 != strcmp(ps_search_name(ps->search), "ngram"))
00383         return NULL;
00384     return ((ngram_search_t *)ps->search)->lmset;
00385 }
00386 
00387 ngram_model_t *
00388 ps_update_lmset(ps_decoder_t *ps, ngram_model_t *lmset)
00389 {
00390     ngram_search_t *ngs;
00391     ps_search_t *search;
00392 
00393     /* Look for N-Gram search. */
00394     search = ps_find_search(ps, "ngram");
00395     if (search == NULL) {
00396         /* Initialize N-Gram search. */
00397         search = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p);
00398         if (search == NULL)
00399             return NULL;
00400         search->pls = ps->phone_loop;
00401         ps->searches = glist_add_ptr(ps->searches, search);
00402         ngs = (ngram_search_t *)search;
00403     }
00404     else {
00405         ngs = (ngram_search_t *)search;
00406         /* Free any previous lmset if this is a new one. */
00407         if (ngs->lmset != NULL && ngs->lmset != lmset)
00408             ngram_model_free(ngs->lmset);
00409         ngs->lmset = lmset;
00410         /* Tell N-Gram search to update its view of the world. */
00411         if (ps_search_reinit(search, ps->dict, ps->d2p) < 0)
00412             return NULL;
00413     }
00414     ps->search = search;
00415     return ngs->lmset;
00416 }
00417 
00418 fsg_set_t *
00419 ps_get_fsgset(ps_decoder_t *ps)
00420 {
00421     if (ps->search == NULL
00422         || 0 != strcmp(ps_search_name(ps->search), "fsg"))
00423         return NULL;
00424     return (fsg_set_t *)ps->search;
00425 }
00426 
00427 fsg_set_t *
00428 ps_update_fsgset(ps_decoder_t *ps)
00429 {
00430     ps_search_t *search;
00431 
00432     /* Look for FSG search. */
00433     search = ps_find_search(ps, "fsg");
00434     if (search == NULL) {
00435         /* Initialize FSG search. */
00436         search = fsg_search_init(ps->config,
00437                                  ps->acmod, ps->dict, ps->d2p);
00438         search->pls = ps->phone_loop;
00439         ps->searches = glist_add_ptr(ps->searches, search);
00440     }
00441     else {
00442         /* Tell FSG search to update its view of the world. */
00443         if (ps_search_reinit(search, ps->dict, ps->d2p) < 0)
00444             return NULL;
00445     }
00446     ps->search = search;
00447     return (fsg_set_t *)search;
00448 }
00449 
00450 int
00451 ps_load_dict(ps_decoder_t *ps, char const *dictfile,
00452              char const *fdictfile, char const *format)
00453 {
00454     cmd_ln_t *newconfig;
00455     dict2pid_t *d2p;
00456     dict_t *dict;
00457     gnode_t *gn;
00458     int rv;
00459 
00460     /* Create a new scratch config to load this dict (so existing one
00461      * won't be affected if it fails) */
00462     newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL);
00463     cmd_ln_set_boolean_r(newconfig, "-dictcase",
00464                          cmd_ln_boolean_r(ps->config, "-dictcase"));
00465     cmd_ln_set_str_r(newconfig, "-dict", dictfile);
00466     if (fdictfile)
00467         cmd_ln_set_str_r(newconfig, "-fdict", fdictfile);
00468     else
00469         cmd_ln_set_str_r(newconfig, "-fdict",
00470                          cmd_ln_str_r(ps->config, "-fdict"));
00471 
00472     /* Try to load it. */
00473     if ((dict = dict_init(newconfig, ps->acmod->mdef)) == NULL) {
00474         cmd_ln_free_r(newconfig);
00475         return -1;
00476     }
00477 
00478     /* Reinit the dict2pid. */
00479     if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) {
00480         cmd_ln_free_r(newconfig);
00481         return -1;
00482     }
00483 
00484     /* Success!  Update the existing config to reflect new dicts and
00485      * drop everything into place. */
00486     cmd_ln_free_r(newconfig);
00487     cmd_ln_set_str_r(ps->config, "-dict", dictfile);
00488     if (fdictfile)
00489         cmd_ln_set_str_r(ps->config, "-fdict", fdictfile);
00490     dict_free(ps->dict);
00491     ps->dict = dict;
00492     dict2pid_free(ps->d2p);
00493     ps->d2p = d2p;
00494 
00495     /* And tell all searches to reconfigure themselves. */
00496     for (gn = ps->searches; gn; gn = gnode_next(gn)) {
00497         ps_search_t *search = gnode_ptr(gn);
00498         if ((rv = ps_search_reinit(search, dict, d2p)) < 0)
00499             return rv;
00500     }
00501 
00502     return 0;
00503 }
00504 
00505 int
00506 ps_save_dict(ps_decoder_t *ps, char const *dictfile,
00507              char const *format)
00508 {
00509     return dict_write(ps->dict, dictfile, format);
00510 }
00511 
00512 int
00513 ps_add_word(ps_decoder_t *ps,
00514             char const *word,
00515             char const *phones,
00516             int update)
00517 {
00518     int32 wid, lmwid;
00519     ngram_model_t *lmset;
00520     s3cipid_t *pron;
00521     char **phonestr, *tmp;
00522     int np, i, rv;
00523 
00524     /* Parse phones into an array of phone IDs. */
00525     tmp = ckd_salloc(phones);
00526     np = str2words(tmp, NULL, 0);
00527     phonestr = ckd_calloc(np, sizeof(*phonestr));
00528     str2words(tmp, phonestr, np);
00529     pron = ckd_calloc(np, sizeof(*pron));
00530     for (i = 0; i < np; ++i) {
00531         pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
00532         if (pron[i] == -1) {
00533             E_ERROR("Unknown phone %s in phone string %s\n",
00534                     phonestr[i], tmp);
00535             ckd_free(phonestr);
00536             ckd_free(tmp);
00537             ckd_free(pron);
00538             return -1;
00539         }
00540     }
00541     /* No longer needed. */
00542     ckd_free(phonestr);
00543     ckd_free(tmp);
00544 
00545     /* Add it to the dictionary. */
00546     if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) {
00547         ckd_free(pron);
00548         return -1;
00549     }
00550     /* No longer needed. */
00551     ckd_free(pron);
00552 
00553     /* Now we also have to add it to dict2pid. */
00554     dict2pid_add_word(ps->d2p, wid);
00555 
00556     if ((lmset = ps_get_lmset(ps)) != NULL) {
00557         /* Add it to the LM set (meaning, the current LM).  In a perfect
00558          * world, this would result in the same WID, but because of the
00559          * weird way that word IDs are handled, it doesn't. */
00560         if ((lmwid = ngram_model_add_word(lmset, word, 1.0))
00561             == NGRAM_INVALID_WID)
00562             return -1;
00563     }
00564  
00565     /* Rebuild the widmap and search tree if requested. */
00566     if (update) {
00567         if ((rv = ps_search_reinit(ps->search, ps->dict, ps->d2p) < 0))
00568             return rv;
00569     }
00570     return wid;
00571 }
00572 
00573 int
00574 ps_decode_raw(ps_decoder_t *ps, FILE *rawfh,
00575               char const *uttid, long maxsamps)
00576 {
00577     long total, pos;
00578 
00579     ps_start_utt(ps, uttid);
00580     /* If this file is seekable or maxsamps is specified, then decode
00581      * the whole thing at once. */
00582     if (maxsamps != -1 || (pos = ftell(rawfh)) >= 0) {
00583         int16 *data;
00584 
00585         if (maxsamps == -1) {
00586             long endpos;
00587             fseek(rawfh, 0, SEEK_END);
00588             endpos = ftell(rawfh);
00589             fseek(rawfh, pos, SEEK_SET);
00590             maxsamps = endpos - pos;
00591         }
00592         data = ckd_calloc(maxsamps, sizeof(*data));
00593         total = fread(data, sizeof(*data), maxsamps, rawfh);
00594         ps_process_raw(ps, data, total, FALSE, TRUE);
00595         ckd_free(data);
00596     }
00597     else {
00598         /* Otherwise decode it in a stream. */
00599         total = 0;
00600         while (!feof(rawfh)) {
00601             int16 data[256];
00602             size_t nread;
00603 
00604             nread = fread(data, sizeof(*data), sizeof(data)/sizeof(*data), rawfh);
00605             ps_process_raw(ps, data, nread, FALSE, FALSE);
00606             total += nread;
00607         }
00608     }
00609     ps_end_utt(ps);
00610     return total;
00611 }
00612 
00613 int
00614 ps_start_utt(ps_decoder_t *ps, char const *uttid)
00615 {
00616     FILE *mfcfh = NULL;
00617     FILE *rawfh = NULL;
00618     int rv;
00619 
00620     if (ps->search == NULL) {
00621         E_ERROR("No search module is selected, did you forget to "
00622                 "specify a language model or grammar?\n");
00623         return -1;
00624     }
00625 
00626     ptmr_reset(&ps->perf);
00627     ptmr_start(&ps->perf);
00628 
00629     if (uttid) {
00630         ckd_free(ps->uttid);
00631         ps->uttid = ckd_salloc(uttid);
00632     }
00633     else {
00634         char nuttid[16];
00635         ckd_free(ps->uttid);
00636         sprintf(nuttid, "%09u", ps->uttno);
00637         ps->uttid = ckd_salloc(nuttid);
00638         ++ps->uttno;
00639     }
00640     /* Remove any residual word lattice and hypothesis. */
00641     ps_lattice_free(ps->search->dag);
00642     ps->search->dag = NULL;
00643     ps->search->last_link = NULL;
00644     ps->search->post = 0;
00645     ckd_free(ps->search->hyp_str);
00646     ps->search->hyp_str = NULL;
00647 
00648     if ((rv = acmod_start_utt(ps->acmod)) < 0)
00649         return rv;
00650 
00651     /* Start logging features and audio if requested. */
00652     if (ps->mfclogdir) {
00653         char *logfn = string_join(ps->mfclogdir, "/",
00654                                   ps->uttid, ".mfc", NULL);
00655         E_INFO("Writing MFCC log file: %s\n", logfn);
00656         if ((mfcfh = fopen(logfn, "wb")) == NULL) {
00657             E_ERROR_SYSTEM("Failed to open MFCC log file %s", logfn);
00658             ckd_free(logfn);
00659             return -1;
00660         }
00661         ckd_free(logfn);
00662         acmod_set_mfcfh(ps->acmod, mfcfh);
00663     }
00664     if (ps->rawlogdir) {
00665         char *logfn = string_join(ps->rawlogdir, "/",
00666                                   ps->uttid, ".raw", NULL);
00667         E_INFO("Writing raw audio log file: %s\n", logfn);
00668         if ((rawfh = fopen(logfn, "wb")) == NULL) {
00669             E_ERROR_SYSTEM("Failed to open raw audio log file %s", logfn);
00670             ckd_free(logfn);
00671             return -1;
00672         }
00673         ckd_free(logfn);
00674         acmod_set_rawfh(ps->acmod, rawfh);
00675     }
00676 
00677     /* Start auxiliary phone loop search. */
00678     if (ps->phone_loop)
00679         ps_search_start(ps->phone_loop);
00680 
00681     return ps_search_start(ps->search);
00682 }
00683 
00684 static int
00685 ps_search_forward(ps_decoder_t *ps)
00686 {
00687     int nfr;
00688 
00689     nfr = 0;
00690     while (ps->acmod->n_feat_frame > 0) {
00691         int k;
00692         if (ps->phone_loop)
00693             if ((k = ps_search_step(ps->phone_loop, ps->acmod->output_frame)) < 0)
00694                 return k;
00695         if (ps->acmod->output_frame >= ps->pl_window)
00696             if ((k = ps_search_step(ps->search,
00697                                     ps->acmod->output_frame - ps->pl_window)) < 0)
00698                 return k;
00699         acmod_advance(ps->acmod);
00700         ++ps->n_frame;
00701         ++nfr;
00702     }
00703     return nfr;
00704 }
00705 
00706 int
00707 ps_process_raw(ps_decoder_t *ps,
00708                int16 const *data,
00709                size_t n_samples,
00710                int no_search,
00711                int full_utt)
00712 {
00713     int n_searchfr = 0;
00714 
00715     if (no_search)
00716         acmod_set_grow(ps->acmod, TRUE);
00717 
00718     while (n_samples) {
00719         int nfr;
00720 
00721         /* Process some data into features. */
00722         if ((nfr = acmod_process_raw(ps->acmod, &data,
00723                                      &n_samples, full_utt)) < 0)
00724             return nfr;
00725 
00726         /* Score and search as much data as possible */
00727         if (no_search)
00728             continue;
00729         if ((nfr = ps_search_forward(ps)) < 0)
00730             return nfr;
00731         n_searchfr += nfr;
00732     }
00733 
00734     return n_searchfr;
00735 }
00736 
00737 int
00738 ps_process_cep(ps_decoder_t *ps,
00739                mfcc_t **data,
00740                int32 n_frames,
00741                int no_search,
00742                int full_utt)
00743 {
00744     int n_searchfr = 0;
00745 
00746     if (no_search)
00747         acmod_set_grow(ps->acmod, TRUE);
00748 
00749     while (n_frames) {
00750         int nfr;
00751 
00752         /* Process some data into features. */
00753         if ((nfr = acmod_process_cep(ps->acmod, &data,
00754                                      &n_frames, full_utt)) < 0)
00755             return nfr;
00756 
00757         /* Score and search as much data as possible */
00758         if (no_search)
00759             continue;
00760         if ((nfr = ps_search_forward(ps)) < 0)
00761             return nfr;
00762         n_searchfr += nfr;
00763     }
00764 
00765     return n_searchfr;
00766 }
00767 
00768 int
00769 ps_end_utt(ps_decoder_t *ps)
00770 {
00771     int rv, i;
00772 
00773     acmod_end_utt(ps->acmod);
00774 
00775     /* Search any remaining frames. */
00776     if ((rv = ps_search_forward(ps)) < 0) {
00777         ptmr_stop(&ps->perf);
00778         return rv;
00779     }
00780     /* Finish phone loop search. */
00781     if (ps->phone_loop) {
00782         if ((rv = ps_search_finish(ps->phone_loop)) < 0) {
00783             ptmr_stop(&ps->perf);
00784             return rv;
00785         }
00786     }
00787     /* Search any frames remaining in the lookahead window. */
00788     for (i = ps->acmod->output_frame - ps->pl_window;
00789          i < ps->acmod->output_frame; ++i)
00790         ps_search_step(ps->search, i);
00791     /* Finish main search. */
00792     if ((rv = ps_search_finish(ps->search)) < 0) {
00793         ptmr_stop(&ps->perf);
00794         return rv;
00795     }
00796     ptmr_stop(&ps->perf);
00797 
00798     /* Log a backtrace if requested. */
00799     if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
00800         char const *uttid, *hyp;
00801         ps_seg_t *seg;
00802         int32 score;
00803 
00804         hyp = ps_get_hyp(ps, &score, &uttid);
00805         E_INFO("%s: %s (%d)\n", uttid, hyp, score);
00806         E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
00807                     "word", "start", "end", "pprob", "ascr", "lscr", "lback");
00808         for (seg = ps_seg_iter(ps, &score); seg;
00809              seg = ps_seg_next(seg)) {
00810             char const *word;
00811             int sf, ef;
00812             int32 post, lscr, ascr, lback;
00813 
00814             word = ps_seg_word(seg);
00815             ps_seg_frames(seg, &sf, &ef);
00816             post = ps_seg_prob(seg, &ascr, &lscr, &lback);
00817             E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
00818                         word, sf, ef, logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback);
00819         }
00820     }
00821     return rv;
00822 }
00823 
00824 char const *
00825 ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score, char const **out_uttid)
00826 {
00827     char const *hyp;
00828 
00829     ptmr_start(&ps->perf);
00830     hyp = ps_search_hyp(ps->search, out_best_score);
00831     if (out_uttid)
00832         *out_uttid = ps->uttid;
00833     ptmr_stop(&ps->perf);
00834     return hyp;
00835 }
00836 
00837 int32
00838 ps_get_prob(ps_decoder_t *ps, char const **out_uttid)
00839 {
00840     int32 prob;
00841 
00842     ptmr_start(&ps->perf);
00843     prob = ps_search_prob(ps->search);
00844     if (out_uttid)
00845         *out_uttid = ps->uttid;
00846     ptmr_stop(&ps->perf);
00847     return prob;
00848 }
00849 
00850 ps_seg_t *
00851 ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score)
00852 {
00853     ps_seg_t *itor;
00854 
00855     ptmr_start(&ps->perf);
00856     itor = ps_search_seg_iter(ps->search, out_best_score);
00857     ptmr_stop(&ps->perf);
00858     return itor;
00859 }
00860 
00861 ps_seg_t *
00862 ps_seg_next(ps_seg_t *seg)
00863 {
00864     return ps_search_seg_next(seg);
00865 }
00866 
00867 char const *
00868 ps_seg_word(ps_seg_t *seg)
00869 {
00870     return seg->word;
00871 }
00872 
00873 void
00874 ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
00875 {
00876     if (out_sf) *out_sf = seg->sf;
00877     if (out_ef) *out_ef = seg->ef;
00878 }
00879 
00880 int32
00881 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
00882 {
00883     if (out_ascr) *out_ascr = seg->ascr;
00884     if (out_lscr) *out_lscr = seg->lscr;
00885     if (out_lback) *out_lback = seg->lback;
00886     return seg->prob;
00887 }
00888 
00889 void
00890 ps_seg_free(ps_seg_t *seg)
00891 {
00892     ps_search_seg_free(seg);
00893 }
00894 
00895 ps_lattice_t *
00896 ps_get_lattice(ps_decoder_t *ps)
00897 {
00898     return ps_search_lattice(ps->search);
00899 }
00900 
00901 ps_nbest_t *
00902 ps_nbest(ps_decoder_t *ps, int sf, int ef,
00903          char const *ctx1, char const *ctx2)
00904 {
00905     ps_lattice_t *dag;
00906     ngram_model_t *lmset;
00907     ps_astar_t *nbest;
00908     float32 lwf;
00909     int32 w1, w2;
00910 
00911     if (ps->search == NULL)
00912         return NULL;
00913     if ((dag = ps_get_lattice(ps)) == NULL)
00914         return NULL;
00915 
00916     /* FIXME: This is all quite specific to N-Gram search.  Either we
00917      * should make N-best a method for each search module or it needs
00918      * to be abstracted to work for N-Gram and FSG. */
00919     if (0 != strcmp(ps_search_name(ps->search), "ngram")) {
00920         lmset = NULL;
00921         lwf = 1.0f;
00922     }
00923     else {
00924         lmset = ((ngram_search_t *)ps->search)->lmset;
00925         lwf = ((ngram_search_t *)ps->search)->bestpath_fwdtree_lw_ratio;
00926     }
00927 
00928     w1 = ctx1 ? dict_wordid(ps_search_dict(ps->search), ctx1) : -1;
00929     w2 = ctx2 ? dict_wordid(ps_search_dict(ps->search), ctx2) : -1;
00930     nbest = ps_astar_start(dag, lmset, lwf, sf, ef, w1, w2);
00931 
00932     return (ps_nbest_t *)nbest;
00933 }
00934 
00935 void
00936 ps_nbest_free(ps_nbest_t *nbest)
00937 {
00938     ps_astar_finish(nbest);
00939 }
00940 
00941 ps_nbest_t *
00942 ps_nbest_next(ps_nbest_t *nbest)
00943 {
00944     ps_latpath_t *next;
00945 
00946     next = ps_astar_next(nbest);
00947     if (next == NULL) {
00948         ps_nbest_free(nbest);
00949         return NULL;
00950     }
00951     return nbest;
00952 }
00953 
00954 char const *
00955 ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
00956 {
00957     if (nbest->top == NULL)
00958         return NULL;
00959     if (out_score) *out_score = nbest->top->score;
00960     return ps_astar_hyp(nbest, nbest->top);
00961 }
00962 
00963 ps_seg_t *
00964 ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score)
00965 {
00966     if (nbest->top == NULL)
00967         return NULL;
00968     if (out_score) *out_score = nbest->top->score;
00969     return ps_astar_seg_iter(nbest, nbest->top, 1.0);
00970 }
00971 
00972 int
00973 ps_get_n_frames(ps_decoder_t *ps)
00974 {
00975     return ps->acmod->output_frame + 1;
00976 }
00977 
00978 void
00979 ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech,
00980                 double *out_ncpu, double *out_nwall)
00981 {
00982     int32 frate;
00983 
00984     frate = cmd_ln_int32_r(ps->config, "-frate");
00985     *out_nspeech = (double)ps->acmod->output_frame / frate;
00986     *out_ncpu = ps->perf.t_cpu;
00987     *out_nwall = ps->perf.t_elapsed;
00988 }
00989 
00990 void
00991 ps_get_all_time(ps_decoder_t *ps, double *out_nspeech,
00992                 double *out_ncpu, double *out_nwall)
00993 {
00994     int32 frate;
00995 
00996     frate = cmd_ln_int32_r(ps->config, "-frate");
00997     *out_nspeech = (double)ps->n_frame / frate;
00998     *out_ncpu = ps->perf.t_tot_cpu;
00999     *out_nwall = ps->perf.t_tot_elapsed;
01000 }
01001 
01002 void
01003 ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt,
01004                cmd_ln_t *config, acmod_t *acmod, dict_t *dict,
01005                dict2pid_t *d2p)
01006 {
01007     search->vt = vt;
01008     search->config = config;
01009     search->acmod = acmod;
01010     if (d2p)
01011         search->d2p = dict2pid_retain(d2p);
01012     else
01013         search->d2p = NULL;
01014     if (dict) {
01015         search->dict = dict_retain(dict);
01016         search->start_wid = dict_startwid(dict);
01017         search->finish_wid = dict_finishwid(dict);
01018         search->silence_wid = dict_silwid(dict);
01019         search->n_words = dict_size(dict);
01020     }
01021     else {
01022         search->dict = NULL;
01023         search->start_wid = search->finish_wid = search->silence_wid = -1;
01024         search->n_words = 0;
01025     }
01026 }
01027 
01028 void
01029 ps_search_base_reinit(ps_search_t *search, dict_t *dict,
01030                       dict2pid_t *d2p)
01031 {
01032     dict_free(search->dict);
01033     dict2pid_free(search->d2p);
01034     /* FIXME: _retain() should just return NULL if passed NULL. */
01035     if (dict) {
01036         search->dict = dict_retain(dict);
01037         search->start_wid = dict_startwid(dict);
01038         search->finish_wid = dict_finishwid(dict);
01039         search->silence_wid = dict_silwid(dict);
01040         search->n_words = dict_size(dict);
01041     }
01042     else {
01043         search->dict = NULL;
01044         search->start_wid = search->finish_wid = search->silence_wid = -1;
01045         search->n_words = 0;
01046     }
01047     if (d2p)
01048         search->d2p = dict2pid_retain(d2p);
01049     else
01050         search->d2p = NULL;
01051 }
01052 
01053 
01054 void
01055 ps_search_deinit(ps_search_t *search)
01056 {
01057     /* FIXME: We will have refcounting on acmod, config, etc, at which
01058      * point we will free them here too. */
01059     dict_free(search->dict);
01060     dict2pid_free(search->d2p);
01061     ckd_free(search->hyp_str);
01062     ps_lattice_free(search->dag);
01063 }

Generated on Tue Aug 17 2010 for PocketSphinx by  doxygen 1.7.1