00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #ifndef __PS_CMDLN_MACRO_H__
00041 #define __PS_CMDLN_MACRO_H__
00042
00043 #include <cmd_ln.h>
00044 #include <feat.h>
00045 #include <fe.h>
00046
/**
 * Complete list of option definitions for the decoder.
 *
 * Expands to a comma-separated sequence made of, in order: the two
 * front-end macros waveform_to_cepstral_command_line_macro() and
 * cepstral_to_feature_command_line_macro(), followed by the ACMOD, BEAM,
 * SEARCH, DICT, NGRAM, FSG and DEBUG option groups defined in this header.
 *
 * The two function-like macros are not defined in this file; presumably
 * they come from <fe.h> and <feat.h> -- TODO confirm.
 *
 * The expansion carries no terminating entry and no trailing comma, so a
 * table built from it still needs its own end marker (CMDLN_EMPTY_OPTION
 * is defined in this header with a matching four-field layout).
 *
 * The group macros are defined further down in this header; that is fine
 * because macro names are only resolved when POCKETSPHINX_OPTIONS itself
 * is expanded.
 */
#define POCKETSPHINX_OPTIONS \
    waveform_to_cepstral_command_line_macro(), \
    cepstral_to_feature_command_line_macro(), \
    POCKETSPHINX_ACMOD_OPTIONS, \
    POCKETSPHINX_BEAM_OPTIONS, \
    POCKETSPHINX_SEARCH_OPTIONS, \
    POCKETSPHINX_DICT_OPTIONS, \
    POCKETSPHINX_NGRAM_OPTIONS, \
    POCKETSPHINX_FSG_OPTIONS, \
    POCKETSPHINX_DEBUG_OPTIONS
00058
/**
 * Option definitions for logging and debugging output.
 *
 * Expands to four brace-enclosed initializers, each of the form
 * { option name, type constant, default value string, help text }.
 * None of these options has a default (the third field is NULL).
 * The ARG_* type constants are not defined in this header; presumably
 * they come from <cmd_ln.h> -- TODO confirm.
 */
#define POCKETSPHINX_DEBUG_OPTIONS \
    { "-logfn", \
      ARG_STRING, \
      NULL, \
      "File to write log messages in" }, \
    { "-debug", \
      ARG_INT32, \
      NULL, \
      "Verbosity level for debugging messages" }, \
    { "-mfclogdir", \
      ARG_STRING, \
      NULL, \
      "Directory to log feature files to" }, \
    { "-rawlogdir", \
      ARG_STRING, \
      NULL, \
      "Directory to log raw audio files to" }
00080
/**
 * Option definitions for the pruning beam widths.
 *
 * Expands to ten brace-enclosed initializers, each of the form
 * { option name, type constant, default value string, help text }.
 * Defaults are written as strings (e.g. "1e-48"); all entries are
 * ARG_FLOAT64 except "-pl_window", which is ARG_INT32.
 * The ARG_* type constants are not defined in this header; presumably
 * they come from <cmd_ln.h> -- TODO confirm.
 */
#define POCKETSPHINX_BEAM_OPTIONS \
    { "-beam", \
      ARG_FLOAT64, \
      "1e-48", \
      "Beam width applied to every frame in Viterbi search (smaller values mean wider beam)" }, \
    { "-wbeam", \
      ARG_FLOAT64, \
      "7e-29", \
      "Beam width applied to word exits" }, \
    { "-pbeam", \
      ARG_FLOAT64, \
      "1e-48", \
      "Beam width applied to phone transitions" }, \
    { "-lpbeam", \
      ARG_FLOAT64, \
      "1e-40", \
      "Beam width applied to last phone in words" }, \
    { "-lponlybeam", \
      ARG_FLOAT64, \
      "7e-29", \
      "Beam width applied to last phone in single-phone words" }, \
    { "-fwdflatbeam", \
      ARG_FLOAT64, \
      "1e-64", \
      "Beam width applied to every frame in second-pass flat search" }, \
    { "-fwdflatwbeam", \
      ARG_FLOAT64, \
      "7e-29", \
      "Beam width applied to word exits in second-pass flat search" }, \
    { "-pl_window", \
      ARG_INT32, \
      "0", \
      "Phoneme lookahead window size, in frames" }, \
    { "-pl_beam", \
      ARG_FLOAT64, \
      "1e-10", \
      "Beam width applied to phone loop search for lookahead" }, \
    { "-pl_pbeam", \
      ARG_FLOAT64, \
      "1e-5", \
      "Beam width applied to phone loop transitions for lookahead" }
00123
/**
 * Option definitions for search passes and search-space limits.
 *
 * Expands to ten brace-enclosed initializers, each of the form
 * { option name, type constant, default value string, help text }.
 * Boolean defaults are spelled "yes"/"no"; integer defaults are decimal
 * strings, with "-1" documented in the help text as "no pruning" for
 * "-maxwpf" and "-maxhmmpf".
 * The ARG_* type constants are not defined in this header; presumably
 * they come from <cmd_ln.h> -- TODO confirm.
 */
#define POCKETSPHINX_SEARCH_OPTIONS \
    { "-compallsen", \
      ARG_BOOLEAN, \
      "no", \
      "Compute all senone scores in every frame (can be faster when there are many senones)" }, \
    { "-fwdtree", \
      ARG_BOOLEAN, \
      "yes", \
      "Run forward lexicon-tree search (1st pass)" }, \
    { "-fwdflat", \
      ARG_BOOLEAN, \
      "yes", \
      "Run forward flat-lexicon search over word lattice (2nd pass)" }, \
    { "-bestpath", \
      ARG_BOOLEAN, \
      "yes", \
      "Run bestpath (Dijkstra) search over word lattice (3rd pass)" }, \
    { "-backtrace", \
      ARG_BOOLEAN, \
      "no", \
      "Print results and backtraces to log file." }, \
    { "-latsize", \
      ARG_INT32, \
      "5000", \
      "Initial backpointer table size" }, \
    { "-maxwpf", \
      ARG_INT32, \
      "-1", \
      "Maximum number of distinct word exits at each frame (or -1 for no pruning)" }, \
    { "-maxhmmpf", \
      ARG_INT32, \
      "-1", \
      "Maximum number of active HMMs to maintain at each frame (or -1 for no pruning)" }, \
    { "-fwdflatefwid", \
      ARG_INT32, \
      "4", \
      "Minimum number of end frames for a word to be searched in fwdflat search" }, \
    { "-fwdflatsfwin", \
      ARG_INT32, \
      "25", \
      "Window of frames in lattice to search for successor words in fwdflat search " }
00166
/**
 * Option definitions for finite-state grammar input.
 *
 * Expands to five brace-enclosed initializers, each of the form
 * { option name, type constant, default value string, help text }.
 * The three file/rule options have no default (NULL); the two boolean
 * options default to "yes".
 * The ARG_* type constants are not defined in this header; presumably
 * they come from <cmd_ln.h> -- TODO confirm.
 */
#define POCKETSPHINX_FSG_OPTIONS \
    { "-fsg", \
      ARG_STRING, \
      NULL, \
      "Sphinx format finite state grammar file" }, \
    { "-jsgf", \
      ARG_STRING, \
      NULL, \
      "JSGF grammar file" }, \
    { "-toprule", \
      ARG_STRING, \
      NULL, \
      "Start rule for JSGF (first public rule is default)" }, \
    { "-fsgusealtpron", \
      ARG_BOOLEAN, \
      "yes", \
      "Add alternate pronunciations to FSG" }, \
    { "-fsgusefiller", \
      ARG_BOOLEAN, \
      "yes", \
      "Insert filler words at each state." }
00189
/**
 * Option definitions for N-gram language model search.
 *
 * Expands to fifteen brace-enclosed initializers, each of the form
 * { option name, type constant, default value string, help text }.
 * Covers the language model inputs ("-lm", "-lmctl", "-lmname"), the
 * per-pass language weights, insertion penalties and transition
 * probabilities, plus "-bghist" and "-lextreedump".
 * The ARG_* type constants are not defined in this header; presumably
 * they come from <cmd_ln.h> -- TODO confirm.
 *
 * The "-lmctl" help text deliberately has no trailing newline, so that it
 * is formatted like every other help string in this header.
 */
#define POCKETSPHINX_NGRAM_OPTIONS \
    { "-lm", \
      ARG_STRING, \
      NULL, \
      "Word trigram language model input file" }, \
    { "-lmctl", \
      ARG_STRING, \
      NULL, \
      "Specify a set of language models" }, \
    { "-lmname", \
      ARG_STRING, \
      "default", \
      "Which language model in -lmctl to use by default" }, \
    { "-lw", \
      ARG_FLOAT32, \
      "6.5", \
      "Language model probability weight" }, \
    { "-fwdflatlw", \
      ARG_FLOAT32, \
      "8.5", \
      "Language model probability weight for flat lexicon (2nd pass) decoding" }, \
    { "-bestpathlw", \
      ARG_FLOAT32, \
      "9.5", \
      "Language model probability weight for bestpath search" }, \
    { "-ascale", \
      ARG_FLOAT32, \
      "20.0", \
      "Inverse of acoustic model scale for confidence score calculation" }, \
    { "-wip", \
      ARG_FLOAT32, \
      "0.65", \
      "Word insertion penalty" }, \
    { "-nwpen", \
      ARG_FLOAT32, \
      "1.0", \
      "New word transition penalty" }, \
    { "-pip", \
      ARG_FLOAT32, \
      "1.0", \
      "Phone insertion penalty" }, \
    { "-uw", \
      ARG_FLOAT32, \
      "1.0", \
      "Unigram weight" }, \
    { "-silprob", \
      ARG_FLOAT32, \
      "0.005", \
      "Silence word transition probability" }, \
    { "-fillprob", \
      ARG_FLOAT32, \
      "1e-8", \
      "Filler word transition probability" }, \
    { "-bghist", \
      ARG_BOOLEAN, \
      "no", \
      "Bigram-mode: If TRUE only one BP entry/frame; else one per LM state" }, \
    { "-lextreedump", \
      ARG_INT32, \
      "0", \
      "Whether to dump the lextree structure to stderr (for debugging), 1 for Ravi's format, 2 for Dot format, Larger than 2 will be treated as Ravi's format" }
00252
/**
 * Option definitions for the pronunciation dictionaries.
 *
 * Expands to five brace-enclosed initializers, each of the form
 * { option name, type constant, default value string, help text }.
 * "-dict" is the only entry in this header declared with REQARG_STRING
 * rather than ARG_STRING (presumably marking it as a required argument
 * -- TODO confirm against <cmd_ln.h>).
 * The ARG_* and REQARG_* type constants are not defined in this header.
 */
#define POCKETSPHINX_DICT_OPTIONS \
    { "-dict", \
      REQARG_STRING, \
      NULL, \
      "Main pronunciation dictionary (lexicon) input file" }, \
    { "-fdict", \
      ARG_STRING, \
      NULL, \
      "Noise word pronunciation dictionary input file" }, \
    { "-dictcase", \
      ARG_BOOLEAN, \
      "no", \
      "Dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only)" }, \
    { "-maxnewoov", \
      ARG_INT32, \
      "20", \
      "Maximum new OOVs that can be added at run time" }, \
    { "-usewdphones", \
      ARG_BOOLEAN, \
      "no", \
      "Use within-word phones only" }
00275
/**
 * Option definitions for acoustic modeling.
 *
 * Expands to twenty-one brace-enclosed initializers, each of the form
 * { option name, type constant, default value string, help text }.
 * Covers the acoustic model directory and its individual input files,
 * the floors applied to data read from those files, Gaussian selection
 * settings (top-N, kd-trees), memory-mapped I/O, frame downsampling and
 * the logarithm base.
 * Note that "-topn_beam" is declared ARG_STRING even though its default
 * is "0"; its help text says it may also be a per-feature list.
 * The ARG_* type constants are not defined in this header; presumably
 * they come from <cmd_ln.h> -- TODO confirm.
 */
#define POCKETSPHINX_ACMOD_OPTIONS \
    { "-hmm", \
      ARG_STRING, \
      NULL, \
      "Directory containing acoustic model files." }, \
    { "-featparams", \
      ARG_STRING, \
      NULL, \
      "File containing feature extraction parameters." }, \
    { "-mdef", \
      ARG_STRING, \
      NULL, \
      "Model definition input file" }, \
    { "-senmgau", \
      ARG_STRING, \
      NULL, \
      "Senone to codebook mapping input file (usually not needed)" }, \
    { "-tmat", \
      ARG_STRING, \
      NULL, \
      "HMM state transition matrix input file" }, \
    { "-tmatfloor", \
      ARG_FLOAT32, \
      "0.0001", \
      "HMM state transition probability floor (applied to -tmat file)" }, \
    { "-mean", \
      ARG_STRING, \
      NULL, \
      "Mixture gaussian means input file" }, \
    { "-var", \
      ARG_STRING, \
      NULL, \
      "Mixture gaussian variances input file" }, \
    { "-varfloor", \
      ARG_FLOAT32, \
      "0.0001", \
      "Mixture gaussian variance floor (applied to data from -var file)" }, \
    { "-mixw", \
      ARG_STRING, \
      NULL, \
      "Senone mixture weights input file (uncompressed)" }, \
    { "-mixwfloor", \
      ARG_FLOAT32, \
      "0.0000001", \
      "Senone mixture weights floor (applied to data from -mixw file)" }, \
    { "-sendump", \
      ARG_STRING, \
      NULL, \
      "Senone dump (compressed mixture weights) input file" }, \
    { "-mllr", \
      ARG_STRING, \
      NULL, \
      "MLLR transformation to apply to means and variances" }, \
    { "-mmap", \
      ARG_BOOLEAN, \
      "yes", \
      "Use memory-mapped I/O (if possible) for model files" }, \
    { "-ds", \
      ARG_INT32, \
      "1", \
      "Frame GMM computation downsampling ratio" }, \
    { "-topn", \
      ARG_INT32, \
      "4", \
      "Maximum number of top Gaussians to use in scoring." }, \
    { "-topn_beam", \
      ARG_STRING, \
      "0", \
      "Beam width used to determine top-N Gaussians (or a list, per-feature)" }, \
    { "-kdtree", \
      ARG_STRING, \
      NULL, \
      "kd-Tree file for Gaussian selection" }, \
    { "-kdmaxdepth", \
      ARG_INT32, \
      "0", \
      "Maximum depth of kd-Trees to use" }, \
    { "-kdmaxbbi", \
      ARG_INT32, \
      "-1", \
      "Maximum number of Gaussians per leaf node in kd-Trees" }, \
    { "-logbase", \
      ARG_FLOAT32, \
      "1.0001", \
      "Base in which all log-likelihoods calculated" }
00362
/**
 * All-empty option entry: NULL name, type 0, NULL default, NULL help text.
 *
 * It has the same four-field layout as the entries in the option group
 * macros of this header. Presumably it is meant to terminate an option
 * table -- TODO confirm against the table consumers.
 */
#define CMDLN_EMPTY_OPTION { NULL, 0, NULL, NULL }
00364
00365 #endif