00001 /* ==================================================================== 00002 * Copyright (c) 1998-2000 Carnegie Mellon University. All rights 00003 * reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions 00007 * are met: 00008 * 00009 * 1. Redistributions of source code must retain the above copyright 00010 * notice, this list of conditions and the following disclaimer. 00011 * 00012 * 2. Redistributions in binary form must reproduce the above copyright 00013 * notice, this list of conditions and the following disclaimer in 00014 * the documentation and/or other materials provided with the 00015 * distribution. 00016 * 00017 * This work was supported in part by funding from the Defense Advanced 00018 * Research Projects Agency and the National Science Foundation of the 00019 * United States of America, and the CMU Sphinx Speech Consortium. 00020 * 00021 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00022 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00023 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00024 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00025 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00027 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00028 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00029 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00030 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00031 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 * 00033 * ==================================================================== 00034 * 00035 */ 00036 /********************************************************************* 00037 * 00038 * File: cmd_ln_defn.h 00039 * 00040 * Description: 00041 * Command line argument definition 00042 * 00043 * Author: 00044 * 00045 *********************************************************************/ 00046 00047 #ifndef CMD_LN_DEFN_H 00048 #define CMD_LN_DEFN_H 00049 00050 #include "cmd_ln.h" 00051 #include "fe.h" 00052 #include "wave2feat.h" 00053 00054 const char helpstr[] = 00055 "Description: \n\ 00056 Extract acoustic features form from audio file.\n\ 00057 \n\ 00058 The main parameters that affect the final output, with typical values, are:\n\ 00059 \n\ 00060 samprate, typically 8000, 11025, or 16000\n\ 00061 lowerf, 130, 200, 130, for the respective sampling rates above\n\ 00062 upperf, 3700, 5200, 6800, for the respective sampling rates above\n\ 00063 nfilt, 31, 37, 40, for the respective sampling rates above\n\ 00064 nfft, 256 or 512\n\ 00065 format, raw or nist or mswav\n\ 00066 \""; 00067 00068 const char examplestr[] = 00069 "Example: \n\ 00070 This example creates a cepstral file named \"output.mfc\" from an input audio file named \"input.raw\", which is a raw audio file (no header information), which was originally sampled at 16kHz. \n\ 00071 \n\ 00072 sphinx_fe -i input.raw \n\ 00073 -o output.mfc \n\ 00074 -raw yes \n\ 00075 -input_endian little \n\ 00076 -samprate 16000 \n\ 00077 -lowerf 130 \n\ 00078 -upperf 6800 \n\ 00079 -nfilt 40 \n\ 00080 -nfft 512"; 00081 00082 static arg_t defn[] = { 00083 { "-help", 00084 ARG_BOOLEAN, 00085 "no", 00086 "Shows the usage of the tool"}, 00087 00088 { "-example", 00089 ARG_BOOLEAN, 00090 "no", 00091 "Shows example of how to use the tool"}, 00092 00093 waveform_to_cepstral_command_line_macro(), 00094 00095 { "-argfile", 00096 ARG_STRING, 00097 NULL, 00098 "Argument file (e.g. feat.params from an acoustic model) to read parameters from. This will override anything set in other command line arguments." }, 00099 00100 { "-i", 00101 ARG_STRING, 00102 NULL, 00103 "Single audio input file" }, 00104 00105 { "-o", 00106 ARG_STRING, 00107 NULL, 00108 "Single cepstral output file" }, 00109 00110 { "-c", 00111 ARG_STRING, 00112 NULL, 00113 "Control file for batch processing" }, 00114 00115 { "-nskip", 00116 ARG_STRING, 00117 NULL, 00118 "If a control file was specified, the number of utterances to skip at the head of the file" }, 00119 00120 { "-runlen", 00121 ARG_STRING, 00122 NULL, 00123 "If a control file was specified, the number of utterances to process (see -nskip too)" }, 00124 00125 { "-di", 00126 ARG_STRING, 00127 NULL, 00128 "Input directory, input file names are relative to this, if defined" }, 00129 00130 { "-ei", 00131 ARG_STRING, 00132 NULL, 00133 "Input extension to be applied to all input files" }, 00134 00135 { "-do", 00136 ARG_STRING, 00137 NULL, 00138 "Output directory, output files are relative to this" }, 00139 00140 { "-eo", 00141 ARG_STRING, 00142 NULL, 00143 "Output extension to be applied to all output files" }, 00144 00145 { "-nist", 00146 ARG_BOOLEAN, 00147 "no", 00148 "Defines input format as NIST sphere" }, 00149 00150 { "-raw", 00151 ARG_BOOLEAN, 00152 "no", 00153 "Defines input format as raw binary data" }, 00154 00155 { "-mswav", 00156 ARG_BOOLEAN, 00157 "no", 00158 "Defines input format as Microsoft Wav (RIFF)" }, 00159 00160 { "-nchans", 00161 ARG_INT32, 00162 "1", 00163 "Number of channels of data (interlaced samples assumed)" }, 00164 00165 { "-whichchan", 00166 ARG_INT32, 00167 "1", 00168 "Channel to process" }, 00169 00170 { "-feat", 00171 ARG_STRING, 00172 "sphinx", 00173 "SPHINX format - big endian" }, 00174 00175 { "-mach_endian", 00176 ARG_STRING, 00177 #ifdef WORDS_BIGENDIAN 00178 "big", 00179 #else 00180 "little", 00181 #endif 00182 "Endianness of machine, big or little" }, 00183 00184 { "-blocksize", 00185 ARG_INT32, 00186 "200000", 00187 "Block size, used to limit the number of samples used at a time when reading very large audio files" }, 00188 00189 { NULL, 0, NULL, NULL } 00190 }; 00191 00192 00193 #define CMD_LN_DEFN_H 00194 00195 #endif /* CMD_LN_DEFN_H */ 00196 00197 /* 00198 * Log record. Maintained by RCS. 00199 * 00200 * $Log: cmd_ln_defn.h,v $ 00201 * Revision 1.7 2006/02/25 00:53:48 egouvea 00202 * Added the flag "-seed". If dither is being used and the seed is less 00203 * than zero, the random number generator is initialized with time(). If 00204 * it is at least zero, it's initialized with the provided seed. This way 00205 * we have the benefit of having dither, and the benefit of being 00206 * repeatable. 00207 * 00208 * This is consistent with what sphinx3 does. Well, almost. The random 00209 * number generator is still what the compiler provides. 00210 * 00211 * Also, moved fe_init_params to fe_interface.c, so one can initialize a 00212 * variable of type param_t with meaningful values. 00213 * 00214 * Revision 1.6 2006/02/17 00:31:34 egouvea 00215 * Removed switch -melwarp. Changed the default for window length to 00216 * 0.025625 from 0.256 (so that a window at 16kHz sampling rate has 00217 * exactly 410 samples). Cleaned up include's. Replaced some E_FATAL() 00218 * with E_WARN() and return. 00219 * 00220 * Revision 1.5 2006/02/16 00:18:26 egouvea 00221 * Implemented flexible warping function. The user can specify at run 00222 * time which of several shapes they want to use. Currently implemented 00223 * are an affine function (y = ax + b), an inverse linear (y = a/x) and a 00224 * piecewise linear (y = ax, up to a frequency F, and then it "breaks" so 00225 * Nyquist frequency matches in both scales. 00226 * 00227 * Added two switches, -warp_type and -warp_params. The first specifies 00228 * the type, which valid values: 00229 * 00230 * -inverse or inverse_linear 00231 * -linear or affine 00232 * -piecewise or piecewise_linear 00233 * 00234 * The inverse_linear is the same as implemented by EHT. The -mel_warp 00235 * switch was kept for compatibility (maybe remove it in the 00236 * future?). The code is compatible with EHT's changes: cepstra created 00237 * from code after his changes should be the same as now. Scripts that 00238 * worked with his changes should work now without changes. Tested a few 00239 * cases, same results. 00240 * 00241 * Revision 1.4 2006/02/14 20:56:54 eht 00242 * Implement an argument -melwarp that changes the standard mel-scale 00243 * equation from: 00244 * M(f) = 2595 * log10( 1 + f/700 ) 00245 * to: 00246 * M(f,w) = 2595 * log10( 1 + f/(700*w)) 00247 * 00248 * So, 1.0 means no warp, w > 1.0 means linear compression w < 1.0 means 00249 * linear expansion. 00250 * 00251 * Implement argument -nskip and -runlen arguments so that a subset of the 00252 * utterances in the control file can be executed. Allows a simple 00253 * distribution of wave2feat processing over N processors. 00254 * 00255 * Revision 1.3 2005/05/19 21:21:55 egouvea 00256 * Bug #1176394: example bug 00257 * 00258 * Revision 1.2 2004/11/23 04:14:06 egouvea 00259 * Fixed bug in cmd_ln.c in which a wrong boolean argument led into an 00260 * infinite loop, and fixed the help and example strings, getting rid of 00261 * spaces, so that the appearance is better. 00262 * 00263 * Revision 1.1 2004/09/09 17:59:30 egouvea 00264 * Adding missing files to wave2feat 00265 * 00266 * 00267 * 00268 */