00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include <stdio.h>
00039 #include <string.h>
00040
00041 #include <cmd_ln.h>
00042 #include <yin.h>
00043 #include <ckd_alloc.h>
00044 #include <byteorder.h>
00045 #include <strfuncs.h>
00046 #include <err.h>
00047 #include <pio.h>
00048
00049 #ifndef WORDS_BIGENDIAN
00050 #define WORDS_BIGENDIAN 0
00051 #endif
00052
00053 static arg_t defn[] = {
00054 { "-i",
00055 ARG_STRING,
00056 NULL,
00057 "Single audio input file" },
00058
00059 { "-o",
00060 ARG_STRING,
00061 NULL,
00062 "Single text output file (standard output will be used if not given)" },
00063
00064 { "-c",
00065 ARG_STRING,
00066 NULL,
00067 "Control file for batch processing" },
00068
00069 { "-nskip",
00070 ARG_INT32,
00071 "0",
00072 "If a control file was specified, the number of utterances to skip at the head of the file" },
00073
00074 { "-runlen",
00075 ARG_INT32,
00076 "-1",
00077 "If a control file was specified, the number of utterances to process (see -nskip too)" },
00078
00079 { "-di",
00080 ARG_STRING,
00081 NULL,
00082 "Input directory, input file names are relative to this, if defined" },
00083
00084 { "-ei",
00085 ARG_STRING,
00086 NULL,
00087 "Input extension to be applied to all input files" },
00088
00089 { "-do",
00090 ARG_STRING,
00091 NULL,
00092 "Output directory, output files are relative to this" },
00093
00094 { "-eo",
00095 ARG_STRING,
00096 NULL,
00097 "Output extension to be applied to all output files" },
00098
00099 { "-nist",
00100 ARG_BOOLEAN,
00101 "no",
00102 "Defines input format as NIST sphere" },
00103
00104 { "-raw",
00105 ARG_BOOLEAN,
00106 "no",
00107 "Defines input format as raw binary data" },
00108
00109 { "-mswav",
00110 ARG_BOOLEAN,
00111 "no",
00112 "Defines input format as Microsoft Wav (RIFF)" },
00113
00114 { "-samprate",
00115 ARG_INT32,
00116 "0",
00117 "Sampling rate of audio data (will be determined automatically if 0)" },
00118
00119 { "-input_endian",
00120 ARG_STRING,
00121 NULL,
00122 "Endianness of audio data (will be determined automatically if not given)" },
00123
00124 { "-fshift",
00125 ARG_FLOAT32,
00126 "0.01",
00127 "Frame shift: number of seconds between each analysis frame." },
00128
00129 { "-flen",
00130 ARG_FLOAT32,
00131 "0.025",
00132 "Number of seconds in each analysis frame (needs to be greater than twice the longest period you wish to detect - to detect down to 80Hz you need a frame length of 2.0/80 = 0.025)." },
00133
00134 { "-smooth_window",
00135 ARG_INT32,
00136 "2",
00137 "Number of frames on either side of the current frame to use for smoothing." },
00138
00139 { "-voice_thresh",
00140 ARG_FLOAT32,
00141 "0.1",
00142 "Threshold of normalized difference under which to search for the fundamental period." },
00143
00144 { "-search_range",
00145 ARG_FLOAT32,
00146 "0.2",
00147 "Fraction of the best local estimate to use as a search range for smoothing." },
00148
00149 { NULL, 0, NULL, NULL }
00150 };
00151
00152 static int extract_pitch(const char *in, const char *out);
00153 static int run_control_file(const char *ctl);
00154
00155 int
00156 main(int argc, char *argv[])
00157 {
00158 cmd_ln_parse(defn, argc, argv, TRUE);
00159
00160
00161 if (cmd_ln_str("-c")) {
00162 if (run_control_file(cmd_ln_str("-c")) < 0)
00163 return 1;
00164 }
00165 else {
00166 if (extract_pitch(cmd_ln_str("-i"), cmd_ln_str("-o")) < 0)
00167 return 1;
00168 }
00169
00170 cmd_ln_free();
00171 return 0;
00172 }
00173
00174 static int
00175 guess_file_type(char const *file, FILE *infh)
00176 {
00177 char header[4];
00178
00179 fseek(infh, 0, SEEK_SET);
00180 if (fread(header, 1, 4, infh) != 4) {
00181 E_ERROR_SYSTEM("Failed to read 4 byte header");
00182 return -1;
00183 }
00184 if (0 == memcmp(header, "RIFF", 4)) {
00185 E_INFO("%s appears to be a WAV file\n", file);
00186 cmd_ln_set_boolean("-mswav", TRUE);
00187 cmd_ln_set_boolean("-nist", FALSE);
00188 cmd_ln_set_boolean("-raw", FALSE);
00189 }
00190 else if (0 == memcmp(header, "NIST", 4)) {
00191 E_INFO("%s appears to be a NIST SPHERE file\n", file);
00192 cmd_ln_set_boolean("-mswav", FALSE);
00193 cmd_ln_set_boolean("-nist", TRUE);
00194 cmd_ln_set_boolean("-raw", FALSE);
00195 }
00196 else {
00197 E_INFO("%s appears to be raw data\n", file);
00198 cmd_ln_set_boolean("-mswav", FALSE);
00199 cmd_ln_set_boolean("-nist", FALSE);
00200 cmd_ln_set_boolean("-raw", TRUE);
00201 }
00202 fseek(infh, 0, SEEK_SET);
00203 return 0;
00204 }
00205
/**
 * Read exactly nmemb items of size bytes each from stream into ptr.  On a
 * short read, log the failure and jump to the error_out label of the
 * enclosing function (which must therefore define one).
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * ';' forms a single statement and is safe inside an unbraced if/else.
 * size and nmemb are expanded more than once, so they must be free of side
 * effects.
 */
#define TRY_FREAD(ptr, size, nmemb, stream)                                 \
    do {                                                                    \
        if (fread((ptr), (size), (nmemb), (stream)) != (size_t)(nmemb)) {   \
            E_ERROR_SYSTEM("Failed to read %d bytes",                       \
                           (int)((size) * (nmemb)));                        \
            goto error_out;                                                 \
        }                                                                   \
    } while (0)
00211
00212 static int
00213 read_riff_header(FILE *infh)
00214 {
00215 char id[4];
00216 int32 intval, header_len;
00217 int16 shortval;
00218
00219
00220 cmd_ln_set_str("-input_endian", "little");
00221
00222
00223 TRY_FREAD(id, 1, 4, infh);
00224
00225 TRY_FREAD(&intval, 4, 1, infh);
00226
00227 TRY_FREAD(id, 1, 4, infh);
00228 if (0 != memcmp(id, "WAVE", 4)) {
00229 E_ERROR("This is not a WAVE file\n");
00230 goto error_out;
00231 }
00232
00233 TRY_FREAD(id, 1, 4, infh);
00234 if (0 != memcmp(id, "fmt ", 4)) {
00235 E_ERROR("Format chunk missing\n");
00236 goto error_out;
00237 }
00238
00239 TRY_FREAD(&intval, 4, 1, infh);
00240 if (WORDS_BIGENDIAN) SWAP_INT32(&intval);
00241 header_len = intval;
00242
00243
00244 TRY_FREAD(&shortval, 2, 1, infh);
00245 if (WORDS_BIGENDIAN) SWAP_INT16(&shortval);
00246 if (shortval != 1) {
00247 E_ERROR("WAVE file is not in PCM format\n");
00248 goto error_out;
00249 }
00250
00251
00252 TRY_FREAD(&shortval, 2, 1, infh);
00253 if (WORDS_BIGENDIAN) SWAP_INT16(&shortval);
00254 if (shortval != 1) {
00255 E_ERROR("WAVE file is not single channel\n");
00256 goto error_out;
00257 }
00258
00259
00260 TRY_FREAD(&intval, 4, 1, infh);
00261 if (WORDS_BIGENDIAN) SWAP_INT32(&intval);
00262 if (cmd_ln_int32("-samprate") == 0)
00263 cmd_ln_set_int32("-samprate", intval);
00264 else if (cmd_ln_int32("-samprate") != intval) {
00265 E_WARN("WAVE file sampling rate %d != -samprate %d\n",
00266 intval, cmd_ln_int32("-samprate"));
00267 }
00268
00269
00270 TRY_FREAD(&intval, 4, 1, infh);
00271
00272
00273 TRY_FREAD(&shortval, 2, 1, infh);
00274
00275
00276 TRY_FREAD(&shortval, 2, 1, infh);
00277 if (WORDS_BIGENDIAN) SWAP_INT16(&shortval);
00278 if (shortval != 16) {
00279 E_ERROR("WAVE file is not 16-bit\n");
00280 goto error_out;
00281 }
00282
00283
00284 if (header_len > 16)
00285 fseek(infh, header_len - 16, SEEK_CUR);
00286
00287
00288 while (1) {
00289 TRY_FREAD(id, 1, 4, infh);
00290 if (0 == memcmp(id, "data", 4)) {
00291
00292 TRY_FREAD(&intval, 4, 1, infh);
00293 break;
00294 }
00295 else {
00296
00297
00298 TRY_FREAD(&intval, 4, 1, infh);
00299 if (WORDS_BIGENDIAN) SWAP_INT32(&intval);
00300 fseek(infh, intval, SEEK_CUR);
00301 }
00302 }
00303
00304
00305 return 0;
00306 error_out:
00307 return -1;
00308 }
00309
00310 static int
00311 read_nist_header(FILE *infh)
00312 {
00313 char hdr[1024];
00314 char *line, *c;
00315
00316 TRY_FREAD(hdr, 1, 1024, infh);
00317 hdr[1023] = '\0';
00318
00319
00320
00321 if ((line = strstr(hdr, "sample_rate")) == NULL) {
00322 E_ERROR("No sampling rate in NIST header!\n");
00323 goto error_out;
00324 }
00325 c = strchr(line, '\n');
00326 if (c) *c = '\0';
00327 c = strrchr(line, ' ');
00328 if (c == NULL) {
00329 E_ERROR("Could not find sampling rate!\n");
00330 goto error_out;
00331 }
00332 ++c;
00333 if (cmd_ln_int32("-samprate") == 0)
00334 cmd_ln_set_int32("-samprate", atoi(c));
00335 else if (cmd_ln_int32("-samprate") != atoi(c)) {
00336 E_WARN("NIST file sampling rate %d != -samprate %d\n",
00337 atoi(c), cmd_ln_int32("-samprate"));
00338 }
00339
00340 if (line + strlen(line) < hdr + 1023)
00341 line[strlen(line)] = ' ';
00342 if ((line = strstr(hdr, "sample_byte_format")) == NULL) {
00343 E_ERROR("No sample byte format in NIST header!\n");
00344 goto error_out;
00345 }
00346 c = strchr(line, '\n');
00347 if (c) *c = '\0';
00348 c = strrchr(line, ' ');
00349 if (c == NULL) {
00350 E_ERROR("Could not find sample byte order!\n");
00351 goto error_out;
00352 }
00353 ++c;
00354 if (0 == memcmp(c, "01", 2)) {
00355 cmd_ln_set_str("-input_endian", "little");
00356 }
00357 else if (0 == memcmp(c, "10", 2)) {
00358 cmd_ln_set_str("-input_endian", "big");
00359 }
00360 else {
00361 E_ERROR("Unknown byte order %s\n", c);
00362 goto error_out;
00363 }
00364
00365
00366 return 0;
00367 error_out:
00368 return -1;
00369 }
00370
00371 static int
00372 extract_pitch(const char *in, const char *out)
00373 {
00374 FILE *infh = NULL, *outfh = NULL;
00375 size_t flen, fshift, nsamps;
00376 int16 *buf = NULL;
00377 yin_t *yin = NULL;
00378 uint16 period, bestdiff;
00379 int32 sps;
00380
00381 if (out) {
00382 if ((outfh = fopen(out, "w")) == NULL) {
00383 E_ERROR_SYSTEM("Failed to open %s for writing", outfh);
00384 goto error_out;
00385 }
00386 }
00387 else {
00388 outfh = stdout;
00389 }
00390 if ((infh = fopen(in, "rb")) == NULL) {
00391 E_ERROR_SYSTEM("Failed to open %s for reading", infh);
00392 goto error_out;
00393 }
00394
00395
00396
00397 if (!(cmd_ln_boolean("-raw")
00398 || cmd_ln_boolean("-mswav")
00399 || cmd_ln_boolean("-nist"))) {
00400 if (guess_file_type(in, infh) < 0)
00401 goto error_out;
00402 }
00403
00404
00405
00406 if (cmd_ln_boolean("-mswav")) {
00407 if (read_riff_header(infh) < 0)
00408 goto error_out;
00409 }
00410 else if (cmd_ln_boolean("-nist")) {
00411 if (read_nist_header(infh) < 0)
00412 goto error_out;
00413 }
00414 else if (cmd_ln_boolean("-raw")) {
00415
00416 if (cmd_ln_str("-input_endian") == NULL) {
00417 if (WORDS_BIGENDIAN)
00418 cmd_ln_set_str("-input_endian", "big");
00419 else
00420 cmd_ln_set_str("-input_endian", "little");
00421 }
00422 if (cmd_ln_int32("-samprate") == 0)
00423 cmd_ln_set_int32("-samprate", 16000);
00424 }
00425
00426
00427 sps = cmd_ln_int32("-samprate");
00428 flen = (size_t)(0.5 + sps * cmd_ln_float32("-flen"));
00429 fshift = (size_t)(0.5 + sps * cmd_ln_float32("-fshift"));
00430 yin = yin_init(flen, cmd_ln_float32("-voice_thresh"),
00431 cmd_ln_float32("-search_range"),
00432 cmd_ln_int32("-smooth_window"));
00433 if (yin == NULL) {
00434 E_ERROR("Failed to initialize YIN\n");
00435 goto error_out;
00436 }
00437 buf = ckd_calloc(flen, sizeof(*buf));
00438
00439 fread(buf, sizeof(*buf), flen, infh);
00440 yin_start(yin);
00441 nsamps = 0;
00442 while (!feof(infh)) {
00443
00444 yin_write(yin, buf);
00445 if (yin_read(yin, &period, &bestdiff)) {
00446 fprintf(outfh, "%.3f %.2f %.2f\n",
00447
00448 (double)nsamps/sps,
00449
00450 bestdiff > 32768 ? 0.0 : 1.0 - (double)bestdiff / 32768,
00451
00452 period == 0 ? sps : (double)sps / period);
00453 nsamps += fshift;
00454 }
00455
00456 memmove(buf, buf + fshift, (flen - fshift) * sizeof(*buf));
00457 fread(buf + flen - fshift, sizeof(*buf), fshift, infh);
00458 }
00459 yin_end(yin);
00460
00461 while (yin_read(yin, &period, &bestdiff)) {
00462 fprintf(outfh, "%.3f %.2f %.2f\n",
00463
00464 (double)nsamps/sps,
00465
00466 bestdiff > 32768 ? 0.0 : 1.0 - (double)bestdiff / 32768,
00467
00468 period == 0 ? sps : (double)sps / period);
00469 }
00470
00471 if (yin)
00472 yin_free(yin);
00473 ckd_free(buf);
00474 fclose(infh);
00475 if (outfh != stdout)
00476 fclose(outfh);
00477 return 0;
00478
00479 error_out:
00480 yin_free(yin);
00481 ckd_free(buf);
00482 if (infh) fclose(infh);
00483 if (outfh && outfh != stdout) fclose(outfh);
00484 return -1;
00485 }
00486
00487 static int
00488 run_control_file(const char *ctl)
00489 {
00490 FILE *ctlfh;
00491 char *line;
00492 char *di, *dout, *ei, *eio;
00493 size_t len;
00494 int rv, guess_type, guess_sps, guess_endian;
00495 int32 skip, runlen;
00496
00497 skip = cmd_ln_int32("-nskip");
00498 runlen = cmd_ln_int32("-runlen");
00499
00500
00501 guess_type = !(cmd_ln_boolean("-raw")
00502 || cmd_ln_boolean("-mswav")
00503 || cmd_ln_boolean("-nist"));
00504
00505 guess_sps = (cmd_ln_int32("-samprate") == 0);
00506
00507 guess_endian = (cmd_ln_str("-input_endian") == NULL);
00508
00509 if ((ctlfh = fopen(ctl, "r")) == NULL) {
00510 E_ERROR_SYSTEM("Failed to open control file %s", ctl);
00511 return -1;
00512 }
00513 if (cmd_ln_str("-di"))
00514 di = string_join(cmd_ln_str("-di"), "/", NULL);
00515 else
00516 di = ckd_salloc("");
00517 if (cmd_ln_str("-do"))
00518 dout = string_join(cmd_ln_str("-do"), "/", NULL);
00519 else
00520 dout = ckd_salloc("");
00521 if (cmd_ln_str("-ei"))
00522 ei = string_join(".", cmd_ln_str("-ei"), NULL);
00523 else
00524 ei = ckd_salloc("");
00525 if (cmd_ln_str("-eo"))
00526 eio = string_join(".", cmd_ln_str("-eo"), NULL);
00527 else
00528 eio = ckd_salloc("");
00529 rv = 0;
00530 while ((line = fread_line(ctlfh, &len)) != NULL) {
00531 char *infile, *outfile;
00532
00533 if (skip-- > 0) {
00534 ckd_free(line);
00535 continue;
00536 }
00537 if (runlen == 0) {
00538 ckd_free(line);
00539 break;
00540 }
00541 --runlen;
00542
00543 if (line[len-1] == '\n')
00544 line[len-1] = '\0';
00545
00546 infile = string_join(di, line, ei, NULL);
00547 outfile = string_join(dout, line, eio, NULL);
00548
00549
00550 if (guess_type) {
00551 cmd_ln_set_boolean("-nist", FALSE);
00552 cmd_ln_set_boolean("-mswav", FALSE);
00553 cmd_ln_set_boolean("-raw", FALSE);
00554 }
00555 if (guess_sps)
00556 cmd_ln_set_int32("-samprate", 0);
00557 if (guess_endian)
00558 cmd_ln_set_str("-input_endian", NULL);
00559
00560 rv = extract_pitch(infile, outfile);
00561
00562 ckd_free(infile);
00563 ckd_free(outfile);
00564 ckd_free(line);
00565
00566 if (rv != 0)
00567 break;
00568 }
00569 ckd_free(di);
00570 ckd_free(dout);
00571 ckd_free(ei);
00572 ckd_free(eio);
00573 fclose(ctlfh);
00574 return rv;
00575 }