00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 #include <stdio.h>
00040 #include <stdlib.h>
00041 #include <string.h>
00042 #include <assert.h>
00043 #include <limits.h>
00044 #include <math.h>
00045 #if defined(__ADSPBLACKFIN__)
00046 #elif !defined(_WIN32_WCE)
00047 #include <sys/types.h>
00048 #endif
00049
00050 #ifndef M_PI
00051 #define M_PI 3.14159265358979323846
00052 #endif
00053
00054
00055 #include <sphinx_config.h>
00056 #include <cmd_ln.h>
00057 #include <fixpoint.h>
00058 #include <ckd_alloc.h>
00059 #include <bio.h>
00060 #include <err.h>
00061 #include <prim_type.h>
00062
00063
00064 #include "s2_semi_mgau.h"
00065 #include "kdtree.h"
00066 #include "posixwin32.h"
00067
00068 #define MGAU_MIXW_VERSION "1.0"
00069 #define MGAU_PARAM_VERSION "1.0"
00070 #define NONE -1
00071 #define WORST_DIST (int32)(0x80000000)
00072
00073 struct vqFeature_s {
00074 int32 score;
00075 int32 codeword;
00076 };
00077
/*
 * Accumulation helpers for Gaussian distance evaluation.
 *
 * In fixed-point builds the operations saturate instead of wrapping.  The
 * range test is done BEFORE the arithmetic so that no signed overflow (which
 * is undefined behaviour and may be optimised away) is ever evaluated.  A
 * negative second operand also yields the saturated value, which is what the
 * previous compare-after-the-fact form produced for such operands.
 * Arguments are evaluated more than once: pass side-effect-free expressions.
 */
#ifdef FIXED_POINT
#define GMMSUB(a,b) \
    ((((b) < 0) || ((a) < INT_MIN + (b))) ? (INT_MIN) : ((a)-(b)))

#define GMMADD(a,b) \
    ((((b) < 0) || ((a) > INT_MAX - (b))) ? (INT_MAX) : ((a)+(b)))
#else
#define GMMSUB(a,b) ((a)-(b))
#define GMMADD(a,b) ((a)+(b))
#endif

#ifndef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif

/*
 * Storage/inline specifier for file-local helpers.  Any C99-or-later
 * compiler gets "static inline": a plain "inline" definition provides no
 * external definition, so a non-inlined call could fail to link.
 */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
#define LOGMATH_INLINE static inline
#elif defined(__GNUC__)
#define LOGMATH_INLINE static inline
#elif defined(_MSC_VER)
#define LOGMATH_INLINE __inline
#else
#define LOGMATH_INLINE static
#endif

/* Upper clamps applied to quantized mixture weights and acoustic scores. */
#define MAX_NEG_MIXW 159
#define MAX_NEG_ASCR 96
00128 LOGMATH_INLINE int
00129 fast_logmath_add(logmath_t *lmath, int mlx, int mly)
00130 {
00131 logadd_t *t = LOGMATH_TABLE(lmath);
00132 int d, r;
00133
00134
00135 if (mlx > mly) {
00136 d = (mlx - mly);
00137 r = mly;
00138 }
00139 else {
00140 d = (mly - mlx);
00141 r = mlx;
00142 }
00143
00144 return r - (((uint8 *)t->table)[d]);
00145 }
00146
00147
00148
00149
00150
00151 static int32 get_scores4_8b(s2_semi_mgau_t * s, int16 *senone_scores,
00152 int32 *senone_active, int32 n_senone_active,
00153 int32 *out_bestidx);
00154 static int32 get_scores2_8b(s2_semi_mgau_t * s, int16 *senone_scores,
00155 int32 *senone_active, int32 n_senone_active,
00156 int32 *out_bestidx);
00157 static int32 get_scores1_8b(s2_semi_mgau_t * s, int16 *senone_scores,
00158 int32 *senone_active, int32 n_senone_active,
00159 int32 *out_bestidx);
00160 static int32 get_scores_8b(s2_semi_mgau_t * s, int16 *senone_scores,
00161 int32 *senone_active, int32 n_senone_active,
00162 int32 *out_bestidx);
00163 static int32 get_scores4_8b_all(s2_semi_mgau_t * s, int16 *senone_scores,
00164 int32 *out_bestidx);
00165 static int32 get_scores2_8b_all(s2_semi_mgau_t * s, int16 *senone_scores,
00166 int32 *out_bestidx);
00167 static int32 get_scores1_8b_all(s2_semi_mgau_t * s, int16 *senone_scores,
00168 int32 *out_bestidx);
00169 static int32 get_scores_8b_all(s2_semi_mgau_t * s, int16 *senone_scores,
00170 int32 *out_bestidx);
00171
00172 static void
00173 eval_topn(s2_semi_mgau_t *s, int32 feat, mfcc_t *z)
00174 {
00175 int32 i, ceplen;
00176 vqFeature_t *topn;
00177
00178 topn = s->f[feat];
00179 ceplen = s->veclen[feat];
00180
00181 for (i = 0; i < s->topn; i++) {
00182 mean_t *mean, diff, sqdiff, compl;
00183 vqFeature_t vtmp;
00184 var_t *var, d;
00185 mfcc_t *obs;
00186 int32 cw, j;
00187
00188 cw = topn[i].codeword;
00189 mean = s->means[feat] + cw * ceplen;
00190 var = s->vars[feat] + cw * ceplen;
00191 d = s->dets[feat][cw];
00192 obs = z;
00193 for (j = 0; j < ceplen; j++) {
00194 diff = *obs++ - *mean++;
00195
00196 sqdiff = MFCCMUL(diff, diff);
00197 compl = MFCCMUL(sqdiff, *var);
00198 d = GMMSUB(d, compl);
00199 ++var;
00200 }
00201 topn[i].score = (int32)d;
00202 if (i == 0)
00203 continue;
00204 vtmp = topn[i];
00205 for (j = i - 1; j >= 0 && (int32)d > topn[j].score; j--) {
00206 topn[j + 1] = topn[j];
00207 }
00208 topn[j + 1] = vtmp;
00209 }
00210 }
00211
00212 static void
00213 eval_cb_kdtree(s2_semi_mgau_t *s, int32 feat, mfcc_t *z,
00214 kd_tree_node_t *node, uint32 maxbbi)
00215 {
00216 vqFeature_t *worst, *best, *topn;
00217 int32 i, ceplen;
00218
00219 best = topn = s->f[feat];
00220 worst = topn + (s->topn - 1);
00221 ceplen = s->veclen[feat];
00222
00223 for (i = 0; i < maxbbi; ++i) {
00224 mean_t *mean, diff, sqdiff, compl;
00225 var_t *var, d;
00226 mfcc_t *obs;
00227 vqFeature_t *cur;
00228 int32 cw, j, k;
00229
00230 cw = node->bbi[i];
00231 mean = s->means[feat] + cw * ceplen;
00232 var = s->vars[feat] + cw * ceplen;
00233 d = s->dets[feat][cw];
00234 obs = z;
00235 for (j = 0; (j < ceplen) && (d >= worst->score); j++) {
00236 diff = *obs++ - *mean++;
00237
00238 sqdiff = MFCCMUL(diff, diff);
00239 compl = MFCCMUL(sqdiff, *var);
00240 d = GMMSUB(d, compl);
00241 ++var;
00242 }
00243 if (j < ceplen)
00244 continue;
00245 if ((int32)d < worst->score)
00246 continue;
00247 for (k = 0; k < s->topn; k++) {
00248
00249 if (topn[k].codeword == cw)
00250 break;
00251 }
00252 if (k < s->topn)
00253 continue;
00254
00255 for (cur = worst - 1; cur >= best && (int32)d >= cur->score; --cur)
00256 memcpy(cur + 1, cur, sizeof(vqFeature_t));
00257 ++cur;
00258 cur->codeword = cw;
00259 cur->score = (int32)d;
00260 }
00261 }
00262
00263 static void
00264 eval_cb(s2_semi_mgau_t *s, int32 feat, mfcc_t *z)
00265 {
00266 vqFeature_t *worst, *best, *topn;
00267 mean_t *mean;
00268 var_t *var, *det, *detP, *detE;
00269 int32 i, ceplen;
00270
00271 best = topn = s->f[feat];
00272 worst = topn + (s->topn - 1);
00273 mean = s->means[feat];
00274 var = s->vars[feat];
00275 det = s->dets[feat];
00276 detE = det + s->n_density;
00277 ceplen = s->veclen[feat];
00278
00279 for (detP = det; detP < detE; ++detP) {
00280 mean_t diff, sqdiff, compl;
00281 var_t d;
00282 mfcc_t *obs;
00283 vqFeature_t *cur;
00284 int32 cw, j;
00285
00286 d = *detP;
00287 obs = z;
00288 cw = detP - det;
00289 for (j = 0; (j < ceplen) && (d >= worst->score); ++j) {
00290 diff = *obs++ - *mean++;
00291
00292 sqdiff = MFCCMUL(diff, diff);
00293 compl = MFCCMUL(sqdiff, *var);
00294 d = GMMSUB(d, compl);
00295 ++var;
00296 }
00297 if (j < ceplen) {
00298
00299 mean += (ceplen - j);
00300 var += (ceplen - j);
00301 continue;
00302 }
00303 if ((int32)d < worst->score)
00304 continue;
00305 for (i = 0; i < s->topn; i++) {
00306
00307 if (topn[i].codeword == cw)
00308 break;
00309 }
00310 if (i < s->topn)
00311 continue;
00312
00313 for (cur = worst - 1; cur >= best && (int32)d >= cur->score; --cur)
00314 memcpy(cur + 1, cur, sizeof(vqFeature_t));
00315 ++cur;
00316 cur->codeword = cw;
00317 cur->score = (int32)d;
00318 }
00319 }
00320
00321 static void
00322 mgau_dist(s2_semi_mgau_t * s, int32 frame, int32 feat, mfcc_t * z)
00323 {
00324
00325
00326 memcpy(s->f[feat], s->lastf[feat], sizeof(vqFeature_t) * s->topn);
00327 eval_topn(s, feat, z);
00328
00329
00330 if (frame % s->ds_ratio)
00331 return;
00332
00333
00334 if (s->kdtrees) {
00335 kd_tree_node_t *node;
00336 uint32 maxbbi;
00337
00338 node =
00339 eval_kd_tree(s->kdtrees[feat], z, s->kd_maxdepth);
00340 maxbbi = s->kd_maxbbi == -1 ? node->n_bbi : MIN(node->n_bbi,
00341 s->
00342 kd_maxbbi);
00343 eval_cb_kdtree(s, feat, z, node, maxbbi);
00344 }
00345 else {
00346 eval_cb(s, feat, z);
00347 }
00348
00349
00350 memcpy(s->lastf[feat], s->f[feat], sizeof(vqFeature_t) * s->topn);
00351 }
00352
00353 static void
00354 mgau_norm(s2_semi_mgau_t *s, int feat)
00355 {
00356 int32 norm;
00357 int j;
00358
00359
00360 norm = s->f[feat][0].score >> SENSCR_SHIFT;
00361 for (j = 1; j < s->topn; ++j) {
00362 norm = logmath_add(s->lmath_8b, norm,
00363 s->f[feat][j].score >> SENSCR_SHIFT);
00364 }
00365
00366
00367 for (j = 0; j < s->topn; ++j) {
00368 s->f[feat][j].score = -((s->f[feat][j].score >> SENSCR_SHIFT) - norm);
00369 if (s->f[feat][j].score < 0 || s->f[feat][j].score > MAX_NEG_ASCR)
00370 s->f[feat][j].score = MAX_NEG_ASCR;
00371 }
00372 }
00373
00374
00375
00376
00377 int32
00378 s2_semi_mgau_frame_eval(s2_semi_mgau_t * s,
00379 int16 *senone_scores,
00380 int32 *senone_active,
00381 int32 n_senone_active,
00382 mfcc_t ** featbuf, int32 frame,
00383 int32 compallsen,
00384 int32 *out_bestidx)
00385 {
00386 int i;
00387
00388 for (i = 0; i < s->n_feat; ++i) {
00389 mgau_dist(s, frame, i, featbuf[i]);
00390 mgau_norm(s, i);
00391 }
00392
00393 if (compallsen) {
00394 switch (s->topn) {
00395 case 4:
00396 return get_scores4_8b_all(s, senone_scores, out_bestidx);
00397 case 2:
00398 return get_scores2_8b_all(s, senone_scores, out_bestidx);
00399 case 1:
00400 return get_scores1_8b_all(s, senone_scores, out_bestidx);
00401 default:
00402 return get_scores_8b_all(s, senone_scores, out_bestidx);
00403 }
00404 }
00405 else {
00406 switch (s->topn) {
00407 case 4:
00408 return get_scores4_8b(s, senone_scores,
00409 senone_active, n_senone_active,
00410 out_bestidx);
00411 case 2:
00412 return get_scores2_8b(s, senone_scores,
00413 senone_active, n_senone_active,
00414 out_bestidx);
00415 case 1:
00416 return get_scores1_8b(s, senone_scores,
00417 senone_active, n_senone_active,
00418 out_bestidx);
00419 default:
00420 return get_scores_8b(s, senone_scores,
00421 senone_active, n_senone_active,
00422 out_bestidx);
00423 }
00424 }
00425 }
00426
00427 static int32
00428 get_scores_8b(s2_semi_mgau_t * s, int16 *senone_scores,
00429 int32 *senone_active, int32 n_senone_active,
00430 int32 *out_bestidx)
00431 {
00432 int32 i, j, k;
00433 int32 best = (int32)0x7fffffff;
00434
00435 memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
00436 for (i = 0; i < s->n_feat; ++i) {
00437 for (j = 0; j < n_senone_active; j++) {
00438 int sen = senone_active[j];
00439 uint8 *pid_cw;
00440 int32 tmp;
00441 pid_cw = s->mixw[i][s->f[i][0].codeword];
00442 tmp = pid_cw[sen] + s->f[i][0].score;
00443 for (k = 1; k < s->topn; ++k) {
00444 pid_cw = s->mixw[i][s->f[i][k].codeword];
00445 tmp = fast_logmath_add(s->lmath_8b, tmp,
00446 pid_cw[sen] + s->f[i][k].score);
00447 }
00448 senone_scores[sen] += tmp;
00449 if (i == s->n_feat - 1 && senone_scores[sen] < best) {
00450 best = senone_scores[sen];
00451 *out_bestidx = sen;
00452 }
00453 }
00454 }
00455 return best;
00456 }
00457
00458 static int32
00459 get_scores_8b_all(s2_semi_mgau_t * s, int16 *senone_scores,
00460 int32 *out_bestidx)
00461 {
00462 int32 i, j, k;
00463 int32 best = (int32)0x7fffffff;
00464
00465 memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
00466 for (i = 0; i < s->n_feat; ++i) {
00467 for (j = 0; j < s->n_sen; j++) {
00468 uint8 *pid_cw;
00469 int32 tmp;
00470 pid_cw = s->mixw[i][s->f[i][0].codeword];
00471 tmp = pid_cw[j] + s->f[i][0].score;
00472 for (k = 1; k < s->topn; ++k) {
00473 pid_cw = s->mixw[i][s->f[i][k].codeword];
00474 tmp = fast_logmath_add(s->lmath_8b, tmp,
00475 pid_cw[j] + s->f[i][k].score);
00476 }
00477 senone_scores[j] += tmp;
00478 if (i == s->n_feat - 1 && senone_scores[j] < best) {
00479 best = senone_scores[j];
00480 *out_bestidx = j;
00481 }
00482 }
00483 }
00484 return best;
00485 }
00486
00487 static int32
00488 get_scores4_8b(s2_semi_mgau_t * s, int16 *senone_scores,
00489 int32 *senone_active, int32 n_senone_active,
00490 int32 *out_bestidx)
00491 {
00492 int32 j;
00493 int32 best = (int32)0x7fffffff;
00494
00495 memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
00496 for (j = 0; j < s->n_feat; j++) {
00497 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
00498 int32 k;
00499
00500
00501 pid_cw0 = s->mixw[j][s->f[j][0].codeword];
00502 pid_cw1 = s->mixw[j][s->f[j][1].codeword];
00503 pid_cw2 = s->mixw[j][s->f[j][2].codeword];
00504 pid_cw3 = s->mixw[j][s->f[j][3].codeword];
00505
00506 for (k = 0; k < n_senone_active; k++) {
00507 int32 tmp1, tmp2;
00508 int32 n = senone_active[k];
00509
00510 tmp1 = pid_cw0[n] + s->f[j][0].score;
00511 tmp2 = pid_cw1[n] + s->f[j][1].score;
00512 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, tmp2);
00513 tmp2 = pid_cw2[n] + s->f[j][2].score;
00514 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, tmp2);
00515 tmp2 = pid_cw3[n] + s->f[j][3].score;
00516 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, tmp2);
00517
00518 senone_scores[n] += tmp1;
00519 if (j == s->n_feat - 1 && senone_scores[n] < best) {
00520 best = senone_scores[n];
00521 *out_bestidx = n;
00522 }
00523 }
00524 }
00525 return best;
00526 }
00527
00528 static int32
00529 get_scores4_8b_all(s2_semi_mgau_t * s, int16 *senone_scores,
00530 int32 *out_bestidx)
00531 {
00532 int32 j;
00533 int32 best = (int32)0x7fffffff;
00534
00535 memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
00536 for (j = 0; j < s->n_feat; j++) {
00537 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
00538 int32 n;
00539
00540
00541 pid_cw0 = s->mixw[j][s->f[j][0].codeword];
00542 pid_cw1 = s->mixw[j][s->f[j][1].codeword];
00543 pid_cw2 = s->mixw[j][s->f[j][2].codeword];
00544 pid_cw3 = s->mixw[j][s->f[j][3].codeword];
00545
00546 for (n = 0; n < s->n_sen; n++) {
00547 int32 tmp1, tmp2;
00548 tmp1 = pid_cw0[n] + s->f[j][0].score;
00549 tmp2 = pid_cw1[n] + s->f[j][1].score;
00550 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, tmp2);
00551 tmp2 = pid_cw2[n] + s->f[j][2].score;
00552 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, tmp2);
00553 tmp2 = pid_cw3[n] + s->f[j][3].score;
00554 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, tmp2);
00555
00556 senone_scores[n] += tmp1;
00557 if (j == s->n_feat - 1 && senone_scores[n] < best) {
00558 best = senone_scores[n];
00559 *out_bestidx = n;
00560 }
00561 }
00562 }
00563 return best;
00564 }
00565
00566 static int32
00567 get_scores2_8b(s2_semi_mgau_t * s, int16 *senone_scores,
00568 int32 *senone_active, int32 n_senone_active,
00569 int32 *out_bestidx)
00570 {
00571 int32 k;
00572 int32 best = (int32)0x7fffffff;
00573 uint8 *pid_cw00, *pid_cw10, *pid_cw01, *pid_cw11,
00574 *pid_cw02, *pid_cw12, *pid_cw03, *pid_cw13;
00575
00576 memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
00577
00578 pid_cw00 = s->mixw[0][s->f[0][0].codeword];
00579 pid_cw10 = s->mixw[0][s->f[0][1].codeword];
00580 pid_cw01 = s->mixw[1][s->f[1][0].codeword];
00581 pid_cw11 = s->mixw[1][s->f[1][1].codeword];
00582 pid_cw02 = s->mixw[2][s->f[2][0].codeword];
00583 pid_cw12 = s->mixw[2][s->f[2][1].codeword];
00584 pid_cw03 = s->mixw[3][s->f[3][0].codeword];
00585 pid_cw13 = s->mixw[3][s->f[3][1].codeword];
00586
00587 for (k = 0; k < n_senone_active; k++) {
00588 int32 tmp1, tmp2, n;
00589 n = senone_active[k];
00590
00591 tmp1 = pid_cw00[n] + s->f[0][0].score;
00592 tmp2 = pid_cw10[n] + s->f[0][1].score;
00593 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, tmp2);
00594 senone_scores[n] += tmp1;
00595 tmp1 = pid_cw01[n] + s->f[1][0].score;
00596 tmp2 = pid_cw11[n] + s->f[1][1].score;
00597 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, tmp2);
00598 senone_scores[n] += tmp1;
00599 tmp1 = pid_cw02[n] + s->f[2][0].score;
00600 tmp2 = pid_cw12[n] + s->f[2][1].score;
00601 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, tmp2);
00602 senone_scores[n] += tmp1;
00603 tmp1 = pid_cw03[n] + s->f[3][0].score;
00604 tmp2 = pid_cw13[n] + s->f[3][1].score;
00605 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, tmp2);
00606 senone_scores[n] += tmp1;
00607 if (senone_scores[n] < best) {
00608 best = senone_scores[n];
00609 *out_bestidx = n;
00610 }
00611 }
00612 return best;
00613 }
00614
00615 static int32
00616 get_scores2_8b_all(s2_semi_mgau_t * s, int16 *senone_scores,
00617 int32 *out_bestidx)
00618 {
00619 uint8 *pid_cw00, *pid_cw10, *pid_cw01, *pid_cw11,
00620 *pid_cw02, *pid_cw12, *pid_cw03, *pid_cw13;
00621 int32 best = (int32)0x7fffffff;
00622 int32 n;
00623
00624 memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
00625
00626 pid_cw00 = s->mixw[0][s->f[0][0].codeword];
00627 pid_cw10 = s->mixw[0][s->f[0][1].codeword];
00628 pid_cw01 = s->mixw[1][s->f[1][0].codeword];
00629 pid_cw11 = s->mixw[1][s->f[1][1].codeword];
00630 pid_cw02 = s->mixw[2][s->f[2][0].codeword];
00631 pid_cw12 = s->mixw[2][s->f[2][1].codeword];
00632 pid_cw03 = s->mixw[3][s->f[3][0].codeword];
00633 pid_cw13 = s->mixw[3][s->f[3][1].codeword];
00634
00635 for (n = 0; n < s->n_sen; n++) {
00636 int32 tmp1, tmp2;
00637
00638 tmp1 = pid_cw00[n] + s->f[0][0].score;
00639 tmp2 = pid_cw10[n] + s->f[0][1].score;
00640 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, tmp2);
00641 senone_scores[n] += tmp1;
00642 tmp1 = pid_cw01[n] + s->f[1][0].score;
00643 tmp2 = pid_cw11[n] + s->f[1][1].score;
00644 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, tmp2);
00645 senone_scores[n] += tmp1;
00646 tmp1 = pid_cw02[n] + s->f[2][0].score;
00647 tmp2 = pid_cw12[n] + s->f[2][1].score;
00648 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, tmp2);
00649 senone_scores[n] += tmp1;
00650 tmp1 = pid_cw03[n] + s->f[3][0].score;
00651 tmp2 = pid_cw13[n] + s->f[3][1].score;
00652 tmp1 = fast_logmath_add(s->lmath_8b, tmp1, tmp2);
00653 senone_scores[n] += tmp1;
00654 if (senone_scores[n] < best) {
00655 best = senone_scores[n];
00656 *out_bestidx = n;
00657 }
00658 }
00659 return best;
00660 }
00661
00662 static int32
00663 get_scores1_8b(s2_semi_mgau_t * s, int16 *senone_scores,
00664 int32 *senone_active, int32 n_senone_active,
00665 int32 *out_bestidx)
00666 {
00667 int32 j, k;
00668 int32 best = (int32)0x7fffffff;
00669 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
00670
00671
00672 pid_cw0 = s->mixw[0][s->f[0][0].codeword];
00673 pid_cw1 = s->mixw[1][s->f[1][0].codeword];
00674 pid_cw2 = s->mixw[2][s->f[2][0].codeword];
00675 pid_cw3 = s->mixw[3][s->f[3][0].codeword];
00676
00677 for (k = 0; k < n_senone_active; k++) {
00678 j = senone_active[k];
00679 senone_scores[j] =
00680 (pid_cw0[j] + pid_cw1[j] + pid_cw2[j] + pid_cw3[j]);
00681 if (senone_scores[j] < best) {
00682 best = senone_scores[j];
00683 *out_bestidx = j;
00684 }
00685 }
00686 return best;
00687 }
00688
00689 static int32
00690 get_scores1_8b_all(s2_semi_mgau_t * s, int16 *senone_scores,
00691 int32 *out_bestidx)
00692 {
00693 int32 j;
00694 int32 best = (int32)0x7fffffff;
00695 uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
00696
00697
00698 pid_cw0 = s->mixw[0][s->f[0][0].codeword];
00699 pid_cw1 = s->mixw[1][s->f[1][0].codeword];
00700 pid_cw2 = s->mixw[2][s->f[2][0].codeword];
00701 pid_cw3 = s->mixw[3][s->f[3][0].codeword];
00702
00703 for (j = 0; j < s->n_sen; j++) {
00704 senone_scores[j] =
00705 (pid_cw0[j] + pid_cw1[j] + pid_cw2[j] + pid_cw3[j]);
00706 if (senone_scores[j] < best) {
00707 best = senone_scores[j];
00708 *out_bestidx = j;
00709 }
00710 }
00711 return best;
00712 }
00713
00714 int32
00715 s2_semi_mgau_load_kdtree(s2_semi_mgau_t * s, const char *kdtree_path,
00716 uint32 maxdepth, int32 maxbbi)
00717 {
00718 if (read_kd_trees(kdtree_path, &s->kdtrees, &s->n_kdtrees,
00719 maxdepth, maxbbi) == -1)
00720 E_FATAL("Failed to read kd-trees from %s\n", kdtree_path);
00721 if (s->n_kdtrees != s->n_feat)
00722 E_FATAL("Number of kd-trees != %d\n", s->n_feat);
00723
00724 s->kd_maxdepth = maxdepth;
00725 s->kd_maxbbi = maxbbi;
00726 return 0;
00727 }
00728
00729 static int32
00730 read_sendump(s2_semi_mgau_t *s, bin_mdef_t *mdef, char const *file)
00731 {
00732 FILE *fp;
00733 char line[1000];
00734 int32 i, n;
00735 int32 do_swap, do_mmap;
00736 size_t filesize, offset;
00737 int n_clust = 256;
00738
00739 int r = s->n_density;
00740 int c = bin_mdef_n_sen(mdef);
00741
00742 s->n_sen = c;
00743 do_mmap = cmd_ln_boolean_r(s->config, "-mmap");
00744
00745 if ((fp = fopen(file, "rb")) == NULL)
00746 return -1;
00747
00748 E_INFO("Loading senones from dump file %s\n", file);
00749
00750 fread(&n, sizeof(int32), 1, fp);
00751
00752 do_swap = 0;
00753 if (n < 1 || n > 999) {
00754 SWAP_INT32(&n);
00755 if (n < 1 || n > 999) {
00756 E_FATAL("Title length %x in dump file %s out of range\n", n, file);
00757 }
00758 do_swap = 1;
00759 }
00760 if (fread(line, sizeof(char), n, fp) != n)
00761 E_FATAL("Cannot read title\n");
00762 if (line[n - 1] != '\0')
00763 E_FATAL("Bad title in dump file\n");
00764 E_INFO("%s\n", line);
00765
00766
00767 fread(&n, 1, sizeof(n), fp);
00768 if (do_swap) SWAP_INT32(&n);
00769 if (fread(line, sizeof(char), n, fp) != n)
00770 E_FATAL("Cannot read header\n");
00771 if (line[n - 1] != '\0')
00772 E_FATAL("Bad header in dump file\n");
00773
00774
00775 for (;;) {
00776 fread(&n, 1, sizeof(n), fp);
00777 if (do_swap) SWAP_INT32(&n);
00778 if (n == 0)
00779 break;
00780 if (fread(line, sizeof(char), n, fp) != n)
00781 E_FATAL("Cannot read header\n");
00782
00783 if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
00784 n_clust = atoi(line + strlen("cluster_count "));
00785 }
00786 }
00787
00788
00789 fread(&r, 1, sizeof(r), fp);
00790 if (do_swap) SWAP_INT32(&r);
00791 fread(&c, 1, sizeof(c), fp);
00792 if (do_swap) SWAP_INT32(&c);
00793 E_INFO("Rows: %d, Columns: %d\n", r, c);
00794
00795 if (n_clust) {
00796 E_ERROR ("Dump file is incompatible with PocketSphinx\n");
00797 fclose(fp);
00798 return -1;
00799 }
00800 if (do_mmap) {
00801 E_INFO("Using memory-mapped I/O for senones\n");
00802 }
00803
00804 if ((c & 3) != 0) {
00805
00806 E_ERROR
00807 ("Number of PDFs (%d) not padded to multiple of 4, will not use mmap()\n",
00808 c);
00809 do_mmap = 0;
00810 }
00811 offset = ftell(fp);
00812 fseek(fp, 0, SEEK_END);
00813 filesize = ftell(fp);
00814 fseek(fp, offset, SEEK_SET);
00815 if ((offset & 3) != 0) {
00816 E_ERROR
00817 ("PDFs are not aligned to 4-byte boundary in file, will not use mmap()\n");
00818 do_mmap = 0;
00819 }
00820
00821
00822 if (do_mmap)
00823 s->sendump_mmap = mmio_file_read(file);
00824 if (s->sendump_mmap) {
00825 s->mixw = ckd_calloc(s->n_feat, sizeof(*s->mixw));
00826 for (i = 0; i < s->n_feat; i++) {
00827
00828 s->mixw[i] = ckd_calloc(r, sizeof(**s->mixw));
00829 }
00830
00831 for (n = 0; n < s->n_feat; n++) {
00832 for (i = 0; i < r; i++) {
00833 s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
00834 offset += c;
00835 }
00836 }
00837 }
00838 else {
00839 s->mixw = ckd_calloc_3d(s->n_feat, r, c, sizeof(***s->mixw));
00840
00841 for (n = 0; n < s->n_feat; n++) {
00842 for (i = 0; i < r; i++) {
00843 if (fread(s->mixw[n][i], sizeof(***s->mixw), c, fp) != (size_t) c)
00844 E_FATAL("fread failed\n");
00845 }
00846 }
00847 }
00848
00849 fclose(fp);
00850 return 0;
00851 }
00852
00853 static int32
00854 read_mixw(s2_semi_mgau_t * s, char const *file_name, double SmoothMin)
00855 {
00856 char **argname, **argval;
00857 char eofchk;
00858 FILE *fp;
00859 int32 byteswap, chksum_present;
00860 uint32 chksum;
00861 float32 *pdf;
00862 int32 i, f, c, n;
00863 int32 n_sen;
00864 int32 n_feat;
00865 int32 n_comp;
00866 int32 n_err;
00867
00868 E_INFO("Reading mixture weights file '%s'\n", file_name);
00869
00870 if ((fp = fopen(file_name, "rb")) == NULL)
00871 E_FATAL("fopen(%s,rb) failed\n", file_name);
00872
00873
00874 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
00875 E_FATAL("bio_readhdr(%s) failed\n", file_name);
00876
00877
00878 chksum_present = 0;
00879 for (i = 0; argname[i]; i++) {
00880 if (strcmp(argname[i], "version") == 0) {
00881 if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
00882 E_WARN("Version mismatch(%s): %s, expecting %s\n",
00883 file_name, argval[i], MGAU_MIXW_VERSION);
00884 }
00885 else if (strcmp(argname[i], "chksum0") == 0) {
00886 chksum_present = 1;
00887 }
00888 }
00889 bio_hdrarg_free(argname, argval);
00890 argname = argval = NULL;
00891
00892 chksum = 0;
00893
00894
00895 if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
00896 || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) !=
00897 1)
00898 || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) !=
00899 1)
00900 || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
00901 E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
00902 }
00903 if (n_feat != s->n_feat)
00904 E_FATAL("#Features streams(%d) != %d\n", n_feat, s->n_feat);
00905 if (n != n_sen * n_feat * n_comp) {
00906 E_FATAL
00907 ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
00908 file_name, i, n_sen, n_feat, n_comp);
00909 }
00910
00911
00912
00913
00914 s->n_sen = n_sen;
00915
00916
00917 s->mixw = ckd_calloc_3d(s->n_feat, s->n_density, n_sen, sizeof(***s->mixw));
00918
00919
00920 pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));
00921
00922
00923 n_err = 0;
00924 for (i = 0; i < n_sen; i++) {
00925 for (f = 0; f < n_feat; f++) {
00926 if (bio_fread((void *) pdf, sizeof(float32),
00927 n_comp, fp, byteswap, &chksum) != n_comp) {
00928 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
00929 }
00930
00931
00932 if (vector_sum_norm(pdf, n_comp) <= 0.0)
00933 n_err++;
00934 vector_floor(pdf, n_comp, SmoothMin);
00935 vector_sum_norm(pdf, n_comp);
00936
00937
00938 for (c = 0; c < n_comp; c++) {
00939 int32 qscr;
00940
00941 qscr = -logmath_log(s->lmath_8b, pdf[c]);
00942 if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
00943 qscr = MAX_NEG_MIXW;
00944 s->mixw[f][c][i] = qscr;
00945 }
00946 }
00947 }
00948 if (n_err > 0)
00949 E_ERROR("Weight normalization failed for %d senones\n", n_err);
00950
00951 ckd_free(pdf);
00952
00953 if (chksum_present)
00954 bio_verify_chksum(fp, byteswap, chksum);
00955
00956 if (fread(&eofchk, 1, 1, fp) == 1)
00957 E_FATAL("More data than expected in %s\n", file_name);
00958
00959 fclose(fp);
00960
00961 E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
00962 return n_sen;
00963 }
00964
00965
00966
00967 static int32
00968 s3_read_mgau(s2_semi_mgau_t *s, const char *file_name, float32 ***out_cb)
00969 {
00970 char tmp;
00971 FILE *fp;
00972 int32 i, blk, n;
00973 int32 n_mgau;
00974 int32 n_feat;
00975 int32 n_density;
00976 int32 *veclen;
00977 int32 byteswap, chksum_present;
00978 char **argname, **argval;
00979 uint32 chksum;
00980
00981 E_INFO("Reading S3 mixture gaussian file '%s'\n", file_name);
00982
00983 if ((fp = fopen(file_name, "rb")) == NULL)
00984 E_FATAL("fopen(%s,rb) failed\n", file_name);
00985
00986
00987 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
00988 E_FATAL("bio_readhdr(%s) failed\n", file_name);
00989
00990
00991 chksum_present = 0;
00992 for (i = 0; argname[i]; i++) {
00993 if (strcmp(argname[i], "version") == 0) {
00994 if (strcmp(argval[i], MGAU_PARAM_VERSION) != 0)
00995 E_WARN("Version mismatch(%s): %s, expecting %s\n",
00996 file_name, argval[i], MGAU_PARAM_VERSION);
00997 }
00998 else if (strcmp(argname[i], "chksum0") == 0) {
00999 chksum_present = 1;
01000 }
01001 }
01002 bio_hdrarg_free(argname, argval);
01003 argname = argval = NULL;
01004
01005 chksum = 0;
01006
01007
01008 if (bio_fread(&n_mgau, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
01009 E_FATAL("fread(%s) (#codebooks) failed\n", file_name);
01010 if (n_mgau != 1) {
01011 E_ERROR("%s: #codebooks (%d) != 1\n", file_name, n_mgau);
01012 fclose(fp);
01013 return -1;
01014 }
01015
01016
01017 if (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
01018 E_FATAL("fread(%s) (#features) failed\n", file_name);
01019 if (s->n_feat == 0)
01020 s->n_feat = n_feat;
01021 else if (n_feat != s->n_feat)
01022 E_FATAL("#Features streams(%d) != %d\n", n_feat, s->n_feat);
01023
01024
01025 if (bio_fread(&n_density, sizeof(int32), 1, fp,
01026 byteswap, &chksum) != 1)
01027 E_FATAL("fread(%s) (#density/codebook) failed\n", file_name);
01028 if (s->n_density == 0)
01029 s->n_density = n_density;
01030 else if (n_density != s->n_density)
01031 E_FATAL("%s: Number of densities per feature(%d) != %d\n",
01032 file_name, n_mgau, s->n_density);
01033
01034
01035 if (s->veclen == NULL)
01036 s->veclen = ckd_calloc(s->n_feat, sizeof(int32));
01037 veclen = ckd_calloc(s->n_feat, sizeof(int32));
01038 if (bio_fread(veclen, sizeof(int32), s->n_feat,
01039 fp, byteswap, &chksum) != s->n_feat)
01040 E_FATAL("fread(%s) (feature vector-length) failed\n", file_name);
01041 for (i = 0, blk = 0; i < s->n_feat; ++i) {
01042 if (s->veclen[i] == 0)
01043 s->veclen[i] = veclen[i];
01044 else if (veclen[i] != s->veclen[i])
01045 E_FATAL("feature stream length %d is inconsistent (%d != %d)\n",
01046 i, veclen[i], s->veclen[i]);
01047 blk += veclen[i];
01048 }
01049
01050
01051 if (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
01052 E_FATAL("fread(%s) (total #floats) failed\n", file_name);
01053 if (n != n_mgau * n_density * blk)
01054 E_FATAL
01055 ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
01056 file_name, n, n_mgau, n_density, blk);
01057
01058 *out_cb = ckd_calloc(s->n_feat, sizeof(float32 *));
01059 for (i = 0; i < s->n_feat; ++i) {
01060 (*out_cb)[i] =
01061 (float32 *) ckd_calloc(n_density * veclen[i],
01062 sizeof(float32));
01063 if (bio_fread
01064 ((*out_cb)[i], sizeof(float32),
01065 n_density * veclen[i], fp,
01066 byteswap, &chksum) != n_density * veclen[i])
01067 E_FATAL("fread(%s, %d) of feat %d failed\n", file_name,
01068 n_density * veclen[i], i);
01069 }
01070 ckd_free(veclen);
01071
01072 if (chksum_present)
01073 bio_verify_chksum(fp, byteswap, chksum);
01074
01075 if (fread(&tmp, 1, 1, fp) == 1)
01076 E_FATAL("%s: More data than expected\n", file_name);
01077
01078 fclose(fp);
01079
01080 E_INFO("%d mixture Gaussians, %d components, %d feature streams, veclen %d\n", n_mgau,
01081 n_density, n_feat, blk);
01082
01083 return n;
01084 }
01085
01086 static int32
01087 s3_precomp(s2_semi_mgau_t *s, logmath_t *lmath, float32 vFloor)
01088 {
01089 int feat;
01090
01091 for (feat = 0; feat < s->n_feat; ++feat) {
01092 float32 *fmp;
01093 mean_t *mp;
01094 var_t *vp, *dp;
01095 int32 vecLen, i;
01096
01097 vecLen = s->veclen[feat];
01098 fmp = (float32 *) s->means[feat];
01099 mp = s->means[feat];
01100 vp = s->vars[feat];
01101 dp = s->dets[feat];
01102
01103 for (i = 0; i < s->n_density; ++i) {
01104 var_t d;
01105 int32 j;
01106
01107 d = 0;
01108 for (j = 0; j < vecLen; ++j, ++vp, ++mp, ++fmp) {
01109 float64 fvar;
01110
01111 #ifdef FIXED_POINT
01112 *mp = FLOAT2FIX(*fmp);
01113 #endif
01114
01115 fvar = *(float32 *) vp;
01116 if (fvar < vFloor)
01117 fvar = vFloor;
01118 d += (var_t)logmath_log(lmath, 1 / sqrt(fvar * 2.0 * M_PI));
01119 *vp = (var_t)logmath_ln_to_log(lmath, 1.0 / (2.0 * fvar));
01120 }
01121 *dp++ = d;
01122 }
01123 }
01124 return 0;
01125 }
01126
01127 s2_semi_mgau_t *
01128 s2_semi_mgau_init(cmd_ln_t *config, logmath_t *lmath, bin_mdef_t *mdef)
01129 {
01130 s2_semi_mgau_t *s;
01131 char const *sendump_path;
01132 float32 **fgau;
01133 int i;
01134
01135 s = ckd_calloc(1, sizeof(*s));
01136 s->config = config;
01137
01138
01139 s->lmath_8b = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE);
01140 if (s->lmath_8b == NULL) {
01141 s2_semi_mgau_free(s);
01142 return NULL;
01143 }
01144
01145 if (logmath_get_width(s->lmath_8b) != 1) {
01146 E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
01147 logmath_get_base(s->lmath_8b));
01148 s2_semi_mgau_free(s);
01149 return NULL;
01150 }
01151
01152
01153 if (s3_read_mgau(s, cmd_ln_str_r(config, "-mean"), &fgau) < 0) {
01154 s2_semi_mgau_free(s);
01155 return NULL;
01156 }
01157 s->means = (mean_t **)fgau;
01158 if (s3_read_mgau(s, cmd_ln_str_r(config, "-var"), &fgau) < 0) {
01159 s2_semi_mgau_free(s);
01160 return NULL;
01161 }
01162 s->vars = (var_t **)fgau;
01163
01164
01165 s->dets = (var_t **)ckd_calloc_2d(s->n_feat, s->n_density, sizeof(**s->dets));
01166 s3_precomp(s, lmath, cmd_ln_float32_r(config, "-varfloor"));
01167
01168
01169 if ((sendump_path = cmd_ln_str_r(config, "-sendump")))
01170 read_sendump(s, mdef, sendump_path);
01171 else
01172 read_mixw(s, cmd_ln_str_r(config, "-mixw"),
01173 cmd_ln_float32_r(config, "-mixwfloor"));
01174 s->topn = cmd_ln_int32_r(config, "-topn");
01175 s->ds_ratio = cmd_ln_int32_r(config, "-ds");
01176
01177
01178 s->f = (vqFeature_t **) ckd_calloc_2d(s->n_feat, s->topn,
01179 sizeof(vqFeature_t));
01180 s->lastf = (vqFeature_t **) ckd_calloc_2d(s->n_feat, s->topn,
01181 sizeof(vqFeature_t));
01182 for (i = 0; i < s->n_feat; ++i) {
01183 int32 j;
01184 for (j = 0; j < s->topn; ++j) {
01185 s->lastf[i][j].score = WORST_DIST;
01186 s->lastf[i][j].codeword = j;
01187 }
01188 }
01189
01190 return s;
01191 }
01192
01193 void
01194 s2_semi_mgau_free(s2_semi_mgau_t * s)
01195 {
01196 uint32 i;
01197
01198 logmath_free(s->lmath_8b);
01199 if (s->sendump_mmap) {
01200 for (i = 0; i < s->n_feat; ++i) {
01201 ckd_free(s->mixw[i]);
01202 }
01203 ckd_free(s->mixw);
01204 mmio_file_unmap(s->sendump_mmap);
01205 }
01206 else {
01207 ckd_free_3d(s->mixw);
01208 }
01209 for (i = 0; i < s->n_feat; ++i) {
01210 ckd_free(s->means[i]);
01211 ckd_free(s->vars[i]);
01212 }
01213 for (i = 0; i < s->n_kdtrees; ++i)
01214 free_kd_tree(s->kdtrees[i]);
01215 ckd_free(s->kdtrees);
01216 ckd_free(s->veclen);
01217 ckd_free(s->means);
01218 ckd_free(s->vars);
01219 ckd_free_2d((void **)s->f);
01220 ckd_free_2d((void **)s->lastf);
01221 ckd_free_2d((void **)s->dets);
01222 ckd_free(s);
01223 }