00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080 #include <stdio.h>
00081 #include <string.h>
00082 #include <assert.h>
00083
00084
00085 #include <ckd_alloc.h>
00086 #include <err.h>
00087
00088
00089 #include "fsg_lextree.h"
00090
00091 #define __FSG_DBG__ 0
00092
00099 static fsg_pnode_t *fsg_psubtree_init(fsg_lextree_t *tree,
00100 fsg_model_t *fsg,
00101 int32 from_state,
00102 fsg_pnode_t **alloc_head);
00103
00108 static void fsg_psubtree_free(fsg_pnode_t *alloc_head);
00109
00114 static void fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t *alloc_head, FILE *fp);
00115
00119 static void
00120 fsg_lextree_lc_rc(fsg_lextree_t *lextree)
00121 {
00122 int32 s, d, i, j;
00123 int32 n_ci;
00124 gnode_t *gn;
00125 fsg_model_t *fsg;
00126 fsg_link_t *l;
00127 int32 silcipid;
00128 int32 len;
00129
00130 silcipid = bin_mdef_ciphone_id(lextree->mdef, "SIL");
00131 assert(silcipid >= 0);
00132 n_ci = bin_mdef_n_ciphone(lextree->mdef);
00133
00134 fsg = lextree->fsg;
00135
00136
00137
00138
00139 lextree->lc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->lc));
00140 lextree->rc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->rc));
00141
00142 for (s = 0; s < fsg->n_state; s++) {
00143 for (d = 0; d < fsg->n_state; d++) {
00144 for (gn = fsg->trans[s][d]; gn; gn = gnode_next(gn)) {
00145 int32 dictwid;
00147 l = (fsg_link_t *) gnode_ptr(gn);
00148 assert(l->wid >= 0);
00149 dictwid = dict_to_id(lextree->dict,
00150 fsg_model_word_str(lextree->fsg, l->wid));
00151
00152
00153
00154
00155
00156
00157
00158
00159 if (fsg_model_is_filler(fsg, l->wid)) {
00160
00161 lextree->rc[s][silcipid] = 1;
00162 lextree->lc[d][silcipid] = 1;
00163 }
00164 else {
00165 len = dict_pronlen(lextree->dict, dictwid);
00166 lextree->rc[s][dict_ciphone(lextree->dict, dictwid, 0)] = 1;
00167 lextree->lc[d][dict_ciphone(lextree->dict, dictwid, len - 1)] = 1;
00168 }
00169 }
00170 }
00171
00172
00173
00174
00175
00176
00177
00178
00179 lextree->lc[s][silcipid] = 1;
00180 lextree->rc[s][silcipid] = 1;
00181 }
00182
00183
00184
00185
00186
00187
00188 for (s = 0; s < fsg->n_state; s++) {
00189 for (d = 0; d < fsg->n_state; d++) {
00190 l = fsg->null_trans[s][d];
00191 if (l) {
00192
00193
00194
00195
00196 for (i = 0; i < n_ci; i++)
00197 lextree->lc[d][i] |= lextree->lc[s][i];
00198
00199
00200
00201
00202
00203 for (i = 0; i < n_ci; i++)
00204 lextree->rc[s][i] |= lextree->rc[d][i];
00205 }
00206 }
00207 }
00208
00209
00210 for (s = 0; s < fsg->n_state; s++) {
00211 j = 0;
00212 for (i = 0; i < n_ci; i++) {
00213 if (lextree->lc[s][i]) {
00214 lextree->lc[s][j] = i;
00215 j++;
00216 }
00217 }
00218 lextree->lc[s][j] = -1;
00219
00220 j = 0;
00221 for (i = 0; i < n_ci; i++) {
00222 if (lextree->rc[s][i]) {
00223 lextree->rc[s][j] = i;
00224 j++;
00225 }
00226 }
00227 lextree->rc[s][j] = -1;
00228 }
00229 }
00230
00231
00232
00233
00234 fsg_lextree_t *
00235 fsg_lextree_init(fsg_model_t * fsg, dict_t *dict,
00236 bin_mdef_t *mdef, hmm_context_t *ctx,
00237 int32 wip, int32 pip)
00238 {
00239 int32 s;
00240 fsg_lextree_t *lextree;
00241 fsg_pnode_t *pn;
00242
00243 lextree = ckd_calloc(1, sizeof(fsg_lextree_t));
00244 lextree->fsg = fsg;
00245 lextree->root = ckd_calloc(fsg_model_n_state(fsg),
00246 sizeof(fsg_pnode_t *));
00247 lextree->alloc_head = ckd_calloc(fsg_model_n_state(fsg),
00248 sizeof(fsg_pnode_t *));
00249 lextree->ctx = ctx;
00250 lextree->dict = dict;
00251 lextree->mdef = mdef;
00252 lextree->wip = wip;
00253 lextree->pip = pip;
00254
00255
00256 fsg_lextree_lc_rc(lextree);
00257
00258
00259 lextree->n_pnode = 0;
00260 for (s = 0; s < fsg_model_n_state(fsg); s++) {
00261 lextree->root[s] =
00262 fsg_psubtree_init(lextree, fsg, s, &(lextree->alloc_head[s]));
00263
00264 for (pn = lextree->alloc_head[s]; pn; pn = pn->alloc_next)
00265 lextree->n_pnode++;
00266 }
00267 E_INFO("%d HMM nodes in lextree\n", lextree->n_pnode);
00268
00269 #if __FSG_DBG__
00270 fsg_lextree_dump(lextree, stdout);
00271 #endif
00272
00273 return lextree;
00274 }
00275
00276
00277 void
00278 fsg_lextree_dump(fsg_lextree_t * lextree, FILE * fp)
00279 {
00280 int32 s;
00281
00282 for (s = 0; s < fsg_model_n_state(lextree->fsg); s++) {
00283 fprintf(fp, "State %5d root %p\n", s, lextree->root[s]);
00284 fsg_psubtree_dump(lextree, lextree->alloc_head[s], fp);
00285 }
00286 fflush(fp);
00287 }
00288
00289
00290 void
00291 fsg_lextree_free(fsg_lextree_t * lextree)
00292 {
00293 int32 s;
00294
00295 if (lextree == NULL)
00296 return;
00297
00298 if (lextree->fsg)
00299 for (s = 0; s < fsg_model_n_state(lextree->fsg); s++)
00300 fsg_psubtree_free(lextree->alloc_head[s]);
00301
00302 ckd_free_2d(lextree->lc);
00303 ckd_free_2d(lextree->rc);
00304 ckd_free(lextree->root);
00305 ckd_free(lextree->alloc_head);
00306 ckd_free(lextree);
00307 }
00308
00309 void
00310 fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t * ctxt)
00311 {
00312 int32 i;
00313
00314 for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
00315 ctxt->bv[i] = 0xffffffff;
00316 }
00317
00318
00319 uint32
00320 fsg_pnode_ctxt_sub(fsg_pnode_ctxt_t * src, fsg_pnode_ctxt_t * sub)
00321 {
00322 int32 i;
00323 uint32 non_zero;
00324
00325 non_zero = 0;
00326
00327 for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++) {
00328 src->bv[i] = ~(sub->bv[i]) & src->bv[i];
00329 non_zero |= src->bv[i];
00330 }
00331
00332 return non_zero;
00333 }
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347 static fsg_pnode_t *
00348 psubtree_add_trans(fsg_lextree_t *lextree,
00349 fsg_pnode_t * root,
00350 fsg_link_t * fsglink,
00351 int16 *lclist, int16 *rclist,
00352 fsg_pnode_t ** alloc_head)
00353 {
00354 uint16 **lcfwd;
00355
00356 uint16 **lcbwd;
00357
00358
00359 uint16 **lcbwdperm;
00360
00361 uint16 **rcbwd;
00362
00363 uint16 **rcfwd;
00364
00365 uint16 **rcfwdperm;
00366
00367 int32 silcipid;
00368 int32 pronlen;
00369 int32 wid;
00370 int32 dictwid;
00371 int32 did;
00372 int32 ssid;
00373 gnode_t *gn;
00374 fsg_pnode_t *pnode, *pred, *head;
00375 int32 n_ci, p, lc, rc;
00376 glist_t lc_pnodelist;
00377 glist_t rc_pnodelist;
00378 fsg_pnode_t **ssid_pnode_map;
00379 int32 i, j;
00380
00381 silcipid = bin_mdef_ciphone_id(lextree->mdef, "SIL");
00382 n_ci = bin_mdef_n_ciphone(lextree->mdef);
00383 lcfwd = lextree->dict->lcFwdTable;
00384 lcbwd = lextree->dict->lcBwdTable;
00385 lcbwdperm = lextree->dict->lcBwdPermTable;
00386 rcbwd = lextree->dict->rcBwdTable;
00387 rcfwd = lextree->dict->rcFwdTable;
00388 rcfwdperm = lextree->dict->rcFwdPermTable;
00389
00390 wid = fsg_link_wid(fsglink);
00391 assert(wid >= 0);
00392 dictwid = dict_to_id(lextree->dict,
00393 fsg_model_word_str(lextree->fsg, wid));
00394 pronlen = dict_pronlen(lextree->dict, dictwid);
00395 assert(pronlen >= 1);
00396
00397 assert(lclist[0] >= 0);
00398 assert(rclist[0] >= 0);
00399
00400 head = *alloc_head;
00401 pred = NULL;
00402
00403 if (pronlen == 1) {
00404 did = dict_phone(lextree->dict, dictwid, 0);
00405 if (dict_mpx(lextree->dict, dictwid)) {
00406
00407
00408
00409
00410 lc_pnodelist = NULL;
00411
00412 for (i = 0; lclist[i] >= 0; i++) {
00413 lc = lclist[i];
00414 ssid = lcfwd[did][lc];
00415
00416
00417 for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
00418 pnode = (fsg_pnode_t *) gnode_ptr(gn);
00419
00420 if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) {
00421
00422 fsg_pnode_add_ctxt(pnode, lc);
00423 break;
00424 }
00425 }
00426
00427 if (!gn) {
00428 pnode =
00429 (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
00430 pnode->ctx = lextree->ctx;
00431 pnode->next.fsglink = fsglink;
00432 pnode->logs2prob =
00433 fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip;
00434 pnode->ci_ext = dict_ciphone(lextree->dict, dictwid, 0);
00435 pnode->ppos = 0;
00436 pnode->leaf = TRUE;
00437 pnode->sibling = root;
00438 fsg_pnode_add_ctxt(pnode, lc);
00439 pnode->alloc_next = head;
00440 head = pnode;
00441 root = pnode;
00442
00443 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);
00444
00445 lc_pnodelist =
00446 glist_add_ptr(lc_pnodelist, (void *) pnode);
00447 }
00448 }
00449
00450 glist_free(lc_pnodelist);
00451 }
00452 else {
00453 ssid = did;
00454
00455 pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
00456 pnode->ctx = lextree->ctx;
00457 pnode->next.fsglink = fsglink;
00458 pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip;
00459 pnode->ci_ext = silcipid;
00460 pnode->ppos = 0;
00461 pnode->leaf = TRUE;
00462 pnode->sibling = root;
00463 fsg_pnode_add_all_ctxt(&(pnode->ctxt));
00464 pnode->alloc_next = head;
00465 head = pnode;
00466 root = pnode;
00467
00468 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);
00469 }
00470 }
00471 else {
00472 assert(dict_mpx(lextree->dict, dictwid));
00473
00474 ssid_pnode_map =
00475 (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *));
00476 lc_pnodelist = NULL;
00477 rc_pnodelist = NULL;
00478
00479 for (p = 0; p < pronlen; p++) {
00480 did = ssid = dict_phone(lextree->dict, dictwid, p);
00481
00482 if (p == 0) {
00483 for (i = 0; lclist[i] >= 0; i++) {
00484 lc = lclist[i];
00485
00486 j = lcbwdperm[did][lc];
00487 ssid = lcbwd[did][j];
00488 pnode = ssid_pnode_map[j];
00489
00490 if (!pnode) {
00491 pnode =
00492 (fsg_pnode_t *) ckd_calloc(1,
00493 sizeof
00494 (fsg_pnode_t));
00495 pnode->ctx = lextree->ctx;
00496 pnode->logs2prob =
00497 fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip;
00498 pnode->ci_ext = dict_ciphone(lextree->dict, dictwid, 0);
00499 pnode->ppos = 0;
00500 pnode->leaf = FALSE;
00501 pnode->sibling = root;
00502 pnode->alloc_next = head;
00503 head = pnode;
00504 root = pnode;
00505
00506 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);
00507
00508 lc_pnodelist =
00509 glist_add_ptr(lc_pnodelist, (void *) pnode);
00510 ssid_pnode_map[j] = pnode;
00511 }
00512 else {
00513 assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid);
00514 }
00515 fsg_pnode_add_ctxt(pnode, lc);
00516 }
00517 }
00518 else if (p != pronlen - 1) {
00519 pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
00520 pnode->ctx = lextree->ctx;
00521 pnode->logs2prob = lextree->pip;
00522 pnode->ci_ext = dict_ciphone(lextree->dict, dictwid, p);
00523 pnode->ppos = p;
00524 pnode->leaf = FALSE;
00525 pnode->sibling = NULL;
00526 if (p == 1) {
00527 for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
00528 pred = (fsg_pnode_t *) gnode_ptr(gn);
00529 pred->next.succ = pnode;
00530 }
00531 }
00532 else {
00533 pred->next.succ = pnode;
00534 }
00535 pnode->alloc_next = head;
00536 head = pnode;
00537
00538 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);
00539
00540 pred = pnode;
00541 }
00542 else {
00543 memset((void *) ssid_pnode_map, 0,
00544 n_ci * sizeof(fsg_pnode_t *));
00545
00546 for (i = 0; rclist[i] >= 0; i++) {
00547 rc = rclist[i];
00548
00549 j = rcfwdperm[did][rc];
00550 ssid = rcfwd[did][j];
00551 pnode = ssid_pnode_map[j];
00552
00553 if (!pnode) {
00554 pnode =
00555 (fsg_pnode_t *) ckd_calloc(1,
00556 sizeof
00557 (fsg_pnode_t));
00558 pnode->ctx = lextree->ctx;
00559 pnode->logs2prob = lextree->pip;
00560 pnode->ci_ext = dict_ciphone(lextree->dict, dictwid, p);
00561 pnode->ppos = p;
00562 pnode->leaf = TRUE;
00563 pnode->sibling = rc_pnodelist ?
00564 (fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL;
00565 pnode->next.fsglink = fsglink;
00566 pnode->alloc_next = head;
00567 head = pnode;
00568
00569 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);
00570
00571 rc_pnodelist =
00572 glist_add_ptr(rc_pnodelist, (void *) pnode);
00573 ssid_pnode_map[j] = pnode;
00574 }
00575 else {
00576 assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid);
00577 }
00578 fsg_pnode_add_ctxt(pnode, rc);
00579 }
00580
00581 if (p == 1) {
00582 for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
00583 pred = (fsg_pnode_t *) gnode_ptr(gn);
00584 pred->next.succ =
00585 (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
00586 }
00587 }
00588 else {
00589 pred->next.succ =
00590 (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
00591 }
00592 }
00593 }
00594
00595 ckd_free((void *) ssid_pnode_map);
00596 glist_free(lc_pnodelist);
00597 glist_free(rc_pnodelist);
00598 }
00599
00600 *alloc_head = head;
00601
00602 return root;
00603 }
00604
00605
00606
00607
00608
00609 static fsg_pnode_t *
00610 fsg_psubtree_init(fsg_lextree_t *lextree,
00611 fsg_model_t * fsg, int32 from_state,
00612 fsg_pnode_t ** alloc_head)
00613 {
00614 int32 dst;
00615 gnode_t *gn;
00616 fsg_link_t *fsglink;
00617 fsg_pnode_t *root;
00618 int32 n_ci;
00619
00620 root = NULL;
00621 assert(*alloc_head == NULL);
00622
00623 n_ci = bin_mdef_n_ciphone(lextree->mdef);
00624 if (n_ci > (FSG_PNODE_CTXT_BVSZ * 32)) {
00625 E_FATAL
00626 ("#phones > %d; increase FSG_PNODE_CTXT_BVSZ and recompile\n",
00627 FSG_PNODE_CTXT_BVSZ * 32);
00628 }
00629 for (dst = 0; dst < fsg_model_n_state(fsg); dst++) {
00630
00631 for (gn = fsg_model_trans(fsg, from_state, dst); gn;
00632 gn = gnode_next(gn)) {
00633
00634 fsglink = (fsg_link_t *) gnode_ptr(gn);
00635
00636 assert(fsg_link_wid(fsglink) >= 0);
00637
00638 root = psubtree_add_trans(lextree, root, fsglink,
00639 lextree->lc[from_state],
00640 lextree->rc[dst],
00641 alloc_head);
00642 }
00643 }
00644
00645 return root;
00646 }
00647
00648
00649 static void
00650 fsg_psubtree_free(fsg_pnode_t * head)
00651 {
00652 fsg_pnode_t *next;
00653
00654 while (head) {
00655 next = head->alloc_next;
00656 hmm_deinit(&head->hmm);
00657 ckd_free(head);
00658 head = next;
00659 }
00660 }
00661
00662 static void
00663 fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t * head, FILE * fp)
00664 {
00665 int32 i;
00666 fsg_link_t *tl;
00667
00668 for (; head; head = head->alloc_next) {
00669
00670 for (i = 0; i <= head->ppos; i++)
00671 fprintf(fp, " ");
00672
00673 fprintf(fp, "%p.@", head);
00674 fprintf(fp, " %5d.SS", hmm_nonmpx_ssid(&head->hmm));
00675 fprintf(fp, " %10d.LP", head->logs2prob);
00676 fprintf(fp, " %p.SIB", head->sibling);
00677 fprintf(fp, " %s.%d", bin_mdef_ciphone_str(tree->mdef, head->ci_ext), head->ppos);
00678 if ((head->ppos == 0) || head->leaf) {
00679 fprintf(fp, " [");
00680 for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
00681 fprintf(fp, "%08x", head->ctxt.bv[i]);
00682 fprintf(fp, "]");
00683 }
00684 if (head->leaf) {
00685 tl = head->next.fsglink;
00686 fprintf(fp, " {%s[%d->%d](%d)}",
00687 fsg_model_word_str(tree->fsg, tl->wid),
00688 tl->from_state, tl->to_state, tl->logs2prob);
00689 }
00690 else {
00691 fprintf(fp, " %p.NXT", head->next.succ);
00692 }
00693 fprintf(fp, "\n");
00694 }
00695
00696 fflush(fp);
00697 }
00698
00699
00700 void
00701 fsg_psubtree_pnode_deactivate(fsg_pnode_t * pnode)
00702 {
00703 hmm_clear(&pnode->hmm);
00704 }