00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00042 #include "ngram_model_set.h"
00043
00044 #include <err.h>
00045 #include <ckd_alloc.h>
00046 #include <strfuncs.h>
00047 #include <filename.h>
00048
00049 #include <string.h>
00050 #include <stdlib.h>
00051
00052 static ngram_funcs_t ngram_model_set_funcs;
00053
/**
 * qsort() comparator for an array of C strings.
 *
 * The word "<UNK>" is ordered before every other word; all remaining
 * words are ordered with strcmp().
 *
 * @param a Pointer to the first array element (itself a char *).
 * @param b Pointer to the second array element (itself a char *).
 * @return Negative, zero or positive, with the same meaning as strcmp().
 */
static int
my_compare(const void *a, const void *b)
{
    const char *lhs = *(char * const *)a;
    const char *rhs = *(char * const *)b;
    int lhs_is_unk = (strcmp(lhs, "<UNK>") == 0);
    int rhs_is_unk = (strcmp(rhs, "<UNK>") == 0);

    /* Two "<UNK>" entries must compare equal, otherwise the ordering
     * handed to qsort() is inconsistent. */
    if (lhs_is_unk && rhs_is_unk)
        return 0;
    if (lhs_is_unk)
        return -1;
    if (rhs_is_unk)
        return 1;
    return strcmp(lhs, rhs);
}
00065
00066 static void
00067 build_widmap(ngram_model_t *base, logmath_t *lmath, int32 n)
00068 {
00069 ngram_model_set_t *set = (ngram_model_set_t *)base;
00070 ngram_model_t **models = set->lms;
00071 hash_table_t *vocab;
00072 glist_t hlist;
00073 gnode_t *gn;
00074 int32 i;
00075
00076
00077 vocab = hash_table_new(models[0]->n_words, FALSE);
00078
00079 for (i = 0; i < set->n_models; ++i) {
00080 int32 j;
00081 for (j = 0; j < models[i]->n_words; ++j) {
00082
00083 (void)hash_table_enter_int32(vocab, models[i]->word_str[j], j);
00084 }
00085 }
00086
00087 if (hash_table_lookup(vocab, "<UNK>", NULL) != 0)
00088 (void)hash_table_enter_int32(vocab, "<UNK>", 0);
00089
00090 ngram_model_init(base, &ngram_model_set_funcs, lmath, n, hash_table_inuse(vocab));
00091 base->writable = FALSE;
00092 i = 0;
00093 hlist = hash_table_tolist(vocab, NULL);
00094 for (gn = hlist; gn; gn = gnode_next(gn)) {
00095 hash_entry_t *ent = gnode_ptr(gn);
00096 base->word_str[i++] = (char *)ent->key;
00097 }
00098 glist_free(hlist);
00099 qsort(base->word_str, base->n_words, sizeof(*base->word_str), my_compare);
00100
00101
00102 if (set->widmap)
00103 ckd_free_2d((void **)set->widmap);
00104 set->widmap = (int32 **) ckd_calloc_2d(base->n_words, set->n_models,
00105 sizeof(**set->widmap));
00106 for (i = 0; i < base->n_words; ++i) {
00107 int32 j;
00108
00109 (void)hash_table_enter_int32(base->wid, base->word_str[i], i);
00110
00111 for (j = 0; j < set->n_models; ++j) {
00112 set->widmap[i][j] = ngram_wid(models[j], base->word_str[i]);
00113
00114 }
00115
00116 }
00117 hash_table_free(vocab);
00118 }
00119
00120 ngram_model_t *
00121 ngram_model_set_init(cmd_ln_t *config,
00122 ngram_model_t **models,
00123 char **names,
00124 const float32 *weights,
00125 int32 n_models)
00126 {
00127 ngram_model_set_t *model;
00128 ngram_model_t *base;
00129 logmath_t *lmath;
00130 int32 i, n;
00131
00132 if (n_models == 0)
00133 return NULL;
00134
00135
00136
00137 lmath = models[0]->lmath;
00138 for (i = 0; i < n_models; ++i) {
00139 if (logmath_get_base(models[i]->lmath) != logmath_get_base(lmath)
00140 || logmath_get_shift(models[i]->lmath) != logmath_get_shift(lmath)) {
00141 E_ERROR("Log-math parameters don't match, will not create LM set\n");
00142 return NULL;
00143 }
00144 }
00145
00146
00147 model = ckd_calloc(1, sizeof(*model));
00148 base = &model->base;
00149 model->n_models = n_models;
00150 model->lms = ckd_calloc(n_models, sizeof(*model->lms));
00151 model->names = ckd_calloc(n_models, sizeof(*model->names));
00152
00153 model->lweights = ckd_calloc(n_models, sizeof(*model->lweights));
00154 {
00155 int32 uniform = logmath_log(lmath, 1.0/n_models);
00156 for (i = 0; i < n_models; ++i)
00157 model->lweights[i] = uniform;
00158 }
00159
00160 if (weights)
00161 model->cur = -1;
00162
00163 n = 0;
00164 for (i = 0; i < n_models; ++i) {
00165 model->lms[i] = models[i];
00166 model->names[i] = ckd_salloc(names[i]);
00167 if (weights)
00168 model->lweights[i] = logmath_log(lmath, weights[i]);
00169
00170 if (models[i]->n > n)
00171 n = models[i]->n;
00172 }
00173
00174 model->maphist = ckd_calloc(n - 1, sizeof(*model->maphist));
00175
00176
00177 build_widmap(base, lmath, n);
00178 return base;
00179 }
00180
00181 ngram_model_t *
00182 ngram_model_set_read(cmd_ln_t *config,
00183 const char *lmctlfile,
00184 logmath_t *lmath)
00185 {
00186 FILE *ctlfp;
00187 glist_t lms = NULL;
00188 glist_t lmnames = NULL;
00189 __BIGSTACKVARIABLE__ char str[1024];
00190 ngram_model_t *set = NULL;
00191 hash_table_t *classes;
00192 char *basedir, *c;
00193
00194
00195
00196 classes = hash_table_new(0, FALSE);
00197 if ((ctlfp = fopen(lmctlfile, "r")) == NULL) {
00198 E_ERROR_SYSTEM("Failed to open %s", lmctlfile);
00199 return NULL;
00200 }
00201
00202
00203
00204 if ((c = strrchr(lmctlfile, '/')) || (c = strrchr(lmctlfile, '\\'))) {
00205
00206 basedir = ckd_calloc(c - lmctlfile + 2, 1);
00207 memcpy(basedir, lmctlfile, c - lmctlfile + 1);
00208 }
00209 else {
00210 basedir = NULL;
00211 }
00212 E_INFO("Reading LM control file '%s'\n", lmctlfile);
00213 if (basedir)
00214 E_INFO("Will prepend '%s' to unqualified paths\n", basedir);
00215
00216 if (fscanf(ctlfp, "%1023s", str) == 1) {
00217 if (strcmp(str, "{") == 0) {
00218
00219 while ((fscanf(ctlfp, "%1023s", str) == 1)
00220 && (strcmp(str, "}") != 0)) {
00221 char *deffile;
00222 if (basedir && !path_is_absolute(str))
00223 deffile = string_join(basedir, str, NULL);
00224 else
00225 deffile = ckd_salloc(str);
00226 E_INFO("Reading classdef from '%s'\n", deffile);
00227 if (read_classdef_file(classes, deffile) < 0) {
00228 ckd_free(deffile);
00229 goto error_out;
00230 }
00231 ckd_free(deffile);
00232 }
00233
00234 if (strcmp(str, "}") != 0) {
00235 E_ERROR("Unexpected EOF in %s\n", lmctlfile);
00236 goto error_out;
00237 }
00238
00239
00240 if (fscanf(ctlfp, "%1023s", str) != 1)
00241 str[0] = '\0';
00242 }
00243 }
00244 else
00245 str[0] = '\0';
00246
00247
00248 while (str[0] != '\0') {
00249 char *lmfile;
00250 ngram_model_t *lm;
00251
00252 if (basedir && str[0] != '/' && str[0] != '\\')
00253 lmfile = string_join(basedir, str, NULL);
00254 else
00255 lmfile = ckd_salloc(str);
00256 E_INFO("Reading lm from '%s'\n", lmfile);
00257 lm = ngram_model_read(config, lmfile, NGRAM_AUTO, lmath);
00258 if (lm == NULL) {
00259 ckd_free(lmfile);
00260 goto error_out;
00261 }
00262 if (fscanf(ctlfp, "%1023s", str) != 1) {
00263 E_ERROR("LMname missing after LMFileName '%s'\n", lmfile);
00264 ckd_free(lmfile);
00265 goto error_out;
00266 }
00267 ckd_free(lmfile);
00268 lms = glist_add_ptr(lms, lm);
00269 lmnames = glist_add_ptr(lmnames, ckd_salloc(str));
00270
00271 if (fscanf(ctlfp, "%1023s", str) == 1) {
00272 if (strcmp(str, "{") == 0) {
00273
00274 while ((fscanf(ctlfp, "%1023s", str) == 1) &&
00275 (strcmp(str, "}") != 0)) {
00276 void *val;
00277 classdef_t *classdef;
00278
00279 E_INFO("Adding class '%s'\n", str);
00280 if (hash_table_lookup(classes, str, &val) == -1) {
00281 E_ERROR("Unknown class %s in control file\n", str);
00282 goto error_out;
00283 }
00284 classdef = val;
00285 if (ngram_model_add_class(lm, str, 1.0,
00286 classdef->words, classdef->weights,
00287 classdef->n_words) < 0) {
00288 goto error_out;
00289 }
00290 }
00291 if (strcmp(str, "}") != 0) {
00292 E_ERROR("Unexpected EOF in %s\n", lmctlfile);
00293 goto error_out;
00294 }
00295 if (fscanf(ctlfp, "%1023s", str) != 1)
00296 str[0] = '\0';
00297 }
00298 }
00299 else
00300 str[0] = '\0';
00301 }
00302 fclose(ctlfp);
00303
00304
00305
00306 lms = glist_reverse(lms);
00307 lmnames = glist_reverse(lmnames);
00308 {
00309 int32 n_models;
00310 ngram_model_t **lm_array;
00311 char **name_array;
00312 gnode_t *lm_node, *name_node;
00313 int32 i;
00314
00315 n_models = glist_count(lms);
00316 lm_array = ckd_calloc(n_models, sizeof(*lm_array));
00317 name_array = ckd_calloc(n_models, sizeof(*name_array));
00318 lm_node = lms;
00319 name_node = lmnames;
00320 for (i = 0; i < n_models; ++i) {
00321 lm_array[i] = gnode_ptr(lm_node);
00322 name_array[i] = gnode_ptr(name_node);
00323 lm_node = gnode_next(lm_node);
00324 name_node = gnode_next(name_node);
00325 }
00326 set = ngram_model_set_init(config, lm_array, name_array,
00327 NULL, n_models);
00328 ckd_free(lm_array);
00329 ckd_free(name_array);
00330 }
00331 error_out:
00332 {
00333 gnode_t *gn;
00334 glist_t hlist;
00335
00336 if (set == NULL) {
00337 for (gn = lms; gn; gn = gnode_next(gn)) {
00338 ngram_model_free(gnode_ptr(gn));
00339 }
00340 }
00341 glist_free(lms);
00342 for (gn = lmnames; gn; gn = gnode_next(gn)) {
00343 ckd_free(gnode_ptr(gn));
00344 }
00345 glist_free(lmnames);
00346 hlist = hash_table_tolist(classes, NULL);
00347 for (gn = hlist; gn; gn = gnode_next(gn)) {
00348 hash_entry_t *he = gnode_ptr(gn);
00349 ckd_free((char *)he->key);
00350 classdef_free(he->val);
00351 }
00352 glist_free(hlist);
00353 hash_table_free(classes);
00354 ckd_free(basedir);
00355 }
00356 return set;
00357 }
00358
00359 int32
00360 ngram_model_set_count(ngram_model_t *base)
00361 {
00362 ngram_model_set_t *set = (ngram_model_set_t *)base;
00363 return set->n_models;
00364 }
00365
00366 ngram_model_set_iter_t *
00367 ngram_model_set_iter(ngram_model_t *base)
00368 {
00369 ngram_model_set_t *set = (ngram_model_set_t *)base;
00370 ngram_model_set_iter_t *itor;
00371
00372 if (set == NULL || set->n_models == 0)
00373 return NULL;
00374 itor = ckd_calloc(1, sizeof(*itor));
00375 itor->set = set;
00376 return itor;
00377 }
00378
00379 ngram_model_set_iter_t *
00380 ngram_model_set_iter_next(ngram_model_set_iter_t *itor)
00381 {
00382 if (++itor->cur == itor->set->n_models) {
00383 ngram_model_set_iter_free(itor);
00384 return NULL;
00385 }
00386 return itor;
00387 }
00388
00389 void
00390 ngram_model_set_iter_free(ngram_model_set_iter_t *itor)
00391 {
00392 ckd_free(itor);
00393 }
00394
00395 ngram_model_t *
00396 ngram_model_set_iter_model(ngram_model_set_iter_t *itor,
00397 char const **lmname)
00398 {
00399 if (lmname) *lmname = itor->set->names[itor->cur];
00400 return itor->set->lms[itor->cur];
00401 }
00402
00403 ngram_model_t *
00404 ngram_model_set_lookup(ngram_model_t *base,
00405 const char *name)
00406 {
00407 ngram_model_set_t *set = (ngram_model_set_t *)base;
00408 int32 i;
00409
00410 if (name == NULL) {
00411 if (set->cur == -1)
00412 return NULL;
00413 else
00414 return set->lms[set->cur];
00415 }
00416
00417
00418 for (i = 0; i < set->n_models; ++i)
00419 if (0 == strcmp(set->names[i], name))
00420 break;
00421 if (i == set->n_models)
00422 return NULL;
00423 return set->lms[i];
00424 }
00425
00426 ngram_model_t *
00427 ngram_model_set_select(ngram_model_t *base,
00428 const char *name)
00429 {
00430 ngram_model_set_t *set = (ngram_model_set_t *)base;
00431 int32 i;
00432
00433
00434 for (i = 0; i < set->n_models; ++i)
00435 if (0 == strcmp(set->names[i], name))
00436 break;
00437 if (i == set->n_models)
00438 return NULL;
00439 set->cur = i;
00440 return set->lms[set->cur];
00441 }
00442
00443 const char *
00444 ngram_model_set_current(ngram_model_t *base)
00445 {
00446 ngram_model_set_t *set = (ngram_model_set_t *)base;
00447
00448 if (set->cur == -1)
00449 return NULL;
00450 else
00451 return set->names[set->cur];
00452 }
00453
00454 int32
00455 ngram_model_set_current_wid(ngram_model_t *base,
00456 int32 set_wid)
00457 {
00458 ngram_model_set_t *set = (ngram_model_set_t *)base;
00459
00460 if (set->cur == -1 || set_wid >= base->n_words)
00461 return NGRAM_INVALID_WID;
00462 else
00463 return set->widmap[set->cur][set_wid];
00464 }
00465
00466 int32
00467 ngram_model_set_known_wid(ngram_model_t *base,
00468 int32 set_wid)
00469 {
00470 ngram_model_set_t *set = (ngram_model_set_t *)base;
00471
00472 if (set_wid >= base->n_words)
00473 return FALSE;
00474 else if (set->cur == -1) {
00475 int32 i;
00476 for (i = 0; i < set->n_models; ++i) {
00477 if (set->widmap[i][set_wid] != ngram_unknown_wid(set->lms[i]))
00478 return TRUE;
00479 }
00480 return FALSE;
00481 }
00482 else
00483 return (set->widmap[set_wid][set->cur]
00484 != ngram_unknown_wid(set->lms[set->cur]));
00485 }
00486
00487 ngram_model_t *
00488 ngram_model_set_interp(ngram_model_t *base,
00489 const char **names,
00490 const float32 *weights)
00491 {
00492 ngram_model_set_t *set = (ngram_model_set_t *)base;
00493
00494
00495 if (names && weights) {
00496 int32 i, j;
00497
00498
00499 for (i = 0; i < set->n_models; ++i) {
00500 for (j = 0; j < set->n_models; ++j)
00501 if (0 == strcmp(names[i], set->names[j]))
00502 break;
00503 if (j == set->n_models) {
00504 E_ERROR("Unknown LM name %s\n", names[i]);
00505 return NULL;
00506 }
00507 set->lweights[j] = logmath_log(base->lmath, weights[i]);
00508 }
00509 }
00510 else if (weights) {
00511 memcpy(set->lweights, weights, set->n_models * sizeof(*set->lweights));
00512 }
00513
00514 set->cur = -1;
00515 return base;
00516 }
00517
00518 ngram_model_t *
00519 ngram_model_set_add(ngram_model_t *base,
00520 ngram_model_t *model,
00521 const char *name,
00522 float32 weight,
00523 int reuse_widmap)
00524
00525 {
00526 ngram_model_set_t *set = (ngram_model_set_t *)base;
00527 float32 fprob;
00528 int32 scale, i;
00529
00530
00531 ++set->n_models;
00532 set->lms = ckd_realloc(set->lms, set->n_models * sizeof(*set->lms));
00533 set->lms[set->n_models - 1] = model;
00534 set->names = ckd_realloc(set->names, set->n_models * sizeof(*set->names));
00535 set->names[set->n_models - 1] = ckd_salloc(name);
00536
00537 if (model->n > base->n) {
00538 base->n = model->n;
00539 set->maphist = ckd_realloc(set->maphist,
00540 (model->n - 1) * sizeof(*set->maphist));
00541 }
00542
00543
00544 fprob = weight * 1.0f / set->n_models;
00545 set->lweights = ckd_realloc(set->lweights,
00546 set->n_models * sizeof(*set->lweights));
00547 set->lweights[set->n_models - 1] = logmath_log(base->lmath, fprob);
00548
00549
00550
00551 scale = logmath_log(base->lmath, 1.0 - fprob);
00552 for (i = 0; i < set->n_models - 1; ++i)
00553 set->lweights[i] += scale;
00554
00555
00556 if (reuse_widmap) {
00557 int32 **new_widmap;
00558
00559
00560 new_widmap = (int32 **)ckd_calloc_2d(base->n_words, set->n_models,
00561 sizeof (**new_widmap));
00562 for (i = 0; i < base->n_words; ++i) {
00563
00564 memcpy(new_widmap[i], set->widmap[i],
00565 (set->n_models - 1) * sizeof(**new_widmap));
00566
00567 new_widmap[i][set->n_models-1] = ngram_wid(model, base->word_str[i]);
00568 }
00569 ckd_free_2d((void **)set->widmap);
00570 set->widmap = new_widmap;
00571 }
00572 else {
00573 build_widmap(base, base->lmath, base->n);
00574 }
00575 return model;
00576 }
00577
00578 ngram_model_t *
00579 ngram_model_set_remove(ngram_model_t *base,
00580 const char *name,
00581 int reuse_widmap)
00582 {
00583 ngram_model_set_t *set = (ngram_model_set_t *)base;
00584 ngram_model_t *submodel;
00585 int32 lmidx, scale, n, i;
00586 float32 fprob;
00587
00588 for (lmidx = 0; lmidx < set->n_models; ++lmidx)
00589 if (0 == strcmp(name, set->names[lmidx]))
00590 break;
00591 if (lmidx == set->n_models)
00592 return NULL;
00593 submodel = set->lms[lmidx];
00594
00595
00596
00597 fprob = (float32)logmath_exp(base->lmath, set->lweights[lmidx]);
00598 scale = logmath_log(base->lmath, 1.0 - fprob);
00599
00600
00601
00602 --set->n_models;
00603 n = 0;
00604 ckd_free(set->names[lmidx]);
00605 set->names[lmidx] = NULL;
00606 for (i = 0; i < set->n_models; ++i) {
00607 if (i >= lmidx) {
00608 set->lms[i] = set->lms[i+1];
00609 set->names[i] = set->names[i+1];
00610 set->lweights[i] = set->lweights[i+1];
00611 }
00612 set->lweights[i] -= scale;
00613 if (set->lms[i]->n > n)
00614 n = set->lms[i]->n;
00615 }
00616
00617 set->lms[set->n_models] = NULL;
00618 set->lweights[set->n_models] = base->log_zero;
00619
00620
00621
00622 if (reuse_widmap) {
00623
00624 for (i = 0; i < base->n_words; ++i) {
00625 memmove(set->widmap[i] + lmidx, set->widmap[i] + lmidx + 1,
00626 (set->n_models - lmidx) * sizeof(**set->widmap));
00627 }
00628 }
00629 else {
00630 build_widmap(base, base->lmath, n);
00631 }
00632 return submodel;
00633 }
00634
00635 void
00636 ngram_model_set_map_words(ngram_model_t *base,
00637 const char **words,
00638 int32 n_words)
00639 {
00640 ngram_model_set_t *set = (ngram_model_set_t *)base;
00641 int32 i;
00642
00643
00644 if (base->writable) {
00645 for (i = 0; i < base->n_words; ++i) {
00646 ckd_free(base->word_str[i]);
00647 }
00648 }
00649 ckd_free(base->word_str);
00650 ckd_free_2d((void **)set->widmap);
00651 base->writable = TRUE;
00652 base->n_words = base->n_1g_alloc = n_words;
00653 base->word_str = ckd_calloc(n_words, sizeof(*base->word_str));
00654 set->widmap = (int32 **)ckd_calloc_2d(n_words, set->n_models, sizeof(**set->widmap));
00655 hash_table_empty(base->wid);
00656 for (i = 0; i < n_words; ++i) {
00657 int32 j;
00658 base->word_str[i] = ckd_salloc(words[i]);
00659 (void)hash_table_enter_int32(base->wid, base->word_str[i], i);
00660 for (j = 0; j < set->n_models; ++j) {
00661 set->widmap[i][j] = ngram_wid(set->lms[j], base->word_str[i]);
00662 }
00663 }
00664 }
00665
00666 static int
00667 ngram_model_set_apply_weights(ngram_model_t *base, float32 lw,
00668 float32 wip, float32 uw)
00669 {
00670 ngram_model_set_t *set = (ngram_model_set_t *)base;
00671 int32 i;
00672
00673
00674 for (i = 0; i < set->n_models; ++i)
00675 ngram_model_apply_weights(set->lms[i], lw, wip, uw);
00676 return 0;
00677 }
00678
00679 static int32
00680 ngram_model_set_score(ngram_model_t *base, int32 wid,
00681 int32 *history, int32 n_hist,
00682 int32 *n_used)
00683 {
00684 ngram_model_set_t *set = (ngram_model_set_t *)base;
00685 int32 mapwid;
00686 int32 score;
00687 int32 i;
00688
00689
00690 if (n_hist > base->n - 1)
00691 n_hist = base->n - 1;
00692
00693
00694 if (set->cur == -1) {
00695 score = base->log_zero;
00696 for (i = 0; i < set->n_models; ++i) {
00697 int32 j;
00698
00699 mapwid = set->widmap[wid][i];
00700 for (j = 0; j < n_hist; ++j) {
00701 if (history[j] == NGRAM_INVALID_WID)
00702 set->maphist[j] = NGRAM_INVALID_WID;
00703 else
00704 set->maphist[j] = set->widmap[history[j]][i];
00705 }
00706 score = logmath_add(base->lmath, score,
00707 set->lweights[i] +
00708 ngram_ng_score(set->lms[i],
00709 mapwid, set->maphist, n_hist, n_used));
00710 }
00711 }
00712 else {
00713 int32 j;
00714
00715 mapwid = set->widmap[wid][set->cur];
00716 for (j = 0; j < n_hist; ++j) {
00717 if (history[j] == NGRAM_INVALID_WID)
00718 set->maphist[j] = NGRAM_INVALID_WID;
00719 else
00720 set->maphist[j] = set->widmap[history[j]][set->cur];
00721 }
00722 score = ngram_ng_score(set->lms[set->cur],
00723 mapwid, set->maphist, n_hist, n_used);
00724 }
00725
00726 return score;
00727 }
00728
00729 static int32
00730 ngram_model_set_raw_score(ngram_model_t *base, int32 wid,
00731 int32 *history, int32 n_hist,
00732 int32 *n_used)
00733 {
00734 ngram_model_set_t *set = (ngram_model_set_t *)base;
00735 int32 mapwid;
00736 int32 score;
00737 int32 i;
00738
00739
00740 if (n_hist > base->n - 1)
00741 n_hist = base->n - 1;
00742
00743
00744 if (set->cur == -1) {
00745 score = base->log_zero;
00746 for (i = 0; i < set->n_models; ++i) {
00747 int32 j;
00748
00749 mapwid = set->widmap[wid][i];
00750 for (j = 0; j < n_hist; ++j) {
00751 if (history[j] == NGRAM_INVALID_WID)
00752 set->maphist[j] = NGRAM_INVALID_WID;
00753 else
00754 set->maphist[j] = set->widmap[history[j]][i];
00755 }
00756 score = logmath_add(base->lmath, score,
00757 set->lweights[i] +
00758 ngram_ng_prob(set->lms[i],
00759 mapwid, set->maphist, n_hist, n_used));
00760 }
00761 }
00762 else {
00763 int32 j;
00764
00765 mapwid = set->widmap[wid][set->cur];
00766 for (j = 0; j < n_hist; ++j) {
00767 if (history[j] == NGRAM_INVALID_WID)
00768 set->maphist[j] = NGRAM_INVALID_WID;
00769 else
00770 set->maphist[j] = set->widmap[history[j]][set->cur];
00771 }
00772 score = ngram_ng_prob(set->lms[set->cur],
00773 mapwid, set->maphist, n_hist, n_used);
00774 }
00775
00776 return score;
00777 }
00778
00779 static int32
00780 ngram_model_set_add_ug(ngram_model_t *base,
00781 int32 wid, int32 lweight)
00782 {
00783 ngram_model_set_t *set = (ngram_model_set_t *)base;
00784 int32 *newwid;
00785 int32 i, prob;
00786
00787
00788
00789
00790 newwid = ckd_calloc(set->n_models, sizeof(*newwid));
00791 prob = base->log_zero;
00792 for (i = 0; i < set->n_models; ++i) {
00793 int32 wprob, n_hist;
00794
00795
00796 if (set->cur == -1 || set->cur == i) {
00797
00798 newwid[i] = ngram_wid(set->lms[i], base->word_str[wid]);
00799 if (newwid[i] == NGRAM_INVALID_WID) {
00800
00801 newwid[i] = ngram_model_add_word(set->lms[i], base->word_str[wid],
00802 (float32)logmath_exp(base->lmath, lweight));
00803 if (newwid[i] == NGRAM_INVALID_WID) {
00804 ckd_free(newwid);
00805 return base->log_zero;
00806 }
00807 }
00808
00809
00810 wprob = ngram_ng_prob(set->lms[i], newwid[i], NULL, 0, &n_hist);
00811 if (set->cur == i)
00812 prob = wprob;
00813 else if (set->cur == -1)
00814 prob = logmath_add(base->lmath, prob, set->lweights[i] + wprob);
00815 }
00816 else {
00817 newwid[i] = NGRAM_INVALID_WID;
00818 }
00819 }
00820
00821
00822
00823 set->widmap = ckd_realloc(set->widmap, base->n_words * sizeof(*set->widmap));
00824 set->widmap[0] = ckd_realloc(set->widmap[0],
00825 base->n_words
00826 * set->n_models
00827 * sizeof(**set->widmap));
00828 for (i = 0; i < base->n_words; ++i)
00829 set->widmap[i] = set->widmap[0] + i * set->n_models;
00830 memcpy(set->widmap[wid], newwid, set->n_models * sizeof(*newwid));
00831 ckd_free(newwid);
00832 return prob;
00833 }
00834
00835 static void
00836 ngram_model_set_free(ngram_model_t *base)
00837 {
00838 ngram_model_set_t *set = (ngram_model_set_t *)base;
00839 int32 i;
00840
00841 for (i = 0; i < set->n_models; ++i)
00842 ngram_model_free(set->lms[i]);
00843 ckd_free(set->lms);
00844 for (i = 0; i < set->n_models; ++i)
00845 ckd_free(set->names[i]);
00846 ckd_free(set->names);
00847 ckd_free(set->lweights);
00848 ckd_free(set->maphist);
00849 ckd_free_2d((void **)set->widmap);
00850 }
00851
00852 static void
00853 ngram_model_set_flush(ngram_model_t *base)
00854 {
00855 ngram_model_set_t *set = (ngram_model_set_t *)base;
00856 int32 i;
00857
00858 for (i = 0; i < set->n_models; ++i)
00859 ngram_model_flush(set->lms[i]);
00860 }
00861
/*
 * Function table for language model sets.  build_widmap() passes its
 * address to ngram_model_init() when it (re)initializes the base model.
 */
static ngram_funcs_t ngram_model_set_funcs = {
    ngram_model_set_free,          /* frees submodels and set-owned arrays */
    ngram_model_set_apply_weights, /* forwards weights to every submodel */
    ngram_model_set_score,         /* selected or interpolated ngram_ng_score() */
    ngram_model_set_raw_score,     /* selected or interpolated ngram_ng_prob() */
    ngram_model_set_add_ug,        /* adds a unigram and extends the word ID map */
    ngram_model_set_flush          /* flushes every submodel */
};