00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00043 #ifndef __NGRAM_MODEL_H__
00044 #define __NGRAM_MODEL_H__
00045
00046
00047 #include <sphinxbase_export.h>
00048 #include <prim_type.h>
00049 #include <cmd_ln.h>
00050 #include <logmath.h>
00051 #include <mmio.h>
00052 #include <stdarg.h>
00053
00054 #ifdef __cplusplus
00055 extern "C" {
00056 #endif
00057 #if 0
00058
00059 }
00060 #endif
00061
/**
 * Opaque handle for an N-gram language model.
 *
 * Only the struct tag is declared in this header, so client code can
 * hold and pass pointers to a model but cannot touch its members.
 */
struct ngram_model_s;
typedef struct ngram_model_s ngram_model_t;

/**
 * Opaque handle for a word class.
 *
 * Only the struct tag is declared here.
 * NOTE(review): no prototype in this part of the header takes this type
 * directly; confirm where it is consumed.
 */
struct ngram_class_s;
typedef struct ngram_class_s ngram_class_t;
00071
/**
 * File types for N-gram model files.
 *
 * The enumerators are numbered consecutively starting from -1, so
 * NGRAM_AUTO is 0 and every later format is a positive value.
 *
 * NOTE(review): the per-format descriptions below are inferred from the
 * enumerator names only; confirm against the reader/writer code.
 */
typedef enum ngram_file_type_e {
    NGRAM_INVALID = -1, /**< Not a valid file type. */
    NGRAM_AUTO,         /**< Presumably: determine the type automatically. */
    NGRAM_ARPA,         /**< Presumably: ARPA text format. */
    NGRAM_DMP,          /**< Presumably: binary dump format. */
    NGRAM_DMP32         /**< Presumably: 32-bit variant of the dump format. */
    /* No trailing comma after the last enumerator: strict C89 compilers
     * reject it, and this is a public header. */
} ngram_file_type_t;
00082
/**
 * Sentinel word ID meaning "no such word" (value -1).
 *
 * The replacement list is parenthesized so the macro expands to a single
 * primary expression and cannot re-associate with neighbouring operators.
 */
#define NGRAM_INVALID_WID (-1)
00105 SPHINXBASE_EXPORT
00106 ngram_model_t *ngram_model_read(cmd_ln_t *config,
00107 const char *file_name,
00108 ngram_file_type_t file_type,
00109 logmath_t *lmath);
00110
00116 SPHINXBASE_EXPORT
00117 int ngram_model_write(ngram_model_t *model, const char *file_name,
00118 ngram_file_type_t format);
00119
00125 SPHINXBASE_EXPORT
00126 ngram_file_type_t ngram_file_name_to_type(const char *file_name);
00127
00133 SPHINXBASE_EXPORT
00134 ngram_file_type_t ngram_str_to_type(const char *str_name);
00135
00142 SPHINXBASE_EXPORT
00143 char const *ngram_type_to_str(int type);
00144
00150 SPHINXBASE_EXPORT
00151 ngram_model_t *ngram_model_retain(ngram_model_t *model);
00152
00158 SPHINXBASE_EXPORT
00159 int ngram_model_free(ngram_model_t *model);
00160
00177 SPHINXBASE_EXPORT
00178 int ngram_model_recode(ngram_model_t *model, const char *from, const char *to);
00179
00183 typedef enum ngram_case_e {
00184 NGRAM_UPPER,
00185 NGRAM_LOWER
00186 } ngram_case_t;
00187
00194 SPHINXBASE_EXPORT
00195 int ngram_model_casefold(ngram_model_t *model, int kase);
00196
00208 SPHINXBASE_EXPORT
00209 int ngram_model_apply_weights(ngram_model_t *model,
00210 float32 lw, float32 wip, float32 uw);
00211
00220 SPHINXBASE_EXPORT
00221 float32 ngram_model_get_weights(ngram_model_t *model, int32 *out_log_wip,
00222 int32 *out_log_uw);
00223
00256 SPHINXBASE_EXPORT
00257 int32 ngram_score(ngram_model_t *model, const char *word, ...);
00258
00262 SPHINXBASE_EXPORT
00263 int32 ngram_tg_score(ngram_model_t *model,
00264 int32 w3, int32 w2, int32 w1,
00265 int32 *n_used);
00266
00270 SPHINXBASE_EXPORT
00271 int32 ngram_bg_score(ngram_model_t *model,
00272 int32 w2, int32 w1,
00273 int32 *n_used);
00274
00278 SPHINXBASE_EXPORT
00279 int32 ngram_ng_score(ngram_model_t *model, int32 wid, int32 *history,
00280 int32 n_hist, int32 *n_used);
00281
00292 SPHINXBASE_EXPORT
00293 int32 ngram_prob(ngram_model_t *model, const char *word, ...);
00294
00301 SPHINXBASE_EXPORT
00302 int32 ngram_ng_prob(ngram_model_t *model, int32 wid, int32 *history,
00303 int32 n_hist, int32 *n_used);
00304
00316 SPHINXBASE_EXPORT
00317 int32 ngram_score_to_prob(ngram_model_t *model, int32 score);
00318
00322 SPHINXBASE_EXPORT
00323 int32 ngram_wid(ngram_model_t *model, const char *word);
00324
00328 SPHINXBASE_EXPORT
00329 const char *ngram_word(ngram_model_t *model, int32 wid);
00330
00344 SPHINXBASE_EXPORT
00345 int32 ngram_unknown_wid(ngram_model_t *model);
00346
00350 SPHINXBASE_EXPORT
00351 int32 ngram_zero(ngram_model_t *model);
00352
00356 SPHINXBASE_EXPORT
00357 int32 ngram_model_get_size(ngram_model_t *model);
00358
00362 SPHINXBASE_EXPORT
00363 int32 const *ngram_model_get_counts(ngram_model_t *model);
00364
00368 typedef struct ngram_iter_s ngram_iter_t;
00369
00378 SPHINXBASE_EXPORT
00379 ngram_iter_t *ngram_model_mgrams(ngram_model_t *model, int m);
00380
00384 SPHINXBASE_EXPORT
00385 ngram_iter_t *ngram_iter(ngram_model_t *model, const char *word, ...);
00386
00390 SPHINXBASE_EXPORT
00391 ngram_iter_t *ngram_ng_iter(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist);
00392
00401 SPHINXBASE_EXPORT
00402 int32 const *ngram_iter_get(ngram_iter_t *itor,
00403 int32 *out_score,
00404 int32 *out_bowt);
00405
00411 SPHINXBASE_EXPORT
00412 ngram_iter_t *ngram_iter_successors(ngram_iter_t *itor);
00413
00417 SPHINXBASE_EXPORT
00418 ngram_iter_t *ngram_iter_next(ngram_iter_t *itor);
00419
00423 SPHINXBASE_EXPORT
00424 void ngram_iter_free(ngram_iter_t *itor);
00425
00438 SPHINXBASE_EXPORT
00439 int32 ngram_model_add_word(ngram_model_t *model,
00440 const char *word, float32 weight);
00441
00455 SPHINXBASE_EXPORT
00456 int32 ngram_model_read_classdef(ngram_model_t *model,
00457 const char *file_name);
00458
00467 SPHINXBASE_EXPORT
00468 int32 ngram_model_add_class(ngram_model_t *model,
00469 const char *classname,
00470 float32 classweight,
00471 char **words,
00472 const float32 *weights,
00473 int32 n_words);
00474
00484 SPHINXBASE_EXPORT
00485 int32 ngram_model_add_class_word(ngram_model_t *model,
00486 const char *classname,
00487 const char *word,
00488 float32 weight);
00489
00514 SPHINXBASE_EXPORT
00515 ngram_model_t *ngram_model_set_init(cmd_ln_t *config,
00516 ngram_model_t **models,
00517 char **names,
00518 const float32 *weights,
00519 int32 n_models);
00520
00551 SPHINXBASE_EXPORT
00552 ngram_model_t *ngram_model_set_read(cmd_ln_t *config,
00553 const char *lmctlfile,
00554 logmath_t *lmath);
00555
00559 SPHINXBASE_EXPORT
00560 int32 ngram_model_set_count(ngram_model_t *set);
00561
00565 typedef struct ngram_model_set_iter_s ngram_model_set_iter_t;
00566
00572 SPHINXBASE_EXPORT
00573 ngram_model_set_iter_t *ngram_model_set_iter(ngram_model_t *set);
00574
00580 SPHINXBASE_EXPORT
00581 ngram_model_set_iter_t *ngram_model_set_iter_next(ngram_model_set_iter_t *itor);
00582
00586 SPHINXBASE_EXPORT
00587 void ngram_model_set_iter_free(ngram_model_set_iter_t *itor);
00588
00596 SPHINXBASE_EXPORT
00597 ngram_model_t *ngram_model_set_iter_model(ngram_model_set_iter_t *itor,
00598 char const **lmname);
00599
00606 SPHINXBASE_EXPORT
00607 ngram_model_t *ngram_model_set_select(ngram_model_t *set,
00608 const char *name);
00609
00616 SPHINXBASE_EXPORT
00617 ngram_model_t *ngram_model_set_lookup(ngram_model_t *set,
00618 const char *name);
00619
00623 SPHINXBASE_EXPORT
00624 const char *ngram_model_set_current(ngram_model_t *set);
00625
00633 SPHINXBASE_EXPORT
00634 ngram_model_t *ngram_model_set_interp(ngram_model_t *set,
00635 const char **names,
00636 const float32 *weights);
00637
00650 SPHINXBASE_EXPORT
00651 ngram_model_t *ngram_model_set_add(ngram_model_t *set,
00652 ngram_model_t *model,
00653 const char *name,
00654 float32 weight,
00655 int reuse_widmap);
00656
00665 SPHINXBASE_EXPORT
00666 ngram_model_t *ngram_model_set_remove(ngram_model_t *set,
00667 const char *name,
00668 int reuse_widmap);
00669
00673 SPHINXBASE_EXPORT
00674 void ngram_model_set_map_words(ngram_model_t *set,
00675 const char **words,
00676 int32 n_words);
00677
00685 SPHINXBASE_EXPORT
00686 int32 ngram_model_set_current_wid(ngram_model_t *set,
00687 int32 set_wid);
00688
00698 SPHINXBASE_EXPORT
00699 int32 ngram_model_set_known_wid(ngram_model_t *set, int32 set_wid);
00700
00708 SPHINXBASE_EXPORT
00709 void ngram_model_flush(ngram_model_t *lm);
00710
00711 #ifdef __cplusplus
00712 }
00713 #endif
00714
00715
00716 #endif