Operations on dictionary. More...
Go to the source code of this file.
Classes | |
struct | dictword_t |
a structure for one dictionary word. More... | |
struct | dict_t |
a structure for a dictionary. More... | |
Macros | |
#define | DICT_INC_SZ 4096 |
#define | dict_size(d) ((d)->n_word) |
#define | dict_basewid(d, w) ((d)->word[w].basewid) |
#define | dict_wordstr(d, w) ((d)->word[w].word) |
#define | dict_nextalt(d, w) ((d)->word[w].alt) |
#define | dict_pronlen(d, w) ((d)->word[w].pronlen) |
#define | dict_pron(d, w, p) ((d)->word[w].ciphone[p]) |
#define | dict_filler_start(d) ((d)->filler_start) |
#define | dict_filler_end(d) ((d)->filler_end) |
#define | dict_startwid(d) ((d)->startwid) |
#define | dict_finishwid(d) ((d)->finishwid) |
#define | dict_silwid(d) ((d)->silwid) |
#define | dict_first_phone(d, w) ((d)->word[w].ciphone[0]) |
#define | dict_second_last_phone(d, w) ((d)->word[w].ciphone[(d)->word[w].pronlen - 2]) |
#define | dict_last_phone(d, w) ((d)->word[w].ciphone[(d)->word[w].pronlen - 1]) |
#define | S3_START_WORD "<s>" |
#define | S3_FINISH_WORD "</s>" |
#define | S3_SILENCE_WORD "<sil>" |
#define | S3_UNKNOWN_WORD "<UNK>" |
Functions | |
S3DECODER_EXPORT dict_t * | dict_init (mdef_t *mdef, const char *dictfile, const char *fillerfile, const char comp_sep, int useLTS, int breport) |
S3DECODER_EXPORT s3wid_t | dict_wordid (dict_t *d, const char *word) |
S3DECODER_EXPORT int32 | dict_filler_word (dict_t *d, s3wid_t w) |
s3wid_t | dict_add_word (dict_t *d, char *word, s3cipid_t *p, int32 np) |
s3wid_t | dict_wids2compwid (dict_t *d, s3wid_t *wid, int32 len) |
const char * | dict_ciphone_str (dict_t *d, s3wid_t wid, int32 pos) |
s3wid_t | _dict_basewid (dict_t *d, s3wid_t w) |
char * | _dict_wordstr (dict_t *d, s3wid_t wid) |
s3wid_t | _dict_nextalt (dict_t *d, s3wid_t wid) |
int32 | dict_word2basestr (char *word) |
S3DECODER_EXPORT void | dict_free (dict_t *d) |
void | dict_report (dict_t *d) |
Operations on dictionary.
#define dict_basewid | ( | d, | |
w | |||
) | ((d)->word[w].basewid) |
#define dict_filler_end | ( | d | ) | ((d)->filler_end) |
#define dict_filler_start | ( | d | ) | ((d)->filler_start) |
#define dict_finishwid | ( | d | ) | ((d)->finishwid) |
#define dict_first_phone | ( | d, | |
w | |||
) | ((d)->word[w].ciphone[0]) |
#define DICT_INC_SZ 4096 |
#define dict_last_phone | ( | d, | |
w | |||
) | ((d)->word[w].ciphone[(d)->word[w].pronlen - 1]) |
#define dict_nextalt | ( | d, | |
w | |||
) | ((d)->word[w].alt) |
#define dict_pron | ( | d, | |
w, | |||
p | |||
) | ((d)->word[w].ciphone[p]) |
The CI phone of the word w at position p
#define dict_pronlen | ( | d, | |
w | |||
) | ((d)->word[w].pronlen) |
#define dict_second_last_phone | ( | d, | |
w | |||
) | ((d)->word[w].ciphone[(d)->word[w].pronlen - 2]) |
#define dict_silwid | ( | d | ) | ((d)->silwid) |
#define dict_size | ( | d | ) | ((d)->n_word) |
Packaged macro access to dictionary members
#define dict_startwid | ( | d | ) | ((d)->startwid) |
#define dict_wordstr | ( | d, | |
w | |||
) | ((d)->word[w].word) |
#define S3_FINISH_WORD "</s>" |
#define S3_SILENCE_WORD "<sil>" |
#define S3_START_WORD "<s>" |
#define S3_UNKNOWN_WORD "<UNK>" |
Return base word id for given word id w (which may be itself). w must be valid.
Return the next alternative word id for the given word id, which must be valid. The returned id may be BAD_S3WID if there is none.
Return word string for given word id, which must be valid.
Add a word with the given ciphone pronunciation list to the dictionary. Return value: Result word id if successful, BAD_S3WID otherwise
d | The dictionary structure |
word | The word |
Return value: CI phone string for the given word, phone position.
d | In: Dictionary to look up |
wid | In: Component word being looked up |
pos | In: Pronunciation phone position |
S3DECODER_EXPORT int32 dict_filler_word | ( | dict_t * | d, |
s3wid_t | w | ||
) |
Return 1 if w is a filler word, 0 if not. A filler word is one that was read in from the filler dictionary; however, sentence START and FINISH words are not filler words.
d | The dictionary structure |
w | The word |
S3DECODER_EXPORT void dict_free | ( | dict_t * | d | ) |
Free memory allocated for the dictionary
S3DECODER_EXPORT dict_t* dict_init | ( | mdef_t * | mdef, |
const char * | dictfile, | ||
const char * | fillerfile, | ||
const char | comp_sep, | ||
int | useLTS, | ||
int | breport | ||
) |
Initialize with given main and filler dictionary files. fillerfile can be NULL (but external modules might impose their own requirements). Return ptr to dict_t if successful, NULL otherwise.
mdef | For looking up CI phone IDs; NULL if none, in which case CI phones kept internally |
dictfile | Main dictionary file |
fillerfile | Filler dictionary file |
comp_sep | Compound word separator character, or 0 if no compound words |
useLTS | Whether to use letter-to-sound rules |
breport | Whether we should report the progress |
void dict_report | ( | dict_t * | d | ) |
Report a dictionary structure
d | A dictionary structure |
Look for a compound word that matches the given word-id sequence. Return value: Base ID of compound word if found, else BAD_S3WID.
d | In: Dictionary to look up |
wid | In: Component words to look for |
len | In: No. of component words |
int32 dict_word2basestr | ( | char * | word | ) |
If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative pronunciation specification), strip that trailing portion from it. Note that the given string is modified. Return value: If string was modified, the character position at which the original string was truncated; otherwise -1.
S3DECODER_EXPORT s3wid_t dict_wordid | ( | dict_t * | d, |
const char * | word | ||
) |
Return word id for given word string if present. Otherwise return BAD_S3WID.