dict.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * dict.h -- Pronunciation dictionary structures
39  *
40  * **********************************************
41  * CMU ARPA Speech Project
42  *
43  * Copyright (c) 1997 Carnegie Mellon University.
44  * ALL RIGHTS RESERVED.
45  * **********************************************
46  *
47  * HISTORY
48  * $Log$
49  * Revision 1.1 2006/04/05 20:27:30 dhdfu
50  * A Great Reorganization of header files and executables
51  *
52  * Revision 1.10 2006/02/22 20:55:06 arthchan2003
53  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH:
54  *
55  * 1, Added Letter-to-sound LTS rule, dict_init will only specify
56  * d->lts_rules to be true if the useLTS is specified. Only if
57  * d->lts_rules is specified, the LTS logic will be used. The code safe
58  * guarded the case when a phone in mdef doesn't appear in LTS, in that
59  * case, the code will force exit.
60  *
61  * 2, The LTS logic is only used as a reserved measure. By default, it
62  * is not turned on. See also the comment in kbcore.c and the default
63  * parameters in revision 1.3 cmdln_macro.h . We added it because we have
64  * this functionality in SphinxTrain.
65  *
66  * Revision 1.9.4.4 2005/10/07 18:58:04 arthchan2003
67  * Added macro for getting second last phone for a word.
68  *
69  * Revision 1.9.4.3 2005/09/25 19:12:09 arthchan2003
70  * Added optional LTS support for the dictionary.
71  *
72  * Revision 1.9.4.2 2005/09/18 01:15:45 arthchan2003
73  * Add one doxy-doc in dict.h
74  *
75  * Revision 1.9.4.1 2005/07/05 06:55:26 arthchan2003
76  * Fixed dox-doc.
77  *
78  * Revision 1.9 2005/06/21 21:04:36 arthchan2003
79  * 1, Introduced a reporting routine. 2, Fixed doxygen documentation, 3, Added keyword.
80  *
81  * Revision 1.5 2005/06/13 04:02:57 archan
82  * Fixed most doxygen-style documentation under libs3decoder.
83  *
84  * Revision 1.4 2005/04/21 23:50:26 archan
85  * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used.
86  *
87  * Revision 1.3 2005/03/30 01:22:46 archan
88  * Fixed mistakes in last updates. Add
89  *
90  * 19-Apr-01 Ricky Houghton, added code for freeing memory that is allocated internally.
91  *
92  * 23-Apr-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
93  * Made usage of mdef optional. If no mdef is specified while loading
94  * a dictionary, it maintains the needed CI phone information internally.
95  * Added dict_ciphone_str().
96  *
97  * 02-Jul-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
98  * Added startwid, finishwid, silwid to dict_t structure.
99  *
100  * 07-Feb-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
101  * Created from previous Sphinx-3 version.
102  */
103 
104 
105 #ifndef _S3_DICT_H_
106 #define _S3_DICT_H_
107 
111 #include <hash_table.h>
112 #include <s3types.h>
113 #include "mdef.h" /* This is still a sore point; dict should be independent of mdef */
114 #include "lts.h"
115 
/* NOTE(review): presumably the number of word slots by which the dictionary's
 * word table is grown when it fills up -- its use is not visible in this
 * header; confirm against the implementation. */
116 #define DICT_INC_SZ 4096
117 #ifdef __cplusplus
118 extern "C" {
119 #endif
120 #if 0
121 } /* Fool Emacs into not indenting things. */
122 #endif
123 
/**
 * A structure for one dictionary word.
 *
 * NOTE(review): this listing skips some numbered lines inside the struct. The
 * cross-reference index of this page names further members (ciphone, alt,
 * basewid, comp) which the accessor macros in this header dereference --
 * confirm the full layout against the real header.
 */
128 typedef struct {
129  char *word; /* Word string; this is what dict_wordstr() yields */
131  int32 pronlen; /* Pronunciation length: dict_last_phone() reads ciphone[pronlen - 1] */
134  int32 n_comp; /* NOTE(review): presumably the number of component words when this is a compound word -- confirm */
139 } dictword_t;
140 
/**
 * A structure for a dictionary.
 *
 * NOTE(review): this listing skips some numbered lines inside the struct. The
 * cross-reference index of this page names further members (mdef, word,
 * comp_head, startwid, finishwid, silwid, lts_rules) which the accessor
 * macros in this header dereference -- confirm the full layout against the
 * real header.
 */
146 typedef struct {
148  hash_table_t *pht; /* NOTE(review): presumably a hash table over CI phone names, used when no mdef is supplied -- confirm */
149  char **ciphone_str; /* Array of CI phone name strings; NOTE(review): presumably indexed by s3cipid_t -- confirm */
150  int32 n_ciphone; /* NOTE(review): presumably the number of entries in ciphone_str -- confirm */
152  hash_table_t *ht; /* NOTE(review): presumably a hash table from word string to word id -- confirm */
153  int32 max_words; /* NOTE(review): presumably the allocated capacity of the word array -- confirm */
154  int32 n_word; /* Value reported by dict_size() */
155  int32 filler_start; /* Value reported by dict_filler_start() */
156  int32 filler_end; /* Value reported by dict_filler_end() */
165 } dict_t;
166 
167 
/**
 * Initialise a dictionary and return it.
 *
 * @param mdef       Model definition. A history note in this file states that
 *                   mdef usage is optional: when none is given while loading,
 *                   the needed CI phone information is maintained internally.
 * @param dictfile   NOTE(review): presumably the path of the main dictionary
 *                   file -- confirm.
 * @param fillerfile NOTE(review): presumably the path of the filler-word
 *                   dictionary file -- confirm.
 * @param comp_sep   NOTE(review): presumably the character separating the
 *                   components of a compound word -- confirm.
 * @param useLTS     A history note in this file states that d->lts_rules is
 *                   only set when useLTS is specified, and that the
 *                   letter-to-sound logic is used only in that case.
 * @param breport    NOTE(review): presumably non-zero to report the dictionary
 *                   after loading -- confirm.
 * @return The dictionary. NOTE(review): failure behaviour is not visible in
 *         this header -- confirm.
 */
174 dict_t *dict_init (mdef_t *mdef,
176  const char *dictfile,
177  const char *fillerfile,
178  const char comp_sep,
180  int useLTS,
181  int breport
182  );
183 
/**
 * Return the word id for the given word string in dictionary d.
 * NOTE(review): the value returned for a word that is not in the dictionary
 * is not visible in this header -- confirm.
 */
186 s3wid_t dict_wordid (dict_t *d, const char *word);
187 
/**
 * NOTE(review): presumably returns non-zero when word id w is a filler word
 * of dictionary d (cf. the filler_start / filler_end members) and zero
 * otherwise -- the exact contract is not visible in this header; confirm.
 */
193 int32 dict_filler_word (dict_t *d,
194  s3wid_t w
195  );
196 
202  char *word,
203  s3cipid_t *p,
204  int32 np
205  );
206 
212  s3wid_t *wid,
213  int32 len
214  );
215 
/**
 * Return a CI phone name string for a word of dictionary d.
 * NOTE(review): presumably the name of the phone at position pos in the
 * pronunciation of word wid -- confirm. The returned string is const and
 * must not be modified by the caller.
 */
219 const char *dict_ciphone_str (dict_t *d,
220  s3wid_t wid,
221  int32 pos
222  );
223 
/*
 * Accessor macros. In all of them d is a pointer to a dict_t and w is an
 * index into d->word[]; none of them performs any bounds checking.
 * dict_pron() additionally takes p, an index into the word's ciphone[] array.
 */
225 #define dict_size(d) ((d)->n_word)
226 #define dict_basewid(d,w) ((d)->word[w].basewid)
227 #define dict_wordstr(d,w) ((d)->word[w].word)
228 #define dict_nextalt(d,w) ((d)->word[w].alt)
229 #define dict_pronlen(d,w) ((d)->word[w].pronlen)
230 #define dict_pron(d,w,p) ((d)->word[w].ciphone[p])
231 #define dict_filler_start(d) ((d)->filler_start)
232 #define dict_filler_end(d) ((d)->filler_end)
233 #define dict_startwid(d) ((d)->startwid)
234 #define dict_finishwid(d) ((d)->finishwid)
235 #define dict_silwid(d) ((d)->silwid)
236 #define dict_first_phone(d,w) ((d)->word[w].ciphone[0])
/* The next two macros expand d and w twice, so arguments with side effects
 * are evaluated twice. dict_second_last_phone() indexes ciphone[pronlen - 2]
 * and is therefore only valid for words with at least two phones. */
237 #define dict_second_last_phone(d,w) ((d)->word[w].ciphone[(d)->word[w].pronlen - 2])
238 #define dict_last_phone(d,w) ((d)->word[w].ciphone[(d)->word[w].pronlen - 1])
239 
240 /* Hard-coded special words */
/* NOTE(review): presumably the word strings used to find the startwid,
 * finishwid and silwid ids read by dict_startwid(), dict_finishwid() and
 * dict_silwid() -- the lookup itself is not visible in this header; confirm. */
241 #define S3_START_WORD "<s>"
242 #define S3_FINISH_WORD "</s>"
243 #define S3_SILENCE_WORD "<sil>"
244 #define S3_UNKNOWN_WORD "<UNK>"
245 
246 /* Function versions of some of the above macros; note the leading underscore. */
247 
252 
/**
 * Function version of the dict_wordstr() macro (note the leading underscore):
 * the word string of word id wid in dictionary d.
 * NOTE(review): whether wid is range-checked is not visible here -- confirm.
 */
256 char *_dict_wordstr (dict_t *d, s3wid_t wid);
257 
263 
/**
 * NOTE(review): presumably reduces word to its base form in place (the
 * parameter is a non-const char *), e.g. by removing an alternative-
 * pronunciation marker, and returns an int32 status or position -- the
 * contract is not visible in this header; confirm against the implementation.
 */
271 int32 dict_word2basestr (char *word);
272 
273 /* RAH, free memory allocated for the dictionary */
/**
 * Free the memory allocated for dictionary d.
 * NOTE(review): whether d may be NULL is not visible in this header -- confirm.
 */
276 void dict_free (dict_t *d);
277 
/**
 * Reporting routine for dictionary d.
 * NOTE(review): what is reported and where the output goes is not visible in
 * this header -- confirm.
 */
279 void dict_report(dict_t *d
280  );
281 
282 #if 0
283 { /* Stop indent from complaining */
284 #endif
285 #ifdef __cplusplus
286 }
287 #endif
288 
289 #endif
int32 n_comp
Definition: dict.h:134
Structure for storing the model definition.
Definition: mdef.h:184
s3wid_t _dict_basewid(dict_t *d, s3wid_t w)
s3wid_t * comp_head
Definition: dict.h:157
int32 s3wid_t
Definition: s3types.h:136
char * word
Definition: dict.h:129
hash_table_t * pht
Definition: dict.h:148
lts_t * lts_rules
Definition: dict.h:164
int32 n_word
Definition: dict.h:154
S3DECODER_EXPORT void dict_free(dict_t *d)
const char * dict_ciphone_str(dict_t *d, s3wid_t wid, int32 pos)
s3wid_t * comp
Definition: dict.h:138
mdef_t * mdef
Definition: dict.h:147
int32 filler_end
Definition: dict.h:156
s3wid_t startwid
Definition: dict.h:160
s3wid_t silwid
Definition: dict.h:162
s3wid_t alt
Definition: dict.h:132
int16 s3cipid_t
Definition: s3types.h:110
Size definitions of semantic units. Common to both the s3 and s3.X decoders.
char ** ciphone_str
Definition: dict.h:149
a structure for one dictionary word.
Definition: dict.h:128
Definition: lts.h:96
#define S3DECODER_EXPORT
Definition: sphinx3_export.h:15
s3wid_t basewid
Definition: dict.h:133
s3wid_t finishwid
Definition: dict.h:161
a structure for a dictionary.
Definition: dict.h:146
int32 filler_start
Definition: dict.h:155
S3DECODER_EXPORT dict_t * dict_init(mdef_t *mdef, const char *dictfile, const char *fillerfile, const char comp_sep, int useLTS, int breport)
S3DECODER_EXPORT int32 dict_filler_word(dict_t *d, s3wid_t w)
s3wid_t dict_wids2compwid(dict_t *d, s3wid_t *wid, int32 len)
int32 max_words
Definition: dict.h:153
Model definition.
s3cipid_t * ciphone
Definition: dict.h:130
dictword_t * word
Definition: dict.h:151
S3DECODER_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
int32 n_ciphone
Definition: dict.h:150
s3wid_t _dict_nextalt(dict_t *d, s3wid_t wid)
int32 pronlen
Definition: dict.h:131
s3wid_t dict_add_word(dict_t *d, char *word, s3cipid_t *p, int32 np)
char * _dict_wordstr(dict_t *d, s3wid_t wid)
hash_table_t * ht
Definition: dict.h:152
void dict_report(dict_t *d)
int32 dict_word2basestr(char *word)