classify.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
19  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
22  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * ====================================================================
31  *
32  */
33 /*************************************************
34  * CMU CALO Speech Project
35  *
36  * Copyright (c) 2004 Carnegie Mellon University.
37  * ALL RIGHTS RESERVED.
38  * **********************************************
39  *
40  * 17-Jun-2004 Ziad Al Bawab (ziada@cs.cmu.edu) at Carnegie Mellon University
41  * Created
42  * $Log$
43  * Revision 1.1 2006/04/05 20:27:30 dhdfu
44  * A Great Reorganization of header files and executables
45  *
46  * Revision 1.10 2006/03/03 20:02:38 arthchan2003
47  * Removed C++ styles comment. This will make options -ansi and -std=c89 happy
48  *
49  * Revision 1.9 2006/02/23 04:05:21 arthchan2003
50  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: fixed dox-doc.
51  *
52  *
53  * Revision 1.6.4.1 2005/07/05 06:46:23 arthchan2003
54  * 1, Merged from HEAD. 2, fixed dox-doc.
55  *
56  * Revision 1.8 2005/07/04 20:57:53 dhdfu
57  * Finally remove the "temporary hack" for the endpointer, and do
58  * everything in logs3 domain. Should make it faster and less likely to
59  * crash on Alphas.
60  *
61  * Actually it kind of duplicates the existing GMM computation functions,
62  * but it is slightly different (see the comment in classify.c). I don't
63  * know the rationale for this.
64  *
65  * Revision 1.7 2005/07/02 04:24:45 egouvea
66  * Changed some hardwired constants to user defined parameters in the end pointer. Tested with make test-ep.
67  *
68  * Revision 1.6 2005/06/21 21:06:47 arthchan2003
69  * 1, Fixed doxygen documentation, 2, Added keyword. 3, Change for mdef_init to use logging.
70  *
71  * Revision 1.3 2005/06/15 06:48:54 archan
72  * Sphinx3 to s3.generic: 1, updated the endptr and classify 's code, 2, also added
73  *
74  */
75 
76 
77 #include "s3types.h"
78 #include "cont_mgau.h"
79 
83 #ifdef __cplusplus
84 extern "C" {
85 #endif
86 #if 0
87 } /* Fool Emacs into not indenting things. */
88 #endif
89 
90 #ifndef __FRAME_CLASSIFIER__
91 #define __FRAME_CLASSIFIER__
92 
93 /******** Set the parameters of the classes *************/
94 
/* NUMCLASSES equals the number of CLASS_* indices defined below (0..3); it
   sizes the per-class arrays in class_t and appears in the priors parameter
   of classify(). */
95 #define NUMCLASSES 4 /* Number of classes*/
96 
97 /*
98  #define NUMMIXTURES 32 // Number of gaussian mixtures used in classification
99 */
100 #define DIMENSIONS 13 /* Length of the feature vector*/
/* NOTE(review): MAXFRAMES is not used anywhere in this header; its meaning
   (presumably an upper bound on frames per utterance) should be confirmed
   where it is used. */
101 #define MAXFRAMES 10000
102 
103 /******** Set the names of the classes *************/
104 /* this is how the mdef file arranged the models*/
105 
106 #define CLASS_N 0 /* Noise*/
107 #define CLASS_O 1 /* Owner speech*/
108 #define CLASS_S 2 /* Secondary speech */
109 #define CLASS_SIL 3 /* Silence */
110 
111 /******** Set the priors of the classes *************/
112 
/* The four priors below sum to 1.0.
   NOTE(review): class_t stores its priors as int32, so these floating-point
   values are presumably converted before use (the change log above mentions
   working in the logs3 domain) -- confirm in classify.c. */
113 #define PRIOR_N 0.1 /* N */
114 #define PRIOR_O 0.4 /* O */
115 #define PRIOR_S 0.1 /* S */
116 #define PRIOR_SIL 0.4 /* SIL */
117 
118 /****************************************************/
119 
120 
121 /******** Set the width of the voting Window *************/
122 
/* VOTEWINDOWLEN also fixes the size of the window[] array in class_t. */
123 #define VOTEWINDOWLEN 5 /* Don't change this number as the code expects 5, or you have to
124  change the code*/
125 
/* NOTE(review): 2 equals (VOTEWINDOWLEN - 1) / 2, so this value presumably
   has to be kept in step with VOTEWINDOWLEN -- confirm in classify.c. */
126 #define CLASSLATENCY 2 /* Number of latency frames caused by the post processing (voting window)
127  */
128 
/* NOTE(review): presumably non-zero enables and zero disables
   post-processing; this header does not show where the value is read. */
129 #define POSTPROCESS 1 /* Enabling/disabling post-processing
130  */
131 /**************************************************/
132 
/**
 * Classifier state; the generated documentation for this file describes it
 * as the "class to store the classifier parameters".
 *
 * NOTE(review): this listing jumps from original line 140 to 143 and from
 * 147 to 149.  The cross-reference index at the end of the listing places
 * `mgau_model_t * g` at classify.h:141 and `int32 classlatency` at
 * classify.h:148, so those members appear to have been dropped from the
 * listing rather than from the real header.  Confirm the full layout against
 * the repository copy before relying on it.
 */
138 typedef struct{
139  char *classname[NUMCLASSES]; /* one string pointer per class; presumably the class names -- confirm */
140  int32 windowlen; /* presumably the voting-window length in frames (cf. VOTEWINDOWLEN) -- confirm */
143 
144  int32 priors[NUMCLASSES]; /* one prior per class, held as int32 (the PRIOR_* macros are floating-point; the conversion is not shown here) */
145 
146  int32 window[VOTEWINDOWLEN]; /* fixed buffer of VOTEWINDOWLEN entries; presumably the recent per-frame decisions used for voting -- confirm */
147  int32 postprocess; /* presumably the post-processing on/off flag (cf. POSTPROCESS) -- confirm */
149 }class_t;
150 
151 
152 
153 
154 
/**
 * Reverse the byte order of the 32-bit integer that x points to, in place.
 *
 * The value is converted to unsigned int before it is shifted, so the left
 * shifts cannot overflow a signed int (undefined behaviour) and the right
 * shifts cannot drag sign bits into the result.  The whole expansion is
 * parenthesised so it can be used safely inside a larger expression.
 *
 * x is evaluated more than once: do not pass an expression with side
 * effects (e.g. SWAP_INT(p++)).  Assumes that int is 32 bits wide.
 */
#define SWAP_INT(x) (*(x) = ((0x000000ffU & ((unsigned int) *(x) >> 24)) | \
                             (0x0000ff00U & ((unsigned int) *(x) >> 8))  | \
                             (0x00ff0000U & ((unsigned int) *(x) << 8))  | \
                             (0xff000000U & ((unsigned int) *(x) << 24))))

/**
 * Reverse the byte order of the 32-bit float that x points to, in place, by
 * reinterpreting its storage as an int and applying SWAP_INT.
 *
 * The argument is parenthesised before the cast so that pointer expressions
 * such as `buf + 4` are converted as a whole; without the parentheses the
 * cast would bind to `buf` alone and the offset would be scaled by
 * sizeof(int).
 *
 * NOTE(review): accessing a float object through an int pointer relies on the
 * compiler tolerating this kind of type punning; x must also be suitably
 * aligned for int.
 */
#define SWAP_FLOAT(x) SWAP_INT((int *) (x))
162 
/**
 * NOTE(review): only the prototype is visible in this header; the parameter
 * descriptions below are inferred from the names and must be confirmed
 * against the implementation.
 *
 * @param CLASSW      classifier state (class_t).
 * @param classcount  caller-provided int array; presumably one counter per
 *                    class (NUMCLASSES entries) -- confirm.
 * @param frame_count presumably the number of frames the counts cover --
 *                    confirm.
 */
163 void majority_class(class_t* CLASSW, int *classcount, int frame_count);
164 
/**
 * Returns a pointer to a class_t.
 *
 * This listing skips original lines 172-173, but the cross-reference index
 * at the end of the listing shows the same eight-parameter signature, so no
 * parameter is missing.
 *
 * NOTE(review): only the prototype is visible here.  The parameter meanings
 * below are inferred from the names (they look like acoustic-model file
 * paths and flooring values) and must be confirmed against the
 * implementation, as must the ownership of the returned pointer (presumably
 * released with classw_free()).
 *
 * @param mdeffile  presumably the path of the model definition file.
 * @param meanfile  presumably the path of the Gaussian means file.
 * @param varfile   presumably the path of the Gaussian variances file.
 * @param varfloor  presumably the floor applied to variances.
 * @param mixwfile  presumably the path of the mixture weights file.
 * @param mixwfloor presumably the floor applied to mixture weights.
 * @param precomp   meaning not visible in this header -- confirm.
 * @param senmgau   meaning not visible in this header -- confirm.
 */
165 class_t * classw_initialize(char *mdeffile,
166  char* meanfile,
167  char *varfile,
168  float64 varfloor,
169  char* mixwfile,
170  float64 mixwfloor,
171  int32 precomp,
174  char *senmgau
175  );
176 
/**
 * NOTE(review): only the prototype is visible here; presumably releases a
 * class_t obtained from classw_initialize() -- confirm what it frees and
 * whether a NULL argument is accepted.
 *
 * @param CLASSW classifier state (class_t).
 */
177 void classw_free(class_t *CLASSW);
178 
/**
 * NOTE(review): only the prototype is visible here; the descriptions below
 * are inferred from the names and types and must be confirmed against the
 * implementation.
 *
 * @param frame  pointer to float data; presumably one feature vector of
 *               DIMENSIONS values -- confirm.
 * @param g      mixture-Gaussian model (mgau_model_t, from cont_mgau.h).
 * @param priors per-class priors.  As a function parameter this array type
 *               is adjusted to `int32 *`, so NUMCLASSES documents the
 *               expected length but is not enforced by the compiler.
 * @param map    pointer to s3cipid_t values; meaning not visible in this
 *               header -- confirm.
 * @return an int; presumably one of the CLASS_* indices -- confirm.
 */
179 int classify (float *frame,
180  mgau_model_t *g,
181  int32 priors[NUMCLASSES],
182  s3cipid_t *map
183  );
184 
/**
 * NOTE(review): only the prototype is visible here; the descriptions below
 * are inferred from the names and must be confirmed against the
 * implementation.
 *
 * @param window    int buffer; presumably the voting window -- confirm.
 * @param windowlen presumably the number of entries in window -- confirm.
 * @param wincap    pointer to an int the callee may update; meaning not
 *                  visible in this header -- confirm.
 * @param myclass   presumably the class decided for the current frame --
 *                  confirm.
 * @return an int; presumably a class index -- confirm.
 */
185 int postclassify (int *window, int windowlen, int *wincap, int myclass);
186 
/**
 * NOTE(review): only the prototype is visible here; the descriptions below
 * are inferred from the names and must be confirmed against the
 * implementation.
 *
 * @param window    int buffer; presumably the class decisions to vote over
 *                  -- confirm.
 * @param windowlen presumably the number of entries in window -- confirm.
 * @return an int; presumably the winning class index -- confirm.
 */
187 int vote (int *window,
188  int windowlen
189  );
190 
191 #endif /*__FRAME_CLASSIFIER__*/
192 
193 #if 0
194 { /* Stop indent from complaining */
195 #endif
196 #ifdef __cplusplus
197 }
198 #endif
199 
class to store the classifier parameters
Definition: classify.h:138
mgau_model_t * g
Definition: classify.h:141
Interface of full GMM computation with integer value of log likelihood.
class_t * classw_initialize(char *mdeffile, char *meanfile, char *varfile, float64 varfloor, char *mixwfile, float64 mixwfloor, int32 precomp, char *senmgau)
int postclassify(int *window, int windowlen, int *wincap, int myclass)
#define VOTEWINDOWLEN
Definition: classify.h:123
int32 classlatency
Definition: classify.h:148
void classw_free(class_t *CLASSW)
int16 s3cipid_t
Definition: s3types.h:110
Size definitions of semantic units. Common to both the s3 and s3.X decoders.
int classify(float *frame, mgau_model_t *g, int32 priors[NUMCLASSES], s3cipid_t *map)
The set of mixture Gaussians in an acoustic model, used in the Sphinx 3.X family of tools.
Definition: cont_mgau.h:207
#define NUMCLASSES
Definition: classify.h:95
int32 postprocess
Definition: classify.h:147
int32 windowlen
Definition: classify.h:140
void majority_class(class_t *CLASSW, int *classcount, int frame_count)
int vote(int *window, int windowlen)