PocketSphinx 5prealpha
ms_mgau.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37/*
38 * ms_mgau.c -- Essentially a wrapper that wraps up gauden and
39 * senone. It supports multiple feature streams.
40 *
41 *
42 * **********************************************
43 * CMU ARPA Speech Project
44 *
45 * Copyright (c) 1997 Carnegie Mellon University.
46 * ALL RIGHTS RESERVED.
47 * **********************************************
48 * HISTORY
49 * $Log$
50 * Revision 1.2 2006/02/22 16:56:01 arthchan2003
51 * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: Added ms_mgau.[ch] into the trunk. It is a wrapper of ms_gauden and ms_senone
52 *
53 * Revision 1.1.2.4 2005/09/25 18:55:19 arthchan2003
54 * Added a flag to turn on and off precomputation.
55 *
56 * Revision 1.1.2.3 2005/08/03 18:53:44 dhdfu
57 * Add memory deallocation functions. Also move all the initialization
58 * of ms_mgau_model_t into ms_mgau_init (duh!), which entails removing it
59 * from decode_anytopo and friends.
60 *
61 * Revision 1.1.2.2 2005/08/02 21:05:38 arthchan2003
62 * 1, Added dist and mgau_active as intermediate variable for computation. 2, Added ms_cont_mgau_frame_eval, which is a multi stream version of GMM computation mainly s3.0 family of tools. 3, Fixed dox-doc.
63 *
64 * Revision 1.1.2.1 2005/07/20 19:37:09 arthchan2003
65 * Added a multi-stream cont_mgau (ms_mgau) which is a wrapper of both gauden and senone. Add ms_mgau_init and model_set_mllr. This allow eliminating 600 lines of code in decode_anytopo/align/allphone.
66 *
67 *
68 *
69 */
70
71/* Local headers. */
72#include "ms_mgau.h"
73
74static ps_mgaufuncs_t ms_mgau_funcs = {
75 "ms",
76 ms_cont_mgau_frame_eval, /* frame_eval */
77 ms_mgau_mllr_transform, /* transform */
78 ms_mgau_free /* free */
79};
80
82ms_mgau_init(acmod_t *acmod, logmath_t *lmath, bin_mdef_t *mdef)
83{
84 /* Codebooks */
85 ms_mgau_model_t *msg;
86 ps_mgau_t *mg;
87 gauden_t *g;
88 senone_t *s;
89 cmd_ln_t *config;
90 int i;
91
92 config = acmod->config;
93
94 msg = (ms_mgau_model_t *) ckd_calloc(1, sizeof(ms_mgau_model_t));
95 msg->config = config;
96 msg->g = NULL;
97 msg->s = NULL;
98
99 if ((g = msg->g = gauden_init(cmd_ln_str_r(config, "_mean"),
100 cmd_ln_str_r(config, "_var"),
101 cmd_ln_float32_r(config, "-varfloor"),
102 lmath)) == NULL) {
103 E_ERROR("Failed to read means and variances\n");
104 goto error_out;
105 }
106
107 /* Verify n_feat and veclen, against acmod. */
108 if (g->n_feat != feat_dimension1(acmod->fcb)) {
109 E_ERROR("Number of streams does not match: %d != %d\n",
110 g->n_feat, feat_dimension1(acmod->fcb));
111 goto error_out;
112 }
113 for (i = 0; i < g->n_feat; ++i) {
114 if (g->featlen[i] != feat_dimension2(acmod->fcb, i)) {
115 E_ERROR("Dimension of stream %d does not match: %d != %d\n", i,
116 g->featlen[i], feat_dimension2(acmod->fcb, i));
117 goto error_out;
118 }
119 }
120
121 s = msg->s = senone_init(msg->g,
122 cmd_ln_str_r(config, "_mixw"),
123 cmd_ln_str_r(config, "_senmgau"),
124 cmd_ln_float32_r(config, "-mixwfloor"),
125 lmath, mdef);
126
127 s->aw = cmd_ln_int32_r(config, "-aw");
128
129 /* Verify senone parameters against gauden parameters */
130 if (s->n_feat != g->n_feat)
131 E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat,
132 s->n_feat);
133 if (s->n_cw != g->n_density)
134 E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n",
135 g->n_density, s->n_cw);
136 if (s->n_gauden > g->n_mgau)
137 E_FATAL("Senones need more codebooks (%d) than present (%d)\n",
138 s->n_gauden, g->n_mgau);
139 if (s->n_gauden < g->n_mgau)
140 E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n",
141 s->n_gauden, g->n_mgau);
142
143 msg->topn = cmd_ln_int32_r(config, "-topn");
144 E_INFO("The value of topn: %d\n", msg->topn);
145 if (msg->topn == 0 || msg->topn > msg->g->n_density) {
146 E_WARN
147 ("-topn argument (%d) invalid or > #density codewords (%d); set to latter\n",
148 msg->topn, msg->g->n_density);
149 msg->topn = msg->g->n_density;
150 }
151
152 msg->dist = (gauden_dist_t ***)
153 ckd_calloc_3d(g->n_mgau, g->n_feat, msg->topn,
154 sizeof(gauden_dist_t));
155 msg->mgau_active = ckd_calloc(g->n_mgau, sizeof(int8));
156
157 mg = (ps_mgau_t *)msg;
158 mg->vt = &ms_mgau_funcs;
159 return mg;
160error_out:
161 ms_mgau_free(ps_mgau_base(msg));
162 return NULL;
163}
164
165void
166ms_mgau_free(ps_mgau_t * mg)
167{
168 ms_mgau_model_t *msg = (ms_mgau_model_t *)mg;
169 if (msg == NULL)
170 return;
171
172 if (msg->g)
173 gauden_free(msg->g);
174 if (msg->s)
175 senone_free(msg->s);
176 if (msg->dist)
177 ckd_free_3d((void *) msg->dist);
178 if (msg->mgau_active)
179 ckd_free(msg->mgau_active);
180
181 ckd_free(msg);
182}
183
184int
185ms_mgau_mllr_transform(ps_mgau_t *s,
186 ps_mllr_t *mllr)
187{
189 return gauden_mllr_transform(msg->g, mllr, msg->config);
190}
191
192int32
193ms_cont_mgau_frame_eval(ps_mgau_t * mg,
194 int16 *senscr,
195 uint8 *senone_active,
196 int32 n_senone_active,
197 mfcc_t ** feat,
198 int32 frame,
199 int32 compallsen)
200{
201 ms_mgau_model_t *msg = (ms_mgau_model_t *)mg;
202 int32 gid;
203 int32 topn;
204 int32 best;
205 gauden_t *g;
206 senone_t *sen;
207
208 topn = ms_mgau_topn(msg);
209 g = ms_mgau_gauden(msg);
210 sen = ms_mgau_senone(msg);
211
212 if (compallsen) {
213 int32 s;
214
215 for (gid = 0; gid < g->n_mgau; gid++)
216 gauden_dist(g, gid, topn, feat, msg->dist[gid]);
217
218 best = (int32) 0x7fffffff;
219 for (s = 0; s < sen->n_sen; s++) {
220 senscr[s] = senone_eval(sen, s, msg->dist[sen->mgau[s]], topn);
221 if (best > senscr[s]) {
222 best = senscr[s];
223 }
224 }
225
226 /* Normalize senone scores */
227 for (s = 0; s < sen->n_sen; s++) {
228 int32 bs = senscr[s] - best;
229 if (bs > 32767)
230 bs = 32767;
231 if (bs < -32768)
232 bs = -32768;
233 senscr[s] = bs;
234 }
235 }
236 else {
237 int32 i, n;
238 /* Flag all active mixture-gaussian codebooks */
239 for (gid = 0; gid < g->n_mgau; gid++)
240 msg->mgau_active[gid] = 0;
241
242 n = 0;
243 for (i = 0; i < n_senone_active; i++) {
244 /* senone_active consists of deltas. */
245 int32 s = senone_active[i] + n;
246 msg->mgau_active[sen->mgau[s]] = 1;
247 n = s;
248 }
249
250 /* Compute topn gaussian density values (for active codebooks) */
251 for (gid = 0; gid < g->n_mgau; gid++) {
252 if (msg->mgau_active[gid])
253 gauden_dist(g, gid, topn, feat, msg->dist[gid]);
254 }
255
256 best = (int32) 0x7fffffff;
257 n = 0;
258 for (i = 0; i < n_senone_active; i++) {
259 int32 s = senone_active[i] + n;
260 senscr[s] = senone_eval(sen, s, msg->dist[sen->mgau[s]], topn);
261 if (best > senscr[s]) {
262 best = senscr[s];
263 }
264 n = s;
265 }
266
267 /* Normalize senone scores */
268 n = 0;
269 for (i = 0; i < n_senone_active; i++) {
270 int32 s = senone_active[i] + n;
271 int32 bs = senscr[s] - best;
272 if (bs > 32767)
273 bs = 32767;
274 if (bs < -32768)
275 bs = -32768;
276 senscr[s] = bs;
277 n = s;
278 }
279 }
280
281 return 0;
282}
(Sphinx 3.0 specific) A module that wraps up the code of gauden and senone because they are closely related.
Acoustic model structure.
Definition: acmod.h:148
cmd_ln_t * config
Configuration.
Definition: acmod.h:150
feat_t * fcb
Dynamic feature computation.
Definition: acmod.h:156
Structure to store distance (density) values for a given input observation wrt density values in some...
Definition: ms_gauden.h:71
Multivariate gaussian mixture density parameters.
Definition: ms_gauden.h:82
int32 n_feat
Number of feature streams in each codebook.
Definition: ms_gauden.h:89
int32 n_density
Number of Gaussian densities in each codebook-feature stream.
Definition: ms_gauden.h:90
int32 * featlen
feature length for each feature
Definition: ms_gauden.h:91
int32 n_mgau
Number of codebooks.
Definition: ms_gauden.h:88
int topn
Number of top-scoring (top-N) Gaussians that will be computed.
Definition: ms_mgau.h:118
senone_t * s
The senone.
Definition: ms_mgau.h:117
gauden_t * g
The codebook.
Definition: ms_mgau.h:116
ps_mgaufuncs_t * vt
vtable of mgau functions.
Definition: acmod.h:114
Feature space linear transform structure.
Definition: acmod.h:82
8-bit senone PDF structure.
Definition: ms_senone.h:76
uint32 n_cw
Number of codewords per codebook, per stream.
Definition: ms_senone.h:87
uint32 * mgau
senone-id -> mgau-id mapping for senones in this set
Definition: ms_senone.h:90
uint32 n_feat
Number of feature streams.
Definition: ms_senone.h:86
uint32 n_gauden
Number of Gaussian density codebooks referred to by senones.
Definition: ms_senone.h:88
uint32 n_sen
Number of senones in this set.
Definition: ms_senone.h:85
int32 aw
Inverse acoustic weight.
Definition: ms_senone.h:92