PocketSphinx 5prealpha
ngram_search_fwdtree.c
Go to the documentation of this file.
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 2008 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
42/* System headers. */
43#include <string.h>
44#include <assert.h>
45
46/* SphinxBase headers. */
47#include <sphinxbase/ckd_alloc.h>
48#include <sphinxbase/listelem_alloc.h>
49#include <sphinxbase/err.h>
50
51/* Local headers. */
53#include "phone_loop_search.h"
54
/*
 * Channel evaluation hook.
 *
 * Set __CHAN_DUMP__ to 1 to evaluate channels through hmm_dump_vit_eval()
 * with stderr as the dump stream (for debugging); with the default of 0,
 * channels are evaluated with plain hmm_vit_eval().
 */
#define __CHAN_DUMP__ 0
#if __CHAN_DUMP__
#define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
#else
#define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
#endif
62
63/*
64 * Allocate that part of the search channel tree structure that is independent of the
65 * LM in use.
66 */
67static void
68init_search_tree(ngram_search_t *ngs)
69{
70 int32 w, ndiph, i, n_words, n_ci;
71 dict_t *dict = ps_search_dict(ngs);
72 bitvec_t *dimap;
73
74 E_INFO("Initializing search tree\n");
75
76 n_words = ps_search_n_words(ngs);
77 ngs->homophone_set = ckd_calloc(n_words, sizeof(*ngs->homophone_set));
78
79 /* Find #single phone words, and #unique first diphones (#root channels) in dict. */
80 ndiph = 0;
81 ngs->n_1ph_words = 0;
82 n_ci = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef);
83 /* Allocate a bitvector with flags for each possible diphone. */
84 dimap = bitvec_alloc(n_ci * n_ci);
85 for (w = 0; w < n_words; w++) {
86 if (!dict_real_word(dict, w))
87 continue;
88 if (dict_is_single_phone(dict, w))
89 ++ngs->n_1ph_words;
90 else {
91 int ph0, ph1;
92 ph0 = dict_first_phone(dict, w);
93 ph1 = dict_second_phone(dict, w);
94 /* Increment ndiph the first time we see a diphone. */
95 if (bitvec_is_clear(dimap, ph0 * n_ci + ph1)) {
96 bitvec_set(dimap, ph0 * n_ci + ph1);
97 ++ndiph;
98 }
99 }
100 }
101 E_INFO("%d unique initial diphones\n", ndiph);
102 bitvec_free(dimap);
103
104 /* Add remaining dict words (</s>, <s>, <sil>, noise words) to single-phone words */
105 ngs->n_1ph_words += dict_num_fillers(dict) + 2;
106 ngs->n_root_chan_alloc = ndiph + 1;
107 /* Verify that these are all *actually* single-phone words,
108 * otherwise really bad things will happen to us. */
109 for (w = 0; w < n_words; ++w) {
110 if (dict_real_word(dict, w))
111 continue;
112 if (!dict_is_single_phone(dict, w)) {
113 E_WARN("Filler word %d = %s has more than one phone, ignoring it.\n",
114 w, dict_wordstr(dict, w));
115 --ngs->n_1ph_words;
116 }
117 }
118
119 /* Allocate and initialize root channels */
120 ngs->root_chan =
121 ckd_calloc(ngs->n_root_chan_alloc, sizeof(*ngs->root_chan));
122 for (i = 0; i < ngs->n_root_chan_alloc; i++) {
123 hmm_init(ngs->hmmctx, &ngs->root_chan[i].hmm, TRUE, -1, -1);
124 ngs->root_chan[i].penult_phn_wid = -1;
125 ngs->root_chan[i].next = NULL;
126 }
127
128 /* Permanently allocate and initialize channels for single-phone
129 * words (1/word). */
130 ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
131 i = 0;
132 for (w = 0; w < n_words; w++) {
133 if (!dict_is_single_phone(dict, w))
134 continue;
135 /* Use SIL as right context for these. */
136 ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef);
137 ngs->rhmm_1ph[i].ciphone = dict_first_phone(dict, w);
138 hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE,
139 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone),
140 bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone));
141 ngs->rhmm_1ph[i].next = NULL;
142
143 ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
144 i++;
145 }
146
147 ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words,
148 sizeof(*ngs->single_phone_wid));
149}
150
151/*
152 * One-time initialization of internal channels in HMM tree.
153 */
154static void
155init_nonroot_chan(ngram_search_t *ngs, chan_t * hmm, int32 ph, int32 ci, int32 tmatid)
156{
157 hmm->next = NULL;
158 hmm->alt = NULL;
159 hmm->info.penult_phn_wid = -1;
160 hmm->ciphone = ci;
161 hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, ph, tmatid);
162}
163
164/*
165 * Allocate and initialize search channel-tree structure.
166 * At this point, all the root-channels have been allocated and partly initialized
167 * (as per init_search_tree()), and channels for all the single-phone words have been
168 * allocated and initialized. None of the interior channels of search-trees have
169 * been allocated.
170 * This routine may be called on every utterance, after reinit_search_tree() clears
171 * the search tree created for the previous utterance. Meant for reconfiguring the
172 * search tree to suit the currently active LM.
173 */
174static void
175create_search_channels(ngram_search_t *ngs)
176{
177 chan_t *hmm;
178 root_chan_t *rhmm;
179 int32 w, i, j, p, ph, tmatid;
180 int32 n_words;
181 dict_t *dict = ps_search_dict(ngs);
182 dict2pid_t *d2p = ps_search_dict2pid(ngs);
183
184 n_words = ps_search_n_words(ngs);
185
186 E_INFO("Creating search channels\n");
187
188 for (w = 0; w < n_words; w++)
189 ngs->homophone_set[w] = -1;
190
191 ngs->n_1ph_LMwords = 0;
192 ngs->n_root_chan = 0;
193 ngs->n_nonroot_chan = 0;
194
195 for (w = 0; w < n_words; w++) {
196 int ciphone, ci2phone;
197
198 /* Ignore dictionary words not in LM */
199 if (!ngram_model_set_known_wid(ngs->lmset, dict_basewid(dict, w)))
200 continue;
201
202 /* Handle single-phone words individually; not in channel tree */
203 if (dict_is_single_phone(dict, w)) {
204 E_DEBUG(1,("single_phone_wid[%d] = %s\n",
205 ngs->n_1ph_LMwords, dict_wordstr(dict, w)));
206 ngs->single_phone_wid[ngs->n_1ph_LMwords++] = w;
207 continue;
208 }
209
210 /* Find a root channel matching the initial diphone, or
211 * allocate one if not found. */
212 ciphone = dict_first_phone(dict, w);
213 ci2phone = dict_second_phone(dict, w);
214 for (i = 0; i < ngs->n_root_chan; ++i) {
215 if (ngs->root_chan[i].ciphone == ciphone
216 && ngs->root_chan[i].ci2phone == ci2phone)
217 break;
218 }
219 if (i == ngs->n_root_chan) {
220 rhmm = &(ngs->root_chan[ngs->n_root_chan]);
221 rhmm->hmm.tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
222 /* Begin with CI phone? Not sure this makes a difference... */
223 hmm_mpx_ssid(&rhmm->hmm, 0) =
224 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ciphone);
225 rhmm->ciphone = ciphone;
226 rhmm->ci2phone = ci2phone;
227 ngs->n_root_chan++;
228 }
229 else
230 rhmm = &(ngs->root_chan[i]);
231
232 E_DEBUG(3,("word %s rhmm %d\n", dict_wordstr(dict, w), rhmm - ngs->root_chan));
233 /* Now, rhmm = root channel for w. Go on to remaining phones */
234 if (dict_pronlen(dict, w) == 2) {
235 /* Next phone is the last; not kept in tree; add w to penult_phn_wid set */
236 if ((j = rhmm->penult_phn_wid) < 0)
237 rhmm->penult_phn_wid = w;
238 else {
239 for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
240 ngs->homophone_set[j] = w;
241 }
242 }
243 else {
244 /* Add remaining phones, except the last, to tree */
245 ph = dict2pid_internal(d2p, w, 1);
246 tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, dict_pron(dict, w, 1));
247 hmm = rhmm->next;
248 if (hmm == NULL) {
249 rhmm->next = hmm = listelem_malloc(ngs->chan_alloc);
250 init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, 1), tmatid);
251 ngs->n_nonroot_chan++;
252 }
253 else {
254 chan_t *prev_hmm = NULL;
255
256 for (; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph); hmm = hmm->alt)
257 prev_hmm = hmm;
258 if (!hmm) { /* thanks, rkm! */
259 prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
260 init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, 1), tmatid);
261 ngs->n_nonroot_chan++;
262 }
263 }
264 E_DEBUG(3,("phone %s = %d\n",
265 bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
266 dict_second_phone(dict, w)), ph));
267 for (p = 2; p < dict_pronlen(dict, w) - 1; p++) {
268 ph = dict2pid_internal(d2p, w, p);
269 tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, dict_pron(dict, w, p));
270 if (!hmm->next) {
271 hmm->next = listelem_malloc(ngs->chan_alloc);
272 hmm = hmm->next;
273 init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, p), tmatid);
274 ngs->n_nonroot_chan++;
275 }
276 else {
277 chan_t *prev_hmm = NULL;
278
279 for (hmm = hmm->next; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph);
280 hmm = hmm->alt)
281 prev_hmm = hmm;
282 if (!hmm) { /* thanks, rkm! */
283 prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
284 init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, p), tmatid);
285 ngs->n_nonroot_chan++;
286 }
287 }
288 E_DEBUG(3,("phone %s = %d\n",
289 bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
290 dict_pron(dict, w, p)), ph));
291 }
292
293 /* All but last phone of w in tree; add w to hmm->info.penult_phn_wid set */
294 if ((j = hmm->info.penult_phn_wid) < 0)
295 hmm->info.penult_phn_wid = w;
296 else {
297 for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
298 ngs->homophone_set[j] = w;
299 }
300 }
301 }
302
303 ngs->n_1ph_words = ngs->n_1ph_LMwords;
304
305 /* Add filler words to the array of 1ph words. */
306 for (w = 0; w < n_words; ++w) {
307 /* Skip anything that doesn't actually have a single phone. */
308 if (!dict_is_single_phone(dict, w))
309 continue;
310 /* Also skip "real words" and things that are in the LM. */
311 if (dict_real_word(dict, w))
312 continue;
313 if (ngram_model_set_known_wid(ngs->lmset, dict_basewid(dict, w)))
314 continue;
315 E_DEBUG(1,("single_phone_wid[%d] = %s\n",
316 ngs->n_1ph_words, dict_wordstr(dict, w)));
317 ngs->single_phone_wid[ngs->n_1ph_words++] = w;
318 }
319
320 if (ngs->n_nonroot_chan >= ngs->max_nonroot_chan) {
321 /* Give some room for channels for new words added dynamically at run time */
322 ngs->max_nonroot_chan = ngs->n_nonroot_chan + 128;
323 E_INFO("Max nonroot chan increased to %d\n", ngs->max_nonroot_chan);
324
325 /* Free old active channel list array if any and allocate new one */
326 if (ngs->active_chan_list)
327 ckd_free_2d(ngs->active_chan_list);
328 ngs->active_chan_list = ckd_calloc_2d(2, ngs->max_nonroot_chan,
329 sizeof(**ngs->active_chan_list));
330 }
331
332 E_INFO("Created %d root, %d non-root channels, %d single-phone words\n",
333 ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
334
335 if (ngs->n_root_chan + ngs->n_1ph_words == 0)
336 E_ERROR("No word from the language model has pronunciation in the dictionary\n");
337}
338
339static void
340reinit_search_subtree(ngram_search_t *ngs, chan_t * hmm)
341{
342 chan_t *child, *sibling;
343
344 /* First free all children under hmm */
345 for (child = hmm->next; child; child = sibling) {
346 sibling = child->alt;
347 reinit_search_subtree(ngs, child);
348 }
349
350 /* Now free hmm */
351 hmm_deinit(&hmm->hmm);
352 listelem_free(ngs->chan_alloc, hmm);
353}
354
355/*
356 * Delete search tree by freeing all interior channels within search tree and
357 * restoring root channel state to the init state (i.e., just after init_search_tree()).
358 */
359static void
360reinit_search_tree(ngram_search_t *ngs)
361{
362 int32 i;
363 chan_t *hmm, *sibling;
364
365 for (i = 0; i < ngs->n_root_chan; i++) {
366 hmm = ngs->root_chan[i].next;
367
368 while (hmm) {
369 sibling = hmm->alt;
370 reinit_search_subtree(ngs, hmm);
371 hmm = sibling;
372 }
373
374 ngs->root_chan[i].penult_phn_wid = -1;
375 ngs->root_chan[i].next = NULL;
376 }
377 ngs->n_nonroot_chan = 0;
378}
379
380void
382{
383 /* Allocate bestbp_rc, lastphn_cand, last_ltrans */
384 ngs->bestbp_rc = ckd_calloc(bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef),
385 sizeof(*ngs->bestbp_rc));
386 ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
387 sizeof(*ngs->lastphn_cand));
388 init_search_tree(ngs);
389 create_search_channels(ngs);
390}
391
392static void
393deinit_search_tree(ngram_search_t *ngs)
394{
395 int i, w, n_words;
396
397 n_words = ps_search_n_words(ngs);
398 for (i = 0; i < ngs->n_root_chan_alloc; i++) {
399 hmm_deinit(&ngs->root_chan[i].hmm);
400 }
401 if (ngs->rhmm_1ph) {
402 for (i = w = 0; w < n_words; ++w) {
403 if (!dict_is_single_phone(ps_search_dict(ngs), w))
404 continue;
405 hmm_deinit(&ngs->rhmm_1ph[i].hmm);
406 ++i;
407 }
408 ckd_free(ngs->rhmm_1ph);
409 ngs->rhmm_1ph = NULL;
410 }
411 ngs->n_root_chan = 0;
412 ngs->n_root_chan_alloc = 0;
413 ckd_free(ngs->root_chan);
414 ngs->root_chan = NULL;
415 ckd_free(ngs->single_phone_wid);
416 ngs->single_phone_wid = NULL;
417 ckd_free(ngs->homophone_set);
418 ngs->homophone_set = NULL;
419}
420
421void
423{
424 double n_speech = (double)ngs->n_tot_frame
425 / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
426
427 E_INFO("TOTAL fwdtree %.2f CPU %.3f xRT\n",
428 ngs->fwdtree_perf.t_tot_cpu,
429 ngs->fwdtree_perf.t_tot_cpu / n_speech);
430 E_INFO("TOTAL fwdtree %.2f wall %.3f xRT\n",
431 ngs->fwdtree_perf.t_tot_elapsed,
432 ngs->fwdtree_perf.t_tot_elapsed / n_speech);
433
434 /* Reset non-root channels. */
435 reinit_search_tree(ngs);
436 /* Free the search tree. */
437 deinit_search_tree(ngs);
438 /* Free other stuff. */
439 ngs->max_nonroot_chan = 0;
440 ckd_free_2d(ngs->active_chan_list);
441 ngs->active_chan_list = NULL;
442 ckd_free(ngs->cand_sf);
443 ngs->cand_sf = NULL;
444 ckd_free(ngs->bestbp_rc);
445 ngs->bestbp_rc = NULL;
446 ckd_free(ngs->lastphn_cand);
447 ngs->lastphn_cand = NULL;
448}
449
450int
452{
453 /* Reset non-root channels. */
454 reinit_search_tree(ngs);
455 /* Free the search tree. */
456 deinit_search_tree(ngs);
457 /* Reallocate things that depend on the number of words. */
458 ckd_free(ngs->lastphn_cand);
459 ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
460 sizeof(*ngs->lastphn_cand));
461 ckd_free(ngs->word_chan);
462 ngs->word_chan = ckd_calloc(ps_search_n_words(ngs),
463 sizeof(*ngs->word_chan));
464 /* Rebuild the search tree. */
465 init_search_tree(ngs);
466 create_search_channels(ngs);
467 return 0;
468}
469
470void
472{
473 ps_search_t *base = (ps_search_t *)ngs;
474 int32 i, w, n_words;
475 root_chan_t *rhmm;
476
477 n_words = ps_search_n_words(ngs);
478
479 /* Reset utterance statistics. */
480 memset(&ngs->st, 0, sizeof(ngs->st));
481 ptmr_reset(&ngs->fwdtree_perf);
482 ptmr_start(&ngs->fwdtree_perf);
483
484 /* Reset backpointer table. */
485 ngs->bpidx = 0;
486 ngs->bss_head = 0;
487
488 /* Reset word lattice. */
489 for (i = 0; i < n_words; ++i)
490 ngs->word_lat_idx[i] = NO_BP;
491
492 /* Reset active HMM and word lists. */
493 ngs->n_active_chan[0] = ngs->n_active_chan[1] = 0;
494 ngs->n_active_word[0] = ngs->n_active_word[1] = 0;
495
496 /* Reset scores. */
497 ngs->best_score = 0;
498 ngs->renormalized = 0;
499
500 /* Reset other stuff. */
501 for (i = 0; i < n_words; i++)
502 ngs->last_ltrans[i].sf = -1;
503 ngs->n_frame = 0;
504
505 /* Clear the hypothesis string. */
506 ckd_free(base->hyp_str);
507 base->hyp_str = NULL;
508
509 /* Reset the permanently allocated single-phone words, since they
510 * may have junk left over in them from FWDFLAT. */
511 for (i = 0; i < ngs->n_1ph_words; i++) {
512 w = ngs->single_phone_wid[i];
513 rhmm = (root_chan_t *) ngs->word_chan[w];
514 hmm_clear(&rhmm->hmm);
515 }
516
517 /* Start search with <s>; word_chan[<s>] is permanently allocated */
518 rhmm = (root_chan_t *) ngs->word_chan[dict_startwid(ps_search_dict(ngs))];
519 hmm_clear(&rhmm->hmm);
520 hmm_enter(&rhmm->hmm, 0, NO_BP, 0);
521}
522
523/*
524 * Mark the active senones for all senones belonging to channels that are active in the
525 * current frame.
526 */
527static void
528compute_sen_active(ngram_search_t *ngs, int frame_idx)
529{
530 root_chan_t *rhmm;
531 chan_t *hmm, **acl;
532 int32 i, w, *awl;
533
534 acmod_clear_active(ps_search_acmod(ngs));
535
536 /* Flag active senones for root channels */
537 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
538 if (hmm_frame(&rhmm->hmm) == frame_idx)
539 acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
540 }
541
542 /* Flag active senones for nonroot channels in HMM tree */
543 i = ngs->n_active_chan[frame_idx & 0x1];
544 acl = ngs->active_chan_list[frame_idx & 0x1];
545 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
546 acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
547 }
548
549 /* Flag active senones for individual word channels */
550 i = ngs->n_active_word[frame_idx & 0x1];
551 awl = ngs->active_word_list[frame_idx & 0x1];
552 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
553 for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
554 acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
555 }
556 }
557 for (i = 0; i < ngs->n_1ph_words; i++) {
558 w = ngs->single_phone_wid[i];
559 rhmm = (root_chan_t *) ngs->word_chan[w];
560
561 if (hmm_frame(&rhmm->hmm) == frame_idx)
562 acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
563 }
564}
565
566static void
567renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
568{
569 root_chan_t *rhmm;
570 chan_t *hmm, **acl;
571 int32 i, w, *awl;
572
573 /* Renormalize root channels */
574 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
575 if (hmm_frame(&rhmm->hmm) == frame_idx) {
576 hmm_normalize(&rhmm->hmm, norm);
577 }
578 }
579
580 /* Renormalize nonroot channels in HMM tree */
581 i = ngs->n_active_chan[frame_idx & 0x1];
582 acl = ngs->active_chan_list[frame_idx & 0x1];
583 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
584 hmm_normalize(&hmm->hmm, norm);
585 }
586
587 /* Renormalize individual word channels */
588 i = ngs->n_active_word[frame_idx & 0x1];
589 awl = ngs->active_word_list[frame_idx & 0x1];
590 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
591 for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
592 hmm_normalize(&hmm->hmm, norm);
593 }
594 }
595 for (i = 0; i < ngs->n_1ph_words; i++) {
596 w = ngs->single_phone_wid[i];
597 rhmm = (root_chan_t *) ngs->word_chan[w];
598 if (hmm_frame(&rhmm->hmm) == frame_idx) {
599 hmm_normalize(&rhmm->hmm, norm);
600 }
601 }
602
603 ngs->renormalized = TRUE;
604}
605
606static int32
607eval_root_chan(ngram_search_t *ngs, int frame_idx)
608{
609 root_chan_t *rhmm;
610 int32 i, bestscore;
611
612 bestscore = WORST_SCORE;
613 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
614 if (hmm_frame(&rhmm->hmm) == frame_idx) {
615 int32 score = chan_v_eval(rhmm);
616 if (score BETTER_THAN bestscore)
617 bestscore = score;
618 ++ngs->st.n_root_chan_eval;
619 }
620 }
621 return (bestscore);
622}
623
624static int32
625eval_nonroot_chan(ngram_search_t *ngs, int frame_idx)
626{
627 chan_t *hmm, **acl;
628 int32 i, bestscore;
629
630 i = ngs->n_active_chan[frame_idx & 0x1];
631 acl = ngs->active_chan_list[frame_idx & 0x1];
632 bestscore = WORST_SCORE;
633 ngs->st.n_nonroot_chan_eval += i;
634
635 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
636 int32 score = chan_v_eval(hmm);
637 assert(hmm_frame(&hmm->hmm) == frame_idx);
638 if (score BETTER_THAN bestscore)
639 bestscore = score;
640 }
641
642 return bestscore;
643}
644
645static int32
646eval_word_chan(ngram_search_t *ngs, int frame_idx)
647{
648 root_chan_t *rhmm;
649 chan_t *hmm;
650 int32 i, w, bestscore, *awl, j, k;
651
652 k = 0;
653 bestscore = WORST_SCORE;
654 awl = ngs->active_word_list[frame_idx & 0x1];
655
656 i = ngs->n_active_word[frame_idx & 0x1];
657 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
658 assert(bitvec_is_set(ngs->word_active, w));
659 bitvec_clear(ngs->word_active, w);
660 assert(ngs->word_chan[w] != NULL);
661
662 for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
663 int32 score;
664
665 assert(hmm_frame(&hmm->hmm) == frame_idx);
666 score = chan_v_eval(hmm);
667 /*printf("eval word chan %d score %d\n", w, score); */
668
669 if (score BETTER_THAN bestscore)
670 bestscore = score;
671
672 k++;
673 }
674 }
675
676 /* Similarly for statically allocated single-phone words */
677 j = 0;
678 for (i = 0; i < ngs->n_1ph_words; i++) {
679 int32 score;
680
681 w = ngs->single_phone_wid[i];
682 rhmm = (root_chan_t *) ngs->word_chan[w];
683 if (hmm_frame(&rhmm->hmm) < frame_idx)
684 continue;
685
686 score = chan_v_eval(rhmm);
687 /* printf("eval 1ph word chan %d score %d\n", w, score); */
688 if (score BETTER_THAN bestscore && w != ps_search_finish_wid(ngs))
689 bestscore = score;
690
691 j++;
692 }
693
694 ngs->st.n_last_chan_eval += k + j;
695 ngs->st.n_nonroot_chan_eval += k + j;
696 ngs->st.n_word_lastchan_eval +=
697 ngs->n_active_word[frame_idx & 0x1] + j;
698
699 return bestscore;
700}
701
702static int32
703evaluate_channels(ngram_search_t *ngs, int16 const *senone_scores, int frame_idx)
704{
705 int32 bs;
706
707 hmm_context_set_senscore(ngs->hmmctx, senone_scores);
708 ngs->best_score = eval_root_chan(ngs, frame_idx);
709 if ((bs = eval_nonroot_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
710 ngs->best_score = bs;
711 if ((bs = eval_word_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
712 ngs->best_score = bs;
713 ngs->last_phone_best_score = bs;
714
715 return ngs->best_score;
716}
717
718/*
719 * Prune currently active root channels for next frame. Also, perform exit
720 * transitions out of them and activate successors.
721 * score[] of pruned root chan set to WORST_SCORE elsewhere.
722 */
723static void
724prune_root_chan(ngram_search_t *ngs, int frame_idx)
725{
726 root_chan_t *rhmm;
727 chan_t *hmm;
728 int32 i, nf, w;
729 int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
730 chan_t **nacl; /* next active list */
731 lastphn_cand_t *candp;
733
734 nf = frame_idx + 1;
735 thresh = ngs->best_score + ngs->dynamic_beam;
736 newphone_thresh = ngs->best_score + ngs->pbeam;
737 lastphn_thresh = ngs->best_score + ngs->lpbeam;
738 nacl = ngs->active_chan_list[nf & 0x1];
739 pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
740
741 for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
742 E_DEBUG(3,("Root channel %d frame %d score %d thresh %d\n",
743 i, hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm), thresh));
744 /* First check if this channel was active in current frame */
745 if (hmm_frame(&rhmm->hmm) < frame_idx)
746 continue;
747
748 if (hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) {
749 hmm_frame(&rhmm->hmm) = nf; /* rhmm will be active in next frame */
750 E_DEBUG(3,("Preserving root channel %d score %d\n", i, hmm_bestscore(&rhmm->hmm)));
751 /* transitions out of this root channel */
752 /* transition to all next-level channels in the HMM tree */
753 newphone_score = hmm_out_score(&rhmm->hmm) + ngs->pip;
754 if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
755 for (hmm = rhmm->next; hmm; hmm = hmm->alt) {
756 int32 pl_newphone_score = newphone_score
757 + phone_loop_search_score(pls, hmm->ciphone);
758 if (pl_newphone_score BETTER_THAN newphone_thresh) {
759 if ((hmm_frame(&hmm->hmm) < frame_idx)
760 || (newphone_score BETTER_THAN hmm_in_score(&hmm->hmm))) {
761 hmm_enter(&hmm->hmm, newphone_score,
762 hmm_out_history(&rhmm->hmm), nf);
763 *(nacl++) = hmm;
764 }
765 }
766 }
767 }
768
769 /*
770 * Transition to last phone of all words for which this is the
771 * penultimate phone (the last phones may need multiple right contexts).
772 * Remember to remove the temporary newword_penalty.
773 */
774 if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
775 for (w = rhmm->penult_phn_wid; w >= 0;
776 w = ngs->homophone_set[w]) {
777 int32 pl_newphone_score = newphone_score
779 (pls, dict_last_phone(ps_search_dict(ngs),w));
780 E_DEBUG(3,("word %s newphone_score %d\n", dict_wordstr(ps_search_dict(ngs), w), newphone_score));
781 if (pl_newphone_score BETTER_THAN lastphn_thresh) {
782 candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
783 ngs->n_lastphn_cand++;
784 candp->wid = w;
785 candp->score =
786 newphone_score - ngs->nwpen;
787 candp->bp = hmm_out_history(&rhmm->hmm);
788 }
789 }
790 }
791 }
792 }
793 ngs->n_active_chan[nf & 0x1] = (int)(nacl - ngs->active_chan_list[nf & 0x1]);
794}
795
796/*
797 * Prune currently active nonroot channels in HMM tree for next frame. Also, perform
798 * exit transitions out of such channels and activate successors.
799 */
800static void
801prune_nonroot_chan(ngram_search_t *ngs, int frame_idx)
802{
803 chan_t *hmm, *nexthmm;
804 int32 nf, w, i;
805 int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
806 chan_t **acl, **nacl; /* active list, next active list */
807 lastphn_cand_t *candp;
809
810 nf = frame_idx + 1;
811
812 thresh = ngs->best_score + ngs->dynamic_beam;
813 newphone_thresh = ngs->best_score + ngs->pbeam;
814 lastphn_thresh = ngs->best_score + ngs->lpbeam;
815 pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
816
817 acl = ngs->active_chan_list[frame_idx & 0x1]; /* currently active HMMs in tree */
818 nacl = ngs->active_chan_list[nf & 0x1] + ngs->n_active_chan[nf & 0x1];
819
820 for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++); i > 0;
821 --i, hmm = *(acl++)) {
822 assert(hmm_frame(&hmm->hmm) >= frame_idx);
823
824 if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
825 /* retain this channel in next frame */
826 if (hmm_frame(&hmm->hmm) != nf) {
827 hmm_frame(&hmm->hmm) = nf;
828 *(nacl++) = hmm;
829 }
830
831 /* transition to all next-level channel in the HMM tree */
832 newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip;
833 if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
834 for (nexthmm = hmm->next; nexthmm; nexthmm = nexthmm->alt) {
835 int32 pl_newphone_score = newphone_score
836 + phone_loop_search_score(pls, nexthmm->ciphone);
837 if ((pl_newphone_score BETTER_THAN newphone_thresh)
838 && ((hmm_frame(&nexthmm->hmm) < frame_idx)
839 || (newphone_score
840 BETTER_THAN hmm_in_score(&nexthmm->hmm)))) {
841 if (hmm_frame(&nexthmm->hmm) != nf) {
842 /* Keep this HMM on the active list */
843 *(nacl++) = nexthmm;
844 }
845 hmm_enter(&nexthmm->hmm, newphone_score,
846 hmm_out_history(&hmm->hmm), nf);
847 }
848 }
849 }
850
851 /*
852 * Transition to last phone of all words for which this is the
853 * penultimate phone (the last phones may need multiple right contexts).
854 * Remember to remove the temporary newword_penalty.
855 */
856 if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
857 for (w = hmm->info.penult_phn_wid; w >= 0;
858 w = ngs->homophone_set[w]) {
859 int32 pl_newphone_score = newphone_score
861 (pls, dict_last_phone(ps_search_dict(ngs),w));
862 if (pl_newphone_score BETTER_THAN lastphn_thresh) {
863 candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
864 ngs->n_lastphn_cand++;
865 candp->wid = w;
866 candp->score =
867 newphone_score - ngs->nwpen;
868 candp->bp = hmm_out_history(&hmm->hmm);
869 }
870 }
871 }
872 }
873 else if (hmm_frame(&hmm->hmm) != nf) {
874 hmm_clear(&hmm->hmm);
875 }
876 }
877 ngs->n_active_chan[nf & 0x1] = (int)(nacl - ngs->active_chan_list[nf & 0x1]);
878}
879
880/*
881 * Execute the transition into the last phone for all candidates words emerging from
882 * the HMM tree. Attach LM scores to such transitions.
883 * (Executed after pruning root and non-root, but before pruning word-chan.)
884 */
885static void
886last_phone_transition(ngram_search_t *ngs, int frame_idx)
887{
888 int32 i, j, k, nf, bp, bpend, w;
889 lastphn_cand_t *candp;
890 int32 *nawl;
891 int32 thresh;
892 int32 bestscore, dscr;
893 chan_t *hmm;
894 bptbl_t *bpe;
895 int32 n_cand_sf = 0;
896
897 nf = frame_idx + 1;
898 nawl = ngs->active_word_list[nf & 0x1];
899 ngs->st.n_lastphn_cand_utt += ngs->n_lastphn_cand;
900
901 /* For each candidate word (entering its last phone) */
902 /* If best LM score and bp for candidate known use it, else sort cands by startfrm */
903 for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
904 int32 start_score;
905
906 /* This can happen if recognition fails. */
907 if (candp->bp == -1)
908 continue;
909 /* Backpointer entry for it. */
910 bpe = &(ngs->bp_table[candp->bp]);
911
912 /* Subtract starting score for candidate, leave it with only word score */
913 start_score = ngram_search_exit_score
914 (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
915 assert(start_score BETTER_THAN WORST_SCORE);
916 candp->score -= start_score;
917
918 /*
919 * If this candidate not occurred in an earlier frame, prepare for finding
920 * best transition score into last phone; sort by start frame.
921 */
922 /* i.e. if we don't have an entry in last_ltrans for this
923 * <word,sf>, then create one */
924 if (ngs->last_ltrans[candp->wid].sf != bpe->frame + 1) {
925 /* Look for an entry in cand_sf matching the backpointer
926 * for this candidate. */
927 for (j = 0; j < n_cand_sf; j++) {
928 if (ngs->cand_sf[j].bp_ef == bpe->frame)
929 break;
930 }
931 /* Oh, we found one, so chain onto it. */
932 if (j < n_cand_sf)
933 candp->next = ngs->cand_sf[j].cand;
934 else {
935 /* Nope, let's make a new one, allocating cand_sf if necessary. */
936 if (n_cand_sf >= ngs->cand_sf_alloc) {
937 if (ngs->cand_sf_alloc == 0) {
938 ngs->cand_sf =
939 ckd_calloc(CAND_SF_ALLOCSIZE,
940 sizeof(*ngs->cand_sf));
941 ngs->cand_sf_alloc = CAND_SF_ALLOCSIZE;
942 }
943 else {
944 ngs->cand_sf_alloc += CAND_SF_ALLOCSIZE;
945 ngs->cand_sf = ckd_realloc(ngs->cand_sf,
946 ngs->cand_sf_alloc
947 * sizeof(*ngs->cand_sf));
948 E_INFO("cand_sf[] increased to %d entries\n",
949 ngs->cand_sf_alloc);
950 }
951 }
952
953 /* Use the newly created cand_sf. */
954 j = n_cand_sf++;
955 candp->next = -1; /* End of the chain. */
956 ngs->cand_sf[j].bp_ef = bpe->frame;
957 }
958 /* Update it to point to this candidate. */
959 ngs->cand_sf[j].cand = i;
960
961 ngs->last_ltrans[candp->wid].dscr = WORST_SCORE;
962 ngs->last_ltrans[candp->wid].sf = bpe->frame + 1;
963 }
964 }
965
966 /* Compute best LM score and bp for new cands entered in the sorted lists above */
967 for (i = 0; i < n_cand_sf; i++) {
968 /* For the i-th unique end frame... */
969 bp = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef];
970 bpend = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef + 1];
971 for (bpe = &(ngs->bp_table[bp]); bp < bpend; bp++, bpe++) {
972 if (!bpe->valid)
973 continue;
974 /* For each candidate at the start frame find bp->cand transition-score */
975 for (j = ngs->cand_sf[i].cand; j >= 0; j = candp->next) {
976 int32 n_used;
977 candp = &(ngs->lastphn_cand[j]);
978 dscr =
980 (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
981 if (dscr BETTER_THAN WORST_SCORE) {
982 assert(!dict_filler_word(ps_search_dict(ngs), candp->wid));
983 dscr += ngram_tg_score(ngs->lmset,
984 dict_basewid(ps_search_dict(ngs), candp->wid),
985 bpe->real_wid,
986 bpe->prev_real_wid,
987 &n_used)>>SENSCR_SHIFT;
988 }
989
990 if (dscr BETTER_THAN ngs->last_ltrans[candp->wid].dscr) {
991 ngs->last_ltrans[candp->wid].dscr = dscr;
992 ngs->last_ltrans[candp->wid].bp = bp;
993 }
994 }
995 }
996 }
997
998 /* Update best transitions for all candidates; also update best lastphone score */
999 bestscore = ngs->last_phone_best_score;
1000 for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
1001 candp->score += ngs->last_ltrans[candp->wid].dscr;
1002 candp->bp = ngs->last_ltrans[candp->wid].bp;
1003
1004 if (candp->score BETTER_THAN bestscore)
1005 bestscore = candp->score;
1006 }
1007 ngs->last_phone_best_score = bestscore;
1008
1009 /* At this pt, we know the best entry score (with LM component) for all candidates */
1010 thresh = bestscore + ngs->lponlybeam;
1011 for (i = ngs->n_lastphn_cand, candp = ngs->lastphn_cand; i > 0; --i, candp++) {
1012 if (candp->score BETTER_THAN thresh) {
1013 w = candp->wid;
1014
1016
1017 k = 0;
1018 for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
1019 if ((hmm_frame(&hmm->hmm) < frame_idx)
1020 || (candp->score BETTER_THAN hmm_in_score(&hmm->hmm))) {
1021 assert(hmm_frame(&hmm->hmm) != nf);
1022 hmm_enter(&hmm->hmm,
1023 candp->score, candp->bp, nf);
1024 k++;
1025 }
1026 }
1027 if (k > 0) {
1028 assert(bitvec_is_clear(ngs->word_active, w));
1029 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
1030 *(nawl++) = w;
1031 bitvec_set(ngs->word_active, w);
1032 }
1033 }
1034 }
1035 ngs->n_active_word[nf & 0x1] = (int)(nawl - ngs->active_word_list[nf & 0x1]);
1036}
1037
1038/*
1039 * Prune currently active word channels for next frame. Also, perform exit
1040 * transitions out of such channels and active successors.
1041 */
1042static void
1043prune_word_chan(ngram_search_t *ngs, int frame_idx)
1044{
1045 root_chan_t *rhmm;
1046 chan_t *hmm, *thmm;
1047 chan_t **phmmp; /* previous HMM-pointer */
1048 int32 nf, w, i, k;
1049 int32 newword_thresh, lastphn_thresh;
1050 int32 *awl, *nawl;
1051
1052 nf = frame_idx + 1;
1053 newword_thresh = ngs->last_phone_best_score + ngs->wbeam;
1054 lastphn_thresh = ngs->last_phone_best_score + ngs->lponlybeam;
1055
1056 awl = ngs->active_word_list[frame_idx & 0x1];
1057 nawl = ngs->active_word_list[nf & 0x1] + ngs->n_active_word[nf & 0x1];
1058
1059 /* Dynamically allocated last channels of multi-phone words */
1060 for (i = ngs->n_active_word[frame_idx & 0x1], w = *(awl++); i > 0;
1061 --i, w = *(awl++)) {
1062 k = 0;
1063 phmmp = &(ngs->word_chan[w]);
1064 for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
1065 assert(hmm_frame(&hmm->hmm) >= frame_idx);
1066
1067 thmm = hmm->next;
1068 if (hmm_bestscore(&hmm->hmm) BETTER_THAN lastphn_thresh) {
1069 /* retain this channel in next frame */
1070 hmm_frame(&hmm->hmm) = nf;
1071 k++;
1072 phmmp = &(hmm->next);
1073
1074 /* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */
1075 if (hmm_out_score(&hmm->hmm) BETTER_THAN newword_thresh) {
1076 /* can exit channel and recognize word */
1077 ngram_search_save_bp(ngs, frame_idx, w,
1078 hmm_out_score(&hmm->hmm),
1079 hmm_out_history(&hmm->hmm),
1080 hmm->info.rc_id);
1081 }
1082 }
1083 else if (hmm_frame(&hmm->hmm) == nf) {
1084 phmmp = &(hmm->next);
1085 }
1086 else {
1087 hmm_deinit(&hmm->hmm);
1088 listelem_free(ngs->chan_alloc, hmm);
1089 *phmmp = thmm;
1090 }
1091 }
1092 if ((k > 0) && (bitvec_is_clear(ngs->word_active, w))) {
1093 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
1094 *(nawl++) = w;
1095 bitvec_set(ngs->word_active, w);
1096 }
1097 }
1098 ngs->n_active_word[nf & 0x1] = (int)(nawl - ngs->active_word_list[nf & 0x1]);
1099
1100 /*
1101 * Prune permanently allocated single-phone channels.
1102 * NOTES: score[] of pruned channels set to WORST_SCORE elsewhere.
1103 */
1104 for (i = 0; i < ngs->n_1ph_words; i++) {
1105 w = ngs->single_phone_wid[i];
1106 rhmm = (root_chan_t *) ngs->word_chan[w];
1107 E_DEBUG(3,("Single phone word %s frame %d score %d thresh %d outscore %d nwthresh %d\n",
1108 dict_wordstr(ps_search_dict(ngs),w),
1109 hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm),
1110 lastphn_thresh, hmm_out_score(&rhmm->hmm), newword_thresh));
1111 if (hmm_frame(&rhmm->hmm) < frame_idx)
1112 continue;
1113 if (hmm_bestscore(&rhmm->hmm) BETTER_THAN lastphn_thresh) {
1114 hmm_frame(&rhmm->hmm) = nf;
1115
1116 /* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */
1117 if (hmm_out_score(&rhmm->hmm) BETTER_THAN newword_thresh) {
1118 E_DEBUG(4,("Exiting single phone word %s with %d > %d, %d\n",
1119 dict_wordstr(ps_search_dict(ngs),w),
1120 hmm_out_score(&rhmm->hmm),
1121 lastphn_thresh, newword_thresh));
1122 ngram_search_save_bp(ngs, frame_idx, w,
1123 hmm_out_score(&rhmm->hmm),
1124 hmm_out_history(&rhmm->hmm), 0);
1125 }
1126 }
1127 }
1128}
1129
1130static void
1131prune_channels(ngram_search_t *ngs, int frame_idx)
1132{
1133 /* Clear last phone candidate list. */
1134 ngs->n_lastphn_cand = 0;
1135 /* Set the dynamic beam based on maxhmmpf here. */
1136 ngs->dynamic_beam = ngs->beam;
1137 if (ngs->maxhmmpf != -1
1138 && ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval > ngs->maxhmmpf) {
1139 /* Build a histogram to approximately prune them. */
1140 int32 bins[256], bw, nhmms, i;
1141 root_chan_t *rhmm;
1142 chan_t **acl, *hmm;
1143
1144 /* Bins go from zero (best score) to edge of beam. */
1145 bw = -ngs->beam / 256;
1146 memset(bins, 0, sizeof(bins));
1147 /* For each active root channel. */
1148 for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
1149 int32 b;
1150
1151 /* Put it in a bin according to its bestscore. */
1152 b = (ngs->best_score - hmm_bestscore(&rhmm->hmm)) / bw;
1153 if (b >= 256)
1154 b = 255;
1155 ++bins[b];
1156 }
1157 /* For each active non-root channel. */
1158 acl = ngs->active_chan_list[frame_idx & 0x1]; /* currently active HMMs in tree */
1159 for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++);
1160 i > 0; --i, hmm = *(acl++)) {
1161 int32 b;
1162
1163 /* Put it in a bin according to its bestscore. */
1164 b = (ngs->best_score - hmm_bestscore(&hmm->hmm)) / bw;
1165 if (b >= 256)
1166 b = 255;
1167 ++bins[b];
1168 }
1169 /* Walk down the bins to find the new beam. */
1170 for (i = nhmms = 0; i < 256; ++i) {
1171 nhmms += bins[i];
1172 if (nhmms > ngs->maxhmmpf)
1173 break;
1174 }
1175 ngs->dynamic_beam = -(i * bw);
1176 }
1177
1178 prune_root_chan(ngs, frame_idx);
1179 prune_nonroot_chan(ngs, frame_idx);
1180 last_phone_transition(ngs, frame_idx);
1181 prune_word_chan(ngs, frame_idx);
1182}
1183
1184/*
1185 * Limit the number of word exits in each frame to maxwpf. And also limit the number of filler
1186 * words to 1.
1187 */
1188static void
1189bptable_maxwpf(ngram_search_t *ngs, int frame_idx)
1190{
1191 int32 bp, n;
1192 int32 bestscr, worstscr;
1193 bptbl_t *bpe, *bestbpe, *worstbpe;
1194
1195 /* Don't prune if no pruing. */
1196 if (ngs->maxwpf == -1 || ngs->maxwpf == ps_search_n_words(ngs))
1197 return;
1198
1199 /* Allow only one filler word exit (the best) per frame */
1200 bestscr = (int32) 0x80000000;
1201 bestbpe = NULL;
1202 n = 0;
1203 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1204 bpe = &(ngs->bp_table[bp]);
1205 if (dict_filler_word(ps_search_dict(ngs), bpe->wid)) {
1206 if (bpe->score BETTER_THAN bestscr) {
1207 bestscr = bpe->score;
1208 bestbpe = bpe;
1209 }
1210 bpe->valid = FALSE;
1211 n++; /* No. of filler words */
1212 }
1213 }
1214 /* Restore bestbpe to valid state */
1215 if (bestbpe != NULL) {
1216 bestbpe->valid = TRUE;
1217 --n;
1218 }
1219
1220 /* Allow up to maxwpf best entries to survive; mark the remaining with valid = 0 */
1221 n = (ngs->bpidx
1222 - ngs->bp_table_idx[frame_idx]) - n; /* No. of entries after limiting fillers */
1223 for (; n > ngs->maxwpf; --n) {
1224 /* Find worst BPTable entry */
1225 worstscr = (int32) 0x7fffffff;
1226 worstbpe = NULL;
1227 for (bp = ngs->bp_table_idx[frame_idx]; (bp < ngs->bpidx); bp++) {
1228 bpe = &(ngs->bp_table[bp]);
1229 if (bpe->valid && (bpe->score WORSE_THAN worstscr)) {
1230 worstscr = bpe->score;
1231 worstbpe = bpe;
1232 }
1233 }
1234 /* FIXME: Don't panic! */
1235 if (worstbpe == NULL)
1236 E_FATAL("PANIC: No worst BPtable entry remaining\n");
1237 worstbpe->valid = FALSE;
1238 }
1239}
1240
1241static void
1242word_transition(ngram_search_t *ngs, int frame_idx)
1243{
1244 int32 i, k, bp, w, nf;
1245 int32 rc;
1246 int32 thresh, newscore, pl_newscore;
1247 bptbl_t *bpe;
1248 root_chan_t *rhmm;
1249 struct bestbp_rc_s *bestbp_rc_ptr;
1251 dict_t *dict = ps_search_dict(ngs);
1252 dict2pid_t *d2p = ps_search_dict2pid(ngs);
1253
1254 /*
1255 * Transition to start of new word instances (HMM tree roots); but only if words
1256 * other than </s> finished here.
1257 * But, first, find the best starting score for each possible right context phone.
1258 */
1259 for (i = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef) - 1; i >= 0; --i)
1260 ngs->bestbp_rc[i].score = WORST_SCORE;
1261 k = 0;
1262 pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
1263 /* Ugh, this is complicated. Scan all word exits for this frame
1264 * (they have already been created by prune_word_chan()). */
1265 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1266 bpe = &(ngs->bp_table[bp]);
1267 ngs->word_lat_idx[bpe->wid] = NO_BP;
1268
1269 if (bpe->wid == ps_search_finish_wid(ngs))
1270 continue;
1271 k++;
1272
1273 /* DICT2PID */
1274 /* Array of HMM scores corresponding to all the possible right
1275 * context expansions of the final phone. It's likely that a
1276 * lot of these are going to be missing, actually. */
1277 if (bpe->last2_phone == -1) { /* implies s_idx == -1 */
1278 /* No right context expansion. */
1279 for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
1280 if (bpe->score BETTER_THAN ngs->bestbp_rc[rc].score) {
1281 E_DEBUG(4,("bestbp_rc[0] = %d lc %d\n",
1282 bpe->score, bpe->last_phone));
1283 ngs->bestbp_rc[rc].score = bpe->score;
1284 ngs->bestbp_rc[rc].path = bp;
1285 ngs->bestbp_rc[rc].lc = bpe->last_phone;
1286 }
1287 }
1288 }
1289 else {
1290 xwdssid_t *rssid = dict2pid_rssid(d2p, bpe->last_phone, bpe->last2_phone);
1291 int32 *rcss = &(ngs->bscore_stack[bpe->s_idx]);
1292 for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
1293 if (rcss[rssid->cimap[rc]] BETTER_THAN ngs->bestbp_rc[rc].score) {
1294 E_DEBUG(4,("bestbp_rc[%d] = %d lc %d\n",
1295 rc, rcss[rssid->cimap[rc]], bpe->last_phone));
1296 ngs->bestbp_rc[rc].score = rcss[rssid->cimap[rc]];
1297 ngs->bestbp_rc[rc].path = bp;
1298 ngs->bestbp_rc[rc].lc = bpe->last_phone;
1299 }
1300 }
1301 }
1302 }
1303 if (k == 0)
1304 return;
1305
1306 nf = frame_idx + 1;
1307 thresh = ngs->best_score + ngs->dynamic_beam;
1308 /*
1309 * Hypothesize successors to words finished in this frame.
1310 * Main dictionary, multi-phone words transition to HMM-trees roots.
1311 */
1312 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
1313 bestbp_rc_ptr = &(ngs->bestbp_rc[rhmm->ciphone]);
1314
1315 newscore = bestbp_rc_ptr->score + ngs->nwpen + ngs->pip;
1316 pl_newscore = newscore
1317 + phone_loop_search_score(pls, rhmm->ciphone);
1318 if (pl_newscore BETTER_THAN thresh) {
1319 if ((hmm_frame(&rhmm->hmm) < frame_idx)
1320 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
1321 hmm_enter(&rhmm->hmm, newscore,
1322 bestbp_rc_ptr->path, nf);
1323 /* DICT2PID: Another place where mpx ssids are entered. */
1324 /* Look up the ssid to use when entering this mpx triphone. */
1325 hmm_mpx_ssid(&rhmm->hmm, 0) =
1326 dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone, bestbp_rc_ptr->lc);
1327 assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
1328 }
1329 }
1330 }
1331
1332 /*
1333 * Single phone words; no right context for these. Cannot use bestbp_rc as
1334 * LM scores have to be included. First find best transition to these words.
1335 */
1336 for (i = 0; i < ngs->n_1ph_LMwords; i++) {
1337 w = ngs->single_phone_wid[i];
1338 ngs->last_ltrans[w].dscr = (int32) 0x80000000;
1339 }
1340 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1341 bpe = &(ngs->bp_table[bp]);
1342 if (!bpe->valid)
1343 continue;
1344
1345 for (i = 0; i < ngs->n_1ph_LMwords; i++) {
1346 int32 n_used;
1347 w = ngs->single_phone_wid[i];
1348 newscore = ngram_search_exit_score
1349 (ngs, bpe, dict_first_phone(dict, w));
1350 E_DEBUG(4, ("initial newscore for %s: %d\n",
1351 dict_wordstr(dict, w), newscore));
1352 if (newscore != WORST_SCORE)
1353 newscore += ngram_tg_score(ngs->lmset,
1354 dict_basewid(dict, w),
1355 bpe->real_wid,
1356 bpe->prev_real_wid,
1357 &n_used)>>SENSCR_SHIFT;
1358
1359 /* FIXME: Not sure how WORST_SCORE could be better, but it
1360 * apparently happens. */
1361 if (newscore BETTER_THAN ngs->last_ltrans[w].dscr) {
1362 ngs->last_ltrans[w].dscr = newscore;
1363 ngs->last_ltrans[w].bp = bp;
1364 }
1365 }
1366 }
1367
1368 /* Now transition to in-LM single phone words */
1369 for (i = 0; i < ngs->n_1ph_LMwords; i++) {
1370 w = ngs->single_phone_wid[i];
1371 /* Never transition into the start word (for one thing, it is
1372 a non-event in the language model.) */
1373 if (w == dict_startwid(ps_search_dict(ngs)))
1374 continue;
1375 rhmm = (root_chan_t *) ngs->word_chan[w];
1376 newscore = ngs->last_ltrans[w].dscr + ngs->pip;
1377 pl_newscore = newscore + phone_loop_search_score(pls, rhmm->ciphone);
1378 if (pl_newscore BETTER_THAN thresh) {
1379 bpe = ngs->bp_table + ngs->last_ltrans[w].bp;
1380 if ((hmm_frame(&rhmm->hmm) < frame_idx)
1381 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
1382 hmm_enter(&rhmm->hmm,
1383 newscore, ngs->last_ltrans[w].bp, nf);
1384 /* DICT2PID: another place where mpx ssids are entered. */
1385 /* Look up the ssid to use when entering this mpx triphone. */
1386 hmm_mpx_ssid(&rhmm->hmm, 0) =
1387 dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone,
1388 dict_last_phone(dict, bpe->wid));
1389 assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
1390 }
1391 }
1392 }
1393
1394 /* Remaining words: <sil>, noise words. No mpx for these! */
1395 w = ps_search_silence_wid(ngs);
1396 rhmm = (root_chan_t *) ngs->word_chan[w];
1397 bestbp_rc_ptr = &(ngs->bestbp_rc[ps_search_acmod(ngs)->mdef->sil]);
1398 newscore = bestbp_rc_ptr->score + ngs->silpen + ngs->pip;
1399 pl_newscore = newscore
1400 + phone_loop_search_score(pls, rhmm->ciphone);
1401 if (pl_newscore BETTER_THAN thresh) {
1402 if ((hmm_frame(&rhmm->hmm) < frame_idx)
1403 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
1404 hmm_enter(&rhmm->hmm,
1405 newscore, bestbp_rc_ptr->path, nf);
1406 }
1407 }
1408 for (w = dict_filler_start(dict); w <= dict_filler_end(dict); w++) {
1409 if (w == ps_search_silence_wid(ngs))
1410 continue;
1411 /* Never transition into the start word (for one thing, it is
1412 a non-event in the language model.) */
1413 if (w == dict_startwid(ps_search_dict(ngs)))
1414 continue;
1415 rhmm = (root_chan_t *) ngs->word_chan[w];
1416 /* If this was not actually a single-phone word, rhmm will be NULL. */
1417 if (rhmm == NULL)
1418 continue;
1419 newscore = bestbp_rc_ptr->score + ngs->fillpen + ngs->pip;
1420 pl_newscore = newscore
1421 + phone_loop_search_score(pls, rhmm->ciphone);
1422 if (pl_newscore BETTER_THAN thresh) {
1423 if ((hmm_frame(&rhmm->hmm) < frame_idx)
1424 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
1425 hmm_enter(&rhmm->hmm,
1426 newscore, bestbp_rc_ptr->path, nf);
1427 }
1428 }
1429 }
1430}
1431
1432static void
1433deactivate_channels(ngram_search_t *ngs, int frame_idx)
1434{
1435 root_chan_t *rhmm;
1436 int i;
1437
1438 /* Clear score[] of pruned root channels */
1439 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
1440 if (hmm_frame(&rhmm->hmm) == frame_idx) {
1441 hmm_clear(&rhmm->hmm);
1442 }
1443 }
1444 /* Clear score[] of pruned single-phone channels */
1445 for (i = 0; i < ngs->n_1ph_words; i++) {
1446 int32 w = ngs->single_phone_wid[i];
1447 rhmm = (root_chan_t *) ngs->word_chan[w];
1448 if (hmm_frame(&rhmm->hmm) == frame_idx) {
1449 hmm_clear(&rhmm->hmm);
1450 }
1451 }
1452}
1453
1454int
1456{
1457 int16 const *senscr;
1458
1459 /* Activate our HMMs for the current frame if need be. */
1460 if (!ps_search_acmod(ngs)->compallsen)
1461 compute_sen_active(ngs, frame_idx);
1462
1463 /* Compute GMM scores for the current frame. */
1464 if ((senscr = acmod_score(ps_search_acmod(ngs), &frame_idx)) == NULL)
1465 return 0;
1466 ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
1467
1468 /* Mark backpointer table for current frame. */
1469 ngram_search_mark_bptable(ngs, frame_idx);
1470
1471 /* If the best score is equal to or worse than WORST_SCORE,
1472 * recognition has failed, don't bother to keep trying. */
1474 return 0;
1475 /* Renormalize if necessary */
1476 if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) {
1477 E_INFO("Renormalizing Scores at frame %d, best score %d\n",
1478 frame_idx, ngs->best_score);
1479 renormalize_scores(ngs, frame_idx, ngs->best_score);
1480 }
1481
1482 /* Evaluate HMMs */
1483 evaluate_channels(ngs, senscr, frame_idx);
1484 /* Prune HMMs and do phone transitions. */
1485 prune_channels(ngs, frame_idx);
1486 /* Do absolute pruning on word exits. */
1487 bptable_maxwpf(ngs, frame_idx);
1488 /* Do word transitions. */
1489 word_transition(ngs, frame_idx);
1490 /* Deactivate pruned HMMs. */
1491 deactivate_channels(ngs, frame_idx);
1492
1493 ++ngs->n_frame;
1494 /* Return the number of frames processed. */
1495 return 1;
1496}
1497
1498void
1500{
1501 int32 i, w, cf, *awl;
1502 root_chan_t *rhmm;
1503 chan_t *hmm, **acl;
1504
1505 /* This is the number of frames processed. */
1506 cf = ps_search_acmod(ngs)->output_frame;
1507 /* Add a mark in the backpointer table for one past the final frame. */
1509
1510 /* Deactivate channels lined up for the next frame */
1511 /* First, root channels of HMM tree */
1512 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
1513 hmm_clear(&rhmm->hmm);
1514 }
1515
1516 /* nonroot channels of HMM tree */
1517 i = ngs->n_active_chan[cf & 0x1];
1518 acl = ngs->active_chan_list[cf & 0x1];
1519 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
1520 hmm_clear(&hmm->hmm);
1521 }
1522
1523 /* word channels */
1524 i = ngs->n_active_word[cf & 0x1];
1525 awl = ngs->active_word_list[cf & 0x1];
1526 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
1527 /* Don't accidentally free single-phone words! */
1528 if (dict_is_single_phone(ps_search_dict(ngs), w))
1529 continue;
1530 bitvec_clear(ngs->word_active, w);
1531 if (ngs->word_chan[w] == NULL)
1532 continue;
1534 }
1535
1536 /*
1537 * The previous search code did a postprocessing of the
1538 * backpointer table here, but we will postpone this until it is
1539 * absolutely necessary, i.e. when generating a word graph.
1540 * Likewise we don't actually have to decide what the exit word is
1541 * until somebody requests a backtrace.
1542 */
1543
1544 ptmr_stop(&ngs->fwdtree_perf);
1545 /* Print out some statistics. */
1546 if (cf > 0) {
1547 double n_speech = (double)(cf + 1)
1548 / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
1549 E_INFO("%8d words recognized (%d/fr)\n",
1550 ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
1551 E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt,
1552 (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
1553 E_INFO("%8d channels searched (%d/fr), %d 1st, %d last\n",
1554 ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval,
1555 (ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval) / (cf + 1),
1556 ngs->st.n_root_chan_eval, ngs->st.n_last_chan_eval);
1557 E_INFO("%8d words for which last channels evaluated (%d/fr)\n",
1558 ngs->st.n_word_lastchan_eval,
1559 ngs->st.n_word_lastchan_eval / (cf + 1));
1560 E_INFO("%8d candidate words for entering last phone (%d/fr)\n",
1561 ngs->st.n_lastphn_cand_utt, ngs->st.n_lastphn_cand_utt / (cf + 1));
1562 E_INFO("fwdtree %.2f CPU %.3f xRT\n",
1563 ngs->fwdtree_perf.t_cpu,
1564 ngs->fwdtree_perf.t_cpu / n_speech);
1565 E_INFO("fwdtree %.2f wall %.3f xRT\n",
1566 ngs->fwdtree_perf.t_elapsed,
1567 ngs->fwdtree_perf.t_elapsed / n_speech);
1568 }
1569 /* dump_bptable(ngs); */
1570}
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1213
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1106
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1197
#define BAD_SSID
Invalid senone sequence ID (limited to 16 bits for PocketSphinx).
Definition: bin_mdef.h:94
s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos)
Return the senone sequence ID for the given word position.
Definition: dict2pid.c:367
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition: dict2pid.h:115
#define dict_pron(d, w, p)
The CI phones of the word w at position p.
Definition: dict.h:165
#define BETTER_THAN
Is one score better than another?
Definition: hmm.h:95
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
Definition: hmm.h:227
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:84
#define WORSE_THAN
Is one score worse than another?
Definition: hmm.h:100
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Free last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:647
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
Definition: ngram_search.c:598
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
Definition: ngram_search.c:660
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame's index in the backpointer table.
Definition: ngram_search.c:329
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
Definition: ngram_search.c:383
void ngram_fwdtree_deinit(ngram_search_t *ngs)
Release memory associated with fwdtree decoding.
void ngram_fwdtree_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdtree decoding.
int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
int ngram_fwdtree_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
void ngram_fwdtree_finish(ngram_search_t *ngs)
Finish fwdtree decoding for an utterance.
void ngram_fwdtree_start(ngram_search_t *ngs)
Start fwdtree decoding for an utterance.
Lexicon tree based Viterbi search.
Fast and rough context-independent phoneme loop search.
#define phone_loop_search_score(pls, ci)
Return lookahead heuristic score for a specific phone.
Back pointer table (forward pass lattice; actually a tree)
Definition: ngram_search.h:109
int32 wid
Word index.
Definition: ngram_search.h:113
int16 last2_phone
next-to-last phone of this word
Definition: ngram_search.h:120
uint8 valid
For absolute pruning.
Definition: ngram_search.h:111
int32 prev_real_wid
wid of second-last real word
Definition: ngram_search.h:118
int32 real_wid
wid of this or latest predecessor real word
Definition: ngram_search.h:117
int32 score
Score (best among all right contexts)
Definition: ngram_search.h:115
int16 last_phone
last phone of this word
Definition: ngram_search.h:119
int32 s_idx
Start of BScoreStack for various right contexts.
Definition: ngram_search.h:116
frame_idx_t frame
start or end frame
Definition: ngram_search.h:110
Lexical tree node data type.
Definition: ngram_search.h:64
int32 penult_phn_wid
list of words whose last phone follows this one; this field indicates the first of the list; the rest...
Definition: ngram_search.h:75
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative r...
Definition: ngram_search.h:68
int32 ciphone
ciphone for this node
Definition: ngram_search.h:73
struct chan_s * alt
sibling; i.e., next descendant of parent HMM
Definition: ngram_search.h:71
hmm_t hmm
Basic HMM structure.
Definition: ngram_search.h:65
int32 rc_id
right-context id for last phone of words
Definition: ngram_search.h:79
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
a structure for a dictionary.
Definition: dict.h:76
N-Gram search module structure.
Definition: ngram_search.h:197
int32 n_nonroot_chan
Number of valid non-root channels.
Definition: ngram_search.h:234
int32 * single_phone_wid
list of single-phone word ids
Definition: ngram_search.h:264
int32 best_score
Best Viterbi path score.
Definition: ngram_search.h:325
root_chan_t * rhmm_1ph
Root HMMs for single-phone words.
Definition: ngram_search.h:236
int32 n_root_chan
Number of valid root_chan.
Definition: ngram_search.h:233
int32 max_nonroot_chan
Maximum possible number of non-root channels.
Definition: ngram_search.h:235
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
Definition: ngram_search.h:287
int32 n_frame
Number of frames actually present.
Definition: ngram_search.h:308
ngram_search_stats_t st
Various statistics for profiling.
Definition: ngram_search.h:335
int32 n_active_word[2]
Number entries in active_word_list.
Definition: ngram_search.h:288
ngram_model_t * lmset
Set of language models.
Definition: ngram_search.h:199
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
Definition: ngram_search.h:246
int32 last_phone_best_score
Best Viterbi path score for last phone.
Definition: ngram_search.h:326
chan_t *** active_chan_list
Array of active channels for current and next frame.
Definition: ngram_search.h:275
int32 n_1ph_words
Number single phone words in dict (total)
Definition: ngram_search.h:265
int32 n_1ph_LMwords
Number single phone dict words also in LM; these come first in single_phone_wid.
Definition: ngram_search.h:266
int32 * homophone_set
Each node in the HMM tree structure may point to a set of words whose last phone would follow that no...
Definition: ngram_search.h:263
int32 n_root_chan_alloc
Number of root_chan allocated.
Definition: ngram_search.h:232
listelem_alloc_t * chan_alloc
For chan_t.
Definition: ngram_search.h:211
int32 n_active_chan[2]
Number entries in active_chan_list.
Definition: ngram_search.h:276
hmm_context_t * hmmctx
HMM context.
Definition: ngram_search.h:200
root_chan_t * root_chan
Search structure of HMM instances.
Definition: ngram_search.h:231
bitvec_t * word_active
array of active flags for all words.
Definition: ngram_search.h:247
Phone loop search structure.
Base structure for search module.
char * hyp_str
Current hypothesis string.
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
Definition: ngram_search.h:90
int16 ci2phone
second ciphone of this node; one root HMM for each unique right context
Definition: ngram_search.h:102
hmm_t hmm
Basic HMM structure.
Definition: ngram_search.h:91
int16 ciphone
first ciphone of this node; all words rooted at this node begin with this ciphone
Definition: ngram_search.h:100
chan_t * next
first descendant of this channel
Definition: ngram_search.h:94
cross word triphone model structure
Definition: dict2pid.h:73
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition: dict2pid.h:75