47 #include <sphinxbase/ckd_alloc.h>
48 #include <sphinxbase/listelem_alloc.h>
49 #include <sphinxbase/err.h>
59 static int ngram_search_step(
ps_search_t *search,
int frame_idx);
60 static int ngram_search_finish(
ps_search_t *search);
62 static char const *ngram_search_hyp(
ps_search_t *search, int32 *out_score);
63 static int32 ngram_search_prob(
ps_search_t *search);
75 ngram_search_seg_iter,
78 static ngram_model_t *default_lm;
87 n_words = ps_search_n_words(ngs);
88 words = (
char const**)ckd_calloc(n_words,
sizeof(*words));
90 for (i = 0; i < n_words; ++i)
91 words[i] = dict_wordstr(ps_search_dict(ngs), i);
92 ngram_model_set_map_words(ngs->
lmset, words, n_words);
102 config = ps_search_config(ngs);
103 acmod = ps_search_acmod(ngs);
106 ngs->beam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-beam"))>>
SENSCR_SHIFT;
107 ngs->wbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-wbeam"))>>
SENSCR_SHIFT;
108 ngs->pbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-pbeam"))>>
SENSCR_SHIFT;
109 ngs->lpbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-lpbeam"))>>
SENSCR_SHIFT;
110 ngs->lponlybeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-lponlybeam"))>>
SENSCR_SHIFT;
111 ngs->fwdflatbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-fwdflatbeam"))>>
SENSCR_SHIFT;
112 ngs->fwdflatwbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-fwdflatwbeam"))>>
SENSCR_SHIFT;
115 ngs->maxwpf = cmd_ln_int32_r(config,
"-maxwpf");
116 ngs->maxhmmpf = cmd_ln_int32_r(config,
"-maxhmmpf");
119 ngs->wip = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-wip")) >>
SENSCR_SHIFT;
120 ngs->nwpen = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-nwpen")) >>
SENSCR_SHIFT;
121 ngs->pip = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-pip")) >>
SENSCR_SHIFT;
122 ngs->silpen = ngs->pip
123 + (logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-silprob"))>>
SENSCR_SHIFT);
124 ngs->fillpen = ngs->pip
125 + (logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-fillprob"))>>
SENSCR_SHIFT);
128 ngs->fwdflat_fwdtree_lw_ratio =
129 cmd_ln_float32_r(config,
"-fwdflatlw")
130 / cmd_ln_float32_r(config,
"-lw");
131 ngs->bestpath_fwdtree_lw_ratio =
132 cmd_ln_float32_r(config,
"-bestpathlw")
133 / cmd_ln_float32_r(config,
"-lw");
136 ngs->
ascale = 1.0 / cmd_ln_float32_r(config,
"-ascale");
148 static char *lmname =
"default";
153 cmd_ln_boolean_r(config,
"-fwdtree"));
155 ngs = ckd_calloc(1,
sizeof(*ngs));
156 ps_search_init(&ngs->base, &ngram_funcs, PS_SEARCH_TYPE_NGRAM, name, config, acmod, dict, d2p);
160 if (ngs->
hmmctx == NULL) {
161 ps_search_free(ps_search_base(ngs));
169 ngram_search_calc_beams(ngs);
174 ngs->word_lat_idx = ckd_calloc(
dict_size(dict),
175 sizeof(*ngs->word_lat_idx));
177 ngs->last_ltrans = ckd_calloc(
dict_size(dict),
178 sizeof(*ngs->last_ltrans));
182 ngs->bp_table_size = cmd_ln_int32_r(config,
"-latsize");
183 ngs->bp_table = ckd_calloc(ngs->bp_table_size,
184 sizeof(*ngs->bp_table));
186 ngs->bscore_stack_size = ngs->bp_table_size * 20;
187 ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size,
188 sizeof(*ngs->bscore_stack));
191 sizeof(*ngs->bp_table_idx));
198 ngs->
lmset = ngram_model_set_init(config, &lm, &lmname, NULL, 1);
202 if (ngram_wid(ngs->
lmset, S3_FINISH_WORD) ==
203 ngram_unknown_wid(ngs->
lmset))
205 E_ERROR(
"Language model/set does not contain </s>, "
206 "recognition will fail\n");
211 ngram_search_update_widmap(ngs);
214 if (cmd_ln_boolean_r(config,
"-fwdtree")) {
217 ngs->fwdtree_perf.name =
"fwdtree";
218 ptmr_init(&ngs->fwdtree_perf);
220 if (cmd_ln_boolean_r(config,
"-fwdflat")) {
223 ngs->fwdflat_perf.name =
"fwdflat";
224 ptmr_init(&ngs->fwdflat_perf);
226 if (cmd_ln_boolean_r(config,
"-bestpath")) {
227 ngs->bestpath = TRUE;
228 ngs->bestpath_perf.name =
"bestpath";
229 ptmr_init(&ngs->bestpath_perf);
251 ckd_free(ngs->word_lat_idx);
253 ckd_free(ngs->last_ltrans);
255 ngs->word_lat_idx = ckd_calloc(search->
n_words,
sizeof(*ngs->word_lat_idx));
257 ngs->last_ltrans = ckd_calloc(search->
n_words,
sizeof(*ngs->last_ltrans));
259 = ckd_calloc_2d(2, search->
n_words,
266 if (ngs->
lmset == NULL)
270 ngram_search_calc_beams(ngs);
273 ngram_search_update_widmap(ngs);
298 double n_speech = (double)ngs->n_tot_frame
299 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
301 E_INFO(
"TOTAL bestpath %.2f CPU %.3f xRT\n",
302 ngs->bestpath_perf.t_tot_cpu,
303 ngs->bestpath_perf.t_tot_cpu / n_speech);
304 E_INFO(
"TOTAL bestpath %.2f wall %.3f xRT\n",
305 ngs->bestpath_perf.t_tot_elapsed,
306 ngs->bestpath_perf.t_tot_elapsed / n_speech);
314 ngram_model_free(ngs->
lmset);
317 ckd_free(ngs->word_lat_idx);
319 ckd_free(ngs->bp_table);
320 ckd_free(ngs->bscore_stack);
321 if (ngs->bp_table_idx != NULL)
322 ckd_free(ngs->bp_table_idx - 1);
324 ckd_free(ngs->last_ltrans);
333 ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1,
335 *
sizeof(*ngs->bp_table_idx));
343 ngs->bp_table_idx[frame_idx] = ngs->bpidx;
353 ent = ngs->bp_table + bp;
354 if (ent->
bp == NO_BP)
357 prev = ngs->bp_table + ent->
bp;
366 ent->
real_wid = dict_basewid(ps_search_dict(ngs),
372 ent->
real_wid = dict_basewid(ps_search_dict(ngs), ent->
wid);
380 #define NGRAM_HISTORY_LONG_WORD 2000
384 int32 w, int32 score, int32 path, int32 rc)
391 bp = ngs->word_lat_idx[w];
394 if (frame_idx - ngs->bp_table[path].
frame > NGRAM_HISTORY_LONG_WORD) {
395 E_WARN(
"Word '%s' survived for %d frames, potential overpruning\n", dict_wordstr(ps_search_dict(ngs), w),
396 frame_idx - ngs->bp_table[path].
frame);
404 if (ngs->bp_table[bp].
bp != path) {
405 int32 bplh[2], newlh[2];
409 E_DEBUG(2,(
"Updating path history %d => %d frame %d\n",
410 ngs->bp_table[bp].
bp, path, frame_idx));
411 bplh[0] = ngs->bp_table[bp].
bp == -1
413 bplh[1] = ngs->bp_table[bp].
bp == -1
414 ? -1 : ngs->bp_table[ngs->bp_table[bp].
bp].
real_wid;
415 newlh[0] = path == -1
417 newlh[1] = path == -1
418 ? -1 : ngs->bp_table[path].
real_wid;
421 if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) {
425 E_DEBUG(1, (
"Updating language model state %s,%s => %s,%s frame %d\n",
426 dict_wordstr(ps_search_dict(ngs), bplh[0]),
427 dict_wordstr(ps_search_dict(ngs), bplh[1]),
428 dict_wordstr(ps_search_dict(ngs), newlh[0]),
429 dict_wordstr(ps_search_dict(ngs), newlh[1]),
431 set_real_wid(ngs, bp);
433 ngs->bp_table[bp].
bp = path;
435 ngs->bp_table[bp].
score = score;
440 if (ngs->bp_table[bp].
s_idx != -1)
441 ngs->bscore_stack[ngs->bp_table[bp].
s_idx + rc] = score;
448 if (ngs->bpidx == NO_BP) {
449 E_ERROR(
"No entries in backpointer table!");
454 if (ngs->bpidx >= ngs->bp_table_size) {
455 ngs->bp_table_size *= 2;
456 ngs->bp_table = ckd_realloc(ngs->bp_table,
458 *
sizeof(*ngs->bp_table));
459 E_INFO(
"Resized backpointer table to %d entries\n", ngs->bp_table_size);
461 if (ngs->bss_head >= ngs->bscore_stack_size
462 - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) {
463 ngs->bscore_stack_size *= 2;
464 ngs->bscore_stack = ckd_realloc(ngs->bscore_stack,
465 ngs->bscore_stack_size
466 *
sizeof(*ngs->bscore_stack));
467 E_INFO(
"Resized score stack to %d entries\n", ngs->bscore_stack_size);
470 ngs->word_lat_idx[w] = ngs->bpidx;
471 be = &(ngs->bp_table[ngs->bpidx]);
473 be->
frame = frame_idx;
476 be->
s_idx = ngs->bss_head;
478 assert(path != ngs->bpidx);
482 be->
last_phone = dict_last_phone(ps_search_dict(ngs),w);
483 if (dict_is_single_phone(ps_search_dict(ngs), w)) {
489 be->
last2_phone = dict_second_last_phone(ps_search_dict(ngs),w);
494 for (i = 0; i < rcsize; ++i)
495 ngs->bscore_stack[ngs->bss_head + i] =
WORST_SCORE;
497 ngs->bscore_stack[ngs->bss_head + rc] = score;
498 set_real_wid(ngs, ngs->bpidx);
501 ngs->bss_head += rcsize;
517 if (frame_idx == -1 || frame_idx >= ngs->
n_frame)
519 end_bpidx = ngs->bp_table_idx[frame_idx];
525 while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx)
532 assert(end_bpidx < ngs->bp_table_size);
533 for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) {
534 if (ngs->bp_table[bp].
wid == ps_search_finish_wid(ngs)
536 best_score = ngs->bp_table[bp].
score;
539 if (ngs->bp_table[bp].
wid == ps_search_finish_wid(ngs))
543 if (out_best_score) {
544 *out_best_score = best_score;
562 while (bp != NO_BP) {
563 bptbl_t *be = &ngs->bp_table[bp];
566 len += strlen(dict_basestr(ps_search_dict(ngs), be->
wid)) + 1;
574 base->
hyp_str = ckd_calloc(1, len);
578 while (bp != NO_BP) {
579 bptbl_t *be = &ngs->bp_table[bp];
584 len = strlen(dict_basestr(ps_search_dict(ngs), be->
wid));
586 memcpy(c, dict_basestr(ps_search_dict(ngs), be->
wid), len);
602 int32 i, tmatid, ciphone;
606 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
607 ciphone = dict_last_phone(ps_search_dict(ngs),w);
610 dict_second_last_phone(ps_search_dict(ngs),w));
611 tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
613 if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->
hmm) != rssid->
ssid[0])) {
621 E_DEBUG(3,(
"allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n",
623 dict_second_last_phone(ps_search_dict(ngs),w),
624 dict_wordstr(ps_search_dict(ngs),w)));
626 for (i = 1; i < rssid->
n_ssid; ++i) {
627 if ((hmm->
next == NULL) || (hmm_nonmpx_ssid(&hmm->
next->
hmm) != rssid->
ssid[i])) {
636 E_DEBUG(3,(
"allocated rc_id %d ssid %d ciphone %d lc %d word %s\n",
638 dict_second_last_phone(ps_search_dict(ngs),w),
639 dict_wordstr(ps_search_dict(ngs),w)));
651 for (hmm = ngs->
word_chan[w]; hmm; hmm = thmm) {
677 return ngs->bscore_stack[pbe->
s_idx + rssid->
cimap[rcphone]];
686 int32 *out_ascr, int32 *out_lscr)
692 if (be->
bp == NO_BP) {
693 *out_ascr = be->
score;
699 pbe = ngs->bp_table + be->
bp;
701 dict_first_phone(ps_search_dict(ngs),be->
wid));
708 if (be->
wid == ps_search_silence_wid(ngs)) {
709 *out_lscr = ngs->silpen;
712 *out_lscr = ngs->fillpen;
716 *out_lscr = ngram_tg_score(ngs->
lmset,
721 *out_lscr = *out_lscr * lwf;
723 *out_ascr = be->
score - start_score - *out_lscr;
732 ngram_model_flush(ngs->
lmset);
735 else if (ngs->fwdflat)
743 ngram_search_step(
ps_search_t *search,
int frame_idx)
749 else if (ngs->fwdflat)
759 E_INFO(
"Backpointer table (%d entries):\n", ngs->bpidx);
760 for (i = 0; i < ngs->bpidx; ++i) {
761 bptbl_t *bpe = ngs->bp_table + i;
764 E_INFO_NOFN(
"%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d",
765 i, dict_wordstr(ps_search_dict(ngs), bpe->
wid),
767 ? 0 : ngs->bp_table[bpe->
bp].
frame + 1),
778 for (j = 0; j < rcsize; ++j)
780 E_INFOCONT(
" %d", bpe->
score - ngs->bscore_stack[bpe->
s_idx + j]);
791 ngs->n_tot_frame += ngs->
n_frame;
805 while (ps_search_acmod(ngs)->n_feat_frame > 0) {
817 else if (ngs->fwdflat) {
827 ngram_search_bestpath(
ps_search_t *search, int32 *out_score,
int backward)
833 ngs->bestpath_fwdtree_lw_ratio,
839 if (search->
post == 0)
849 ngram_search_hyp(
ps_search_t *search, int32 *out_score)
854 if (ngs->bestpath && ngs->done) {
860 ptmr_reset(&ngs->bestpath_perf);
861 ptmr_start(&ngs->bestpath_perf);
864 if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL)
867 ptmr_stop(&ngs->bestpath_perf);
869 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
870 E_INFO(
"bestpath %.2f CPU %.3f xRT\n",
871 ngs->bestpath_perf.t_cpu,
872 ngs->bestpath_perf.t_cpu / n_speech);
873 E_INFO(
"bestpath %.2f wall %.3f xRT\n",
874 ngs->bestpath_perf.t_elapsed,
875 ngs->bestpath_perf.t_elapsed / n_speech);
891 ngram_search_bp2itor(
ps_seg_t *seg,
int bp)
896 be = &ngs->bp_table[bp];
897 pbe = be->
bp == -1 ? NULL : &ngs->bp_table[be->
bp];
898 seg->
word = dict_wordstr(ps_search_dict(ngs), be->
wid);
900 seg->
sf = pbe ? pbe->
frame + 1 : 0;
913 dict_first_phone(ps_search_dict(ngs), be->
wid));
915 if (be->
wid == ps_search_silence_wid(ngs)) {
916 seg->
lscr = ngs->silpen;
919 seg->
lscr = ngs->fillpen;
938 ckd_free(itor->
bpidx);
948 ngram_bp_seg_free(seg);
952 ngram_search_bp2itor(seg, itor->
bpidx[itor->
cur]);
971 itor = ckd_calloc(1,
sizeof(*itor));
972 itor->
base.
vt = &ngram_bp_segfuncs;
977 while (bp != NO_BP) {
978 bptbl_t *be = &ngs->bp_table[bp];
989 while (bp != NO_BP) {
990 bptbl_t *be = &ngs->bp_table[bp];
991 itor->
bpidx[cur] = bp;
997 ngram_search_bp2itor((
ps_seg_t *)itor, itor->bpidx[0]);
1008 if (ngs->bestpath && ngs->done) {
1014 ptmr_reset(&ngs->bestpath_perf);
1015 ptmr_start(&ngs->bestpath_perf);
1018 if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1021 ngs->bestpath_fwdtree_lw_ratio);
1022 ptmr_stop(&ngs->bestpath_perf);
1024 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
1025 E_INFO(
"bestpath %.2f CPU %.3f xRT\n",
1026 ngs->bestpath_perf.t_cpu,
1027 ngs->bestpath_perf.t_cpu / n_speech);
1028 E_INFO(
"bestpath %.2f wall %.3f xRT\n",
1029 ngs->bestpath_perf.t_elapsed,
1030 ngs->bestpath_perf.t_elapsed / n_speech);
1038 return ngram_search_bp_iter(ngs, bpidx,
1040 (ngs->done && ngs->fwdflat)
1041 ? ngs->fwdflat_fwdtree_lw_ratio : 1.0);
1053 if (ngs->bestpath && ngs->done) {
1059 if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1061 return search->
post;
1075 for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) {
1083 sf = (bp_ptr->
bp < 0) ? 0 : ngs->bp_table[bp_ptr->
bp].
frame + 1;
1087 assert(ef < dag->n_frames);
1089 if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->
n_frames - 1))
1094 && (!ngram_model_set_known_wid(ngs->
lmset,
1095 dict_basewid(ps_search_dict(ngs), wid))))
1099 for (node = dag->
nodes; node; node = node->
next) {
1100 if ((node->
wid == wid) && (node->
sf == sf))
1112 node->
fef = node->
lef = i;
1133 for (node = dag->
nodes; node; node = node->
next) {
1134 if ((node->
wid == ps_search_start_wid(ngs)) && (node->
sf == 0))
1139 E_ERROR(
"Couldn't find <s> in first frame\n");
1149 int32 ef, bestbp, bp, bestscore;
1152 for (node = dag->
nodes; node; node = node->
next) {
1153 int32 lef = ngs->bp_table[node->
lef].
frame;
1154 if ((node->
wid == ps_search_finish_wid(ngs))
1165 ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx;
1168 E_ERROR(
"Empty backpointer table: can not build DAG.\n");
1175 for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) {
1176 int32 n_used, l_scr, wid, prev_wid;
1180 if (wid == ps_search_finish_wid(ngs)) {
1184 l_scr = ngram_tg_score(ngs->
lmset, ps_search_finish_wid(ngs),
1186 l_scr = l_scr * lwf;
1188 bestscore = ngs->bp_table[bp].
score + l_scr;
1192 if (bestbp == NO_BP) {
1193 E_ERROR(
"No word exits found in last frame (%d), assuming no recognition\n", ef);
1196 E_INFO(
"</s> not found in last frame, using %s.%d instead\n",
1197 dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].
wid), ef);
1200 for (node = dag->
nodes; node; node = node->
next) {
1201 if (node->
lef == bestbp)
1206 E_ERROR(
"Failed to find DAG node corresponding to %s\n",
1207 dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].
wid));
1217 int32 i, score, ascr, lscr;
1221 int min_endfr, nlink;
1225 min_endfr = cmd_ln_int32_r(ps_search_config(search),
"-min_endfr");
1242 lwf = ngs->fwdflat ? ngs->fwdflat_fwdtree_lw_ratio : 1.0;
1243 create_dag_nodes(ngs, dag);
1244 if ((dag->
start = find_start_node(ngs, dag)) == NULL)
1246 if ((dag->
end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL)
1248 E_INFO(
"lattice start node %s.%d end node %s.%d\n",
1252 ngram_compute_seg_score(ngs, ngs->bp_table + dag->
end->
lef, lwf,
1276 E_INFO(
"Eliminated %d nodes before end node\n", i);
1279 for (to = dag->
end; to; to = to->
next) {
1288 fef = ngs->bp_table[to->
fef].
frame;
1289 lef = ngs->bp_table[to->
lef].
frame;
1290 if (to != dag->
end && lef - fef < min_endfr) {
1296 for (from = to->
next; from; from = from->
next) {
1299 fef = ngs->bp_table[from->
fef].
frame;
1300 lef = ngs->bp_table[from->
lef].
frame;
1302 if ((to->
sf <= fef) || (to->
sf > lef + 1))
1304 if (lef - fef < min_endfr) {
1311 from_bpe = ngs->bp_table + i;
1312 for (; i <= from->
lef; i++, from_bpe++) {
1313 if (from_bpe->
wid != from->
wid)
1315 if (from_bpe->
frame >= to->
sf - 1)
1319 if ((i > from->
lef) || (from_bpe->
frame != to->
sf - 1))
1324 ngram_compute_seg_score(ngs, from_bpe, lwf,
1330 dict_first_phone(ps_search_dict(ngs), to->
wid));
1336 score = ascr + (score - from_bpe->
score);
1357 E_ERROR(
"End node of lattice isolated; unreachable\n");
1361 for (node = dag->
nodes; node; node = node->
next) {
1370 for (node = dag->
nodes; node; node = node->
next) {
1373 for (alt = node->
next; alt && alt->
sf == node->
sf; alt = alt->
next) {
1381 E_INFO(
"Lattice has %d nodes, %d links\n", dag->
n_nodes, nlink);
1387 dag->
end->
basewid = ps_search_finish_wid(ngs);
1405 default_lm = ngram_model_retain(lm);