| 1 |
# !!!!!! notes: need a 2-gram model in addition to the 3-gram model to get this to work!!! |
|---|
| 2 |
# and both models need to be trained on the same data! |
|---|
| 3 |
# |
|---|
| 4 |
# Copyright (c) 1991-2006 Kawahara Lab., Kyoto University |
|---|
| 5 |
# Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology |
|---|
| 6 |
# Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology |
|---|
| 7 |
# All rights reserved |
|---|
| 8 |
# |
|---|
| 9 |
# Example of Runtime Configuration File for Julius rev.3.5 |
|---|
| 10 |
# |
|---|
| 11 |
# 1) NOTICE: relative paths must be relative to THIS FILE. |
|---|
| 12 |
# The current working directory has no effect. |
|---|
| 13 |
# 2) Blank lines and comments after '#' are ignored. |
|---|
| 14 |
# Use `\#' to specify `#' in an argument. |
|---|
| 15 |
# 3) Each line should be shorter than 512 bytes. |
|---|
| 16 |
# 4) The syntax of each argument is the same as the corresponding command option. |
|---|
| 17 |
# 5) The commented-out values in this file are default values in Julius. |
|---|
| 18 |
# |
|---|
| 19 |
|
|---|
| 20 |
###################################################################### |
|---|
| 21 |
#### Files |
|---|
| 22 |
###################################################################### |
|---|
| 23 |
## |
|---|
| 24 |
## Language model file(s) |
|---|
| 25 |
## |
|---|
| 26 |
# in ARPA standard format |
|---|
| 27 |
#-nlr /cdrom/lang_m/20k/MNP.111.20k.1.arpa.gz # LR 2-gram |
|---|
| 28 |
#-nrl /cdrom/lang_m/20k/MNP.111.20k.rev.1-1.10p.arpa.gz # RL 3-gram |
|---|
| 29 |
# or in binary format (generated by "mkbingram"), |
|---|
| 30 |
#-d /cdrom/lang_m/20k/bingram_for_julius/MNP.111.20k.1-1.10p.bingram.gz |
|---|
| 31 |
-nlr ../../../LanguageModels/sphinx_models_hub4opensrc_jan2002/bn.bigram.arpa |
|---|
| 32 |
-nrl ../../../LanguageModels/sphinx_models_hub4opensrc_jan2002/bn99_64000_lm.arpa |
|---|
| 33 |
|
|---|
| 34 |
## |
|---|
| 35 |
## Word dictionary file |
|---|
| 36 |
## |
|---|
| 37 |
#-v /cdrom/lang_m/20k/MNP.111.20k.htkdic |
|---|
| 38 |
-v ../../../LanguageModels/sphinx_models_hub4opensrc_jan2002/LM.vocabulary_julius |
|---|
| 39 |
|
|---|
| 40 |
|
|---|
| 41 |
## |
|---|
| 42 |
## Acoustic HMM file |
|---|
| 43 |
## |
|---|
| 44 |
# support ascii hmmdefs or binary format (converted by "mkbinhmm") |
|---|
| 45 |
# format (ascii/binary) will be automatically detected |
|---|
| 46 |
#-h /cdrom/phone_m/jnas/ptm-3000x64/gid/hmmdefs,tmix.gz # HTK format |
|---|
| 47 |
-h ../../../AcousticModels/HTK/16kHz:16-bit/MFCC_O_D/hmmdefs |
|---|
| 48 |
|
|---|
| 49 |
## triphone model needs HMMList that maps logical triphone to physical ones. |
|---|
| 50 |
#-hlist /cdrom/phone_m/jnas/logicalTri |
|---|
| 51 |
-hlist ../../../AcousticModels/HTK/16kHz:16-bit/MFCC_O_D/tiedlist |
|---|
| 52 |
|
|---|
| 53 |
|
|---|
| 54 |
###################################################################### |
|---|
| 55 |
#### Language Model |
|---|
| 56 |
###################################################################### |
|---|
| 57 |
## |
|---|
| 58 |
## Language score weights and insertion penalty |
|---|
| 59 |
## |
|---|
| 60 |
## for example, "-lmp 8.0 7.0" indicates that the language log probability |
|---|
| 61 |
## 'log p(w)' should be translated as '(log p(w)) * 8.0 + 7.0' in search. |
|---|
| 62 |
## |
|---|
| 63 |
##default values |
|---|
| 64 |
##for monophone |
|---|
| 65 |
#-lmp 5.0 -1.0 # 1st pass(2-gram) |
|---|
| 66 |
#-lmp2 6.0 0.0 # 2nd pass(3-gram) |
|---|
| 67 |
##for triphone |
|---|
| 68 |
#-lmp 8.0 -2.0 |
|---|
| 69 |
#-lmp2 8.0 -2.0 |
|---|
| 70 |
##for triphone with setup='v2.1', when IWCD not handled on 1st pass |
|---|
| 71 |
#-lmp 9.0 8.0 |
|---|
| 72 |
#-lmp2 11.0 -2.0 |
|---|
| 73 |
|
|---|
| 74 |
## |
|---|
| 75 |
## additional penalty for transparent words |
|---|
| 76 |
## |
|---|
| 77 |
#-transp 0.0 |
|---|
| 78 |
|
|---|
| 79 |
###################################################################### |
|---|
| 80 |
#### Dictionary |
|---|
| 81 |
###################################################################### |
|---|
| 82 |
## |
|---|
| 83 |
## name of beginning-of-sentence word in dictionary |
|---|
| 84 |
## |
|---|
| 85 |
#-silhead '<s>' |
|---|
| 86 |
|
|---|
| 87 |
## |
|---|
| 88 |
## name of end-of-sentence word in dictionary |
|---|
| 89 |
## |
|---|
| 90 |
#-siltail '</s>' |
|---|
| 91 |
|
|---|
| 92 |
## |
|---|
| 93 |
## do not give up startup on error words |
|---|
| 94 |
## |
|---|
| 95 |
#-forcedict |
|---|
| 96 |
|
|---|
| 97 |
###################################################################### |
|---|
| 98 |
#### Acoustic Model |
|---|
| 99 |
###################################################################### |
|---|
| 100 |
## |
|---|
| 101 |
## Context-dependency handling will be enabled according to the model type. |
|---|
| 102 |
## Try the options below if Julius wrongly detects the type of hmmdefs |
|---|
| 103 |
## |
|---|
| 104 |
#-no_ccd # disable context-dependency handling |
|---|
| 105 |
#-force_ccd # enable context-dependency handling |
|---|
| 106 |
|
|---|
| 107 |
## |
|---|
| 108 |
## If Julius goes wrong when checking the parameter type, try the option below. |
|---|
| 109 |
## |
|---|
| 110 |
#-notypecheck |
|---|
| 111 |
# |
|---|
| 112 |
|
|---|
| 113 |
## |
|---|
| 114 |
## (PTM/triphone) switch computation method of IWCD on 1st pass |
|---|
| 115 |
## |
|---|
| 116 |
#-iwcd1 best 3 # assign average of N-best likelihood of the same context (default, N=3) |
|---|
| 117 |
#-iwcd1 max # assign maximum likelihood of the same context |
|---|
| 118 |
#-iwcd1 avg # assign average likelihood of the same context |
|---|
| 119 |
|
|---|
| 120 |
###################################################################### |
|---|
| 121 |
#### Gaussian Pruning |
|---|
| 122 |
###################################################################### |
|---|
| 123 |
## Number of mixtures to select in a mixture pdf. |
|---|
| 124 |
## This default value is optimized for IPA99's PTM, |
|---|
| 125 |
## with 64 Gaussians per codebook |
|---|
| 126 |
#-tmix 2 |
|---|
| 127 |
|
|---|
| 128 |
## Select Gaussian pruning algorithm |
|---|
| 129 |
## default: beam (standard setting), safe (others) |
|---|
| 130 |
#-gprune safe # safe pruning, accurate but slow |
|---|
| 131 |
#-gprune heuristic # heuristic pruning |
|---|
| 132 |
#-gprune beam # beam pruning, fast but sensitive |
|---|
| 133 |
#-gprune none # no pruning |
|---|
| 134 |
|
|---|
| 135 |
###################################################################### |
|---|
| 136 |
#### Gaussian Mixture Selection |
|---|
| 137 |
###################################################################### |
|---|
| 138 |
#-gshmm hmmdefs # monophone HMM for GMS |
|---|
| 139 |
# (OFF when not specified) |
|---|
| 140 |
#-gsnum 24 # number of states to be selected on GMS |
|---|
| 141 |
|
|---|
| 142 |
###################################################################### |
|---|
| 143 |
#### Search Parameters |
|---|
| 144 |
###################################################################### |
|---|
| 145 |
#-b 400 # beam width on 1st pass (#nodes) for monophone |
|---|
| 146 |
#-b 800 # beam width on 1st pass (#nodes) for triphone,PTM |
|---|
| 147 |
#-b 1000 # beam width on 1st pass (#nodes) for triphone,PTM,engine=v2.1 |
|---|
| 148 |
#-b2 30 # beam width on 2nd pass (#words) |
|---|
| 149 |
#-sb 80.0 # score beam envelope threshold |
|---|
| 150 |
#-s 500 # hypotheses stack size on 2nd pass (#hypo) |
|---|
| 151 |
#-m 2000 # hypotheses overflow threshold (#hypo) |
|---|
| 152 |
#-lookuprange 5 # lookup range for word expansion (#frame) |
|---|
| 153 |
#-n 1 # num of sentences to find (#sentence) |
|---|
| 154 |
#-n 10 # (default for 'standard' configuration) |
|---|
| 155 |
#-output 1 # num of found sentences to output (#sentence) |
|---|
| 156 |
|
|---|
| 157 |
### when configured with "./configure --enable-lowmem" |
|---|
| 158 |
#-iwcache 10 # % of inter-word LM cache size |
|---|
| 159 |
# 100 is equivalent with no "--enable-lowmem" |
|---|
| 160 |
|
|---|
| 161 |
### when configured with "--enable-lowmem2" |
|---|
| 162 |
#-sepnum 150 # num of high freq words to be separated from tree |
|---|
| 163 |
|
|---|
| 164 |
###################################################################### |
|---|
| 165 |
#### Graph Output (--enable-graphout) |
|---|
| 166 |
###################################################################### |
|---|
| 167 |
## |
|---|
| 168 |
## Merge same words in graph. |
|---|
| 169 |
## -1: not merge, leave same words on the same location with diff. score |
|---|
| 170 |
## 0: merge same words at same location |
|---|
| 171 |
## >0: merge same words around the margin |
|---|
| 172 |
## |
|---|
| 173 |
#-graphrange 0 |
|---|
| 174 |
|
|---|
| 175 |
## |
|---|
| 176 |
## Graph depth cutting threshold (num of words per reference word). |
|---|
| 177 |
## (-1: disable) |
|---|
| 178 |
## |
|---|
| 179 |
#-graphcut 80 |
|---|
| 180 |
|
|---|
| 181 |
## |
|---|
| 182 |
## Maximum number of boundary adjustment loops. |
|---|
| 183 |
## |
|---|
| 184 |
#-graphboundloop 20 |
|---|
| 185 |
|
|---|
| 186 |
## |
|---|
| 187 |
## When "-graphsearchdelay" option is set, Julius modifies its algorithm of |
|---|
| 188 |
## graph generation on the 2nd pass not to apply search termination by graph |
|---|
| 189 |
## merging until the first sentence candidate is found. |
|---|
| 190 |
## |
|---|
| 191 |
## This option may result in slight improvement of graph accuracy only |
|---|
| 192 |
## when you are going to generate a huge word graph by setting broad search. |
|---|
| 193 |
## Namely, it may result in better graph accuracy when you set wide beams on |
|---|
| 194 |
## both 1st pass "-b" and 2nd pass "-b2", and large number for "-n". |
|---|
| 195 |
## |
|---|
| 196 |
#-graphsearchdelay |
|---|
| 197 |
#-nographsearchdelay # this is default (off) |
|---|
| 198 |
|
|---|
| 199 |
###################################################################### |
|---|
| 200 |
#### Inter-word Short Pause Handling |
|---|
| 201 |
###################################################################### |
|---|
| 202 |
## |
|---|
| 203 |
## Specify short pause model name to be treated as special |
|---|
| 204 |
## |
|---|
| 205 |
#-spmodel "sp" # HMM model name |
|---|
| 206 |
|
|---|
| 207 |
## |
|---|
| 208 |
## Add a pause word entry to the dictionary |
|---|
| 209 |
## |
|---|
| 210 |
#-iwspword |
|---|
| 211 |
#-iwspentry "<UNK> [sp] sp sp" # default word entry to be added |
|---|
| 212 |
|
|---|
| 213 |
## |
|---|
| 214 |
## For insertion of context-free short-term inter-word pauses between words |
|---|
| 215 |
## (multi-path version only) |
|---|
| 216 |
## |
|---|
| 217 |
#-iwsp # append a skippable sp model at all word ends |
|---|
| 218 |
#-iwsppenalty 0.0 # transition penalty for the appended sp models |
|---|
| 219 |
|
|---|
| 220 |
###################################################################### |
|---|
| 221 |
#### Short-pause Segmentation (--enable-sp-segment) |
|---|
| 222 |
###################################################################### |
|---|
| 223 |
#-spdur 10 # sp duration frame on 1st pass |
|---|
| 224 |
# Input will be segmented if sp word becomes top |
|---|
| 225 |
# during this period |
|---|
| 226 |
|
|---|
| 227 |
###################################################################### |
|---|
| 228 |
#### Speech Input Source |
|---|
| 229 |
###################################################################### |
|---|
| 230 |
## select one (default: mfcfile) |
|---|
| 231 |
#-input mfcfile # MFCC file in HTK parameter file format |
|---|
| 232 |
#-input rawfile # raw wavefile (auto-detect format) |
|---|
| 233 |
# WAV(16bit) or |
|---|
| 234 |
# RAW(16bit(signed short),mono,big-endian) |
|---|
| 235 |
# AIFF,AU (with libsndfile extension) |
|---|
| 236 |
# other than 16kHz, sampling rate should be specified |
|---|
| 237 |
# by "-smpFreq" option |
|---|
| 238 |
#-input mic # direct microphone input |
|---|
| 239 |
# device name can be specified via env. var. "AUDIODEV" |
|---|
| 240 |
#-input netaudio -NA host:0 # direct input from DatLink(NetAudio) host |
|---|
| 241 |
#-input adinnet -adport portnum # via adinnet network client |
|---|
| 242 |
#-input stdin # from standard tty input (pipe) |
|---|
| 243 |
|
|---|
| 244 |
#-filelist filename # specify file list to be recognized in batch mode |
|---|
| 245 |
|
|---|
| 246 |
#-nostrip # switch OFF dropping of invalid input segment. |
|---|
| 247 |
# (default: strip off invalid segment (0 sequence etc.)) |
|---|
| 248 |
#-zmean # enable DC offset removal (invalid for mfcfile input) |
|---|
| 249 |
#-zmeanframe # frame-wise DC offset removal (same as HTK) |
|---|
| 250 |
|
|---|
| 251 |
###################################################################### |
|---|
| 252 |
#### Recording |
|---|
| 253 |
###################################################################### |
|---|
| 254 |
#-record directory # auto-save recognized speech data into the dir |
|---|
| 255 |
|
|---|
| 256 |
###################################################################### |
|---|
| 257 |
#### GMM-based Input Verification and Rejection |
|---|
| 258 |
###################################################################### |
|---|
| 259 |
#-gmm gmmdefs # specify GMM definition file in HTK format |
|---|
| 260 |
#-gmmnum 10 # num of Gaussians to be computed per mixture |
|---|
| 261 |
#-gmmreject "noise,laugh,cough" # list of GMM names to be rejected |
|---|
| 262 |
|
|---|
| 263 |
###################################################################### |
|---|
| 264 |
#### Too Short Input Rejection |
|---|
| 265 |
###################################################################### |
|---|
| 266 |
#-rejectshort 800 # reject input shorter than specified millisecond |
|---|
| 267 |
|
|---|
| 268 |
###################################################################### |
|---|
| 269 |
#### Speech Detection |
|---|
| 270 |
###################################################################### |
|---|
| 271 |
#-pausesegment # turn on speech detection by level and zero-cross |
|---|
| 272 |
#-nopausesegment # turn off speech detection by level and zero-cross |
|---|
| 273 |
# (default: on for mic or adinnet, off for file) |
|---|
| 274 |
#-lv 2000 # threshold of input level (0-32767) |
|---|
| 275 |
#-headmargin 300 # head margin of input segment (msec) |
|---|
| 276 |
#-tailmargin 400 # tail margin of input segment (msec) |
|---|
| 277 |
#-zc 60 # threshold of number of zero-cross in a second |
|---|
| 278 |
|
|---|
| 279 |
###################################################################### |
|---|
| 280 |
#### Acoustic Analysis |
|---|
| 281 |
###################################################################### |
|---|
| 282 |
#-smpFreq 16000 # sampling rate (Hz) |
|---|
| 283 |
#-smpPeriod 625 # sampling period (in 100 ns units) (= 10000000 / smpFreq) |
|---|
| 284 |
#-fsize 400 # window size (samples) |
|---|
| 285 |
#-fshift 160 # frame shift (samples) |
|---|
| 286 |
#-preemph 0.97 # pre-emphasis coef. |
|---|
| 287 |
#-fbank 24 # number of filterbank channels |
|---|
| 288 |
#-ceplif 22 # cepstral liftering coef. |
|---|
| 289 |
#-rawe # use raw energy |
|---|
| 290 |
#-norawe # not use raw energy (this is default) |
|---|
| 291 |
#-enormal # normalize log energy |
|---|
| 292 |
#-noenormal # not normalize log energy (this is default) |
|---|
| 293 |
#-escale 1.0 # scaling log energy for enormal |
|---|
| 294 |
#-silfloor 50.0 # energy silence floor in dB for enormal |
|---|
| 295 |
#-delwin 2 # delta window (frames) |
|---|
| 296 |
#-accwin 2 # acceleration window (frames) |
|---|
| 297 |
#-hifreq -1 # cut-off hi frequency (Hz) (-1: disable) |
|---|
| 298 |
#-lofreq -1 # cut-off low frequency (Hz) (-1: disable) |
|---|
| 299 |
|
|---|
| 300 |
###################################################################### |
|---|
| 301 |
#### Cepstral Mean Normalization (CMN) |
|---|
| 302 |
###################################################################### |
|---|
| 303 |
#-cmnsave filename # save CMN param to file (update per input) |
|---|
| 304 |
#-cmnload filename # load initial CMN param from file on startup |
|---|
| 305 |
#-cmnmapweight 100.0 # weight for MAP-CMN |
|---|
| 306 |
#-cmnnoupdate # keep initial CMN, not updating from input |
|---|
| 307 |
# (use with -cmnload) |
|---|
| 308 |
|
|---|
| 309 |
###################################################################### |
|---|
| 310 |
#### Spectral Subtraction (SS) |
|---|
| 311 |
###################################################################### |
|---|
| 312 |
#-sscalc # do SS using head silence (file input only) |
|---|
| 313 |
#-sscalclen 300 # length of head silence for SS (msec) |
|---|
| 314 |
#-ssload filename # load constant noise spectrum from file for SS |
|---|
| 315 |
#-ssalpha 2.0 # alpha coef. for SS |
|---|
| 316 |
#-ssfloor 0.5 # spectral floor for SS |
|---|
| 317 |
|
|---|
| 318 |
###################################################################### |
|---|
| 319 |
#### Forced alignment |
|---|
| 320 |
###################################################################### |
|---|
| 321 |
#-walign # do forced alignment with result per word |
|---|
| 322 |
#-palign # do forced alignment with result per phoneme |
|---|
| 323 |
#-salign # do forced alignment with result per HMM state |
|---|
| 324 |
|
|---|
| 325 |
###################################################################### |
|---|
| 326 |
#### Word Confidence Scoring |
|---|
| 327 |
###################################################################### |
|---|
| 328 |
#-cmalpha 0.05 # smoothing coef. alpha |
|---|
| 329 |
|
|---|
| 330 |
###################################################################### |
|---|
| 331 |
#### Output |
|---|
| 332 |
###################################################################### |
|---|
| 333 |
#-separatescore # output language and acoustic score separately |
|---|
| 334 |
#-progout # output partial results at a regular time interval |
|---|
| 335 |
#-proginterval 300 # time interval for "-progout" (msec) |
|---|
| 336 |
#-quiet # output minimal result |
|---|
| 337 |
#-demo # = "-progout -quiet", suitable for dictation demo |
|---|
| 338 |
#-debug # output full message for debug |
|---|
| 339 |
#-charconv from to # output character set conversion (see manual for |
|---|
| 340 |
# available code set name) |
|---|
| 341 |
|
|---|
| 342 |
###################################################################### |
|---|
| 343 |
#### Server module mode |
|---|
| 344 |
###################################################################### |
|---|
| 345 |
#-module # Run Julius on "Server module mode" |
|---|
| 346 |
#-module 5530 # (when using another port number for connection) |
|---|
| 347 |
#-outcode WLPSC # select output message toward module (WLPSCwlps) |
|---|
| 348 |
|
|---|
| 349 |
###################################################################### |
|---|
| 350 |
#### Misc. |
|---|
| 351 |
###################################################################### |
|---|
| 352 |
#-help # output help and exit |
|---|
| 353 |
#-setting # output engine configuration and exit |
|---|
| 354 |
#-C jconffile # expand other jconf file in its place |
|---|
| 355 |
|
|---|
| 356 |
################################################################# end of file |
|---|