voxforge.org
VoxForge Dev

root/Trunk/Scripts/AcousticModel_scripts/HTK/manual_testing/julius.jconf

Revision 886, 14.0 kB (checked in by kmaclean, 2 years ago)

16kHz:16bit Acoustic Model build

Line 
1 # !!!!!! notes: need a 2-gram model in addition to the 3-gram model to get this to work!!!
2 # and both models need to be trained on the same data!
3 #
4 # Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
5 # Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
6 # Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
7 # All rights reserved
8 #
9 # Example of Runtime Configuration File for Julius rev.3.5
10 #
11 # 1) NOTICE: relative paths must be relative to THIS FILE.
12 #            The current working directory does not affect them.
13 # 2) Blank lines and comments after '#' are ignored.
14 #    Use `\#' to specify `#' in an argument,
15 # 3) Each line should be shorter than 512 bytes.
16 # 4) The syntax of each argument is the same as the corresponding command option.
17 # 5) The commented-out values in this file are default values in Julius.
18 #
19
20 ######################################################################
21 #### Files
22 ######################################################################
23 ##
24 ## Language model file(s)
25 ##
26 # in ARPA standard format
27 #-nlr /cdrom/lang_m/20k/MNP.111.20k.1.arpa.gz           # LR 2-gram
28 #-nrl /cdrom/lang_m/20k/MNP.111.20k.rev.1-1.10p.arpa.gz # RL 3-gram
29 # or in binary format (generated by "mkbingram"),
30 #-d /cdrom/lang_m/20k/bingram_for_julius/MNP.111.20k.1-1.10p.bingram.gz
31 -nlr ../../../LanguageModels/sphinx_models_hub4opensrc_jan2002/bn.bigram.arpa
32 -nrl  ../../../LanguageModels/sphinx_models_hub4opensrc_jan2002/bn99_64000_lm.arpa
33
34 ##
35 ## Word dictionary file
36 ##
37 #-v /cdrom/lang_m/20k/MNP.111.20k.htkdic
38 -v ../../../LanguageModels/sphinx_models_hub4opensrc_jan2002/LM.vocabulary_julius
39
40
41 ##
42 ## Acoustic HMM file
43 ##
44 # support ascii hmmdefs or binary format (converted by "mkbinhmm")
45 # format (ascii/binary) will be automatically detected
46 #-h /cdrom/phone_m/jnas/ptm-3000x64/gid/hmmdefs,tmix.gz # HTK format
47 -h ../../../AcousticModels/HTK/16kHz:16-bit/MFCC_O_D/hmmdefs
48
49 ## triphone model needs HMMList that maps logical triphone to physical ones.
50 #-hlist /cdrom/phone_m/jnas/logicalTri
51 -hlist ../../../AcousticModels/HTK/16kHz:16-bit/MFCC_O_D/tiedlist
52
53
54 ######################################################################
55 #### Language Model
56 ######################################################################
57 ##
58 ## Language score weights and insertion penalty
59 ##
60 ## for example, "-lmp 8.0 7.0" indicates that the language log probability
61 ## 'log p(w)' should be translated as '(log p(w)) * 8.0 + 7.0' in search.
62 ##
63 ##default values
64 ##for monophone
65 #-lmp 5.0 -1.0          # 1st pass(2-gram)
66 #-lmp2 6.0 0.0          # 2nd pass(3-gram)
67 ##for triphone
68 #-lmp 8.0 -2.0
69 #-lmp2 8.0 -2.0
70 ##for triphone with setup='v2.1', when IWCD not handled on 1st pass
71 #-lmp 9.0 8.0
72 #-lmp2 11.0 -2.0
73
74 ##
75 ## additional penalty for transparent words
76 ##
77 #-transp 0.0
78
79 ######################################################################
80 #### Dictionary
81 ######################################################################
82 ##
83 ## name of beginning-of-sentence word in dictionary
84 ##
85 #-silhead '<s>'
86
87 ##
88 ## name of end-of-sentence word in dictionary
89 ##
90 #-siltail '</s>'
91
92 ##
93 ## do not give up on startup because of error words
94 ##
95 #-forcedict
96
97 ######################################################################
98 #### Acoustic Model
99 ######################################################################
100 ##
101 ## Context-dependency handling will be enabled according to the model type.
102 ## Try the options below if Julius wrongly detects the type of hmmdefs
103 ##
104 #-no_ccd                # disable context-dependency handling
105 #-force_ccd             # enable context-dependency handling
106
107 ##
108 ## If Julius goes wrong when checking the parameter type, try the option below.
109 ##
110 #-notypecheck
111 #
112
113 ##
114 ## (PTM/triphone) switch computation method of IWCD on 1st pass
115 ##
116 #-iwcd1 best 3  # assign average of N-best likelihood of the same context (default, N=3)
117 #-iwcd1 max     # assign maximum likelihood of the same context
118 #-iwcd1 avg     # assign average likelihood of the same context
119
120 ######################################################################
121 #### Gaussian Pruning
122 ######################################################################
123 ## Number of mixtures to select in a mixture pdf.
124 ## This default value is optimized for IPA99's PTM,
125 ## with 64 Gaussians per codebook
126 #-tmix 2
127
128 ## Select Gaussian pruning algorithm
129 ## default: beam (standard setting), safe (others)
130 #-gprune safe           # safe pruning, accurate but slow
131 #-gprune heuristic      # heuristic pruning
132 #-gprune beam           # beam pruning, fast but sensitive
133 #-gprune none           # no pruning
134
135 ######################################################################
136 #### Gaussian Mixture Selection
137 ######################################################################
138 #-gshmm hmmdefs         # monophone HMM for GMS
139                         # (OFF when not specified)
140 #-gsnum 24              # number of states to be selected on GMS
141
142 ######################################################################
143 #### Search Parameters
144 ######################################################################
145 #-b 400                 # beam width on 1st pass (#nodes) for monophone
146 #-b 800                 # beam width on 1st pass (#nodes) for triphone,PTM
147 #-b 1000                # beam width on 1st pass (#nodes) for triphone,PTM,engine=v2.1
148 #-b2 30                 # beam width on 2nd pass (#words)
149 #-sb 80.0               # score beam envelope threshold
150 #-s 500                 # hypotheses stack size on 2nd pass (#hypo)
151 #-m 2000                # hypotheses overflow threshold (#hypo)
152 #-lookuprange 5         # lookup range for word expansion (#frame)
153 #-n 1                   # num of sentences to find (#sentence)
154 #-n 10                  #   (default for 'standard' configuration)
155 #-output 1              # num of found sentences to output (#sentence)
156
157 ### when configured with "./configure --enable-lowmem"
158 #-iwcache 10            # % of inter-word LM cache size
159                         # 100 is equivalent with no "--enable-lowmem"
160
161 ### when configured with "--enable-lowmem2"
162 #-sepnum 150            # num of high freq words to be separated from tree
163
164 ######################################################################
165 #### Graph Output (--enable-graphout)
166 ######################################################################
167 ##
168 ## Merge same words in graph.
169 ##  -1: not merge, leave same words on the same location with diff. score
170 ##   0: merge same words at same location
171 ##  >0: merge same words around the margin
172 ##
173 #-graphrange 0
174
175 ##
176 ## Graph depth cutting threshold (num of words per reference word).
177 ## (-1: disable)
178 ##
179 #-graphcut 80
180
181 ##
182 ## Maximum number of boundary adjustment loop.
183 ##
184 #-graphboundloop 20
185
186 ##
187 ## When "-graphsearchdelay" option is set, Julius modifies its algorithm of
188 ## graph generation on the 2nd pass not to apply search termination by graph
189 ## merging until the first sentence candidate is found.
190 ##
191 ## This option may result in slight improvement of graph accuracy only
192 ## when you are going to generate a huge word graph by setting broad search.
193 ## Namely, it may result in better graph accuracy when you set wide beams on
194 ## both 1st pass "-b" and 2nd pass "-b2", and large number for "-n".
195 ##
196 #-graphsearchdelay
197 #-nographsearchdelay  # this is default (off)
198
199 ######################################################################
200 #### Inter-word Short Pause Handling
201 ######################################################################
202 ##
203 ## Specify short pause model name to be treated as special
204 ##
205 #-spmodel "sp"          # HMM model name
206
207 ##
208 ## Add a pause word entry to the dictionary
209 ##
210 #-iwspword
211 #-iwspentry "<UNK> [sp] sp sp"  # default word entry to be added
212
213 ##
214 ## For insertion of context-free short-term inter-word pauses between words
215 ##  (multi-path version only)
216 ##
217 #-iwsp                  # append a skippable sp model at all word ends
218 #-iwsppenalty 0.0       # transition penalty for the appended sp models
219
220 ######################################################################
221 #### Short-pause Segmentation  (--enable-sp-segment)
222 ######################################################################
223 #-spdur 10              # sp duration frame on 1st pass
224                         # Input will be segmented if sp word becomes top
225                         # during this period
226
227 ######################################################################
228 #### Speech Input Source
229 ######################################################################
230 ## select one (default: mfcfile)
231 #-input mfcfile         # MFCC file in HTK parameter file format
232 #-input rawfile         # raw wavefile (auto-detect format)
233                         # WAV(16bit) or
234                         # RAW(16bit(signed short),mono,big-endian)
235                         # AIFF,AU (with libsndfile extension)
236                         # other than 16kHz, sampling rate should be specified
237                         # by "-smpFreq" option
238 #-input mic             # direct microphone input
239                         # device name can be specified via env. var. "AUDIODEV"
240 #-input netaudio -NA host:0    # direct input from DatLink(NetAudio) host
241 #-input adinnet -adport portnum # via adinnet network client
242 #-input stdin           # from standard tty input (pipe)
243
244 #-filelist filename     # specify file list to be recognized in batch mode
245
246 #-nostrip               # switch OFF dropping of invalid input segment.
247                         # (default: strip off invalid segment (0 sequence etc.))
248 #-zmean                 # enable DC offset removal (invalid for mfcfile input)
249 #-zmeanframe            # frame-wise DC offset removal (same as HTK)
250
251 ######################################################################
252 #### Recording
253 ######################################################################
254 #-record directory      # auto-save recognized speech data into the dir
255
256 ######################################################################
257 #### GMM-based Input Verification and Rejection
258 ######################################################################
259 #-gmm gmmdefs           # specify GMM definition file in HTK format
260 #-gmmnum 10             # num of Gaussians to be computed per mixture
261 #-gmmreject "noise,laugh,cough" # list of GMM names to be rejected
262
263 ######################################################################
264 #### Too Short Input Rejection
265 ######################################################################
266 #-rejectshort 800       # reject input shorter than specified millisecond
267
268 ######################################################################
269 #### Speech Detection
270 ######################################################################
271 #-pausesegment          # turn on speech detection by level and zero-cross
272 #-nopausesegment        # turn off speech detection by level and zero-cross
273                         # (default: on for mic or adinnet, off for file)
274 #-lv 2000               # threshold of input level (0-32767)
275 #-headmargin 300        # head margin of input segment (msec)
276 #-tailmargin 400        # tail margin of input segment (msec)
277 #-zc 60                 # threshold of number of zero-cross in a second
278
279 ######################################################################
280 #### Acoustic Analysis
281 ######################################################################
282 #-smpFreq 16000         # sampling rate (Hz)
283 #-smpPeriod 625         # sampling period (in 100ns units) (= 10000000 / smpFreq)
284 #-fsize 400             # window size (samples)
285 #-fshift 160            # frame shift (samples)
286 #-preemph 0.97          # pre-emphasis coef.
287 #-fbank 24              # number of filterbank channels
288 #-ceplif 22             # cepstral liftering coef.
289 #-rawe                  # use raw energy
290 #-norawe                # not use raw energy (this is default)
291 #-enormal               # normalize log energy
292 #-noenormal             # not normalize log energy (this is default)
293 #-escale 1.0            # scaling log energy for enormal
294 #-silfloor 50.0         # energy silence floor in dB for enormal
295 #-delwin 2              # delta window (frames)
296 #-accwin 2              # acceleration window (frames)
297 #-hifreq -1             # cut-off hi frequency (Hz) (-1: disable)
298 #-lofreq -1             # cut-off low frequency (Hz) (-1: disable)
299
300 ######################################################################
301 #### Cepstral Mean Normalization (CMN)
302 ######################################################################
303 #-cmnsave filename      # save CMN param to file (update per input)
304 #-cmnload filename      # load initial CMN param from file on startup
305 #-cmnmapweight 100.0    # weight for MAP-CMN
306 #-cmnnoupdate           # keep initial CMN, not updating from input
307                         # (use with -cmnload)
308
309 ######################################################################
310 #### Spectral Subtraction (SS)
311 ######################################################################
312 #-sscalc                # do SS using head silence (file input only)
313 #-sscalclen 300         # length of head silence for SS (msec)
314 #-ssload filename       # load constant noise spectrum from file for SS
315 #-ssalpha 2.0           # alpha coef. for SS
316 #-ssfloor 0.5           # spectral floor for SS
317
318 ######################################################################
319 #### Forced alignment
320 ######################################################################
321 #-walign                # do forced alignment with result per word
322 #-palign                # do forced alignment with result per phoneme
323 #-salign                # do forced alignment with result per HMM state
324
325 ######################################################################
326 #### Word Confidence Scoring
327 ######################################################################
328 #-cmalpha 0.05          # smoothing coef. alpha
329
330 ######################################################################
331 #### Output
332 ######################################################################
333 #-separatescore         # output language and acoustic score separately
334 #-progout               # output partial result per a time interval
335 #-proginterval 300      # time interval for "-progout" (msec)
336 #-quiet                 # output minimal result
337 #-demo                  # = "-progout -quiet", suitable for dictation demo
338 #-debug                 # output full message for debug
339 #-charconv from to      # output character set conversion (see manual for
340                         # available code set name)
341
342 ######################################################################
343 #### Server module mode
344 ######################################################################
345 #-module                # Run Julius on "Server module mode"
346 #-module 5530           # (when using another port number for connection)
347 #-outcode WLPSC         # select output message toward module (WLPSCwlps)
348
349 ######################################################################
350 #### Misc.
351 ######################################################################
352 #-help                  # output help and exit
353 #-setting               # output engine configuration and exit
354 #-C jconffile           # expand other jconf file in its place
355
356 ################################################################# end of file
Note: See TracBrowser for help on using the browser.