- Timestamp: 06/17/08 15:25:27 (6 months ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook.pm
r2609 r2613 22 22 use AudioBook::Segments; 23 23 use AudioBook::MissingWords; 24 use AudioBook::MissingWords::CommandLine; 24 25 use AudioBook::Chapter; 25 26 … … 36 37 It is executable from the command line and uses the following configuration options to help in segmenting speech: 37 38 38 -a * audio file name (WAV format only) 39 -b notify if beam width for Forced Alignment exceeds a certain level (default = 250) 40 (does not set HVite's beam width parameter) 41 -d pronunciation dictionary (default = AudioBook/input_files/VoxforgeDict) 42 -h show help 43 -l LICENSE file (default = AudioBook/input_files/LICENCE) 44 -m Target maximum sentence length (default = 20 words) 45 -p Minimum pause for sentence break (default = 2000000 in units of 100ns) 46 -q log words with single quotes (default = yes) 47 -r README file (default = AudioBook/input_files/README) 48 -s Average sentence length (default = 15 words) 49 -t * text file name (containing transcriptions of speech in audio file) 50 -u username or name you want file stats collected by on VoxForge Metrics 51 page: (http://www.voxforge.org/home/downloads/metrics) 52 -v validate segment audio files to prompt text using forced Aligment 53 -w validate missing word pronunciations to audio recordings 54 -x unique tar file suffix (max 3 characters - remainder is truncated) 55 -S run sanity test 56 -T create gzipped/tar file 57 58 * required for script to run 39 VoxForge Audio Segmentation Script Parameters 40 ============================================= 41 -a * audio file name (WAV format only) 42 -b notify if beam width for Forced Alignment exceeds a certain level (default = 250) 43 (does not set HVite's beam width parameter) 44 -d pronunciation dictionary (default = AudioBook/input_files/VoxforgeDict) 45 -h show help 46 -i interactive validation of missing word pronunciations 47 -l LICENSE file (default = AudioBook/input_files/LICENCE) 48 -m Target maximum sentence length (default = 20 words) 49 -p Minimum pause for 
sentence break (default = 2000000 in units of 100ns) 50 -q log words with single quotes (default = yes) 51 -r README file (default = AudioBook/input_files/README) 52 -s Average sentence length (default = 15 words) 53 -t * text file name (containing transcriptions of speech in audio file) 54 -u username or name you want file stats collected by on VoxForge Metrics 55 page: (http://www.voxforge.org/home/downloads/metrics) 56 -v validate segment audio files to prompt text using forced Aligment 57 -w validate missing word pronunciations to audio recordings 58 -x unique tar file suffix (max 3 characters - remainder is truncated) 59 -S run sanity test 60 -T create gzipped/tar file 61 62 * minimum required for script to run 63 59 64 60 65 =head1 Suggested Segmentation Approach: … … 198 203 ### Class Variables 199 204 #################################################################### 200 our($opt_a,$opt_b,$opt_d,$opt_h,$opt_l,$opt_m,$opt_p,$opt_r,$opt_s,$opt_t,$opt_x,$opt_q,$opt_S,$opt_T,$opt_u,$opt_v,$opt_w); # need to define these because using strict. 201 my %self; 202 $self{'debug'} = 0; 203 $self{'g2p_model'} = "AudioBook/input_files/g2p/models/model-5"; 204 $self{'htk_files'} = "AudioBook/input_files/htk"; 205 $self{'log'} = "AudioBook/output_files/AudioBook_Log"; 206 my $self=\%self; 205 our($opt_a,$opt_b,$opt_d,$opt_h,$opt_i,$opt_l,$opt_m,$opt_p,$opt_r,$opt_s,$opt_t,$opt_x,$opt_q,$opt_S,$opt_T,$opt_u,$opt_v,$opt_w); # need to define these because using strict. 
206 my $self = {}; 207 $self->{'debug'} = 0; 208 $self->{'g2p_model'} = "AudioBook/input_files/g2p/models/model-5"; 209 $self->{'htk_files'} = "AudioBook/input_files/htk"; 210 $self->{'log'} = "AudioBook/output_files/AudioBook_Log"; 207 211 bless($self,"AudioBook"); 208 212 … … 215 219 ### Main 216 220 #################################################################### 217 $self->cleanupFiles();218 221 $self->getOptions(); 219 $self->process(); 222 if ($self->getInteractive) { 223 my $xmlfile = 'AudioBook/interim_files/MissingWords.xml'; 224 my $missingWords = AudioBook::MissingWords::CommandLine->new($xmlfile); 225 $missingWords->interactive(); 226 } else { 227 $self->cleanupFiles(); 228 if ($self->getTesting) { 229 $command = ("cp AudioBook/input_files/VoxForgeDict AudioBook/interim_files/VoxForgeDict"); print "cmd:$command\n" if $self->{'debug'} ; system($command); 230 } 231 $self->process(); 232 } 220 233 print "completed!\n"; 221 234 … … 235 248 my ($self)= @_; 236 249 my $tarSuffix = $self->{"tarSuffix"}; 237 238 250 my $chapter = AudioBook::Chapter->new($self); 239 251 # need draft missing word pronunciations before audio can be processed 240 252 my $missingWords = $chapter->processText(); 241 253 $chapter->processAudio(); 242 254 243 255 my $segments = AudioBook::Segments->new($self,$chapter); 244 256 $segments->processAudio(); 245 257 246 258 if ($chapter->getMissingWordFound()) { 247 259 if ($self->getVerify_out_of_vocabulary_pronunciations()) { … … 249 261 } 250 262 } 251 263 252 264 if (defined($tarSuffix)){ 253 265 _createTarFile($self); … … 338 350 =head2 getOptions 339 351 340 Get the user submitted options ('a:b:d:h l:m:p:r:s:t:u:x:q:vwST')352 Get the user submitted options ('a:b:d:hil:m:p:r:s:t:u:x:q:vwST') 341 353 342 354 =cut … … 345 357 my ($self)= @_; 346 358 my $debug = $self->{'debug'}; 347 getopts('a:b:d:h l:m:p:r:s:t:u:x:q:vwST'); # sets $opt_* as a side effect.359 getopts('a:b:d:hil:m:p:r:s:t:u:x:q:vwST'); # sets $opt_* as a side effect. 
348 360 if ($opt_h) { 349 361 print "\nVoxForge Audio Segmentation Script Parameters\n"; … … 353 365 print "\t(does not set HVite's beam width parameter)\n"; 354 366 print "-d\tpronunciation dictionary (default = AudioBook/input_files/VoxforgeDict)\n"; 355 print "-h\tshow help\n"; 367 print "-h\tshow help\n"; 368 print "-i\tinteractive validation of missing word pronunciations\n"; 356 369 print "-l\tLICENSE file (default = AudioBook/input_files/LICENCE)\n"; 357 370 print "-m\tTarget maximum sentence length (default = $default_max_sentence_length words)\n"; … … 377 390 exit; 378 391 } elsif ($opt_S) { # Sanity test switch 392 $self->{'testing'} = 1; 379 393 $self->{"audiofile"}="AudioBook/test/audio.wav"; 380 394 #$self->{"textFile"}="AudioBook/test/text-simple.txt"; 381 395 $self->{"textFile"}="AudioBook/test/text-original.txt"; 382 $command = ("cp AudioBook/input_files/VoxForgeDict AudioBook/interim_files/VoxForgeDict"); print "cmd:$command\n" if $debug; system($command);383 396 $self->{"pronDict"}="AudioBook/interim_files/VoxForgeDict"; 384 397 $self->{"tarSuffix"}=_random_characters(3); … … 484 497 } 485 498 } 499 } elsif ($opt_i) { 500 if ($opt_i) { 501 $self->{"interactive"}=1; 502 } 503 if (defined($opt_d)) { 504 if (-r $opt_d) { 505 $self->{"pronDict"}=$opt_d; 506 } else { 507 die "can't open -d" . $self->{"pronDict"} . "\n"; 508 } 509 } else { 510 $self->{"pronDict"}="AudioBook/input_files/VoxForgeDict"; 511 } 486 512 } else { 487 513 print "\nVoxForge Audio Segmentation Script\n"; … … 507 533 my $self = shift; 508 534 return $self->{"average_sentence_length"}; 535 } 536 537 =item * getInteractive() 538 539 =cut 540 541 sub getInteractive { 542 my $self = shift; 543 return $self->{"interactive"}; 544 } 545 546 =item * getTesting() 547 548 =cut 549 550 sub getTesting { 551 my $self = shift; 552 return $self->{'testing'}; 509 553 } 510 554