- Timestamp: 05/29/08 23:01:55 (6 months ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Audio.pm
r2595 r2597 26 26 my $command; 27 27 28 =head1 METHODS (not user accessible)28 =head1 CLASS METHODS 29 29 30 30 =head2 new … … 50 50 } 51 51 52 =head2 forceAlign 53 54 Force Aligns an individual prompt line with its corresponding speech audio file. 55 56 =cut 57 58 sub forceAlign { # public 59 my ($class,$super,$fileID, $promptLine, $dict)= @_; 60 my $wavfilename = $fileID . "\.wav"; 61 my $htk_files = $super->{'htk_files'}; 62 63 my $segmentMlf = "AudioBook/interim_files/$fileID.mlf"; 64 createMLF($class,$fileID, $promptLine,$segmentMlf); 65 66 my $aligned_out = "AudioBook/interim_files/$fileID.aligned.out"; 67 my $log = "AudioBook/interim_files/$fileID.forceAlign.log"; 68 69 $command = ("sox AudioBook/interim_files/wav/$wavfilename -c 1 -r 16000 -w AudioBook/interim_files/downsampled.wav 2>&1 > AudioBook/interim_files/logs/Segment_Soxlog"); system($command) == 0 or confess "$command failed: $?"; 70 $command = ("HVite -A -D -T 1 -a -b SENT-END -m -C $htk_files/wav_config -H $htk_files/models/macros -H $htk_files/models/hmmdefs -m -t 250.0 150.0 1000.0 -I $segmentMlf 71 -i $aligned_out $dict $htk_files/models/tiedlist $wavfilename > $log"); system($command) == 0 or confess "error: $command failed: $?"; 72 73 return ($aligned_out, $log); 74 } 75 76 =head2 _createMLFFile 77 78 Internal creates an MLF (HTK required "Multi-Label File") file for a prompt. 
79 80 =cut 81 82 sub createMLF { # public 83 my ($class,$fileID, $promptLine,$segmentMlf)= @_; 84 open(MLF, ">$segmentMlf") or confess ("cannot open $segmentMlf for output"); 85 print MLF "#!MLF!#\n"; # 86 print MLF "\"AudioBook/interim_files/$fileID.lab\"\n"; 87 foreach my $word (@$promptLine) { 88 print MLF "$word\n"; 89 } 90 print MLF "\.\n"; 91 } 92 93 =head1 INSTANCE METHODS 94 52 95 =head2 segment 53 96 … … 81 124 82 125 _forceAlign($self); 83 my $aligned_words= _processHViteOutput($self);126 my $aligned_words= $self->_processHViteOutput(); 84 127 $self->{"aligned_words"} = $aligned_words; 85 128 86 print "### segment::$filename ###########################\n"; 129 print "\nSegmenting:$filename (each dot represents a newly created segmented audio file)\n"; 130 print "-------------------------------------------------------------------------------\n"; 87 131 # copy to "interim_files"" directory for processing; also converts to 16 bits per sample (-w=16-bits) so it can be processed by HVite 88 132 $command = ("sox $filename -w AudioBook/interim_files/$filename_nopath"); print "$command\n"; system($command) == 0 or confess "fullrun $command failed: $?"; … … 130 174 } 131 175 } 132 176 print "\n"; 133 177 $command = ("rm AudioBook/interim_files/temp.wav"); print "cmd:$command\n" if $debug; system($command); 134 178 … … 136 180 print LOG "\nAudio Segmenting summary:\n"; 137 181 print LOG "-------------------------\n"; 138 print LOG "Settings:average _sentence_length: $average_sentence_length\n";139 print LOG " max_sentence_length: $max_sentence_length\n";182 print LOG "Settings:average sentence length: $average_sentence_length\n"; 183 print LOG " target max sentence length: $max_sentence_length\n"; 140 184 print LOG " pause length: $min_pause_for_sentence_break (" . $min_pause_for_sentence_break/10000000 . 
" seconds)\n\n"; 141 185 print LOG "Sentence Length: min:$min_sentence_length_linenumber: $min_sentence_length_found\n"; … … 174 218 of the speech audio file and figuring you the time stamps for each of the words. This helps determine where the pauses are. 175 219 220 Sox audio editor is used to downsample the audio to 16kHz-16bit - Hvite only works with 16kHz sampling rate audio 221 176 222 The time stamps are put in the "AudioBooks/interim_files/aligned.out" file 177 223 … … 192 238 # forced alignment - creates aligned.out 193 239 print "\nRunning HVite ...\n"; 194 print " if this seems to take too long, check interim_files/logs/HVite_log for a possible explanation\n";240 print "check interim_files/logs/HVite_log for a possible errors\n"; 195 241 print "(like \"no tokens surviving\"... which means that text does not match audio)\n\n"; 196 242 #################################################################### … … 354 400 max:aud0003: 16 355 401 356 Prompt lines with more than max_sentence_length of 20 words:402 Prompt lines with more than target max_sentence_length of 20 words: 357 403 none 358 404 … … 371 417 print "increment:$$increment\n"; 372 418 print "min_sentence_length_found:$min_sentence_length_found:\n"; 373 print " max_sentence_length:$max_sentence_length:\n";419 print "target max_sentence_length:$max_sentence_length:\n"; 374 420 } 375 421 my $sentence_length = (($$sentence_end + $$increment)-$$sentence_start); … … 451 497 # sox command to create a proper wav file with a RIFF header; 452 498 $command = ("sox -t .raw -r $samplerate -sw AudioBook/interim_files/temp.wav AudioBook/interim_files/wav/$filename_prefix$padded_fileid.wav"); print "cmd:$command\n" if $debug; system($command); 453 print " AudioBook/interim_files/wav/$filename_prefix$padded_fileid.wav\n" if not $debug;499 print "\."; 454 500 } else { 455 501 print "AudioBook/interim_files/wav/$filename_prefix$padded_fileid.wav\t$startTime:$endTime:\n" if $debug; … … 471 517 my $debug = 
$self->{"debug"}; 472 518 my $log = $self->{"log"}; 473 474 print "Verify Prompts (each dot represents a processed prompt file):\n"; 519 520 open(LOG,">>$log") or confess ("cannot open $log file"); 521 print LOG "Checking for \"No tokens survived to final node of network at beam\" warnings:\n"; 522 print LOG "----------------------------------------------------------------------------\n"; 523 524 print "\nVerify Prompts (each dot represents a processed prompt file):\n"; 525 print "-------------------------------------------------------------\n"; 475 526 open(PROMPTS, "<AudioBook/interim_files/prompts") or confess ("cannot open AudioBook/output_files/prompts for output"); 476 527 my @promptScores; … … 482 533 $self->_createMLFFile(\@promptList); 483 534 #print "Force Align:$wavfilename:@promptList\n"; 484 $self->_forceAlignSegment($wavfilename );535 $self->_forceAlignSegment($wavfilename, "AudioBook/interim_files/segment.mlf"); 485 536 my $avgLogLikelihoodPerFrame = $self->_processHviteLog($wavfilename); 486 537 push (@promptScores,[$avgLogLikelihoodPerFrame,$fileID,"@promptList"]); … … 493 544 my @sortedlist = sort { $a->[0] cmp $b->[0] } (@promptScores); # sort of 1st element of @promptScores (which is an array of arrays) 494 545 495 open(LOG,">>$log") or confess ("cannot open $log file"); 546 496 547 print LOG "\nTop 15 prompts with the lowest average log likelihood per frame\n"; 497 548 print LOG "(confirm anything with an avg log likelihood of less than 60):\n"; … … 506 557 } 507 558 508 =head 3_createMLFFile559 =head2 _createMLFFile 509 560 510 561 Internal creates an MLF (HTK required "Multi-Label File") file for an individual prompt file. … … 524 575 } 525 576 526 =head 3_forceAlignSegment527 528 Force Aligns an individual prompt li enwith its corresponding speech audio file.577 =head2 _forceAlignSegment 578 579 Force Aligns an individual prompt line with its corresponding speech audio file. 
529 580 530 581 =cut 531 582 532 583 sub _forceAlignSegment { # private 533 my ($self,$wavfilename )= @_;584 my ($self,$wavfilename, $segmentMLF)= @_; 534 585 my $debug = $self->{"debug"}; 535 586 my $htk_files = $self->{'htk_files'}; 536 587 537 588 $command = ("sox AudioBook/interim_files/wav/$wavfilename -c 1 -r 16000 -w AudioBook/interim_files/downsampled.wav 2>&1 > AudioBook/interim_files/logs/Segment_Soxlog"); system($command) == 0 or confess "$command failed: $?"; 538 $command = ("HVite -A -D -T 1 -a -b SENT-END -m -C $htk_files/wav_config -H $htk_files/models/macros -H $htk_files/models/hmmdefs -m -t 250.0 150.0 1000.0 -I AudioBook/interim_files/segment.mlf-i AudioBook/interim_files/aligned.out AudioBook/interim_files/dict $htk_files/models/tiedlist AudioBook/interim_files/downsampled.wav > AudioBook/interim_files/logs/Segment_$wavfilename.log"); system($command) == 0 or confess "error: $command failed: $?";589 $command = ("HVite -A -D -T 1 -a -b SENT-END -m -C $htk_files/wav_config -H $htk_files/models/macros -H $htk_files/models/hmmdefs -m -t 250.0 150.0 1000.0 -I $segmentMLF -i AudioBook/interim_files/aligned.out AudioBook/interim_files/dict $htk_files/models/tiedlist AudioBook/interim_files/downsampled.wav > AudioBook/interim_files/logs/Segment_$wavfilename.log"); system($command) == 0 or confess "error: $command failed: $?"; 539 590 } 540 591 … … 552 603 my $beam_width = $self->{"beam_width"}; 553 604 my $avgLogLikelihoodPerFrame; 605 my $log = $self->{"log"}; 554 606 555 607 open (Segment_Log,"AudioBook/interim_files/logs/Segment_$wavfilename.log") || confess "error: can't open AudioBook/interim_files/logs/Segment_$wavfilename.log: $?"; … … 564 616 my $beam = pop (@line); 565 617 $beam =~ s/ //g; 566 print "**** check that audio corresponds to prompt in $wavfilename *** beam= $beam\n"; 618 print "\n**** check that audio corresponds to prompt in $wavfilename *** beam= $beam\n"; 619 print LOG "check that audio corresponds to prompt in $wavfilename; 
beam= $beam\n"; 567 620 if ($beam > $beam_width) { 568 621 confess "audio not corresponding to prompt file, check HVite_Log; error code: $?" ;