- Timestamp:
- 05/25/08 20:14:38 (6 months ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Audio.pm
r2589 r2590 54 54 my ($class,$super) = @_; 55 55 my %self; 56 $command = ("rm -f AudioBook/output_files/wav/*.wav"); print "$command\n"; system($command) == 0 or confess "Audio.pm $command failed: $?";57 56 $self{"process_audio"} = 1; 58 57 $self{'htk_files'} = $super->{'htk_files'}; … … 62 61 $min_sentence_length_found = $super->{"max_sentence_length"}; 63 62 $self{"min_pause_for_sentence_break"} = $super->{"min_pause_for_sentence_break"}; 63 $self{"beam_width"}= $super->{"beam_width"}; 64 64 $self{'log'} = $super->{'log'}; 65 65 bless(\%self,$class); … … 86 86 my $htk_files = $self->{'htk_files'}; 87 87 my $textContents = $self->{"textContents"}; 88 my $beam_width = $self->{"beam_width"}; 88 89 89 90 # Hvite only works with 16kHz sampling rate audio … … 95 96 print "if this seems to take too long, check interim_files/logs/HVite_log for a possible explanation\n"; 96 97 print "(like \"no tokens surviving\"... which means that text does not match audio)\n\n"; 98 #################################################################### 99 # process audio file with HVite without using a script file (i.e. -S $htk_files/train.scp); need to remove -l '*' 100 #HVite -A -D -T 1 -a -b SENT-END -m -C AudioBook/input_files/htk/wav_config 101 # -H AudioBook/input_files/htk/models/macros 102 # -H AudioBook/input_files/htk/models/hmmdefs 103 # -m -t 250.0 150.0 1000.0 -I AudioBook/interim_files/words.mlf 104 # -i AudioBook/interim_files/aligned.out 105 # AudioBook/interim_files/dict AudioBook/input_files/htk/models/tiedlist 106 # AudioBook/interim_files/downsampled.wav 97 107 $command = ("pwd && HVite -A -D -T 1 -l '*' -a -b SENT-END -m -C $htk_files/wav_config -H $htk_files/models/macros -H $htk_files/models/hmmdefs -m -t 250.0 150.0 1000.0 -I AudioBook/interim_files/words.mlf -i AudioBook/interim_files/aligned.out -S $htk_files/train.scp AudioBook/interim_files/dict $htk_files/models/tiedlist > AudioBook/interim_files/logs/HVite_log"); system($command) == 0 or confess "error: $command failed: $?"; 98 108 open (HVite_Log,"AudioBook/interim_files/logs/HVite_log") || confess "error: can't open AudioBook/interim_files/HVite_log: $?"; … … 110 120 print "**** check that audio corresponds to prompt in file ***\n"; 111 121 print "******************************************************************\n\n"; 112 if ($beam > 250) {122 if ($beam > $beam_width) { 113 123 confess "audio not corresponding to prompt file, check HVite_Log; error code: $?" ; 114 124 } … … 196 206 my @aligned_line = split(/ /,$$aligned_words[$sentence_end]); 197 207 my ($word,$startTime,$endTime,$pause) = @aligned_line; 198 open(PROMPTS, ">AudioBook/ output_files/prompts") or confess ("cannot open AudioBook/output_files/prompts for output");208 open(PROMPTS, ">AudioBook/interim_files/prompts") or confess ("cannot open AudioBook/output_files/prompts for output"); 199 209 while (!$lastSentence) { 200 210 $loop++; … … 393 403 $command = ("HCopy -C $htk_files/copy_config -s $startTime -e $endTime AudioBook/interim_files/$filename_nopath AudioBook/interim_files/temp.wav"); print "cmd:$command\n" if $debug; system($command); 394 404 # sox command to create a proper wav file with a RIFF header; 395 $command = ("sox -t .raw -r $samplerate -sw AudioBook/interim_files/temp.wav AudioBook/ output_files/wav/$filename_prefix$padded_fileid.wav"); print "cmd:$command\n" if $debug; system($command);396 print "AudioBook/ output_files/wav/$filename_prefix$padded_fileid.wav\n" if not $debug;405 $command = ("sox -t .raw -r $samplerate -sw AudioBook/interim_files/temp.wav AudioBook/interim_files/wav/$filename_prefix$padded_fileid.wav"); print "cmd:$command\n" if $debug; system($command); 406 print "AudioBook/interim_files/wav/$filename_prefix$padded_fileid.wav\n" if not $debug; 397 407 } else { 398 print "AudioBook/output_files/wav/$filename_prefix$padded_fileid.wav\t$startTime:$endTime:\n" if $debug; 399 } 400 } 408 print "AudioBook/interim_files/wav/$filename_prefix$padded_fileid.wav\t$startTime:$endTime:\n" if $debug; 409 } 410 } 411 412 sub verifySegments { #public 413 my ($self) = @_; 414 my $debug = $self->{"debug"}; 415 open(PROMPTS, "<AudioBook/interim_files/prompts") or confess ("cannot open AudioBook/output_files/prompts for output"); 416 my @promptScores; 417 foreach my $line (<PROMPTS>) { 418 chomp $line; 419 my @promptList = split(" ", $line); 420 my $wavfilename = shift(@promptList) . "\.wav"; 421 $self->_createMLFFile(\@promptList); 422 #print "Force Align:$wavfilename:@promptList\n"; 423 $self->_forceAlignSegment($wavfilename); 424 my $avgLogLikelihoodPerFrame = $self->_processHviteLog($wavfilename); 425 push (@promptScores,[$avgLogLikelihoodPerFrame,$wavfilename, "@promptList"]); 426 } 427 close (PROMPTS); 428 close (MLF); 429 #my @sortedlist = sort @promptScores; 430 my @sortedlist = sort { $a->[0] cmp $b->[0] } (@promptScores); # reverse sort of 1st element of @promptScores (which is an array of arrays) 431 432 foreach my $line (@sortedlist) { 433 print "Force Align:$$line[0] $$line[1] $$line[2]\n"; 434 } 435 print "completed\n"; 436 } 437 438 sub _createMLFFile { # private 439 my ($self,$promptList)= @_; 440 my $debug = $self->{"debug"}; 441 open(MLF, ">AudioBook/interim_files/segment.mlf") or confess ("cannot open AudioBook/interim_files/segment.mlf for output"); 442 print MLF "#!MLF!#\n"; # 443 print MLF "\"AudioBook/interim_files/downsampled.lab\"\n"; 444 foreach my $word (@$promptList) { 445 print MLF "$word\n"; 446 } 447 print MLF "\.\n"; 448 } 449 450 sub _forceAlignSegment { # private 451 my ($self,$wavfilename)= @_; 452 my $debug = $self->{"debug"}; 453 my $htk_files = $self->{'htk_files'}; 454 455 $command = ("sox AudioBook/interim_files/wav/$wavfilename -c 1 -r 16000 -w AudioBook/interim_files/downsampled.wav 2>&1 > AudioBook/interim_files/logs/Segment_Soxlog"); system($command) == 0 or confess "$command failed: $?"; 456 $command = ("HVite -A -D -T 1 -a -b SENT-END -m -C $htk_files/wav_config -H $htk_files/models/macros -H $htk_files/models/hmmdefs -m -t 250.0 150.0 1000.0 -I AudioBook/interim_files/segment.mlf -i AudioBook/interim_files/aligned.out AudioBook/interim_files/dict $htk_files/models/tiedlist AudioBook/interim_files/downsampled.wav > AudioBook/interim_files/logs/Segment_$wavfilename.log"); system($command) == 0 or confess "error: $command failed: $?"; 457 } 458 459 sub _processHviteLog { # private 460 my ($self,$wavfilename)= @_; 461 my $debug = $self->{"debug"}; 462 my $beam_width = $self->{"beam_width"}; 463 my $avgLogLikelihoodPerFrame; 464 465 open (Segment_Log,"AudioBook/interim_files/logs/Segment_$wavfilename.log") || confess "error: can't open AudioBook/interim_files/logs/Segment_$wavfilename.log: $?"; 466 while (my $line = <Segment_Log>) { 467 chomp $line; 468 my $filename; 469 if ($line =~ /Aligning File:/) { 470 my @line=split(/:/, $line); 471 $filename = pop(@line); 472 } elsif ($line =~ /No tokens survived to final node of network at beam/) { 473 my @line=split(/ /, $line); 474 my $beam = pop (@line); 475 $beam =~ s/ //g; 476 print "**** check that audio corresponds to prompt in $wavfilename *** beam= $beam\n"; 477 if ($beam > $beam_width) { 478 confess "audio not corresponding to prompt file, check HVite_Log; error code: $?" ; 479 } 480 } elsif ($line =~ /frames\](.+)\[Ac=/) { 481 $avgLogLikelihoodPerFrame = $1; 482 $avgLogLikelihoodPerFrame =~ s/\s+//g; 483 $avgLogLikelihoodPerFrame =~ s/\-//g; 484 #print "average log likelihood per frame:$avgLogLikelihoodPerFrame\n:"; 485 } 486 487 } 488 close Segment_Log; 489 return $avgLogLikelihoodPerFrame; 490 } 491 401 492 #################################################################### 402 493 ### Gettors - Public