voxforge.org
VoxForge Dev
Show
Ignore:
Timestamp:
05/25/08 20:14:38 (6 months ago)
Author:
kmaclean
Message:

AudioSegmentation scripts -snapshot

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Audio.pm

    r2589 r2590  
    5454        my ($class,$super) = @_;  
    5555        my %self; 
    56         $command = ("rm -f AudioBook/output_files/wav/*.wav"); print "$command\n"; system($command) == 0 or confess "Audio.pm $command failed: $?"; 
    5756        $self{"process_audio"} = 1; 
    5857        $self{'htk_files'} = $super->{'htk_files'}; 
     
    6261        $min_sentence_length_found = $super->{"max_sentence_length"}; 
    6362        $self{"min_pause_for_sentence_break"} = $super->{"min_pause_for_sentence_break"}; 
     63        $self{"beam_width"}= $super->{"beam_width"}; 
    6464        $self{'log'} = $super->{'log'};  
    6565        bless(\%self,$class); 
     
    8686        my $htk_files = $self->{'htk_files'}; 
    8787        my $textContents = $self->{"textContents"};  
     88        my $beam_width = $self->{"beam_width"}; 
    8889         
    8990        # Hvite only works with 16kHz sampling rate audio 
     
    9596        print "if this seems to take too long, check interim_files/logs/HVite_log for a possible explanation\n";  
    9697        print "(like \"no tokens surviving\"... which means that text does not match audio)\n\n"; 
     98        ####################################################################     
     99        # process audio file with HVite without using a script file (i.e. -S $htk_files/train.scp); need to remove -l '*' 
     100        #HVite -A -D -T 1  -a -b SENT-END -m -C AudioBook/input_files/htk/wav_config  
     101        #   -H AudioBook/input_files/htk/models/macros  
     102        #   -H AudioBook/input_files/htk/models/hmmdefs  
     103        #   -m -t 250.0 150.0 1000.0 -I AudioBook/interim_files/words.mlf  
     104        #   -i AudioBook/interim_files/aligned.out  
     105        #   AudioBook/interim_files/dict AudioBook/input_files/htk/models/tiedlist   
     106        #   AudioBook/interim_files/downsampled.wav 
    97107        $command = ("pwd && HVite -A -D -T 1 -l '*' -a -b SENT-END -m -C $htk_files/wav_config -H $htk_files/models/macros -H $htk_files/models/hmmdefs -m -t 250.0 150.0 1000.0 -I AudioBook/interim_files/words.mlf  -i AudioBook/interim_files/aligned.out -S $htk_files/train.scp AudioBook/interim_files/dict $htk_files/models/tiedlist > AudioBook/interim_files/logs/HVite_log"); system($command) == 0 or confess "error: $command failed: $?"; 
    98108        open (HVite_Log,"AudioBook/interim_files/logs/HVite_log") || confess "error: can't open AudioBook/interim_files/HVite_log: $?"; 
     
    110120                        print "**** check that audio corresponds to prompt in file ***\n"; 
    111121                        print "******************************************************************\n\n"; 
    112                         if ($beam > 250) { 
     122                        if ($beam > $beam_width) { 
    113123                                confess "audio not corresponding to prompt file, check HVite_Log; error code: $?" ; 
    114124                        }        
     
    196206        my @aligned_line = split(/ /,$$aligned_words[$sentence_end]); 
    197207        my ($word,$startTime,$endTime,$pause) = @aligned_line;   
    198         open(PROMPTS, ">AudioBook/output_files/prompts") or confess ("cannot open AudioBook/output_files/prompts for output");         
     208        open(PROMPTS, ">AudioBook/interim_files/prompts") or confess ("cannot open AudioBook/output_files/prompts for output");        
    199209        while (!$lastSentence) { 
    200210                $loop++; 
     
    393403                $command = ("HCopy -C $htk_files/copy_config  -s $startTime -e $endTime AudioBook/interim_files/$filename_nopath AudioBook/interim_files/temp.wav"); print "cmd:$command\n" if $debug; system($command);  
    394404                # sox command to create a proper wav file with a RIFF header;  
    395                 $command = ("sox  -t .raw -r $samplerate -sw AudioBook/interim_files/temp.wav AudioBook/output_files/wav/$filename_prefix$padded_fileid.wav"); print "cmd:$command\n" if $debug; system($command);     
    396                 print "AudioBook/output_files/wav/$filename_prefix$padded_fileid.wav\n" if not $debug; 
     405                $command = ("sox  -t .raw -r $samplerate -sw AudioBook/interim_files/temp.wav AudioBook/interim_files/wav/$filename_prefix$padded_fileid.wav"); print "cmd:$command\n" if $debug; system($command);    
     406                print "AudioBook/interim_files/wav/$filename_prefix$padded_fileid.wav\n" if not $debug; 
    397407        } else { 
    398                 print "AudioBook/output_files/wav/$filename_prefix$padded_fileid.wav\t$startTime:$endTime:\n" if $debug; 
    399         } 
    400 
     408                print "AudioBook/interim_files/wav/$filename_prefix$padded_fileid.wav\t$startTime:$endTime:\n" if $debug; 
     409        } 
     410
     411 
     412sub verifySegments { #public 
     413        my ($self) = @_;                 
     414        my $debug = $self->{"debug"};            
     415        open(PROMPTS, "<AudioBook/interim_files/prompts") or confess ("cannot open AudioBook/output_files/prompts for output"); 
     416        my @promptScores;        
     417        foreach my $line (<PROMPTS>) { 
     418                chomp $line; 
     419                my @promptList = split(" ", $line); 
     420                my $wavfilename = shift(@promptList) . "\.wav"; 
     421                $self->_createMLFFile(\@promptList); 
     422                #print "Force Align:$wavfilename:@promptList\n";         
     423                $self->_forceAlignSegment($wavfilename); 
     424                my $avgLogLikelihoodPerFrame = $self->_processHviteLog($wavfilename); 
     425                push (@promptScores,[$avgLogLikelihoodPerFrame,$wavfilename, "@promptList"]); 
     426        } 
     427        close (PROMPTS);                 
     428        close (MLF);             
     429        #my @sortedlist = sort @promptScores; 
     430        my @sortedlist = sort { $a->[0] cmp $b->[0] } (@promptScores); # reverse sort of 1st element of @promptScores (which is an array of arrays) 
     431 
     432        foreach my $line (@sortedlist) { 
     433                print "Force Align:$$line[0] $$line[1] $$line[2]\n"; 
     434        } 
     435        print "completed\n"; 
     436
     437 
     438sub _createMLFFile { # private 
     439        my ($self,$promptList)= @_; 
     440        my $debug = $self->{"debug"};    
     441        open(MLF, ">AudioBook/interim_files/segment.mlf") or confess ("cannot open AudioBook/interim_files/segment.mlf for output");     
     442        print MLF "#!MLF!#\n"; #  
     443        print MLF "\"AudioBook/interim_files/downsampled.lab\"\n"; 
     444        foreach my $word (@$promptList) { 
     445                print MLF "$word\n"; 
     446        } 
     447        print MLF "\.\n"; 
     448
     449 
     450sub _forceAlignSegment {  # private 
     451        my ($self,$wavfilename)= @_; 
     452        my $debug = $self->{"debug"};    
     453        my $htk_files = $self->{'htk_files'}; 
     454 
     455        $command = ("sox AudioBook/interim_files/wav/$wavfilename -c 1 -r 16000 -w AudioBook/interim_files/downsampled.wav 2>&1 > AudioBook/interim_files/logs/Segment_Soxlog"); system($command) == 0 or confess "$command failed: $?"; 
     456        $command = ("HVite -A -D -T 1 -a -b SENT-END -m -C $htk_files/wav_config -H $htk_files/models/macros -H $htk_files/models/hmmdefs -m -t 250.0 150.0 1000.0 -I AudioBook/interim_files/segment.mlf  -i AudioBook/interim_files/aligned.out AudioBook/interim_files/dict $htk_files/models/tiedlist AudioBook/interim_files/downsampled.wav > AudioBook/interim_files/logs/Segment_$wavfilename.log"); system($command) == 0 or confess "error: $command failed: $?"; 
     457
     458 
     459sub _processHviteLog {  # private 
     460        my ($self,$wavfilename)= @_; 
     461        my $debug = $self->{"debug"};    
     462        my $beam_width = $self->{"beam_width"}; 
     463        my $avgLogLikelihoodPerFrame; 
     464         
     465        open (Segment_Log,"AudioBook/interim_files/logs/Segment_$wavfilename.log") || confess "error: can't open AudioBook/interim_files/logs/Segment_$wavfilename.log: $?"; 
     466        while (my $line = <Segment_Log>) { 
     467                chomp $line; 
     468                my $filename; 
     469                if ($line =~ /Aligning File:/) {  
     470                        my @line=split(/:/, $line); 
     471                        $filename = pop(@line); 
     472                } elsif ($line =~ /No tokens survived to final node of network at beam/) { 
     473                        my @line=split(/ /, $line); 
     474                        my $beam = pop (@line); 
     475                        $beam =~ s/ //g; 
     476                        print "**** check that audio corresponds to prompt in $wavfilename *** beam= $beam\n"; 
     477                        if ($beam > $beam_width) { 
     478                                confess "audio not corresponding to prompt file, check HVite_Log; error code: $?" ; 
     479                        }        
     480                }  elsif ($line =~ /frames\](.+)\[Ac=/) { 
     481                        $avgLogLikelihoodPerFrame = $1; 
     482                        $avgLogLikelihoodPerFrame =~ s/\s+//g; 
     483                        $avgLogLikelihoodPerFrame =~ s/\-//g;                    
     484                        #print "average log likelihood per frame:$avgLogLikelihoodPerFrame\n:"; 
     485                } 
     486                 
     487        } 
     488        close Segment_Log; 
     489        return $avgLogLikelihoodPerFrame; 
     490
     491 
    401492#################################################################### 
    402493### Gettors - Public