voxforge.org
VoxForge Dev

Changeset 2600

Show
Ignore:
Timestamp:
05/30/08 15:08:45 (6 months ago)
Author:
kmaclean
Message:

AudioSegmentation scripts - snapshot

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook.pm

    r2599 r2600  
    108108### Class Variables 
    109109#################################################################### 
    110 our($opt_a,$opt_b,$opt_d,$opt_h,$opt_l,$opt_m,$opt_p,$opt_r,$opt_s,$opt_t,$opt_x,$opt_q,$opt_S,$opt_T,$opt_u,$opt_v); # need to define these because using strict. 
     110our($opt_a,$opt_b,$opt_d,$opt_h,$opt_l,$opt_m,$opt_p,$opt_r,$opt_s,$opt_t,$opt_x,$opt_q,$opt_S,$opt_T,$opt_u,$opt_v,$opt_w); # need to define these because using strict. 
    111111my %self; 
    112112$self{'debug'} = 0; 
     
    184184                $audio->verifySegments; 
    185185        }        
    186         if ($missingwordfound) {  
    187                 $dictionary->getAlternatePronunciations("AudioBook/interim_files/MissingWords_alt",15); # uses Sequitur G2P to get the top N pronunciation variations 
    188                 $dictionary->createAltDict($originalDict,$altDict);     # merge & sort missing_words_alt and originalDict into altDict  
    189                 $dictionary->validateAlternatePronunciations($originalDict,$altDict,$prompts); 
    190                 $dictionary->updatePronDict($pronDict);          
     186        if ($missingwordfound) { 
     187                if ($self->{"verify_out_of_vocabulary_pronunciations"}) {  
     188                        $dictionary->getAlternatePronunciations("AudioBook/interim_files/MissingWords_alt",15); # uses Sequitur G2P to get the top N pronunciation variations 
     189                        $dictionary->createAltDict($originalDict,$altDict);     # merge & sort missing_words_alt and originalDict into altDict  
     190                        $dictionary->validateAlternatePronunciations($originalDict,$altDict,$prompts); 
     191                } 
     192                $dictionary->updatePronDict($pronDict);  
    191193        }        
     194         
    192195        if (defined($tarSuffix)){ 
    193196                _createTarFile($self); 
     
    275278=head2 getOptions  
    276279 
    277 Get the user submitted options ('a:b:d:hl:m:p:r:s:t:u:x:q:vST') 
     280Get the user submitted options ('a:b:d:hl:m:p:r:s:t:u:x:q:vwST') 
    278281 
    279282=cut 
     
    282285        my ($self)= @_; 
    283286        my $debug = $self->{'debug'};    
    284         getopts('a:b:d:hl:m:p:r:s:t:u:x:q:vST');    #  sets $opt_* as a side effect. 
     287        getopts('a:b:d:hl:m:p:r:s:t:u:x:q:vwST');    #  sets $opt_* as a side effect. 
    285288        if ($opt_a and $opt_t) {         
    286289                if (-r $opt_a) { 
     
    337340                } else { 
    338341                        $self->{"verify_segments"}=0;    
    339                 }                
     342                }        
     343                if ($opt_w) { 
     344                        $self->{"verify_out_of_vocabulary_pronunciations"}=1; 
     345                } else { 
     346                        $self->{"verify_out_of_vocabulary_pronunciations"}=0;    
     347                }        
    340348                ### Tar file processing 
    341349                if (defined($opt_T)) { 
     
    403411                print "\tpage:\t(http://www.voxforge.org/home/downloads/metrics)\n";     
    404412                 
    405                 print "-v\tverify segments created from first pass Forced Alignment\n"; 
     413                print "-v\tvalidate segment audio files to prompt text using forced Aligment\n"; 
     414                print "-w\tvalidate missing word pronunciations to audio (using created audio segments)\n";              
    406415                print "-x\tunique tar file suffix (max 3 characters - remainder is truncated)\n"; 
    407416                print "-S\trun sanity test\n";           
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Dictionary.pm

    r2599 r2600  
    287287                my @line = split(/\t/, $line); 
    288288                my ($word, $seqnum, $prob, $phonemes) = @line; 
    289                 push (@missingWordAlt, "$word [$word] $phonemes\n"); 
     289                push (@missingWordAlt, "$word\t[$word]\t$phonemes\n"); 
    290290        } 
    291291        open(DICT,$originalDict) or confess ("cannot open $originalDict file");