voxforge.org
VoxForge Dev
Show
Ignore:
Timestamp:
06/17/08 15:25:27 (6 months ago)
Author:
kmaclean
Message:

Updated the AudioSegmentation script to include a class for interactively updating missing-word phonemes:
The MissingWords::CommandLine.pm script allows a user to listen to the actual audio segment that corresponds to a missing word (including 0.5 seconds before
and after the location of the word), and then either select the best phoneme set (using output from g2p and HVite results on the actual audio) or modify the suggested phoneme sets to better reflect the actual pronunciation of the word.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook.pm

    r2609 r2613  
    2222use AudioBook::Segments;  
    2323use AudioBook::MissingWords; 
     24use AudioBook::MissingWords::CommandLine;  
    2425use AudioBook::Chapter; 
    2526 
     
    3637It is executable from the command line and uses the following configuration options to help in segmenting speech: 
    3738 
    38         -a      * audio file name (WAV format only) 
    39         -b      notify if beam width for Forced Alignment exceeds a certain level (default = 250) 
    40                 (does not set HVite's beam width parameter) 
    41         -d      pronunciation dictionary  (default = AudioBook/input_files/VoxforgeDict) 
    42         -h      show help 
    43         -l      LICENSE file (default = AudioBook/input_files/LICENCE) 
    44         -m      Target maximum sentence length (default = 20 words) 
    45         -p      Minimum pause for sentence break (default = 2000000 in units of 100ns) 
    46         -q      log words with single quotes (default = yes) 
    47         -r      README file (default = AudioBook/input_files/README) 
    48         -s      Average sentence length (default = 15 words) 
    49         -t      * text file name (containing transcriptions of speech in audio file) 
    50         -u      username or name you want file stats collected by on VoxForge Metrics  
    51                 page:   (http://www.voxforge.org/home/downloads/metrics) 
    52         -v      validate segment audio files to prompt text using forced Aligment 
    53         -w      validate missing word pronunciations to audio recordings 
    54         -x      unique tar file suffix (max 3 characters - remainder is truncated) 
    55         -S      run sanity test 
    56         -T      create gzipped/tar file 
    57  
    58                  * required for script to run 
     39VoxForge Audio Segmentation Script Parameters 
     40============================================= 
     41-a      * audio file name (WAV format only) 
     42-b      notify if beam width for Forced Alignment exceeds a certain level (default = 250) 
     43        (does not set HVite's beam width parameter) 
     44-d      pronunciation dictionary  (default = AudioBook/input_files/VoxforgeDict) 
     45-h      show help 
     46-i      interactive validation of missing word pronunciations 
     47-l      LICENSE file (default = AudioBook/input_files/LICENCE) 
     48-m      Target maximum sentence length (default = 20 words) 
     49-p      Minimum pause for sentence break (default = 2000000 in units of 100ns) 
     50-q      log words with single quotes (default = yes) 
     51-r      README file (default = AudioBook/input_files/README) 
     52-s      Average sentence length (default = 15 words) 
     53-t      * text file name (containing transcriptions of speech in audio file) 
     54-u      username or name you want file stats collected by on VoxForge Metrics  
     55        page:   (http://www.voxforge.org/home/downloads/metrics) 
     56-v      validate segment audio files to prompt text using forced Aligment 
     57-w      validate missing word pronunciations to audio recordings 
     58-x      unique tar file suffix (max 3 characters - remainder is truncated) 
     59-S      run sanity test 
     60-T      create gzipped/tar file 
     61 
     62        * minimum required for script to run 
     63 
    5964 
    6065=head1 Suggested Segmentation Approach: 
     
    198203### Class Variables 
    199204#################################################################### 
    200 our($opt_a,$opt_b,$opt_d,$opt_h,$opt_l,$opt_m,$opt_p,$opt_r,$opt_s,$opt_t,$opt_x,$opt_q,$opt_S,$opt_T,$opt_u,$opt_v,$opt_w); # need to define these because using strict. 
    201 my %self; 
    202 $self{'debug'} = 0; 
    203 $self{'g2p_model'} = "AudioBook/input_files/g2p/models/model-5"; 
    204 $self{'htk_files'} = "AudioBook/input_files/htk"; 
    205 $self{'log'} = "AudioBook/output_files/AudioBook_Log"; 
    206 my $self=\%self; 
     205our($opt_a,$opt_b,$opt_d,$opt_h,$opt_i,$opt_l,$opt_m,$opt_p,$opt_r,$opt_s,$opt_t,$opt_x,$opt_q,$opt_S,$opt_T,$opt_u,$opt_v,$opt_w); # need to define these because using strict. 
     206my $self = {}; 
     207$self->{'debug'} = 0; 
     208$self->{'g2p_model'} = "AudioBook/input_files/g2p/models/model-5"; 
     209$self->{'htk_files'} = "AudioBook/input_files/htk"; 
     210$self->{'log'} = "AudioBook/output_files/AudioBook_Log"; 
    207211bless($self,"AudioBook"); 
    208212 
     
    215219### Main 
    216220#################################################################### 
    217 $self->cleanupFiles(); 
    218221$self->getOptions(); 
    219 $self->process(); 
     222if ($self->getInteractive) { 
     223        my $xmlfile = 'AudioBook/interim_files/MissingWords.xml';                        
     224        my $missingWords = AudioBook::MissingWords::CommandLine->new($xmlfile); 
     225        $missingWords->interactive(); 
     226} else { 
     227        $self->cleanupFiles(); 
     228        if ($self->getTesting) { 
     229                $command = ("cp AudioBook/input_files/VoxForgeDict AudioBook/interim_files/VoxForgeDict"); print "cmd:$command\n" if $self->{'debug'} ; system($command); 
     230        } 
     231        $self->process(); 
     232
    220233print "completed!\n"; 
    221234 
     
    235248        my ($self)= @_; 
    236249        my $tarSuffix = $self->{"tarSuffix"}; 
    237          
    238250        my $chapter = AudioBook::Chapter->new($self); 
    239251        # need draft missing word pronunciations before audio can be processed 
    240252        my $missingWords = $chapter->processText();  
    241253        $chapter->processAudio();                
    242  
     254         
    243255        my $segments = AudioBook::Segments->new($self,$chapter); 
    244256        $segments->processAudio();       
    245                  
     257                        
    246258        if ($chapter->getMissingWordFound()) {   
    247259                if ($self->getVerify_out_of_vocabulary_pronunciations()) {  
     
    249261                } 
    250262        } 
    251          
     263                
    252264        if (defined($tarSuffix)){ 
    253265                _createTarFile($self); 
     
    338350=head2 getOptions  
    339351 
    340 Get the user submitted options ('a:b:d:hl:m:p:r:s:t:u:x:q:vwST') 
     352Get the user submitted options ('a:b:d:hil:m:p:r:s:t:u:x:q:vwST') 
    341353 
    342354=cut 
     
    345357        my ($self)= @_; 
    346358        my $debug = $self->{'debug'};    
    347         getopts('a:b:d:hl:m:p:r:s:t:u:x:q:vwST');    #  sets $opt_* as a side effect. 
     359        getopts('a:b:d:hil:m:p:r:s:t:u:x:q:vwST');    #  sets $opt_* as a side effect. 
    348360        if ($opt_h) { 
    349361                print "\nVoxForge Audio Segmentation Script Parameters\n";       
     
    353365                print "\t(does not set HVite's beam width parameter)\n"; 
    354366                print "-d\tpronunciation dictionary  (default = AudioBook/input_files/VoxforgeDict)\n"; 
    355                 print "-h\tshow help\n";         
     367                print "-h\tshow help\n"; 
     368                print "-i\tinteractive validation of missing word pronunciations\n";     
    356369                print "-l\tLICENSE file (default = AudioBook/input_files/LICENCE)\n"; 
    357370                print "-m\tTarget maximum sentence length (default = $default_max_sentence_length words)\n"; 
     
    377390                exit; 
    378391        } elsif ($opt_S) { # Sanity test switch 
     392                $self->{'testing'} = 1;  
    379393                $self->{"audiofile"}="AudioBook/test/audio.wav"; 
    380394                #$self->{"textFile"}="AudioBook/test/text-simple.txt"; 
    381395                $self->{"textFile"}="AudioBook/test/text-original.txt"; 
    382                 $command = ("cp AudioBook/input_files/VoxForgeDict AudioBook/interim_files/VoxForgeDict"); print "cmd:$command\n" if $debug; system($command); 
    383396                $self->{"pronDict"}="AudioBook/interim_files/VoxForgeDict"; 
    384397                $self->{"tarSuffix"}=_random_characters(3); 
     
    484497                        } 
    485498                } 
     499        } elsif ($opt_i) { 
     500                if ($opt_i) { 
     501                        $self->{"interactive"}=1; 
     502                } 
     503                if (defined($opt_d)) { 
     504                        if (-r $opt_d) { 
     505                                $self->{"pronDict"}=$opt_d; 
     506                        } else { 
     507                                die "can't open -d" . $self->{"pronDict"} . "\n";        
     508                        } 
     509                } else { 
     510                        $self->{"pronDict"}="AudioBook/input_files/VoxForgeDict";        
     511                } 
    486512        } else { 
    487513                print "\nVoxForge Audio Segmentation Script\n";  
     
    507533        my $self = shift; 
    508534        return $self->{"average_sentence_length"}; 
     535} 
     536 
     537=item * getInteractive() 
     538 
     539=cut 
     540 
     541sub getInteractive { 
     542        my $self = shift; 
     543        return $self->{"interactive"}; 
     544} 
     545 
     546=item * getTesting() 
     547 
     548=cut 
     549 
     550sub getTesting { 
     551        my $self = shift; 
     552        return $self->{'testing'}; 
    509553} 
    510554