voxforge.org
VoxForge Dev
Show
Ignore:
Timestamp:
06/09/08 17:06:50 (6 months ago)
Author:
kmaclean
Message:

refactor to create Chapter, Segments & MissingWords classes - snapshot

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Dictionary.pm

    r2604 r2606  
    11#! /usr/bin/perl 
    2 $VERSION = 0.1
     2$VERSION = 0.1.1
    33 
    44=head1 NAME 
    55 
    6 AudioBook::Dictionary - Pronunciation Dictionary processing   
     6Dictionary - Pronunciation Dictionary processing   
    77 
    88=cut  
    99 
    10 package AudioBook::Dictionary; 
     10package AudioBook::Dictionary; 
    1111use strict; 
    1212use diagnostics; 
     
    3131sub new { 
    3232        my ($class,$super) = @_;  
    33         my %self; 
    34         $self{'htk_files'} = $super->{'htk_files'}
    35         $self{'g2p_model'} = $super->{'g2p_model'}; 
    36         $self{'log'} = $super->{'log'};         
    37         bless(\%self,$class); 
    38         return \%self; 
     33        my $self ={};  
     34        bless($self,$class)
     35         
     36        $self->{'audiobookObject'} = $super->getAudioBookObject(); 
     37 
     38        return $self; 
    3939}     
    4040 
     
    4848 
    4949sub findOutOfVocabularyWords { # public  
    50         my ($self,$pronDict, $missing_words)= @_;        
    51         my $debug = $self->{"debug"};    
    52         my $htk_files = $self->{'htk_files'}; 
    53          
    54         $self->{"pronDict"} =  $pronDict; 
    55         $self->{"missing_words"} = $missing_words; 
    56           
    57         $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $pronDict"); system($command) == 0 or confess "fullrun $command failed: $?"; 
     50        my ($self,$pronDict,$wlist,$missing_words)= @_;  
     51        my $audioBook = $self->{'audiobookObject'}; 
     52        my $debug = $audioBook->getDebug();      
     53        my $htk_files = $audioBook->getHtk_files(); 
     54        if ($debug) { 
     55                $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w $wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $pronDict"); system($command) == 0 or confess "fullrun $command failed: $?"; 
     56        } else {         
     57                $command = ("HDMan -T 1 -g $htk_files/global.ded -m -w $wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $pronDict"); system($command) == 0 or confess "fullrun $command failed: $?"; 
     58        }        
    5859        my $dlog1 = "AudioBook/interim_files/logs/dlog1"; 
    5960        $command = ("mv AudioBook/interim_files/dlog $dlog1"); print "cmd:$command\n" if $debug; system($command); 
     
    8283        close DLOG; 
    8384        close MISSINGWORDS; 
     85 
     86        $self->{"pronDict"} =  $pronDict;        
     87        $self->{"missing_words"} = $missing_words;       
    8488        return $missingwordsfound; 
    8589} 
    8690 
    87 =head2 getRecommendedPronunciations  
     91=head2 getG2PPronunciations  
    8892 
    8993Use the Sequitur g2p script (Python) to generate pronunciations for out-of-vocabulary words 
     
    9195=cut 
    9296 
    93 sub getRecommendedPronunciations { # public  
     97sub getG2PPronunciations { # public  
    9498        my ($self,$missing_words_out)= @_; 
    95         my $debug = $self->{'debug'}; 
    96         my $model = $self->{'g2p_model'}; 
     99        my $audioBook = $self->{'audiobookObject'}; 
     100        my $debug = $audioBook->getDebug; 
     101        my $model = $audioBook->getG2p_model(); 
    97102        my $missing_words = $self->{'missing_words'}; 
    98         my $log = $self->{'log'}; 
    99  
    100         $self->{"missing_words_out"} = $missing_words_out; 
    101          
     103        my $log = $audioBook->getLog(); 
     104 
    102105        my @missingwords = `export PYTHONPATH=/usr/local/lib64/python2.4/site-packages && 
    103106                            g2p.py --model $model --apply $missing_words`; 
     
    135138        close MISSINGWORDSOUT; 
    136139        close LOG; 
     140 
     141        $self->{"missing_words_out"} = $missing_words_out; 
    137142        return 1; 
    138143} 
     
    147152=cut 
    148153 
    149 sub getAlternatePronunciations { # public  
    150         my ($self,$missing_words_alt, $numberOfPronunciations)= @_; 
    151         my $debug = $self->{'debug'}; 
    152         my $model = $self->{'g2p_model'}; 
     154sub getNBestPronunciations { # public  
     155        my ($self,$missing_words_alt,$numberOfPronunciations)= @_; 
     156        my $audioBook = $self->{'audiobookObject'}; 
     157        my $debug = $audioBook->getDebug; 
     158        my $model = $audioBook->getG2p_model(); 
    153159        my $missing_words = $self->{'missing_words'}; 
    154         my $log = $self->{'log'}
     160        my $log = $self->getLog()
    155161         
    156162        $self->{"missing_words_alt"} = $missing_words_alt; 
     
    188194sub validateAlternatePronunciations { # public  
    189195        my ($self,$originalDict,$altDict,$prompts)= @_; 
    190         my $debug = $self->{'debug'}; 
     196        my $audioBook = $self->{'audiobookObject'}; 
     197        my $debug = $audioBook->getDebug; 
    191198        my $missing_words = $self->{'missing_words'};    
    192199         
     
    299306sub _forceAlignPromptLine { # private  
    300307        my ($self,$altDict,$word,$promptID,$promptLine)= @_; 
    301          
     308        my $audioBook = $self->{'audiobookObject'}; 
     309        my $debug = $audioBook->getDebug; 
     310                 
    302311        my ($aligned_out, $log) = AudioBook::Audio->forceAlign($self, $promptID, $promptLine, $altDict); 
    303312        open(ALIGNED_OUT,"$aligned_out") or confess ("cannot open $aligned_out file"); 
    304313        my (@phoneList,$gatherPhones); 
    305         #print "_forceAlignPromptLine:word=$word\n"; 
    306314        while (my $line = <ALIGNED_OUT>) { 
    307315                my @line = split(/\s/, $line); 
     
    331339sub _getMissingWordList { # private  
    332340        my ($self,$missing_words)= @_; 
     341        my $audioBook = $self->{'audiobookObject'}; 
     342        my $debug = $audioBook->getDebug;        
     343         
    333344        my %missingWordList; 
    334345        open(MISSINGWORDS,"$missing_words") or confess ("cannot open $missing_words file");      
     
    349360=cut 
    350361 
    351 sub createAltDict { 
     362sub createAltDict { 
    352363        my ($self,$originalDict,$altDict)= @_; 
     364        my $audioBook = $self->{'audiobookObject'}; 
     365        my $debug = $audioBook->getDebug; 
    353366        my $missing_words_alt = $self->{"missing_words_alt"};    
    354          
     367 
    355368        open(MISSINGWORDSALT,"$missing_words_alt") or confess ("cannot open $missing_words_alt file"); 
    356369        my @missingWordAlt; 
     
    382395=head3 Note 
    383396 
    384 The pronunciations generated by the Sequitor G2P scripts need to be manually reviewed before any new pronunciations  
     397The pronunciations generated by the Sequitur G2P scripts should be manually reviewed before any new pronunciations  
    385398are added to the pronunciation dictionary.   
    386399 
     
    389402sub updatePronDict { # public  
    390403        my ($self, $pronDictionary)= @_; 
    391         my $debug = $self->{"debug"};            
     404        my $audioBook = $self->{'audiobookObject'}; 
     405        my $debug = $audioBook->getDebug;        
    392406        my $missing_words_out = $self->{"missing_words_out"}; 
    393407                 
     
    396410        open(DICTIONARY,">>$pronDictionary") or confess ("cannot open $pronDictionary file"); 
    397411        open(MISSINGWORDSOUT,"<$missing_words_out") or confess ("cannot open $missing_words_out file"); 
    398         print "adding the following words to VoxForge Pronunciation Dictionary\n"; 
     412        #print "adding the following words to VoxForge Pronunciation Dictionary\n"; 
    399413        while (my $line = <MISSINGWORDSOUT>) { 
    400414                next if ($line =~ /^(\s)*$/);  # skip blank lines 
    401415                if ($line =~ /\w/) { 
    402                         print $line; 
     416                        #print $line; 
    403417                        print DICTIONARY $line; 
    404418                } 
     
    424438=head1 Change Log     
    425439 
     4402008/06/09 - 0.1.1 - refactor to create Chapter, Segments & MissingWords classes 
    4264412008.05.05 - 0.1 - created 
    427442