voxforge.org
VoxForge Dev
Show
Ignore:
Timestamp:
06/02/08 13:51:48 (6 months ago)
Author:
kmaclean
Message:

AudioSegmentation scripts - snapshot

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Dictionary.pm

    r2601 r2603  
    1313use Carp; 
    1414use File::Copy; 
     15use FileHandle; # for cleaner FORMAT names 
    1516use Lingua::EN::Numbers qw(num2en num2en_ordinal); 
    1617use Lingua::EN::Numbers::Years; 
     
    176177=head2 validateAlternatePronunciations  
    177178 
    178 Add alternate pronunciations generated by Sequitur G2P to original dict file, then perform forced alignment to see which 
     179Add alternate pronunciations generated by Sequitur G2P to copy of original dict file, then perform forced alignment to see which 
    179180pronunciation that HVite picks, based on the phonemes it recognizes in the audio. 
    180181 
     
    198199        open(PROMPTS,"$prompts") or confess ("cannot open $prompts file"); 
    199200        while (my $line = <PROMPTS>) {  
    200                 chomp $line;    
     201                chomp $line; 
    201202                my @line = split(/\s/, $line); 
    202203                my $promptID = shift @line; 
     
    214215        close PROMPTS; 
    215216         
    216         open(MISSINGWORDPROMPTS,">AudioBook/interim_files/missingWords_prompts") or confess ("cannot open AudioBook/interim_files/missingWords_prompts file");           
     217        open(MISSINGWORDPROMPTS,">AudioBook/interim_files/MissingWords_prompts") or confess ("cannot open AudioBook/interim_files/missingWords_prompts file");           
     218        my %missingWordsPrompts; 
    217219        foreach my $line (sort(@missingWordsPrompts)) { 
    218220                print MISSINGWORDPROMPTS $line; 
     221                chomp $line; 
     222                my @temp = split (/:/,$line); 
     223                my ($word) = shift(@temp);  
     224                print "word:$word\n"; 
     225                if (defined($missingWordsPrompts{$word})) { 
     226                        my $temp = "$missingWordsPrompts{$word}\n"; 
     227                        $missingWordsPrompts{$word} = $temp . "$line";                   
     228                } else { 
     229                        $missingWordsPrompts{$word} = $line; 
     230                } 
     231                 
    219232        } 
    220233        close MISSINGWORDPROMPTS;        
    221                  
    222         open(MISSINGWORDSVAL,">AudioBook/interim_files/missingWords_validated") or confess ("cannot open AudioBook/interim_files/missingWords_validated file");          
     234        # !!!!!!                 
     235        open(MISSINGWORDSVAL,">AudioBook/interim_files/MissingWords_validated") or confess ("cannot open AudioBook/interim_files/missingWords_validated file");          
     236        my %missingWordsValidated; 
    223237        foreach my $line (sort(@missingWordsValidated)) { 
    224                 print MISSINGWORDSVAL $line; 
    225         } 
    226         close MISSINGWORDSVAL;   
    227                  
     238                print MISSINGWORDSVAL $line;             
     239                chomp $line; 
     240                my @temp = split (/\s/,$line); 
     241                my ($word,$returnword, @phones) = @temp;  
     242                if (defined($missingWordsValidated{$word})) { 
     243                        my $temp = "$missingWordsValidated{$word}\n"; 
     244                        $missingWordsValidated{$word} = $temp . "$word\t@phones";                        
     245                } else { 
     246                        $missingWordsValidated{$word} = "$word\t@phones"; 
     247                } 
     248        } 
     249        close MISSINGWORDSVAL; 
     250         
     251        open(MISSINGWORDSOUT,"AudioBook/interim_files/MissingWords_out") or confess ("cannot open AudioBook/interim_files/MissingWords_out file");               
     252        open(MISSINGWORDSCOMB,">AudioBook/interim_files/MissingWords_combined") or confess ("cannot open AudioBook/interim_files/missingWords_validated file"); 
     253        my ($word,$returnword, $phones, @phones);        
     254 
     255        while (my $line = <MISSINGWORDSOUT>) { 
     256                chomp $line; 
     257                #print MISSINGWORDSCOMB "g2p:$word\t\t@phones\n"; 
     258                my @temp = split (/\s+/,$line); 
     259                ($word,$returnword, @phones) = @temp;  
     260                print MISSINGWORDSCOMB "$missingWordsPrompts{$word}\n"; 
     261                $phones = join(" ",@phones); 
     262        format_name MISSINGWORDSCOMB "G2P"; 
     263                write MISSINGWORDSCOMB; 
     264                #print MISSINGWORDSCOMB "$missingWordsValidated{$word}\n"; 
     265                @temp = split (/\s+/,$missingWordsValidated{$word}); 
     266                ($word,@phones) = @temp;  
     267                $phones = join(" ",@phones); 
     268        format_name MISSINGWORDSCOMB "HVITE"; 
     269                write MISSINGWORDSCOMB; 
     270        } 
     271        format PROMPTS = 
     272@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 
     273"g2p",$word, $phones 
     274
     275        format G2P = 
     276@<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 
     277"g2p",$word, $phones 
     278
     279        format HVITE = 
     280@<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 
     281"  hvite:",$word, $phones 
     282
     283        close MISSINGWORDSOUT; 
     284        close MISSINGWORDSCOMB; 
     285        # !!!!!!         
    228286        print   "----------------------------------\n";  
    229287}