- Timestamp:
- 06/02/08 13:51:48 (6 months ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Dictionary.pm
r2601 r2603 13 13 use Carp; 14 14 use File::Copy; 15 use FileHandle; # for cleaner FORMAT names 15 16 use Lingua::EN::Numbers qw(num2en num2en_ordinal); 16 17 use Lingua::EN::Numbers::Years; … … 176 177 =head2 validateAlternatePronunciations 177 178 178 Add alternate pronunications generated by Sequitor G2P to original dict file, then perform forced alingment to see which179 Add alternate pronunications generated by Sequitor G2P to copy of original dict file, then perform forced alingment to see which 179 180 pronunciation that HVite picks, based on the phonemes it recognizes in the audio. 180 181 … … 198 199 open(PROMPTS,"$prompts") or confess ("cannot open $prompts file"); 199 200 while (my $line = <PROMPTS>) { 200 chomp $line; 201 chomp $line; 201 202 my @line = split(/\s/, $line); 202 203 my $promptID = shift @line; … … 214 215 close PROMPTS; 215 216 216 open(MISSINGWORDPROMPTS,">AudioBook/interim_files/missingWords_prompts") or confess ("cannot open AudioBook/interim_files/missingWords_prompts file"); 217 open(MISSINGWORDPROMPTS,">AudioBook/interim_files/MissingWords_prompts") or confess ("cannot open AudioBook/interim_files/missingWords_prompts file"); 218 my %missingWordsPrompts; 217 219 foreach my $line (sort(@missingWordsPrompts)) { 218 220 print MISSINGWORDPROMPTS $line; 221 chomp $line; 222 my @temp = split (/:/,$line); 223 my ($word) = shift(@temp); 224 print "word:$word\n"; 225 if (defined($missingWordsPrompts{$word})) { 226 my $temp = "$missingWordsPrompts{$word}\n"; 227 $missingWordsPrompts{$word} = $temp . "$line"; 228 } else { 229 $missingWordsPrompts{$word} = $line; 230 } 231 219 232 } 220 233 close MISSINGWORDPROMPTS; 221 222 open(MISSINGWORDSVAL,">AudioBook/interim_files/missingWords_validated") or confess ("cannot open AudioBook/interim_files/missingWords_validated file"); 234 # !!!!!! 235 open(MISSINGWORDSVAL,">AudioBook/interim_files/MissingWords_validated") or confess ("cannot open AudioBook/interim_files/missingWords_validated file"); 236 my %missingWordsValidated; 223 237 foreach my $line (sort(@missingWordsValidated)) { 224 print MISSINGWORDSVAL $line; 225 } 226 close MISSINGWORDSVAL; 227 238 print MISSINGWORDSVAL $line; 239 chomp $line; 240 my @temp = split (/\s/,$line); 241 my ($word,$returnword, @phones) = @temp; 242 if (defined($missingWordsValidated{$word})) { 243 my $temp = "$missingWordsValidated{$word}\n"; 244 $missingWordsValidated{$word} = $temp . "$word\t@phones"; 245 } else { 246 $missingWordsValidated{$word} = "$word\t@phones"; 247 } 248 } 249 close MISSINGWORDSVAL; 250 251 open(MISSINGWORDSOUT,"AudioBook/interim_files/MissingWords_out") or confess ("cannot open AudioBook/interim_files/MissingWords_out file"); 252 open(MISSINGWORDSCOMB,">AudioBook/interim_files/MissingWords_combined") or confess ("cannot open AudioBook/interim_files/missingWords_validated file"); 253 my ($word,$returnword, $phones, @phones); 254 255 while (my $line = <MISSINGWORDSOUT>) { 256 chomp $line; 257 #print MISSINGWORDSCOMB "g2p:$word\t\t@phones\n"; 258 my @temp = split (/\s+/,$line); 259 ($word,$returnword, @phones) = @temp; 260 print MISSINGWORDSCOMB "$missingWordsPrompts{$word}\n"; 261 $phones = join(" ",@phones); 262 format_name MISSINGWORDSCOMB "G2P"; 263 write MISSINGWORDSCOMB; 264 #print MISSINGWORDSCOMB "$missingWordsValidated{$word}\n"; 265 @temp = split (/\s+/,$missingWordsValidated{$word}); 266 ($word,@phones) = @temp; 267 $phones = join(" ",@phones); 268 format_name MISSINGWORDSCOMB "HVITE"; 269 write MISSINGWORDSCOMB; 270 } 271 format PROMPTS = 272 @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 273 "g2p",$word, $phones 274 . 275 format G2P = 276 @<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 277 "g2p",$word, $phones 278 . 279 format HVITE = 280 @<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 281 " hvite:",$word, $phones 282 . 283 close MISSINGWORDSOUT; 284 close MISSINGWORDSCOMB; 285 # !!!!!! 228 286 print "----------------------------------\n"; 229 287 }