- Timestamp:
- 06/17/08 15:25:27 (6 months ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/MissingWords.pm
r2609 r2613 13 13 use Carp; 14 14 use File::Copy; 15 use XML::LibXML; 15 16 16 17 #use AudioBook::Dictionary; … … 114 115 =head2 getAudio 115 116 116 get audio object.117 Verify Missing Word Pronunciations. 117 118 118 119 =cut … … 173 174 my @missingWordsPrompts; 174 175 my @missingWordsValidated; 175 print "\nValidate Alternate Pronunciations :\n";176 print "---------------------------------- \n";176 print "\nValidate Alternate Pronunciations for Missing Words:\n"; 177 print "----------------------------------------------------\n"; 177 178 open(PROMPTS,"$prompts") or confess ("cannot open $prompts file"); 178 179 while (my $line = <PROMPTS>) { … … 184 185 my $wavfilename = $promptID . "\.wav"; 185 186 copy("AudioBook/interim_files/wav/$wavfilename","AudioBook/interim_files/missingWordsFolder/$wavfilename"); 186 push (@missingWordsPrompts,"$word :$promptID @line\n");187 push (@missingWordsPrompts,"$word $promptID @line\n"); 187 188 print "\."; 189 my ($phoneList,$startTime,$stopTime) = $self->_forceAlignPromptLine($altDict, $word,$promptID,\@line); # force align entire prompt line 188 190 # !!!!!! 189 my ($phoneList,$startTime,$stopTime) = $self->_forceAlignPromptLine($altDict, $word,$promptID,\@line); # force align entire prompt line190 #my @phoneList = $self->_forceAlignPromptLine($altDict, $word,$promptID,\@line); # force align entire prompt line191 push (@missingWordsValidated,"$word [$word] @$phoneList\n");191 #push (@missingWordsValidated,"$word [$word] @$phoneList\n"); 192 push (@missingWordsValidated,"$word $promptID @$phoneList\n"); 193 # !!!!!! 192 194 $self->_processMissingWordAudio($promptID,$word,$startTime,$stopTime); 193 # !!!!!!194 195 } 195 196 } … … 202 203 print MISSINGWORDPROMPTS $line; 203 204 chomp $line; 204 my @temp = split (/:/,$line); 205 my ($word) = shift(@temp); 206 #print "word:$word\n"; 205 my @temp = split (/ /,$line); 206 my ($word,$promptID,@phones) = @temp; 207 207 if (defined($missingWordsPrompts{$word})) { 208 my $ temp = "$missingWordsPrompts{$word}\n";209 $missingWordsPrompts{$word} = $temp . "$line";208 my $array = $missingWordsPrompts{$word}; 209 push @$array,[$promptID,\@phones]; # see perlref 210 210 } else { 211 $missingWordsPrompts{$word} = $line; 212 } 213 211 $missingWordsPrompts{$word} = []; 212 my $array = $missingWordsPrompts{$word}; 213 push @$array,[$promptID,\@phones]; # see perlref 214 } 214 215 } 215 216 close MISSINGWORDPROMPTS; 216 # !!!!!!217 217 218 open(MISSINGWORDSVAL,">AudioBook/interim_files/MissingWords_validated") or confess ("cannot open AudioBook/interim_files/missingWords_validated file"); 218 219 my %missingWordsValidated; 220 219 221 foreach my $line (sort(@missingWordsValidated)) { 220 print MISSINGWORDSVAL $line;221 chomp $line;222 222 my @temp = split (/\s/,$line); 223 my ($word,$returnword, @phones) = @temp; 223 my ($word,$promptID, @phones) = @temp; 224 print MISSINGWORDSVAL "$word [$word] @phones\n"; 224 225 if (defined($missingWordsValidated{$word})) { 225 226 my $array = $missingWordsValidated{$word}; 226 push @$array, \@phones; # see perlref227 push @$array,[$promptID,\@phones]; # see perlref 227 228 } else { 228 229 $missingWordsValidated{$word} = []; 229 230 my $array = $missingWordsValidated{$word}; 230 push @$array, \@phones; # see perlref231 push @$array,[$promptID,\@phones]; # see perlref 231 232 } 232 233 } … … 234 235 235 236 open(MISSINGWORDSOUT,"AudioBook/interim_files/MissingWords_out") or confess ("cannot open AudioBook/interim_files/MissingWords_out file"); 236 open(MISSINGWORDSCOMB,">AudioBook/interim_files/MissingWords_combined") or confess ("cannot open AudioBook/interim_files/missingWords_validated file"); 237 my ($word,$returnword, $phones, @phones); 238 237 open(MISSINGWORDSCOMB,">AudioBook/interim_files/MissingWords_combined") or confess ("cannot open AudioBook/interim_files/MissingWords_combined file"); 238 # !!!!!! 239 my $doc = XML::LibXML::Document->new(); 240 my $root = $doc->createElement('missingwords'); 241 $doc->setDocumentElement($root); 242 # !!!!!! 243 my ($word,$returnword, $promptID, @phones); 244 my ($g2pPhones,$promptWords,$hvitePhones); 239 245 while (my $line = <MISSINGWORDSOUT>) { 240 246 chomp $line; 241 247 my @temp = split (/\s+/,$line); 242 248 ($word,$returnword, @phones) = @temp; 243 print MISSINGWORDSCOMB "$missingWordsPrompts{$word}\n"; 244 $phones = join(" ",@phones); 245 format_name MISSINGWORDSCOMB "G2P"; 249 #word 250 print MISSINGWORDSCOMB "word:$word\n"; 251 # !!!!!! 252 #print MISSINGWORDSXML "<word=$word>\n"; 253 #print MISSINGWORDSXML "<promptList>\n"; 254 my $wordElement= $doc->createElement('word'); 255 $wordElement->setAttribute('name', $word); 256 $root->appendChild($wordElement); 257 # !!!!!! 258 # prompt ID and prompt line 259 my $MWParray = $missingWordsPrompts{$word}; 260 foreach my $lines (@$MWParray) { 261 $promptID = $$lines[0]; 262 #$promptWords = join(" ",$$lines[1]); 263 $promptWords = $$lines[1]; 264 print MISSINGWORDSCOMB "prompt:$promptID @$promptWords\n"; 265 # !!!!!! 266 #print MISSINGWORDSXML "<prompt promptID=$promptID>@$promptWords</prompt>\n"; 267 my $element= $doc->createElement('prompt'); 268 $element->setAttribute('promptID', $promptID); 269 my $text = XML::LibXML::Text->new("@$promptWords"); 270 $element->appendChild($text); 271 $wordElement->appendChild($element); 272 # !!!!!! 273 } 274 #print MISSINGWORDSXML "</promptList>\n"; 275 276 #print MISSINGWORDSXML "<phones>\n"; 277 # G2P recommended pronunciations - from MissingWords_out 278 $g2pPhones = join(" ",@phones); 279 format_name MISSINGWORDSCOMB "G2P"; 246 280 write MISSINGWORDSCOMB; 247 my $array = $missingWordsValidated{$word}; 248 foreach my $lines (@$array) { 249 $phones = join(" ",@$lines); 281 # !!!!!! 282 #print MISSINGWORDSXML "<g2pPhones>$g2pPhones</g2pPhones>\n"; 283 { # isolate variables 284 my $element= $doc->createElement('g2pPhones'); 285 my $text = XML::LibXML::Text->new($g2pPhones); 286 $element->appendChild($text); 287 $wordElement->appendChild($element); 288 } 289 # !!!!!! 290 # HVite pronunciations 291 my $MWVarray = $missingWordsValidated{$word}; 292 foreach my $lines (@$MWVarray) { 293 $promptID = $$lines[0]; 294 my $arrayref = $$lines[1]; 295 $hvitePhones = join(" ",@$arrayref); # format_name requires a scalar 250 296 format_name MISSINGWORDSCOMB "HVITE"; 251 297 write MISSINGWORDSCOMB; 252 } 253 } 254 format PROMPTS = 255 @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 256 "g2p",$word, $phones 257 . 298 # !!!!!! 299 #print MISSINGWORDSXML "<hvitePhones promptID=$promptID>$hvitePhones</hvitePhones>\n"; 300 my $element= $doc->createElement('hvitePhones'); 301 $element->setAttribute('promptID', $promptID); 302 my $text = XML::LibXML::Text->new($hvitePhones); 303 $element->appendChild($text); 304 $wordElement->appendChild($element); 305 # !!!!!!! 306 } 307 $root->appendChild($wordElement); 308 #print MISSINGWORDSXML "</phones>\n"; 309 #print MISSINGWORDSXML "</word>\n"; 310 } 258 311 format G2P = 259 @<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<260 " g2p",$word, $phones312 @<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 313 " g2p:",$word, $g2pPhones 261 314 . 262 315 format HVITE = 263 316 @<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 264 " hvite:",$word, $ phones317 " hvite:",$word, $hvitePhones 265 318 . 266 319 close MISSINGWORDSOUT; 267 close MISSINGWORDSCOMB; 268 # !!!!!! 269 print "----------------------------------\n"; 320 # !!!!!! 321 322 $doc->toFile("AudioBook/interim_files/MissingWords.xml"); 323 #close MISSINGWORDSCOMB; 324 # !!!!!! 325 print "\n----------------------------------------------------\n"; 270 326 } 271 327 272 328 =head2 _processMissingWordAudio 273 329 274 copy 330 This takes the time information from the forced alignment of a segment, and copies the audio segment where the missing word is located 331 to an audio file in the "AudioBook/interim_files/missingWordsFolder" folder, and adds a "-mw" suffix. 275 332 276 333 =cut