- Timestamp:
- 06/10/08 13:28:34 (7 months ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/MissingWords.pm
r2608 r2609 112 112 } 113 113 114 =head2 getAudio 115 116 get audio object. 117 118 =cut 119 120 sub getAudio { 121 my ($self)= @_; 122 my $audioBook = $self->{'audioBookObject'}; 123 my $debug = $audioBook->getDebug; 124 125 $self->verifyMissingWordPronunciations(); 126 127 } 114 128 115 129 =head2 verifyMissingWordPronunciations … … 133 147 $dictionary->getNBestPronunciations("AudioBook/interim_files/MissingWords_alt",15); # uses Sequitor g2p to get top N pronunication vairations 134 148 $dictionary->createAltDict($originalDict,$altDict); # merge & sort missing_words_alt and originalDict into altDict 135 # !!!!!!136 #$dictionary->validateAlternatePronunciations($originalDict,$altDict,$prompts);137 149 $self->validateAlternatePronunciations($originalDict,$altDict,$prompts); 138 # !!!!!!139 150 } 140 151 … … 154 165 my $audioBook = $self->{'audioBookObject'}; 155 166 my $debug = $audioBook->getDebug; 156 # !!!!!! 167 157 168 my $dictionary = $self->{'dictionaryObject'}; 158 169 my $missing_words = $dictionary->getMissing_words(); 159 # !!!!!!! 160 170 161 171 my $missingWordList = $self->_getMissingWordList($missing_words); 162 172 my %prompts; … … 173 183 if ($missingWordList->{$word}) { # there is a missing word in this prompt line 174 184 my $wavfilename = $promptID . "\.wav"; 175 copy("AudioBook/interim_files/wav/$wavfilename","AudioBook/interim_files/missingWordsFolder/$wavfilename"); 185 copy("AudioBook/interim_files/wav/$wavfilename","AudioBook/interim_files/missingWordsFolder/$wavfilename"); 176 186 push (@missingWordsPrompts,"$word:$promptID @line\n"); 177 187 print "\."; 178 my @phoneList = $self->_forceAlignPromptLine($altDict, $word,$promptID,\@line); # force align entire prompt line 179 push (@missingWordsValidated,"$word [$word] @phoneList\n"); 188 # !!!!!! 189 my ($phoneList,$startTime,$stopTime) = $self->_forceAlignPromptLine($altDict, $word,$promptID,\@line); # force align entire prompt line 190 #my @phoneList = $self->_forceAlignPromptLine($altDict, $word,$promptID,\@line); # force align entire prompt line 191 push (@missingWordsValidated,"$word [$word] @$phoneList\n"); 192 $self->_processMissingWordAudio($promptID,$word,$startTime,$stopTime); 193 # !!!!!! 180 194 } 181 195 } … … 190 204 my @temp = split (/:/,$line); 191 205 my ($word) = shift(@temp); 192 print "word:$word\n";206 #print "word:$word\n"; 193 207 if (defined($missingWordsPrompts{$word})) { 194 208 my $temp = "$missingWordsPrompts{$word}\n"; … … 256 270 } 257 271 272 =head2 _processMissingWordAudio 273 274 copy 275 276 =cut 277 278 sub _processMissingWordAudio { # private 279 my ($self,$promptID,$word,$startTime,$stopTime)= @_; 280 my $audioBook = $self->{'audioBookObject'}; 281 my $debug = $audioBook->getDebug; 282 my $chapter = $self->{'chapterObject'}; 283 my $audio = $chapter->getchapterAudioObject(); 284 285 #copy("AudioBook/interim_files/wav/$wavfilename","AudioBook/interim_files/missingWordsFolder/$wavfilename"); 286 my $fromFileName = "AudioBook/interim_files/wav/$promptID\.wav"; 287 my $toFileName = "AudioBook/interim_files/missingWordsFolder/$promptID" . "-mw\.wav"; 288 my $buffer = 2500000; 289 if ($startTime - $buffer > 0) { 290 $startTime = $startTime - $buffer; 291 } else { 292 $startTime = 0; 293 } 294 $stopTime = $stopTime + $buffer; # htk will just padd the end of the file with silence if longer than full duration of file 295 296 $audio->processMissingWordAudio($fromFileName, $startTime,$stopTime,$toFileName); 297 298 return 1; 299 } 300 258 301 =head2 _forceAlignPromptLine 259 302 … … 261 304 Sequitor G2P), in doing so, it picks the most likely pronunciation, thereby validating a Sequitor G2P with real audio. 262 305 263 Assumes only one missing word per prompt line 306 Assumes only one missing word per prompt line (picks up only the first one...) 264 307 265 308 =cut … … 273 316 open(ALIGNED_OUT,"$aligned_out") or confess ("cannot open $aligned_out file"); 274 317 my (@phoneList,$gatherPhones); 318 my ($word_startTime,$word_stopTime); 275 319 while (my $line = <ALIGNED_OUT>) { 276 320 my @line = split(/\s/, $line); … … 279 323 if ($recword eq $word) { 280 324 $gatherPhones=1; 325 $word_startTime = $startTime; 281 326 } elsif ($gatherPhones) { 327 $word_stopTime = $startTime; # can't search on "sp" because some words have not ending "sp" phone 282 328 last; 283 329 } … … 286 332 if ($phone ne "sp"){ 287 333 push (@phoneList,$phone); 288 } 334 } 289 335 } 290 336 } 291 return @phoneList;337 return (\@phoneList,$word_startTime,$word_stopTime); 292 338 } 293 339 … … 313 359 } 314 360 315 =head2 getAudio316 317 get audio object.318 319 =cut320 321 sub getAudio {322 my ($self,$segments)= @_;323 my $audioBook = $self->{'audioBookObject'};324 my $debug = $audioBook->getDebug;325 # structure $fileID,"$current_word $current_startTime $current_endTime $pause"326 my $promptsWithTimes = $segments->getPromptsWithTimes();327 open(MISSINGWORDS, "<AudioBook/interim_files/MissingWords") or confess ("cannot open AudioBook/output_files/prompts for output");328 my %missingwords;329 foreach my $word (<MISSINGWORDS>) {330 $missingwords{$word}=1;331 }332 foreach my $line (@$promptsWithTimes) {333 my $fileID = $$line[0];334 my $promptWordsWithTime = $$line[1];335 foreach my $line (@$promptWordsWithTime) {336 337 }338 }339 340 $self->{'segmentsObject'} = $segments;341 }342 361 343 362 =head1 Change Log