- Timestamp:
- 06/09/08 17:06:50 (6 months ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Dictionary.pm
r2604 r2606 1 1 #! /usr/bin/perl 2 $VERSION = 0.1 ;2 $VERSION = 0.1.1; 3 3 4 4 =head1 NAME 5 5 6 AudioBook::Dictionary - Pronunciation Dictionary processing6 Dictionary - Pronunciation Dictionary processing 7 7 8 8 =cut 9 9 10 package AudioBook::Dictionary;10 package AudioBook::Dictionary; 11 11 use strict; 12 12 use diagnostics; … … 31 31 sub new { 32 32 my ($class,$super) = @_; 33 my %self;34 $self{'htk_files'} = $super->{'htk_files'};35 $self{'g2p_model'} = $super->{'g2p_model'};36 $self {'log'} = $super->{'log'};37 bless(\%self,$class); 38 return \%self;33 my $self ={}; 34 bless($self,$class); 35 36 $self->{'audiobookObject'} = $super->getAudioBookObject(); 37 38 return $self; 39 39 } 40 40 … … 48 48 49 49 sub findOutOfVocabularyWords { # public 50 my ($self,$pronDict, $missing_words)= @_; 51 my $debug = $self->{"debug"}; 52 my $htk_files = $self->{'htk_files'}; 53 54 $self->{"pronDict"} = $pronDict; 55 $self->{"missing_words"} = $missing_words; 56 57 $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $pronDict"); system($command) == 0 or confess "fullrun $command failed: $?"; 50 my ($self,$pronDict,$wlist,$missing_words)= @_; 51 my $audioBook = $self->{'audiobookObject'}; 52 my $debug = $audioBook->getDebug(); 53 my $htk_files = $audioBook->getHtk_files(); 54 if ($debug) { 55 $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w $wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $pronDict"); system($command) == 0 or confess "fullrun $command failed: $?"; 56 } else { 57 $command = ("HDMan -T 1 -g $htk_files/global.ded -m -w $wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $pronDict"); system($command) == 0 or confess "fullrun $command failed: $?"; 58 } 58 59 my $dlog1 = "AudioBook/interim_files/logs/dlog1"; 59 60 $command = ("mv AudioBook/interim_files/dlog $dlog1"); print "cmd:$command\n" if $debug; 
system($command); … … 82 83 close DLOG; 83 84 close MISSINGWORDS; 85 86 $self->{"pronDict"} = $pronDict; 87 $self->{"missing_words"} = $missing_words; 84 88 return $missingwordsfound; 85 89 } 86 90 87 =head2 get RecommendedPronunciations91 =head2 getG2PPronunciations 88 92 89 93 Use the Sequitor g2p script (Python) to generate pronunications for out-of-vocabulary words … … 91 95 =cut 92 96 93 sub get RecommendedPronunciations { # public97 sub getG2PPronunciations { # public 94 98 my ($self,$missing_words_out)= @_; 95 my $debug = $self->{'debug'}; 96 my $model = $self->{'g2p_model'}; 99 my $audioBook = $self->{'audiobookObject'}; 100 my $debug = $audioBook->getDebug; 101 my $model = $audioBook->getG2p_model(); 97 102 my $missing_words = $self->{'missing_words'}; 98 my $log = $self->{'log'}; 99 100 $self->{"missing_words_out"} = $missing_words_out; 101 103 my $log = $audioBook->getLog(); 104 102 105 my @missingwords = `export PYTHONPATH=/usr/local/lib64/python2.4/site-packages && 103 106 g2p.py --model $model --apply $missing_words`; … … 135 138 close MISSINGWORDSOUT; 136 139 close LOG; 140 141 $self->{"missing_words_out"} = $missing_words_out; 137 142 return 1; 138 143 } … … 147 152 =cut 148 153 149 sub getAlternatePronunciations { # public 150 my ($self,$missing_words_alt, $numberOfPronunciations)= @_; 151 my $debug = $self->{'debug'}; 152 my $model = $self->{'g2p_model'}; 154 sub getNBestPronunciations { # public 155 my ($self,$missing_words_alt,$numberOfPronunciations)= @_; 156 my $audioBook = $self->{'audiobookObject'}; 157 my $debug = $audioBook->getDebug; 158 my $model = $audioBook->getG2p_model(); 153 159 my $missing_words = $self->{'missing_words'}; 154 my $log = $self-> {'log'};160 my $log = $self->getLog(); 155 161 156 162 $self->{"missing_words_alt"} = $missing_words_alt; … … 188 194 sub validateAlternatePronunciations { # public 189 195 my ($self,$originalDict,$altDict,$prompts)= @_; 190 my $debug = $self->{'debug'}; 196 my $audioBook = 
$self->{'audiobookObject'}; 197 my $debug = $audioBook->getDebug; 191 198 my $missing_words = $self->{'missing_words'}; 192 199 … … 299 306 sub _forceAlignPromptLine { # private 300 307 my ($self,$altDict,$word,$promptID,$promptLine)= @_; 301 308 my $audioBook = $self->{'audiobookObject'}; 309 my $debug = $audioBook->getDebug; 310 302 311 my ($aligned_out, $log) = AudioBook::Audio->forceAlign($self, $promptID, $promptLine, $altDict); 303 312 open(ALIGNED_OUT,"$aligned_out") or confess ("cannot open $aligned_out file"); 304 313 my (@phoneList,$gatherPhones); 305 #print "_forceAlignPromptLine:word=$word\n";306 314 while (my $line = <ALIGNED_OUT>) { 307 315 my @line = split(/\s/, $line); … … 331 339 sub _getMissingWordList { # private 332 340 my ($self,$missing_words)= @_; 341 my $audioBook = $self->{'audiobookObject'}; 342 my $debug = $audioBook->getDebug; 343 333 344 my %missingWordList; 334 345 open(MISSINGWORDS,"$missing_words") or confess ("cannot open $missing_words file"); … … 349 360 =cut 350 361 351 sub createAltDict { #362 sub createAltDict { 352 363 my ($self,$originalDict,$altDict)= @_; 364 my $audioBook = $self->{'audiobookObject'}; 365 my $debug = $audioBook->getDebug; 353 366 my $missing_words_alt = $self->{"missing_words_alt"}; 354 367 355 368 open(MISSINGWORDSALT,"$missing_words_alt") or confess ("cannot open $missing_words_alt file"); 356 369 my @missingWordAlt; … … 382 395 =head3 Note 383 396 384 The pronunciations generated by the Sequitor G2P scripts need tobe manually reviewed before any new pronunciations397 The pronunciations generated by the Sequitor G2P scripts should be manually reviewed before any new pronunciations 385 398 are added to the pronunciation dictionary. 
386 399 … … 389 402 sub updatePronDict { # public 390 403 my ($self, $pronDictionary)= @_; 391 my $debug = $self->{"debug"}; 404 my $audioBook = $self->{'audiobookObject'}; 405 my $debug = $audioBook->getDebug; 392 406 my $missing_words_out = $self->{"missing_words_out"}; 393 407 … … 396 410 open(DICTIONARY,">>$pronDictionary") or confess ("cannot open $pronDictionary file"); 397 411 open(MISSINGWORDSOUT,"<$missing_words_out") or confess ("cannot open $missing_words_out file"); 398 print "adding the following words to VoxForge Pronunciation Dictionary\n";412 #print "adding the following words to VoxForge Pronunciation Dictionary\n"; 399 413 while (my $line = <MISSINGWORDSOUT>) { 400 414 next if ($line =~ /^(\s)*$/); # skip blank lines 401 415 if ($line =~ /\w/) { 402 print $line;416 #print $line; 403 417 print DICTIONARY $line; 404 418 } … … 424 438 =head1 Change Log 425 439 440 2008/06/09 - 0.1.1 - refactor to create Chapter, Segments & MissingWords classes 426 441 2008.05.05 - 0.1 - created 427 442