Changeset 2586
- Timestamp:
- 05/15/08 10:44:44 (5 months ago)
- Files:
-
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook.pm (modified) (9 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Audio.pm (modified) (8 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Dictionary.pm (modified) (5 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Text.pm (modified) (14 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/dlog1 (deleted)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/dlog2 (deleted)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/logs (added)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/logs/HVite_log (added)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/logs/dlog1 (added)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/logs/dlog2 (added)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/output_files/AudioBook_Log (added)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/output_files/prompts (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook.pm
r2585 r2586 36 36 ### Class Variables 37 37 #################################################################### 38 our($opt_a,$opt_d,$opt_h,$opt_l,$opt_ r,$opt_t,$opt_x,$opt_K,$opt_T,$opt_u); # need to define these because using strict.38 our($opt_a,$opt_d,$opt_h,$opt_l,$opt_m,$opt_p,$opt_r,$opt_s,$opt_t,$opt_x,$opt_S,$opt_T,$opt_u); # need to define these because using strict. 39 39 my %self; 40 40 $self{'debug'} = 0; 41 41 $self{'g2p_model'} = "AudioBook/input_files/g2p/models/model-5"; 42 42 $self{'htk_files'} = "AudioBook/input_files/htk"; 43 $self{'log'} = "AudioBook/output_files/AudioBook_Log"; 44 my $default_average_sentence_length = 13; 45 my $default_max_sentence_length = 18; 46 my $default_min_pause_for_sentence_break = 2000000; 43 47 my $command; 44 48 … … 46 50 ### Main 47 51 #################################################################### 52 cleanupFiles(\%self); 48 53 getOptions(\%self); 49 54 process(\%self); … … 62 67 my $voxforgeDict = $self->{"voxforgeDict"}; 63 68 my $htk_files = $self->{'htk_files'}; 64 65 my $textContents = AudioBook::Text->new($textfile); 69 my $log = $self{'log'}; 70 71 my $textContents = AudioBook::Text->new($self,$textfile); 66 72 $textContents->createWLISTFile("AudioBook/interim_files/wlist"); 67 73 … … 73 79 # need to update dict with missing words 74 80 # can't seem to change default HDMan log file with "-l" parameter 75 $command = ("cp AudioBook/interim_files/dlog AudioBook/interim_files/dlog1"); print "cmd:$command\n" if $debug; system($command);76 81 $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $voxforgeDict"); system($command) == 0 or confess "fullrun $command failed: $?"; 77 $command = ("mv AudioBook/interim_files/dlog AudioBook/interim_files/ dlog2"); print "cmd:$command\n" if $debug; system($command);78 $command = ("cp AudioBook/interim_files/MissingWords_out AudioBook/output_files/MissingWords"); print "cmd:$command\n" if $debug; system($command);82 $command = ("mv AudioBook/interim_files/dlog AudioBook/interim_files/logs/dlog2"); print "cmd:$command\n" if $debug; system($command); 83 # no longer required$command = ("cp AudioBook/interim_files/MissingWords_out AudioBook/output_files/MissingWords"); print "cmd:$command\n" if $debug; system($command); 79 84 } else { 80 unlink ("AudioBook/interim_files/MissingWords_out"); 81 open(MISSINGWORDSOUT,">AudioBook/output_files/MissingWords") or confess ("cannot open AudioBook/output_files/MissingWords file"); 82 print MISSINGWORDSOUT "no missing words\n"; 83 close MISSINGWORDSOUT 85 open(LOG,">>$log") or confess ("cannot open AudioBook/output_files/MissingWords file"); 86 print LOG "\nMissing Words added to Pronunciation Dictionary:\n"; 87 print LOG "------------------------------------------------\n"; 88 print LOG "no missing words\n"; 89 close LOG 84 90 } 85 91 $command = ("cp AudioBook/interim_files/dict AudioBook/output_files"); print "cmd:$command\n" if $debug; system($command); … … 88 94 if (defined($tarSuffix)){ 89 95 _createTarFile($self); 96 } 97 } 98 99 sub cleanupFiles { 100 my ($self)= @_; 101 if (defined(<AudioBook/interim_files/*>)) { 102 unlink (<AudioBook/interim_files/*>); 103 } 104 if (defined(<AudioBook/output_files/wav/*>)) { 105 unlink (<AudioBook/output_files/wav/*>); 106 } 107 if (defined(<AudioBook/output_files/logs/*>)) { 108 unlink (<AudioBook/output_files/logs/*>); 109 } 110 if (defined(<AudioBook/output_files/*>)) { 111 unlink (<AudioBook/output_files/*>); 90 112 } 91 113 } … … 122 144 sub getOptions { 123 145 my ($self)= @_; 124 getopts('a:d:hl:r:t:u:x:KT'); # sets $opt_* as a side effect. 146 my $debug = $self->{'debug'}; 147 getopts('a:d:hl:m:p:r:s:t:u:x:ST'); # sets $opt_* as a side effect. 125 148 if ($opt_a and $opt_t) { 126 149 if (-r $opt_a) { … … 143 166 $self->{"voxforgeDict"}="AudioBook/input_files/VoxForgeDict"; 144 167 } 145 ### 168 ### Audio Processing 169 if ($opt_s) { 170 $self->{"average_sentence_length"}=$opt_s; 171 } else { 172 $self->{"average_sentence_length"}= $default_average_sentence_length; 173 } 174 if ($opt_m) { 175 $self->{"max_sentence_length"}=$opt_m; 176 } else { 177 $self->{"max_sentence_length"}= $default_max_sentence_length; 178 } 179 if ($opt_p) { 180 $self->{"min_pause_for_sentence_break"}=$opt_p; 181 } else { 182 $self->{"min_pause_for_sentence_break"}= $default_min_pause_for_sentence_break; 183 } 184 ### Tar file processing 146 185 if (defined($opt_T)) { 147 186 if ($opt_x) { … … 174 213 } 175 214 } 176 } elsif ($opt_ K) {215 } elsif ($opt_S) { # Sanity test switch 177 216 $self->{"audiofile"}="AudioBook/test/audio.wav"; 178 217 #$self->{"textfile"}="AudioBook/test/text-simple.txt"; 179 218 $self->{"textfile"}="AudioBook/test/text-original.txt"; 180 $command = ("cp AudioBook/input_files/VoxForgeDict AudioBook/interim_files/VoxForgeDict"); system($command);219 $command = ("cp AudioBook/input_files/VoxForgeDict AudioBook/interim_files/VoxForgeDict"); print "cmd:$command\n"; system($command); 181 220 $self->{"voxforgeDict"}="AudioBook/interim_files/VoxForgeDict"; 182 221 $self->{"tarSuffix"}=_random_characters(3); 183 222 $self->{"username"}="test"; 223 $self->{"average_sentence_length"}= $default_average_sentence_length; 224 $self->{"max_sentence_length"}= $default_max_sentence_length; 225 $self->{"min_pause_for_sentence_break"}=$default_min_pause_for_sentence_break; 184 226 } elsif ($opt_h) { 185 227 print "\nVoxForge Audio Segmentation Script Parameters\n"; 186 228 print "=============================================\n"; 187 print "-a\t* audio file name \n";188 print "-d\t pronunciation dictionary (default = AudioBook/input_files/VoxforgeDict)\n";229 print "-a\t* audio file name (WAV format only)\n"; 230 print "-d\tpronunciation dictionary (default = AudioBook/input_files/VoxforgeDict)\n"; 189 231 print "-h\tshow help\n"; 190 print "-l\tLICENSE file (default = AudioBook/input_files/LICENCE)\n"; 232 print "-l\tLICENSE file (default = AudioBook/input_files/LICENCE)\n"; 233 print "-m\tMaximum sentence length (default = $default_max_sentence_length words)\n"; 234 print "-p\tMinimum pause for sentence break (default = $default_min_pause_for_sentence_break in units of 100ns)\n"; 191 235 print "-r\tREADME file (default = AudioBook/input_files/README)\n"; 236 print "-s\tAverage sentence length (default = $default_average_sentence_length words)\n"; 192 237 print "-t\t* text file name\n"; 193 238 print "-u\tusername or name you want file stats collected by on VoxForge Metrics \n"; 194 239 print "\tpage:\t(http://www.voxforge.org/home/downloads/metrics)\n"; 195 240 print "-x\tunique tar file suffix (max 3 characters - remainder is truncated)\n"; 196 print "- K\truntest\n";241 print "-S\trun sanity test\n"; 197 242 print "-T\tcreate gzipped/tar file\n"; 198 243 print "\n\t* required for script to run\n"; … … 210 255 } 211 256 257 #################################################################### 258 ### Gettors - Public 259 #################################################################### 260 sub getAverage_sentence_length { 261 my $self = shift; 262 return $self->{"average_sentence_length"}; 263 } 264 265 sub getMax_sentence_length { 266 my $self = shift; 267 return $self->{"max_sentence_length"}; 268 } 269 270 sub getMin_pause_for_sentence_break { 271 my $self = shift; 272 return $self->{"max_sentence_length"}; 273 } 274 212 275 1; Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Audio.pm
r2585 r2586 38 38 ### Class Variables 39 39 #################################################################### 40 my $average_sentence_length = 13; 41 my $max_sentence_length = 18; 42 #my $min_pause_for_sentence_break = 200000; # HTK time format - 100 millisecond increments 43 my $min_pause_for_sentence_break = 5000000; 40 #my $average_sentence_length = 13; 41 #my $max_sentence_length = 18; 42 #my $min_pause_for_sentence_break = 5000000; # HTK time format - 100 millisecond increments 44 43 my (@max_sentences, $max_sentence_length_found, $max_sentence_length_linenumber, $min_sentence_length_linenumber); 45 my $min_sentence_length_found = $max_sentence_length; 44 #my $min_sentence_length_found = $max_sentence_length; 45 my $min_sentence_length_found; 46 46 my $up_increment = 1; 47 47 my $down_increment = -1; 48 my ($command);48 my $command; 49 49 #################################################################### 50 50 ### Constructor … … 57 57 $self{'htk_files'} = $super->{'htk_files'}; 58 58 $self{'g2p_model'} = $super->{'g2p_model'}; 59 59 $self{"average_sentence_length"} = $super->{"average_sentence_length"}; 60 $self{"max_sentence_length"} = $super->{"max_sentence_length"}; 61 $min_sentence_length_found = $super->{"max_sentence_length"}; 62 $self{"min_pause_for_sentence_break"} = $super->{"min_pause_for_sentence_break"}; 63 $self{'log'} = $super->{'log'}; 60 64 bless(\%self,$class); 61 65 return \%self; … … 87 91 $textContents->createMLFFile("downsampled","AudioBook/interim_files/words.mlf" ); 88 92 # forced alignment - creates aligned.out 89 #!!!!!! $command = ("cd AudioBook/interim_files && HVite -A -D -T 1 -l '*' -a -b SENT-END -m -C wav_config -H macros -H hmmdefs -m -t 250.0 150.0 1000.0 -I words.mlf -i aligned.out -S train.scp dict tiedlist"); system($command) == 0 or confess "error: $command failed: $?"; 90 $command = ("pwd && HVite -A -D -T 1 -l '*' -a -b SENT-END -m -C $htk_files/wav_config -H $htk_files/models/macros -H $htk_files/models/hmmdefs -m -t 250.0 150.0 1000.0 -I AudioBook/interim_files/words.mlf -i AudioBook/interim_files/aligned.out -S $htk_files/train.scp AudioBook/interim_files/dict $htk_files/models/tiedlist"); system($command) == 0 or confess "error: $command failed: $?"; 91 93 $command = ("pwd && HVite -A -D -T 1 -l '*' -a -b SENT-END -m -C $htk_files/wav_config -H $htk_files/models/macros -H $htk_files/models/hmmdefs -m -t 250.0 150.0 1000.0 -I AudioBook/interim_files/words.mlf -i AudioBook/interim_files/aligned.out -S $htk_files/train.scp AudioBook/interim_files/dict $htk_files/models/tiedlist > AudioBook/interim_files/logs/HVite_log"); system($command) == 0 or confess "error: $command failed: $?"; 94 open (HVite_Log,"AudioBook/interim_files/logs/HVite_log") || confess "error: can't open AudioBook/interim_files/HVite_log: $?"; 95 while (my $line = <HVite_Log>) { 96 chomp $line; 97 my $filename; 98 if ($line =~ /Aligning File:/) { 99 my @line=split(/:/, $line); 100 $filename = pop(@line); 101 } elsif ($line =~ /No tokens survived to final node of network at beam/) { 102 my @line=split(/ /, $line); 103 my $beam = pop (@line); 104 $beam =~ s/ //g; 105 print "\n\n******************************************************************\n"; 106 print "**** check that audio corresponds to prompt in file ***\n"; 107 print "******************************************************************\n\n"; 108 if ($beam > 250) { 109 confess "audio not corresponding to prompt file, check HVite_Log; error code: $?" ; 110 } 111 } 112 } 92 113 } 93 114 … … 135 156 my ($self,$filename,$textContents) = @_; 136 157 my $debug = $self->{"debug"}; 137 158 my $log = $self->{"log"}; 159 138 160 my $filename_nopath = basename($filename); 139 161 my $filename_nosuffix = fileparse($filename, "wav"); … … 144 166 $self->{"filename_prefix"} = lc(substr($filename_nopath,0,3)); 145 167 $self->{"textContents"} = $textContents; 168 169 my $average_sentence_length = $self->{"average_sentence_length"}; 170 my $max_sentence_length = $self->{"max_sentence_length"}; 171 my $min_pause_for_sentence_break = $self->{"min_pause_for_sentence_break"}; 146 172 147 173 my $samplerate = _getSampleRate($self); … … 200 226 201 227 $command = ("rm AudioBook/interim_files/temp.wav"); print "cmd:$command\n" if $debug; system($command); 202 203 print "### segment summary: #######################################################\n"; 204 print "\nSettings:average_sentence_length->$average_sentence_length;max_sentence_length->$max_sentence_length\n"; 205 print " pause length:$min_pause_for_sentence_break (" . $min_pause_for_sentence_break/10000000 . " seconds)\n"; 206 print "\nSentence Length: min:$min_sentence_length_linenumber->$min_sentence_length_found; max:$max_sentence_length_linenumber->$max_sentence_length_found\n"; 207 print "\nSentences over max_sentence_length of $max_sentence_length words:\n"; 208 foreach my $line (@max_sentences) { 209 print "\t$line\n"; 210 } 211 print "\n###########################################################################\n"; 228 229 open(LOG,">>$log") or confess ("cannot open $log file"); 230 print LOG "\nAudio Segmenting summary:\n"; 231 print LOG "-------------------------\n"; 232 print LOG "Settings:average_sentence_length: $average_sentence_length\n"; 233 print LOG " max_sentence_length: $max_sentence_length\n"; 234 print LOG " pause length: $min_pause_for_sentence_break (" . $min_pause_for_sentence_break/10000000 . " seconds)\n\n"; 235 print LOG "Sentence Length: min:$min_sentence_length_linenumber: $min_sentence_length_found\n"; 236 print LOG " max:$max_sentence_length_linenumber: $max_sentence_length_found\n\n"; 237 print LOG "Prompt lines with more than max_sentence_length of $max_sentence_length words:\n"; 238 if (@max_sentences) { 239 foreach my $line (@max_sentences) { 240 print LOG "\t$line\n"; 241 } 242 } else { 243 print LOG "\tnone\n"; 244 } 212 245 } 213 246 … … 216 249 my $debug = $self->{"debug"}; 217 250 my $filename_prefix = $self->{"filename_prefix"}; 251 my $average_sentence_length = $self->{"average_sentence_length"}; 252 #my $max_sentence_length = $self->{"max_sentence_length"}; 253 my $min_pause_for_sentence_break = $self->{"min_pause_for_sentence_break"}; 218 254 219 255 my @aligned_line = split(/\s/,$$aligned_words[$$sentence_end + $increment]); … … 277 313 my ($self,$sentence_start,$sentence_end,$increment,$filename_prefix,$padded_fileid ) = @_; 278 314 my $debug = $self->{"debug"}; 315 #my $average_sentence_length = $self->{"average_sentence_length"}; 316 my $max_sentence_length = $self->{"max_sentence_length"}; 317 #my $min_pause_for_sentence_break = $self->{"min_pause_for_sentence_break"}; 279 318 280 319 if ($debug) { Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Dictionary.pm
r2585 r2586 38 38 $self{'htk_files'} = $super->{'htk_files'}; 39 39 $self{'g2p_model'} = $super->{'g2p_model'}; 40 $self{'log'} = $super->{'log'}; 40 41 bless(\%self,$class); 41 42 return \%self; … … 52 53 $self->{"missing_words"} = $missing_words; 53 54 54 $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $voxforgeDict"); system($command) == 0 or confess "fullrun $command failed: $?"; 55 open(DLOG,"AudioBook/interim_files/dlog") or confess ("cannot open AudioBook/interim_files/dlog file"); 55 $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $voxforgeDict"); system($command) == 0 or confess "fullrun $command failed: $?"; 56 my $dlog1 = "AudioBook/interim_files/logs/dlog1"; 57 $command = ("mv AudioBook/interim_files/dlog $dlog1"); print "cmd:$command\n" if $debug; system($command); 58 open(DLOG,$dlog1) or confess ("cannot open $dlog1 file"); 56 59 open(MISSINGWORDS,">$missing_words") or confess ("cannot open $missing_words file"); 57 60 my $missingwordsheader = 0; … … 82 85 sub getPronunciations { # public 83 86 my ($self,$missing_words_out)= @_; 84 my $debug = $self->{ "debug"};87 my $debug = $self->{'debug'}; 85 88 my $model = $self->{'g2p_model'}; 86 my $missing_words = $self->{"missing_words"}; 89 my $missing_words = $self->{'missing_words'}; 90 my $log = $self->{'log'}; 87 91 88 92 $self->{"missing_words_out"} = $missing_words_out; … … 94 98 } 95 99 open(MISSINGWORDSOUT,">$missing_words_out") or confess ("cannot open $missing_words_out file"); 100 open(LOG,">>$log") or confess ("cannot open $log file"); 101 print LOG "\nMissing Words added to Pronunciation Dictionary:\n"; 102 print LOG "------------------------------------------------\n"; 96 103 my ($word, $phonemes); 97 104 format MISSINGWORDSOUT = 105 @<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 106 $word,"[" . $word ."]",$phonemes 107 . 108 format LOG = 98 109 @<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 99 110 $word,"[" . $word ."]",$phonemes … … 104 115 $phonemes = join(" ",@line); 105 116 write MISSINGWORDSOUT; 117 write LOG; 106 118 } 119 close MISSINGWORDSOUT; 120 close LOG; 107 121 return 1; 108 122 } Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Text.pm
r2585 r2586 34 34 #################################################################### 35 35 sub new { 36 my ($class,$ textFile) = @_;36 my ($class,$super,$textFile) = @_; 37 37 my %self; 38 38 $self{"inputfilename"} = $textFile; 39 $self{'log'} = $super->{'log'}; 39 40 _clean(\%self); 40 41 bless(\%self,$class); … … 49 50 my $debug = $self->{"debug"}; 50 51 my $inputfilename = $self->{"inputfilename"}; 52 my $log = $self->{'log'}; 51 53 52 54 open(IN, "<$inputfilename") or confess ("error: cannot open input $inputfilename for input"); 55 open(LOG,">>$log") or confess ("cannot open $log file"); 56 print LOG "Changes made to Text file\n"; 57 print LOG "-------------------------\n"; 53 58 my @eText = <IN>; # slurp in entire file into an array 54 59 my @words; … … 91 96 _processEmails($self, \@words, $word); 92 97 #} elsif ($word =~ /(http|https|ftp):\/\/[\w\-.]+(:\d+)?\/[\w.\-~\/%]+/) { # url 93 } elsif ($word =~ /(www\.)?(\w+)\.([A-Za-z]{2,4})/) { # url 98 } elsif ($word =~ /(www\.)?(\w+)\.([A-Za-z]{2,4})/) { # url 94 99 _processUrls($self, \@words, $word); 95 } elsif ($word =~ /\d/) { # word contains numbers 100 } elsif ($word =~ /^\d+$/) { # word contains one or more numbers 101 print LOG "changed digit: $word \n"; 96 102 _processNumbers ($self, \@words, $word); 97 } elsif ($word =~ /\./) {# $word contains a period 103 }elsif ($word =~ /\d/) { # word contains numbers and letters 104 print LOG "changed digit: $word \n"; 105 _processWordsContainingNumbers ($self, \@words,$word); 106 } 107 elsif ($word =~ /\./) {# $word contains a period 108 print LOG "changed word with period in it: $word to"; 98 109 $word =~ s/\.//g; 110 print LOG " $word\n"; 99 111 push (@words, $word); 100 112 } else { … … 114 126 confess "error - fix: $word contains numbers\n" 115 127 } elsif ($word =~ /(\w+)\@(\w+)\.(\w+)/) { # only catches basic email address 116 push (@$wordarray, $1); 117 push (@$wordarray, "at"); 118 push (@$wordarray, $2); 119 push (@$wordarray, "dot"); 120 push (@$wordarray, $3); 128 my $word1 = $1; 129 my $word2 = $2; 130 my $word3 = $3; 131 push (@$wordarray, $word1); 132 push (@$wordarray, "AT"); 133 push (@$wordarray, $word2); 134 push (@$wordarray, "DOT"); 135 push (@$wordarray, $word3); 136 print LOG "changed email address: $word to $word1 at $word2 DOT $word3\n"; 121 137 } else { 122 138 confess "error - fix: $word\n" … … 133 149 my ($self,$wordarray, $word)= @_; 134 150 my $debug = $self->{"debug"}; 135 print "processingUrls: $word\n"; 151 print "processingUrls: $word\n" if $debug; 152 136 153 if ($word =~ /(www)?\.(\w+)\.([A-Za-z]{2,4})/) { # URL: www.abc.com 137 print "processingUrls: splitword:$1;DOT;$2;DOT;$3;\n"; 138 push (@$wordarray, $1); 154 my $word1 = $1; 155 my $word2 = $2; 156 my $word3 = $3; 157 print LOG "changed URL $word to:$word1 DOT $word2 DOT $word3;\n"; 158 push (@$wordarray, $word1); 139 159 push (@$wordarray, "DOT"); 140 if ($ 2 =~ /\d+/) {141 _processWordsContainingNumbers ($self, $wordarray, $2);160 if ($word2 =~ /\d+/) { 161 _processWordsContainingNumbers ($self, $wordarray,$word2); 142 162 } else { 143 push (@$wordarray, $ 2);163 push (@$wordarray, $word2); 144 164 } 145 165 push (@$wordarray, "DOT"); 146 push (@$wordarray, $ 3); # can only be a top level domain name166 push (@$wordarray, $word3); # can only be a top level domain name 147 167 } elsif ($word =~ /(\w+)\.([A-Za-z]{2,4})/) { # URL: abc.com 148 168 my $word1 = $1; 149 169 my $word2 = $2; 150 print "processingUrls: splitword:$word1;DOT;$word2;\n";170 print LOG "changed URL $word to:$word1;DOT;$word2;\n"; 151 171 if ($word1 =~ /\d+/) { 152 print "processingUrls: splitword2:$word1 ;DOT;$word2;\n";172 print "processingUrls: splitword2:$word1 DOT $word2;\n" if $debug; 153 173 _processWordsContainingNumbers ($self, $wordarray,$word1); 154 174 } else { … … 168 188 if ($subword =~ /\d+\D+/) { # assume single, consecutive set of numbers (i.e no split numbers in word) 169 189 my $number = $subword; 170 print "processWordsContainingNumbers:subword:$subword;\n" ;190 print "processWordsContainingNumbers:subword:$subword;\n" if $debug; 171 191 $number =~ s/\D//g; # removes non-digit characters 172 print "processWordsContainingNumbers:number:$number\n" ;192 print "processWordsContainingNumbers:number:$number\n" if $debug; 173 193 my @number = split(//, $number); 174 194 foreach my $digit (@number) { 175 processNumbers ($self, $wordarray, $digit); # will push the resulting ordinal converted to a word onto the array195 _processNumbers ($self, $wordarray, $digit); # will push the resulting ordinal converted to a word onto the array 176 196 } 177 197 my $word = $subword; … … 187 207 } 188 208 my $number = $subword; 189 print "processWordsContainingNumbers:subword:$subword;\n" ;209 print "processWordsContainingNumbers:subword:$subword;\n" if $debug; 190 210 $number =~ s/\D//g; # removes non-digit characters 191 print "processWordsContainingNumbers:number:$number\n" ;211 print "processWordsContainingNumbers:number:$number\n" if $debug; 192 212 my @number = split(//, $number); 193 213 foreach my $digit (@number) { 194 214 _processNumbers ($self, $wordarray, $digit); # will push the resulting ordinal converted to a word onto the array 195 215 } 196 197 216 } 198 217 } … … 209 228 push (@$words, uc $word); 210 229 } 211 print "info:converted number:$number: to $wordnum\n";230 print LOG "converted number:$number: to $wordnum\n"; 212 231 } elsif (($number =~ /^\d{4}$/) and ($number < 2100)){ # assume 4 digit numbers between 1000 and 2100 are years 213 232 my $datenum = year2en($number); … … 217 236 push (@$words, uc $word); 218 237 } 219 print "info:converted date:$number: to $datenum\n";238 print LOG "converted date:$number: to $datenum\n"; 220 239 } elsif ($number =~ /^\d+$/) { # contains only numbers 221 240 my $wordnum = num2en($number); … … 225 244 push (@$words, uc $word); 226 245 } 227 print "info:converted number:$number: to $wordnum\n";246 print LOG "converted number:$number: to $wordnum\n"; 228 247 } elsif ($number =~ /^£\d+/) { # convert pounds to words 229 248 $number =~ s/^£//; # remove pound sign … … 235 254 } 236 255 push (@$words, "POUNDS"); 237 print "info:converted pounds:£$number: to $wordnum pounds\n";256 print LOG "converted pounds:£$number: to $wordnum pounds\n"; 238 257 } elsif ($number =~ /^\$\d+/) { # convert dollars to words 239 258 $number =~ s/^\$//; # remove dollar sign … … 245 264 } 246 265 push (@$words, "DOLLARS"); 247 print "info:converted dollars:$number: to $wordnum dollars\n";266 print LOG "converted dollars:$number: to $wordnum dollars\n"; 248 267 # skip this - minutes do not get processed properly (since they use a signle quote) - do manually in eText file. 249 268 # } elsif ($number =~ /^\d{1,3}°$/) { # convert degrees to words … … 266 285 push (@$words, uc $word); 267 286 } 268 print "info:converted number:$number: to $wordnum \n";287 print LOG "converted number:$number: to $wordnum \n"; 269 288 } elsif ($number =~ /^\w+\d+/) { # convert ordinal number to words 270 289 my $numbertemp = $number; … … 276 295 push (@$words, uc $word); 277 296 } 278 print "info:converted number:$number: to $wordnum \n";297 print LOG "converted number:$number: to $wordnum \n"; 279 298 } else { 280 299 push (@$words, $number); Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/output_files/prompts
r2585 r2586 1 rai0001 WHEN THE SUNLIGHT STRIKES RAINDROPS IN THE AIR THEY ACT AS A PRISM AND FORM A RAINBOW 2 rai0002 THE RAINBOW IS A DIVISION OF LIGHT WHITE LIGHT INTO MANY BEAUTIFUL COLORS 3 rai0003 THESE TAKE THE SHAPE OF A LONG ROUND ARCH WITH ITS PATH HIGH ABOVE 4 rai0004 AND ITS TWO ENDS APPARENTLY BEYOND THE HORIZON 5 rai0005 THERE IS ACCORDING TO LEGEND A BOILING POT OF GOLD AT ONE END 6 rai0006 PEOPLE LOOK BUT NO ONE EVER FINDS IT 7 rai0007 WHEN A MAN LOOKS FOR SOMETHING BEYOND HIS REACH HIS FRIENDS SAY HE IS LOOKING FOR THE POT OF GOLD AT THE END OF THE RAINBOW 8 rai0008 THROUGHOUT THE CENTURIES PEOPLE HAVE EXPLAINED THE RAINBOW IN VARIOUS WAYS SOME HAVE ACCEPTED IT AS A MIRACLE WITHOUT PHYSICAL EXPLANATIONS 9 rai0009 TO THE HEBREWS IT WAS A TOKEN THAT THERE WOULD BE NO MORE UNIVERSAL FLOODS 10 rai0010 THE GREEKS USED TO IMAGINE THAT IT WAS A SIGN FROM THE GODS TO FORETELL WAR OR HEAVY RAIN 11 rai0011 THE NORSEMEN CONSIDERED THE RAINBOW AS A BRIDGE OVER WHICH THE GODS PASSED FROM EARTH TO THEIR HOME IN THE SKY 12 rai0012 OTHERS HAVE TRIED TO EXPLAIN THE PHENOMENON PHYSICALLY 13 rai0013 ARISTOTLE THOUGHT THAT THE RAINBOW WAS CAUSED BY REFLECTION OF THE SUN'S RAYS BY THE RAIN 14 rai0014 SINCE THEN PHYSICISTS HAVE FOUND THAT IT IS NOT REFLECTION 15 rai0015 BUT REFRACTION BY THE RAINDROPS WHICH CAUSES THE RAINBOWS 16 rai0016 MANY COMPLICATED IDEAS ABOUT THE RAINBOW HAVE BEEN FORMED 17 rai0017 THE DIFFERENCE IN THE RAINBOW DEPENDS CONSIDERABLY UPON THE SIZE OF THE DROPS 18 rai0018 AND THE WIDTH OF THE COLORED BAND INCREASES AS THE SIZE OF THE DROPS INCREASES 19 rai0019 THE ACTUAL PRIMARY RAINBOW OBSERVED IS SAID TO BE THE EFFECT OF SUPER IMPOSITION 20 rai0020 OF A NUMBER OF BOWS 21 rai0021 IF THE RED OF THE SECOND BOW FALLS UPON THE GREEN OF THE FIRST THE RESULT IS TO GIVE A BOW 22 rai0022 WITH A ABNORMALLY WIDE YELLOW BAND SINCE RED AND GREEN LIGHT WHEN MIXED FORM YELLOW 23 rai0023 THIS IS NOT THIS IS A VERY COMMON TYPE OF BOW 24 rai0024 ONE SHOWING MAINLY RED AND YELLOW WITH LITTLE OR NO GREEN OR BLUE 1 rai0001 WHEN THE SUNLIGHT STRIKES RAINDROPS IN THE AIR THEY ACT AS A PRISM AND 2 rai0002 FORM A RAINBOW THE RAINBOW IS A DIVISION OF 3 rai0003 LIGHT WHITE LIGHT INTO MANY BEAUTIFUL COLORS 4 rai0004 THESE TAKE THE SHAPE OF A LONG ROUND ARCH 5 rai0005 WITH ITS PATH HIGH ABOVE 6 rai0006 AND ITS TWO ENDS APPARENTLY BEYOND THE HORIZON 7 rai0007 THERE IS ACCORDING TO LEGEND A BOILING POT OF 8 rai0008 GOLD AT ONE END PEOPLE LOOK BUT NO ONE EVER 9 rai0009 FINDS IT WHEN A MAN LOOKS FOR SOMETHING BEYOND HIS REACH 10 rai0010 HIS FRIENDS SAY HE IS LOOKING FOR THE POT OF 11 rai0011 GOLD AT THE END OF THE RAINBOW 12 rai0012 THROUGHOUT THE CENTURIES PEOPLE HAVE EXPLAINED THE RAINBOW IN VARIOUS WAYS 13 rai0013 SOME HAVE ACCEPTED IT AS A MIRACLE WITHOUT PHYSICAL EXPLANATIONS 14 rai0014 TO THE HEBREWS IT WAS A TOKEN THAT THERE WOULD BE 15 rai0015 NO MORE UNIVERSAL FLOODS THE GREEKS USED TO IMAGINE 16 rai0016 THAT IT WAS A SIGN FROM THE GODS TO FORETELL 17 rai0017 WAR OR HEAVY RAIN THE NORSEMEN CONSIDERED THE RAINBOW 18 rai0018 AS A BRIDGE OVER WHICH THE GODS PASSED FROM EARTH TO THEIR HOME IN THE SKY 19 rai0019 OTHERS HAVE TRIED TO EXPLAIN THE PHENOMENON PHYSICALLY 20 rai0020 ARISTOTLE THOUGHT THAT THE RAINBOW WAS CAUSED BY REFLECTION 21 rai0021 OF THE SUN'S RAYS BY THE RAIN 22 rai0022 SINCE THEN PHYSICISTS HAVE FOUND THAT IT IS NOT 23 rai0023 REFLECTION BUT REFRACTION BY THE RAINDROPS WHICH CAUSES THE RAINBOWS 24 rai0024 MANY COMPLICATED IDEAS ABOUT THE RAINBOW HAVE BEEN 25 rai0025 FORMED THE DIFFERENCE IN THE RAINBOW DEPENDS CONSIDERABLY UPON THE SIZE OF THE DROPS 26 rai0026 AND THE WIDTH OF THE COLORED BAND INCREASES AS THE SIZE OF THE DROPS INCREASES 27 rai0027 THE ACTUAL PRIMARY RAINBOW OBSERVED IS SAID TO BE THE EFFECT 28 rai0028 OF SUPER IMPOSITION OF A NUMBER OF BOWS 29 rai0029 IF THE RED 30 rai0030 OF THE SECOND BOW FALLS UPON THE GREEN OF THE FIRST THE 31 rai0031 RESULT IS TO GIVE A BOW WITH A 32 rai0032 ABNORMALLY WIDE YELLOW BAND SINCE RED AND GREEN LIGHT 33 rai0033 WHEN MIXED FORM YELLOW THIS IS NOT 34 rai0034 THIS IS A VERY COMMON TYPE OF BOW 35 rai0035 ONE SHOWING MAINLY RED AND YELLOW WITH 36 rai0036 LITTLE OR NO GREEN OR BLUE