Changeset 2585
- Timestamp:
- 05/14/08 22:06:47 (5 months ago)
- Files:
-
- Trunk/Scripts/Audio_scripts/AudioSegmentation (modified) (1 prop)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook.pm (modified) (8 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Audio.pm (modified) (13 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Dictionary.pm (modified) (1 diff)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Text.pm (modified) (10 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/input_files/VoxForgeDict (added)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/aligned.out (modified) (1 diff)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/dict (modified) (1 diff)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/dlog (modified) (1 diff)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/dlog1 (added)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/dlog2 (added)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/downsampled.wav (modified) (previous)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/htksegment_log (modified) (1 diff)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/wlist (modified) (1 diff)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/words.mlf (modified) (1 diff)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/output_files/dict (modified) (1 diff)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/output_files/prompts (modified) (1 diff)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/test/VoxForgeDict (deleted)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
Trunk/Scripts/Audio_scripts/AudioSegmentation
- Property svn:ignore changed from
hoe.wav
hoe-16-original.wav
hoe-original.wav
to
hoe.wav
hoe-16-original.wav
hoe-original.wav
bluefox83-20080413-bwq.tgz
bluefox83-20080512
- Property svn:ignore changed from
Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook.pm
r2578 r2585 36 36 ### Class Variables 37 37 #################################################################### 38 our($opt_a,$opt_d,$opt_h,$opt_l,$opt_r,$opt_t,$opt_x,$opt_K,$opt_T, ); # need to define these because using strict.38 our($opt_a,$opt_d,$opt_h,$opt_l,$opt_r,$opt_t,$opt_x,$opt_K,$opt_T,$opt_u); # need to define these because using strict. 39 39 my %self; 40 40 $self{'debug'} = 0; … … 72 72 $dictionary->updatePronDict(); 73 73 # need to update dict with missing words 74 74 # can't seem to change default HDMan log file with "-l" parameter 75 $command = ("cp AudioBook/interim_files/dlog AudioBook/interim_files/dlog1"); print "cmd:$command\n" if $debug; system($command); 75 76 $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $voxforgeDict"); system($command) == 0 or confess "fullrun $command failed: $?"; 77 $command = ("mv AudioBook/interim_files/dlog AudioBook/interim_files/dlog2"); print "cmd:$command\n" if $debug; system($command); 76 78 $command = ("cp AudioBook/interim_files/MissingWords_out AudioBook/output_files/MissingWords"); print "cmd:$command\n" if $debug; system($command); 77 79 } else { … … 121 123 my ($self)= @_; 122 124 getopts('a:d:hl:r:t:u:x:KT'); # sets $opt_* as a side effect. 123 if ($opt_a and $opt_t and $opt_d) {125 if ($opt_a and $opt_t) { 124 126 if (-r $opt_a) { 125 127 $self->{"audiofile"}=$opt_a; … … 132 134 die "can't open -t" . $self->{"textfile"} . "\n"; 133 135 } 134 if (-r $opt_d) { 135 $self->{"voxforgeDict"}=$opt_d; 136 if (defined($opt_d)) { 137 if (-r $opt_d) { 138 $self->{"voxforgeDict"}=$opt_d; 139 } else { 140 die "can't open -d" . $self->{"voxforgeDict"} . "\n"; 141 } 136 142 } else { 137 die "can't open -d" . $self->{"voxforgeDict"} . "\n";143 $self->{"voxforgeDict"}="AudioBook/input_files/VoxForgeDict"; 138 144 } 139 145 ### … … 142 148 $self->{"tarSuffix"}=substr($opt_x,3); # only use 1st 3 characters. 143 149 }else { 144 $self->{"tarSuffix"}= random_characters(3);150 $self->{"tarSuffix"}=_random_characters(3); 145 151 } 146 152 if ($opt_u) { 153 $self->{"username"}=$opt_u; 154 }else { 155 $self->{"username"}="anonymous"; 156 } 147 157 if ($opt_r) { 148 158 if (-r $opt_r) { … … 168 178 #$self->{"textfile"}="AudioBook/test/text-simple.txt"; 169 179 $self->{"textfile"}="AudioBook/test/text-original.txt"; 170 $self->{"voxforgeDict"}="AudioBook/test/VoxForgeDict"; 180 $command = ("cp AudioBook/input_files/VoxForgeDict AudioBook/interim_files/VoxForgeDict"); system($command); 181 $self->{"voxforgeDict"}="AudioBook/interim_files/VoxForgeDict"; 171 182 $self->{"tarSuffix"}=_random_characters(3); 172 183 $self->{"username"}="test"; … … 175 186 print "=============================================\n"; 176 187 print "-a\t* audio file name\n"; 177 print "-d\t * pronunication dictionary\n";188 print "-d\t pronunciation dictionary (default = AudioBook/input_files/VoxforgeDict)\n"; 178 189 print "-h\tshow help\n"; 179 190 print "-l\tLICENSE file (default = AudioBook/input_files/LICENCE)\n"; … … 182 193 print "-u\tusername or name you want file stats collected by on VoxForge Metrics \n"; 183 194 print "\tpage:\t(http://www.voxforge.org/home/downloads/metrics)\n"; 195 print "-x\tunique tar file suffix (max 3 characters - remainder is truncated)\n"; 184 196 print "-K\trun test\n"; 185 print "-T\tunique tar file suffix - also acts a switch to create gzipped/tar file\n"; 186 print "\t(max 3 characters - remainder is truncated)\n"; 187 print "\n\t* required for scripts to run\n"; 197 print "-T\tcreate gzipped/tar file\n"; 198 print "\n\t* required for script to run\n"; 188 199 print "\n"; 189 200 exit; Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Audio.pm
r2576 r2585 38 38 ### Class Variables 39 39 #################################################################### 40 my $average_sentence_length = 8;41 my $max_sentence_length = 1 2;40 my $average_sentence_length = 13; 41 my $max_sentence_length = 18; 42 42 #my $min_pause_for_sentence_break = 200000; # HTK time format - 100 millisecond increments 43 my $min_pause_for_sentence_break = 1500000;43 my $min_pause_for_sentence_break = 5000000; 44 44 my (@max_sentences, $max_sentence_length_found, $max_sentence_length_linenumber, $min_sentence_length_linenumber); 45 45 my $min_sentence_length_found = $max_sentence_length; … … 140 140 $filename_nosuffix =~ s/\.//; 141 141 $self->{"filename"} = $filename; 142 $self->{"filename_nosuffix"} = $filename_nosuffix; 142 143 $self->{"filename_nopath"} = $filename_nopath; 143 $self->{"filename_ nosuffix"} = $filename_nosuffix;144 $self->{"filename_prefix"} = lc(substr($filename_nopath,0,3)); 144 145 $self->{"textContents"} = $textContents; 145 146 … … 200 201 $command = ("rm AudioBook/interim_files/temp.wav"); print "cmd:$command\n" if $debug; system($command); 201 202 202 print "### segment summary: ################################################################\n"; 203 print "### segment summary: #######################################################\n"; 204 print "\nSettings:average_sentence_length->$average_sentence_length;max_sentence_length->$max_sentence_length\n"; 205 print " pause length:$min_pause_for_sentence_break (" . $min_pause_for_sentence_break/10000000 . " seconds)\n"; 203 206 print "\nSentence Length: min:$min_sentence_length_linenumber->$min_sentence_length_found; max:$max_sentence_length_linenumber->$max_sentence_length_found\n"; 204 207 print "\nSentences over max_sentence_length of $max_sentence_length words:\n"; … … 206 209 print "\t$line\n"; 207 210 } 208 print "\n########################################################################### ##########\n";211 print "\n###########################################################################\n"; 209 212 } 210 213 … … 212 215 my ($self, $where, $increment,$aligned_words, $fileid, $sentence_start, $sentence_end) = @_; 213 216 my $debug = $self->{"debug"}; 214 my $filename_ nosuffix = $self->{"filename_nosuffix"};217 my $filename_prefix = $self->{"filename_prefix"}; 215 218 216 219 my @aligned_line = split(/\s/,$$aligned_words[$$sentence_end + $increment]); … … 220 223 my $count; 221 224 my $padded_fileid = sprintf("%04d",$$fileid); 222 print PROMPTS "$filename_ nosuffix$padded_fileid ";225 print PROMPTS "$filename_prefix$padded_fileid "; 223 226 for ($count = $$sentence_start; $count <= $$sentence_end + $increment; $count++) { 224 227 my @aligned_line = split(/ /,$$aligned_words[$count]); … … 259 262 _processAudio ($self,$startTime,$endTime,$padded_fileid) ; 260 263 # !!!!!! 261 _calculateStats($self,$sentence_start,$sentence_end,\$increment,$filename_ nosuffix,$padded_fileid);264 _calculateStats($self,$sentence_start,$sentence_end,\$increment,$filename_prefix,$padded_fileid); 262 265 # !!!!!! 263 #print "$filename_ nosuffix$padded_fileid:$wordcount\n"; # !!!!!!266 #print "$filename_prefix$padded_fileid:$wordcount\n"; # !!!!!! 264 267 # Reset everthing to start looking for next set words delimited by a pause 265 268 $$sentence_start = ($$sentence_end + $increment) + 1; … … 272 275 273 276 sub _calculateStats { # Calculate min and max sentence 274 my ($self,$sentence_start,$sentence_end,$increment,$filename_ nosuffix,$padded_fileid ) = @_;277 my ($self,$sentence_start,$sentence_end,$increment,$filename_prefix,$padded_fileid ) = @_; 275 278 my $debug = $self->{"debug"}; 276 279 … … 286 289 287 290 if ($sentence_length>$max_sentence_length) { 288 push (@max_sentences,"$filename_nosuffix$padded_fileid:" . $sentence_length+1); 289 } elsif ($sentence_length<$min_sentence_length_found) { 291 push (@max_sentences,"$filename_prefix$padded_fileid:" . ($sentence_length+1)); 292 } 293 if ($sentence_length<$min_sentence_length_found) { 290 294 $min_sentence_length_found = $sentence_length+1; 291 $min_sentence_length_linenumber = "$filename_ nosuffix$padded_fileid";295 $min_sentence_length_linenumber = "$filename_prefix$padded_fileid"; 292 296 } elsif ($sentence_length>$max_sentence_length_found) { 293 297 $max_sentence_length_found = $sentence_length+1; 294 $max_sentence_length_linenumber = "$filename_ nosuffix$padded_fileid";298 $max_sentence_length_linenumber = "$filename_prefix$padded_fileid"; 295 299 } 296 300 } … … 299 303 my ($self, $aligned_words, $fileid, $sentence_start, $sentence_end) = @_; 300 304 my $debug = $self->{"debug"}; 301 my $filename_ nosuffix = $self->{"filename_nosuffix"};305 my $filename_prefix = $self->{"filename_prefix"}; 302 306 my ($count, $startTime); 303 307 304 308 #print "!!!!!!_last_sentence\n"; 305 309 my $padded_fileid = sprintf("%04d",$$fileid); 306 print PROMPTS "$filename_ nosuffix$padded_fileid ";310 print PROMPTS "$filename_prefix$padded_fileid "; 307 311 for ($count = $$sentence_start; $count <= $#$aligned_words; $count++) { 308 312 my @aligned_line = split(/ /,$$aligned_words[$count]); 309 313 my ($word,$startTime,$endTime,$pause) = @aligned_line ; 310 #print $filename_ nosuffix . $#$aligned_words . "last aligned_line:@aligned_line:end\n"; # !!!!!!314 #print $filename_prefix . $#$aligned_words . "last aligned_line:@aligned_line:end\n"; # !!!!!! 311 315 print PROMPTS ("$word "); 312 316 } … … 334 338 my $process_audio = $self->{"process_audio"}; 335 339 my $samplerate = $self->{"samplerate"}; 336 my $filename_ nosuffix = $self->{"filename_nosuffix"};340 my $filename_prefix = $self->{"filename_prefix"}; 337 341 my $filename_nopath = $self->{"filename_nopath"}; 338 342 my $htk_files = $self->{'htk_files'}; … … 346 350 $command = ("HCopy -C $htk_files/copy_config -s $startTime -e $endTime AudioBook/interim_files/$filename_nopath AudioBook/interim_files/temp.wav"); print "cmd:$command\n" if $debug; system($command); 347 351 # sox command to create a proper wav file with a RIFF header; 348 $command = ("sox -t .raw -r $samplerate -sw AudioBook/interim_files/temp.wav AudioBook/output_files/wav/$filename_ nosuffix$padded_fileid.wav"); print "cmd:$command\n" if $debug; system($command);349 print "AudioBook/output_files/wav/$filename_ nosuffix$padded_fileid.wav\n" if not $debug;352 $command = ("sox -t .raw -r $samplerate -sw AudioBook/interim_files/temp.wav AudioBook/output_files/wav/$filename_prefix$padded_fileid.wav"); print "cmd:$command\n" if $debug; system($command); 353 print "AudioBook/output_files/wav/$filename_prefix$padded_fileid.wav\n" if not $debug; 350 354 } else { 351 print "AudioBook/output_files/wav/$filename_ nosuffix$padded_fileid.wav\t$startTime:$endTime:\n" if $debug;355 print "AudioBook/output_files/wav/$filename_prefix$padded_fileid.wav\t$startTime:$endTime:\n" if $debug; 352 356 } 353 357 } … … 365 369 } 366 370 367 sub getFilename__nosuffix {371 sub filename_prefix { 368 372 my $self = shift; 369 return $self->{"filename_ nosuffix"} ;373 return $self->{"filename_prefix"} ; 370 374 } 371 375 1; Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Dictionary.pm
r2578 r2585 116 116 open(DICTIONARY,">>$voxforgeDict") or confess ("cannot open $voxforgeDict file"); 117 117 open(MISSINGWORDSOUT,"<$missing_words_out") or confess ("cannot open $missing_words_out file"); 118 print "adding the following words to VoxForge Pronun icaiton Dictionary\n";118 print "adding the following words to VoxForge Pronunciation Dictionary\n"; 119 119 while (my $line = <MISSINGWORDSOUT>) { 120 120 next if ($line =~ /^(\s)*$/); # skip blank lines Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Text.pm
r2578 r2585 27 27 use diagnostics; 28 28 use Carp; 29 30 use Lingua::EN::Numbers qw(num2en num2en_ordinal); 31 use Lingua::EN::Numbers::Years; 29 32 #################################################################### 30 33 ### Constructor … … 55 58 $line =~ tr/a-z/A-Z/; # change to uppercase 56 59 $line =~ s/,//g; # remove commas 57 $line =~ s/\.\"/ /g; 58 $line =~ s/\.\'/ /g; 59 # !!!!!! 60 $line =~ s/\.\"/ /g; # period followed by double quote 61 $line =~ s/\.\'/ /g; # period followed by single quote 62 # !!!!!! see below for processing of periods 60 63 #line =~ s/\./ /g; # remove periods # too broad - periods in emails or URLs are pronounced, but need to avoid acronyms. 61 $line =~ s/\./ /g; # remove periods62 64 # !!!!!! 63 65 # $line =~ s/\'//g; # remove single quotes; but need words like "don't" - need to research this more ... … … 86 88 $word =~ s/^\'+//; # remove single quote(s) from beginning of word 87 89 $word =~ s/\'+$//; # remove single quote(s) from end of word 88 # !!!!!! 89 #push (@words, $word); 90 if ($word =~ /\d/) { # word contains numbers 91 processNumbers ($self, \@words, $word); 90 if ($word =~ /[\w\-.]+@[\w\-.]+\.[A-Za-z]{2,4}/) { #email address 91 _processEmails($self, \@words, $word); 92 #} elsif ($word =~ /(http|https|ftp):\/\/[\w\-.]+(:\d+)?\/[\w.\-~\/%]+/) { # url 93 } elsif ($word =~ /(www\.)?(\w+)\.([A-Za-z]{2,4})/) { # url 94 _processUrls($self, \@words, $word); 95 } elsif ($word =~ /\d/) { # word contains numbers 96 _processNumbers ($self, \@words, $word); 97 } elsif ($word =~ /\./) {# $word contains a period 98 $word =~ s/\.//g; 99 push (@words, $word); 92 100 } else { 93 101 push (@words, $word); 94 102 } 95 # !!!!!!96 103 } 97 104 } … … 101 108 } 102 109 103 sub processNumbers { 110 sub _processEmails { #private 111 my ($self,$wordarray, $word)= @_; 112 my $debug = $self->{"debug"}; 113 if ($word =~ /\d/) { 114 confess "error - fix: $word contains numbers\n" 115 } elsif ($word =~ /(\w+)\@(\w+)\.(\w+)/) { # only catches basic email address 116 push (@$wordarray, $1); 117 push (@$wordarray, "at"); 118 push (@$wordarray, $2); 119 push (@$wordarray, "dot"); 120 push (@$wordarray, $3); 121 } else { 122 confess "error - fix: $word\n" 123 } 124 125 } 126 127 #while ($word =~ s/(\w+)\.(\w+)/g) { # how to loop regex results 128 # push (@words, $1); 129 # push (@words, "dot"); 130 # push (@words, $2); 131 #} 132 sub _processUrls { #private 133 my ($self,$wordarray, $word)= @_; 134 my $debug = $self->{"debug"}; 135 print "processingUrls: $word\n"; 136 if ($word =~ /(www)?\.(\w+)\.([A-Za-z]{2,4})/) { # URL: www.abc.com 137 print "processingUrls: splitword:$1;DOT;$2;DOT;$3;\n"; 138 push (@$wordarray, $1); 139 push (@$wordarray, "DOT"); 140 if ($2 =~ /\d+/) { 141 _processWordsContainingNumbers ($self, $wordarray, $2); 142 } else { 143 push (@$wordarray, $2); 144 } 145 push (@$wordarray, "DOT"); 146 push (@$wordarray, $3); # can only be a top level domain name 147 } elsif ($word =~ /(\w+)\.([A-Za-z]{2,4})/) { # URL: abc.com 148 my $word1 = $1; 149 my $word2 = $2; 150 print "processingUrls: splitword:$word1;DOT;$word2;\n"; 151 if ($word1 =~ /\d+/) { 152 print "processingUrls: splitword2:$word1;DOT;$word2;\n"; 153 _processWordsContainingNumbers ($self, $wordarray,$word1); 154 } else { 155 push (@$wordarray, $word1); 156 } 157 push (@$wordarray, "DOT"); 158 push (@$wordarray, $word2); # can only be a top level domain name 159 } else { 160 confess "error - fix: $word\n" 161 } 162 } 163 164 sub _processWordsContainingNumbers { #private 165 my ($self,$wordarray, $subword)= @_; 166 my $debug = $self->{"debug"}; 167 # separates numbers contained in a word, and converts ordinal poriton of a words into its own word. 168 if ($subword =~ /\d+\D+/) { # assume single, consecutive set of numbers (i.e no split numbers in word) 169 my $number = $subword; 170 print "processWordsContainingNumbers:subword:$subword;\n"; 171 $number =~ s/\D//g; # removes non-digit characters 172 print "processWordsContainingNumbers:number:$number\n"; 173 my @number = split(//, $number); 174 foreach my $digit (@number) { 175 processNumbers ($self, $wordarray, $digit); # will push the resulting ordinal converted to a word onto the array 176 } 177 my $word = $subword; 178 $word =~ s/\d//g; # removes digits 179 if (defined($word) and ($word ne "")) { 180 push (@$wordarray, $word); 181 } 182 } elsif ($subword =~ /\D+\d+/) { # assume single, consecutive set of numbers (i.e no split numbers in word) 183 my $word = $subword; 184 $word =~ s/\d//g; # removes digits 185 if (defined($word) and ($word ne "")) { 186 push (@$wordarray, $word); 187 } 188 my $number = $subword; 189 print "processWordsContainingNumbers:subword:$subword;\n"; 190 $number =~ s/\D//g; # removes non-digit characters 191 print "processWordsContainingNumbers:number:$number\n"; 192 my @number = split(//, $number); 193 foreach my $digit (@number) { 194 _processNumbers ($self, $wordarray, $digit); # will push the resulting ordinal converted to a word onto the array 195 } 196 197 } 198 } 199 200 sub _processNumbers { #private 104 201 my ($self,$words, $number)= @_; 105 202 my $debug = $self->{"debug"}; 106 203 107 if ($number =~ /^\d+,\d+/) { # comma separated number 204 if ($number =~ /^\d+,\d+/) { # comma separated number #!!!!!! but commas are removed above ... 108 205 my $wordnum = num2en($number); 109 206 $wordnum =~ s/-/ /g; # dash - compound number … … 112 209 push (@$words, uc $word); 113 210 } 114 print " converted number:$number: to $wordnum\n";211 print "info: converted number:$number: to $wordnum\n"; 115 212 } elsif (($number =~ /^\d{4}$/) and ($number < 2100)){ # assume 4 digit numbers between 1000 and 2100 are years 116 213 my $datenum = year2en($number); … … 120 217 push (@$words, uc $word); 121 218 } 122 print " converted date:$number: to $datenum\n";219 print "info: converted date:$number: to $datenum\n"; 123 220 } elsif ($number =~ /^\d+$/) { # contains only numbers 124 221 my $wordnum = num2en($number); … … 128 225 push (@$words, uc $word); 129 226 } 130 print " converted number:$number: to $wordnum\n";227 print "info: converted number:$number: to $wordnum\n"; 131 228 } elsif ($number =~ /^£\d+/) { # convert pounds to words 132 229 $number =~ s/^£//; # remove pound sign … … 138 235 } 139 236 push (@$words, "POUNDS"); 140 print " converted pounds:£$number: to $wordnum pounds\n";237 print "info: converted pounds:£$number: to $wordnum pounds\n"; 141 238 } elsif ($number =~ /^\$\d+/) { # convert dollars to words 142 239 $number =~ s/^\$//; # remove dollar sign … … 148 245 } 149 246 push (@$words, "DOLLARS"); 150 print " converted dollars:$number: to $wordnum dollars\n";247 print "info: converted dollars:$number: to $wordnum dollars\n"; 151 248 # skip this - minutes do not get processed properly (since they use a signle quote) - do manually in eText file. 152 249 # } elsif ($number =~ /^\d{1,3}°$/) { # convert degrees to words … … 169 266 push (@$words, uc $word); 170 267 } 171 print "converted number:$number: to $wordnum \n"; 268 print "info: converted number:$number: to $wordnum \n"; 269 } elsif ($number =~ /^\w+\d+/) { # convert ordinal number to words 270 my $numbertemp = $number; 271 $numbertemp =~ s/\D+//; # remove nondigit character 272 my $wordnum = num2en_ordinal($numbertemp); 273 $wordnum =~ s/-/ /g; # dash - compound number 274 my @wordnumlist = split(/ /,$wordnum); 275 foreach my $word (@wordnumlist) { 276 push (@$words, uc $word); 277 } 278 print "info: converted number:$number: to $wordnum \n"; 172 279 } else { 173 280 push (@$words, $number); 174 print "****$number not converted\n";281 confess "error $number not converted\n"; 175 282 } 176 283 Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/aligned.out
r2578 r2585 1 1 #!MLF!# 2 2 "*/downsampled.rec" 3 0 25000000 sil -12504.145508 SENT-END 4 25000000 26600000 dh -1063.311646 THOSE 5 26600000 28700000 ow -1322.120972 6 28700000 30000000 z -869.062622 7 30000000 30000000 sp -0.235017 8 30000000 31400000 iy -905.131226 EVENING 9 31400000 32000000 v -374.610107 10 32000000 32900000 n -586.843201 11 32900000 33300000 ix -266.860107 12 33300000 35000000 ng -1051.927002 13 35000000 35000000 sp -0.235017 14 35000000 35600000 b -387.685791 BELLS 15 35600000 37000000 eh -882.883545 16 37000000 39400000 l -1523.278320 17 39400000 41100000 z -1123.562622 18 41100000 42200000 sp -636.472717 19 42200000 42500000 b -212.382492 BY 20 42500000 43800000 ay -892.630249 21 43800000 43800000 sp -0.235017 22 43800000 45000000 t -788.817322 THOMAS 23 45000000 47400000 aa -1437.506592 24 47400000 48200000 m -512.194336 25 48200000 48600000 ax -273.034210 26 48600000 50100000 s -1051.293213 27 50100000 50100000 sp -0.235017 28 50100000 51400000 m -876.319092 MOORE 29 51400000 53600000 uh -1355.980347 30 53600000 55300000 r -1143.363525 31 55300000 60200000 sp -2658.927002 32 60200000 62000000 r -1157.041748 READ 33 62000000 63200000 eh -767.579407 34 63200000 64300000 d -697.032410 35 64300000 64300000 sp -0.235017 36 64300000 65800000 f -859.398987 FOR 37 65800000 68200000 ao -1561.661621 38 68200000 70300000 r -1418.653687 39 70300000 70300000 sp -0.235017 40 70300000 72300000 m -1382.702515 MOJOMOVE411 41 72300000 73900000 ow -978.509583 42 73900000 74800000 jh -591.590271 43 74800000 76500000 ow -1101.388306 44 76500000 77700000 m -697.787048 45 77700000 80600000 uw -1760.078613 46 80600000 82600000 v -1403.530029 47 82600000 84400000 f -1091.546265 48 84400000 87700000 n -2531.834717 49 87700000 90500000 n -1852.228149 50 90500000 90500000 sp -0.235017 51 90500000 90800000 k -240.841125 COM 52 90800000 92600000 aa -1081.417236 53 92600000 94900000 m -1386.785156 54 94900000 94900000 sp -0.235017 55 94900000 95900000 b -623.744507 BY 56 95900000 103700000 ay -5017.617676 57 103700000 106800000 sp -1927.296143 58 106800000 110000000 r -2306.654541 ROBERT 59 110000000 111200000 aa -703.356628 60 111200000 112000000 b -572.268555 61 112000000 113200000 er -802.027893 62 113200000 113800000 t -399.493500 63 113800000 113800000 sp -0.235017 64 113800000 115300000 s -973.368225 SCOTT 65 115300000 115800000 k -346.667633 66 115800000 118600000 aa -1652.790161 67 118600000 118900000 t -217.191360 68 118900000 127100000 sp -4432.108887 69 127100000 128500000 ae -927.945312 AS 70 128500000 129700000 z -794.615784 71 129700000 129700000 sp -0.235017 72 129700000 130800000 p -681.060791 PART 73 130800000 131700000 aa -618.721863 74 131700000 132800000 r -771.861633 75 132800000 133100000 t -219.589584 76 133100000 133100000 sp -0.235017 77 133100000 133400000 ah -194.171768 OF 78 133400000 134500000 v -638.761475 79 134500000 134500000 sp -0.235017 80 134500000 134900000 dh -256.203461 THE 81 134900000 135400000 ax -328.101715 82 135400000 135400000 sp -0.235017 83 135400000 136400000 v -622.797791 VOXFORGE 84 136400000 138500000 aa -1219.480103 85 138500000 139400000 k -650.145081 86 139400000 140600000 s -741.079590 87 140600000 141100000 f -317.815033 88 141100000 142300000 ao -810.766357 89 142300000 144000000 r -1171.934448 90 144000000 145800000 jh -1243.679077 91 145800000 145800000 sp -0.235017 92 145800000 151700000 ao -4049.692871 ORG 93 151700000 153700000 r -1328.250610 94 153700000 154300000 g -424.893402 95 154300000 158300000 sp -2182.887939 96 158300000 160600000 sh -1436.213013 SHORTS 97 160600000 162000000 ao -967.496826 98 162000000 163200000 r -828.032471 99 163200000 163700000 t -335.383789 100 163700000 165800000 s -1386.576294 101 165800000 166600000 sp -441.940674 102 166600000 168700000 w -1360.698730 WEEKLY 103 168700000 170200000 iy -1012.960938 104 170200000 170700000 k -369.333679 105 170700000 171600000 l -659.565674 106 171600000 172600000 iy -665.369934 107 172600000 172600000 sp -0.235017 108 172600000 173000000 m -250.922897 MONOLOGUE 109 173000000 175400000 aa -1489.473389 110 175400000 175700000 n -242.715103 111 175700000 176000000 ax -221.046005 112 176000000 176300000 l -215.975601 113 176300000 176600000 ao -229.448303 114 176600000 181000000 g -3165.420654 115 181000000 182300000 sp -695.738953 116 182300000 183300000 k -677.003906 COLLECTION 117 183300000 184000000 ax -455.986847 118 184000000 185600000 l -1045.381714 119 185600000 186200000 eh -418.771515 120 186200000 187100000 k -615.256592 121 187100000 188300000 sh -724.127808 122 188300000 188700000 ax -257.210266 123 188700000 189700000 n -626.877502 124 189700000 219600000 sp -15811.746094 125 219600000 220900000 dh -852.003235 THOSE 126 220900000 222900000 ow -1237.770264 127 222900000 224100000 z -853.892212 128 224100000 224100000 sp -0.235017 129 224100000 225600000 iy -974.090637 EVENING 130 225600000 226100000 v -306.154053 131 226100000 226900000 n -529.811523 132 226900000 227400000 ix -325.192169 133 227400000 228900000 ng -916.310974 134 228900000 228900000 sp -0.235017 135 228900000 229300000 b -292.913208 BELLS 136 229300000 230700000 eh -878.266785 137 230700000 233000000 l -1477.806763 138 233000000 235100000 z -1372.717896 139 235100000 235100000 sp -0.235017 140 235100000 237000000 dh -1251.036621 THOSE 141 237000000 238900000 ow -1176.476685 142 238900000 240100000 z -808.065613 143 240100000 240100000 sp -0.235017 144 240100000 241800000 iy -1051.050415 EVENING 145 241800000 242200000 v -237.337112 146 242200000 243100000 n -561.080078 147 243100000 243600000 ix -324.897034 148 243600000 245200000 ng -928.994263 149 245200000 245200000 sp -0.235017 150 245200000 245600000 b -269.382111 BELLS 151 245600000 247000000 eh -882.679077 152 247000000 249400000 l -1534.862549 153 249400000 251000000 z -999.659790 154 251000000 252400000 sp -810.360474 155 252400000 252700000 dh -191.927155 THOSE 156 252700000 254300000 ow -1005.157776 157 254300000 254600000 z -249.030563 158 254600000 254600000 sp -0.235017 159 254600000 254900000 iy -233.816315 EVENING 160 254900000 255200000 v -248.230988 161 255200000 255800000 n -406.460815 162 255800000 258700000 ix -1827.929199 163 258700000 259000000 ng -201.385193 164 259000000 259000000 sp -0.235017 165 259000000 259300000 b -203.654861 BELLS 166 259300000 262100000 eh -1718.569336 167 262100000 262600000 l -365.826385 168 262600000 262900000 z -224.563644 169 262900000 262900000 sp -0.235017 170 262900000 263200000 hh -213.729538 HOW 171 263200000 264000000 aw -514.845581 172 264000000 264000000 sp -0.235017 173 264000000 264300000 m -196.316437 MANY 174 264300000 264600000 eh -209.032455 175 264600000 265000000 n -255.899612 176 265000000 265400000 iy -278.018890 177 265400000 265400000 sp -0.235017 178 265400000 266000000 ax -394.383759 A 179 266000000 266000000 sp -0.235017 180 266000000 266300000 t -214.197128 TALE 181 266300000 266600000 ey -219.312790 182 266600000 266900000 l -233.756607 183 266900000 266900000 sp -0.235017 184 266900000 267200000 dh -243.673233 THEIR 185 267200000 267500000 eh -275.131104 186 267500000 267800000 r -209.015839 187 267800000 267800000 sp -0.235017 188 267800000 268400000 m -354.742706 MUSIC 189 268400000 269600000 y -715.677551 190 269600000 270000000 uw -266.001984 191 270000000 271100000 z -717.290710 192 271100000 271800000 ix -453.763885 193 271800000 272200000 k -282.080200 194 272200000 273000000 sp -470.275146 195 273000000 274000000 t -657.040100 TELLS 196 274000000 275300000 eh -835.404663 197 275300000 277400000 l -1320.569702 198 277400000 279000000 z -985.225159 199 279000000 283500000 sp -2486.716064 200 283500000 284600000 ah -750.905518 OF 201 284600000 285500000 v -541.886108 202 285500000 285500000 sp -0.235017 203 285500000 287600000 y -1273.396240 YOUTH 204 287600000 288400000 uw -577.244629 205 288400000 289500000 th -723.678772 206 289500000 289500000 sp -0.235017 207 289500000 291200000 ae -1046.697388 AND 208 291200000 292400000 n -748.245056 209 292400000 292700000 d -214.433289 210 292700000 293000000 sp -200.961472 211 293000000 293400000 hh -284.731720 HOME 212 293400000 296800000 ow -2006.148438 213 296800000 298700000 m -1156.006592 214 298700000 299300000 sp -363.830475 215 299300000 300800000 ae -910.493347 AND 216 300800000 301700000 n -558.467163 217 301700000 302000000 d -213.420975 218 302000000 302200000 sp -130.064911 219 302200000 302700000 dh -349.395111 THAT 220 302700000 304400000 ae -1032.012329 221 304400000 304700000 t -217.941757 222 304700000 304900000 sp -135.911072 223 304900000 306800000 s -1303.848389 SWEET 224 306800000 307700000 w -592.553345 225 307700000 309100000 iy -904.467529 226 309100000 309400000 t -199.427887 227 309400000 310400000 sp -554.509521 228 310400000 311100000 t -465.579773 TIME 229 311100000 314800000 ay -2086.032227 230 314800000 316200000 m -892.458313 231 316200000 319900000 sp -2175.653076 232 319900000 321000000 w -772.708069 WHEN 233 321000000 321300000 eh -198.406158 234 321300000 322300000 n -625.902100 235 322300000 322300000 sp -0.235017 236 322300000 323600000 l -847.156067 LAST 237 323600000 325100000 ae -863.897278 238 325100000 326300000 s -783.176392 239 326300000 326600000 t -246.635986 240 326600000 326600000 sp -0.235017 241 326600000 328500000 ay -1189.687622 I 242 328500000 328500000 sp -0.235017 243 328500000 329000000 hh -343.691681 HEARD 244 329000000 333000000 er -2478.330078 245 333000000 333700000 d -499.631836 246 333700000 334700000 sp -597.968140 247 334700000 336000000 dh -827.133789 THEIR 248 336000000 336800000 eh -510.939667 249 336800000 337600000 r -551.281006 250 337600000 337600000 sp -0.235017 251 337600000 339700000 s -1374.088501 SOOTHING 252 339700000 341400000 uw -1030.686890 253 341400000 341800000 dh -261.625519 254 341800000 342700000 ix -569.829651 255 342700000 344100000 ng -840.131226 256 344100000 344100000 sp -0.235017 257 344100000 345400000 ch -817.539368 CHIME 258 345400000 348600000 ay -1862.573853 259 348600000 349700000 m -652.768433 260 349700000 361000000 sp -6156.908691 261 361000000 361400000 dh -263.359253 THOSE 262 361400000 362900000 ow -934.279602 263 362900000 364200000 z -906.128662 264 364200000 364200000 sp -0.235017 265 364200000 365200000 jh -701.320068 JOYOUS 266 365200000 367800000 oy -1655.157593 267 367800000 368200000 ax -284.341492 268 368200000 369500000 s -825.133789 269 369500000 369500000 sp -0.235017 270 369500000 372300000 aw -1613.715088 HOURS 271 372300000 374000000 er -1157.235229 272 374000000 375200000 z -785.704590 273 375200000 375200000 sp -0.235017 274 375200000 376500000 aa -912.884399 ARE 275 376500000 377900000 r -976.369202 276 377900000 378100000 sp -124.798691 277 378100000 378900000 p -566.831360 PASSED 278 378900000 380800000 ae -1155.449341 279 380800000 382100000 s -789.475403 280 382100000 382400000 t -213.975235 281 382400000 382400000 sp -0.235017 282 382400000 382900000 ax -341.890259 AWAY 283 382900000 385100000 w -1423.813110 284 385100000 387700000 ey -1710.973755 285 387700000 393200000 sp -3026.807373 286 393200000 394700000 ae -932.186279 AND 287 394700000 395700000 n -620.766174 288 395700000 396000000 d -194.880798 289 396000000 396000000 sp -0.235017 290 396000000 397400000 m -902.758545 MANY 291 397400000 398400000 eh -574.389038 292 398400000 398900000 n -310.895477 293 398900000 400000000 iy -690.868713 294 400000000 400000000 sp -0.235017 295 400000000 400600000 ax -425.777039 A 296 400600000 400600000 sp -0.235017 297 400600000 401400000 hh -542.003906 HEART 298 401400000 403100000 aa -1107.815918 299 403100000 405100000 r -1393.089966 300 405100000 405400000 t -198.724930 301 405400000 406500000 sp -604.077515 302 406500000 408000000 dh -986.753601 THAT 303 408000000 409300000 ae -792.031555 304 409300000 409700000 t -281.396667 305 409700000 409700000 sp -0.235017 306 409700000 410900000 dh -750.968994 THEN 307 410900000 412500000 eh -942.301880 308 412500000 414300000 n -1076.298218 309 414300000 414300000 sp -0.235017 310 414300000 414900000 w -360.840729 WAS 311 414900000 415200000 aa -250.579025 312 415200000 416600000 z -923.695618 313 416600000 416600000 sp -0.235017 314 416600000 417000000 g -280.140259 GAY 315 417000000 421700000 ey -3020.978760 316 421700000 425800000 sp -2353.206543 317 425800000 426700000 w -608.577942 WITHIN 318 426700000 427100000 ix -290.349762 319 427100000 427900000 dh -515.343750 320 427900000 429000000 ih -667.862122 321 429000000 430400000 n -839.669983 322 430400000 430400000 sp -0.235017 323 430400000 430700000 dh -193.130203 THE 324 430700000 431300000 ax -377.960571 325 431300000 431300000 sp -0.235017 326 431300000 433000000 t -1116.695679 TOMB 327 433000000 433700000 uw -442.870117 328 433700000 437800000 m -2455.729004 329 437800000 438000000 sp -126.176971 330 438000000 439400000 n -831.124573 NOW 331 439400000 441000000 aw -950.480530 332 441000000 441000000 sp -0.235017 333 441000000 442200000 d -776.556152 DARKLY 334 442200000 443000000 aa -547.418640 335 443000000 444700000 r -1168.941406 336 444700000 445000000 k -203.224365 337 445000000 446100000 l -762.129700 338 446100000 446700000 iy -436.763824 339 446700000 446700000 sp -0.235017 340 446700000 447900000 d -768.778198 DWELLS 341 447900000 449200000 w -860.453308 342 449200000 450100000 eh -581.315430 343 450100000 452000000 l -1182.789429 344 452000000 453500000 z -926.071228 345 453500000 456900000 sp -1975.332764 346 456900000 457900000 ae -665.382690 AND 347 457900000 458800000 n -577.460999 348 458800000 459500000 d -471.043732 3 0 12000000 sil -5673.811523 SENT-END 4 12000000 12700000 w -417.862579 WHEN 5 12700000 13300000 eh -347.555664 6 13300000 14600000 n -743.281616 7 14600000 14600000 sp -0.235017 8 14600000 14900000 dh -194.959946 THE 9 14900000 15300000 ax -238.537292 10 15300000 15300000 sp -0.235017 11 15300000 16900000 s -954.239319 SUNLIGHT 12 16900000 17900000 ah -595.353027 13 17900000 18800000 n -548.593445 14 18800000 19100000 l -202.643707 15 19100000 20200000 ay -687.100037 16 20200000 20800000 t -412.797913 17 20800000 20800000 sp -0.235017 18 20800000 22400000 s -993.840881 STRIKES 19 22400000 22800000 t -263.954926 20 22800000 23500000 r -437.177673 21 23500000 24900000 ay -842.782837 22 24900000 25700000 k -548.693726 23 25700000 26900000 s -756.550354 24 26900000 32600000 sp -3288.146484 25 32600000 32900000 r -203.561249 RAINDROPS 26 32900000 33200000 ey -192.660385 27 33200000 33500000 n -195.546982 28 33500000 33800000 d -188.092819 29 33800000 34300000 r -340.222931 30 34300000 35200000 aa -576.858521 31 35200000 36000000 p -513.172607 32 36000000 37300000 s -757.558289 33 37300000 37500000 sp -118.239151 34 37500000 38000000 ix -289.680725 IN 35 38000000 38700000 n -418.201782 36 38700000 38700000 sp -0.235017 37 38700000 39000000 dh -201.682281 THE 38 39000000 40400000 ax -839.995483 39 40400000 40400000 sp -0.235017 40 40400000 42300000 eh -1102.145142 AIR 41 42300000 42800000 r -326.643799 42 42800000 43400000 sp -338.905640 43 43400000 44200000 dh -528.523987 THEY 44 44200000 44500000 ey -187.813309 45 44500000 48900000 sp -2331.569824 46 48900000 52400000 ae -2167.157227 ACT 47 52400000 53400000 k -696.953003 48 53400000 53800000 t -291.110931 49 53800000 53800000 sp -0.235017 50 53800000 54100000 ae -222.647598 AS 51 54100000 54700000 z -380.766602 52 54700000 54700000 sp -0.235017 53 54700000 55200000 ax -322.413544 A 54 55200000 55200000 sp -0.235017 55 55200000 56200000 p -611.651917 PRISM 56 56200000 56900000 r -489.490723 57 56900000 57400000 ih -317.594421 58 57400000 58200000 z -513.132385 59 58200000 58900000 ax -431.784210 60 58900000 60100000 m -643.696655 61 60100000 60100000 sp -0.235017 62 60100000 60400000 ae -180.548752 AND 63 60400000 61000000 n -372.154572 64 61000000 61300000 d -188.439804 65 61300000 62500000 sp -645.124695 66 62500000 62800000 f -201.630539 FORM 67 62800000 63700000 ao -511.759155 68 63700000 64000000 r -209.089828 69 64000000 64600000 m -348.604187 70 64600000 64600000 sp -0.235017 71 64600000 65300000 ax -436.575226 A 72 65300000 65300000 sp -0.235017 73 65300000 66200000 r -563.504333 RAINBOW 74 66200000 66800000 ey -354.545532 75 66800000 67700000 n -534.946655 76 67700000 68000000 b -188.959869 77 68000000 69300000 ow -766.216187 78 69300000 77400000 sp -4230.157715 79 77400000 77700000 dh -186.641479 THE 80 77700000 78000000 ax -195.928680 81 78000000 78000000 sp -0.235017 82 78000000 78600000 r -386.282410 RAINBOW 83 78600000 79300000 ey -423.097778 84 79300000 80100000 n