voxforge.org
VoxForge Dev

Changeset 2586

Show
Ignore:
Timestamp:
05/15/08 10:44:44 (5 months ago)
Author:
kmaclean
Message:

AudioSegmentation script - alpha release

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook.pm

    r2585 r2586  
    3636### Class Variables 
    3737#################################################################### 
    38 our($opt_a,$opt_d,$opt_h,$opt_l,$opt_r,$opt_t,$opt_x,$opt_K,$opt_T,$opt_u); # need to define these because using strict. 
     38our($opt_a,$opt_d,$opt_h,$opt_l,$opt_m,$opt_p,$opt_r,$opt_s,$opt_t,$opt_x,$opt_S,$opt_T,$opt_u); # need to define these because using strict. 
    3939my %self; 
    4040$self{'debug'} = 0; 
    4141$self{'g2p_model'} = "AudioBook/input_files/g2p/models/model-5"; 
    4242$self{'htk_files'} = "AudioBook/input_files/htk"; 
     43$self{'log'} = "AudioBook/output_files/AudioBook_Log"; 
     44my $default_average_sentence_length = 13; 
     45my $default_max_sentence_length = 18; 
     46my $default_min_pause_for_sentence_break = 2000000; 
    4347my $command; 
    4448 
     
    4650### Main 
    4751#################################################################### 
     52cleanupFiles(\%self); 
    4853getOptions(\%self); 
    4954process(\%self); 
     
    6267        my $voxforgeDict = $self->{"voxforgeDict"}; 
    6368        my $htk_files = $self->{'htk_files'}; 
    64          
    65         my $textContents = AudioBook::Text->new($textfile); 
     69        my $log = $self{'log'}; 
     70 
     71        my $textContents = AudioBook::Text->new($self,$textfile); 
    6672        $textContents->createWLISTFile("AudioBook/interim_files/wlist"); 
    6773         
     
    7379                # need to update dict with missing words 
    7480                # can't seem to change default HDMan log file with "-l" parameter 
    75                 $command = ("cp AudioBook/interim_files/dlog AudioBook/interim_files/dlog1"); print "cmd:$command\n" if $debug; system($command); 
    7681                $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $voxforgeDict"); system($command) == 0 or confess "fullrun $command failed: $?"; 
    77                 $command = ("mv AudioBook/interim_files/dlog AudioBook/interim_files/dlog2"); print "cmd:$command\n" if $debug; system($command); 
    78                 $command = ("cp AudioBook/interim_files/MissingWords_out AudioBook/output_files/MissingWords"); print "cmd:$command\n" if $debug; system($command); 
     82                $command = ("mv AudioBook/interim_files/dlog AudioBook/interim_files/logs/dlog2"); print "cmd:$command\n" if $debug; system($command); 
     83                # no longer required$command = ("cp AudioBook/interim_files/MissingWords_out AudioBook/output_files/MissingWords"); print "cmd:$command\n" if $debug; system($command); 
    7984        } else { 
    80                 unlink ("AudioBook/interim_files/MissingWords_out"); 
    81                 open(MISSINGWORDSOUT,">AudioBook/output_files/MissingWords") or confess ("cannot open AudioBook/output_files/MissingWords file");        
    82                 print MISSINGWORDSOUT "no missing words\n"; 
    83                 close MISSINGWORDSOUT 
     85                open(LOG,">>$log") or confess ("cannot open AudioBook/output_files/MissingWords file"); 
     86                print LOG "\nMissing Words added to Pronunciation Dictionary:\n";        
     87                print LOG "------------------------------------------------\n";                          
     88                print LOG "no missing words\n"; 
     89                close LOG 
    8490        }  
    8591        $command = ("cp AudioBook/interim_files/dict AudioBook/output_files"); print "cmd:$command\n" if $debug; system($command);       
     
    8894        if (defined($tarSuffix)){ 
    8995                _createTarFile($self); 
     96        } 
     97} 
     98 
     99sub cleanupFiles { 
     100        my ($self)= @_; 
     101        if (defined(<AudioBook/interim_files/*>)) { 
     102                unlink (<AudioBook/interim_files/*>); 
     103        } 
     104        if (defined(<AudioBook/output_files/wav/*>)) { 
     105                unlink (<AudioBook/output_files/wav/*>);         
     106        } 
     107        if (defined(<AudioBook/output_files/logs/*>)) { 
     108                unlink (<AudioBook/output_files/logs/*>);        
     109        } 
     110        if (defined(<AudioBook/output_files/*>)) {       
     111                unlink (<AudioBook/output_files/*>); 
    90112        } 
    91113} 
     
    122144sub getOptions { 
    123145        my ($self)= @_; 
    124         getopts('a:d:hl:r:t:u:x:KT');    #  sets $opt_* as a side effect. 
     146        my $debug = $self->{'debug'};    
     147        getopts('a:d:hl:m:p:r:s:t:u:x:ST');    #  sets $opt_* as a side effect. 
    125148        if ($opt_a and $opt_t) {         
    126149                if (-r $opt_a) { 
     
    143166                        $self->{"voxforgeDict"}="AudioBook/input_files/VoxForgeDict";    
    144167                } 
    145                 ### 
     168                ### Audio Processing 
     169                if ($opt_s) { 
     170                        $self->{"average_sentence_length"}=$opt_s; 
     171                } else { 
     172                        $self->{"average_sentence_length"}= $default_average_sentence_length;    
     173                } 
     174                if ($opt_m) { 
     175                        $self->{"max_sentence_length"}=$opt_m; 
     176                } else { 
     177                        $self->{"max_sentence_length"}= $default_max_sentence_length;    
     178                } 
     179                if ($opt_p) { 
     180                        $self->{"min_pause_for_sentence_break"}=$opt_p; 
     181                } else { 
     182                        $self->{"min_pause_for_sentence_break"}= $default_min_pause_for_sentence_break;  
     183                }                
     184                ### Tar file processing 
    146185                if (defined($opt_T)) { 
    147186                        if ($opt_x) { 
     
    174213                        } 
    175214                } 
    176         } elsif ($opt_K) { 
     215        } elsif ($opt_S) { # Sanity test switch 
    177216                $self->{"audiofile"}="AudioBook/test/audio.wav"; 
    178217                #$self->{"textfile"}="AudioBook/test/text-simple.txt"; 
    179218                $self->{"textfile"}="AudioBook/test/text-original.txt"; 
    180                 $command = ("cp AudioBook/input_files/VoxForgeDict AudioBook/interim_files/VoxForgeDict"); system($command); 
     219                $command = ("cp AudioBook/input_files/VoxForgeDict AudioBook/interim_files/VoxForgeDict"); print "cmd:$command\n"; system($command); 
    181220                $self->{"voxforgeDict"}="AudioBook/interim_files/VoxForgeDict"; 
    182221                $self->{"tarSuffix"}=_random_characters(3); 
    183222                $self->{"username"}="test"; 
     223                $self->{"average_sentence_length"}= $default_average_sentence_length; 
     224                $self->{"max_sentence_length"}= $default_max_sentence_length; 
     225                $self->{"min_pause_for_sentence_break"}=$default_min_pause_for_sentence_break; 
    184226        } elsif ($opt_h) { 
    185227                print "\nVoxForge Audio Segmentation Script Parameters\n";       
    186228                print   "=============================================\n";       
    187                 print "-a\t* audio file name\n"; 
    188                 print "-d\t pronunciation dictionary  (default = AudioBook/input_files/VoxforgeDict)\n"; 
     229                print "-a\t* audio file name (WAV format only)\n"; 
     230                print "-d\tpronunciation dictionary  (default = AudioBook/input_files/VoxforgeDict)\n"; 
    189231                print "-h\tshow help\n";         
    190                 print "-l\tLICENSE file (default = AudioBook/input_files/LICENCE)\n";    
     232                print "-l\tLICENSE file (default = AudioBook/input_files/LICENCE)\n"; 
     233                print "-m\tMaximum sentence length (default = $default_max_sentence_length words)\n"; 
     234                print "-p\tMinimum pause for sentence break (default = $default_min_pause_for_sentence_break in units of 100ns)\n";              
    191235                print "-r\tREADME file (default = AudioBook/input_files/README)\n";              
     236                print "-s\tAverage sentence length (default = $default_average_sentence_length words)\n";                                
    192237                print "-t\t* text file name\n"; 
    193238                print "-u\tusername or name you want file stats collected by on VoxForge Metrics \n"; 
    194239                print "\tpage:\t(http://www.voxforge.org/home/downloads/metrics)\n";     
    195240                print "-x\tunique tar file suffix (max 3 characters - remainder is truncated)\n"; 
    196                 print "-K\trun test\n";                
     241                print "-S\trun sanity test\n";                 
    197242                print "-T\tcreate gzipped/tar file\n"; 
    198243                print "\n\t* required for script to run\n";      
     
    210255} 
    211256 
     257#################################################################### 
     258### Getters - Public 
     259#################################################################### 
     260sub getAverage_sentence_length { 
     261        my $self = shift; 
     262        return $self->{"average_sentence_length"}; 
     263} 
     264 
     265sub getMax_sentence_length { 
     266        my $self = shift; 
     267        return $self->{"max_sentence_length"}; 
     268} 
     269 
     270sub getMin_pause_for_sentence_break { 
     271        my $self = shift; 
     272        return $self->{"max_sentence_length"}; 
     273} 
     274 
    2122751; 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Audio.pm

    r2585 r2586  
    3838### Class Variables 
    3939#################################################################### 
    40 my $average_sentence_length = 13; 
    41 my $max_sentence_length = 18; 
    42 #my $min_pause_for_sentence_break = 200000; # HTK time format - 100 millisecond increments 
    43 my $min_pause_for_sentence_break = 5000000; 
     40#my $average_sentence_length = 13; 
     41#my $max_sentence_length = 18; 
     42#my $min_pause_for_sentence_break = 5000000; # HTK time format - units of 100 nanoseconds 
    4443my (@max_sentences, $max_sentence_length_found, $max_sentence_length_linenumber, $min_sentence_length_linenumber); 
    45 my $min_sentence_length_found = $max_sentence_length; 
     44#my $min_sentence_length_found = $max_sentence_length; 
     45my $min_sentence_length_found; 
    4646my $up_increment = 1; 
    4747my $down_increment = -1; 
    48 my ($command)
     48my $command
    4949#################################################################### 
    5050### Constructor 
     
    5757        $self{'htk_files'} = $super->{'htk_files'}; 
    5858        $self{'g2p_model'} = $super->{'g2p_model'}; 
    59  
     59        $self{"average_sentence_length"} = $super->{"average_sentence_length"}; 
     60        $self{"max_sentence_length"} = $super->{"max_sentence_length"}; 
     61        $min_sentence_length_found = $super->{"max_sentence_length"}; 
     62        $self{"min_pause_for_sentence_break"} = $super->{"min_pause_for_sentence_break"}; 
     63        $self{'log'} = $super->{'log'};  
    6064        bless(\%self,$class); 
    6165        return \%self; 
     
    8791        $textContents->createMLFFile("downsampled","AudioBook/interim_files/words.mlf" ); 
    8892        # forced alignment - creates aligned.out 
    89 #!!!!!! $command = ("cd AudioBook/interim_files && HVite -A -D -T 1 -l '*'  -a -b SENT-END -m -C wav_config -H macros -H hmmdefs -m -t 250.0 150.0 1000.0 -I words.mlf  -i aligned.out -S train.scp dict tiedlist"); system($command) == 0 or confess "error: $command failed: $?"; 
    90         $command = ("pwd && HVite -A -D -T 1 -l '*' -a -b SENT-END -m -C $htk_files/wav_config -H $htk_files/models/macros -H $htk_files/models/hmmdefs -m -t 250.0 150.0 1000.0 -I AudioBook/interim_files/words.mlf  -i AudioBook/interim_files/aligned.out -S $htk_files/train.scp AudioBook/interim_files/dict $htk_files/models/tiedlist"); system($command) == 0 or confess "error: $command failed: $?"; 
    91  
     93        $command = ("pwd && HVite -A -D -T 1 -l '*' -a -b SENT-END -m -C $htk_files/wav_config -H $htk_files/models/macros -H $htk_files/models/hmmdefs -m -t 250.0 150.0 1000.0 -I AudioBook/interim_files/words.mlf  -i AudioBook/interim_files/aligned.out -S $htk_files/train.scp AudioBook/interim_files/dict $htk_files/models/tiedlist > AudioBook/interim_files/logs/HVite_log"); system($command) == 0 or confess "error: $command failed: $?"; 
     94        open (HVite_Log,"AudioBook/interim_files/logs/HVite_log") || confess "error: can't open AudioBook/interim_files/HVite_log: $?"; 
     95        while (my $line = <HVite_Log>) { 
     96                chomp $line; 
     97                my $filename; 
     98                if ($line =~ /Aligning File:/) {  
     99                        my @line=split(/:/, $line); 
     100                        $filename = pop(@line); 
     101                } elsif ($line =~ /No tokens survived to final node of network at beam/) { 
     102                        my @line=split(/ /, $line); 
     103                        my $beam = pop (@line); 
     104                        $beam =~ s/ //g; 
     105                        print "\n\n******************************************************************\n"; 
     106                        print "**** check that audio corresponds to prompt in file ***\n"; 
     107                        print "******************************************************************\n\n"; 
     108                        if ($beam > 250) { 
     109                                confess "audio not corresponding to prompt file, check HVite_Log; error code: $?" ; 
     110                        }        
     111                }  
     112        } 
    92113} 
    93114 
     
    135156        my ($self,$filename,$textContents) = @_;         
    136157        my $debug = $self->{"debug"};            
    137  
     158        my $log = $self->{"log"}; 
     159         
    138160        my $filename_nopath = basename($filename); 
    139161        my $filename_nosuffix = fileparse($filename, "wav"); 
     
    144166        $self->{"filename_prefix"} = lc(substr($filename_nopath,0,3)); 
    145167        $self->{"textContents"} = $textContents; 
     168         
     169        my $average_sentence_length = $self->{"average_sentence_length"}; 
     170        my $max_sentence_length = $self->{"max_sentence_length"}; 
     171        my $min_pause_for_sentence_break = $self->{"min_pause_for_sentence_break"}; 
    146172 
    147173        my $samplerate = _getSampleRate($self); 
     
    200226         
    201227        $command = ("rm AudioBook/interim_files/temp.wav"); print "cmd:$command\n" if $debug; system($command); 
    202          
    203         print "### segment summary: #######################################################\n"; 
    204         print "\nSettings:average_sentence_length->$average_sentence_length;max_sentence_length->$max_sentence_length\n"; 
    205         print   "         pause length:$min_pause_for_sentence_break (" . $min_pause_for_sentence_break/10000000 . " seconds)\n";         
    206         print "\nSentence Length: min:$min_sentence_length_linenumber->$min_sentence_length_found; max:$max_sentence_length_linenumber->$max_sentence_length_found\n"; 
    207         print "\nSentences over max_sentence_length of $max_sentence_length words:\n"; 
    208         foreach my $line (@max_sentences) { 
    209                 print "\t$line\n"; 
    210         }        
    211         print "\n###########################################################################\n"; 
     228 
     229        open(LOG,">>$log") or confess ("cannot open $log file");         
     230        print LOG "\nAudio Segmenting summary:\n"; 
     231        print LOG   "-------------------------\n";       
     232        print LOG "Settings:average_sentence_length: $average_sentence_length\n"; 
     233        print LOG "         max_sentence_length: $max_sentence_length\n";         
     234        print LOG "         pause length: $min_pause_for_sentence_break (" . $min_pause_for_sentence_break/10000000 . " seconds)\n\n";    
     235        print LOG "Sentence Length: min:$min_sentence_length_linenumber: $min_sentence_length_found\n"; 
     236        print LOG "                 max:$max_sentence_length_linenumber: $max_sentence_length_found\n\n"; 
     237        print LOG "Prompt lines with more than max_sentence_length of $max_sentence_length words:\n"; 
     238        if (@max_sentences) { 
     239                foreach my $line (@max_sentences) { 
     240                        print LOG "\t$line\n"; 
     241                }        
     242        } else { 
     243                print LOG "\tnone\n"; 
     244        } 
    212245} 
    213246 
     
    216249        my $debug = $self->{"debug"};    
    217250        my $filename_prefix = $self->{"filename_prefix"}; 
     251        my $average_sentence_length = $self->{"average_sentence_length"}; 
     252        #my $max_sentence_length = $self->{"max_sentence_length"}; 
     253        my $min_pause_for_sentence_break = $self->{"min_pause_for_sentence_break"}; 
    218254         
    219255        my @aligned_line = split(/\s/,$$aligned_words[$$sentence_end + $increment]); 
     
    277313        my ($self,$sentence_start,$sentence_end,$increment,$filename_prefix,$padded_fileid ) = @_; 
    278314        my $debug = $self->{"debug"};    
     315        #my $average_sentence_length = $self->{"average_sentence_length"}; 
     316        my $max_sentence_length = $self->{"max_sentence_length"}; 
     317        #my $min_pause_for_sentence_break = $self->{"min_pause_for_sentence_break"}; 
    279318         
    280319        if ($debug) { 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Dictionary.pm

    r2585 r2586  
    3838        $self{'htk_files'} = $super->{'htk_files'}; 
    3939        $self{'g2p_model'} = $super->{'g2p_model'}; 
     40        $self{'log'} = $super->{'log'};  
    4041        bless(\%self,$class); 
    4142        return \%self; 
     
    5253        $self->{"missing_words"} = $missing_words; 
    5354          
    54         $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $voxforgeDict"); system($command) == 0 or confess "fullrun $command failed: $?";  
    55         open(DLOG,"AudioBook/interim_files/dlog") or confess ("cannot open AudioBook/interim_files/dlog file"); 
     55        $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $voxforgeDict"); system($command) == 0 or confess "fullrun $command failed: $?"; 
     56        my $dlog1 = "AudioBook/interim_files/logs/dlog1"; 
     57        $command = ("mv AudioBook/interim_files/dlog $dlog1"); print "cmd:$command\n" if $debug; system($command); 
     58        open(DLOG,$dlog1) or confess ("cannot open $dlog1 file"); 
    5659        open(MISSINGWORDS,">$missing_words") or confess ("cannot open $missing_words file"); 
    5760        my $missingwordsheader = 0; 
     
    8285sub getPronunciations { # public  
    8386        my ($self,$missing_words_out)= @_; 
    84         my $debug = $self->{"debug"}; 
     87        my $debug = $self->{'debug'}; 
    8588        my $model = $self->{'g2p_model'}; 
    86         my $missing_words = $self->{"missing_words"}; 
     89        my $missing_words = $self->{'missing_words'}; 
     90        my $log = $self->{'log'}; 
    8791 
    8892        $self->{"missing_words_out"} = $missing_words_out; 
     
    9498        } 
    9599        open(MISSINGWORDSOUT,">$missing_words_out") or confess ("cannot open $missing_words_out file"); 
     100        open(LOG,">>$log") or confess ("cannot open $log file"); 
     101        print LOG "\nMissing Words added to Pronunciation Dictionary:\n";        
     102        print LOG "------------------------------------------------\n";          
    96103        my ($word, $phonemes); 
    97104        format MISSINGWORDSOUT = 
     105@<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 
     106$word,"[" . $word ."]",$phonemes 
     107. 
     108        format LOG = 
    98109@<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 
    99110$word,"[" . $word ."]",$phonemes 
     
    104115                $phonemes = join(" ",@line); 
    105116                write MISSINGWORDSOUT; 
     117                write LOG;               
    106118        } 
     119        close MISSINGWORDSOUT; 
     120        close LOG; 
    107121        return 1; 
    108122} 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Text.pm

    r2585 r2586  
    3434#################################################################### 
    3535sub new { 
    36         my ($class,$textFile) = @_;  
     36        my ($class,$super,$textFile) = @_;  
    3737        my %self; 
    3838        $self{"inputfilename"} = $textFile; 
     39        $self{'log'} = $super->{'log'};          
    3940        _clean(\%self); 
    4041        bless(\%self,$class); 
     
    4950        my $debug = $self->{"debug"};    
    5051        my $inputfilename = $self->{"inputfilename"}; 
     52        my $log = $self->{'log'};        
    5153 
    5254        open(IN, "<$inputfilename") or confess ("error: cannot open input $inputfilename for input");  
     55        open(LOG,">>$log") or confess ("cannot open $log file");         
     56        print LOG "Changes made to Text file\n"; 
     57        print LOG   "-------------------------\n";       
    5358        my @eText = <IN>; # slurp in entire file into an array 
    5459        my @words; 
     
    9196                                        _processEmails($self, \@words, $word); 
    9297                                #} elsif ($word =~ /(http|https|ftp):\/\/[\w\-.]+(:\d+)?\/[\w.\-~\/%]+/) { # url 
    93                                 } elsif ($word =~ /(www\.)?(\w+)\.([A-Za-z]{2,4})/)  { # url                    
     98                                } elsif ($word =~ /(www\.)?(\w+)\.([A-Za-z]{2,4})/)  { # url 
    9499                                        _processUrls($self, \@words, $word); 
    95                                 } elsif ($word =~ /\d/) { # word contains numbers 
     100                                } elsif ($word =~ /^\d+$/) { # word contains one or more numbers 
     101                                        print LOG "changed digit: $word \n";             
    96102                                        _processNumbers ($self, \@words, $word); 
    97                                 } elsif ($word =~ /\./) {# $word contains a period  
     103                                }elsif ($word =~ /\d/) { # word contains numbers and letters 
     104                                        print LOG "changed digit: $word \n";             
     105                                        _processWordsContainingNumbers ($self, \@words,$word); 
     106                                }  
     107                                elsif ($word =~ /\./) {# $word contains a period  
     108                                        print LOG "changed word with period in it: $word to";            
    98109                                        $word =~ s/\.//g; 
     110                                        print LOG " $word\n";                            
    99111                                        push (@words, $word);                                    
    100112                                } else { 
     
    114126                confess "error - fix: $word contains numbers\n" 
    115127        } elsif ($word =~ /(\w+)\@(\w+)\.(\w+)/)  { # only catches basic email address 
    116                 push (@$wordarray, $1); 
    117                 push (@$wordarray, "at"); 
    118                 push (@$wordarray, $2); 
    119                 push (@$wordarray, "dot"); 
    120                 push (@$wordarray, $3); 
     128                my $word1 = $1; 
     129                my $word2 = $2; 
     130                my $word3 = $3; 
     131                push (@$wordarray, $word1); 
     132                push (@$wordarray, "AT"); 
     133                push (@$wordarray, $word2); 
     134                push (@$wordarray, "DOT"); 
     135                push (@$wordarray, $word3); 
     136                print LOG "changed email address: $word to $word1 at $word2 DOT $word3\n"; 
    121137        } else { 
    122138                confess "error - fix: $word\n" 
     
    133149        my ($self,$wordarray, $word)= @_; 
    134150        my $debug = $self->{"debug"}; 
    135         print "processingUrls: $word\n";         
     151        print "processingUrls: $word\n" if $debug;       
     152         
    136153        if ($word =~ /(www)?\.(\w+)\.([A-Za-z]{2,4})/)  { # URL: www.abc.com 
    137                 print "processingUrls: splitword:$1;DOT;$2;DOT;$3;\n";           
    138                 push (@$wordarray, $1); 
     154                my $word1 = $1; 
     155                my $word2 = $2;  
     156                my $word3 = $3;                  
     157                print LOG "changed URL $word to:$word1 DOT $word2 DOT $word3;\n";                
     158                push (@$wordarray, $word1); 
    139159                push (@$wordarray, "DOT");                               
    140                 if ($2 =~ /\d+/) { 
    141                         _processWordsContainingNumbers ($self, $wordarray, $2); 
     160                if ($word2 =~ /\d+/) { 
     161                        _processWordsContainingNumbers ($self, $wordarray,$word2); 
    142162                } else { 
    143                         push (@$wordarray, $2); 
     163                        push (@$wordarray, $word2); 
    144164                } 
    145165                push (@$wordarray, "DOT");       
    146                 push (@$wordarray, $3); # can only be a top level domain name 
     166                push (@$wordarray, $word3); # can only be a top level domain name 
    147167        } elsif ($word =~ /(\w+)\.([A-Za-z]{2,4})/)  { # URL: abc.com 
    148168                my $word1 = $1; 
    149169                my $word2 = $2; 
    150                 print "processingUrls: splitword:$word1;DOT;$word2;\n";        
     170                print LOG "changed URL $word to:$word1;DOT;$word2;\n";         
    151171                if ($word1 =~ /\d+/) { 
    152                         print "processingUrls: splitword2:$word1;DOT;$word2;\n";  
     172                        print "processingUrls: splitword2:$word1 DOT $word2;\n" if $debug;  
    153173                        _processWordsContainingNumbers ($self, $wordarray,$word1); 
    154174                } else { 
     
    168188        if ($subword =~ /\d+\D+/) {  # assume single, consecutive set of numbers (i.e no split numbers in word) 
    169189                my $number = $subword; 
    170                 print "processWordsContainingNumbers:subword:$subword;\n"
     190                print "processWordsContainingNumbers:subword:$subword;\n" if $debug
    171191                $number =~ s/\D//g; # removes non-digit characters 
    172                 print "processWordsContainingNumbers:number:$number\n"
     192                print "processWordsContainingNumbers:number:$number\n" if $debug
    173193                my @number = split(//, $number); 
    174194                foreach my $digit (@number) { 
    175                         processNumbers ($self, $wordarray, $digit); # will push the resulting ordinal converted to a word onto the array 
     195                        _processNumbers ($self, $wordarray, $digit); # will push the resulting ordinal converted to a word onto the array 
    176196                }                
    177197                my $word = $subword; 
     
    187207                }        
    188208                my $number = $subword; 
    189                 print "processWordsContainingNumbers:subword:$subword;\n"
     209                print "processWordsContainingNumbers:subword:$subword;\n" if $debug
    190210                $number =~ s/\D//g; # removes non-digit characters 
    191                 print "processWordsContainingNumbers:number:$number\n"
     211                print "processWordsContainingNumbers:number:$number\n" if $debug
    192212                my @number = split(//, $number); 
    193213                foreach my $digit (@number) { 
    194214                        _processNumbers ($self, $wordarray, $digit); # will push the resulting ordinal converted to a word onto the array 
    195215                }                        
    196                  
    197216        }        
    198217} 
     
    209228                        push (@$words, uc $word); 
    210229                } 
    211                 print "info: converted number:$number: to $wordnum\n"; 
     230                print LOG "converted number:$number: to $wordnum\n"; 
    212231        } elsif (($number =~ /^\d{4}$/) and ($number < 2100)){ # assume 4 digit numbers between 1000 and 2100 are years 
    213232                my $datenum = year2en($number); 
     
    217236                        push (@$words, uc $word); 
    218237                }        
    219                 print "info: converted date:$number: to $datenum\n";   
     238                print LOG "converted date:$number: to $datenum\n";     
    220239        } elsif ($number =~ /^\d+$/) { # contains only numbers 
    221240                my $wordnum = num2en($number); 
     
    225244                        push (@$words, uc $word); 
    226245                } 
    227                 print "info: converted number:$number: to $wordnum\n"; 
     246                print LOG "converted number:$number: to $wordnum\n"; 
    228247        } elsif ($number =~ /^£\d+/) { # convert pounds to words 
    229248                $number =~ s/^£//; # remove pound sign 
     
    235254                } 
    236255                push (@$words, "POUNDS"); 
    237                 print "info: converted pounds:£$number: to $wordnum pounds\n"; 
     256                print LOG "converted pounds:£$number: to $wordnum pounds\n"; 
    238257        } elsif ($number =~ /^\$\d+/) { # convert dollars to words 
    239258                $number =~ s/^\$//; # remove dollar sign 
     
    245264                } 
    246265                push (@$words, "DOLLARS"); 
    247                 print "info: converted dollars:$number: to $wordnum dollars\n"; 
     266                print LOG "converted dollars:$number: to $wordnum dollars\n"; 
    248267# skip this - minutes do not get processed properly (since they use a single quote) - do manually in eText file.                 
    249268#       } elsif ($number =~ /^\d{1,3}°$/) { # convert degrees to words 
     
    266285                        push (@$words, uc $word); 
    267286                } 
    268                 print "info: converted number:$number: to $wordnum \n"; 
     287                print LOG "converted number:$number: to $wordnum \n"; 
    269288        } elsif ($number =~ /^\w+\d+/) { # convert ordinal number to words 
    270289                my $numbertemp = $number; 
     
    276295                        push (@$words, uc $word); 
    277296                } 
    278                 print "info: converted number:$number: to $wordnum \n"; 
     297                print LOG "converted number:$number: to $wordnum \n"; 
    279298        } else { 
    280299                push (@$words, $number); 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/output_files/prompts

    r2585 r2586  
    1 rai0001 WHEN THE SUNLIGHT STRIKES RAINDROPS IN THE AIR THEY ACT AS A PRISM AND FORM A RAINBOW  
    2 rai0002 THE RAINBOW IS A DIVISION OF LIGHT WHITE LIGHT INTO MANY BEAUTIFUL COLORS  
    3 rai0003 THESE TAKE THE SHAPE OF A LONG ROUND ARCH WITH ITS PATH HIGH ABOVE  
    4 rai0004 AND ITS TWO ENDS APPARENTLY BEYOND THE HORIZON  
    5 rai0005 THERE IS ACCORDING TO LEGEND A BOILING POT OF GOLD AT ONE END  
    6 rai0006 PEOPLE LOOK BUT NO ONE EVER FINDS IT  
    7 rai0007 WHEN A MAN LOOKS FOR SOMETHING BEYOND HIS REACH HIS FRIENDS SAY HE IS LOOKING FOR THE POT OF GOLD AT THE END OF THE RAINBOW  
    8 rai0008 THROUGHOUT THE CENTURIES PEOPLE HAVE EXPLAINED THE RAINBOW IN VARIOUS WAYS SOME HAVE ACCEPTED IT AS A MIRACLE WITHOUT PHYSICAL EXPLANATIONS  
    9 rai0009 TO THE HEBREWS IT WAS A TOKEN THAT THERE WOULD BE NO MORE UNIVERSAL FLOODS  
    10 rai0010 THE GREEKS USED TO IMAGINE THAT IT WAS A SIGN FROM THE GODS TO FORETELL WAR OR HEAVY RAIN  
    11 rai0011 THE NORSEMEN CONSIDERED THE RAINBOW AS A BRIDGE OVER WHICH THE GODS PASSED FROM EARTH TO THEIR HOME IN THE SKY  
    12 rai0012 OTHERS HAVE TRIED TO EXPLAIN THE PHENOMENON PHYSICALLY  
    13 rai0013 ARISTOTLE THOUGHT THAT THE RAINBOW WAS CAUSED BY REFLECTION OF THE SUN'S RAYS BY THE RAIN  
    14 rai0014 SINCE THEN PHYSICISTS HAVE FOUND THAT IT IS NOT REFLECTION  
    15 rai0015 BUT REFRACTION BY THE RAINDROPS WHICH CAUSES THE RAINBOWS  
    16 rai0016 MANY COMPLICATED IDEAS ABOUT THE RAINBOW HAVE BEEN FORMED  
    17 rai0017 THE DIFFERENCE IN THE RAINBOW DEPENDS CONSIDERABLY UPON THE SIZE OF THE DROPS  
    18 rai0018 AND THE WIDTH OF THE COLORED BAND INCREASES AS THE SIZE OF THE DROPS INCREASES  
    19 rai0019 THE ACTUAL PRIMARY RAINBOW OBSERVED IS SAID TO BE THE EFFECT OF SUPER IMPOSITION  
    20 rai0020 OF A NUMBER OF BOWS  
    21 rai0021 IF THE RED OF THE SECOND BOW FALLS UPON THE GREEN OF THE FIRST THE RESULT IS TO GIVE A BOW  
    22 rai0022 WITH A ABNORMALLY WIDE YELLOW BAND SINCE RED AND GREEN LIGHT WHEN MIXED FORM YELLOW  
    23 rai0023 THIS IS NOT THIS IS A VERY COMMON TYPE OF BOW  
    24 rai0024 ONE SHOWING MAINLY RED AND YELLOW WITH LITTLE OR NO GREEN OR BLUE  
     1rai0001 WHEN THE SUNLIGHT STRIKES RAINDROPS IN THE AIR THEY ACT AS A PRISM AND  
     2rai0002 FORM A RAINBOW THE RAINBOW IS A DIVISION OF  
     3rai0003 LIGHT WHITE LIGHT INTO MANY BEAUTIFUL COLORS  
     4rai0004 THESE TAKE THE SHAPE OF A LONG ROUND ARCH  
     5rai0005 WITH ITS PATH HIGH ABOVE  
     6rai0006 AND ITS TWO ENDS APPARENTLY BEYOND THE HORIZON  
     7rai0007 THERE IS ACCORDING TO LEGEND A BOILING POT OF  
     8rai0008 GOLD AT ONE END PEOPLE LOOK BUT NO ONE EVER  
     9rai0009 FINDS IT WHEN A MAN LOOKS FOR SOMETHING BEYOND HIS REACH  
     10rai0010 HIS FRIENDS SAY HE IS LOOKING FOR THE POT OF  
     11rai0011 GOLD AT THE END OF THE RAINBOW  
     12rai0012 THROUGHOUT THE CENTURIES PEOPLE HAVE EXPLAINED THE RAINBOW IN VARIOUS WAYS  
     13rai0013 SOME HAVE ACCEPTED IT AS A MIRACLE WITHOUT PHYSICAL EXPLANATIONS  
     14rai0014 TO THE HEBREWS IT WAS A TOKEN THAT THERE WOULD BE  
     15rai0015 NO MORE UNIVERSAL FLOODS THE GREEKS USED TO IMAGINE  
     16rai0016 THAT IT WAS A SIGN FROM THE GODS TO FORETELL  
     17rai0017 WAR OR HEAVY RAIN THE NORSEMEN CONSIDERED THE RAINBOW  
     18rai0018 AS A BRIDGE OVER WHICH THE GODS PASSED FROM EARTH TO THEIR HOME IN THE SKY  
     19rai0019 OTHERS HAVE TRIED TO EXPLAIN THE PHENOMENON PHYSICALLY  
     20rai0020 ARISTOTLE THOUGHT THAT THE RAINBOW WAS CAUSED BY REFLECTION  
     21rai0021 OF THE SUN'S RAYS BY THE RAIN  
     22rai0022 SINCE THEN PHYSICISTS HAVE FOUND THAT IT IS NOT  
     23rai0023 REFLECTION BUT REFRACTION BY THE RAINDROPS WHICH CAUSES THE RAINBOWS  
     24rai0024 MANY COMPLICATED IDEAS ABOUT THE RAINBOW HAVE BEEN  
     25rai0025 FORMED THE DIFFERENCE IN THE RAINBOW DEPENDS CONSIDERABLY UPON THE SIZE OF THE DROPS  
     26rai0026 AND THE WIDTH OF THE COLORED BAND INCREASES AS THE SIZE OF THE DROPS INCREASES  
     27rai0027 THE ACTUAL PRIMARY RAINBOW OBSERVED IS SAID TO BE THE EFFECT  
     28rai0028 OF SUPER IMPOSITION OF A NUMBER OF BOWS  
     29rai0029 IF THE RED  
     30rai0030 OF THE SECOND BOW FALLS UPON THE GREEN OF THE FIRST THE  
     31rai0031 RESULT IS TO GIVE A BOW WITH A  
     32rai0032 ABNORMALLY WIDE YELLOW BAND SINCE RED AND GREEN LIGHT  
     33rai0033 WHEN MIXED FORM YELLOW THIS IS NOT  
     34rai0034 THIS IS A VERY COMMON TYPE OF BOW  
     35rai0035 ONE SHOWING MAINLY RED AND YELLOW WITH  
     36rai0036 LITTLE OR NO GREEN OR BLUE