- Timestamp:
- 05/21/08 15:26:36 (8 months ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Text.pm
r2588 r2589 110 110 } elsif ($word =~ /\d/) { # word contains numbers and letters 111 111 _processWordsContainingNumbers ($self, \@words,$word); 112 } elsif ($word =~ /^\.$/) {# $word only contains a period112 } elsif ($word =~ /^\.$/) {# entire word only contains a period 113 113 $word =~ s/\.//g; 114 } elsif ($word =~ /\./) {# $word contains a period 115 my $wordsbefore; 116 if ($word =~ /\.$/){ # period is at end of word 117 $word =~ s/\.//g; 118 } else { 119 $wordsbefore = $word; 120 $word =~ s/\.//g; 121 push (@periodlog, "from:$wordsbefore to:$word\n"); 122 } 123 push (@words, $word); 114 } elsif ($word =~ /\./) {# $word contains at least one period 115 _processPeriods ($self, \@words, $word,\@periodlog,\@quotelog); 124 116 } elsif ($word =~ /\'/) {# $words containing single quotes 125 117 push (@quotelog, "$word\n"); … … 144 136 close(IN); 145 137 } 138 139 sub _processPeriods { #private 140 my ($self,$wordarray, $word,$periodlog,$quotelog)= @_; 141 my $debug = $self->{"debug"}; 142 my $wordsbefore; 143 my $match = 0; 144 while ($word =~ /\./g) { # checking for acronyms (i.e. more than one period in the word ... like I.B.M.) 145 $match++; 146 } 147 if ($match == 1) { # only one period 148 if ( ($word =~ /\.\'$/) or ($word =~ /\'\.$/) ){ # if period & single quote, need to log change in quotelog. 149 $wordsbefore = $word; 150 $word =~ s/\.\'$//g; # remove period & single quote at end of word 151 $word =~ s/\'\.$//g; # remove single quote & period at end of word 152 push (@$periodlog, "from:$wordsbefore to:$word\n"); 153 } elsif ($word =~ /\.$/){ # period is at end of word, no need to log change in periodlog. 154 $word =~ s/\.//g; 155 } else { # remove single period from body of word 156 $wordsbefore = $word; 157 $word =~ s/\.//g; # remove period 158 push (@$periodlog, "from:$wordsbefore to:$word\n"); 159 } 160 push (@$wordarray, $word); 161 } elsif ($match > 1) { # more than one period, therefore assume that it is an acronym 162 $wordsbefore = $word; 163 $word =~ s/\./ /g; # replace period with a space 164 push (@$periodlog, "from:$wordsbefore to:$word\n"); 165 } else { 166 confess "error: period got lost???\n"; 167 } 168 } 146 169 147 170 sub _processEmails { #private … … 165 188 } 166 189 167 #while ($word =~ s/(\w+)\.(\w+)/g) { # how to loop regex results168 # push (@words, $1);169 # push (@words, "dot");170 # push (@words, $2);171 #}172 190 sub _processUrls { #private 173 191 my ($self,$wordarray, $word)= @_;