| 2 | | #################################################################### |
|---|
| 3 | | ### |
|---|
| 4 | | ### script name : Audio.pm |
|---|
| 5 | | ### version: 0.2 |
|---|
| 6 | | ### created by: Ken MacLean |
|---|
| 7 | | ### mail: contact@voxforge.org |
|---|
| 8 | | ### Date: 2007.03.22 |
|---|
| 9 | | ### |
|---|
| 10 | | ### Copyright (C) 2007 Ken MacLean |
|---|
| 11 | | ### |
|---|
| 12 | | ### This program is free software; you can redistribute it and/or |
|---|
| 13 | | ### modify it under the terms of the GNU General Public License |
|---|
| 14 | | ### as published by the Free Software Foundation; either version 2 |
|---|
| 15 | | ### of the License, or (at your option) any later version. |
|---|
| 16 | | ### |
|---|
| 17 | | ### This program is distributed in the hope that it will be useful, |
|---|
| 18 | | ### but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 19 | | ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 20 | | ### GNU General Public License for more details. |
|---|
| 21 | | ### |
|---|
| 22 | | ### Changes: |
|---|
| 23 | | ### 2007/06/12 - 0.1.2 - modularize code |
|---|
| 24 | | ### 2008/05/02 - 0.2 - Convert to class; major refactoring; renamed from htksegment to Audio.pm |
|---|
| 25 | | ### |
|---|
| 26 | | ### Current Issues: |
|---|
| 27 | | ### Warning: Use of uninitialized value in numeric gt (>) "if ($sentence_length>$max_sentence_length) {" |
|---|
| 28 | | #################################################################### |
|---|
| | 2 | $VERSION = 0.2; |
|---|
| | 3 | |
|---|
| | 4 | =head1 NAME |
|---|
| | 5 | |
|---|
| | 6 | AudioBook::Audio - Audio processing |
|---|
| | 7 | |
|---|
| | 8 | =cut |
|---|
| | 9 | |
|---|
| 68 | | #################################################################### |
|---|
| 69 | | ### Methods |
|---|
| 70 | | #################################################################### |
|---|
| | 57 | |
|---|
| | 58 | =head2 segment |
|---|
| | 59 | |
|---|
| | 60 | Public Method that does the actual segmentation of the speech audio file using the text file supplied by the user. |
|---|
| | 61 | |
|---|
| | 62 | Performs a first-pass forced alignment of the user-submitted speech audio file against its corresponding transcription text. This generates |
|---|
| | 63 | the segmented audio files with corresponding prompt entries in the PROMPTS file. |
|---|
| | 64 | |
|---|
| | 65 | =cut |
|---|
| | 66 | |
|---|
# segment($self, $filename, $textContents) -- public method.
#
# Runs the forced alignment for one audio file and then walks the aligned
# word list, alternately probing above and below the current candidate
# sentence end, handing each probe to _sentence_test until the remaining
# words are passed to _last_sentence.  Finally appends a summary to the log.
# (Per the module POD, the helpers produce the segmented audio files and the
# PROMPTS entries; their bodies are not visible from this sub.)
#
# Parameters:
#   $self         - object hash.  Reads the keys debug, log,
#                   average_sentence_length, max_sentence_length and
#                   min_pause_for_sentence_break; stores filename,
#                   filename_nosuffix, filename_nopath, filename_prefix,
#                   textContents, samplerate and aligned_words.
#   $filename     - path of the speech audio (.wav) file.
#   $textContents - transcription text; only stored on $self here.
#
# Returns: the result of closing the log file (true on success).
#
# Dies (confess) when sox fails, or when the prompts file or the log file
# cannot be opened.
#
# NOTE(review): $up_increment, $down_increment, @max_sentences and the
# $min_/$max_sentence_length_* variables are not declared in this sub;
# presumably they are file-level variables maintained by the helpers --
# confirm against the rest of the module.
sub segment {    # public
    my ($self, $filename, $textContents) = @_;
    my $debug = $self->{"debug"};
    my $log   = $self->{"log"};

    my $filename_nopath = basename($filename);

    # Strip only a trailing ".wav".  The previous fileparse(..., "wav")
    # followed by s/\.// removed the FIRST dot anywhere in the name, so
    # "my.file.wav" became "myfile." instead of "my.file".
    my $filename_nosuffix = fileparse($filename, qr/\.wav/);

    $self->{"filename"}          = $filename;
    $self->{"filename_nosuffix"} = $filename_nosuffix;
    $self->{"filename_nopath"}   = $filename_nopath;
    $self->{"filename_prefix"}   = lc(substr($filename_nopath, 0, 3));
    $self->{"textContents"}      = $textContents;

    my $average_sentence_length      = $self->{"average_sentence_length"};
    my $max_sentence_length          = $self->{"max_sentence_length"};
    my $min_pause_for_sentence_break = $self->{"min_pause_for_sentence_break"};

    my $samplerate = _getSampleRate($self);
    $self->{"samplerate"} = $samplerate;

    _forceAlign($self);
    my $aligned_words = _processHViteOutput($self);
    $self->{"aligned_words"} = $aligned_words;

    print "### segment::$filename ###########################\n";

    # Copy to the "interim_files" directory for processing; -w also converts
    # to 16 bits per sample so the file can be processed by HVite.
    # List-form system() bypasses the shell, so file names containing spaces
    # or shell metacharacters are passed to sox verbatim.
    my @sox_command = ('sox', $filename, '-w', "AudioBook/interim_files/$filename_nopath");
    print "@sox_command\n";
    system(@sox_command) == 0 or confess "fullrun @sox_command failed: $?";

    my $lastSentence   = 0;
    my $sentence_start = 0;
    my $sentence_end   = $average_sentence_length;
    my $up             = 1;
    my $down           = 0;
    my $fileid         = 1;

    # PROMPTS stays a bareword (package-level) handle on purpose: it is not
    # written to in this sub, so presumably the helpers print to it by name.
    my $prompts_file = 'AudioBook/interim_files/prompts';
    open(PROMPTS, '>', $prompts_file)
        or confess("cannot open $prompts_file for output: $!");

    while (!$lastSentence) {
        if ($up) {
            if (($sentence_end + $up_increment) < $#$aligned_words) {
                _sentence_test($self, "up", $up_increment, $aligned_words, \$fileid, \$sentence_start, \$sentence_end);
                $up   = 0;
                $down = 1;
                $up_increment++;
            } else {
                # Probe would reach or pass the final word: emit the last prompt line.
                $sentence_end = $#$aligned_words;
                _last_sentence($self, $aligned_words, \$fileid, \$sentence_start, \$sentence_end);
                $lastSentence = 1;
            }
        } elsif ($down) {
            my $candidate = $sentence_end + $down_increment;
            if ($candidate >= 0 and $candidate > $sentence_start and $candidate < $#$aligned_words) {
                _sentence_test($self, "down", $down_increment, $aligned_words, \$fileid, \$sentence_start, \$sentence_end);
                $down = 0;
                $up   = 1;
                $down_increment--;
            } elsif ($candidate <= $sentence_start) {
                # Nothing left to probe below; go back to probing upwards.
                $up = 1;
            } elsif ($candidate >= $#$aligned_words) {
                # Sometimes the sentence_end is larger than the total number of words.
                $up = 1;
            } elsif ($candidate < 0) {
                confess "Error\n";
            }
        }
    }

    # Remove the scratch file directly instead of spawning "rm" via the shell.
    my $temp_wav = 'AudioBook/interim_files/temp.wav';
    print "cmd:unlink $temp_wav\n" if $debug;
    unlink $temp_wav;

    open(my $log_fh, '>>', $log) or confess("cannot open $log file: $!");
    print {$log_fh} "\nAudio Segmenting summary:\n";
    print {$log_fh} "-------------------------\n";
    print {$log_fh} "Settings:average_sentence_length: $average_sentence_length\n";
    print {$log_fh} " max_sentence_length: $max_sentence_length\n";
    # The pause setting is also logged divided by 10,000,000 and labelled as seconds.
    print {$log_fh} " pause length: $min_pause_for_sentence_break (" . ($min_pause_for_sentence_break / 10000000) . " seconds)\n\n";
    print {$log_fh} "Sentence Length: min:$min_sentence_length_linenumber: $min_sentence_length_found\n";
    print {$log_fh} " max:$max_sentence_length_linenumber: $max_sentence_length_found\n\n";
    print {$log_fh} "Prompt lines with more than max_sentence_length of $max_sentence_length words:\n";
    if (@max_sentences) {
        foreach my $line (@max_sentences) {
            print {$log_fh} "\t$line\n";
        }
    } else {
        print {$log_fh} "\tnone\n";
    }

    # Buffered write errors only surface at close, so report them.
    my $closed = close $log_fh;
    warn "cannot close $log file: $!" unless $closed;
    return $closed;
}
|---|
| | 159 | |
|---|
| | 160 | =head3 _getSampleRate |
|---|
| | 161 | |
|---|
| | 162 | Gets the sample rate of the audio file using the Audio::Wav module. |
|---|
| | 163 | |
|---|
| | 164 | =cut |
|---|
| | 165 | |
|---|
# segment($self, $filename, $textContents) -- public method.
#
# Runs the forced alignment for one audio file and then walks the aligned
# word list, alternately probing above and below the current candidate
# sentence end, handing each probe to _sentence_test until the remaining
# words are passed to _last_sentence.  Finally appends a summary to the log.
# (Per the module POD, the helpers produce the segmented audio files and the
# PROMPTS entries; their bodies are not visible from this sub.)
#
# Parameters:
#   $self         - object hash.  Reads the keys debug, log,
#                   average_sentence_length, max_sentence_length and
#                   min_pause_for_sentence_break; stores filename,
#                   filename_nosuffix, filename_nopath, filename_prefix,
#                   textContents, samplerate and aligned_words.
#   $filename     - path of the speech audio (.wav) file.
#   $textContents - transcription text; only stored on $self here.
#
# Returns: the result of closing the log file (true on success).
#
# Dies (confess) when sox fails, or when the prompts file or the log file
# cannot be opened.
#
# NOTE(review): $up_increment, $down_increment, @max_sentences and the
# $min_/$max_sentence_length_* variables are not declared in this sub;
# presumably they are file-level variables maintained by the helpers --
# confirm against the rest of the module.
sub segment {    # public
    my ($self, $filename, $textContents) = @_;
    my $debug = $self->{"debug"};
    my $log   = $self->{"log"};

    my $filename_nopath = basename($filename);

    # Strip only a trailing ".wav".  The previous fileparse(..., "wav")
    # followed by s/\.// removed the FIRST dot anywhere in the name, so
    # "my.file.wav" became "myfile." instead of "my.file".
    my $filename_nosuffix = fileparse($filename, qr/\.wav/);

    $self->{"filename"}          = $filename;
    $self->{"filename_nosuffix"} = $filename_nosuffix;
    $self->{"filename_nopath"}   = $filename_nopath;
    $self->{"filename_prefix"}   = lc(substr($filename_nopath, 0, 3));
    $self->{"textContents"}      = $textContents;

    my $average_sentence_length      = $self->{"average_sentence_length"};
    my $max_sentence_length          = $self->{"max_sentence_length"};
    my $min_pause_for_sentence_break = $self->{"min_pause_for_sentence_break"};

    my $samplerate = _getSampleRate($self);
    $self->{"samplerate"} = $samplerate;

    _forceAlign($self);
    my $aligned_words = _processHViteOutput($self);
    $self->{"aligned_words"} = $aligned_words;

    print "### segment::$filename ###########################\n";

    # Copy to the "interim_files" directory for processing; -w also converts
    # to 16 bits per sample so the file can be processed by HVite.
    # List-form system() bypasses the shell, so file names containing spaces
    # or shell metacharacters are passed to sox verbatim.
    my @sox_command = ('sox', $filename, '-w', "AudioBook/interim_files/$filename_nopath");
    print "@sox_command\n";
    system(@sox_command) == 0 or confess "fullrun @sox_command failed: $?";

    my $lastSentence   = 0;
    my $sentence_start = 0;
    my $sentence_end   = $average_sentence_length;
    my $up             = 1;
    my $down           = 0;
    my $fileid         = 1;

    # PROMPTS stays a bareword (package-level) handle on purpose: it is not
    # written to in this sub, so presumably the helpers print to it by name.
    my $prompts_file = 'AudioBook/interim_files/prompts';
    open(PROMPTS, '>', $prompts_file)
        or confess("cannot open $prompts_file for output: $!");

    while (!$lastSentence) {
        if ($up) {
            if (($sentence_end + $up_increment) < $#$aligned_words) {
                _sentence_test($self, "up", $up_increment, $aligned_words, \$fileid, \$sentence_start, \$sentence_end);
                $up   = 0;
                $down = 1;
                $up_increment++;
            } else {
                # Probe would reach or pass the final word: emit the last prompt line.
                $sentence_end = $#$aligned_words;
                _last_sentence($self, $aligned_words, \$fileid, \$sentence_start, \$sentence_end);
                $lastSentence = 1;
            }
        } elsif ($down) {
            my $candidate = $sentence_end + $down_increment;
            if ($candidate >= 0 and $candidate > $sentence_start and $candidate < $#$aligned_words) {
                _sentence_test($self, "down", $down_increment, $aligned_words, \$fileid, \$sentence_start, \$sentence_end);
                $down = 0;
                $up   = 1;
                $down_increment--;
            } elsif ($candidate <= $sentence_start) {
                # Nothing left to probe below; go back to probing upwards.
                $up = 1;
            } elsif ($candidate >= $#$aligned_words) {
                # Sometimes the sentence_end is larger than the total number of words.
                $up = 1;
            } elsif ($candidate < 0) {
                confess "Error\n";
            }
        }
    }

    # Remove the scratch file directly instead of spawning "rm" via the shell.
    my $temp_wav = 'AudioBook/interim_files/temp.wav';
    print "cmd:unlink $temp_wav\n" if $debug;
    unlink $temp_wav;

    open(my $log_fh, '>>', $log) or confess("cannot open $log file: $!");
    print {$log_fh} "\nAudio Segmenting summary:\n";
    print {$log_fh} "-------------------------\n";
    print {$log_fh} "Settings:average_sentence_length: $average_sentence_length\n";
    print {$log_fh} " max_sentence_length: $max_sentence_length\n";
    # The pause setting is also logged divided by 10,000,000 and labelled as seconds.
    print {$log_fh} " pause length: $min_pause_for_sentence_break (" . ($min_pause_for_sentence_break / 10000000) . " seconds)\n\n";
    print {$log_fh} "Sentence Length: min:$min_sentence_length_linenumber: $min_sentence_length_found\n";
    print {$log_fh} " max:$max_sentence_length_linenumber: $max_sentence_length_found\n\n";
    print {$log_fh} "Prompt lines with more than max_sentence_length of $max_sentence_length words:\n";
    if (@max_sentences) {
        foreach my $line (@max_sentences) {
            print {$log_fh} "\t$line\n";
        }
    } else {
        print {$log_fh} "\tnone\n";
    }

    # Buffered write errors only surface at close, so report them.
    my $closed = close $log_fh;
    warn "cannot close $log file: $!" unless $closed;
    return $closed;
}
|---|
| | 283 | =head3 _sentence_test |
|---|
| | 284 | |
|---|
| | 285 | Determines if pause is long enough after the 15th word, and if not, iterates up and down until a word with a suitable pause |
|---|
| | 286 | duration is found. |
|---|
| | 287 | |
|---|
| | 288 | =cut |
|---|
| | 613 | |
|---|
| | 614 | =head1 Change Log |
|---|
| | 615 | |
|---|
| | 616 | 2008/05/02 - 0.2 - Convert to class; major refactoring; renamed from htksegment to Audio.pm |
|---|
| | 617 | 2007/06/12 - 0.1.2 - modularize code |
|---|
| | 618 | 2007.03.22 - 0.1 - created |
|---|
| | 619 | |
|---|
| | 620 | =head1 Current Unresolved Issues: |
|---|
| | 621 | |
|---|
| | 622 | Warning: Use of uninitialized value in numeric gt (>) "if ($sentence_length>$max_sentence_length) {" |
|---|
| | 623 | |
|---|
| | 624 | =head1 AUTHOR |
|---|
| | 625 | |
|---|
| | 626 | Ken MacLean |
|---|
| | 627 | contact@voxforge.org |
|---|
| | 628 | |
|---|
| | 629 | =head1 COPYRIGHT AND LICENSE |
|---|
| | 630 | |
|---|
| | 631 | Copyright (C) 2007 Ken MacLean |
|---|
| | 632 | |
|---|
| | 633 | This program is free software; you can redistribute it and/or |
|---|
| | 634 | modify it under the terms of the GNU General Public License |
|---|
| | 635 | as published by the Free Software Foundation; either version 2 |
|---|
| | 636 | of the License, or (at your option) any later version. |
|---|
| | 637 | |
|---|
| | 638 | This program is distributed in the hope that it will be useful, |
|---|
| | 639 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| | 640 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| | 641 | GNU General Public License for more details. |
|---|
| | 642 | |
|---|
| | 643 | =cut |
|---|
| | 644 | |
|---|