source: Trunk/Scripts/VoxForge/lib/Corpus/Quarantine/Submission/Audio/AudioFile/WavTools.pm @ 3721

Revision 3567, 11.1 KB checked in by kmaclean, 3 years ago (diff)

see ticket #511 - fix for mean sample calculation when file duration less than 2 seconds

Line 
1#!/usr/bin/perl
2####################################################################
3###
4### script name : WavTools.pm
5### version: 0.1
6### created by: Ken MacLean
7### mail: contact@voxforge.org
8### Date: 2010.6.23
9###   
10### Copyright (C) 2010 Ken MacLean
11###
12### This program is free software; you can redistribute it and/or
13### modify it under the terms of the GNU General Public License
14### as published by the Free Software Foundation; either version 3
15### of the License, or (at your option) any later version.
16###
17### This program is distributed in the hope that it will be useful,
18### but WITHOUT ANY WARRANTY; without even the implied warranty of
19### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20### GNU General Public License for more details.
21###
22### Change History:   
23### 0.1 - 2010.6.23 - created Moose Role - like a Mixin class or Interface (see http://search.cpan.org/~doy/Moose-1.08/lib/Moose/Manual/Roles.pod)
24####################################################################
25package Corpus::Quarantine::Submission::Audio::AudioFile::WavTools;
26use 5.10.0;
27
28=head1 Corpus::Quarantine::Submission::Audio::AudioFile::WavTools
29
30=cut
31
32use Moose::Role;
33use IPC::System::Simple qw(system capture);
34use List::Util qw(min max);
35
36=head1 Corpus::Quarantine::Submission::Audio::AudioFile
37
38# Audio::Wav requires Inline::C package to run without some warnings...
39# Audio::Wav need to modify source as follows to run without 'sub redefine' warnings...
40#               # read is generated by _init_read_sub
41#               # !!!!!!
42#               # sub read { die "ERROR: can't call read without first calling _init_read_sub"; };
43#               # !!!!!!
44
45=cut 
46# "use lib" is not required when running runDaily.t in Eclipse or from the command line...
47# but Eclipse still says it cannot find "CpanPackageModified::Audio::Wav::Tools" in @INC;
48# adding "/homer/kmaclean/VoxForge-dev/Main/Scripts/VoxForge" path in
49# project>Properties>Perl Include Path, cleared this issue with Eclipse paths up.
50#
51# Also had to rename VoxForge/config.pm to configuration.pm to clear up NameSpace collisions
52# with Perl's own config package.
53#
54# for some reason, this was never an issue when running from the command line.
55# using a full path "use lib" also clears up problems with Eclipse finding things in @INC, but
56# becomes an irrelevant entry when moving to prod...
57# use lib '/homer/kmaclean/VoxForge-dev/Main/Scripts/VoxForge';
58use Toolkit::CpanPackageModified::Audio::Wav;
59
# Load Log::Log4perl explicitly so this role does not rely on some other
# module having loaded it before get_logger() runs at file load time.
use Log::Log4perl;

# Logger shared by every method in this role.
my $logger = Log::Log4perl->get_logger();
# File-scoped scratch variable for shell command strings.
my $command;
62
63=head2 downsampleAudioFile
64
65### Overloading Class/Instance methods
66
67        # sox: rate: -r 8000, 16000, 44100, 48000 ...
68        #      data size: -b = 8 or 16 
69
70=cut
71
# downsampleAudioFile($params)
#
# Converts a wav file in place to single-channel, 16-bit audio at the
# requested sample rate: sox writes the converted audio to "temp-<file>" in
# the same directory, and that temporary file is then moved over the original.
#
# May be called on an object (file name comes from $obj->get_audioFile) or on
# a class (file name comes from $params->{filename}).
#
# Parameters (hash reference):
#   targetRate - sample rate handed to sox's -r option
#   audioDir   - directory that contains the wav file
#   filename   - wav file name; only read when called on a class
#
# Throws Corpus::Quarantine::Submission::Audio::AudioFile::Wav::CantReadWavFile::Exception
# when the wav file does not exist.  capture() and system() come from
# IPC::System::Simple.
sub downsampleAudioFile {
        my ($type, $p) = @_;
        my $tr  = $p->{'targetRate'};
        my $dir = $p->{'audioDir'};
        # An invocant that is a reference means we were called on an object.
        my $fi  = ref $type ? $type->get_audioFile : $p->{'filename'};

        my $source = "$dir/$fi";
        my $temp   = "$dir/temp-$fi";
        $logger->debug( "downsampling:$source");

        if (! -f $source) {
                my $m = "$fi: can't find wav file: [$source]\n";
                $logger->warn($m);
                Corpus::Quarantine::Submission::Audio::AudioFile::Wav::CantReadWavFile::Exception->throw($m);
        }

        # File names come from quarantined submissions, so they must never reach
        # the shell unquoted.  Single-quote every interpolated value; an embedded
        # single quote is written as '\'' (close quote, escaped quote, reopen).
        my $quote = sub {
                my ($arg) = @_;
                $arg =~ s/'/'\\''/g;
                return "'$arg'";
        };

        # sox still runs through the shell so that "2>&1" folds its diagnostics
        # into the captured output, which is then written to the log.
        my $soxCommand = join ' ',
                'sox', $quote->($source),
                '-c 1', '-r', $quote->($tr), '-b 16',
                $quote->($temp), '2>&1';
        $logger->debug($soxCommand);
        $logger->warn( capture($soxCommand) );

        # List form: the arguments are passed to mv directly, with no shell involved.
        my @mvCommand = ('mv', $temp, $source);
        $logger->debug("@mvCommand");
        system(@mvCommand);
}
93
94
95=head2 getHeaderAndSampleMean
96
97Gets header information from the wav file header
98
99Gets the sample mean from the first 2 seconds of the audio file... rather than reading the entire audio file
100But if sample is less than 2 seconds, only read the sample.
101
102=cut
103
# getHeaderAndSampleMean()
#
# Reads the wav header of "$self->get_path/$self->get_audioFile" and stores on
# the object:
#   details          - header details as returned by the wav reader
#   peakVolThresh    - (2 ** bits_sample) * 0.45
#   lowVolThresh     - (2 ** bits_sample) * 0.0125
#   zeroOffsetThresh - (2 ** bits_sample) * 0.035
#   lengthSeconds    - duration reported by the wav reader
#   sampleMean       - mean of the first channel over the first 2 seconds of
#                      audio, or over the whole file when it is shorter
#
# Throws:
#   ...::Wav::EmptyAudioFile::Exception   - zero-size file, zero samples, or
#                                           no readable sample at all
#   ...::Wav::CantReadWavFile::Exception  - the wav reader could not open the file
#   ...::Wav::UndefinedSamples::Exception - an undefined sample was returned
#                                           after at least one good sample
sub getHeaderAndSampleMean {
        my ($self) = @_;
        my $fi = $self->get_audioFile;
        my $pa = $self->get_path;

        if (-z "$pa/$fi") {
                my $m = "$fi: Warning: zero file size";
                $logger->warn($m);
                Corpus::Quarantine::Submission::Audio::AudioFile::Wav::EmptyAudioFile::Exception->throw ($m);
        }

        my $wav  = Toolkit::CpanPackageModified::Audio::Wav->new();
        my $data = $wav->readFile( "$pa/$fi" )
                || Corpus::Quarantine::Submission::Audio::AudioFile::Wav::CantReadWavFile::Exception->throw (error => "can't read wav file: $fi" );

        my $totalSamples = $data->length_samples();
        if ($totalSamples <= 0) {
                my $m = "$fi: zero length audio file";
                $logger->debug($m);
                Corpus::Quarantine::Submission::Audio::AudioFile::Wav::EmptyAudioFile::Exception->throw ($m);
        }

        my $details = $data->details;
        $self->details($details);
        my $sr = $details->{sample_rate};
        my $bs = $details->{bits_sample};
        $self->peakVolThresh( (2**$bs) * 0.45 );
        $self->lowVolThresh( (2**$bs) * 0.0125 );
        $self->zeroOffsetThresh( (2**$bs) * 0.035 );

        # Keep the duration in a local instead of reading it back through
        # $self->lengthSeconds: the other attributes in this file are read via
        # get_* methods, so the bare name may be a writer only.
        my $lengthSeconds = $data->length_seconds;
        $self->lengthSeconds($lengthSeconds);

        # Lesser of: 2 seconds or the duration of the entire audio file.
        my $numberSecondsToRead = min($lengthSeconds, 2);

        # Cap the count at the real number of samples: for short files the
        # floating point product sample_rate * seconds can land just above it,
        # which would cause one read past the end of the data.
        my $samplesToRead = min($sr * $numberSecondsToRead, $totalSamples);

        my $sampleTotal  = 0;
        my $numOfSamples = 0;
        for (my $x = 0; $x < $samplesToRead; $x++) {
                my @channels = $data->readFile();
                my $sample = $channels[0]; # mono or one channel in stereo

                if ( defined $sample ) {
                        $numOfSamples++;
                        $sampleTotal += $sample;
                        next;
                }

                # Some wav files contain only header information and no audio data.
                if ($x == 0) {
                        my $m = "$fi: sample contains no audio data";
                        $logger->warn($m);
                        Corpus::Quarantine::Submission::Audio::AudioFile::Wav::EmptyAudioFile::Exception->throw ($m);
                }

                my $m = "$fi: undefined samples in sample mean, starting at number $x ... error in file?";
                $m = $m . "\nsr: [$sr]; bs: [$bs]; sample: [undef]\n";
                $logger->warn($m); # log first: throw does not return
                Corpus::Quarantine::Submission::Audio::AudioFile::Wav::UndefinedSamples::Exception->throw ($m);
        }

        # Nothing was read (the loop bound was below 1): report it as an empty
        # file rather than dying on a division by zero.
        if ($numOfSamples == 0) {
                my $m = "$fi: sample contains no audio data";
                $logger->warn($m);
                Corpus::Quarantine::Submission::Audio::AudioFile::Wav::EmptyAudioFile::Exception->throw ($m);
        }

        # dcOffset (zero-offset) = non-zero mean of the samples that were read.
        $self->sampleMean( $sampleTotal/$numOfSamples );
}
170
171=head2 processSamples
172
173This method reads the entire audio file and calculates the following:
174        $self->avgEnergy - average energy for entire recording         
175        $self->numPosPeaks - counts the number of positive peaks over the peak volume threshold (peakVolThresh)
176        $self->numNegPeaks - counts the number of negative peaks less than the peak volume threshold (peakVolThresh)
177        $self->numPosLowVol
178        $self->numNegLowVol
179        $self->posPeak 
180        $self->negPeak
181        $self->posSampleMean
182
183# todo samplerange needs to be a relative number ( to sample rate??? )
184
185=cut
186
# processSamples()
#
# Reads every sample of "$self->get_path/$self->get_audioFile", subtracts the
# previously calculated sample mean from the first channel, and from the
# result:
#   - appends one entry per window (sample_rate / 100 samples) to the arrays
#     returned by get_graphX (position in seconds), get_zeroCross (sign
#     changes in the window) and get_energy (summed absolute amplitude
#     divided by the window size);
#   - appends every 100th raw sample and its position in seconds to the
#     arrays returned by get_frequencyY and get_frequencyX;
#   - sets avgEnergy, numPosPeaks, numNegPeaks, numPosLowVol, numNegLowVol.
#
# Throws:
#   ...::Wav::CantReadWavFile::Exception    - the wav reader could not open the file
#   ...::Wav::EmptyAudioFile::Exception     - the very first sample is undefined
#   ...::Wav::UndefinedSamples::Exception   - a later sample is undefined
#   ...::AudioFile::SilenceRecording::Exception - both peaks are within +-75
#
# NOTE(review): the positive and negative peaks are only used for the silence
# test below; they are not stored on the object here - confirm that is intended.
sub processSamples {
        my ($self) = @_;
        my $fi = $self->get_audioFile;
        my $pa = $self->get_path;
        my $sm = $self->get_sampleMean;
        my $pv = $self->get_peakVolThresh;
        my $lv = $self->get_lowVolThresh;
        my $details = $self->get_details;
        my $sampleRate = $details->{sample_rate};

        # Volume Analysis
        my $numPosPeaks = 0;
        my $numNegPeaks = 0;
        my $numPosLowVol = 0;
        my $numNegLowVol = 0;
        my $posPeak = 0;
        my $negPeak = 0;
        # Zero Cross calculation
        my $prevSign;
        my $currentSign;
        my $zeroCrossCount = 0;
        my $graphX = $self->get_graphX;
        my $zeroCross = $self->get_zeroCross;
        # 10 milliseconds worth of samples.  Forced to a whole number of at least
        # 1: "%" truncates its operands, so a fractional window would disagree
        # with the divisor used for the window energy, and a window below 1
        # would die with "Illegal modulus zero".
        my $window = max(1, int($sampleRate / 100));
        # Energy
        my $energy = $self->get_energy;
        my $winEnergy = 0;
        my $totEnergy = 0;
        # Frequency
        my $frequencyX = $self->get_frequencyX;
        my $frequencyY = $self->get_frequencyY;

        my $wav = Toolkit::CpanPackageModified::Audio::Wav->new();

        my $data = $wav->readFile( "$pa/$fi" ) || Corpus::Quarantine::Submission::Audio::AudioFile::Wav::CantReadWavFile::Exception->throw (error => "can't read wav file: $fi" );

        my $totalSamples = $data->length_samples;
        for (my $x = 0 ; $x < $totalSamples ; $x++) { # read entire file
                my @channels = $data->readFile();
                my $raw = $channels[0];

                # Some wav files contain only header information and no audio
                # data, so every sample is checked before it is used.
                if ( !defined $raw ) {
                        if ($x == 0) {
                                my $m = "$fi: sample contains no audio data";
                                $logger->warn($m);
                                Corpus::Quarantine::Submission::Audio::AudioFile::Wav::EmptyAudioFile::Exception->throw ($m);
                        }
                        my $m = "$fi: undefined samples in audio, starting at number $x ... error in file?";
                        $logger->warn($m); # log first: throw does not return
                        Corpus::Quarantine::Submission::Audio::AudioFile::Wav::UndefinedSamples::Exception->throw ($m);
                }

                my $sample = $raw - $sm; # normalize (i.e. remove zero offset)
                # The very first sample sometimes has bad data (a very large
                # number); clamp it, because skipping it causes problems with graphing.
                if ($x == 0 and $sample > 35000) { $sample = 35000 }

                # Zero Cross calculation ($currentSign is left unchanged when $sample == 0)
                if ($sample > 0) {
                        $currentSign = 1; # +
                } elsif ($sample < 0) {
                        $currentSign = 0; # -
                }
                if ( defined($prevSign) and ($currentSign != $prevSign) ) { # zero-cross
                        $zeroCrossCount++;
                }
                $prevSign = $currentSign;

                # Short-Term Energy
                $winEnergy += abs($sample);
                $totEnergy += abs($sample); # assume energy cancels out zero offset...

                if ($x % $window == 0) { # use modulus to determine when new window occurs
                        push @$graphX, $x/$sampleRate; # to get this in seconds
                        push @$zeroCross, $zeroCrossCount;
                        $zeroCrossCount = 0;
                        push @$energy, $winEnergy/$window; # avg energy for window
                        $winEnergy = 0;
                }

                # Frequency
                # todo need to be a function of sample rate - i.e. only every tenth for greater then 32 kHz, and every one for 16kHz and under???
                if ($x % 100 == 0) { # every 100th sample is enough for a decent graph
                        push @$frequencyX, $x/$sampleRate; # position in seconds
                        push @$frequencyY, $raw; # raw
                }

                if ($sample > $pv) {
                        $numPosPeaks++;
                } elsif ($sample < (-1 * $pv) ) {
                        $numNegPeaks++;
                }
                if ($sample > $lv) {
                        $numPosLowVol++;
                } elsif ($sample < (-1 * $lv) ) {
                        $numNegLowVol++;
                }
                if ($sample > $posPeak) {
                        $posPeak = $sample;
                }
                if ($sample < $negPeak) {
                        $negPeak = $sample;
                }
        }

        # Average energy for the entire recording.  With zero samples the average
        # is reported as 0 and the silence test below raises the error, instead
        # of dying here on a division by zero.
        $self->avgEnergy( $totalSamples ? $totEnergy / $totalSamples : 0 );
        $self->numPosPeaks($numPosPeaks);
        $self->numNegPeaks($numNegPeaks);
        $self->numPosLowVol($numPosLowVol);
        $self->numNegLowVol($numNegLowVol);

        my $sampleRange = 75;
        if ( $posPeak  < $sampleRange and $negPeak > (-1 * $sampleRange) ) { # test sample range
                my $m = "$fi: No Speech in Submission - just silence (negative ($negPeak) and positive ($posPeak) sample peaks less than +- $sampleRange)";
                $logger->warn($m);
                Corpus::Quarantine::Submission::Audio::AudioFile::SilenceRecording::Exception->throw ($m);
        }
}
305
3061;
Note: See TracBrowser for help on using the repository browser.