voxforge.org
VoxForge Dev

root/Trunk/Scripts/Testing_scripts/archive/Nightly_test.pm

Revision 1559, 10.4 kB (checked in by kmaclean, 2 years ago)

updates to Testing_scripts

Line 
1 #!/usr/bin/perl -w
2 ####################################################################
3 ###
4 ### script name: Nightly_test.pm
5 ### modified by: Ken MacLean
6 ### email: contact@voxforge.org
7 ### Date: 2007.01.24
8 ### Command: ./Nightly_test.pm
9 ### Version: 0.1
10 ###             
11 ### Copyright (C) 2007 Ken MacLean
12 ###
13 ### This program is free software; you can redistribute it and/or
14 ### modify it under the terms of the GNU General Public License
15 ### as published by the Free Software Foundation; either version 2
16 ### of the License, or (at your option) any later version.
17 ###
18 ### This program is distributed in the hope that it will be useful,
19 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 ### GNU General Public License for more details.
22 ###
23 ####################################################################
24 package Nightly_test;
25 use strict;
26 use File::Spec;
27 use POSIX qw(strftime);
28
####################################################################
# Configuration: every path the test run needs, stored by name in %parms.
# Indented entries are derived from the entry they are indented under.
####################################################################
my %parms;
# NOTE(review): $parms{debug} is not read anywhere in this file; command
# echoing is controlled by the separate $debug lexical declared below.
$parms{'debug'} = 1;
$parms{'VoxForgeHome'} = "/home/kmaclean/workspace/VoxForge"; # for testing
#$parms{"VoxForgeHome"} = "/data/svn-mirror/Trunk";
    $parms{'AudioDirectory'}   = "$parms{'VoxForgeHome'}/Audio";
    $parms{'LexiconDirectory'} = "$parms{'VoxForgeHome'}/Lexicon";
    $parms{'QSdir'}            = "$parms{'VoxForgeHome'}/QuickStart";
    $parms{'ScriptsDirectory'} = "$parms{'VoxForgeHome'}/Scripts";
        $parms{'AudioScripts_dir'} = "$parms{'ScriptsDirectory'}/Audio_scripts";
            $parms{'Repository_dir'}     = "$parms{'AudioScripts_dir'}/Repository";
            $parms{'UserSubmission_dir'} = "$parms{'AudioScripts_dir'}/UserSubmission";
        $parms{'HTK_dir'}              = "$parms{'ScriptsDirectory'}/HTK/AMCreate_scripts";
        $parms{'MirroringScripts_dir'} = "$parms{'ScriptsDirectory'}/Mirroring_scripts";
        $parms{'Testing_scripts_dir'}  = "$parms{'ScriptsDirectory'}/Testing_scripts";

# Take ONE snapshot of the clock and split it, instead of calling
# localtime three times: separate calls could straddle midnight and
# produce a directory name that mixes parts of two different dates.
#   %Y  year (4 digits)
#   %m  month number, starting with 01
#   %d  numeric day of the month, with leading zeros (eg 01..31)
my ($year, $mon, $mday) = split /-/, strftime("%Y-%m-%d", localtime);

#$parms{"mirror_dir"}  = "/data/svn-mirror";
$parms{'mirror_dir'}  = "/data/svn-mirror-copy"; # for testing
$parms{'Nightly_dir'} = "$parms{'mirror_dir'}/Nightly_Builds"; # for testing
    $parms{'Nightly_dirname'}       = "$parms{'Nightly_dir'}/AcousticModel-$year-$mon-$mday";
    $parms{'16kHz_16bit_NightlyAM'} = "$parms{'Nightly_dirname'}/HTK/16kHz_16bit/MFCC_O_D";
    $parms{'8kHz_16bit_NightlyAM'}  = "$parms{'Nightly_dirname'}/HTK/8kHz_16bit/MFCC_O_D";
$parms{'Tags'} = "$parms{'mirror_dir'}/Tags"; # for testing
    $parms{'Releases'}         = "$parms{'Tags'}/Releases";
    $parms{'CurrentBuild_dir'} = "$parms{'Releases'}/0_1_1-build726";
        $parms{'16kHz_16bit_CurrentAM'} = "$parms{'CurrentBuild_dir'}/AcousticModels/HTK/16kHz-16bit/MFCC_O_D";
        $parms{'8kHz_16bit_CurrentAM'}  = "$parms{'CurrentBuild_dir'}/AcousticModels/HTK/8kHz-16bit/MFCC_O_D";
$parms{'HTKBin'} = "/usr/local/HTK"; # for testing
#$parms{"HTKBin"}  = "/usr/local/bin";

# Scratch variable holding the most recent shell command; shared with the subs below.
my $command;
####################################################################
# When true, each shell command is echoed to STDOUT before it is run.
my $debug = 0;
my $HTKbin = $parms{'HTKBin'};

# Shortcuts used when building the HVite command lines.
my $Testing_scripts_dir = $parms{'Testing_scripts_dir'};
my $LexiconDirectory    = $parms{'LexiconDirectory'};
66
# Step 1 - Acoustic Models
# path to AM: /data/svn-mirror/Nightly_Builds/AcousticModel-2007-01-24/HTK/16kHz_16bit/MFCC_O_D

# Step 2 - Create Test Prompts
# Produces testref.mlf, the reference file every HResults call below reads.
$command = "perl $HTKbin/prompts2mlf testref.mlf testprompts";
print "$command\n" if $debug;
system($command) == 0
        or warn "prompts2mlf failed (status $?); testref.mlf may be missing or stale\n";

# Step 3 - Recording the Test Data
# path to audio: /home/kmaclean/workspace/VoxForge/Audio/Test/16kHz_16bit

# Step 4 - Coding the Data
# already done!

# The report goes to ./TestResults through the OUTPUT handle, which the
# subroutines below print to directly. Three-argument open keeps the mode
# separate from the file name, and $! tells the operator why it failed.
open(OUTPUT, '>', 'TestResults') or die ("cannot write to TestResults file: $!"); # open for output
print OUTPUT "Disclaimer: this is a *sanity test* only, and is *not* to be regarded as a valid test of the VoxForge Acoustic Models!
 * the audio files used for testing include only one voice, the Acoustic Models were also trained using that same voice, so the results will look better;
 * there are only 50 audio samples in the test database - not enough for a good test;
 * the VoxForge Acoustic Models are still alpha with respect to Speaker Independent Speech Recognition, so please donate some speech to VoxForge,
 thanks,
 Ken
 \n";
processNightly();
processCurrent();
# Buffered write errors (e.g. a full disk) only surface when the handle is closed.
close(OUTPUT) or die ("cannot close TestResults file: $!");

# real time testing
#$command = ("julian -input mic -h $MFCC_files/hmmdefs -hlist $MFCC_files/tiedlist -smpFreq 16000 -C julian.jconf"); print "$command\n" if $debug; system($command) ;
# no "_Z" (zero mean) permitted on HVite live? Live does not work! get "ERROR [+6320]  OpenAsChannel: cannot zero mean within buffer" error
#$command = ("HVite -A -D -T 1 -H $MFCC_files/macros -H $MFCC_files/hmmdefs -C HTK_Live_config -w wdnet -p 0.0 -s 5.0 $MFCC_files/dict $MFCC_files/tiedlist"); print "$command\n" if $debug; system($command) ;
94
95 ####################################################################
96 ### Subroutines
97 ####################################################################
# Writes the report section for the nightly build: a heading naming the
# nightly build directory, the HTK and Julian sections for the 16kHz and
# 8kHz nightly acoustic models (in that order), and finally the notes.
# Takes no arguments; reads %parms and prints to the OUTPUT handle.
sub processNightly {
        print OUTPUT "Testing Acoustic Models created in: " . $parms{"Nightly_dirname"} . "\n\n";

        my $am_16k = $parms{"16kHz_16bit_NightlyAM"};
        my $am_8k  = $parms{"8kHz_16bit_NightlyAM"};

        HTK_16kHz_16bit($am_16k);
        Julian_16kHz_16bit($am_16k);
        HTK_8kHz_16bit($am_8k);
        Julian_8kHz_16bit($am_8k);

        PrintNotes();
}
107
# Writes the comparison section of the report: a banner naming the current
# release directory, the HTK and Julian sections for the 16kHz and 8kHz
# acoustic models of that release (in that order), and finally the notes.
# Takes no arguments; reads %parms and prints to the OUTPUT handle.
sub processCurrent {
        my $release_dir = $parms{"CurrentBuild_dir"};
        my $am_16k      = $parms{"16kHz_16bit_CurrentAM"};
        my $am_8k       = $parms{"8kHz_16bit_CurrentAM"};

        # Banner: rule, explanatory sentence plus release directory, rule.
        print OUTPUT "\n\n\n" . "================================================================================================================\n";
        print OUTPUT "For comparison purposes, see below for the same Tests on the most current release of the VoxForge Acoustic Models:\n"
                . "($release_dir)\n";
        print OUTPUT "================================================================================================================\n";

        HTK_16kHz_16bit($am_16k);
        Julian_16kHz_16bit($am_16k);
        HTK_8kHz_16bit($am_8k);
        Julian_8kHz_16bit($am_8k);

        PrintNotes();
}
120
# Runs HVite on the 16kHz/16bit test list with the acoustic model found in
# $AM_Dir, scores the result with HResults, and appends a titled section
# (settings used + HResults output) to the OUTPUT report handle.
#
#   $AM_Dir - directory containing the model's macros, hmmdefs and tiedlist
#
# A failing HVite run produces a warning rather than aborting, so the
# remaining sections of the report are still written.
sub HTK_16kHz_16bit {
        my ($AM_Dir) = @_;
        my $wordinspen = "0.0";
        my $gramscfact = "1.0";

        # Drop any recout.mlf left by an earlier section; presumably so a
        # failed HVite run cannot be scored against stale output.
        unlink "recout.mlf";

        $command = join(" ",
                "$HTKbin/HVite -A -D -T 1",
                "-H $AM_Dir/macros -H $AM_Dir/hmmdefs",
                "-C $Testing_scripts_dir/wav_config",
                "-S $Testing_scripts_dir/wavlst16kHz_16bit",
                "-l '*' -i recout.mlf",
                "-w $Testing_scripts_dir/wdnet",
                "-p $wordinspen -s $gramscfact",
                "$LexiconDirectory/VoxForge/VoxForgeDict $AM_Dir/tiedlist",
                "1>/dev/null 2>/dev/null",
        );
        # Echo the command in debug mode, as the Julian subs and the main flow do.
        print "$command\n" if $debug;
        system($command) == 0
                or warn "HVite (16kHz_16bit) failed for $AM_Dir (status $?)\n";

        my $HTK_TestResults = `$HTKbin/HResults -I testref.mlf $AM_Dir/tiedlist recout.mlf`;
        # Backticks yield undef when the command cannot be started; avoid
        # "uninitialized value" warnings in the substitution and print below.
        $HTK_TestResults = "" unless defined $HTK_TestResults;
        $HTK_TestResults =~ s/ HTK//;

        print OUTPUT ("HTK 16kHz_16bit\n");
        print OUTPUT ("---------------\n");
        print OUTPUT ("\tword insertion penalty: $wordinspen\n");
        print OUTPUT ("\tgrammar scale factor: $gramscfact\n");
        print OUTPUT "$HTK_TestResults\n";
}
135
# Runs julian over the 16kHz/16bit file list with the acoustic model in
# $AM_Dir, converts its output with ProcessJulianOutput.pl, scores the
# converted file with HResults, and appends a titled section (penalties
# used + HResults output) to the OUTPUT report handle.
#
#   $AM_Dir - directory containing the model's hmmdefs and tiedlist
sub Julian_16kHz_16bit {
        my ($AM_Dir) = @_;
        my ($pass1_penalty, $pass2_penalty, $pause_penalty) = ("0.5", "100.0", "-55.0");

        system("rm -f julianProcessed");

        # Recognition pass: julian's stdout is captured in ./julianOutput.
        $command = "julian -penalty1 $pass1_penalty -penalty2 $pass2_penalty -iwsppenalty $pause_penalty -input rawfile -filelist wavlst16kHz_16bit -h $AM_Dir/hmmdefs -hlist $AM_Dir/tiedlist -smpFreq 16000  -C julian.jconf  > julianOutput 2>/dev/null";
        if ($debug) {
                print "$command\n";
        }
        system($command);

        # Conversion pass: julianOutput -> julianProcessed, the file HResults reads.
        $command = "perl ./ProcessJulianOutput.pl julianOutput julianProcessed";
        if ($debug) {
                print "$command\n";
        }
        system($command);

        my $score_text = `$HTKbin/HResults -I testref.mlf $AM_Dir/tiedlist julianProcessed`;
        $score_text =~ s/ HTK//;

        print OUTPUT "Julian 16kHz_16bit\n";
        print OUTPUT "------------------\n";
        print OUTPUT "\tword insertion penalty\n";
        print OUTPUT "\t  first pass (-penalty1):$pass1_penalty\n";
        print OUTPUT "\t  second pass (-penalty2):$pass2_penalty\n";
        print OUTPUT "\ttransition penalty:$pause_penalty (for short-term inter-word pauses between words (-iwsppenalty))\n";
        print OUTPUT "$score_text\n";
}
154
# Runs HVite on the 8kHz/16bit test list with the acoustic model found in
# $AM_Dir, scores the result with HResults, and appends a titled section
# (settings used + HResults output) to the OUTPUT report handle.
#
#   $AM_Dir - directory containing the model's macros, hmmdefs and tiedlist
#
# A failing HVite run produces a warning rather than aborting, so the
# remaining sections of the report are still written.
sub HTK_8kHz_16bit {
        my ($AM_Dir) = @_;
        my $wordinspen = "10.0";
        my $gramscfact = "5.0";
        my $title      = "HTK 8kHz_16bit";

        # Drop any recout.mlf left by an earlier section; presumably so a
        # failed HVite run cannot be scored against stale output.
        unlink "recout.mlf";

        $command = join(" ",
                "$HTKbin/HVite -A -D -T 1",
                "-H $AM_Dir/macros -H $AM_Dir/hmmdefs",
                "-C $Testing_scripts_dir/wav_config",
                "-S $Testing_scripts_dir/wavlst8kHz_16bit",
                "-l '*' -i recout.mlf",
                "-w $Testing_scripts_dir/wdnet",
                "-p $wordinspen -s $gramscfact",
                "$LexiconDirectory/VoxForge/VoxForgeDict $AM_Dir/tiedlist",
                "1>/dev/null 2>/dev/null",
        );
        # Echo the command in debug mode, as the Julian subs and the main flow do.
        print "$command\n" if $debug;
        system($command) == 0
                or warn "HVite (8kHz_16bit) failed for $AM_Dir (status $?)\n";

        my $HTK_TestResults = `$HTKbin/HResults -I testref.mlf $AM_Dir/tiedlist recout.mlf`;
        # Backticks yield undef when the command cannot be started; avoid
        # "uninitialized value" warnings in the substitution and print below.
        $HTK_TestResults = "" unless defined $HTK_TestResults;
        $HTK_TestResults =~ s/ HTK//;

        print OUTPUT ("$title\n");
        # Underline sized from the title; the old literal was copied from the
        # 16kHz section and was one dash too long.
        print OUTPUT ("-" x length($title), "\n");
        print OUTPUT ("\tword insertion penalty: $wordinspen\n");
        print OUTPUT ("\tgrammar scale factor: $gramscfact\n");
        print OUTPUT "$HTK_TestResults\n";
}
169
# Runs julian over the 8kHz/16bit file list with the acoustic model in
# $AM_Dir, converts its output with ProcessJulianOutput.pl, scores the
# converted file with HResults, and appends a titled section (penalties
# used + HResults output) to the OUTPUT report handle.
#
#   $AM_Dir - directory containing the model's hmmdefs and tiedlist
sub Julian_8kHz_16bit {
        my ($AM_Dir) = @_;
        my $penalty1 = "50.0";
        my $penalty2 = "100.0";
        my $iwsppenalty = "-55.0";
        my $title = "Julian 8kHz_16bit";

        system("rm -f julianProcessed");

        # Recognition pass: julian's stdout is captured in ./julianOutput.
        $command = ("julian -penalty1 $penalty1 -penalty2 $penalty2 -iwsppenalty $iwsppenalty -input rawfile -filelist wavlst8kHz_16bit -h $AM_Dir/hmmdefs -hlist $AM_Dir/tiedlist -smpFreq 8000  -C julian.jconf  > julianOutput 2>/dev/null");
        print "$command\n" if $debug;
        system($command);

        # Conversion pass: julianOutput -> julianProcessed, the file HResults reads.
        $command = ("perl ./ProcessJulianOutput.pl julianOutput julianProcessed");
        print "$command\n" if $debug;
        system($command);

        my $Julian_TestResults = `$HTKbin/HResults -I testref.mlf $AM_Dir/tiedlist julianProcessed`;
        $Julian_TestResults =~ s/ HTK//;

        print OUTPUT ("$title\n");
        # Underline sized from the title; the old literal was copied from the
        # 16kHz section and was one dash too long.
        print OUTPUT ("-" x length($title), "\n");
        print OUTPUT ("\tword insertion penalty\n");
        print OUTPUT ("\t  first pass (-penalty1):$penalty1\n");
        print OUTPUT ("\t  second pass (-penalty2):$penalty2\n");
        # Single colon, matching the 16kHz Julian section (was "penalty::").
        print OUTPUT ("\ttransition penalty:$iwsppenalty (for short-term inter-word pauses between words (-iwsppenalty))\n");
        print OUTPUT "$Julian_TestResults\n";
}
188
# Appends the fixed "Notes:" legend (how to read the SENT and WORD lines
# and what the D/S/I counts stand for) to the OUTPUT report handle.
# Takes no arguments.
sub PrintNotes {
        my @legend = (
                "Notes:",
                "",
                "    * the line starting with SENT gives the percentage of sentences that were recognized correctly, out of N sentences in total.",
                "    * the line starting with WORD gives the percentage of words that were recognized correctly, out of N words in total",
                "      However, since HTK or Julius erroneously 'added' words that are not in the audio file (i.e. insertion errors) they usually get a lower percentage accuracy rating.",
                "    * Count definitions:",
                "          o D - Deletion Error",
                "          o S - Substitution Error",
                "          o I - Insertion Error",
        );
        # One write, each legend line terminated by a newline.
        print OUTPUT join("\n", @legend) . "\n";
}
200
201 1;
Note: See TracBrowser for help on using the browser.