| 39 | | getopts('a:d:hl:r:t:u:x:KT'); # sets $opt_* as a side effect. |
|---|
| 40 | | my ($audiofile, $textfile, $voxforgeDict,$tarSuffix,$README,$LICENCE,$username); |
|---|
| 41 | | if ($opt_a and $opt_t and $opt_d) { |
|---|
| 42 | | if (-r $opt_a) { |
|---|
| 43 | | $audiofile=$opt_a; |
|---|
| | 39 | my %self; |
|---|
| | 40 | $self{'debug'} = 0; |
|---|
| | 41 | $self{'g2p_model'} = "AudioBook/input_files/g2p/models/model-5"; |
|---|
| | 42 | $self{'htk_files'} = "AudioBook/input_files/htk"; |
|---|
| | 43 | my $command; |
|---|
| | 44 | |
|---|
| | 45 | #################################################################### |
|---|
| | 46 | ### Main |
|---|
| | 47 | #################################################################### |
|---|
| | 48 | getOptions(\%self); |
|---|
| | 49 | process(\%self); |
|---|
| | 50 | print "completed!\n"; |
|---|
| | 51 | |
|---|
| | 52 | #################################################################### |
|---|
| | 53 | ### Subroutines |
|---|
| | 54 | #################################################################### |
|---|
| | 55 | sub getOptions { |
|---|
| | 56 | my ($self)= @_; |
|---|
| | 57 | getopts('a:d:hl:r:t:u:x:KT'); # sets $opt_* as a side effect. |
|---|
| | 58 | if ($opt_a and $opt_t and $opt_d) { |
|---|
| | 59 | if (-r $opt_a) { |
|---|
| | 60 | $self->{"audiofile"}=$opt_a; |
|---|
| | 61 | } else { |
|---|
| | 62 | die "can't open -a" . $self->{"audiofile"} . "\n"; |
|---|
| | 63 | } |
|---|
| | 64 | if (-r $opt_t) { |
|---|
| | 65 | $self->{"textfile"}=$opt_t; |
|---|
| | 66 | } else { |
|---|
| | 67 | die "can't open -t" . $self->{"textfile"} . "\n"; |
|---|
| | 68 | } |
|---|
| | 69 | if (-r $opt_d) { |
|---|
| | 70 | $self->{"voxforgeDict"}=$opt_d; |
|---|
| | 71 | } else { |
|---|
| | 72 | die "can't open -d" . $self->{"voxforgeDict"} . "\n"; |
|---|
| | 73 | } |
|---|
| | 74 | ### |
|---|
| | 75 | if (defined($opt_T)) { |
|---|
| | 76 | if ($opt_x) { |
|---|
| | 77 | $self->{"tarSuffix"}=substr($opt_x,3); # only use 1st 3 characters. |
|---|
| | 78 | }else { |
|---|
| | 79 | $self->{"tarSuffix"}=random_characters(3); |
|---|
| | 80 | } |
|---|
| | 81 | |
|---|
| | 82 | if ($opt_r) { |
|---|
| | 83 | if (-r $opt_r) { |
|---|
| | 84 | $self->{"README"}=$opt_r; |
|---|
| | 85 | } else { |
|---|
| | 86 | die "can't open -r" . $self->{"README"} . "\n"; |
|---|
| | 87 | } |
|---|
| | 88 | } else { |
|---|
| | 89 | $self->{"README"}="AudioBook/input_files/README"; |
|---|
| | 90 | } |
|---|
| | 91 | if ($opt_l) { |
|---|
| | 92 | if (-r $opt_l) { |
|---|
| | 93 | $self->{"LICENCE"}=$opt_l; |
|---|
| | 94 | } else { |
|---|
| | 95 | die "can't open -l" . $self->{"LICENCE"} . "\n"; |
|---|
| | 96 | } |
|---|
| | 97 | } else { |
|---|
| | 98 | $self->{"LICENCE"}="AudioBook/input_files/LICENCE"; |
|---|
| | 99 | } |
|---|
| | 100 | } |
|---|
| | 101 | } elsif ($opt_K) { |
|---|
| | 102 | $self->{"audiofile"}="AudioBook/test/audio.wav"; |
|---|
| | 103 | $self->{"textfile"}="AudioBook/test/text-simple.txt"; |
|---|
| | 104 | $self->{"voxforgeDict"}="AudioBook/test/VoxForgeDict"; |
|---|
| | 105 | $self->{"tarSuffix"}=_random_characters(3); |
|---|
| | 106 | $self->{"username"}="test"; |
|---|
| | 107 | print "test using the following files:\n"; |
|---|
| | 108 | print "\t" . $self->{"audiofile"} . "\n"; |
|---|
| | 109 | print "\t" . $self->{"textfile"} . "\n"; |
|---|
| | 110 | print "\t" . $self->{"voxforgeDict"} . "\n"; |
|---|
| | 111 | } elsif ($opt_h) { |
|---|
| | 112 | print "\nVoxForge Audio Segmentation Script Parameters\n"; |
|---|
| | 113 | print "=============================================\n"; |
|---|
| | 114 | print "-a\t* audio file name\n"; |
|---|
| | 115 | print "-d\t* pronunication dictionary\n"; |
|---|
| | 116 | print "-h\tshow help\n"; |
|---|
| | 117 | print "-l\tLICENSE file (default = AudioBook/input_files/LICENCE)\n"; |
|---|
| | 118 | print "-r\tREADME file (default = AudioBook/input_files/README)\n"; |
|---|
| | 119 | print "-t\t* text file name\n"; |
|---|
| | 120 | print "-u\tusername or name you want file stats collected by on VoxForge Metrics \n"; |
|---|
| | 121 | print "\tpage:\t(http://www.voxforge.org/home/downloads/metrics)\n"; |
|---|
| | 122 | print "-K\trun test\n"; |
|---|
| | 123 | print "-T\tunique tar file suffix - also acts a switch to create gzipped/tar file\n"; |
|---|
| | 124 | print "\t(max 3 characters - remainder is truncated)\n"; |
|---|
| | 125 | print "\n\t* required for scripts to run\n"; |
|---|
| | 126 | print "\n"; |
|---|
| | 127 | exit; |
|---|
| 47 | | if (-r $opt_t) { |
|---|
| 48 | | $textfile=$opt_t; |
|---|
| | 134 | print "audiofile:" . $self->{"audiofile"}. "\n"; |
|---|
| | 135 | print "textfile:" . $self->{"textfile"}. "\n"; |
|---|
| | 136 | print "voxforgeDict:" . $self->{"voxforgeDict"} . "\n"; |
|---|
| | 137 | } |
|---|
| | 138 | |
|---|
| | 139 | sub process { |
|---|
| | 140 | my ($self)= @_; |
|---|
| | 141 | my $debug = $self->{'debug'}; |
|---|
| | 142 | my $audiofile = $self->{"audiofile"}; |
|---|
| | 143 | my $textfile = $self->{"textfile"}; |
|---|
| | 144 | my $username = $self->{"username"}; |
|---|
| | 145 | my $tarSuffix = $self->{"tarSuffix"}; |
|---|
| | 146 | my $voxforgeDict = $self->{"voxforgeDict"}; |
|---|
| | 147 | my $tarSuffix = $self->{"tarSuffix"}; |
|---|
| | 148 | my $htk_files = $self->{'htk_files'}; |
|---|
| | 149 | |
|---|
| | 150 | my $textContents = AudioBook::Text->new($textfile); |
|---|
| | 151 | $textContents->createWLISTFile("AudioBook/interim_files/wlist"); |
|---|
| | 152 | |
|---|
| | 153 | my $dictionary = AudioBook::Dictionary->new($self); |
|---|
| | 154 | my $missingwordfound = $dictionary->findOutOfVocabularyWords($voxforgeDict,"AudioBook/interim_files/MissingWords"); |
|---|
| | 155 | if ($missingwordfound) { |
|---|
| | 156 | $dictionary->getPronunciations("AudioBook/interim_files/MissingWords_out"); # uses g2p |
|---|
| | 157 | $dictionary->updatePronDict(); |
|---|
| | 158 | # need to update dict with missing words |
|---|
| | 159 | |
|---|
| | 160 | $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict" . $self->{"voxforgeDict"}); system($command) == 0 or confess "fullrun $command failed: $?"; |
|---|
| | 161 | $command = ("cp AudioBook/interim_files/MissingWords_out AudioBook/output_files/MissingWords"); print "cmd:$command\n" if $debug; system($command); |
|---|
| 52 | | if (-r $opt_d) { |
|---|
| 53 | | $voxforgeDict=$opt_d; |
|---|
| 54 | | } else { |
|---|
| 55 | | die "can't open -d $voxforgeDict\n"; |
|---|
| 56 | | } |
|---|
| 57 | | ### |
|---|
| 58 | | if (defined($opt_T)) { |
|---|
| 59 | | if ($opt_x) { |
|---|
| 60 | | $tarSuffix=substr($opt_x,3); # only use 1st 3 characters. |
|---|
| 61 | | }else { |
|---|
| 62 | | $tarSuffix=random_characters(3); |
|---|
| 63 | | } |
|---|
| 64 | | |
|---|
| 65 | | if ($opt_r) { |
|---|
| 66 | | if (-r $opt_r) { |
|---|
| 67 | | $README=$opt_r; |
|---|
| 68 | | } else { |
|---|
| 69 | | die "can't open -r $README\n"; |
|---|
| 70 | | } |
|---|
| 71 | | } else { |
|---|
| 72 | | $README="AudioBook/input_files/README"; |
|---|
| 73 | | } |
|---|
| 74 | | if ($opt_l) { |
|---|
| 75 | | if (-r $opt_l) { |
|---|
| 76 | | $LICENCE=$opt_l; |
|---|
| 77 | | } else { |
|---|
| 78 | | die "can't open -l $LICENCE\n"; |
|---|
| 79 | | } |
|---|
| 80 | | } else { |
|---|
| 81 | | $LICENCE="AudioBook/input_files/LICENCE"; |
|---|
| 82 | | } |
|---|
| 83 | | } |
|---|
| 84 | | } elsif ($opt_K) { |
|---|
| 85 | | $audiofile="AudioBook/test/audio.wav"; |
|---|
| 86 | | $textfile= "AudioBook/test/text-simple.txt"; |
|---|
| 87 | | $voxforgeDict="AudioBook/test/VoxForgeDict"; |
|---|
| 88 | | $tarSuffix=random_characters(3); |
|---|
| 89 | | $username="test"; |
|---|
| 90 | | print "test using the following files:\n\t$audiofile\n\t$textfile\n\t$voxforgeDict\n"; |
|---|
| 91 | | } elsif ($opt_h) { |
|---|
| 92 | | print "\nVoxForge Audio Segmentation Script Parameters\n"; |
|---|
| 93 | | print "=============================================\n"; |
|---|
| 94 | | print "-a\t* audio file name\n"; |
|---|
| 95 | | print "-d\t* pronunication dictionary\n"; |
|---|
| 96 | | print "-h\tshow help\n"; |
|---|
| 97 | | print "-l\tLICENSE file (default = AudioBook/input_files/LICENCE)\n"; |
|---|
| 98 | | print "-r\tREADME file (default = AudioBook/input_files/README)\n"; |
|---|
| 99 | | print "-t\t* text file name\n"; |
|---|
| 100 | | print "-u\tusername or name you want file stats collected by on VoxForge Metrics \n"; |
|---|
| 101 | | print "\tpage:\t(http://www.voxforge.org/home/downloads/metrics)\n"; |
|---|
| 102 | | print "-K\trun test\n"; |
|---|
| 103 | | print "-T\tunique tar file suffix - also acts a switch to create gzipped/tar file\n"; |
|---|
| 104 | | print "\t(max 3 characters - remainder is truncated)\n"; |
|---|
| 105 | | print "\n\t* required for scripts to run\n"; |
|---|
| 106 | | print "\n"; |
|---|
| 107 | | exit; |
|---|
| 108 | | } else { |
|---|
| 109 | | print "\nVoxForge Audio Segmentation Script\n"; |
|---|
| 110 | | print "==================================\n"; |
|---|
| 111 | | print "parms -a, -t, -d need to be defined, use -h parameter for more information\n\n"; |
|---|
| 112 | | exit; |
|---|
| 113 | | } |
|---|
| 114 | | print "audiofile:$audiofile\n"; |
|---|
| 115 | | print "testfile:$textfile\n"; |
|---|
| 116 | | print "voxforgeDict:$voxforgeDict\n"; |
|---|
| 117 | | #################################################################### |
|---|
| 118 | | ### Class Variables |
|---|
| 119 | | #################################################################### |
|---|
| 120 | | my $command; |
|---|
| 121 | | my %parms; |
|---|
| 122 | | $parms{'debug'} = 0; |
|---|
| 123 | | $parms{'g2p_model'} = "AudioBook/input_files/g2p/models/model-5"; |
|---|
| 124 | | $parms{'htk_files'} = "AudioBook/input_files/htk"; |
|---|
| 125 | | |
|---|
| 126 | | #################################################################### |
|---|
| 127 | | ### Main |
|---|
| 128 | | #################################################################### |
|---|
| 129 | | my $debug = $parms{'debug'}; |
|---|
| 130 | | my $textContents = AudioBook::Text->new(\%parms,$textfile); |
|---|
| 131 | | $textContents->createWLISTFile("AudioBook/interim_files/wlist"); |
|---|
| 132 | | my $dictionary = AudioBook::Dictionary->new(\%parms,$voxforgeDict); |
|---|
| 133 | | my $missingwordfound = $dictionary->findOutOfVocabularyWords("AudioBook/interim_files/MissingWords"); |
|---|
| 134 | | if ($missingwordfound) { |
|---|
| 135 | | $dictionary->getPronunciations("AudioBook/interim_files/MissingWords_out"); # uses g2p |
|---|
| 136 | | $dictionary->updatePronDict(); |
|---|
| 137 | | # need to update dict with missing words |
|---|
| 138 | | my $htk_files = $parms{'htk_files'}; |
|---|
| 139 | | $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $voxforgeDict"); system($command) == 0 or confess "fullrun $command failed: $?"; |
|---|
| 140 | | $command = ("cp AudioBook/interim_files/MissingWords_out AudioBook/output_files/MissingWords"); print "cmd:$command\n" if $debug; system($command); |
|---|
| 141 | | } else { |
|---|
| 142 | | open(MISSINGWORDSOUT,">AudioBook/output_files/MissingWords") or confess ("cannot open AudioBook/output_files/MissingWords file"); |
|---|
| 143 | | print MISSINGWORDSOUT "no missing words\n"; |
|---|
| 144 | | close MISSINGWORDSOUT |
|---|
| 145 | | } |
|---|
| 146 | | $command = ("cp AudioBook/interim_files/dict AudioBook/output_files"); print "cmd:$command\n" if $debug; system($command); |
|---|
| 147 | | my $audio = AudioBook::Audio->new(\%parms,$audiofile,$textContents); |
|---|
| 148 | | $audio->segment(); |
|---|
| 149 | | if (defined($tarSuffix)){ |
|---|
| 150 | | my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); |
|---|
| 151 | | $year += 1900; |
|---|
| 152 | | $mon = sprintf("%02d", $mon); |
|---|
| 153 | | $mday = sprintf("%02d", $mday); |
|---|
| 154 | | print "date:$sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst\n"; |
|---|
| 155 | | print "creating qzipped tar file:$username$\-$year$mon$mday\-$tarSuffix\.tgz \n"; |
|---|
| 156 | | $command = ("tar -zcvf $username$\-$year$mon$mday\-$tarSuffix\.tgz AudioBook/output_files --exclude \"\.svn\" "); print "cmd:$command\n" if $debug; system($command); |
|---|
| 157 | | print "please submit your tar file to: www.voxforge.org\n"; |
|---|
| 158 | | } |
|---|
| 159 | | print "completed!\n"; |
|---|
| 160 | | |
|---|
| 161 | | ########################################################## |
|---|
| 162 | | # Subroutines |
|---|
| 163 | | ########################################################### |
|---|
| 164 | | sub random_characters { |
|---|
| | 173 | } |
|---|
| | 174 | |
|---|
# Bundle AudioBook/output_files into a gzipped tar archive named
# "<username>-<YYYYMMDD>-<tarSuffix>.tgz" in the current working directory.
#
# Parameters:
#   $self - hash reference; the 'debug', 'username' and 'tarSuffix' keys are
#           read.  A missing username or suffix is treated as an empty string.
#
# Side effects:
#   Prints progress messages to STDOUT and runs the external `tar` program.
#   When tar cannot be started or exits non-zero, a warning is emitted and the
#   "please submit" message is suppressed.
sub _createTarFile {
    my ($self) = @_;
    my $debug     = $self->{'debug'};
    my $username  = $self->{"username"};
    my $tarSuffix = $self->{"tarSuffix"};
    $username  = '' unless defined $username;
    $tarSuffix = '' unless defined $tarSuffix;

    # localtime() yields a 0-based month and a year counted from 1900, so
    # both need adjusting before they can be used as a calendar date.
    my ($mday, $mon, $year) = (localtime(time))[3, 4, 5];
    my $tarfile = sprintf("%s-%04d%02d%02d-%s.tgz",
        $username, $year + 1900, $mon + 1, $mday, $tarSuffix);

    print "creating gzipped tar file:$tarfile \n";

    # Verbose tar output only in debug mode.  The list form of system()
    # bypasses the shell, so unusual characters in the user name or suffix
    # cannot be misread as shell syntax.  --exclude precedes the directory so
    # that it applies to it regardless of tar's option-position handling.
    my @tar_cmd = (
        'tar', ($debug ? '-zcvf' : '-zcf'), $tarfile,
        '--exclude=.svn', 'AudioBook/output_files',
    );
    print "cmd:@tar_cmd\n" if $debug;

    if (system(@tar_cmd) != 0) {
        warn "tar command failed (status $?): @tar_cmd\n";
        return;
    }

    print "please submit your tar file to: www.voxforge.org\n";
}
|---|
| | 194 | |
|---|
| | 195 | sub _random_characters { |
|---|