Changeset 2578
- Timestamp:
- 05/12/08 17:48:45 (7 months ago)
- Files:
-
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook.pm (modified) (3 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Dictionary.pm (modified) (1 diff)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Text.pm (modified) (1 diff)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/MissingWords_out (modified) (1 diff)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/aligned.out (modified) (6 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/dict (modified) (6 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/dlog (modified) (3 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/htksegment_log (modified) (4 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/wlist (modified) (6 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/words.mlf (modified) (3 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/output_files/dict (modified) (6 diffs)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/output_files/prompts (modified) (1 diff)
- Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/test/text-original.txt (moved) (moved from Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/test/text-original)
- Trunk/Scripts/Audio_scripts/UserSubmission.pm (modified) (11 diffs)
- Trunk/Scripts/Audio_scripts/UserSubmission/PROMPTS (added)
- Trunk/Scripts/Audio_scripts/UserSubmission/PROMPTS.pm (modified) (4 diffs)
- Trunk/Scripts/Audio_scripts/UserSubmission/PROMPTS/EN.pm (added)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook.pm
r2576 r2578 53 53 ### Main 54 54 #################################################################### 55 sub process { 56 my ($self)= @_; 57 my $debug = $self->{'debug'}; 58 my $audiofile = $self->{"audiofile"}; 59 my $textfile = $self->{"textfile"}; 60 my $username = $self->{"username"}; 61 my $tarSuffix = $self->{"tarSuffix"}; 62 my $voxforgeDict = $self->{"voxforgeDict"}; 63 my $htk_files = $self->{'htk_files'}; 64 65 my $textContents = AudioBook::Text->new($textfile); 66 $textContents->createWLISTFile("AudioBook/interim_files/wlist"); 67 68 my $dictionary = AudioBook::Dictionary->new($self); 69 my $missingwordfound = $dictionary->findOutOfVocabularyWords($voxforgeDict,"AudioBook/interim_files/MissingWords"); 70 if ($missingwordfound) { 71 $dictionary->getPronunciations("AudioBook/interim_files/MissingWords_out"); # uses g2p 72 $dictionary->updatePronDict(); 73 # need to update dict with missing words 74 75 $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $voxforgeDict"); system($command) == 0 or confess "fullrun $command failed: $?"; 76 $command = ("cp AudioBook/interim_files/MissingWords_out AudioBook/output_files/MissingWords"); print "cmd:$command\n" if $debug; system($command); 77 } else { 78 unlink ("AudioBook/interim_files/MissingWords_out"); 79 open(MISSINGWORDSOUT,">AudioBook/output_files/MissingWords") or confess ("cannot open AudioBook/output_files/MissingWords file"); 80 print MISSINGWORDSOUT "no missing words\n"; 81 close MISSINGWORDSOUT 82 } 83 $command = ("cp AudioBook/interim_files/dict AudioBook/output_files"); print "cmd:$command\n" if $debug; system($command); 84 my $audio = AudioBook::Audio->new($self); 85 $audio->segment($audiofile,$textContents); 86 if (defined($tarSuffix)){ 87 _createTarFile($self); 88 } 89 } 90 91 sub _createTarFile { 92 my ($self)= @_; 93 my $debug = $self->{'debug'}; 94 my $username = $self->{"username"}; 95 my $tarSuffix = $self->{"tarSuffix"}; 96 97 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); 98 $year += 1900; 99 $mon = sprintf("%02d", $mon); 100 $mday = sprintf("%02d", $mday); 101 print "creating gzipped tar file:$username\-$year$mon$mday\-$tarSuffix\.tgz \n"; 102 if ($debug) { 103 $command = ("tar -zcvf $username\-$year$mon$mday\-$tarSuffix\.tgz AudioBook/output_files --exclude \"\.svn\" "); print "cmd:$command\n" if $debug; system($command); 104 } else { 105 $command = ("tar -zcf $username\-$year$mon$mday\-$tarSuffix\.tgz AudioBook/output_files --exclude \"\.svn\" "); print "cmd:$command\n" if $debug; system($command); 106 } 107 print "please submit your tar file to: www.voxforge.org\n"; 108 } 109 110 sub _random_characters { 111 my ($length) = @_; 112 my @chars=('a'..'z'); 113 my $randomString; 114 foreach (1..$length){ 115 $randomString.=$chars[rand @chars]; 116 } 117 return $randomString; 118 } 119 55 120 sub getOptions { 56 121 my ($self)= @_; … … 101 166 } elsif ($opt_K) { 102 167 $self->{"audiofile"}="AudioBook/test/audio.wav"; 103 $self->{"textfile"}="AudioBook/test/text-simple.txt"; 168 #$self->{"textfile"}="AudioBook/test/text-simple.txt"; 169 $self->{"textfile"}="AudioBook/test/text-original.txt"; 104 170 $self->{"voxforgeDict"}="AudioBook/test/VoxForgeDict"; 105 171 $self->{"tarSuffix"}=_random_characters(3); 106 172 $self->{"username"}="test"; 107 print "test using the following files:\n";108 print "\t" . $self->{"audiofile"} . "\n";109 print "\t" . $self->{"textfile"} . "\n";110 print "\t" . $self->{"voxforgeDict"} . "\n";111 173 } elsif ($opt_h) { 112 174 print "\nVoxForge Audio Segmentation Script Parameters\n"; … … 137 199 } 138 200 139 sub process {140 my ($self)= @_;141 my $debug = $self->{'debug'};142 my $audiofile = $self->{"audiofile"};143 my $textfile = $self->{"textfile"};144 my $username = $self->{"username"};145 my $tarSuffix = $self->{"tarSuffix"};146 my $voxforgeDict = $self->{"voxforgeDict"};147 my $tarSuffix = $self->{"tarSuffix"};148 my $htk_files = $self->{'htk_files'};149 150 my $textContents = AudioBook::Text->new($textfile);151 $textContents->createWLISTFile("AudioBook/interim_files/wlist");152 153 my $dictionary = AudioBook::Dictionary->new($self);154 my $missingwordfound = $dictionary->findOutOfVocabularyWords($voxforgeDict,"AudioBook/interim_files/MissingWords");155 if ($missingwordfound) {156 $dictionary->getPronunciations("AudioBook/interim_files/MissingWords_out"); # uses g2p157 $dictionary->updatePronDict();158 # need to update dict with missing words159 160 $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict" . $self->{"voxforgeDict"}); system($command) == 0 or confess "fullrun $command failed: $?";161 $command = ("cp AudioBook/interim_files/MissingWords_out AudioBook/output_files/MissingWords"); print "cmd:$command\n" if $debug; system($command);162 } else {163 open(MISSINGWORDSOUT,">AudioBook/output_files/MissingWords") or confess ("cannot open AudioBook/output_files/MissingWords file");164 print MISSINGWORDSOUT "no missing words\n";165 close MISSINGWORDSOUT166 }167 $command = ("cp AudioBook/interim_files/dict AudioBook/output_files"); print "cmd:$command\n" if $debug; system($command);168 my $audio = AudioBook::Audio->new($self);169 $audio->segment($audiofile,$textContents);170 if (defined($tarSuffix)){171 _createTarFile($self);172 }173 }174 175 sub _createTarFile {176 my ($self)= @_;177 my $debug = $self->{'debug'};178 my $username = $self->{"username"};179 my $tarSuffix = $self->{"tarSuffix"};180 my $tarSuffix = $self->{"tarSuffix"};181 182 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);183 $year += 1900;184 $mon = sprintf("%02d", $mon);185 $mday = sprintf("%02d", $mday);186 print "creating gzipped tar file:$username\-$year$mon$mday\-$tarSuffix\.tgz \n";187 if ($debug) {188 $command = ("tar -zcvf $username\-$year$mon$mday\-$tarSuffix\.tgz AudioBook/output_files --exclude \"\.svn\" "); print "cmd:$command\n" if $debug; system($command);189 } else {190 $command = ("tar -zcf $username\-$year$mon$mday\-$tarSuffix\.tgz AudioBook/output_files --exclude \"\.svn\" "); print "cmd:$command\n" if $debug; system($command);191 }192 print "please submit your tar file to: www.voxforge.org\n";193 }194 195 sub _random_characters {196 my ($length) = @_;197 my @chars=('a'..'z');198 my $randomString;199 foreach (1..$length){200 $randomString.=$chars[rand @chars];201 }202 return $randomString;203 }204 201 1; Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Dictionary.pm
r2576 r2578 26 26 use diagnostics; 27 27 use Carp; 28 use Lingua::EN::Numbers qw(num2en num2en_ordinal); 29 use Lingua::EN::Numbers::Years; 28 30 29 31 my $command; 30 31 32 #################################################################### 32 33 ### Constructor Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Text.pm
r2576 r2578 172 172 } else { 173 173 push (@$words, $number); 174 print "**** not sure what to do with:$number: please correct in eText\n";174 print "****$number not converted\n"; 175 175 } 176 176 Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/MissingWords_out
r2570 r2578 1 MOJOMOVE411 [MOJOMOVE411] m ow jh ow m uw v f n n Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/aligned.out
r2575 r2578 38 38 68200000 70300000 r -1418.653687 39 39 70300000 70300000 sp -0.235017 40 70300000 72300000 m -1382.702515 MOJOMOVE 40 70300000 72300000 m -1382.702515 MOJOMOVE411 41 41 72300000 73900000 ow -978.509583 42 42 73900000 74800000 jh -591.590271 … … 45 45 77700000 80600000 uw -1760.078613 46 46 80600000 82600000 v -1403.530029 47 82600000 82600000 sp -0.235017 48 82600000 84400000 f -1091.546265 FOUR 49 84400000 85300000 ao -612.738708 50 85300000 86600000 r -949.012756 51 86600000 86600000 sp -0.235017 52 86600000 87300000 w -470.687256 ONE 53 87300000 89000000 ah -1068.666382 54 89000000 90500000 n -927.216187 47 82600000 84400000 f -1091.546265 48 84400000 87700000 n -2531.834717 49 87700000 90500000 n -1852.228149 55 50 90500000 90500000 sp -0.235017 56 90500000 90800000 w -213.120544 ONE57 90800000 9 3000000 ah -1393.07617258 9 3000000 94900000 n -1128.90502951 90500000 90800000 k -240.841125 COM 52 90800000 92600000 aa -1081.417236 53 92600000 94900000 m -1386.785156 59 54 94900000 94900000 sp -0.235017 60 94900000 96000000 d -717.843140 DOT 61 96000000 97800000 aa -1176.526489 62 97800000 98500000 t -505.287354 63 98500000 98500000 sp -0.235017 64 98500000 100400000 k -1282.235229 COM 65 100400000 103400000 aa -1583.814209 66 103400000 105200000 m -1105.898193 67 105200000 106600000 sp -737.433594 68 106600000 106900000 b -189.849640 BY 69 106900000 108300000 ay -951.593445 70 108300000 108300000 sp -0.235017 71 108300000 110000000 r -1148.621338 ROBERT 55 94900000 95900000 b -623.744507 BY 56 95900000 103700000 ay -5017.617676 57 103700000 106800000 sp -1927.296143 58 106800000 110000000 r -2306.654541 ROBERT 72 59 110000000 111200000 aa -703.356628 73 60 111200000 112000000 b -572.268555 … … 101 88 141100000 142300000 ao -810.766357 102 89 142300000 144000000 r -1171.934448 103 144000000 145200000 jh -803.447327 104 145200000 145200000 sp -0.235017 105 145200000 145700000 d -377.922943 DOT 106 145700000 148400000 aa -1687.383545 107 148400000 149000000 t -434.164429 108 149000000 149000000 sp -0.235017 109 149000000 151700000 ao -1835.239380 ORG 90 144000000 145800000 jh -1243.679077 91 145800000 145800000 sp -0.235017 92 145800000 151700000 ao -4049.692871 ORG 110 93 151700000 153700000 r -1328.250610 111 94 153700000 154300000 g -424.893402 … … 121 104 170200000 170700000 k -369.333679 122 105 170700000 171600000 l -659.565674 123 171600000 173000000 iy -908.040466 124 173000000 173000000 sp -0.235017 125 173000000 173800000 p -549.036621 POETRY 126 173800000 175900000 ow -1213.417236 127 175900000 176200000 ax -204.439636 128 176200000 177700000 t -1050.475952 129 177700000 178900000 r -805.788147 130 178900000 181100000 iy -1380.469849 131 181100000 182300000 sp -633.162842 106 171600000 172600000 iy -665.369934 107 172600000 172600000 sp -0.235017 108 172600000 173000000 m -250.922897 MONOLOGUE 109 173000000 175400000 aa -1489.473389 110 175400000 175700000 n -242.715103 111 175700000 176000000 ax -221.046005 112 176000000 176300000 l -215.975601 113 176300000 176600000 ao -229.448303 114 176600000 181000000 g -3165.420654 115 181000000 182300000 sp -695.738953 132 116 182300000 183300000 k -677.003906 COLLECTION 133 117 183300000 184000000 ax -455.986847 … … 168 152 247000000 249400000 l -1534.862549 169 153 249400000 251000000 z -999.659790 170 251000000 252500000 sp -869.750610 171 252500000 252800000 hh -198.110382 HOW 172 252800000 254500000 aw -1056.930664 173 254500000 254500000 sp -0.235017 174 254500000 255500000 m -680.137939 MANY 175 255500000 256200000 eh -439.305176 176 256200000 256700000 n -315.102844 177 256700000 257900000 iy -738.825989 178 257900000 257900000 sp -0.235017 179 257900000 258400000 ax -315.671692 A 180 258400000 258400000 sp -0.235017 181 258400000 260400000 t -1291.936768 TALE 182 260400000 262100000 ey -1101.318237 183 262100000 264400000 l -1475.232666 184 264400000 264900000 sp -292.931213 185 264900000 265600000 dh -449.029144 THEIR 186 265600000 266600000 eh -643.028442 187 266600000 267400000 r -548.846680 188 267400000 267400000 sp -0.235017 189 267400000 268400000 m -633.779907 MUSIC 154 251000000 252400000 sp -810.360474 155 252400000 252700000 dh -191.927155 THOSE 156 252700000 254300000 ow -1005.157776 157 254300000 254600000 z -249.030563 158 254600000 254600000 sp -0.235017 159 254600000 254900000 iy -233.816315 EVENING 160 254900000 255200000 v -248.230988 161 255200000 255800000 n -406.460815 162 255800000 258700000 ix -1827.929199 163 258700000 259000000 ng -201.385193 164 259000000 259000000 sp -0.235017 165 259000000 259300000 b -203.654861 BELLS 166 259300000 262100000 eh -1718.569336 167 262100000 262600000 l -365.826385 168 262600000 262900000 z -224.563644 169 262900000 262900000 sp -0.235017 170 262900000 263200000 hh -213.729538 HOW 171 263200000 264000000 aw -514.845581 172 264000000 264000000 sp -0.235017 173 264000000 264300000 m -196.316437 MANY 174 264300000 264600000 eh -209.032455 175 264600000 265000000 n -255.899612 176 265000000 265400000 iy -278.018890 177 265400000 265400000 sp -0.235017 178 265400000 266000000 ax -394.383759 A 179 266000000 266000000 sp -0.235017 180 266000000 266300000 t -214.197128 TALE 181 266300000 266600000 ey -219.312790 182 266600000 266900000 l -233.756607 183 266900000 266900000 sp -0.235017 184 266900000 267200000 dh -243.673233 THEIR 185 267200000 267500000 eh -275.131104 186 267500000 267800000 r -209.015839 187 267800000 267800000 sp -0.235017 188 267800000 268400000 m -354.742706 MUSIC 190 189 268400000 269600000 y -715.677551 191 190 269600000 270000000 uw -266.001984 … … 383 382 507400000 509300000 ow -1212.547852 384 383 509300000 509300000 sp -0.235017 385 509300000 510200000 t -628.625244 TWILL 386 510200000 511100000 w -645.009827 384 509300000 509800000 t -371.192139 T 385 509800000 510100000 iy -226.032471 386 510100000 510100000 sp -0.235017 387 510100000 511100000 w -710.753845 WILL 387 388 511100000 511500000 ih -308.505402 388 389 511500000 512900000 l -938.400208 Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/dict
r2570 r2578 15 15 DELLS [DELLS] d eh l z sp 16 16 DOMAIN [DOMAIN] d ow m ey n sp 17 DOT [DOT] d aa t sp18 17 DWELLS [DWELLS] d w eh l z sp 19 18 EVENING [EVENING] iy v n ix ng sp 20 19 FOR [FOR] f ao r sp 21 FOUR [FOUR] f ao r sp22 20 GAY [GAY] g ey sp 23 21 GONE [GONE] g ao n sp … … 34 32 LAST [LAST] l ae s t sp 35 33 MANY [MANY] m eh n iy sp 36 MOJOMOVE [MOJOMOVE] m ow jh ow m uw v sp 34 MOJOMOVE411 [MOJOMOVE411] m ow jh ow m uw v f n n sp 35 MONOLOGUE [MONOLOGUE] m aa n ax l ao g sp 37 36 MOORE [MOORE] m uh r sp 38 37 MORE [MORE] m ao r sp … … 42 41 OF [OF] ah v sp 43 42 ON [ON] aa n sp 44 ONE [ONE] w ah n sp45 43 ORG [ORG] ao r g sp 46 44 OTHER [OTHER] ah dh er sp … … 48 46 PASSED [PASSED] p ae s t sp 49 47 PEAL [PEAL] p iy l sp 50 POETRY [POETRY] p ow ax t r iy sp51 48 PRAISE [PRAISE] p r ey z sp 52 49 PUBLIC [PUBLIC] p ah b l ix k sp … … 65 62 STILL [STILL] s t ih l sp 66 63 SWEET [SWEET] s w iy t sp 64 T [T] t iy sp 67 65 TALE [TALE] t ey l sp 68 66 TELLS [TELLS] t eh l z sp … … 78 76 TOMB [TOMB] t uw m sp 79 77 TUNEFUL [TUNEFUL] t uw n f ax l sp 80 TWILL [TWILL] t w ih l sp81 78 VOXFORGE [VOXFORGE] v aa k s f ao r jh sp 82 79 WALK [WALK] w ao k sp Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/dlog
r2573 r2578 4 4 --------------------------- 5 5 Dictionary TotalWords WordsUsed TotalProns PronsUsed 6 VoxForgeDict 12975 7 90 129778 907 dict 90 90 90 906 VoxForgeDict 129758 87 129779 87 7 dict 87 87 87 87 8 8 9 90words required, 0 missing9 87 words required, 0 missing 10 10 11 11 New Phone Usage Counts 12 12 --------------------- 13 13 1. ax : 10 14 2. sp : 8 814 2. sp : 85 15 15 3. ae : 7 16 4. m : 1 417 5. n : 1 518 6. d : 916 4. m : 15 17 5. n : 17 18 6. d : 8 19 19 7. aa : 13 20 8. r : 2 220 8. r : 20 21 21 9. z : 14 22 10. w : 1 222 10. w : 10 23 23 11. ey : 5 24 24 12. b : 6 … … 30 30 18. k : 11 31 31 19. sh : 3 32 20. ow : 833 21. t : 1934 22. v : 435 23. ix : 736 24. ng : 537 25. f : 438 26. ao : 1039 27. g : 340 28. hh : 541 29. er : 442 30. ih : 932 20. ow : 7 33 21. v : 4 34 22. ix : 7 35 23. ng : 5 36 24. f : 4 37 25. ao : 10 38 26. g : 4 39 27. hh : 5 40 28. er : 4 41 29. ih : 8 42 30. t : 17 43 43 31. aw : 3 44 44 32. jh : 3 … … 48 48 36. uh : 1 49 49 37. y : 3 50 38. ah : 450 38. ah : 3 51 51 39. dh : 10 52 40. p : 652 40. p : 5 53 53 41. dx : 1 54 54 42. sil : 2 Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/htksegment_log
r2570 r2578 7 7 READ 60200000 64300000 0 8 8 FOR 64300000 70300000 0 9 MOJOMOVE 70300000 82600000 0 10 FOUR 82600000 86600000 0 11 ONE 86600000 90500000 0 12 ONE 90500000 94900000 0 13 DOT 94900000 98500000 0 14 COM 98500000 105200000 1400000 15 BY 106600000 108300000 0 16 ROBERT 108300000 113800000 0 9 MOJOMOVE411 70300000 90500000 0 10 COM 90500000 94900000 0 11 BY 94900000 103700000 3100000 12 ROBERT 106800000 113800000 0 17 13 SCOTT 113800000 118900000 8200000 18 14 AS 127100000 129700000 0 … … 20 16 OF 133100000 134500000 0 21 17 THE 134500000 135400000 0 22 VOXFORGE 135400000 145200000 0 23 DOT 145200000 149000000 0 24 ORG 149000000 154300000 4000000 18 VOXFORGE 135400000 145800000 0 19 ORG 145800000 154300000 4000000 25 20 SHORTS 158300000 165800000 800000 26 WEEKLY 166600000 17 3000000 027 POETRY 173000000 181100000 120000021 WEEKLY 166600000 172600000 0 22 MONOLOGUE 172600000 181000000 1300000 28 23 COLLECTION 182300000 189700000 29900000 29 24 THOSE 219600000 224100000 0 … … 32 27 THOSE 235100000 240100000 0 33 28 EVENING 240100000 245200000 0 34 BELLS 245200000 251000000 1500000 35 HOW 252500000 254500000 0 36 MANY 254500000 257900000 0 37 A 257900000 258400000 0 38 TALE 258400000 264400000 500000 39 THEIR 264900000 267400000 0 40 MUSIC 267400000 272200000 800000 29 BELLS 245200000 251000000 1400000 30 THOSE 252400000 254600000 0 31 EVENING 254600000 259000000 0 32 BELLS 259000000 262900000 0 33 HOW 262900000 264000000 0 34 MANY 264000000 265400000 0 35 A 265400000 266000000 0 36 TALE 266000000 266900000 0 37 THEIR 266900000 267800000 0 38 MUSIC 267800000 272200000 800000 41 39 TELLS 273000000 279000000 4500000 42 40 OF 283500000 285500000 0 … … 84 82 AND 502900000 505500000 0 85 83 SO 505500000 509300000 0 86 TWILL 509300000 512900000 0 84 T 509300000 510100000 0 85 WILL 510100000 512900000 0 87 86 BE 512900000 517100000 0 88 87 WHEN 517100000 520900000 0 Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/wlist
r2552 r2578 15 15 DELLS 16 16 DOMAIN 17 DOT18 17 DWELLS 19 18 EVENING 20 19 FOR 21 FOUR22 20 GAY 23 21 GONE … … 34 32 LAST 35 33 MANY 36 MOJOMOVE 34 MOJOMOVE411 35 MONOLOGUE 37 36 MOORE 38 37 MORE … … 42 41 OF 43 42 ON 44 ONE45 43 ORG 46 44 OTHER … … 48 46 PASSED 49 47 PEAL 50 POETRY51 48 PRAISE 52 49 PUBLIC … … 65 62 STILL 66 63 SWEET 64 T 67 65 TALE 68 66 TELLS … … 78 76 TOMB 79 77 TUNEFUL 80 TWILL81 78 VOXFORGE 82 79 WALK Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/words.mlf
r2572 r2578 9 9 READ 10 10 FOR 11 MOJOMOVE 12 FOUR 13 ONE 14 ONE 15 DOT 11 MOJOMOVE411 16 12 COM 17 13 BY … … 23 19 THE 24 20 VOXFORGE 25 DOT26 21 ORG 27 22 SHORTS 28 23 WEEKLY 29 POETRY 24 MONOLOGUE 30 25 COLLECTION 26 THOSE 27 EVENING 28 BELLS 31 29 THOSE 32 30 EVENING … … 86 84 AND 87 85 SO 88 TWILL 86 T 87 WILL 89 88 BE 90 89 WHEN Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/output_files/dict
r2574 r2578 15 15 DELLS [DELLS] d eh l z sp 16 16 DOMAIN [DOMAIN] d ow m ey n sp 17 DOT [DOT] d aa t sp18 17 DWELLS [DWELLS] d w eh l z sp 19 18 EVENING [EVENING] iy v n ix ng sp 20 19 FOR [FOR] f ao r sp 21 FOUR [FOUR] f ao r sp22 20 GAY [GAY] g ey sp 23 21 GONE [GONE] g ao n sp … … 34 32 LAST [LAST] l ae s t sp 35 33 MANY [MANY] m eh n iy sp 36 MOJOMOVE [MOJOMOVE] m ow jh ow m uw v sp 34 MOJOMOVE411 [MOJOMOVE411] m ow jh ow m uw v f n n sp 35 MONOLOGUE [MONOLOGUE] m aa n ax l ao g sp 37 36 MOORE [MOORE] m uh r sp 38 37 MORE [MORE] m ao r sp … … 42 41 OF [OF] ah v sp 43 42 ON [ON] aa n sp 44 ONE [ONE] w ah n sp45 43 ORG [ORG] ao r g sp 46 44 OTHER [OTHER] ah dh er sp … … 48 46 PASSED [PASSED] p ae s t sp 49 47 PEAL [PEAL] p iy l sp 50 POETRY [POETRY] p ow ax t r iy sp51 48 PRAISE [PRAISE] p r ey z sp 52 49 PUBLIC [PUBLIC] p ah b l ix k sp … … 65 62 STILL [STILL] s t ih l sp 66 63 SWEET [SWEET] s w iy t sp 64 T [T] t iy sp 67 65 TALE [TALE] t ey l sp 68 66 TELLS [TELLS] t eh l z sp … … 78 76 TOMB [TOMB] t uw m sp 79 77 TUNEFUL [TUNEFUL] t uw n f ax l sp 80 TWILL [TWILL] t w ih l sp81 78 VOXFORGE [VOXFORGE] v aa k s f ao r jh sp 82 79 WALK [WALK] w ao k sp Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/output_files/prompts
r2573 r2578 1 audio0001 THOSE EVENING BELLS BY THOMAS MOORE 2 audio0002 READ FOR MOJOMOVE FOUR ONE ONE DOT COM BY ROBERT SCOTT 3 audio0003 AS PART OF THE VOXFORGE DOT ORG 4 audio0004 SHORTS WEEKLY POETRY COLLECTION THOSE EVENING BELLS THOSE EVENING BELLS 5 audio0005 HOW MANY A TALE THEIR MUSIC TELLS 6 audio0006 OF YOUTH AND HOME AND THAT SWEET TIME 7 audio0007 WHEN LAST I HEARD THEIR SOOTHING CHIME 8 audio0008 THOSE JOYOUS HOURS ARE PASSED AWAY 9 audio0009 AND MANY A HEART THAT THEN WAS GAY 10 audio0010 WITHIN THE TOMB NOW DARKLY DWELLS AND HEARS NO MORE 11 audio0011 THOSE EVENING BELLS AND SO TWILL BE WHEN I AM GONE 12 audio0012 THAT TUNEFUL PEAL WILL STILL RING ON 13 audio0013 WHILE OTHER BARDS SHALL WALK THESE DELLS 14 audio0014 AND SING YOUR PRAISE SWEET EVENING BELLS 15 audio0015 THIS RECORDING IS IN THE PUBLIC DOMAIN 1 audio0001 THOSE EVENING BELLS BY THOMAS MOORE READ FOR MOJOMOVE411 COM BY 2 audio0002 ROBERT SCOTT AS PART OF THE VOXFORGE ORG 3 audio0003 SHORTS WEEKLY MONOLOGUE COLLECTION 4 audio0004 THOSE EVENING BELLS THOSE EVENING BELLS THOSE EVENING BELLS HOW MANY A TALE THEIR MUSIC TELLS 5 audio0005 OF YOUTH AND HOME AND THAT SWEET TIME 6 audio0006 WHEN LAST I HEARD THEIR SOOTHING CHIME 7 audio0007 THOSE JOYOUS HOURS ARE PASSED AWAY 8 audio0008 AND MANY A HEART THAT THEN WAS GAY 9 audio0009 WITHIN THE TOMB NOW DARKLY DWELLS AND HEARS NO MORE 10 audio0010 THOSE EVENING BELLS AND SO T WILL BE WHEN I AM GONE 11 audio0011 THAT TUNEFUL PEAL WILL STILL RING ON 12 audio0012 WHILE OTHER BARDS SHALL WALK THESE DELLS 13 audio0013 AND SING YOUR PRAISE SWEET EVENING BELLS 14 audio0014 THIS RECORDING IS IN THE PUBLIC DOMAIN Trunk/Scripts/Audio_scripts/UserSubmission.pm
r2517 r2578 35 35 #use UserSubmission::README; # Import @EXPORT symbols 36 36 use UserSubmission::README(); # Import nothing 37 use UserSubmission::PROMPTS(); 37 use UserSubmission::PROMPTS; 38 use UserSubmission::PROMPTS::EN; 38 39 use UserSubmission::LICENSE(); 39 40 use UserSubmission::AUDIO(); … … 56 57 # maybe should just open fles in binmode rather than use dos2unix???? 57 58 my ($parms, $audio_dir_name ) = @_; 58 # !!!!!!59 #my $debug = $$parms{"debug"};60 #my $QuarantineDir = $$parms{"QuarantineDir"};61 59 my $debug = $parms->getDebug; 62 60 my $QuarantineDir = $parms->getQuarantineDir; 63 # !!!!!!64 61 opendir(DIR, "$QuarantineDir/$audio_dir_name") || confess ("Unable to open directory: $QuarantineDir/$audio_dir_name\n"); 65 62 while (my $filename = readdir(DIR)) { … … 84 81 85 82 sub ValidateFilenames { 86 # !!!!!!87 #my ($parms, $AudioDirName) = @_;88 83 my($parms,$AudioDirNameWithPath) = @_; # assumes a 'flat' directory (no sub-directories) 89 #my $debug = $$parms{"debug"};90 # !!!!!!91 84 # check that user used PROMPTS & README for filenames 92 85 # windows defaults to prompts.txt 93 86 # look into converting to XML and run through XML schema validation ... 94 # !!!!!! use OO syntax95 #UserSubmission::README::ValidateFileName($parms, $AudioDirName) || confess "UserSubmission error: $?" ;96 #UserSubmission::LICENSE::ValidateFileName($parms, $AudioDirName) || confess "UserSubmission error: $?" ;97 #UserSubmission::PROMPTS::ValidateFileName($parms, $AudioDirName) || confess "UserSubmission error: $?" ;98 print "!!!!!!AudioDirNameWithPath1:$AudioDirNameWithPath\n";99 87 100 88 my $readmeWithPath = UserSubmission::README->getFileWithPath($parms,$AudioDirNameWithPath) or confess "UserSubmission error: $?" ; … … 106 94 my $promptsWithPath = UserSubmission::PROMPTS->getFileWithPath($parms,$AudioDirNameWithPath) || confess "UserSubmission error: $?" ; 107 95 UserSubmission::PROMPTS->ValidateFileName($parms,$AudioDirNameWithPath,$promptsWithPath); 108 # !!!!!!109 96 } 110 97 111 98 sub Cleanup { # Repository class calls this method 112 99 my ($parms, $audio_dir_name) = @_; 113 # !!!!!!114 #my $debug = $$parms{"debug"};115 #my $QuarantineDir = $$parms{"QuarantineDir"};116 #my $ToBeProcessedDir = $$parms{"ToBeProcessedDir"};117 100 my $debug = $parms->getDebug; 118 101 my $QuarantineDir = $parms->getQuarantineDir; … … 121 104 $command = ("rm -f $QuarantineDir/$audio_dir_name\.zip $QuarantineDir/$audio_dir_name\.tgz $QuarantineDir/$audio_dir_name\.tar.gz "); print "$command\n" if $debug; system($command) == 0 or confess "system $command failed: $?"; # remove hidden backup files 122 105 $command = ("rm -rf $ToBeProcessedDir/$audio_dir_name"); print "$command\n" if $debug; system($command) == 0 or confess "system $command failed: $?"; # remove hidden backup files 123 # !!!!!!124 106 return 1; 125 107 } … … 154 136 } 155 137 my $prompts; 156 $prompts = UserSubmission::PROMPTS->new($parms, $audio_dir_name, $readme);157 138 if ($readme->getLanguage() =~ /EN/) { 139 $prompts = UserSubmission::PROMPTS::EN->new($parms, $audio_dir_name, $readme); 158 140 # if updating this or other languages, need to set VALIDATION-PROMPTS for the new language in PROMPTS->_clean 159 141 #$prompts = UserSubmission::PROMPTS->new($parms, $audio_dir_name, $readme); … … 163 145 confess "UserSubmission error: $audio->getErrorMessage" ; 164 146 } 147 } else { 148 $prompts = UserSubmission::PROMPTS->new($parms, $audio_dir_name, $readme); 165 149 } 166 150 $directorynames{$audio_dir_name} = [$readme->getSamplingrate, $readme->getSamplingrateformat, $readme->getFiletype, $audio ]; … … 185 169 print "***moved to 'ToBeProcessed' directory: $audio_dir_name:$samplingrate:$samplingrateformat:$filetype\n"; 186 170 } 187 # !!!!!!188 189 171 return \%userSubmission; 190 #print "\nPromptsValidation.pl completed!****************************************************\n\n";191 # !!!!!!192 172 } 193 173 … … 223 203 } 224 204 close(DIR); 225 # !!!!!!226 #return \@tarlist;227 205 my @sorted = sort(@tarlist); 228 206 return \@sorted; 229 # !!!!!!230 231 207 } 232 208 … … 234 210 #to do look at Archive::Extract Cpan Module to handle tar tgz tar.gz and zip extractions ... 235 211 my ($parms, $filename, $suffix, $tarfile) = @_; 236 # !!!!!!237 #my $debug = $$parms{"debug"};238 #my $QuarantineDir = $$parms{"QuarantineDir"};239 212 my $debug = $parms->getDebug; 240 213 my $QuarantineDir = $parms->getQuarantineDir; 241 # !!!!!!242 214 my (@clamscan_output, $clamscan_results); 243 215 #$command = ("freshclam"); print "$command\n"; system($command) == 0 or confess "system $command failed: $?"; … … 300 272 sub _Copy2ToBeProcessed { # private 301 273 my ($parms, $audio_dir_name, $filetype, $audio) = @_; 302 # !!!!!!303 #my $debug = $$parms{"debug"};304 #my $QuarantineDir = $$parms{"QuarantineDir"};305 # my $ToBeProcessedDir = $$parms{"ToBeProcessedDir"};306 274 my $debug = $parms->getDebug; 307 275 my $QuarantineDir = $parms->getQuarantineDir; 308 276 my $ToBeProcessedDir = $parms-> getToBeProcessedDir; 309 # !!!!!!310 277 $audio->Finalize($parms, $audio_dir_name, $filetype); 311 278 if ($audio->getError) { Trunk/Scripts/Audio_scripts/UserSubmission/PROMPTS.pm
r2505 r2578 32 32 use lib '../../Audio_scripts'; 33 33 use UserSubmission::README; 34 35 34 my $command; 36 35 #################################################################### … … 137 136 open (FILEIN, "$QuarantineDir/$audio_dir_name/PROMPTS") ||confess ("Unable to open PROMPTS file for reading"); 138 137 # !!!!!!! 139 if ($readme->getLanguage() =~ /EN/) {140 open (FILEOUT,">$QuarantineDir/$audio_dir_name/VALIDATION-PROMPTS") || confess ("Unable to open VALIDATION-PROMPTS file for writing");141 }138 #if ($readme->getLanguage() =~ /EN/) { 139 # open (FILEOUT,">$QuarantineDir/$audio_dir_name/VALIDATION-PROMPTS") || confess ("Unable to open VALIDATION-PROMPTS file for writing"); 140 #} 142 141 # !!!!!!! 143 142 open (FILEOUT_PROD,">$QuarantineDir/$audio_dir_name/PROD-PROMPTS") || confess ("Unable to open PROD-PROMPTS file for writing"); … … 156 155 $linescalar =~ tr/a-z/A-Z/; # change to uppercase 157 156 $linescalar =~ s/,//g; # remove commas 158 $linescalar =~ s/\.//g; # remove periods157 # $linescalar =~ s/\.//g; # remove periods 159 158 # dealing with quotes 160 159 # $linescalar =~ s/\'//g; # remove single quotes; but need words like "don't" - need to research this more ... 161 160 # $linescalar =~ s/\'\b(.*)\b\'/$1/g; # remove single quotes from quoted text; single quote must be at start of a word, and at end of a word - does not work if there are two words with single quotesin them in same sentence ... 162 $linescalar =~ s/\'EM//g; # remove leading single quotes for contraction of them163 161 $linescalar =~ s/\"//g; # remove double quotes 164 $linescal