voxforge.org
VoxForge Dev

Changeset 2578

Show
Ignore:
Timestamp:
05/12/08 17:48:45 (7 months ago)
Author:
kmaclean
Message:

updates to Prompt processing scripts - break EN out from non-EN language processing

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook.pm

    r2576 r2578  
    5353### Main 
    5454#################################################################### 
     55sub process { 
     56        my ($self)= @_; 
     57        my $debug = $self->{'debug'}; 
     58        my $audiofile = $self->{"audiofile"}; 
     59        my $textfile = $self->{"textfile"}; 
     60        my $username = $self->{"username"}; 
     61        my $tarSuffix = $self->{"tarSuffix"}; 
     62        my $voxforgeDict = $self->{"voxforgeDict"}; 
     63        my $htk_files = $self->{'htk_files'}; 
     64         
     65        my $textContents = AudioBook::Text->new($textfile); 
     66        $textContents->createWLISTFile("AudioBook/interim_files/wlist"); 
     67         
     68        my $dictionary = AudioBook::Dictionary->new($self); 
     69        my $missingwordfound = $dictionary->findOutOfVocabularyWords($voxforgeDict,"AudioBook/interim_files/MissingWords"); 
     70        if ($missingwordfound) {  
     71                $dictionary->getPronunciations("AudioBook/interim_files/MissingWords_out"); # uses g2p 
     72                $dictionary->updatePronDict(); 
     73                # need to update dict with missing words 
     74         
     75                $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict $voxforgeDict"); system($command) == 0 or confess "fullrun $command failed: $?"; 
     76                $command = ("cp AudioBook/interim_files/MissingWords_out AudioBook/output_files/MissingWords"); print "cmd:$command\n" if $debug; system($command); 
     77        } else { 
     78                unlink ("AudioBook/interim_files/MissingWords_out"); 
     79                open(MISSINGWORDSOUT,">AudioBook/output_files/MissingWords") or confess ("cannot open AudioBook/output_files/MissingWords file");        
     80                print MISSINGWORDSOUT "no missing words\n"; 
     81                close MISSINGWORDSOUT 
     82        }  
     83        $command = ("cp AudioBook/interim_files/dict AudioBook/output_files"); print "cmd:$command\n" if $debug; system($command);       
     84        my $audio = AudioBook::Audio->new($self); 
     85        $audio->segment($audiofile,$textContents); 
     86        if (defined($tarSuffix)){ 
     87                _createTarFile($self); 
     88        } 
     89} 
     90 
     91sub _createTarFile { 
     92        my ($self)= @_; 
     93        my $debug = $self->{'debug'}; 
     94        my $username = $self->{"username"}; 
     95        my $tarSuffix = $self->{"tarSuffix"}; 
     96         
     97        my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); 
     98        $year += 1900; 
     99        $mon = sprintf("%02d", $mon);  
     100        $mday = sprintf("%02d", $mday); 
     101        print "creating gzipped tar file:$username\-$year$mon$mday\-$tarSuffix\.tgz \n"; 
     102        if ($debug) { 
     103                $command = ("tar -zcvf $username\-$year$mon$mday\-$tarSuffix\.tgz AudioBook/output_files --exclude \"\.svn\" "); print "cmd:$command\n" if $debug; system($command); 
     104        } else { 
     105                $command = ("tar -zcf $username\-$year$mon$mday\-$tarSuffix\.tgz AudioBook/output_files --exclude \"\.svn\" "); print "cmd:$command\n" if $debug; system($command); 
     106        } 
     107        print "please submit your tar file to: www.voxforge.org\n";      
     108} 
     109 
     110sub _random_characters { 
     111        my ($length) = @_;       
     112        my @chars=('a'..'z'); 
     113        my $randomString; 
     114        foreach (1..$length){ 
     115                $randomString.=$chars[rand @chars]; 
     116        } 
     117        return $randomString; 
     118} 
     119 
    55120sub getOptions { 
    56121        my ($self)= @_; 
     
    101166        } elsif ($opt_K) { 
    102167                $self->{"audiofile"}="AudioBook/test/audio.wav"; 
    103                 $self->{"textfile"}="AudioBook/test/text-simple.txt"; 
     168                #$self->{"textfile"}="AudioBook/test/text-simple.txt"; 
     169                $self->{"textfile"}="AudioBook/test/text-original.txt"; 
    104170                $self->{"voxforgeDict"}="AudioBook/test/VoxForgeDict"; 
    105171                $self->{"tarSuffix"}=_random_characters(3); 
    106172                $self->{"username"}="test"; 
    107                 print "test using the following files:\n"; 
    108                 print "\t" . $self->{"audiofile"} . "\n"; 
    109                 print "\t" . $self->{"textfile"} . "\n"; 
    110                 print "\t" . $self->{"voxforgeDict"} . "\n"; 
    111173        } elsif ($opt_h) { 
    112174                print "\nVoxForge Audio Segmentation Script Parameters\n";       
     
    137199} 
    138200 
    139 sub process { 
    140         my ($self)= @_; 
    141         my $debug = $self->{'debug'}; 
    142         my $audiofile = $self->{"audiofile"}; 
    143         my $textfile = $self->{"textfile"}; 
    144         my $username = $self->{"username"}; 
    145         my $tarSuffix = $self->{"tarSuffix"}; 
    146         my $voxforgeDict = $self->{"voxforgeDict"}; 
    147         my $tarSuffix = $self->{"tarSuffix"}; 
    148         my $htk_files = $self->{'htk_files'}; 
    149          
    150         my $textContents = AudioBook::Text->new($textfile); 
    151         $textContents->createWLISTFile("AudioBook/interim_files/wlist"); 
    152          
    153         my $dictionary = AudioBook::Dictionary->new($self); 
    154         my $missingwordfound = $dictionary->findOutOfVocabularyWords($voxforgeDict,"AudioBook/interim_files/MissingWords"); 
    155         if ($missingwordfound) {  
    156                 $dictionary->getPronunciations("AudioBook/interim_files/MissingWords_out"); # uses g2p 
    157                 $dictionary->updatePronDict(); 
    158                 # need to update dict with missing words 
    159          
    160                 $command = ("HDMan -A -D -T 1 -g $htk_files/global.ded -m -w AudioBook/interim_files/wlist -i -l AudioBook/interim_files/dlog AudioBook/interim_files/dict" . $self->{"voxforgeDict"}); system($command) == 0 or confess "fullrun $command failed: $?"; 
    161                 $command = ("cp AudioBook/interim_files/MissingWords_out AudioBook/output_files/MissingWords"); print "cmd:$command\n" if $debug; system($command); 
    162         } else { 
    163                 open(MISSINGWORDSOUT,">AudioBook/output_files/MissingWords") or confess ("cannot open AudioBook/output_files/MissingWords file");        
    164                 print MISSINGWORDSOUT "no missing words\n"; 
    165                 close MISSINGWORDSOUT 
    166         }  
    167         $command = ("cp AudioBook/interim_files/dict AudioBook/output_files"); print "cmd:$command\n" if $debug; system($command);       
    168         my $audio = AudioBook::Audio->new($self); 
    169         $audio->segment($audiofile,$textContents); 
    170         if (defined($tarSuffix)){ 
    171                 _createTarFile($self); 
    172         } 
    173 } 
    174  
    175 sub _createTarFile { 
    176                 my ($self)= @_; 
    177                 my $debug = $self->{'debug'}; 
    178                 my $username = $self->{"username"}; 
    179                 my $tarSuffix = $self->{"tarSuffix"}; 
    180                 my $tarSuffix = $self->{"tarSuffix"}; 
    181                  
    182                 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); 
    183                 $year += 1900; 
    184                 $mon = sprintf("%02d", $mon);  
    185                 $mday = sprintf("%02d", $mday); 
    186                 print "creating gzipped tar file:$username\-$year$mon$mday\-$tarSuffix\.tgz \n"; 
    187                 if ($debug) { 
    188                         $command = ("tar -zcvf $username\-$year$mon$mday\-$tarSuffix\.tgz AudioBook/output_files --exclude \"\.svn\" "); print "cmd:$command\n" if $debug; system($command); 
    189                 } else { 
    190                         $command = ("tar -zcf $username\-$year$mon$mday\-$tarSuffix\.tgz AudioBook/output_files --exclude \"\.svn\" "); print "cmd:$command\n" if $debug; system($command); 
    191                 } 
    192                 print "please submit your tar file to: www.voxforge.org\n";      
    193 } 
    194          
    195 sub _random_characters { 
    196         my ($length) = @_;       
    197         my @chars=('a'..'z'); 
    198         my $randomString; 
    199         foreach (1..$length){ 
    200                 $randomString.=$chars[rand @chars]; 
    201         } 
    202         return $randomString; 
    203 } 
    2042011; 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Dictionary.pm

    r2576 r2578  
    2626use diagnostics; 
    2727use Carp; 
     28use Lingua::EN::Numbers qw(num2en num2en_ordinal); 
     29use Lingua::EN::Numbers::Years; 
    2830 
    2931my $command; 
    30  
    3132#################################################################### 
    3233### Constructor 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/Text.pm

    r2576 r2578  
    172172        } else { 
    173173                push (@$words, $number); 
    174                 print "****not sure what to do with:$number: please correct in eText\n"; 
     174                print "****$number not converted\n"; 
    175175        } 
    176176                 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/MissingWords_out

    r2570 r2578  
     1MOJOMOVE411     [MOJOMOVE411]   m ow jh ow m uw v f n n 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/aligned.out

    r2575 r2578  
    383868200000 70300000 r -1418.653687 
    393970300000 70300000 sp -0.235017 
    40 70300000 72300000 m -1382.702515 MOJOMOVE 
     4070300000 72300000 m -1382.702515 MOJOMOVE411 
    414172300000 73900000 ow -978.509583 
    424273900000 74800000 jh -591.590271 
     
    454577700000 80600000 uw -1760.078613 
    464680600000 82600000 v -1403.530029 
    47 82600000 82600000 sp -0.235017 
    48 82600000 84400000 f -1091.546265 FOUR 
    49 84400000 85300000 ao -612.738708 
    50 85300000 86600000 r -949.012756 
    51 86600000 86600000 sp -0.235017 
    52 86600000 87300000 w -470.687256 ONE 
    53 87300000 89000000 ah -1068.666382 
    54 89000000 90500000 n -927.216187 
     4782600000 84400000 f -1091.546265 
     4884400000 87700000 n -2531.834717 
     4987700000 90500000 n -1852.228149 
    555090500000 90500000 sp -0.235017 
    56 90500000 90800000 w -213.120544 ONE 
    57 90800000 93000000 ah -1393.076172 
    58 93000000 94900000 n -1128.905029 
     5190500000 90800000 k -240.841125 COM 
     5290800000 92600000 aa -1081.417236 
     5392600000 94900000 m -1386.785156 
    595494900000 94900000 sp -0.235017 
    60 94900000 96000000 d -717.843140 DOT 
    61 96000000 97800000 aa -1176.526489 
    62 97800000 98500000 t -505.287354 
    63 98500000 98500000 sp -0.235017 
    64 98500000 100400000 k -1282.235229 COM 
    65 100400000 103400000 aa -1583.814209 
    66 103400000 105200000 m -1105.898193 
    67 105200000 106600000 sp -737.433594 
    68 106600000 106900000 b -189.849640 BY 
    69 106900000 108300000 ay -951.593445 
    70 108300000 108300000 sp -0.235017 
    71 108300000 110000000 r -1148.621338 ROBERT 
     5594900000 95900000 b -623.744507 BY 
     5695900000 103700000 ay -5017.617676 
     57103700000 106800000 sp -1927.296143 
     58106800000 110000000 r -2306.654541 ROBERT 
    7259110000000 111200000 aa -703.356628 
    7360111200000 112000000 b -572.268555 
     
    10188141100000 142300000 ao -810.766357 
    10289142300000 144000000 r -1171.934448 
    103 144000000 145200000 jh -803.447327 
    104 145200000 145200000 sp -0.235017 
    105 145200000 145700000 d -377.922943 DOT 
    106 145700000 148400000 aa -1687.383545 
    107 148400000 149000000 t -434.164429 
    108 149000000 149000000 sp -0.235017 
    109 149000000 151700000 ao -1835.239380 ORG 
     90144000000 145800000 jh -1243.679077 
     91145800000 145800000 sp -0.235017 
     92145800000 151700000 ao -4049.692871 ORG 
    11093151700000 153700000 r -1328.250610 
    11194153700000 154300000 g -424.893402 
     
    121104170200000 170700000 k -369.333679 
    122105170700000 171600000 l -659.565674 
    123 171600000 173000000 iy -908.040466 
    124 173000000 173000000 sp -0.235017 
    125 173000000 173800000 p -549.036621 POETRY 
    126 173800000 175900000 ow -1213.417236 
    127 175900000 176200000 ax -204.439636 
    128 176200000 177700000 t -1050.475952 
    129 177700000 178900000 r -805.788147 
    130 178900000 181100000 iy -1380.469849 
    131 181100000 182300000 sp -633.162842 
     106171600000 172600000 iy -665.369934 
     107172600000 172600000 sp -0.235017 
     108172600000 173000000 m -250.922897 MONOLOGUE 
     109173000000 175400000 aa -1489.473389 
     110175400000 175700000 n -242.715103 
     111175700000 176000000 ax -221.046005 
     112176000000 176300000 l -215.975601 
     113176300000 176600000 ao -229.448303 
     114176600000 181000000 g -3165.420654 
     115181000000 182300000 sp -695.738953 
    132116182300000 183300000 k -677.003906 COLLECTION 
    133117183300000 184000000 ax -455.986847 
     
    168152247000000 249400000 l -1534.862549 
    169153249400000 251000000 z -999.659790 
    170 251000000 252500000 sp -869.750610 
    171 252500000 252800000 hh -198.110382 HOW 
    172 252800000 254500000 aw -1056.930664 
    173 254500000 254500000 sp -0.235017 
    174 254500000 255500000 m -680.137939 MANY 
    175 255500000 256200000 eh -439.305176 
    176 256200000 256700000 n -315.102844 
    177 256700000 257900000 iy -738.825989 
    178 257900000 257900000 sp -0.235017 
    179 257900000 258400000 ax -315.671692 A 
    180 258400000 258400000 sp -0.235017 
    181 258400000 260400000 t -1291.936768 TALE 
    182 260400000 262100000 ey -1101.318237 
    183 262100000 264400000 l -1475.232666 
    184 264400000 264900000 sp -292.931213 
    185 264900000 265600000 dh -449.029144 THEIR 
    186 265600000 266600000 eh -643.028442 
    187 266600000 267400000 r -548.846680 
    188 267400000 267400000 sp -0.235017 
    189 267400000 268400000 m -633.779907 MUSIC 
     154251000000 252400000 sp -810.360474 
     155252400000 252700000 dh -191.927155 THOSE 
     156252700000 254300000 ow -1005.157776 
     157254300000 254600000 z -249.030563 
     158254600000 254600000 sp -0.235017 
     159254600000 254900000 iy -233.816315 EVENING 
     160254900000 255200000 v -248.230988 
     161255200000 255800000 n -406.460815 
     162255800000 258700000 ix -1827.929199 
     163258700000 259000000 ng -201.385193 
     164259000000 259000000 sp -0.235017 
     165259000000 259300000 b -203.654861 BELLS 
     166259300000 262100000 eh -1718.569336 
     167262100000 262600000 l -365.826385 
     168262600000 262900000 z -224.563644 
     169262900000 262900000 sp -0.235017 
     170262900000 263200000 hh -213.729538 HOW 
     171263200000 264000000 aw -514.845581 
     172264000000 264000000 sp -0.235017 
     173264000000 264300000 m -196.316437 MANY 
     174264300000 264600000 eh -209.032455 
     175264600000 265000000 n -255.899612 
     176265000000 265400000 iy -278.018890 
     177265400000 265400000 sp -0.235017 
     178265400000 266000000 ax -394.383759 A 
     179266000000 266000000 sp -0.235017 
     180266000000 266300000 t -214.197128 TALE 
     181266300000 266600000 ey -219.312790 
     182266600000 266900000 l -233.756607 
     183266900000 266900000 sp -0.235017 
     184266900000 267200000 dh -243.673233 THEIR 
     185267200000 267500000 eh -275.131104 
     186267500000 267800000 r -209.015839 
     187267800000 267800000 sp -0.235017 
     188267800000 268400000 m -354.742706 MUSIC 
    190189268400000 269600000 y -715.677551 
    191190269600000 270000000 uw -266.001984 
     
    383382507400000 509300000 ow -1212.547852 
    384383509300000 509300000 sp -0.235017 
    385 509300000 510200000 t -628.625244 TWILL 
    386 510200000 511100000 w -645.009827 
     384509300000 509800000 t -371.192139 T 
     385509800000 510100000 iy -226.032471 
     386510100000 510100000 sp -0.235017 
     387510100000 511100000 w -710.753845 WILL 
    387388511100000 511500000 ih -308.505402 
    388389511500000 512900000 l -938.400208 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/dict

    r2570 r2578  
    1515DELLS           [DELLS]         d eh l z sp 
    1616DOMAIN          [DOMAIN]        d ow m ey n sp 
    17 DOT             [DOT]           d aa t sp 
    1817DWELLS          [DWELLS]        d w eh l z sp 
    1918EVENING         [EVENING]       iy v n ix ng sp 
    2019FOR             [FOR]           f ao r sp 
    21 FOUR            [FOUR]          f ao r sp 
    2220GAY             [GAY]           g ey sp 
    2321GONE            [GONE]          g ao n sp 
     
    3432LAST            [LAST]          l ae s t sp 
    3533MANY            [MANY]          m eh n iy sp 
    36 MOJOMOVE        [MOJOMOVE]      m ow jh ow m uw v sp 
     34MOJOMOVE411     [MOJOMOVE411]   m ow jh ow m uw v f n n sp 
     35MONOLOGUE       [MONOLOGUE]     m aa n ax l ao g sp 
    3736MOORE           [MOORE]         m uh r sp 
    3837MORE            [MORE]          m ao r sp 
     
    4241OF              [OF]            ah v sp 
    4342ON              [ON]            aa n sp 
    44 ONE             [ONE]           w ah n sp 
    4543ORG             [ORG]           ao r g sp 
    4644OTHER           [OTHER]         ah dh er sp 
     
    4846PASSED          [PASSED]        p ae s t sp 
    4947PEAL            [PEAL]          p iy l sp 
    50 POETRY          [POETRY]        p ow ax t r iy sp 
    5148PRAISE          [PRAISE]        p r ey z sp 
    5249PUBLIC          [PUBLIC]        p ah b l ix k sp 
     
    6562STILL           [STILL]         s t ih l sp 
    6663SWEET           [SWEET]         s w iy t sp 
     64T               [T]             t iy sp 
    6765TALE            [TALE]          t ey l sp 
    6866TELLS           [TELLS]         t eh l z sp 
     
    7876TOMB            [TOMB]          t uw m sp 
    7977TUNEFUL         [TUNEFUL]       t uw n f ax l sp 
    80 TWILL           [TWILL]         t w ih l sp 
    8178VOXFORGE        [VOXFORGE]      v aa k s f ao r jh sp 
    8279WALK            [WALK]          w ao k sp 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/dlog

    r2573 r2578  
    44--------------------------- 
    55  Dictionary    TotalWords WordsUsed  TotalProns PronsUsed 
    6 VoxForgeDict    129757         90     129778         90 
    7         dict        90         90         90         90 
     6VoxForgeDict    129758         87     129779         87 
     7        dict        87         87         87         87 
    88 
    9 90 words required, 0 missing 
     987 words required, 0 missing 
    1010 
    1111New Phone Usage Counts 
    1212--------------------- 
    1313  1. ax    :    10 
    14   2. sp    :    88 
     14  2. sp    :    85 
    1515  3. ae    :     7 
    16   4. m     :    14 
    17   5. n     :    15 
    18   6. d     :     9 
     16  4. m     :    15 
     17  5. n     :    17 
     18  6. d     :     8 
    1919  7. aa    :    13 
    20   8. r     :    22 
     20  8. r     :    20 
    2121  9. z     :    14 
    22  10. w     :    12 
     22 10. w     :    10 
    2323 11. ey    :     5 
    2424 12. b     :     6 
     
    3030 18. k     :    11 
    3131 19. sh    :     3 
    32  20. ow    :     8 
    33  21. t     :    19 
    34  22. v     :     4 
    35  23. ix    :     7 
    36  24. ng    :     5 
    37  25. f     :     4 
    38  26. ao    :    10 
    39  27. g     :     3 
    40  28. hh    :     5 
    41  29. er    :     4 
    42  30. ih    :     9 
     32 20. ow    :     7 
     33 21. v     :     4 
     34 22. ix    :     7 
     35 23. ng    :     5 
     36 24. f     :     4 
     37 25. ao    :    10 
     38 26. g     :     4 
     39 27. hh    :     5 
     40 28. er    :     4 
     41 29. ih    :     8 
     42 30. t     :    17 
    4343 31. aw    :     3 
    4444 32. jh    :     3 
     
    4848 36. uh    :     1 
    4949 37. y     :     3 
    50  38. ah    :     4 
     50 38. ah    :     3 
    5151 39. dh    :    10 
    52  40. p     :     6 
     52 40. p     :     5 
    5353 41. dx    :     1 
    5454 42. sil   :     2 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/htksegment_log

    r2570 r2578  
    77READ 60200000 64300000 0 
    88FOR 64300000 70300000 0 
    9 MOJOMOVE 70300000 82600000 0 
    10 FOUR 82600000 86600000 0 
    11 ONE 86600000 90500000 0 
    12 ONE 90500000 94900000 0 
    13 DOT 94900000 98500000 0 
    14 COM 98500000 105200000 1400000 
    15 BY 106600000 108300000 0 
    16 ROBERT 108300000 113800000 0 
     9MOJOMOVE411 70300000 90500000 0 
     10COM 90500000 94900000 0 
     11BY 94900000 103700000 3100000 
     12ROBERT 106800000 113800000 0 
    1713SCOTT 113800000 118900000 8200000 
    1814AS 127100000 129700000 0 
     
    2016OF 133100000 134500000 0 
    2117THE 134500000 135400000 0 
    22 VOXFORGE 135400000 145200000 0 
    23 DOT 145200000 149000000 0 
    24 ORG 149000000 154300000 4000000 
     18VOXFORGE 135400000 145800000 0 
     19ORG 145800000 154300000 4000000 
    2520SHORTS 158300000 165800000 800000 
    26 WEEKLY 166600000 173000000 0 
    27 POETRY 173000000 181100000 1200000 
     21WEEKLY 166600000 172600000 0 
     22MONOLOGUE 172600000 181000000 1300000 
    2823COLLECTION 182300000 189700000 29900000 
    2924THOSE 219600000 224100000 0 
     
    3227THOSE 235100000 240100000 0 
    3328EVENING 240100000 245200000 0 
    34 BELLS 245200000 251000000 1500000 
    35 HOW 252500000 254500000 0 
    36 MANY 254500000 257900000 0 
    37 A 257900000 258400000 0 
    38 TALE 258400000 264400000 500000 
    39 THEIR 264900000 267400000 0 
    40 MUSIC 267400000 272200000 800000 
     29BELLS 245200000 251000000 1400000 
     30THOSE 252400000 254600000 0 
     31EVENING 254600000 259000000 0 
     32BELLS 259000000 262900000 0 
     33HOW 262900000 264000000 0 
     34MANY 264000000 265400000 0 
     35A 265400000 266000000 0 
     36TALE 266000000 266900000 0 
     37THEIR 266900000 267800000 0 
     38MUSIC 267800000 272200000 800000 
    4139TELLS 273000000 279000000 4500000 
    4240OF 283500000 285500000 0 
     
    8482AND 502900000 505500000 0 
    8583SO 505500000 509300000 0 
    86 TWILL 509300000 512900000 0 
     84T 509300000 510100000 0 
     85WILL 510100000 512900000 0 
    8786BE 512900000 517100000 0 
    8887WHEN 517100000 520900000 0 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/wlist

    r2552 r2578  
    1515DELLS 
    1616DOMAIN 
    17 DOT 
    1817DWELLS 
    1918EVENING 
    2019FOR 
    21 FOUR 
    2220GAY 
    2321GONE 
     
    3432LAST 
    3533MANY 
    36 MOJOMOVE 
     34MOJOMOVE411 
     35MONOLOGUE 
    3736MOORE 
    3837MORE 
     
    4241OF 
    4342ON 
    44 ONE 
    4543ORG 
    4644OTHER 
     
    4846PASSED 
    4947PEAL 
    50 POETRY 
    5148PRAISE 
    5249PUBLIC 
     
    6562STILL 
    6663SWEET 
     64T 
    6765TALE 
    6866TELLS 
     
    7876TOMB 
    7977TUNEFUL 
    80 TWILL 
    8178VOXFORGE 
    8279WALK 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/interim_files/words.mlf

    r2572 r2578  
    99READ 
    1010FOR 
    11 MOJOMOVE 
    12 FOUR 
    13 ONE 
    14 ONE 
    15 DOT 
     11MOJOMOVE411 
    1612COM 
    1713BY 
     
    2319THE 
    2420VOXFORGE 
    25 DOT 
    2621ORG 
    2722SHORTS 
    2823WEEKLY 
    29 POETRY 
     24MONOLOGUE 
    3025COLLECTION 
     26THOSE 
     27EVENING 
     28BELLS 
    3129THOSE 
    3230EVENING 
     
    8684AND 
    8785SO 
    88 TWILL 
     86
     87WILL 
    8988BE 
    9089WHEN 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/output_files/dict

    r2574 r2578  
    1515DELLS           [DELLS]         d eh l z sp 
    1616DOMAIN          [DOMAIN]        d ow m ey n sp 
    17 DOT             [DOT]           d aa t sp 
    1817DWELLS          [DWELLS]        d w eh l z sp 
    1918EVENING         [EVENING]       iy v n ix ng sp 
    2019FOR             [FOR]           f ao r sp 
    21 FOUR            [FOUR]          f ao r sp 
    2220GAY             [GAY]           g ey sp 
    2321GONE            [GONE]          g ao n sp 
     
    3432LAST            [LAST]          l ae s t sp 
    3533MANY            [MANY]          m eh n iy sp 
    36 MOJOMOVE        [MOJOMOVE]      m ow jh ow m uw v sp 
     34MOJOMOVE411     [MOJOMOVE411]   m ow jh ow m uw v f n n sp 
     35MONOLOGUE       [MONOLOGUE]     m aa n ax l ao g sp 
    3736MOORE           [MOORE]         m uh r sp 
    3837MORE            [MORE]          m ao r sp 
     
    4241OF              [OF]            ah v sp 
    4342ON              [ON]            aa n sp 
    44 ONE             [ONE]           w ah n sp 
    4543ORG             [ORG]           ao r g sp 
    4644OTHER           [OTHER]         ah dh er sp 
     
    4846PASSED          [PASSED]        p ae s t sp 
    4947PEAL            [PEAL]          p iy l sp 
    50 POETRY          [POETRY]        p ow ax t r iy sp 
    5148PRAISE          [PRAISE]        p r ey z sp 
    5249PUBLIC          [PUBLIC]        p ah b l ix k sp 
     
    6562STILL           [STILL]         s t ih l sp 
    6663SWEET           [SWEET]         s w iy t sp 
     64T               [T]             t iy sp 
    6765TALE            [TALE]          t ey l sp 
    6866TELLS           [TELLS]         t eh l z sp 
     
    7876TOMB            [TOMB]          t uw m sp 
    7977TUNEFUL         [TUNEFUL]       t uw n f ax l sp 
    80 TWILL           [TWILL]         t w ih l sp 
    8178VOXFORGE        [VOXFORGE]      v aa k s f ao r jh sp 
    8279WALK            [WALK]          w ao k sp 
  • Trunk/Scripts/Audio_scripts/AudioSegmentation/AudioBook/output_files/prompts

    r2573 r2578  
    1 audio0001 THOSE EVENING BELLS BY THOMAS MOORE  
    2 audio0002 READ FOR MOJOMOVE FOUR ONE ONE DOT COM BY ROBERT SCOTT  
    3 audio0003 AS PART OF THE VOXFORGE DOT ORG  
    4 audio0004 SHORTS WEEKLY POETRY COLLECTION THOSE EVENING BELLS THOSE EVENING BELLS  
    5 audio0005 HOW MANY A TALE THEIR MUSIC TELLS  
    6 audio0006 OF YOUTH AND HOME AND THAT SWEET TIME  
    7 audio0007 WHEN LAST I HEARD THEIR SOOTHING CHIME  
    8 audio0008 THOSE JOYOUS HOURS ARE PASSED AWAY  
    9 audio0009 AND MANY A HEART THAT THEN WAS GAY  
    10 audio0010 WITHIN THE TOMB NOW DARKLY DWELLS AND HEARS NO MORE  
    11 audio0011 THOSE EVENING BELLS AND SO TWILL BE WHEN I AM GONE  
    12 audio0012 THAT TUNEFUL PEAL WILL STILL RING ON  
    13 audio0013 WHILE OTHER BARDS SHALL WALK THESE DELLS  
    14 audio0014 AND SING YOUR PRAISE SWEET EVENING BELLS  
    15 audio0015 THIS RECORDING IS IN THE PUBLIC DOMAIN  
     1audio0001 THOSE EVENING BELLS BY THOMAS MOORE READ FOR MOJOMOVE411 COM BY  
     2audio0002 ROBERT SCOTT AS PART OF THE VOXFORGE ORG  
     3audio0003 SHORTS WEEKLY MONOLOGUE COLLECTION  
     4audio0004 THOSE EVENING BELLS THOSE EVENING BELLS THOSE EVENING BELLS HOW MANY A TALE THEIR MUSIC TELLS  
     5audio0005 OF YOUTH AND HOME AND THAT SWEET TIME  
     6audio0006 WHEN LAST I HEARD THEIR SOOTHING CHIME  
     7audio0007 THOSE JOYOUS HOURS ARE PASSED AWAY  
     8audio0008 AND MANY A HEART THAT THEN WAS GAY  
     9audio0009 WITHIN THE TOMB NOW DARKLY DWELLS AND HEARS NO MORE  
     10audio0010 THOSE EVENING BELLS AND SO T WILL BE WHEN I AM GONE  
     11audio0011 THAT TUNEFUL PEAL WILL STILL RING ON  
     12audio0012 WHILE OTHER BARDS SHALL WALK THESE DELLS  
     13audio0013 AND SING YOUR PRAISE SWEET EVENING BELLS  
     14audio0014 THIS RECORDING IS IN THE PUBLIC DOMAIN  
  • Trunk/Scripts/Audio_scripts/UserSubmission.pm

    r2517 r2578  
    3535#use UserSubmission::README;                    # Import @EXPORT symbols 
    3636use UserSubmission::README();                  # Import nothing 
    37 use UserSubmission::PROMPTS(); 
     37use UserSubmission::PROMPTS; 
     38use UserSubmission::PROMPTS::EN; 
    3839use UserSubmission::LICENSE(); 
    3940use UserSubmission::AUDIO(); 
     
    5657        # maybe should just open fles in binmode rather than use dos2unix???? 
    5758        my ($parms, $audio_dir_name ) = @_; 
    58                 # !!!!!! 
    59                 #my $debug = $$parms{"debug"}; 
    60                 #my $QuarantineDir = $$parms{"QuarantineDir"}; 
    6159                my $debug = $parms->getDebug; 
    6260                my $QuarantineDir = $parms->getQuarantineDir; 
    63                 # !!!!!! 
    6461        opendir(DIR, "$QuarantineDir/$audio_dir_name") || confess ("Unable to open directory: $QuarantineDir/$audio_dir_name\n"); 
    6562        while (my $filename = readdir(DIR)) { 
     
    8481 
    8582sub ValidateFilenames { 
    86         # !!!!!! 
    87         #my ($parms, $AudioDirName) = @_; 
    8883        my($parms,$AudioDirNameWithPath) = @_; # assumes a 'flat' directory (no sub-directories) 
    89         #my $debug = $$parms{"debug"}; 
    90         # !!!!!! 
    9184        # check that user used PROMPTS & README for filenames 
    9285        # windows defaults to prompts.txt 
    9386        # look into converting to XML and run through XML schema validation ... 
    94         # !!!!!! use OO syntax 
    95         #UserSubmission::README::ValidateFileName($parms, $AudioDirName) || confess "UserSubmission error: $?" ; 
    96         #UserSubmission::LICENSE::ValidateFileName($parms, $AudioDirName) || confess "UserSubmission error: $?" ;        
    97         #UserSubmission::PROMPTS::ValidateFileName($parms, $AudioDirName) || confess "UserSubmission error: $?" ; 
    98         print "!!!!!!AudioDirNameWithPath1:$AudioDirNameWithPath\n"; 
    9987         
    10088        my $readmeWithPath = UserSubmission::README->getFileWithPath($parms,$AudioDirNameWithPath) or confess "UserSubmission error: $?" ; 
     
    10694        my $promptsWithPath = UserSubmission::PROMPTS->getFileWithPath($parms,$AudioDirNameWithPath) || confess "UserSubmission error: $?" ; 
    10795        UserSubmission::PROMPTS->ValidateFileName($parms,$AudioDirNameWithPath,$promptsWithPath); 
    108         # !!!!!! 
    10996} 
    11097 
    11198sub Cleanup {  # Repository class calls this method 
    11299        my ($parms, $audio_dir_name) = @_; 
    113         # !!!!!! 
    114         #my $debug = $$parms{"debug"}; 
    115         #my $QuarantineDir = $$parms{"QuarantineDir"};   
    116         #my $ToBeProcessedDir = $$parms{"ToBeProcessedDir"}; 
    117100        my $debug = $parms->getDebug; 
    118101        my $QuarantineDir = $parms->getQuarantineDir;    
     
    121104        $command = ("rm -f $QuarantineDir/$audio_dir_name\.zip $QuarantineDir/$audio_dir_name\.tgz $QuarantineDir/$audio_dir_name\.tar.gz "); print "$command\n" if $debug;   system($command) == 0 or confess "system $command failed: $?"; # remove hidden backup files 
    122105        $command = ("rm -rf $ToBeProcessedDir/$audio_dir_name"); print "$command\n" if $debug;   system($command) == 0 or confess "system $command failed: $?"; # remove hidden backup files 
    123         # !!!!!! 
    124106        return 1; 
    125107}                                                      
     
    154136                } 
    155137                my $prompts; 
    156                 $prompts = UserSubmission::PROMPTS->new($parms, $audio_dir_name, $readme); 
    157138                if ($readme->getLanguage() =~ /EN/) {  
     139                        $prompts = UserSubmission::PROMPTS::EN->new($parms, $audio_dir_name, $readme);   
    158140                        # if updating this or other languages, need to set VALIDATION-PROMPTS for the new language in PROMPTS->_clean 
    159141                        #$prompts = UserSubmission::PROMPTS->new($parms, $audio_dir_name, $readme); 
     
    163145                                confess "UserSubmission error: $audio->getErrorMessage" ; 
    164146                        } 
     147                } else { 
     148                        $prompts = UserSubmission::PROMPTS->new($parms, $audio_dir_name, $readme);       
    165149                } 
    166150                $directorynames{$audio_dir_name} = [$readme->getSamplingrate, $readme->getSamplingrateformat, $readme->getFiletype, $audio ]; 
     
    185169                print "***moved to 'ToBeProcessed' directory: $audio_dir_name:$samplingrate:$samplingrateformat:$filetype\n"; 
    186170        } 
    187         # !!!!!! 
    188  
    189171        return \%userSubmission; 
    190         #print "\nPromptsValidation.pl completed!****************************************************\n\n"; 
    191         # !!!!!! 
    192172} 
    193173 
     
    223203        }        
    224204        close(DIR);  
    225         # !!!!!! 
    226         #return \@tarlist; 
    227205        my @sorted = sort(@tarlist); 
    228206        return \@sorted; 
    229         # !!!!!! 
    230  
    231207} 
    232208 
     
    234210        # TODO: look at the Archive::Extract CPAN module to handle tar, tgz, tar.gz and zip extractions ... 
    235211        my ($parms, $filename, $suffix, $tarfile) = @_; 
    236         # !!!!!! 
    237         #my $debug = $$parms{"debug"};   
    238         #my $QuarantineDir = $$parms{"QuarantineDir"}; 
    239212        my $debug = $parms->getDebug; 
    240213        my $QuarantineDir = $parms->getQuarantineDir; 
    241         # !!!!!! 
    242214        my (@clamscan_output, $clamscan_results); 
    243215        #$command = ("freshclam"); print "$command\n"; system($command) == 0 or confess "system $command failed: $?";  
     
    300272sub _Copy2ToBeProcessed { # private 
    301273        my ($parms, $audio_dir_name, $filetype, $audio) = @_; 
    302         # !!!!!! 
    303         #my $debug = $$parms{"debug"}; 
    304         #my $QuarantineDir = $$parms{"QuarantineDir"};   
    305         #       my $ToBeProcessedDir = $$parms{"ToBeProcessedDir"};      
    306274        my $debug = $parms->getDebug; 
    307275        my $QuarantineDir = $parms->getQuarantineDir; 
    308276        my $ToBeProcessedDir = $parms-> getToBeProcessedDir; 
    309         # !!!!!! 
    310277        $audio->Finalize($parms, $audio_dir_name, $filetype); 
    311278        if ($audio->getError) { 
  • Trunk/Scripts/Audio_scripts/UserSubmission/PROMPTS.pm

    r2505 r2578  
    3232use lib '../../Audio_scripts'; 
    3333use UserSubmission::README;  
    34  
    3534my $command; 
    3635#################################################################### 
     
    137136        open (FILEIN, "$QuarantineDir/$audio_dir_name/PROMPTS") ||confess ("Unable to open PROMPTS file for reading"); 
    138137        # !!!!!!! 
    139         if ($readme->getLanguage() =~ /EN/) {  
    140               open (FILEOUT,">$QuarantineDir/$audio_dir_name/VALIDATION-PROMPTS") || confess ("Unable to open VALIDATION-PROMPTS file for writing"); 
    141        
     138        #if ($readme->getLanguage() =~ /EN/) {  
     139        #     open (FILEOUT,">$QuarantineDir/$audio_dir_name/VALIDATION-PROMPTS") || confess ("Unable to open VALIDATION-PROMPTS file for writing"); 
     140        #
    142141        # !!!!!!! 
    143142        open (FILEOUT_PROD,">$QuarantineDir/$audio_dir_name/PROD-PROMPTS") || confess ("Unable to open PROD-PROMPTS file for writing"); 
     
    156155                        $linescalar =~ tr/a-z/A-Z/; # change to uppercase 
    157156                        $linescalar =~ s/,//g; # remove commas  
    158                       $linescalar =~ s/\.//g; # remove periods   
     157                #     $linescalar =~ s/\.//g; # remove periods   
    159158                        # dealing with quotes 
    160159                        #  $linescalar =~ s/\'//g; # remove single quotes; but need words like "don't" - need to research this more ... 
    161160                        # $linescalar =~ s/\'\b(.*)\b\'/$1/g; # remove single quotes from quoted text; single quote must be at start of a word, and at end of a word - does not work if there are two words with single quotes in them in the same sentence ... 
    162                         $linescalar =~ s/\'EM//g; # remove leading single quotes for contraction of them 
    163161                        $linescalar =~ s/\"//g; # remove double quotes 
    164                       $linescal