voxforge.org
VoxForge Dev

Changeset 2579

Show
Ignore:
Timestamp:
05/12/08 22:03:24 (4 months ago)
Author:
kmaclean
Message:

Updates to prompt-processing scripts: subclass PROMPTS per language for languages that need specialized processing (EN)

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Trunk/Scripts/Audio_scripts/UserSubmission/LICENSE.pm

    r2517 r2579  
    3131use File::Copy; 
    3232use lib '../../Audio_scripts'; 
    33 use UserSubmission::README; 
     33use UserSubmission::README;  
    3434 
    3535 
  • Trunk/Scripts/Audio_scripts/UserSubmission/PROMPTS.pm

    r2578 r2579  
    135135        my $audio_rate_dir = "$samplingrate:$samplingrateformat";        
    136136        open (FILEIN, "$QuarantineDir/$audio_dir_name/PROMPTS") ||confess ("Unable to open PROMPTS file for reading"); 
    137         # !!!!!!! 
    138         #if ($readme->getLanguage() =~ /EN/) {  
    139         #       open (FILEOUT,">$QuarantineDir/$audio_dir_name/VALIDATION-PROMPTS") || confess ("Unable to open VALIDATION-PROMPTS file for writing"); 
    140         #} 
    141         # !!!!!!! 
    142137        open (FILEOUT_PROD,">$QuarantineDir/$audio_dir_name/PROD-PROMPTS") || confess ("Unable to open PROD-PROMPTS file for writing"); 
    143138        my %prompts; 
     
    155150                        $linescalar =~ tr/a-z/A-Z/; # change to uppercase 
    156151                        $linescalar =~ s/,//g; # remove commas  
    157                 #     $linescalar =~ s/\.//g; # remove periods   
     152                      $linescalar =~ s/\.//g; # remove periods   
    158153                        # dealing with quotes 
    159154                        #  $linescalar =~ s/\'//g; # remove single quotes; but need words like "don't" - need to research this more ... 
    160155                        # $linescalar =~ s/\'\b(.*)\b\'/$1/g; # remove single quotes from quoted text; single quote must be at start of a word, and at end of a word - does not work if there are two words with single quotes in them in same sentence ... 
    161156                        $linescalar =~ s/\"//g; # remove double quotes 
    162                 #     $linescalar =~ s/://g; # remove colon 
     157                      $linescalar =~ s/://g; # remove colon 
    163158                        # $linescalar =~ s/-//g; # compound word dash 
    164                 #     $linescalar =~ s/--//g; # double dash 
    165                 #     $linescalar =~ s/ - / /g; # space dash space punctuation         
    166                 #     $linescalar =~ s/ -/ /g; # space dash punctuation                        
     159                      $linescalar =~ s/--//g; # double dash 
     160                      $linescalar =~ s/ - / /g; # space dash space punctuation         
     161                      $linescalar =~ s/ -/ /g; # space dash punctuation                        
    167162                        $linescalar =~ s/;//g; # semi-colon 
    168163                        $linescalar =~ s/!//g; # exclamation mark 
    169164                        $linescalar =~ s/\?//g; # question mark 
    170                         # Other cleanup !!!!!! need to change the prompts files directly rather than doing this!!! or add to dictionary!!! 
    171                         #!!!!!! 
    172                         #if ($readme->getLanguage() =~ /EN/) {  
    173                         #       print (FILEOUT "$path_mfcc/$filename $linescalar\n"); 
    174                         #}  
    175                         #!!!!!! 
     165                        $linescalar =~ s/\+//g; # plus 
    176166                        print (FILEOUT_PROD "$prod_path_mfcc/$filename $linescalar\n"); # output to file 
    177167                        $prompts{'PromptCount'}++; 
     
    179169        } 
    180170        close(FILEIN); 
    181         #!!!!!! 
    182         #if ($readme->getLanguage() =~ /EN/) {  
    183         #       close(FILEOUT); 
    184         #}  
    185         #close(FILEOUT_PROD); 
    186         #!!!!!!          
     171        close(FILEOUT_PROD); 
    187172        rename "$QuarantineDir/$audio_dir_name/PROMPTS", "$QuarantineDir/$audio_dir_name/prompts-original" || confess "can't rename file\n"; 
    188173        rename "$QuarantineDir/$audio_dir_name/PROD-PROMPTS", "$QuarantineDir/$audio_dir_name/PROMPTS" || confess "can't rename file\n";; 
  • Trunk/Scripts/Audio_scripts/UserSubmission/PROMPTS/EN.pm

    r2578 r2579  
    5252        my $audio_rate_dir = "$samplingrate:$samplingrateformat";        
    5353        open (FILEIN, "$QuarantineDir/$audio_dir_name/PROMPTS") ||confess ("Unable to open PROMPTS file for reading"); 
    54         #if ($readme->getLanguage() =~ /EN/) {  
    5554        open (FILEOUT,">$QuarantineDir/$audio_dir_name/VALIDATION-PROMPTS") || confess ("Unable to open VALIDATION-PROMPTS file for writing"); 
    56         #} 
    5755        open (FILEOUT_PROD,">$QuarantineDir/$audio_dir_name/PROD-PROMPTS") || confess ("Unable to open PROD-PROMPTS file for writing"); 
    5856        my %prompts; 
     
    7775                        #  $linescalar =~ s/\'//g; # remove single quotes; but need words like "don't" - need to research this more ... 
    7876                        # $linescalar =~ s/\'\b(.*)\b\'/$1/g; # remove single quotes from quoted text; single quote must be at start of a word, and at end of a word - does not work if there are two words with single quotes in them in same sentence ... 
    79                         $linescalar =~ s/\'EM//g; # remove leading single quotes for contraction of them 
    8077                        $linescalar =~ s/\"//g; # remove double quotes 
    8178                        $linescalar =~ s/://g; # remove colon 
     
    8885                        $linescalar =~ s/\?//g; # question mark 
    8986                        # Other cleanup !!!!!! need to change the prompts files directly rather than doing this!!! or add to dictionary!!! 
     87                        $linescalar =~ s/\'EM//g; # remove leading single quotes for contraction of them 
    9088                        $linescalar =~ s/&/AND/g;  
    9189                        $linescalar =~ s/2000/TWO THOUSAND/g;  
     
    9896        } 
    9997        close(FILEIN); 
    100         #if ($readme->getLanguage() =~ /EN/) {  
    101                 close(FILEOUT); 
    102         #}  
     98        close(FILEOUT); 
    10399        close(FILEOUT_PROD); 
    104100        rename "$QuarantineDir/$audio_dir_name/PROMPTS", "$QuarantineDir/$audio_dir_name/prompts-original" || confess "can't rename file\n";