voxforge.org
VoxForge Dev

Changeset 2464

Show
Ignore:
Timestamp:
03/03/08 15:00:42 (10 months ago)
Author:
kmaclean
Message:

WebGUI forum update scripts - adding German processing and support for German character encodings

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Trunk/Scripts/Mirroring_scripts/Single_WebGUIForum.pm

    r2459 r2464  
    2424use diagnostics; 
    2525use Carp; 
     26use Archive::Extract; 
     27use File::Basename; 
    2628#################################################################### 
    2729use VoxForge_config;  
     
    3739### Directory 
    3840#################################################################### 
    39 my $AudioDirName="ruth-20080302-rhb"; 
     41#my $AudioDirName="NL-JohanLingen-20080302-das[1496202351].zip"; 
    4042my $path_original ="/home/kmaclean/temp2"; 
     43my $tarfile ="DE-anonymous-20080303-svq[186061628].zip";  # if undef than attachment does not get processed by LWP! 
     44my $prefix  ="DE-"; # prefix to remove from submission 
     45#################################################################### 
     46print "tarfile1:$path_original/$tarfile\n"; 
     47if ($tarfile =~ /\[.*\]/) { 
     48# can't get this to work with Archive::Extract??: 
     49        my $newDirectoryName = $tarfile; 
     50        $newDirectoryName =~ s/$prefix//; 
     51        $newDirectoryName =~ s/\[.*\]//; 
     52        rename("$path_original/$tarfile", "$path_original/$newDirectoryName")  || confess "error:tar file not found, cannot rename: $?";         
     53        $tarfile  = $newDirectoryName; 
     54} 
     55print "tarfile2:$path_original/$tarfile\n"; 
     56my @suffixlist = ("tgz", "tar\.gz", "zip"); 
     57my ($AudioDirName,$path,$suffix) = fileparse($tarfile,@suffixlist); 
     58$AudioDirName =~ s/\.$//; # remove trailing period that fileparse does not remove for some reason?? 
     59print "AudioDirName:$AudioDirName\n"; 
     60 
     61my $ae = Archive::Extract->new( archive => "$path_original/$tarfile" ); 
     62$ae->extract( to => "$path_original/$AudioDirName") or die $ae->error;; 
    4163##########submission with tarfile as attachment: 
    4264my $promptspath = "$path_original/$AudioDirName/prompts.txt"; 
    4365my $readmepath = "$path_original/$AudioDirName/readme.txt"; 
    4466my $licensepath = "$path_original/$AudioDirName/license.txt"; 
    45 my $tarfile ="$path_original/$AudioDirName.zip";  # if undef than attachment does not get processed by LWP!  
    46 ##########fix submission that did not work properly; not tarfile to upload since linking to voxforge1.org 
     67 
     68##########fix submission that did not work properly; no tarfile to upload since linking to voxforge1.org 
    4769#my $readmepath = "$path_original/$AudioDirName/etc/README"; 
    4870#my $promptspath = "$path_original/$AudioDirName/etc/prompts-original"; 
     
    5981 
    6082if (defined $tarfile) { 
    61         if (-e "$path_original/$AudioDirName.zip") { 
     83        if ((-e "$path_original/$tarfile") and (-e "$path_original/$AudioDirName")) { 
    6284                print "path $path_original/$AudioDirName.zip\n"; 
    6385                Update($parms, \%ProcessedUserSubmissions, 'speechsubmission', 'n@wp@$$') || confess "Single_WebGUIForumUpdate error: $?"; 
     
    94116                        #my $licensepath = "$path_original/$AudioDirName/LICENSE"; 
    95117                        my ($content,$title)= WebGUIForum::getSubmissionContents($parms,$readmepath,$promptspath,$licensepath,$AudioDirName, $readme,$tarfile); 
    96                         WebGUIForum::Upload($parms,$browser,$content,$title,$readme,$tarfile); 
     118                        WebGUIForum::Upload($parms,$browser,$content,$title,$readme,"$path_original/$tarfile"); 
    97119                #} else { 
    98120                #       print "No WebGUI Forum Update - not submitted using VoxForge Speech Submission Application, or flagged for webgui forum update\n"; 
  • Trunk/Scripts/Mirroring_scripts/WebGUIForum.pm

    r2459 r2464  
    2828use Carp; 
    2929use strict; 
    30 # !!!!!! 
    31 #use WWW::Curl::Easy; 
    3230use LWP::UserAgent; 
    3331use URI::Escape; 
    3432#use Archive::Extract; 
    35 # !!!!!! 
     33use Encode; 
    3634#################################################################### 
    3735###  Declarations                                                           
     
    4139my $englishURL = 'http://www.voxforge.org/home/downloads/speech/english';  
    4240my $dutchURL = 'http://www.voxforge.org/home/downloads/speech/dutch'; 
     41#my $germanURL = 'http://www.voxforge.org/home/downloads/speech/german-speech-files'; 
    4342#my $englishURL = 'http://www.voxforge.org/home/downloads/speech/voxforgeivr';  # for testing  
    4443#my $dutchURL = 'http://www.voxforge.org/home/downloads/speech/voxforgeivr'; # for testing  
     44my $germanURL = 'http://www.voxforge.org/home/downloads/speech/voxforgeivr'; # for testing  
    4545my $tarfile = undef; # not uploading a tarfile to WebGUI forum 
    4646#################################################################### 
     
    5252        print "\nWebGUIForumUpdate.pm starting****************************************************\n\n"; 
    5353        my $counter; 
    54         # !!!!!! 
    55         # Login($parms, $username, $identifier);         
    5654        my $browser = LWP::UserAgent->new(keep_alive => 1 ); 
    5755        $browser->cookie_jar( {} ); 
    5856        Login($parms, $browser, $username, $identifier);         
    59         # !!!!!! 
    6057        foreach my $AudioDirName (keys %$ProcessedUserSubmissions) { 
    6158                $counter++; 
     
    9188        } elsif ($readme->getLanguage() =~ /NL/) { 
    9289                $UploadURL =$dutchURL; 
     90        } elsif ($readme->getLanguage() =~ /DE/) { 
     91                $UploadURL =$germanURL;  
    9392        } else { 
    9493                $UploadURL =$englishURL;         
     
    9897 
    9998sub getSubmissionContents { 
    100         # !!!!!! 
    101         #my ($parms,$readmepath,$promptspath,$licensepath,$AudioDirName,$readme) = @_; 
    10299        my ($parms,$readmepath,$promptspath,$licensepath,$AudioDirName,$readme,$tarfile) = @_; 
    103         # !!!!!! 
    104100                my $debug = $$parms{"debug"}; 
    105101        my ($content); 
     
    121117        #       confess ("can't extract file:$path_original/$AudioDirName") 
    122118        #} 
    123         $title =~ s/\.zip//; 
    124         $content = join(" ",@readme); 
     119        $title =~ s/\.zip//;     
     120        #!!!!!! 
     121        #$content = join(" ",@readme);   
     122        my $tempContent = join(" ",@readme); # for some reason, README has different characert encoding than Prompts and License??? 
     123        $content = encode("utf8", $tempContent); 
     124        # !!!!!! 
    125125        $content .= "<h2>Prompts:</h2>"; 
    126126        $content .= join(" ", @prompts); 
     
    144144        #$content =~ s/&/and/g; # CURL (or WebGUI) doesn't like ampersands??  
    145145        # !!!!!! 
     146 
    146147        return (\$content,\$title); 
    147148} 
     
    164165 
    165166sub Login { 
    166         # !!!!!! 
    167         #my ($parms, $username, $identifier) = @_; 
    168167        my ($parms, $browser, $username, $identifier) = @_; 
    169168                my $debug = $$parms{"debug"}; 
    170         #my ($result, $err); 
    171         #my $post = "op=auth;method=login;username=$username;identifier=$identifier;submit=login"; 
    172          
    173         #my $curl = new WWW::Curl::Easy; 
    174         #$curl->setopt(CURLOPT_URL, $LoginURL); 
    175         #$curl->setopt(CURLOPT_COOKIEJAR, "my_cookies.txt"); 
    176         #$curl->setopt(CURLOPT_COOKIEFILE, "my_cookies.txt"); 
    177         #$curl->setopt(CURLOPT_CONNECTTIMEOUT, 120); 
    178         #$curl->setopt(CURLOPT_TIMEOUT, 10); 
    179         #$curl->setopt(CURLOPT_FOLLOWLOCATION,1); 
    180         #$curl->setopt(CURLOPT_POST, 1); 
    181         #$curl->setopt(CURLOPT_POSTFIELDS, $post); 
    182         #$curl->setopt(CURLOPT_WRITEFUNCTION, \&chunk ); 
    183         #$curl->setopt(CURLOPT_FILE, \$result); 
    184         #$curl->perform; 
    185         #if ($err = $curl->errbuf) { 
    186         #       confess ("ERROR: login failed with error: $err\n");  
    187         #} 
    188         #if ($result) { 
    189         #       ParseLoginResult($parms, \$result); 
    190         #} else { 
    191         #       confess ("ERROR: login failed, no result\n") 
    192         #} 
    193169        my $result = $browser->post( 
    194170      "$LoginURL", 
     
    215191                # print  "\n\nresult:\n$result\n"; 
    216192        } 
    217         # !!!!!! 
    218193} 
    219194 
     
    221196        my ($parms, $result) = @_; 
    222197                my $debug = $$parms{"debug"}; 
    223         # !!!!!!!                
    224         #if ($$result =~ /(Hello) <a href=".+">(.+)<\/a>/i) {            
    225198        if ($result->content =~ /(Hello) <a href=".+">(.+)<\/a>/i) { 
    226199                print "login status: successful\n"; 
    227200        } else { 
    228                 #confess ("ERROR: cannot parse login result; login probably failed\nresult:$$result\n");  
    229201                confess ("ERROR: cannot parse login result; login probably failed\nresult:$result->content\n");  
    230202        } 
    231         # !!!!!! 
    232203} 
    233204 
     
    235206        my ($parms,$browser,$content,$title,$readme,$tarfile) = @_; 
    236207                my $debug = $$parms{"debug"}; 
    237         #my ($result, $err, $post); 
    238         #       if (($tarfile ne "none") and (defined($tarfile)) and ($tarfile ne "")) { 
    239         #$post = "func=editSave&assetId=new&class=WebGUI::Asset::Post::Thread&proceed=showConfirmation&title=$$title&content=test&__storageId_action=upload&storageId_file=\@$tarfile&subscribe=1&submit=save"; 
    240          
    241         #my $curl = new WWW::Curl::Easy; 
    242         #$curl->setopt(CURLOPT_URL, $UploadURL); 
    243         #$curl->setopt(CURLOPT_COOKIEJAR, "my_cookies.txt"); 
    244         #$curl->setopt(CURLOPT_COOKIEFILE, "my_cookies.txt"); 
    245         #$curl->setopt(CURLOPT_CONNECTTIMEOUT, 120); 
    246         #$curl->setopt(CURLOPT_TIMEOUT, 28800); 
    247         #$curl->setopt(CURLOPT_FOLLOWLOCATION,1); 
    248         #$curl->setopt(CURLOPT_HTTPPOST, 1); 
    249         #$curl->setopt(CURLOPT_POSTFIELDS, $post); 
    250         #$curl->setopt(CURLOPT_WRITEFUNCTION, \&chunk ); 
    251         #$curl->setopt(CURLOPT_FILE, \$result); 
    252         #$curl->perform; 
    253208        my $local_tarfile; 
    254209        if (!defined $tarfile) { 
     
    271226            'submit'      => 'save', 
    272227          ], 
    273           'Content_Type' => 'form-data', 
     228          'Content_Type' => 'form-data', 
     229          'charset' => 'UTF-8', 
     230 
    274231        ); 
    275         #if ($err = $curl->errbuf) { 
    276         #       confess ("ERROR: login failed with error: $err");  
    277         #} 
    278         #if ($result) { 
    279         #       ParseUploadResult($parms, \$result); 
    280         #} else { 
    281         #       confess ("ERROR: Upload failed, no result\n") 
    282         #} 
    283232        if ($result->is_success) { 
    284233        ParseUploadResult($parms, $result); 
     
    293242                print  "http code:" . $result->status_line. "\n";                
    294243        } 
    295  
    296244} 
    297245 
     
    299247        my ($parms, $result) = @_; 
    300248                my $debug = $$parms{"debug"}; 
    301                  
    302         #if ($$result =~ /(Your post has been received)/i) { 
    303249        if ($result->content =~ /(Your post has been received)/i) { 
    304250                print "upload status: $1\n"; 
    305         #} elsif ($$result =~ /<h1>(Permission Denied!)<\/h1>/i) {               
    306251        } elsif ($result->content =~ /<h1>(Permission Denied!)<\/h1>/i) { 
    307                 #print "ERROR: $1\n\nresult:\n$$result\n"; 
    308252                confess "ERROR: $1\n\nresult:\n$result->content\n"; 
    309253        } else { 
    310                 #confess ("ERROR: cannot parse upload result; upload probably failed\nresult:$$result\n");  
    311254                confess ("ERROR: cannot parse upload result; upload probably failed\nresult:$result->content\n");  
    312255        } 
    313256} 
    314 # !!!!!! 
    315 #sub chunk { 
    316 #       my ($data,$pointer)=@_;  
    317 #       $$pointer .= $data; 
    318 #       return length($data); 
    319 #} 
    320 # !!!!!! 
    3212571;