root/trunk/submit_audio.php

Revision 38, 6.3 KB (checked in by trevarthan, 5 years ago)

Bump up connection timeout. Had to manually submit a few botched submissions due to DNS slowness. This might hopefully solve that.

  • Property svn:executable set to *
Line 
1#!/usr/bin/php
2<?
3// submit_audio.php - automated audio submission to Voxforge.org
4// Copyright (C) 2007  Jesse D. Guardiani
5//
6// This program is free software; you can redistribute it and/or
7// modify it under the terms of the GNU General Public License
8// as published by the Free Software Foundation; either version 2
9// of the License, or (at your option) any later version.
10//
11// This program is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14// GNU General Public License for more details.
15//
16// You should have received a copy of the GNU General Public License
17// along with this program; if not, write to the Free Software
18// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
19// --------------------------------------------------------------------------------
20// This script does four things:
21// 1.) .tgz audio dir
22// 2.) Login to voxforge forum
23// 3.) Upload .tgz to forum via HTTP POST
24// 4.) Delete .tgz
25
// Paths of every file created at runtime; cleanup() removes each of them.
$file_a = array();

// Forum endpoints used by login() and upload().
$login_url             = 'http://www.voxforge.org/home/login';
$production_upload_url = 'http://www.voxforge.org/home/downloads/audio/model-repository/audio-speech-files';
$testing_upload_url    = 'http://www.voxforge.org/home/downloads/audio2/voxforgeivr';

// The production URL is the active target; assign $testing_upload_url here
// instead to send uploads to the testing URL.
$upload_url = $production_upload_url;

main(); // This is The End, my friend.
33
34
// Entry point: archive the session directory, log in to the forum, upload the
// archive, then move the directory into the sibling "submitted" directory.
//
// Expects exactly one command line argument: the path of the audio directory.
// Never returns. Exit codes:
//   0 = success             1 = bad usage          2 = login request failed
//   3 = login not confirmed 4 = upload failed      5 = upload not confirmed
//   6 = tar failed
function main() {
  global $argv;
  global $login_url;
  global $upload_url;

  set_time_limit(1800); // run for 30 minutes, max

  // Validate the command line BEFORE reading $argv[1]; otherwise a missing
  // argument raises an undefined-offset notice ahead of the usage message.
  if (!is_array($argv) || count($argv) != 2) crash(1,"usage: submit_audio.php /path/to/audio/dir/to/submit");

  $session_dir = $argv[1];
  $session_id  = basename($session_dir);
  $tar_gz_path = "/tmp/$session_id.tgz";

  // Start from a clean cookie jar so a stale session is never reused.
  exec("rm -f my_cookies.txt");
  if (tar_gz($session_dir,$tar_gz_path) !== 0) crash(6,"ERROR: tar failed");

  $result = login($login_url);
  if (!$result) crash(2,'ERROR: login failed');
  $login_status = parse_login($result);
  if ($login_status) echo "$login_status\n";
  else crash(3,"ERROR: cannot parse login result; login probably failed");

  $result = upload($upload_url,$session_id,$tar_gz_path,$session_dir);
  if (!$result) crash(4,'ERROR: upload failed');
  $upload_status = parse_upload($result);
  if ($upload_status) echo "$upload_status\n";
  else crash(5,"ERROR: cannot parse upload result; upload probably failed");

  // Move successfully submitted dir to a different subdirectory to make it
  // easy to see at a glance which dirs have been successfully submitted.
  $parent_dir = dirname($session_dir);
  $parent_of_parent_dir = dirname($parent_dir);
  $mv_out  = array();
  $mv_code = 0;
  // Both paths come from the command line, so quote them for the shell;
  // "--" stops mv from treating a leading "-" as an option.
  exec(
    "mv -- ".escapeshellarg($session_dir)." ".escapeshellarg("$parent_of_parent_dir/submitted/"),
    $mv_out,
    $mv_code
  );
  // The upload itself succeeded, so a failed move is only reported, not fatal.
  if ($mv_code !== 0) error_log('WARNING: could not move "'.$session_dir.'" to submitted/, mv exit code '.$mv_code);

  cleanup();
  exit(0); // Happy
}
72
// Abort the run: remove temporary files, print $msg, e-mail an error report
// and terminate the process.
//
// $code - process exit status (converted with intval())
// $msg  - human readable error text; echoed and included in the e-mail
//
// Never returns.
function crash($code,$msg) {
  global $argv;

  cleanup();
  echo "$msg\n";

  // crash() is also used for the usage error, where no directory argument
  // exists, so guard the lookup instead of reading an undefined offset.
  $session_dir = isset($argv[1]) ? $argv[1] : '';
  $mailmsg = <<<EOTXT
    ERROR while submitting directory "$session_dir"!
    msg="$msg"
EOTXT;
  // Record a failed notification in the log; the exit code stays the same.
  if (!mail('voxforge-ivr@guardiani.us',"voxforge-ivr ERROR!",$mailmsg)) {
    error_log('ERROR: could not send crash notification mail');
  }

  exit(intval($code));
}
88
// Pack directory $orig into the gzip-compressed tar archive $dest.
//
// The archive is created relative to the parent of $orig, so its entries
// start with the directory's own name rather than its full path.
// $dest is registered in the global $file_a so cleanup() removes it later.
//
// $orig - path of the directory to archive
// $dest - path of the archive file to create
//
// Returns the exit status of tar (0 on success).
function tar_gz($orig,$dest) {
  global $file_a;

  $file_a[] = $dest;
  $code     = 0;
  $out      = array();

  $target_dir = basename($orig);
  $parent_dir = dirname($orig);

  // Quote every path: unquoted, a space or shell metacharacter in a path
  // breaks the command or lets it run something else entirely.
  $cmd = sprintf(
    'tar -czf %s -C %s %s',
    escapeshellarg($dest),
    escapeshellarg($parent_dir),
    escapeshellarg($target_dir)
  );
  exec($cmd,$out,$code);
  return $code;
}
101
// Delete every temporary file recorded in the global $file_a.
//
// Paths that do not exist are skipped, so the function may be called at any
// point of the run and more than once.
function cleanup() {
  global $file_a;

  if (!is_array($file_a)) return;

  foreach ($file_a as $path) {
    // unlink() instead of shelling out to "rm -f": no shell is involved, so
    // spaces or metacharacters in a path can neither break the removal nor
    // be executed as commands.
    if (is_file($path) || is_link($path)) unlink($path);
  }
}
106
// Log in to the forum by POSTing the account credentials to $url.
//
// Session cookies are stored in "my_cookies.txt" in the current working
// directory, where upload() picks them up again.
//
// NOTE(review): the account credentials are hard-coded below; consider
// moving them out of the source file.
//
// $url - login URL
//
// Returns the response body as a string, or false if the request failed.
function login($url) {
  $post_a = array(
    'op'         => 'auth',
    'method'     => 'login',
    'username'   => 'voxforge-ivr',
    'identifier' => 'X4voxforge2007@bert',
    'submit'     => 'login',
  );

  // CURLOPT_TIMEOUT limits the WHOLE request, name lookup and connect
  // included. With the old value of 10 the 120 second connect timeout could
  // never take effect, so the total is now the connect timeout plus the
  // 10 seconds previously allowed for the transfer.
  $connect_timeout  = 120;
  $transfer_timeout = 10;

  $ch = curl_init();
  curl_setopt($ch, CURLOPT_COOKIEJAR, "my_cookies.txt");
  curl_setopt($ch, CURLOPT_COOKIEFILE, "my_cookies.txt");
  curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $connect_timeout);
  curl_setopt($ch, CURLOPT_TIMEOUT, $connect_timeout + $transfer_timeout);
  curl_setopt($ch, CURLOPT_FOLLOWLOCATION,1);
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  curl_setopt($ch, CURLOPT_URL, $url);
  curl_setopt($ch, CURLOPT_POST, 1);
  curl_setopt($ch, CURLOPT_POSTFIELDS, $post_a);
  $result = curl_exec($ch);

  /* To quote curl_errno documentation:
       Returns the error number for the last cURL operation on the resource ch, or 0 (zero) if no error occurred. */
  if (curl_errno($ch)) {
    error_log('ERROR: curl_exec,msg="'.curl_error($ch).'"');
    $result = false;
  }

  // Close the handle on the error path as well, so it is never leaked.
  curl_close($ch);
  return $result;
}
137
// Post a new forum thread with the session archive attached.
//
// The thread body is assembled from the README, LICENSE and prompts files in
// $session_dir, with every newline turned into "<br>\n". The cookies written
// by login() are read from "my_cookies.txt".
//
// $url         - upload URL
// $session_id  - session name; used in the thread title
// $tar_gz_path - path of the .tgz archive to attach
// $session_dir - directory containing README, LICENSE and prompts
//
// Returns the response body as a string, or false if the request failed.
function upload($url,$session_id,$tar_gz_path,$session_dir) {
  $body_s  = '';
  $body_s .= file_get_contents("$session_dir/README");
  $body_s .= "<h2>License:</h2>\n";
  $body_s .= file_get_contents("$session_dir/LICENSE");
  $body_s .= "<h2>Transcriptions (i.e. the prompts file):</h2>\n";
  $body_s .= file_get_contents("$session_dir/prompts");
  // A literal newline needs no regular expression.
  $body_s = str_replace("\n","<br>\n",$body_s);

  // The "@/path" upload syntax is deprecated since PHP 5.5 and gone in
  // PHP 7, where the string would be posted as plain text. Use CURLFile when
  // it exists and keep the old syntax for older interpreters.
  $file_field = class_exists('CURLFile') ? new CURLFile($tar_gz_path) : "@$tar_gz_path";

  $post_a = array(
    'func'        => 'editSave',
    'assetId'     => 'new',
    'class'       => 'WebGUI::Asset::Post::Thread',
    'proceed'     => 'showConfirmation',
    'title'       => "voxforge-ivr-$session_id",
    'content'     => $body_s,
    '__storageId_action' => 'upload',
    'storageId_file' => $file_field,
    'subscribe'   => '1',
    'submit'      => 'save',
  );

  $ch = curl_init();
  curl_setopt($ch, CURLOPT_COOKIEJAR, "my_cookies.txt");
  curl_setopt($ch, CURLOPT_COOKIEFILE, "my_cookies.txt");
  curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120);
  curl_setopt($ch, CURLOPT_TIMEOUT, 28800);
  curl_setopt($ch, CURLOPT_FOLLOWLOCATION,1);
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  curl_setopt($ch, CURLOPT_URL, $url);
  curl_setopt($ch, CURLOPT_VERBOSE, 1);
  curl_setopt($ch, CURLOPT_POST, 1);
  curl_setopt($ch, CURLOPT_POSTFIELDS, $post_a);
  $result = curl_exec($ch);

  /* To quote curl_errno documentation:
       Returns the error number for the last cURL operation on the resource ch, or 0 (zero) if no error occurred. */
  if (curl_errno($ch)) {
    error_log('ERROR: curl_exec,msg="'.curl_error($ch).'"');
    $result = false;
  }

  // Close the handle on the error path as well, so it is never leaked.
  curl_close($ch);
  return $result;
}
182
// Look for the greeting the forum shows to a logged-in user: the word
// "Hello" followed by a link. The match is case-insensitive.
//
// $subject - response body returned by login()
//
// Returns the matched greeting word, or false when no greeting is present.
function parse_login($subject) {
  $found = array();
  $hit   = preg_match('/(Hello) <a href=".+">(.+)<\/a>/im',$subject,$found);

  return $hit ? $found[1] : false;
}
188
// Look for the confirmation text "Your post has been received" in the
// response to an upload. The match is case-insensitive.
//
// $subject - response body returned by upload()
//
// Returns the matched confirmation text, or false when it is absent.
function parse_upload($subject) {
  $found = array();
  $hit   = preg_match('/(Your post has been received)/im',$subject,$found);

  return $hit ? $found[1] : false;
}
194
195?>
Note: See TracBrowser for help on using the browser.