#!/usr/bin/perl
use strict;
use warnings;
# usage: cat some.words | espeak -v de -x -q 3>&1 1>&2 2>&3 | ./espeak2phones.pl > some.phones
# Tokens that have to be transformed from espeak to our phoneset.
# The replacement text is written to the output verbatim, including the
# "warn: ..." placeholders used for 'i2' and 'C2'.
# NOTE(review): '_' is also listed in %ignore, and ignored tokens are skipped
# before this table is consulted, so the 'sil' mapping is never emitted.
# Confirm which of the two behaviours is actually wanted.
my %trans = (
    '3'  => '6',   'A'  => 'a',   'A:' => 'a:',  'E2' => 'E',
    'i2' => 'warn: I/i ',
    'W'  => '9',   'y'  => 'Y',   'Y:' => '2:',  '*'  => 'r',
    'C2' => 'warn: g/C',
    'dZ' => 'd Z', 'j/' => 'j',   'l/' => 'l',   'pF' => 'p f',
    'tS' => 't S', 'ts' => 't s', '_!' => 'Q',   '_|' => 'Q',
    '_'  => 'sil',
);

# Tokens that are identical in espeak and our phoneset.
my @keep = (
    '@', 'a', 'aI', 'aU', 'E', 'E:', 'e:', 'I', 'i:', 'O', 'o:', 'OY', 'U',
    'u:', 'y:', 'b', 'C', 'd', 'D', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n',
    'N', 'p', 's', 'S', 't', 'T', 'v', 'w', 'x', 'z', 'Z', ' ',
);

# Tokens that are known output of espeak, but that should not regularly
# appear.  They trigger a warning on STDERR; the token is still processed
# afterwards ('D' is also a keep token, the others are printed unchanged).
my %error = ('EI' => 1, 'D' => 1, 'r' => 1, 'r/' => 1);

# Tokens that are known output of espeak and can safely be ignored.
my %ignore = (
    '@-' => 1, ':' => 1, ';' => 1, '_^_' => 1, '\'' => 1, '%%' => 1,
    '_'  => 1, ',' => 1,
);

# Every known token, without duplicates, longest first.  Perl tries the
# alternatives of a regex in the order they are written, so putting the
# longest tokens first makes e.g. 'aI' win over 'a' and '_!' win over '_'.
my %seen;
my @tokens = sort { length $b <=> length $a }
             grep { !$seen{$_}++ }
             (keys %trans, @keep, keys %error, keys %ignore);

# One precompiled pattern that matches (and captures) a known token at the
# very start of the string.
my $token_re = do {
    my $alternation = join '|', map { quotemeta($_) } @tokens;
    qr/\A($alternation)/;
};

# convert_line($line)
#
# Converts one line of espeak phoneme output into our phoneset.
#
#   $line    - the raw input line (a trailing newline is fine)
#   returns  - the converted phones, each one followed by a single space
#              (no trailing newline)
#
# Warns on STDERR when an %error token is seen and when the input contains
# something that is not a known token.  In the latter case exactly one
# character is dropped and processing continues; without that the line would
# never get shorter and the loop would never terminate.
sub convert_line {
    my ($line) = @_;
    my $phones = '';

    while ($line !~ m/^\s*$/) {    # while there's still something to process
        if ($line =~ s/$token_re//) {
            my $token = $1;
            warn "Error case found: $token in $line\n" if $error{$token};
            next if $ignore{$token};
            # Anything that is not in %trans must be a keep token.
            my $phone = exists $trans{$token} ? $trans{$token} : $token;
            $phones .= "$phone ";
        }
        else {
            warn "something did not match: $line\n";
            substr($line, 0, 1, '');    # skip the offending character
        }
    }

    return $phones;
}

while (my $line = <STDIN>) {
    print convert_line($line), "\n";
}
__END__
3 6
@- ignore
@ @
a a
A a
A: a:
aI aI
aU aU
E E
E2 E
E: E:
e: e:
EI throw error
I I
i2 I/i: (i: reduce to I)
i: i:
O O
o: o:
OY OY
U U
u: u:
W 9
y Y
y: y:
Y: 2:
* r
: ignore
; ignore
b b
C C
C2 if (vowel follows) then /g/ otherwise C
d d
D throw error
dZ d Z
f f
g g
h h
j j
j/ j
k k
l l
l/ l
m m
n n
N N
p p
pF p f
r throw error (upside down R)
r/ throw error (upside down R)
s s
S S
t t
tS t S
ts t s
v v
x x
z z
Z Z
_! Q
_| Q
_^_ ignore
' ignore
%% ignore