#!/usr/bin/perl

use strict;
use warnings;

# usage: cat some.words | espeak -v de -x -q 3>&1 1>&2 2>&3 | ./espeak2phones.pl > some.phones

# Tokens that have to be transformed from espeak to our phoneset.
# Keys are espeak tokens; values are the text written to the output in
# their place (possibly several space-separated phones).
my %trans = (
	# one espeak token becomes one phone
	'3'  => '6',
	'A'  => 'a',
	'A:' => 'a:',
	'E2' => 'E',
	'W'  => '9',
	'y'  => 'Y',
	'Y:' => '2:',
	'*'  => 'r',
	'j/' => 'j',
	'l/' => 'l',

	# one espeak token becomes two phones
	'dZ' => 'd Z',
	'pF' => 'p f',
	'tS' => 't S',
	'ts' => 't s',

	# ambiguous tokens: a "warn: ..." marker is written to the output verbatim
	'i2' => 'warn: I/i ',
	'C2' => 'warn: g/C',

	# tokens starting with an underscore
	'_!' => 'Q',
	'_|' => 'Q',
	# NOTE(review): '_' is also a key of %ignore, and the main loop consults
	# %ignore before %trans, so 'sil' is never actually printed -- confirm
	# which behaviour is intended.
	'_'  => 'sil',
);

# Tokens that are spelled identically in espeak and in our phoneset; they
# are copied to the output unchanged.
my @keep = (
	qw( @ a aI aU E E: e: I i: O o: OY U u: y: ),
	qw( b C d D f g h j k l m n N p s S t T v w x z Z ),
	' ',    # a single blank is a token too; it cannot be written inside qw()
);

# Tokens that espeak is known to produce but that should not regularly
# appear; the main loop emits a warning whenever one of them is consumed.
my %error = map { ( $_ => 1 ) } ('EI', 'D', 'r', 'r/');

# Tokens that espeak is known to produce and that can safely be dropped;
# the main loop consumes them without printing anything.
my %ignore = map { ( $_ => 1 ) } (
	'@-', ':', ';', '_^_', q{'}, '%%', '_', ',',
);

# Perl's canonical true and false values.
my $TRUE  = !!1;
my $FALSE = !!0;

# Every known token, longest first, so that a longer token such as 'A:' is
# always tried before its prefix 'A'.
my @tokens = sort { length($b) <=> length($a) }
	(keys %trans, @keep, keys %error, keys %ignore);

# Convert espeak phoneme output read from STDIN into our phoneset, one
# output line per input line, with every emitted phone followed by a blank.
#
# Each input line is consumed from the left: the longest known token that
# prefixes the remaining text is removed and then
#   - reported on STDERR if it is listed in %error,
#   - dropped silently if it is listed in %ignore (checked before %trans),
#   - replaced by its %trans entry if it has one,
#   - printed unchanged otherwise (it is a @keep token).
# Text that starts with no known token is reported on STDERR and skipped one
# character at a time, so that unexpected input can never stall the loop.
while (my $line = <STDIN>) {
	TOKEN:
	while ($line !~ m/^\s*$/) { # while there's still something to process
		# @tokens is ordered longest-first, so the first hit is the longest match
		my $token;
		for my $candidate (@tokens) {
			if ($line =~ s/^\Q$candidate\E//) {
				$token = $candidate;
				last;
			}
		}

		if (!defined $token) {
			warn "something did not match: $line\n";
			# discard the offending character; without this the line would
			# stay unchanged and the loop would never terminate
			$line =~ s/^.//s;
			next TOKEN;
		}

		warn "Error case found: $token in $line\n" if ($error{$token});
		next TOKEN if ($ignore{$token});

		if (exists $trans{$token}) {
			print "$trans{$token} ";
		} else {
			print "$token "; # this must be a keep token
		}
	}
	print "\n";
}

__END__

3    6
@-   ignore
@    @
a    a
A    a
A:   a:
aI   aI
aU   aU
E    E
E2   E
E:   E:
e:   e:
EI   throw error
I    I
i2   I/i: (i: reduce to I)
i:   i:
O    O
o:   o:
OY   OY
U    U
u:   u:
W    9
y    Y
y:   y:
Y:   2:

*    r
:    ignore
;    ignore
b    b
C    C
C2   if (vowel follows) then /g/ otherwise C
d    d
D    throw error
dZ   d Z
f    f
g    g
h    h
j    j
j/   j
k    k
l    l
l/   l
m    m
n    n
N    N
p    p
pF   p f
r    throw error (upside down R)
r/   throw error (upside down R)
s    s
S    S
t    t
tS   t S
ts   t s
v    v
x    x
z    z
Z    Z
_!   Q
_|   Q
_^_  ignore
'    ignore
%%   ignore