#!/usr/athena/bin/perl -w
# Tamil input preprocessor using the itrans transliteration method 
#
## Copyright (C) 2002-2005, Arun A Tharuvai <aatharuv@MIT.eDU>
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

use charnames ":full";
use strict;

# Independent vowel letters, keyed by their transliterated spelling.
# The main loop consults this table for a vowel that is not consumed as
# the vowel sign of a preceding consonant.  Long vowels can be written
# either doubled ('aa') or as a capital ('A'); both map to the same letter.
my %indvowels = (
    'a'  => "\N{TAMIL LETTER A}",

    'aa' => "\N{TAMIL LETTER AA}",
    'A'  => "\N{TAMIL LETTER AA}",

    'i'  => "\N{TAMIL LETTER I}",

    'ii' => "\N{TAMIL LETTER II}",
    'I'  => "\N{TAMIL LETTER II}",

    'u'  => "\N{TAMIL LETTER U}",

    'uu' => "\N{TAMIL LETTER UU}",
    'U'  => "\N{TAMIL LETTER UU}",

    'e'  => "\N{TAMIL LETTER E}",

    'ee' => "\N{TAMIL LETTER EE}",
    'E'  => "\N{TAMIL LETTER EE}",

    'ai' => "\N{TAMIL LETTER AI}",

    'o'  => "\N{TAMIL LETTER O}",

    'oo' => "\N{TAMIL LETTER OO}",
    'O'  => "\N{TAMIL LETTER OO}",

    'au' => "\N{TAMIL LETTER AU}",

    # Not a vowel letter, but looked up the same way as one.
    'q'  => "\N{TAMIL SIGN VISARGA}",
);

# Dependent vowel signs, keyed by the same spellings as the independent
# vowels.  detvowel() looks a consonant's following letters up here.
# There is deliberately no entry for 'a': detvowel() handles it separately
# and emits no sign for it.
my %depvowels = (
    'aa' => "\N{TAMIL VOWEL SIGN AA}",
    'A'  => "\N{TAMIL VOWEL SIGN AA}",

    'i'  => "\N{TAMIL VOWEL SIGN I}",

    'ii' => "\N{TAMIL VOWEL SIGN II}",
    'I'  => "\N{TAMIL VOWEL SIGN II}",

    'u'  => "\N{TAMIL VOWEL SIGN U}",

    'uu' => "\N{TAMIL VOWEL SIGN UU}",
    'U'  => "\N{TAMIL VOWEL SIGN UU}",

    'e'  => "\N{TAMIL VOWEL SIGN E}",

    'ee' => "\N{TAMIL VOWEL SIGN EE}",
    'E'  => "\N{TAMIL VOWEL SIGN EE}",

    'ai' => "\N{TAMIL VOWEL SIGN AI}",

    'o'  => "\N{TAMIL VOWEL SIGN O}",

    'oo' => "\N{TAMIL VOWEL SIGN OO}",
    'O'  => "\N{TAMIL VOWEL SIGN OO}",

    'au' => "\N{TAMIL VOWEL SIGN AU}",
);

# Consonant letters, keyed by one- or two-character transliterated
# spellings.  Several spellings share a single Tamil letter (e.g. 'k' and
# 'g', 'p' and 'b').  The main loop tries the two-character keys before
# the one-character keys.
my %consonants = (
    'k'  => "\N{TAMIL LETTER KA}",
    'g'  => "\N{TAMIL LETTER KA}",

    '~N' => "\N{TAMIL LETTER NGA}",
    'N^' => "\N{TAMIL LETTER NGA}",

    'ch' => "\N{TAMIL LETTER CA}",

    '~n' => "\N{TAMIL LETTER NYA}",

    'T'  => "\N{TAMIL LETTER TTA}",
    'Th' => "\N{TAMIL LETTER TTA}",

    'N'  => "\N{TAMIL LETTER NNA}",

    't'  => "\N{TAMIL LETTER TA}",
    'th' => "\N{TAMIL LETTER TA}",

    'n'  => "\N{TAMIL LETTER NA}",

    'p'  => "\N{TAMIL LETTER PA}",
    'b'  => "\N{TAMIL LETTER PA}",

    'm'  => "\N{TAMIL LETTER MA}",
    'y'  => "\N{TAMIL LETTER YA}",
    'r'  => "\N{TAMIL LETTER RA}",
    'l'  => "\N{TAMIL LETTER LA}",

    'v'  => "\N{TAMIL LETTER VA}",
    'w'  => "\N{TAMIL LETTER VA}",

    'J'  => "\N{TAMIL LETTER LLLA}",
    'z'  => "\N{TAMIL LETTER LLLA}",

    'L'  => "\N{TAMIL LETTER LLA}",
    'R'  => "\N{TAMIL LETTER RRA}",
    '^n' => "\N{TAMIL LETTER NNNA}",
    'j'  => "\N{TAMIL LETTER JA}",
    'Sh' => "\N{TAMIL LETTER SSA}",
    's'  => "\N{TAMIL LETTER SA}",
    'h'  => "\N{TAMIL LETTER HA}",

    # 'x' expands to a three-character sequence: KA, VIRAMA, SSA.
    'x'  => "\N{TAMIL LETTER KA}\N{TAMIL SIGN VIRAMA}\N{TAMIL LETTER SSA}",
);
# detvowel($l1, $l2)
#
# Decide what follows a consonant, given the next two input characters.
#
#   $l1, $l2  the two characters after the consonant; either may be undef
#             or '' when the look-ahead runs past the end of the input.
#
# Returns the list ($used, $sign):
#   (2, sign)    $l1.$l2 is a two-letter key of %depvowels
#   (1, sign)    $l1 alone is a key of %depvowels
#   (1, "")      $l1 is 'a', which has no entry in %depvowels and adds
#                no sign
#   (0, VIRAMA)  no vowel follows
# $used is the number of input characters the caller should skip.
sub detvowel {
    my ($l1, $l2) = @_;

    # Missing look-ahead is treated as "no character" so that the lookups
    # below neither warn under -w nor misread the input.
    $l1 = '' unless defined $l1;
    $l2 = '' unless defined $l2;

    # Only try the two-letter spelling when two letters are really there;
    # otherwise $l1 . $l2 would collapse to a one-letter key and a single
    # vowel would be reported as consuming two characters.
    if ($l1 ne '' && $l2 ne '' && defined $depvowels{$l1 . $l2}) {
        return (2, $depvowels{$l1 . $l2});
    }
    if (defined $depvowels{$l1}) {
        return (1, $depvowels{$l1});
    }
    if ($l1 eq 'a') {
        return (1, "");
    }
    return (0, "\N{TAMIL SIGN VIRAMA}");
}
# detvowelold($l1, $l2)
#
# Earlier, self-contained version of detvowel(): it has its own built-in
# vowel tables and does not consult %depvowels.  Nothing in the visible
# script calls it.
#
#   $l1, $l2  the two characters after a consonant; either may be undef
#             or '' when there is no such character.
#
# Returns the list ($used, $sign), where $used is how many of the two
# characters were consumed and $sign is the text to append:
#   (2, sign)    for the two-letter spellings ai, aa, au, ii, uu
#   (1, sign)    for A, i, I, u, U, e, E, o, O
#   (1, "")      for a plain 'a'
#   (0, VIRAMA)  when no vowel follows
# The doubled spellings 'ee' and 'oo' are not recognised here; only the
# first letter is consumed.
sub detvowelold {
    my ($l1, $l2) = @_;

    # Treat missing look-ahead as empty so the comparisons do not warn.
    $l1 = '' unless defined $l1;
    $l2 = '' unless defined $l2;

    # Two-letter spellings, indexed first letter then second letter.
    my %pair_sign = (
        'a' => {
            'i' => "\N{TAMIL VOWEL SIGN AI}",
            'a' => "\N{TAMIL VOWEL SIGN AA}",
            # Consumes both letters, like every other pair.
            'u' => "\N{TAMIL VOWEL SIGN AU}",
        },
        'i' => { 'i' => "\N{TAMIL VOWEL SIGN II}" },
        'u' => { 'u' => "\N{TAMIL VOWEL SIGN UU}" },
    );

    # One-letter spellings.
    my %single_sign = (
        'A' => "\N{TAMIL VOWEL SIGN AA}",
        'i' => "\N{TAMIL VOWEL SIGN I}",
        'I' => "\N{TAMIL VOWEL SIGN II}",
        'u' => "\N{TAMIL VOWEL SIGN U}",
        'U' => "\N{TAMIL VOWEL SIGN UU}",
        'e' => "\N{TAMIL VOWEL SIGN E}",
        'E' => "\N{TAMIL VOWEL SIGN EE}",
        'o' => "\N{TAMIL VOWEL SIGN O}",
        'O' => "\N{TAMIL VOWEL SIGN OO}",
    );

    # The first exists() guard keeps the nested lookup from autovivifying
    # an entry for an arbitrary $l1.
    if (exists $pair_sign{$l1} && exists $pair_sign{$l1}{$l2}) {
        return (2, $pair_sign{$l1}{$l2});
    }
    if (exists $single_sign{$l1}) {
        return (1, $single_sign{$l1});
    }
    if ($l1 eq 'a') {
        return (1, "");
    }
    return (0, "\N{TAMIL SIGN VIRAMA}");
}

# Main filter: read each input line, transliterate it, and print it.
#
# At every position the following are tried in order: a two-character
# consonant, a one-character consonant, a two-character independent vowel,
# a one-character independent vowel.  After a consonant, detvowel() is
# given the next two characters and says what to append and how many of
# them it consumed.  Any other character, whitespace included, is copied
# through unchanged and in its original position.

# The output contains characters above U+00FF; declare the encoding rather
# than letting perl fall back to it with a "Wide character in print"
# warning on every line.
binmode(STDOUT, ':utf8');

while (defined(my $input = <>)) {
    # Remove the line terminator here; exactly one "\n" is written back
    # when the converted line is printed.
    chomp $input;

    # Decode UTF-8 input so that non-ASCII pass-through text can share the
    # output buffer with Tamil characters without being encoded twice.
    # Lines that are not valid UTF-8 are left as they are.
    utf8::decode($input);

    my @line = split //, $input;
    my $size = scalar @line;

    # The look-ahead below reads up to three positions past $counter.
    # Pad with newlines, which occur in no table key, so that it never
    # touches an undefined element at the end of the line.
    push @line, ("\n") x 3;

    my $outstr  = "";
    my $counter = 0;
    while ($counter < $size) {
        my $one = $line[$counter];
        my $two = $one . $line[$counter + 1];

        if (defined $consonants{$two}) {
            my ($used, $sign) = detvowel($line[$counter + 2], $line[$counter + 3]);
            $outstr  .= $consonants{$two} . $sign;
            $counter += 2 + $used;
        } elsif (defined $consonants{$one}) {
            my ($used, $sign) = detvowel($line[$counter + 1], $line[$counter + 2]);
            $outstr  .= $consonants{$one} . $sign;
            $counter += 1 + $used;
        } elsif (defined $indvowels{$two}) {
            $outstr  .= $indvowels{$two};
            $counter += 2;
        } elsif (defined $indvowels{$one}) {
            $outstr  .= $indvowels{$one};
            $counter += 1;
        } else {
            # Whitespace and unrecognised characters go into the same
            # buffer as the converted text so they keep their place.
            $outstr  .= $one;
            $counter += 1;
        }
    }
    print "${outstr}\n";
}
