#!/usr/bin/perl
## universal cyrillic decoder
##---------------------------------------------------------------------------##
##  Copyright (C) 1995:  Ilya Sandler  isandler@utkux.utcc.utk.edu
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License.
##  THERE IS NO WARRANTY OF ANY KIND. USE IT AT YOUR OWN RISK.
##---------------------------------------------------------------------------##
##  cyrcode is a Perl program to convert between different
##  cyrillic encodings, cyrcode uses external encoding table cyrtab.
##  The program reads standard input and writes to standard output
##  usage:
##        cyrcode.pl from to <infile >outfile
##  where from and to are names of encoding tables
##  note table names should coincide with those indicated in cyrtab
##
##  The table location defaults to the path compiled in below and may be
##  overridden with the CYRTAB environment variable.
##--------------------------------------------------------

use strict;
use warnings;

# Path of the encoding table. The first non-comment line of the table is read
# as the list of encoding names; every following line is read as one row with
# one whitespace-separated cell per encoding, in the same column order.
my $table_path =
    ( defined $ENV{CYRTAB} && length $ENV{CYRTAB} )
    ? $ENV{CYRTAB}
    : '/mit/kolya/arch/share/lib/cyrtab';

# File-scoped handle shared with readnext() below.
open( my $table_fh, '<', $table_path )
    or die "cyrcode: cannot open encoding table '$table_path': $!\n";

my @names = readnext();

# Explicit -help goes to stdout with success status; a malformed command line
# prints the same text to stderr and fails, so callers can detect the error.
my $wants_help = ( @ARGV && $ARGV[0] eq '-help' ) ? 1 : 0;
if ( $wants_help || @ARGV < 2 ) {
    my $out = $wants_help ? \*STDOUT : \*STDERR;
    print {$out} "Universal cyrillic decoder \n";
    print {$out} "usage: \n cyrcode.pl from to <infile >outfile";
    print {$out} "\n where 'from' and 'to' are the names of encoding tables \n";
    print {$out} "currently supported tables are: ", join( ", ", @names ), "\n";
    print {$out} "Example: cyrcode.pl koi8 alt < file1.txt \n";
    print {$out} "converts file1.txt from 'koi8' to 'alt' and writes the result to ";
    print {$out} "standard output \n";
    exit( $wants_help ? 0 : 1 );
}

# Table names and command-line names are compared case-insensitively.
my ( $from_arg, $to_arg ) = map { lc } @ARGV[ 0, 1 ];
@names = map { lc } @names;

my $from = find_table( $from_arg, @names );
my $to   = find_table( $to_arg,   @names );

if ( $from == -1 || $to == -1 ) {
    print STDERR "unknown encoding table name \n";
    print STDERR "known table names:", join( ', ', @names ), "\n";
    exit 1;
}

# Start from the identity mapping: any byte the table does not mention is
# copied through unchanged. 'C' (unsigned char) covers the full 0..255 range.
my @decode = map { pack( 'C', $_ ) } 0 .. 255;

while ( !eof($table_fh) ) {
    my @row = readnext();
    last if !@row || $row[0] eq '';

    my ( $src, $dst ) = @row[ $from, $to ];

    # A row that is too short to have both columns carries no mapping.
    next unless defined $src && defined $dst;

    # Lowercase letters in the source cell mean the source column holds text
    # rather than byte codes, so it cannot be used as a lookup key.
    if ( $src =~ tr/a-z// ) {
        print STDERR "can not convert from ", $names[$from], "\n";
        exit 1;
    }

    # Only a decimal byte code is a usable index into the decode table;
    # anything else would otherwise silently overwrite entry 0.
    next unless $src =~ /\A\d+\z/ && $src <= 255;

    if ( $dst =~ tr/a-zA-Z'"// ) {
        # Target cell contains letters or quotes: emit it as literal text.
        $decode[$src] = $dst;
    }
    else {
        # Target cell is taken as a numeric byte code. Non-numeric cells
        # numify to 0, as they always have; only the warning is silenced.
        no warnings 'numeric';
        $decode[$src] = pack( 'C', $dst % 256 );
    }
}
close $table_fh;

# The actual conversion: a byte-for-byte lookup. Both streams are put in
# binary mode so no I/O layer rewrites the bytes being translated.
binmode STDIN;
binmode STDOUT;

my $buffer;
while ( read( STDIN, $buffer, 65536 ) ) {
    print join '', @decode[ unpack( 'C*', $buffer ) ];
}

exit 0;

# subprograms----------------------

# find_table($wanted, @table_names)
# Returns the index of the table name selected by $wanted, or -1 if none.
# An exact match wins. Otherwise the last table name that occurs as a
# substring of $wanted is used, which keeps the historical loose matching.
sub find_table {
    my ( $wanted, @table_names ) = @_;

    my $loose = -1;
    for my $i ( 0 .. $#table_names ) {
        return $i if $wanted eq $table_names[$i];
        $loose = $i if index( $wanted, $table_names[$i] ) >= 0;
    }
    return $loose;
}

# readnext()
# Reads lines from the encoding table until one has content left after
# removing the line ending, leading blanks and any '#' comment, then returns
# that line split on runs of spaces/tabs. Returns an empty list once the
# table is exhausted.
sub readnext {
    my $line = '';
    while ( $line eq '' && !eof($table_fh) ) {
        my $raw = <$table_fh>;
        last unless defined $raw;    # read error: treat as end of table

        $line = $raw;
        $line =~ s/[\r\n]//g;        # strips line ending (LF or CRLF)
        $line =~ s/^[ \t]+//;        # strips leading white space
        $line =~ s/#.*//s;           # strips comment, including a bare '#'
    }
    return split /[ \t]+/, $line;
}