#!/imsa/bin/perl -w

# Lexical Steganography Decoder v1.0
# Copyright 1998 Keith Winstein <keithw@imsa.edu>

$oversample = 4;
$max_mode = 4;

open DATA, 'tlex.data';;
while (<DATA>) {
  my ($word, @synset) = split;
  $wordcache{ $word } = [ $word, @synset ];
}
close DATA;

%huffman = ( ' ' => "111",
	     E => "000",
	     T => "1101",
	     A => "1011",
	     I => "1001",
	     O => "1000",
	     R => "0111",
	     S => "0110",
	     N => "0100",
	     H => "11001",
	     C => "10101",
	     L => "10100",
	     D => "01011",
	     M => "00111",
	     U => "00110",
	     P => "00100",
	     F => "110001",
	     G => "110000",
	     B => "010100",
	     W => "001011",
	     Y => "001010",
	     V => "0101010",
	     K => "01010110",
	     X => "010101110",
	     Q => "0101011110",
	     J => "01010111110",
	     Z => "01010111111"
	   );

%revhuff = reverse %huffman;

my $line_num = -1;
my @nonwordrec;
my %synsetable;
my %ordinal;
my $s = 0;

while (defined(my $line = <>)) {
  $line_num++;

  chomp $line;
  $line = 'a ' . $line;

  my @words = split /[^A-Za-z\-]+/, $line;
  my @nonwords = split /[A-Za-z\-]+/, $line;

  $wordrec[ $line_num ] = \@words;
  $nonwordrec[ $line_num ] = \@nonwords;

  my $word_num = -1;
 WORD: for my $word (@words) {
    $word_num++;
    next WORD unless (defined($wordcache{ $word }));
    $synsetable{ $line_num . ' ' . $word_num } = $wordcache{ $word };
    $ordinal{ $line_num . ' ' . $word_num } = $s++;

    print STDERR "Looking at $word (", (join ", ", @{$wordcache{ $word }})
      , ") \n";
  }
}

my $variance = 0;
for (keys %synsetable) {
  print STDERR "Eyeing $_\n";
  $variance += (log (scalar @{$synsetable{ $_ }}))/(log 2);
}

print STDERR "Variance bits in text: $variance\n";

my @real_zones = sort { $ordinal{ $a } <=> $ordinal{ $b } } keys %synsetable;

print STDERR "rz: @real_zones\n";

for my $mode (1 .. $max_mode) {
  print STDERR "Using mode $mode encoding frequency.\n";
  
  my (@zones) = @real_zones;
  $bits = '';

  BIG: while (scalar @zones > 0) {
    my @these_zones;
    my $bits_assigned = 0;
    while ($bits_assigned < $mode * $oversample) {
      if (scalar @zones == 0) {
	last BIG;
      }
      push @these_zones, (my $z = shift @zones);
      print STDERR "z: $z\n";
      $bits_assigned += (log(scalar @{$synsetable{ $z }}))/(log 2);
    }
    print STDERR "tz: @these_zones\n";
    decode(\@these_zones, $oversample);
  }
  print "bits: $bits\n";
  compute($bits);
}

sub compute {
  my ($bits) = @_;
  my @bitarray = split '', $bits;

  my $token = '';

  while (scalar @bitarray > 0) {
    $token .= shift @bitarray;
    if (defined($revhuff{ $token })) {
      print $revhuff{ $token };
      $token = '';
    }
  }
  print "\n";
}

sub decode {
  my ($r_zones, $num_bits) = @_;

  my (@zones) = @{$r_zones};

  my @choices;
  my @word;

  for my $i (0 .. $#zones) {
    my ($l, $w) = split /\s+/, $zones[$i];
    $word[$i] = $wordrec[ $l ][ $w ];
    @{$choices[$i]} = sort @{$wordcache{ $word[$i] }};
  }

  print STDERR "word: @word\n";

  print STDERR "*\n";
  my @vector;
  WORD: for my $i (0 .. $#word) {
    for my $j (0 .. (scalar @{$choices[$i]} - 1)) {
      print STDERR "-";
      if ($choices[$i][$j] eq $word[$i]) {
	push @vector, $j;
	next WORD;
      }
    }
  }

  my @iterarray = (0) x (scalar @word);
  my @codearray = (0) x $num_bits;
  print STDERR "num_bits: $num_bits\n";
  my @codes;

  my $iter = 0;

  print STDERR "^\n";
  LOOP: while (scalar @iterarray == scalar @word) {
    $iter++;
    print STDERR "." unless ($iter % 1000);
    
    my $equal = 1;
    for (my $q = 0; $q <= $#iterarray; $q++) {
      $equal = 0 unless ($iterarray[$q] eq $vector[$q]);
    }

    if ($equal) {
      $bits .= (join '', @codearray[0 .. ($num_bits - 1)]);
      print STDERR "bits now $bits\n";
      last LOOP;
    }

    $iterarray[ 0 ]++;
    $codearray[ 0 ]++;
    for my $i (0 .. $#iterarray) {
      if ($iterarray[ $i ] >= scalar @{$choices[$i]}) {
	$iterarray[ $i ] = 0;
	$iterarray[ $i + 1 ]++;
      }
    }
    
    if (scalar @codearray > $num_bits) {
      pop @codearray;
      $codearray[ 0 ]++;
      for my $i (0 .. $#codearray) {
	if ($codearray[ $i ] >= 2) {
	  $codearray[ $i ] = 0;
	  $codearray[ $i + 1 ]++;
	}
      }
    }
    for my $i (0 .. $#codearray) {
      if ($codearray[ $i ] >= 2) {
	$codearray[ $i ] = 0;
	$codearray[ $i + 1 ]++;
      }
    }
  }
}

