#!/imsa/bin/perl -w # Lexical Steganography Encoder v1.0 # Copyright 1998 Keith Winstein $oversample = 4; $max_mode = 4; open DATA, 'tlex.data';; while () { my ($word, @synset) = split; $wordcache{ $word } = [ $word, @synset ]; } close DATA; %huffman = ( ' ' => "111", E => "000", T => "1101", A => "1011", I => "1001", O => "1000", R => "0111", S => "0110", N => "0100", H => "11001", C => "10101", L => "10100", D => "01011", M => "00111", U => "00110", P => "00100", F => "110001", G => "110000", B => "010100", W => "001011", Y => "001010", V => "0101010", K => "01010110", X => "010101110", Q => "0101011110", J => "01010111110", Z => "01010111111" ); my $bitstring; for (split '', shift) { $bitstring .= $huffman{ uc $_ } } sub nextbit { return(undef) if ($bitstring eq ''); my $x = substr($bitstring, 0, 1); unless (length $bitstring == 0) { $bitstring = substr($bitstring, 1); } else { $bitstring = ''; } return $x; } my $line_num = -1; my @nonwordrec; my %synsetable; my %ordinal; my $s = 0; while (defined(my $line = <>)) { $line_num++; chomp $line; $line = 'a ' . $line; my @words = split /[^A-Za-z\-]+/, $line; my @nonwords = split /[A-Za-z\-]+/, $line; $wordrec[ $line_num ] = \@words; $nonwordrec[ $line_num ] = \@nonwords; my $word_num = -1; WORD: for my $word (@words) { $word_num++; next WORD unless (defined($wordcache{ $word })); $synsetable{ $line_num . ' ' . $word_num } = $wordcache{ $word }; $ordinal{ $line_num . ' ' . $word_num } = $s++; print STDERR "Looking at $word (", (join ", ", @{$wordcache{ $word }}) , ") \n"; } } my $variance = 0; for (keys %synsetable) { print STDERR "Eyeing $_\n"; $variance += (log (scalar @{$synsetable{ $_ }}))/(log 2); } print STDERR "Length of message: ", length $bitstring, "\n"; print STDERR "Variance bits in text: $variance\n"; my $mode = int($variance / length $bitstring); if ($mode > $max_mode) { $mode = $max_mode; } unless ($mode >= 1) { print STDERR "Sorry, there is not enough entropy to encode the message.\n"; exit 5; } print STDERR "Using mode $mode encoding frequency.\n"; my @zones = sort { $ordinal{ $a } <=> $ordinal{ $b } } keys %synsetable; while (length $bitstring > 0) { print STDERR "String: $bitstring, Length: ", length $bitstring, "\n"; my @bit_array; for (1 .. $oversample) { my $bit = nextbit; last unless (defined $bit); push @bit_array, $bit; print STDERR "pushed: $bit_array[ $_ - 1 ] ", scalar(@bit_array), "\n"; } my $bits_assigned = 0; my @these_zones; while ($bits_assigned < $mode * scalar(@bit_array)) { push @these_zones, (my $z = shift @zones); $bits_assigned += (log(scalar @{$synsetable{ $z }}))/(log 2); } encode(\@bit_array, \@these_zones); } sub encode { my ($r_bits, $r_zones) = @_; my (@bits) = @{$r_bits}; my (@zones) = @{$r_zones}; my $num_bits = scalar @bits; my @choices; my @word; print STDERR "Encoding @bits into ", (join ", ", @zones), "\n"; for my $i (0 .. $#zones) { my ($l, $w) = split /\s+/, $zones[$i]; $word[$i] = $wordrec[ $l ][ $w ]; @{$choices[$i]} = sort @{$wordcache{ $word[$i] }}; } print STDERR "word: @word\n"; print STDERR "*\n"; my @vector; WORD: for my $i (0 .. $#word) { for my $j (0 .. (scalar @{$choices[$i]} - 1)) { print STDERR "-"; if ($choices[$i][$j] eq $word[$i]) { push @vector, $j; next WORD; } } } my @iterarray = (0) x (scalar @word); my @codearray = (0) x $num_bits; my @codes; my $iter = 0; print STDERR "^\n"; while (scalar @iterarray == scalar @word) { $iter++; print STDERR "." unless ($iter % 1000); my $equal = 1; for (my $q = 0; $q <= $#bits; $q++) { $equal = 0 unless ($codearray[$q] eq $bits[$q]); } if ($equal) { push @codes, (join '', @iterarray); } $iterarray[ 0 ]++; $codearray[ 0 ]++; for my $i (0 .. $#iterarray) { if ($iterarray[ $i ] >= scalar @{$choices[$i]}) { $iterarray[ $i ] = 0; $iterarray[ $i + 1 ]++; } } if (scalar @codearray > $num_bits) { pop @codearray; $codearray[ 0 ]++; for my $i (0 .. $#codearray) { if ($codearray[ $i ] >= 2) { $codearray[ $i ] = 0; $codearray[ $i + 1 ]++; } } } for my $i (0 .. $#codearray) { if ($codearray[ $i ] >= 2) { $codearray[ $i ] = 0; $codearray[ $i + 1 ]++; } } } print STDERR "bits: {", join '', @bits, "} \n"; print STDERR "finding best in ", scalar @codes, " encodings ..."; my $best = v_dist(\@vector, $codes[0]); my $best_num = 0; my $itera = 0; for (@codes) { if (v_dist(\@vector, $_) < $best) { $best = v_dist(\@vector, $_); $best_num = $itera; } $itera++; } print STDERR "best_num: $best_num\n"; my @enc_choices = split '', $codes[$best_num]; print STDERR "done\n"; print STDERR "choices: @enc_choices (", v_dist(\@vector, $codes[$best_num]), ")\n"; for my $i (0 .. $#enc_choices) { my ($l, $w) = split /\s+/, $zones[$i]; $wordrec[ $l ][ $w ] = $choices[$i][ $enc_choices[$i] ]; } } sub v_dist { my ($vec_ref, $p) = @_; my ($dist) = (0, 0); for $i (0 .. (scalar @{$vec_ref} - 1)) { unless ( (@{$vec_ref})[$i] eq substr($p, $i, 1) ) { $dist++; } } return ($dist); } for my $i (0 .. $line_num ) { my $line = ''; *words = $wordrec[ $i ]; *nonwords = $nonwordrec[ $i ]; for (my $i = 0; $i < scalar @words + scalar @nonwords; $i++) { $line .= ($i % 2 ? $words[($i - 1) / 2] : $nonwords[$i / 2]); } $line = substr ($line, 2); print $line, "\n"; }