#!/usr/athena/bin/perl my %schars = ( '~' => 'tilde', '`' => 'backtick', '!' => 'exclamation', '@' => 'at', '#' => 'pound', '\$' => 'dollar', '%' => 'percent', '\^' => 'carat', '&' => 'and', '\*' => 'astrisk', '$' => 'lparenthesis', '$' => 'rparenthesis', '_' => 'underscore', '-' => 'hyphen', '\+' => 'plus', '=' => 'equals', '\[' => 'lbracket', '\]' => 'rbracket', '\{' => 'lbrace', '\}' => 'rbrace', '\\\\' => 'backslash', '\|' => 'pipe', ';' => 'semicolon', ':' => 'colon', '\'' => 'quote', '"' => 'doublequote', '<' => 'lessthan', '>' => 'greaterthan', ',' => 'comma', '\.' => 'period', '/' => 'slash', '\?' => 'question' ); my %wordidxs; my $nwordidxs = 0; my $nwords = 0; my @wordnames; my @wordfreqs; my @lfreqs; my @rfreqs; my @plinewords; my @linewords; my @nlinewords; while (my $line = ) { chomp ($line); $line = lc ($line); study ($line); foreach my $schar (keys %schars) { $line =~ s/$schar/ char$schars{$schar} /g; } $line =~ s/\s+/ /g; $line =~ s/^\s+//g; $line =~ s/\s+$//g; next if ($line =~ /^$/); @plinewords = @linewords; @linewords = @nlinewords; @nlinewords = split (/\s+/, $line); next if ($#plinewords == -1); for ($c = 0; $c <= $#linewords; $c++) { my $pword; my $cword; my $nword; $nwords++; if ($c == 0) { $pword = $plinewords[$#plinewords]; } else { $pword = $linewords[$c - 1]; } if ($c == $#linewords) { $nword = $nlinewords[0]; } else { $nword = $linewords[$c + 1]; } $cword = $linewords[$c]; if (!(defined ($wordidxs{$cword}))) { $wordidxs{$cword} = $nwordidxs; $wordnames[$nwordidxs] = $cword; $nwordidxs = $nwordidxs + 1; } my $cwordidx = $wordidxs{$cword}; my $pwordidx = $wordidxs{$pword}; my $nwordidx = $wordidxs{$nword}; if (!(defined ($wordfreqs[$cwordidx]))) { $wordfreqs[$cwordidx] = 0; } if (!(defined ($lfreqs[$cwordidx]))) { $lfreqs[$cwordidx] = (); } if (!(defined ($rfreqs[$cwordidx]))) { $rfreqs[$cwordidx] = (); } if (!(defined ($lfreqs[$cwordidx]->[$pwordidx]))) { $lfreqs[$cwordidx]->[$pwordidx] = 0; } if (!(defined ($rfreqs[$cwordidx]->[$nwordidx]))) { $rfreqs[$cwordidx]->[$nwordidx] = 0; } $wordfreqs[$cwordidx]++; $lfreqs[$cwordidx]->[$pwordidx]++; $rfreqs[$cwordidx]->[$nwordidx]++; } } dispvec (); exit; sub dispfull { foreach my $cword (sort keys %wordidxs) { my $cwordidx = $wordidxs{$cword}; print $cword . " " x (30 - length ($cword)) . $wordfreqs[$cwordidx] . ":\n"; print " left hand side:\n"; foreach my $pword (sort keys %wordidxs) { my $pwordidx = $wordidxs{$pword}; if (!(defined ($lfreqs[$cwordidx]->[$pwordidx]))) { $lfreqs[$cwordidx]->[$pwordidx] = 0; } print " "; print $pword . " " x (30 - length ($pword)); print $lfreqs[$cwordidx]->[$pwordidx] . "\n"; } print " right hand side:\n"; foreach my $nword (sort keys %wordidxs) { my $nwordidx = $wordidxs{$nword}; if (!(defined ($rfreqs[$cwordidx]->[$nwordidx]))) { $rfreqs[$cwordidx]->[$nwordidx] = 0; } print " "; print $nword . " " x (30 - length ($nword)); print $rfreqs[$cwordidx]->[$nwordidx] . "\n"; } } return; } sub dispvec { print "$nwords\n"; print "$nwordidxs\n"; print "\n"; my $c = 0; foreach my $cword (sort keys %wordidxs) { my $cwordidx = $wordidxs{$cword}; print $c++ . " " . $cword . " " . $wordfreqs[$cwordidx] . "\n"; } print "\n"; foreach my $cword (sort keys %wordidxs) { my $cwordidx = $wordidxs{$cword}; foreach my $pword (sort keys %wordidxs) { my $pwordidx = $wordidxs{$pword}; if (!(defined ($lfreqs[$cwordidx]->[$pwordidx]))) { $lfreqs[$cwordidx]->[$pwordidx] = 0; } print $lfreqs[$cwordidx]->[$pwordidx] . " "; } print "\n"; } print "\n"; foreach my $cword (sort keys %wordidxs) { my $cwordidx = $wordidxs{$cword}; foreach my $nword (sort keys %wordidxs) { my $nwordidx = $wordidxs{$nword}; if (!(defined ($rfreqs[$cwordidx]->[$nwordidx]))) { $rfreqs[$cwordidx]->[$nwordidx] = 0; } print $rfreqs[$cwordidx]->[$nwordidx] . " "; } print "\n"; } return; }