#! /bin/csh
# split-into-tokens: print a sorted list of the unique ASCII words in a file.

if ($#argv != 1) then
    echo " "
    echo "'split-into-tokens' attempts to return a sorted list of unique"
    echo " ascii words that are contained in a given file"
    echo " "
    echo " syntax: split-into-tokens <file>"
    echo " "
    exit 1
endif

# Replace punctuation and digits with spaces, split the result into one
# word per line, then sort case-insensitively and keep only unique entries.
# Parentheses are not removed by sed (those commands were commented out in
# the original); lines containing them are filtered out by the trailing
# fgrep commands instead.
# NOTE: the target character of the second sed command was illegible in the
# source; a semicolon is assumed here.
cat $1 | \
sed "s/\=/ /g" | \
sed "s/\;/ /g" | \
sed "s/\:/ /g" | \
sed "s/\./ /g" | \
sed "s/\-/ /g" | \
sed "s/\#/ /g" | \
sed "s%/% %g" | \
sed 's/\"/ /g' | \
sed "s/\!/ /g" | \
sed "s/\@/ /g" | \
sed "s/\[/ /g" | \
sed "s/\]/ /g" | \
sed "s/\{/ /g" | \
sed "s/\}/ /g" | \
sed "s/\~/ /g" | \
sed "s/\&/ /g" | \
sed "s/\,/ /g" | \
sed "s/\'/ /g" | \
sed "s/[0-9]/ /g" | \
awk '{for (i = 1; i <= NF; i++) print $i}' | \
sort -fiu | \
fgrep -iv '(' | \
fgrep -iv ')'
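
# Example usage (a minimal sketch; the file name "notes.txt" is only
# illustrative and not part of the original script):
#
#   chmod +x split-into-tokens
#   ./split-into-tokens notes.txt
#
# For an input line such as
#
#   foo bar, foo-baz (qux)
#
# the expected output is one unique word per line (duplicates that differ
# only in case are collapsed by 'sort -fiu'):
#
#   bar
#   baz
#   foo
#
# "qux" is dropped because its token still contains parentheses, and the
# trailing fgrep commands discard any line containing '(' or ')'.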