#!/usr/athena/bin/perl #-*- perl -*- # collect acronyms sub chopall { my ($line1) = @_; while ($line1 =~ /[\n\r]$/){chop($line1);} return $line1;} open (FILE,"$ARGV[0]") || die "$ARGV[0] $!"; #List of strings to parse $lineNo = 0; #counter on number of lines read $parsed = 0; #counter on fully parsed lines $unparsed = 0; #counter on lines with text remaining LINE: while () { $line = $_; print "##$line"; $lineNo += 1; $line =~ s/\badd\b//gi; #remove superfluous addition words $line =~ s/\bplease\b//gi; #remove superfluous addition words $line =~ s/\btaken\b//gi; #remove superfluous words $line =~ s/\bapply\b//gi; #remove superfluous words $line =~ s/\bone-half\b/0.5/gi; #alter number words to digits $line =~ s/\bone\b/1/gi; #alter number words to digits $line =~ s/\btwo\b/2/gi; #alter number words to digits $line =~ s/\bthree\b/3/gi; #alter number words to digits $line =~ s/ to /-/gi; #alter "a to b" to "a-b" $name = ""; $dose = ""; $route = ""; $freq = ""; $quant = ""; $refill = ""; $dir = ""; open (UNITS, "units") || die "units $!"; #Lexicon of dosage units open (FREQS, "frequencies") || die "frequencies $!"; #Lexicon of frequency abbreviations open (ROUTES, "routes") || die "routes $!"; #Lexicon of route abbreviations open (DRUGS, "Dictionary.txt") || die "Dictionary.txt $!"; #Lexicon of drug names ROUTE: while (){ $rte = $_; chop $rte;#Remove \n at the end if ($line =~ /(\b$rte\b)/i){ $route = $1; $line =~ s/$route/ROUTE/g; last ROUTE; } } FREQ: while (){ $frequency = $_; chop $frequency;#Remove \n at the end if ($line =~ /(\b$frequency\b)/i){ $freq = $1; $line =~ s/$freq/FREQ/g; last FREQ; } } if ($line =~ /(\bprn\b)/){ #check for the as needed clause $freq = "$freq prn"; $line =~ s/$1/FREQ/g; } DOSE: while (){ $unit = $_; chop $unit;#Remove \n at the end if ($line =~ /\b([0-9.\-\/]+ $unit\b)/i){ $dose .= " $1"; $line =~ s/$1/DOSE/; } } if ($line =~ /(\btab[a-z]+\b [0-9]+\b)/i){ $dose .= " $1"; $line =~ s/$1/DOSE/; } DRUG: while (){ $drug = $_; chop $drug;#Remove \n at the end if ($line =~ /(\b$drug\b)/i){ $name = $1; $line =~ s/$name/DRUG/i; last DRUG; } } QUANTITY: if ($line =~ /\#([0-9]+) x ([0-9]+)/){ $quant = $1; $refill = $2; $line =~ s/(quantity )?\#$1 x $2/QUANT REFILL/; } elsif ($line =~ /(\#[0-9]+)/){ $quant = $1; $line =~ s/(quantity )?$quant/QUANT/; } if ($line =~ /\bquantity ([0-9]+ ([a-z]+\b)?)/){ $quant = $1; $line =~ s/quantity $quant/QUANT/; } REFILL: if ($line =~ /([0-9]+ year)/){ $refill = $1; $line =~ s/$refill/REFILL/; } if ($line =~ /(([0-9]+|no) refills?)/){ $refill = $1; $line =~ s/$refill/REFILL/; } DIRECTIONS: if ($line =~ /(\b(for|as per|from) ([0-9]+ )?[A-Z]?[a-z]+\b)/){ $dir = $1; $line =~ s/$dir/DIRECTIONS/; } MOREDOSE: if ($line =~ /DRUG ([0-9]+\W?) /){ $dose = "$1 $dose"; $line =~ s/$1/DOSE/; } close UNITS; close FREQ; close ROUTES; close DRUGS; print "Name: $name \n"; print "Dose: $dose \n"; print "Route: $route \n"; print "Freq: $freq \n"; print "Quant: $quant \n"; print "Refill: $refill \n"; print "Directions: $dir \n"; if ($line =~ /[a-z]/){ $unparsed += 1; $line =~ s/DRUG//g; $line =~ s/DOSE//g; $line =~ s/FREQ//g; $line =~ s/ROUTE//g; $line =~ s/QUANT//g; $line =~ s/REFILL//g; $line =~ s/DIRECTIONS//g; print "Unparsed words: $line \n"; } else{ $parsed += 1; } print "\n\n"; } print "$lineNo lines parsed\n"; print "$parsed lines fully parsed\n"; print "$unparsed lines with text remaining\n"; close FILE; # close OFILE;