#!/usr/athena/bin/perl
#-*- perl -*-

# collect acronyms

sub chopall {
    my ($line1) = @_;
    while ($line1 =~ /[\n\r]$/){chop($line1);}
    return $line1;}

open (FILE,"$ARGV[0]") || die "$ARGV[0] $!"; #List of strings to parse

$lineNo = 0; #counter on number of lines read
$parsed = 0; #counter on fully parsed lines
$unparsed = 0; #counter on lines with text remaining


LINE: while (<FILE>) {
    $line = $_;
    print "##$line";
    $lineNo += 1;
    $line =~ s/\badd\b//gi; #remove superfluous addition words
    $line =~ s/\bplease\b//gi; #remove superfluous addition words
    $line =~ s/\btaken\b//gi; #remove superfluous words
    $line =~ s/\bapply\b//gi; #remove superfluous words
    $line =~ s/\bone-half\b/0.5/gi; #alter number words to digits
    $line =~ s/\bone\b/1/gi; #alter number words to digits
    $line =~ s/\btwo\b/2/gi; #alter number words to digits
    $line =~ s/\bthree\b/3/gi; #alter number words to digits
    $line =~ s/ to /-/gi; #alter "a to b" to "a-b"
    $name = "";
    $dose = "";
    $route = "";
    $freq = "";
    $quant = "";
    $refill = "";
    $dir = "";
    open (UNITS, "units") || die "units $!"; #Lexicon of dosage units
    open (FREQS, "frequencies") || die "frequencies $!"; #Lexicon of frequency abbreviations
    open (ROUTES, "routes") || die "routes $!"; #Lexicon of route abbreviations
    open (DRUGS, "Dictionary.txt") || die "Dictionary.txt $!"; #Lexicon of drug names
  ROUTE: while (<ROUTES>){
      $rte = $_;
      chop $rte;#Remove \n at the end
	  if ($line =~ /(\b$rte\b)/i){
	      $route = $1;	
	      $line =~ s/$route/ROUTE/g;
	      last ROUTE;
	  }
  }
  FREQ: while (<FREQS>){
      $frequency = $_;
      chop $frequency;#Remove \n at the end
	  if ($line =~ /(\b$frequency\b)/i){
	      $freq = $1;	
	      $line =~ s/$freq/FREQ/g;
	      last FREQ;
	  }
  }
    if ($line =~ /(\bprn\b)/){ #check for the as needed clause
	$freq = "$freq prn";
	$line =~ s/$1/FREQ/g;
	}
  
  DOSE: while (<UNITS>){
      $unit = $_;
      chop $unit;#Remove \n at the end
	  if ($line =~ /\b([0-9.\-\/]+ $unit\b)/i){
	      $dose .= " $1";	
	      $line =~ s/$1/DOSE/;
	  }
  }
    if ($line =~ /(\btab[a-z]+\b [0-9]+\b)/i){
	$dose .= " $1";
	$line =~ s/$1/DOSE/;
    }

  DRUG: while (<DRUGS>){
      $drug = $_;
      chop $drug;#Remove \n at the end
	  if ($line =~ /(\b$drug\b)/i){
	      $name = $1;
	      $line =~ s/$name/DRUG/i;
	      last DRUG;
	  }
  }

    
  QUANTITY: if ($line =~ /\#([0-9]+) x ([0-9]+)/){
      $quant = $1;
      $refill = $2;
      $line =~ s/(quantity )?\#$1 x $2/QUANT REFILL/;
  }
    elsif ($line =~ /(\#[0-9]+)/){
	$quant = $1;
	$line =~ s/(quantity )?$quant/QUANT/;
    }

    if ($line =~ /\bquantity ([0-9]+ ([a-z]+\b)?)/){
	$quant = $1;
	$line =~ s/quantity $quant/QUANT/;
    }

  REFILL: if ($line =~ /([0-9]+ year)/){
      $refill = $1;
      $line =~ s/$refill/REFILL/;
  }
    if ($line =~ /(([0-9]+|no) refills?)/){
	$refill = $1;
	$line =~ s/$refill/REFILL/;
    }


  DIRECTIONS: if ($line =~ /(\b(for|as per|from) ([0-9]+ )?[A-Z]?[a-z]+\b)/){
      $dir = $1;
      $line =~ s/$dir/DIRECTIONS/;
  }

   
  MOREDOSE: if ($line =~ /DRUG ([0-9]+\W?) /){
      $dose = "$1 $dose";
      $line =~ s/$1/DOSE/;
  }

    close UNITS;
    close FREQ;
    close ROUTES;
    close DRUGS;

    print "Name: $name \n";
    print "Dose: $dose \n";
    print "Route: $route \n";
    print "Freq: $freq \n";
    print "Quant: $quant \n";
    print "Refill: $refill \n";
    print "Directions: $dir \n";


    if ($line =~ /[a-z]/){
	$unparsed += 1;
	$line =~ s/DRUG//g;
	$line =~ s/DOSE//g;
	$line =~ s/FREQ//g;
	$line =~ s/ROUTE//g;
	$line =~ s/QUANT//g;
	$line =~ s/REFILL//g;
	$line =~ s/DIRECTIONS//g;
	print "Unparsed words: $line \n";
    }
    else{
	$parsed += 1;
    }

    print "\n\n";


}

print "$lineNo lines parsed\n";
print "$parsed lines fully parsed\n";
print "$unparsed lines with text remaining\n";


close FILE;
# close OFILE;