#!/usr/bin/perl -w

BEGIN { push @INC, '/home/keithw/polling/local/lib/perl/5.6.1' }

use strict;

# Simulation parameters.
my $BINSIZE = 10;           # width of each bin in the Bush-margin histogram
my $SIZE = 600;             # random voter draws per state in each simulated election
my $DISTRICT_SIZE = 250;    # random voter draws per Maine/Nebraska district entry
my $RUNS = 100_000;         # number of simulated elections

# Load the poll table.  Each row is split into: day, length, state, votes,
# kerry, bush, nader.  The candidate columns are scaled by 0.01 below, so
# they are presumably percentages -- confirm against the data file.
my $poll_file = 'allpolls.csv';
open my $polls, '<', $poll_file or die "Cannot open $poll_file: $!";

my (%votes, %kerry_mean, %bush_mean, %nader_mean, %size);

while (my $line = <$polls>) {
    chomp $line;

    # A blank line (e.g. at end of file) carries no poll; skipping it avoids
    # creating a bogus entry keyed on the empty string.
    next if $line =~ /^\s*$/;

    my ($day, $length, $state, $votes, $kerry, $bush, $nader) = split /,/, $line;

    # Only the first row seen for a state is used; later rows are ignored.
    next if ($votes{ $state });

    # The Nader column may be empty or missing altogether; treat both as 0%.
    $nader = 0 if ( !defined $nader or $nader eq "" );

    $votes{ $state } = $votes;
    $kerry_mean{ $state } = 0.01 * $kerry;
    $bush_mean{ $state } = 0.01 * $bush;
    $nader_mean{ $state } = 0.01 * $nader;
    $size{ $state } = $SIZE;
}

close $polls or die "Cannot close $poll_file: $!";

# Maine and Nebraska are modelled as a 2-vote statewide entry plus one
# 1-vote entry per district.  Each district reuses its state's poll shares
# but is simulated with the smaller $DISTRICT_SIZE sample.
$votes{ "Maine" } = 2;
$votes{ "Nebraska" } = 2;

for my $district (qw[Maine_D1 Maine_D2 Nebraska_D1 Nebraska_D2 Nebraska_D3]) {
    my ($state) = $district =~ /^(.*?)_/;

    # Without a statewide poll there is nothing to copy, and the simulation
    # could never pick a winner for this district.
    die "No statewide poll for $state in $poll_file"
        unless defined $kerry_mean{ $state };

    $votes{ $district } = 1;
    $kerry_mean{ $district } = $kerry_mean{ $state };
    $bush_mean{ $district } = $bush_mean{ $state };
    $nader_mean{ $district } = $nader_mean{ $state };
    $size{ $district } = $DISTRICT_SIZE;
}

# Sanity check: exactly 56 entries are expected (presumably 50 states + DC
# + the 5 district entries added above).
my $entries = scalar keys %votes;
if ( $entries != 56 ) {
    die "Expected 56 entries in the poll table, found $entries";
}

# Tallies filled in by the simulation.
my $simulations = 0;
my ($kerry_wins, $bush_wins, $nader_wins, $within_20, $ties) = (0, 0, 0, 0, 0);
my %bush_margin_binned;

# Clamp a number to the closed range [0, 100].
#
# Takes one numeric argument and returns 100 if it is above 100, 0 if it is
# below 0, and the argument itself otherwise.
sub limit {
    my ($value) = @_;

    return 100 if $value > 100;
    return 0   if $value < 0;

    return $value;
}

# Monte Carlo simulation: run $RUNS simulated elections.  In each run every
# entry of %votes is "polled" by drawing $size{...} uniform random numbers
# and bucketing them by the candidates' poll shares; whoever gets strictly
# the most draws collects that entry's votes.  Per-run outcomes are tallied
# in $kerry_wins, $bush_wins, $nader_wins, $ties and $within_20, and Bush's
# margin over Kerry is histogrammed in %bush_margin_binned.

# The cumulative share cut-offs never change between runs, so compute them
# once per entry instead of once per random draw.  While doing so, refuse to
# start if an entry can never produce a winner: the tie-breaking "redo"
# below would otherwise spin forever.
my %cutoffs;
for my $state ( keys %votes ) {
    die "Cannot simulate $state: no poll shares loaded"
        unless defined $kerry_mean{ $state }
           and defined $bush_mean{ $state }
           and defined $nader_mean{ $state };
    die "Cannot simulate $state: no sample size"
        unless defined $size{ $state } and $size{ $state } >= 1;

    my $kerry_cut = $kerry_mean{ $state };
    my $bush_cut  = $kerry_cut + $bush_mean{ $state };
    my $nader_cut = $bush_cut + $nader_mean{ $state };

    # rand() is never negative, so if no cut-off is positive no draw can
    # ever be credited to anyone.
    die "Cannot simulate $state: every candidate has a zero poll share"
        unless grep { $_ > 0 } $kerry_cut, $bush_cut, $nader_cut;

    $cutoffs{ $state } = [ $kerry_cut, $bush_cut, $nader_cut ];
}

for (1 .. $RUNS) {
    my ($kerry_votes, $bush_votes, $nader_votes) = (0, 0, 0);

    for my $state ( keys %votes ) {
        my ($kerry_cut, $bush_cut, $nader_cut) = @{ $cutoffs{ $state } };
        my $draws = $size{ $state };

      FAKESTATE: {
            my ($kerry_result, $bush_result, $nader_result) = (0, 0, 0);

            for (1 .. $draws) {
                my $vote = rand;
                if ( $vote < $kerry_cut ) {
                    $kerry_result++;
                } elsif ( $vote < $bush_cut ) {
                    $bush_result++;
                } elsif ( $vote < $nader_cut ) {
                    $nader_result++;
                }
                # Draws at or above the last cut-off are credited to no one.
            }

            if ( $kerry_result > $bush_result
                 and $kerry_result > $nader_result ) {
                $kerry_votes += $votes{ $state };
            } elsif ( $bush_result > $kerry_result
                      and $bush_result > $nader_result ) {
                $bush_votes += $votes{ $state };
            } elsif ( $nader_result > $kerry_result
                      and $nader_result > $bush_result ) {
                $nader_votes += $votes{ $state };
            } else {
                # No strict plurality winner: re-poll this entry.
                redo FAKESTATE;
            }
        }
    }

    # Overall winner needs strictly more votes than both rivals; anything
    # else is counted as a tie.
    if ( $kerry_votes > $bush_votes and $kerry_votes > $nader_votes ) {
        $kerry_wins++;
    } elsif ( $bush_votes > $kerry_votes and $bush_votes > $nader_votes ) {
        $bush_wins++;
    } elsif ( $nader_votes > $kerry_votes and $nader_votes > $bush_votes ) {
        $nader_wins++;
    } else {
        $ties++;
    }

    if ( abs( $kerry_votes - $bush_votes ) < 20 ) {
        $within_20++;
    }

    # Round Bush's margin to the nearest multiple of $BINSIZE.  The trailing
    # "+ 0" presumably normalises a negative zero so it shares the "0" bin.
    my $bush_margin = $bush_votes - $kerry_votes;
    my $bush_margin_binned = $BINSIZE * sprintf("%.0f", $bush_margin / $BINSIZE) + 0;
    $bush_margin_binned{ $bush_margin_binned }++;

    $simulations++;
}

# Report the results as percentages of all simulated elections.

# From here on $simulations holds "runs per percentage point", so dividing a
# raw tally by it yields a percentage directly.
$simulations = $simulations / 100;

# The loop variable aliases each tally, so the originals are rescaled in place.
for my $tally ($kerry_wins, $bush_wins, $nader_wins, $within_20, $ties) {
    $tally = $tally / $simulations;
}

# Summary line goes to STDERR so that STDOUT carries only the histogram.
print STDERR "Probabilities: Kerry ($kerry_wins%), Bush ($bush_wins%), Nader ($nader_wins%), Within 20 ($within_20%), Exact tie ($ties%).\n";

# Histogram on STDOUT: one "<binned Bush margin> <percentage of runs>" line
# per bin, in ascending numeric order of margin.
my @bins = sort { $a <=> $b } keys %bush_margin_binned;
foreach my $bin (@bins) {
    my $percent = $bush_margin_binned{ $bin } / $simulations;
    print "$bin $percent\n";
}
