#!/usr/bin/perl -w

# Monte Carlo simulation of a three-candidate (Kerry / Bush / Nader) contest
# driven by per-state poll results.
#
# Input:  allpolls.csv in the current directory, one poll per line, with the
#         comma-separated fields
#             day, length, state, votes, kerry, bush, nader
#         where kerry/bush/nader are percentages (they are scaled by 0.01).
#         Only the FIRST row seen for each state is used.
#         NOTE(review): this presumably relies on the file being ordered with
#         the preferred (most recent?) poll first -- confirm.
# Output: a one-line summary of win percentages on STDERR, and on STDOUT a
#         histogram of (Bush votes - Kerry votes), one "bin percentage" pair
#         per line, sorted by bin.
#
# NOTE: the "#!" line must stay on its own line.  Anything that shares a
# physical line with it is part of the comment and never runs.

BEGIN { push @INC, '/home/keithw/polling/local/lib/perl/5.6.1' }

use strict;

my $BINSIZE       = 10;                # width of the margin histogram bins
my $SIZE          = 600;               # simulated respondents per state
my $DISTRICT_SIZE = 250;               # simulated respondents per district
my $RUNS          = 100_000;           # number of simulated contests
my $POLL_FILE     = 'allpolls.csv';    # poll data, read from the cwd

# Per-state data, keyed by state (or district) name.
my (%votes, %kerry_mean, %bush_mean, %nader_mean, %size);

# ---------------------------------------------------------------------------
# Load the polls.
# ---------------------------------------------------------------------------
open my $polls, '<', $POLL_FILE or die "Cannot open $POLL_FILE: $!";

while ( my $line = <$polls> ) {
    chomp $line;

    # A blank line would otherwise create an entry for an undefined state.
    next if $line =~ /^\s*$/;

    my ($day, $length, $state, $votes, $kerry, $bush, $nader)
        = split /,/, $line;

    # Keep only the first poll seen for each state.
    next if ( $votes{ $state } );

    # split() discards trailing empty fields, so an empty final column
    # arrives as undef rather than "".  Treat both as "no Nader figure".
    $nader = 0 if ( !defined $nader or $nader eq "" );

    $votes{ $state }      = $votes;
    $kerry_mean{ $state } = 0.01 * $kerry;
    $bush_mean{ $state }  = 0.01 * $bush;
    $nader_mean{ $state } = 0.01 * $nader;
    $size{ $state }       = $SIZE;
}

close $polls or die "Cannot close $POLL_FILE: $!";

# ---------------------------------------------------------------------------
# Maine and Nebraska: the statewide entry is worth 2 votes and each district
# is a separate 1-vote contest.  Districts reuse the statewide poll means but
# are simulated with the smaller $DISTRICT_SIZE sample.
# NOTE(review): presumably models these states' per-district allocation of
# electoral votes -- confirm.
# ---------------------------------------------------------------------------
$votes{ "Maine" }    = 2;
$votes{ "Nebraska" } = 2;

for my $district (qw[Maine_D1 Maine_D2 Nebraska_D1 Nebraska_D2 Nebraska_D3]) {
    my ($state) = $district =~ /^(.*?)_/;

    # Without a statewide poll the district would have undefined means, every
    # simulated draw would tie at 0-0-0, and the tie-retry loop would never
    # terminate.  Fail loudly instead.
    die "No poll found for $state (needed for $district)"
        unless defined $kerry_mean{ $state };

    $votes{ $district }      = 1;
    $kerry_mean{ $district } = $kerry_mean{ $state };
    $bush_mean{ $district }  = $bush_mean{ $state };
    $nader_mean{ $district } = $nader_mean{ $state };
    $size{ $district }       = $DISTRICT_SIZE;
}

# 56 entries are expected in total (statewide entries plus the 5 districts).
if ( (scalar keys %votes) != 56 ) {
    die "Not enough polls!";
}

my $simulations = 0;
my ($kerry_wins, $bush_wins, $nader_wins, $within_20, $ties) = (0, 0, 0, 0, 0);
my %bush_margin_binned;

# limit($n): clamp $n to the range 0..100 and return it.
# Not called anywhere in the code visible here; kept for compatibility.
sub limit {
    my $n = shift @_;
    $n = 100 if ( $n > 100 );
    $n = 0   if ( $n < 0 );
    return $n;
}

# simulate_winner($state): simulate one poll-sized sample for $state and
# return 'kerry', 'bush' or 'nader' for whichever candidate has a strict
# plurality.  A sample with no strict plurality is discarded and redrawn.
sub simulate_winner {
    my ($state) = @_;

    # Cumulative thresholds on [0, 1); hoisted out of the per-respondent
    # loop.  The sums are formed in the same order as before so the floating
    # point values are unchanged.
    my $kerry_cut = $kerry_mean{ $state };
    my $bush_cut  = $kerry_cut + $bush_mean{ $state };
    my $nader_cut = $bush_cut + $nader_mean{ $state };
    my $draws     = $size{ $state };

    while (1) {
        my ($kerry_result, $bush_result, $nader_result) = (0, 0, 0);

        for ( 1 .. $draws ) {
            my $vote = rand;
            if    ( $vote < $kerry_cut ) { $kerry_result++; }
            elsif ( $vote < $bush_cut )  { $bush_result++; }
            elsif ( $vote < $nader_cut ) { $nader_result++; }
            # Otherwise the respondent backs none of the three: not counted.
        }

        return 'kerry'
            if ( $kerry_result > $bush_result and $kerry_result > $nader_result );
        return 'bush'
            if ( $bush_result > $kerry_result and $bush_result > $nader_result );
        return 'nader'
            if ( $nader_result > $kerry_result and $nader_result > $bush_result );
        # No strict winner: draw the sample again.
    }
}

# ---------------------------------------------------------------------------
# Run the simulated contests.
# ---------------------------------------------------------------------------
for ( 1 .. $RUNS ) {
    my %won = ( kerry => 0, bush => 0, nader => 0 );

    # The winner of each state/district takes all of its votes.
    for my $state ( keys %votes ) {
        $won{ simulate_winner($state) } += $votes{ $state };
    }

    my ($kerry_votes, $bush_votes, $nader_votes) = @won{qw(kerry bush nader)};

    if ( $kerry_votes > $bush_votes and $kerry_votes > $nader_votes ) {
        $kerry_wins++;
    }
    elsif ( $bush_votes > $kerry_votes and $bush_votes > $nader_votes ) {
        $bush_wins++;
    }
    elsif ( $nader_votes > $kerry_votes and $nader_votes > $bush_votes ) {
        $nader_wins++;
    }
    else {
        $ties++;
    }

    if ( abs( $kerry_votes - $bush_votes ) < 20 ) {
        $within_20++;
    }

    # Round Bush's margin to the nearest multiple of $BINSIZE.
    # NOTE(review): the "+ 0" presumably normalises a negative zero so that
    # it shares a hash key with 0 -- confirm.
    my $bush_margin = $bush_votes - $kerry_votes;
    my $bin = $BINSIZE * sprintf("%.0f", $bush_margin / $BINSIZE) + 0;
    $bush_margin_binned{ $bin }++;

    $simulations++;
}

# ---------------------------------------------------------------------------
# Report.  Dividing the run count by 100 first turns each count / $simulations
# into a percentage.
# ---------------------------------------------------------------------------
$simulations /= 100;

$kerry_wins /= $simulations;
$bush_wins  /= $simulations;
$nader_wins /= $simulations;
$within_20  /= $simulations;
$ties       /= $simulations;

print STDERR "Probabilities: Kerry ($kerry_wins%), Bush ($bush_wins%), Nader ($nader_wins%), Within 20 ($within_20%), Exact tie ($ties%).\n";

for ( sort { $a <=> $b } keys %bush_margin_binned ) {
    print "$_ ", $bush_margin_binned{ $_ } / $simulations, "\n";
}