Princeton Election Consortium

A first draft of electoral history. Since 2004

Senate_estimator.m, July 11 2014

%%% Senate_estimator.m – a MATLAB script
%%% Copyright 2008, 2014 by Samuel S.-H. Wang
%%% Noncommercial-use-only license:
%%% You may use or modify this software, but only for noncommercial purposes.
%%% To seek a commercial-use license, contact the author at sswang@princeton.edu.

% Likelihood analysis of all possible outcomes of election based
% on the meta-analytical methods of Prof. Sam Wang, Princeton University.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Senate_estimator.m
%
% This script loads 'polls.median.2014Senate.txt' and generates or updates/replaces 4 CSV files:
%
% Senate_estimates.csv
% all in one line:
% 2 values – median_seats for the two parties, where the first value is for Democrats/Independents
% 1 value – Dem/Ind control probability
% 3 values – assigned (>95% prob) seats for each party, with a third entry for undecided;
% 4 values – confidence intervals for party 1's seat count: +/-1 sigma, then
% 95% band; and
% 1 value – number of state polls used to make the estimates.
% 1 value – the meta-margin (calculated in the meta-margin section of this script and appended).
%
% Another file, Senate_estimate_history, is updated with the same
% information as Senate_estimates.csv plus 1 value for the date.
%
% stateprobs.csv
% An N-line file giving percentage probabilities for Dem/Ind win of the popular vote, state by state.
% Note that this is the same as the EV calculation, except that each race is worth 1 seat.
% Each line has five comma-separated fields: the Dem/Ind win probability, the current median
% polling margin, the win probability under a D+2% bias, the win probability under an R+2% bias,
% and the two-letter postal abbreviation.
%
% Senate_histogram.csv
% A 100-line file giving the probability histogram of each seat-count outcome. Line 1 is
% the probability of party #1 (Democrats/Independents) getting 1 seat. Line 2 is 2 seats, and so on.
% Note that 0 seats is left out of this histogram for ease of indexing.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

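% This routine reads the workspace variables biaspct, analysisdate and metacalc.
% Any of them that does not exist is given a default in the initialization section below
% (biaspct=0, analysisdate=0, metacalc=1).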

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%% Initialize variables %%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% polls.state=[
% 'AL,AK,AZ,AR,CA,CO,CT,DC,DE,FL,GA,HI,ID,IL,IN,IA,KS,KY,LA,ME,MD,MA,MI,MN,MS,MO,MT,NE,NV,NH,NJ,NM,NY,NC,ND,OH,OK,OR,PA,RI,SC,SD,TN,TX,UT,VT,VA,WA,WV,WI,WY '];
% Races tracked in this cycle. Every entry is a two-letter postal code followed by
% exactly one separator character, so each race occupies 3 characters of the string;
% the race count below is derived from that fixed width.
polls.state=['AK,AR,CO,GA,HI,IA,KS,KY,LA,MI,MN,MS,MT,NC,NH,OR,SD,VA,WV ']; % 19 races
%AK Begich Sullivan
%AR Pryor Cotton
%CO Udall Gardner
%GA Nunn Kingston
%HI Schatz
%IA Braley Ernst
%KS Roberts Taylor
%KY Grimes McConnell
%LA Landrieu Cassidy
%MI Peters Land
%MN Franken McFadden
%MS Childers Cochran
%MT Walsh Daines
%NC Hagan Tillis
%NH Shaheen Brown
%OR Merkley Wehby
%SD Weiland Rounds
%VA Warner Gillespie
%WV Tennant Capito

% One seat per race (the field is still called EV, matching the presidential script).
polls.EV=ones(1, length(polls.state)/3);
num_states=size(polls.EV,2);

% assignedEV: 1=Dem, 2=GOP, 3=up for election
assignedEV(3)=sum(polls.EV);
assignedEV(1)=40; assignedEV(2)=41; % these are the seats not up for election
Demsafe=assignedEV(1);
% checksum to make sure no double assignment or missed assignment
if (sum(assignedEV)~=100)
    warning('Warning: Senate seats do not sum to 100!')
    assignedEV   % no semicolon: echo the offending breakdown
end

% Optional inputs: supply defaults for any that the caller did not set.
% (String delimiters must be plain ASCII single quotes for MATLAB to parse them.)
if ~exist('biaspct','var')
    biaspct=0;
end
% History is only recorded for the unbiased run.
forhistory=biaspct==0;

if ~exist('analysisdate','var')
    analysisdate=0;   % 0 selects the first num_states rows of the data file
end

if ~exist('metacalc','var')
    metacalc=1;       % 1 = compute the meta-margin, 0 = skip it
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%% Load and parse polling data %%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load the numeric poll summary table. Columns as used below:
%   1 = number of polls, 3 = median margin, 4 = SEM, 5 = date (when present).
polldata=load('polls.median.2014Senate.txt');
numlines = size(polldata,1);
if mod(numlines,num_states)>0
    warning('Warning: polls.median.txt is not a multiple of num_states lines long');
end
% Currently we are using median and effective SEM of the last 3 polls.
% To de-emphasize extreme outliers, in place of SD we use (median absolute deviation)/0.6745

% find the desired data within the file
if analysisdate>0 && numlines>num_states
    foo=find(polldata(:,5)==analysisdate,1,'first');
    % ind=min([size(polldata,1)-50 foo']);
    foo2=find(polldata(:,5)==max(polldata(:,5)),1,'first');
    ind=max([foo2 foo]); %assume reverse time order
    % Take exactly one row per race. The previous hard-coded ind+50 was a leftover
    % from the 51-row presidential version and does not match num_states here.
    polldata=polldata(ind:ind+num_states-1,:);
    clear foo2 foo ind
elseif numlines>num_states
    % polldata = polldata(numlines-num_states+1:numlines,:);
    polldata = polldata(1:num_states,:);
end

% Use statistics from data file
polls.margin=polldata(:,3)';
polls.SEM=polldata(:,4)';
% Floor the SEM at 2 (no semicolon: the result is echoed to the console).
polls.SEM=max(polls.SEM,zeros(1,num_states)+2)
% NOTE(review): the -1 "DC" adjustment looks inherited from the presidential script;
% DC is not in polls.state here. Confirm whether it should still be applied.
totalpollsused=sum(polldata(:,1))-1 % assume DC has no polls

% mock data in case we ever need to do a dry run
% Use three poll (as of 23 July)
%polls.margin=[-14 -7 -10 -10 24 7 20 81 9 -2 -9 30 -13 13 1 10 -20 -16 -19 10 13 16 5 18 -6 0 5 -16 2 3 11 5 13 -4 0 -6 -14 9 4 24 -9 -4 -15 -9 -24 34 0 12 -8 11 -13];
%polls.SEM=zeros(1,num_states)+3;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%% Where the magic happens! %%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Invoke Senate_median, which is defined outside this file. The code below reads
% median_seats, histogram, Senateseats, cumulative_prob, stateprobs and the D/R
% control probabilities after this call, so presumably Senate_median sets them
% in the workspace -- TODO confirm against Senate_median.m.
Senate_median
% No trailing semicolon: presumably this echoes the stateprobs variable to the console.
stateprobs

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%% Plot the histogram %%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Draw the seat-count probability histogram: outcomes 5..9 in red, 10..14 in blue,
% with a vertical red line at 49.5 seats. All string arguments use plain ASCII
% single quotes (the typographic quotes in the pasted source do not parse).
close
phandle=plot([49.5 49.5],[0 max(histogram)*105],'-r','LineWidth',1.5);
EVticks=200:20:380; % not used in this file; kept in case another script reads it
grid on
hold on
bar(Senateseats(5:9),histogram(5:9)*100,'r')
bar(Senateseats(10:14),histogram(10:14)*100,'b')
axis([Senateseats(5)-0.5 Senateseats(14)+0.5 0 max(histogram)*105])
xlabel('Democratic/Independent Senate seats','FontSize',14);
ylabel('Probability (%)','FontSize',14)
set(gcf, 'InvertHardCopy', 'off');
title('Distribution of all possible outcomes','FontSize',14)

% Control-probability captions, rounded to whole percent.
Dstr=['D control: ',num2str(round(D_Senate_control_probability*100)),'%'];
Rstr=['R control: ',num2str(round(R_Senate_control_probability*100)),'%'];
text(Senateseats(5)-0.35,max(histogram)*99,Rstr,'FontSize',18)
text(Senateseats(14)-3.2,max(histogram)*99,Dstr,'FontSize',18)

% Date stamp: today's date unless a specific analysis date was requested.
if analysisdate==0
    datelabel=datestr(now);
else
    datelabel=datestr(analysisdate);
end
text(44.6,max(histogram)*92,datelabel(1:6),'FontSize',12) % first 6 characters of the date string
text(44.6,max(histogram)*86,'election.princeton.edu','FontSize',12)

% Only the unbiased run is exported as an image.
if biaspct==0
    set(gcf,'PaperPositionMode','auto')
    print -djpeg EV_histogram_today.jpg
end

% Confidence limits read off the cumulative distribution.
% Layout: (1),(2) = 1-sigma lower/upper, (3),(4) = 95% lower/upper.
% NOTE(review): if no bin satisfies a threshold, find() returns empty and the
% assignment fails; presumably cumulative_prob always spans these thresholds.
confidenceintervals(3)=Senateseats(find(cumulative_prob<=0.025,1,'last')); % 95-pct lower limit
confidenceintervals(1)=Senateseats(find(cumulative_prob<=0.15865,1,'last')); % 1-sigma lower limit
% This assignment had been swallowed by the comment on the previous line, leaving
% the 1-sigma upper limit silently zero-filled.
confidenceintervals(2)=Senateseats(find(cumulative_prob>=0.84135,1,'first')); % 1-sigma upper limit
confidenceintervals(4)=Senateseats(find(cumulative_prob>=0.975,1,'first')); % 95-pct upper limit

% Take the first maximum so that a tie between bins cannot break the scalar assignment.
% NOTE(review): this is an index into histogram; whether it equals a seat count
% depends on how Senate_median lays out histogram/Senateseats -- confirm.
mode_seats(1)=find(histogram==max(histogram),1,'first');
median_seats(2)=100-median_seats(1); % assume no seats go to a third candidate
mode_seats(2)=100-mode_seats(1); % assume no seats go to a third candidate

% Re-calculate safe EV for each party
% NOTE(review): these sums cover only the polled races and do not add the seats
% not up for election (Demsafe etc.); confirm this is the intended output.
assignedEV(1)=sum(polls.EV(find(stateprobs>=95)));
assignedEV(2)=sum(polls.EV(find(stateprobs<=5)));
assignedEV(3)=100-assignedEV(1)-assignedEV(2);

% Races whose win probability is strictly between 5% and 95%.
uncertain=find(stateprobs>5 & stateprobs<95);
uncertainstates='';
% numel() rather than max(size()): a 1x0 empty result must produce zero iterations.
for i=1:numel(uncertain)
    uncertainstates=[uncertainstates statename2(uncertain(i),polls.state) ' '];
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%% Daily file update %%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Assemble the summary row: median seats (2), D control probability (1),
% assigned seats (3), confidence intervals (4), polls used (1).
outputs=[median_seats D_Senate_control_probability assignedEV confidenceintervals totalpollsused];

% Write files of unbiased statewise percentage probabilities
% Only write these files if bias is zero!
if biaspct==0
    % Export probability histogram:
    dlmwrite('Senate_histogram.csv',histogram')
    % Export state-by-state percentage probabilities as CSV, with 2-letter state abbreviations:
    % Each line includes hypothetical probabilities for D+2% and R+2% biases
    if exist('stateprobs.csv','file')
        delete('stateprobs.csv')   % start fresh; rows are appended one at a time below
    end
    % Win probability (percent) from the normal CDF of (margin +/- 2)/SEM.
    foo=(polls.margin+2)./polls.SEM;
    D2probs=round((erf(foo/sqrt(2))+1)*50);
    foo=(polls.margin-2)./polls.SEM;
    R2probs=round((erf(foo/sqrt(2))+1)*50);
    for i=1:num_states
        % Row format: probability, margin, D+2 probability, R+2 probability, state code.
        foo=[num2str(stateprobs(i)) ',' num2str(polls.margin(i)) ',' num2str(D2probs(i)) ',' num2str(R2probs(i)) ',' statename2(i,polls.state)];
        % The row is already a comma-joined string, so write it with an empty delimiter.
        dlmwrite('stateprobs.csv',foo,'-append','delimiter','')
    end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%% The meta-margin %%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

reality=R_Senate_control_probability;

% Meta-margin: the bias (in percentage points) at which the median outcome first
% reaches 50 Dem/Ind seats, found by sweeping biaspct upward from -7 in steps of
% 0.02 and re-running Senate_median each time. metamargin is the negative of that bias.
if metacalc==0
    metamargin=-999;   % sentinel: meta-margin not calculated
else
    foo=biaspct;       % remember the caller's bias so it can be restored afterwards
    % biaspct=round((269-median_seats(1))/1.25)/10-2; % clever way to start
    % range
    biaspct=-7; % just brute force
    Senate_median
    % The sweep is capped at a bias of 100 so that it cannot run forever when
    % 50 seats is unreachable (for example, too few seats in play).
    while median_seats(1) < 50 && biaspct < 100
        biaspct=biaspct+.02;
        Senate_median
    end
    if median_seats(1) < 50
        warning('Meta-margin search stopped at the bias limit without reaching 50 seats');
    end
    metamargin=-biaspct
    biaspct=foo;
    clear foo
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%% Daily and History Update %%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Append the meta-margin and overwrite today's single-row estimates file.
outputs = [outputs metamargin];
dlmwrite('Senate_estimates.csv', outputs)
% The history file gets the same row prefixed with the date taken from column 5
% of the first poll row; only for the unbiased run, and only when the data
% file has exactly 5 columns.
if forhistory && size(polldata,2)==5
    dlmwrite('Senate_estimate_history.csv',[polldata(1,5) outputs],'-append')
end