Princeton Election Consortium

A first draft of electoral history. Since 2004

Senate prediction over time – by Rick in Miami

% Senate estimate 19 September 2012
% Copyright Sam Wang, Princeton University, sswang@princeton.edu
% GNU license – free use but retain this header.
%
% Modifications by reader Rick in Miami
% Notes:
%
% Before running, load the latest CSV data files from HuffPost. An alternate
% approach is to use the wget command below.
%
% If the vector T were started at 7/31, the 7-day tick marks would end on
% 11/6 (election day) as intended. Right now, because T starts on 8/1 and the
% ticks march in 7-day increments toward election day, the last tick falls
% on 10/31, six days before election day.

% Names of the contested Senate races. Each entry is the middle part of a
% CSV file name: the script later opens ['2012-', name, '.csv'].
% (Straight quotes and '...' continuations restored; the typographic
% characters left by the web page are not valid MATLAB.)
files={'connecticut-senate-mcmahon-vs-murphy' 'indiana-senate-mourdock-vs-donnelly' ...
    'massachusetts-senate-brown-vs-warren' 'north-dakota-senate-berg-vs-heitkamp' ...
    'virginia-senate-allen-vs-kaine' 'wisconsin-senate-thompson-vs-baldwin' ...
    'montana-senate-rehberg-vs-tester' 'missouri-senate-mccaskill-vs-akin' ...
    'nevada-senate-heller-vs-berkley' 'arizona-senate-flake-vs-carmona' ...
    'florida-senate-mack-vs-nelson' 'michigan-senate-hoekstra-vs-stabenow' ...
    'new-mexico-senate-heinrich-vs-wilson' 'ohio-senate-brown-vs-mandel'};

% Sign applied to (field 8 - field 9) of each poll row so that the margin is
% positive for a Democratic lead: 1 if D listed first, -1 if R listed first.
% NOTE(review): "listed first" presumably refers to the column order inside
% each CSV, not to the order of names in the file name - confirm per file.
order=[-1 1 1 -1 1 1 -1 1 -1 -1 1 1 1 1];

% One sign per race; a mismatch would silently mis-sign or crash later.
assert(numel(order)==numel(files), ...
    'order must have one entry per file (%d vs %d)', numel(order), numel(files));

% ---- Model parameters ----

% Seats counted as already decided for each party (not simulated).
demsafe = 43;
GOPsafe = 43;

% Number of contested seats: whatever is left of the 100-seat chamber.
numstates = 100 - (demsafe + GOPsafe);

% Offset added to the range of future margin shifts that is swept below.
% bias>0 pushes the sweep toward Democrats, bias<0 toward Republicans.
bias = 0;

% Extra uncertainty term; it is combined in quadrature with each state's
% standard error when the z-score is formed.
un = 0;
% polls newest to oldest from HuffPost.
% last three polls, going back to second half August at the latest
% Margin is + for Dem lead, - for Rep lead

% download latest files
% or use a python script – or download by other means
% for i=1:length(files);
% eval(['!wget -N -r -nd http://elections.huffingtonpost.com/pollster/2012-', ...
% char(files(i)),'.csv']);
% end;

% Daily grid of serial date numbers from 1 Aug 2012 through today; one
% Senate estimate is computed for each entry.
% (Straight quotes restored - the typographic quotes from the web page do
% not parse in MATLAB.)
T=datenum('1-Aug-2012'):datenum(date);

% Probability (%) of Democratic control for each day in T; NaN until filled.
Pdem=NaN(length(T),1);

% =====================================================================
% Senate estimate for every day in T.
%
% Reads:  files, order, numstates, demsafe, bias, un, T
% Writes: Pdem(tind) for every tind; after the loop, alldists, allprobs,
%         mode_dem, mode_gop and prob_dem hold the values for the LAST day
%         in T (the plotting code below relies on alldists and prob_dem).
% =====================================================================

% ---- Step 1: parse each race's CSV once -----------------------------
% The file contents do not depend on the date being evaluated, so they are
% read a single time here instead of once per day.
nfiles=length(files);
pollDate=cell(nfiles,1);   % mid-point date of each poll, newest first
pollMarg=cell(nfiles,1);   % signed margin of each poll, same ordering
for i=1:nfiles
    fname=['2012-',char(files(i)),'.csv'];
    fid=fopen(fname);
    if fid<0
        error('Cannot open poll file "%s" - download the CSV files first.', fname);
    end
    x=textscan(fid,'%s','delimiter',',');
    fclose(fid);

    % The script assumes 13 comma-separated fields per row; reshape errors
    % out if the file does not divide evenly into 13-field records.
    x=x{1}; y=reshape(x,[13 length(x)/13]);
    x=y(:,2:size(y,2));    % drop the header record y(:,1)

    % start date: x(2,:)   end date: x(3,:)
    % the two candidate percentages: x(8,:) and x(9,:); order(i) fixes the
    % sign so that a positive margin is a Democratic lead.
    npolls=size(x,2);
    t=NaN(npolls,2); marg=NaN(npolls,1);
    for n=1:npolls
        t(n,1)=datenum(x(2,n));
        t(n,2)=datenum(x(3,n));
        % str2double instead of str2num: str2num eval()s its argument, which
        % is unsafe on downloaded text. Non-numeric fields become NaN.
        marg(n)=order(i)*(str2double(char(x(8,n)))-str2double(char(x(9,n))));
    end
    t=mean(t,2);           % mid-point of the field period

    % Discard rows whose percentages were not numeric.
    ok=isfinite(marg);
    t=t(ok); marg=marg(ok);

    [t,ind]=sort(t,1,'descend'); marg=marg(ind);
    pollDate{i}=t;
    pollMarg{i}=marg;
end

% ---- Step 2: one estimate per day ------------------------------------
for tind=1:length(T)

    margins=NaN(numstates,10);
    for i=1:nfiles
        t=pollDate{i}; marg=pollMarg{i};

        % For past dates, ignore polls that had not happened yet.
        % NOTE(review): this condition was destroyed when the script was
        % posted (only "if (tind" survived); tind<length(T) is a
        % reconstruction - confirm against the author's copy.
        if (tind<length(T))
            q=find(t>T(tind));
            t(q)=[]; marg(q)=[];
        end

        % if there are more than three polls, take only the most recent three,
        % or all the polls taken within the last week (whichever is larger)
        if (length(t)>3)
            q=find(t>=T(tind)-7);
            if (length(q)>3)
                t=t(q); marg=marg(q);
            else
                t=t(1:3); marg=marg(1:3);
            end
        end

        % If a race has more polls than margins has columns, widen margins
        % with NaN first. Letting MATLAB auto-grow the matrix would pad the
        % other rows with zeros, which would be counted as real 0-point
        % margins in the median and SEM below.
        if length(marg)>size(margins,2)
            margins(:,end+1:length(marg))=NaN;
        end
        margins(i,1:length(marg))=marg;
    end

    % Per-race median margin, poll count and estimated standard error.
    med=nanmedian(margins,2);
    len=NaN(1,size(margins,1));
    sem=len;
    for i=1:size(margins,1)
        len(i)=sum(isfinite(margins(i,:)));
        % NOTE(review): mad(x) with the default flag is the MEAN absolute
        % deviation, whereas 0.6745 is the normal-consistency constant for
        % the MEDIAN absolute deviation (mad(x,1)). Left unchanged so the
        % numbers match the published ones - confirm which was intended.
        sem(i)=mad(margins(i,1:len(i)))/0.6745*1.2/sqrt(len(i));
    end

    alldists=zeros(1,numstates+1); num=0; allprobs=zeros(1,numstates);
    z=zeros(1,numstates);

    % Sweep a uniform shift of every race's margin from -3 to +3 points
    % (offset by bias) and average the resulting seat distributions.
    for shift=-3+bias:0.2:3+bias

        for i=1:numstates
            z(i)=(med(i)+shift)/max(sqrt(sem(i)*sem(i)+un*un),sqrt(1/500/len(i)));
        end

        % Per-race win probability from a t distribution with len degrees
        % of freedom.
        senate_probs=tcdf(z,len);

        % Distribution of the number of contested seats won: convolve the
        % two-point [lose win] distributions of all races.
        senate_dist=[1-senate_probs(1) senate_probs(1)];
        for i=2:numstates
            senate_dist=conv(senate_dist,[1-senate_probs(i) senate_probs(i)]);
        end

        allprobs=allprobs+senate_probs;
        alldists=alldists+senate_dist;
        num=num+1;
    end
    alldists=alldists/num;
    allprobs=allprobs/num;
    alldists=alldists*100;   % express as percent

    %dlmwrite('Sen_histogram.csv', alldists');

    % alldists(k) is the probability of demsafe+k-1 seats in total.
    mode_dem=demsafe-1+find(alldists==max(alldists));
    mode_gop=100-mode_dem;
    % 51 or more seats count fully; exactly 50 seats (index 51-demsafe) is
    % counted with weight 0.88.
    prob_dem=sum(alldists(52-demsafe:numstates+1))+alldists(51-demsafe)*0.88;

    Pdem(tind)=prob_dem;

end % loop over T

% =====================================================================
% Figure: seat histogram for the most recent day (top) and the history of
% the control probability (bottom).
% Reads alldists, prob_dem, Pdem, T, demsafe and numstates from above.
% (Straight quotes and '...' restored so the block parses in MATLAB.)
% =====================================================================
clf;

% ---- Top panel: seat-count histogram ----
subplot(211);
% Draw every bin in red first, then overdraw 50 seats and up in blue.
bar(demsafe:demsafe+numstates,alldists,'r')
% bar(demsafe:49,alldists(1:50-demsafe),'r')
hold on
bar(50:demsafe+numstates,alldists(51-demsafe:numstates+1),'b')
% Re-colour 12% of the 50-seat bar red: prob_dem above counts only 88% of
% that bin. The bin for 50 seats is index 51-demsafe (the original
% hard-coded index 4, which is a different bin when demsafe is 43).
bar(50,alldists(51-demsafe)*0.12,'r')
hold off;
grid on
xlabel('Democratic/Independent Senate seats')
ylabel('Probability (%)')
title(['Probability of Dem. senate: ',num2str(prob_dem,'%3.1f'),'%']);

% ---- Bottom panel: probability over time ----
subplot(212);
plot(T,Pdem,'b','linewidth',2);
ylabel('Prob. of Dem. senate')
electionDay=datenum('6-Nov-2012');
T2=min(T):7:electionDay;          % weekly tick positions
set(gca,'xtick',T2,'xticklabel', ...
    datestr(T2,6),'ytick',0:10:100);
grid on;
% Extend the axis to election day itself; the weekly ticks start on the
% first day of T and may stop a few days short of it.
set(gca,'xlim',[min(T2) electionDay]);