Forked from elliottmorris/election_night_live_model.R
Last active
January 23, 2023 03:15
-
-
Save lemurey/9c4dfb7fc28b64952ee48c89b73f3238 to your computer and use it in GitHub Desktop.
A live election-night prediction model using The Economist's pre-election forecast
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import numpy as np | |
| import ipywidgets as widgets | |
| RNG = np.random.default_rng() | |
| def logit(x): | |
| return np.log(x / (1 - x)) | |
| def inv_logit(x): | |
| return 1 / (1 + np.exp(-x)) | |
| class SimRunner(): | |
| def __init__(self, target_nsim=1000, max_runs=20, rng=RNG): | |
| self.target_nsim = target_nsim | |
| self.max_runs = max_runs | |
| self.rng = RNG | |
| self.biden_states = set() | |
| self.trump_states = set() | |
| self.biden_state_dict = {} | |
| self._initialize_data() | |
| self._initialize_controls() | |
| self.fstr = 'pr(biden wins the electoral college) = {:.0%}\n[nsim = {}; se = {:.1%}]' | |
| self.display = widgets.HBox([self.button_container, self.display_area]) | |
| def _initialize_controls(self): | |
| self.buttons = [] | |
| self.sliders = [] | |
| for st in self.ev.index: | |
| b = widgets.ToggleButtons(options=['Biden', 'Trump', 'N/A'], | |
| value='N/A', | |
| description=st) | |
| s = widgets.FloatRangeSlider(value=[0, 1], min=0, max=1, step=0.01, | |
| description='', continuous_update=False) | |
| s._state_label = st | |
| if st in self.hides: | |
| b.layout.display = 'none' | |
| s.layout.display = 'none' | |
| b.observe(self._state_button_press, names=['index']) | |
| s.observe(self._slider_change, names=['value']) | |
| self.buttons.append(b) | |
| self.sliders.append(s) | |
| self.button_container = widgets.VBox([widgets.HBox([b, s]) for b, s in zip(self.buttons, self.sliders)]) | |
| self.display_area = widgets.Output(layout=widgets.Layout(display="flex", justify_content="flex-start")) | |
| def _initialize_data(self): | |
| s = pd.read_csv('https://cdn.economistdatateam.com/us-2020-forecast/data/president/electoral_college_simulations.csv') | |
| ev_state = pd.read_csv('https://raw.githubusercontent.com/TheEconomist/us-potus-model/master/data/2012.csv', | |
| index_col='state')['ev'] | |
| sim = s.iloc[:, 3:] | |
| sim = pd.concat([sim] * 5).reset_index(drop=True) | |
| national = self.rng.normal(0, 0.01, size=(sim.shape[0], 1)) | |
| state = RNG.normal(0, 0.02, size=sim.shape) | |
| sim_forecast = sim + national + state | |
| sim_forecast[sim_forecast <= 0] = 0.0001 | |
| sim_forecast[sim_forecast >= 1] = 0.9999 | |
| self.sim_forecast = sim_forecast | |
| self.ev = ev_state[sim_forecast.columns] | |
| self._run_me_ne_updates() | |
| self.sim_forecast = self.sim_forecast.sort_index(axis=1) | |
| self.ev = self.ev[self.sim_forecast.columns] | |
| self.ss = np.cov(logit(self.sim_forecast), rowvar=False) | |
| self.sigma = pd.DataFrame(self.ss, index=self.sim_forecast.columns, | |
| columns=self.sim_forecast.columns) | |
| self.mu = pd.Series(logit(self.sim_forecast).mean(), index=self.sim_forecast.columns) | |
| _, _, _, p = self.update_prob() | |
| self.hides = p[p > 0.99].index.union(p[p < 0.01].index) | |
| def _run_me_ne_updates(self): | |
| self.ev['ME'] = 2 | |
| self.ev['NE'] = 2 | |
| self.ev['ME1'] = 1 | |
| self.ev['ME2'] = 1 | |
| self.ev['NE1'] = 1 | |
| self.ev['NE2'] = 1 | |
| self.ev['NE3'] = 1 | |
| me_ne_leans = pd.DataFrame({'state': ['ME', 'ME', 'NE', 'NE', 'NE'], | |
| 'district': [1, 2, 1, 2, 3], | |
| 'dem_cd_lean': [0.053745445221462296, | |
| -0.060452990625671894, | |
| 0.023745883670417745, | |
| 0.10881583949611655, | |
| -0.13915995886163904]}) | |
| for _, row in me_ne_leans.iterrows(): | |
| name = '{}{}'.format(row['state'], row['district']) | |
| vals = self.rng.normal(row['dem_cd_lean'], 0.0075, size=self.sim_forecast.shape[0]) | |
| self.sim_forecast.loc[:, name] = self.sim_forecast[row['state']] + vals | |
| def _state_button_press(self, change): | |
| # self.display.layout.display = 'none' | |
| if change['name'] in ('_property_lock', 'label', 'value'): | |
| return | |
| b = change['owner'] | |
| st = b.get_state() | |
| index = st['index'] | |
| label = st['description'] | |
| if '-' in label: | |
| label, _ = label.split('-') | |
| if index == 0: | |
| if label in self.trump_states: | |
| self.trump_states.remove(label) | |
| self.biden_states.add(label) | |
| elif index == 1: | |
| if label in self.biden_states: | |
| self.biden_states.remove(label) | |
| self.trump_states.add(label) | |
| else: | |
| if label in self.trump_states: | |
| self.trump_states.remove(label) | |
| if label in self.biden_states: | |
| self.biden_states.remove(label) | |
| self._update_display() | |
| # self.display.layout.display = None | |
| def _slider_change(self, change): | |
| low, high = change['new'] | |
| state = change['owner']._state_label | |
| if low == 0 and high == 1: | |
| if state in self.biden_state_dict: | |
| del self.biden_state_dict[state] | |
| else: | |
| self.biden_state_dict[state] = (low, high) | |
| self._update_display() | |
| def _update_display(self): | |
| p, ev_dist, sd, state_win = self.update_prob() | |
| with self.display_area: | |
| self.display_area.clear_output() | |
| print(self.fstr.format(p, ev_dist.shape[0], sd)) | |
| for child in self.buttons: | |
| desc = child.description | |
| if '-' in desc: | |
| state, _ = desc.split('-') | |
| else: | |
| state = desc | |
| val = state_win[state] | |
| child.description = '{}-{: >10.0%}'.format(state, val) | |
| def draw_samples(self, early_return): | |
| output = pd.DataFrame() | |
| num_runs = 0 | |
| while True: | |
| if output.shape[0] > self.target_nsim: | |
| break | |
| num_runs += 1 | |
| sim = inv_logit(self.rng.multivariate_normal(self.mu, self.sigma, size=(100000))) | |
| proposals = pd.DataFrame(sim, columns=self.mu.index) | |
| if early_return: | |
| return proposals, None | |
| for state in self.biden_states: | |
| proposals.loc[proposals[state] < 0.5, state] = np.nan | |
| for state in self.trump_states: | |
| proposals.loc[proposals[state] > 0.5, state] = np.nan | |
| for state, (biden_low, biden_high) in self.biden_state_dict.items(): | |
| if biden_low > 1: | |
| biden_low = biden_low / 100 | |
| if biden_high > 1: | |
| biden_high = biden_high / 100 | |
| inds = (proposals[state] > biden_high) | (proposals[state] < biden_low) | |
| proposals.loc[inds, state] = np.nan | |
| output = pd.concat((output, proposals.dropna(how='any')), ignore_index=True) | |
| if (output.shape[0] < self.target_nsim and | |
| output.shape[0] / (proposals.shape[0] * num_runs) < (1 - 99.99 / 100)): | |
| print('more than 99.88% of simulations are rejected, try relaxing some constraints and try again') | |
| return None, None | |
| if num_runs > self.max_runs: | |
| print(f'more than {max_runs} runs, aborting') | |
| return None, None | |
| return output, output.shape[0] / (proposals.shape[0] * num_runs) | |
| def update_prob(self, early_return=False, show_local=False): | |
| sim, acceptance_rate = self.draw_samples(early_return=early_return) | |
| if early_return: | |
| return None, None, None, sim | |
| ev_dist = ((sim > 0.5) * self.ev).sum(axis=1) | |
| state_win = (sim > 0.5).mean() | |
| p = (ev_dist > 269).mean() | |
| sd = np.sqrt(p * (1 - p) / ev_dist.shape[0]) | |
| if show_local: | |
| print('Pr(biden wins) by state:') | |
| for st in state_win.index: | |
| print('{}: {:.0%}'.format(st, state_win[st])) | |
| # print(state_win) | |
| print(self.fstr.format(p, ev_dist.shape[0], sd)) | |
| return | |
| return p, ev_dist, sd, state_win | |
| sr = SimRunner() | |
| ##### can ignore below if transfering to notebook | |
| sr.update_prob(show_local=True) | |
| sr.biden_states.add('FL') | |
| sr.update_prob(show_local=True) | |
| sr.biden_states.remove('FL') | |
| sr.trump_states.add('FL') | |
| sr.update_prob(show_local=True) | |
| sr.trump_states.add('NC') | |
| sr.biden_states.add('VA') | |
| sr.biden_state_dict['MI'] = (45, 55) | |
| sr.update_prob(show_local=True) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #' Description | |
| #' This file runs a live election-night forecast based on The Economist's pre-election forecasting model | |
| #' available at projects.economist.com/us-2020-forecast/president. | |
| #' It is resampling model based on https://pkremp.github.io/update_prob.html. | |
| #' This script does not input any real election results! You will have to enter your picks/constraints manually (scroll to the bottom of the script). | |
| #' | |
| #' Licence | |
| #' This software is published by *[The Economist](https://www.economist.com)* under the [MIT licence](https://opensource.org/licenses/MIT). The data generated by *The Economist* are available under the [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/). | |
| #' The licences include only the data and the software authored by *The Economist*, and do not cover any *Economist* content or third-party data or content made available using the software. More information about licensing, syndication and the copyright of *Economist* content can be found [here](https://www.economist.com/rights/). | |
| rm(list=ls()) | |
| library(tidyverse) | |
| library(mvtnorm) | |
| # functions first --------------------------------------------------------- | |
| logit <- function(x) log(x/(1-x)) | |
| inv_logit <- function(x) 1/(1 + exp(-x)) | |
| draw_samples <- function(biden_states = NULL, trump_states = NULL, states = NULL, | |
| upper_biden = NULL, lower_biden = NULL, print_acceptance = FALSE, target_nsim = 1000){ | |
| sim <- matrix(NA, nr = 1, nc = length(mu)) | |
| n <- 0 | |
| while(nrow(sim) < target_nsim){ | |
| # randomly sample from the posterior distribution and reject when constraints are not met | |
| n <- n + 1 | |
| proposals <- inv_logit(rmvnorm(1e5, mu, Sigma, method = "svd")) # "DC" is pretty much uncorrelated | |
| colnames(proposals) <- names(mu) | |
| if (!is.null(biden_states)) proposals[which(proposals[,biden_states] < .5)] <- NA | |
| if (!is.null( trump_states)) proposals[which(proposals[, trump_states] > .5)] <- NA | |
| if (!is.null( states)){ | |
| for (s in states){ | |
| proposals[which(proposals[, s] > upper_biden[s] | | |
| proposals[, s] < lower_biden[s])] <- NA | |
| } | |
| } | |
| reject <- apply(proposals, 1, function(x) any(is.na(x))) | |
| sim <- rbind(sim, proposals[!reject,]) | |
| if (nrow(sim) < target_nsim & nrow(sim)/(nrow(proposals)*n) < 1-99.99/100){ | |
| stop(paste("rmvnorm() is working hard... but more than 99.99% of the samples are rejected; you should relax some contraints.", sep = "")) | |
| } | |
| } | |
| return(list("matrix" = sim[-1,], "acceptance_rate" = nrow(sim)/(nrow(proposals)*n))) | |
| } | |
| update_prob <- function(biden_states = NULL, trump_states = NULL, biden_scores_list = NULL, target_nsim = 1000, show_all_states = TRUE){ | |
| states <- names(biden_scores_list) | |
| lower_biden <- sapply(biden_scores_list, function(x) x[1]/100) | |
| upper_biden <- sapply(biden_scores_list, function(x) x[2]/100) | |
| sim <- draw_samples(biden_states = biden_states, trump_states = trump_states, states = states, | |
| upper_biden = upper_biden, lower_biden = lower_biden, | |
| target_nsim = target_nsim) | |
| ev_dist <- (sim[["matrix"]] > .5) %*% ev | |
| state_win <- colMeans(sim[["matrix"]] > .5) | |
| p <- mean(ev_dist >= 270) | |
| sd <- sqrt(p*(1-p)/length(ev_dist)) | |
| if (show_all_states){ | |
| cat("Pr(biden wins) by state, in %:\n") | |
| print(t(round(100*state_win))) | |
| cat("--------\n") | |
| } | |
| cat(paste("Pr(biden wins the electoral college) = ", round(100*p), "%\n[nsim = ", length(ev_dist), "; se = ", round(sd*100,1), "%]", sep = "")) | |
| if (show_all_states) cat("\n--------\n") | |
| } | |
| # read data --------------------------------------------------------------- | |
| # get simulations | |
| sim_forecast <- read_csv('https://cdn.economistdatateam.com/us-2020-forecast/data/president/electoral_college_simulations.csv') | |
| # check initial parameters | |
| nrow(sim_forecast) | |
| mean(sim_forecast$dem_ev > 269) | |
| # select relevant columns and make the data frae into a matrix | |
| sim_forecast <- sim_forecast %>% select(4:ncol(.)) | |
| sim_forecast <- list(sim_forecast,sim_forecast,sim_forecast,sim_forecast,sim_forecast) %>% bind_rows %>% as.matrix | |
| # this bit is really hacky | |
| # it make the simulations a little less correlated by add a bunch of random noise | |
| # this helps our live model react to really implausible events that could happen on election night | |
| # but will have the result of pushing the initial pre-results forecasts back toward 50-50 | |
| sim_forecast <- sim_forecast + | |
| rnorm(nrow(sim_forecast), 0, 0.01) + # national error component | |
| replicate(ncol(sim_forecast),rnorm(nrow(sim_forecast),0,0.02)) # state | |
| sim_forecast <- ifelse(sim_forecast <= 0, 0.0001, sim_forecast) | |
| sim_forecast <- ifelse(sim_forecast >= 1, 0.99999, sim_forecast) | |
| sim_forecast <- as_tibble(sim_forecast) | |
| # now, get electoral votes in each state | |
| # and make sure they're in the right order | |
| #ev_state <- read_csv('data/state_evs.csv')$ev | |
| #names(ev_state) <- read_csv('data/state_evs.csv')$state | |
| ev_state <- read_csv('https://raw.githubusercontent.com/TheEconomist/us-potus-model/master/data/2012.csv')$ev | |
| names(ev_state) <- read_csv('https://raw.githubusercontent.com/TheEconomist/us-potus-model/master/data/2012.csv')$state | |
| ev <- ev_state[colnames(sim_forecast)] | |
| # check that the EVs and state forecasts add up to the right amounts | |
| sim_evs <- apply(sim_forecast, | |
| 1, | |
| function(x){ | |
| sum((x > 0.5 )* ev) | |
| }) | |
| hist(sim_evs,breaks=538) | |
| median(sim_evs) | |
| mean(sim_evs) | |
| mean(sim_evs > 269) | |
| enframe(prop.table(table(sim_evs)),'dem_ev','pct') %>% arrange(desc(pct)) | |
| # adding ME1 and ME2, NE1 NE2 to sim_forecast matrix and ev vector | |
| # we do this by adding the average district-level two-party dem presidential vote, relative | |
| # to the state-level dem two-party vote, back to to our state-level forecast | |
| # first, split up EVs | |
| ev["ME"] <- 2 | |
| ev["NE"] <- 2 | |
| ev <- c(ev, "ME1" = 1, "ME2" = 1, "NE1" = 1, "NE2" = 1, "NE3" = 1) | |
| sum(ev) | |
| # create simulations for ME and NE districts | |
| me_ne_leans <- politicaldata::pres_results_by_cd %>% filter(year >= 2012, state_abb %in% c("ME","NE")) %>% | |
| select(-other) %>% | |
| rename(state = state_abb) %>% | |
| group_by(year,state) %>% | |
| mutate(sum_pct = dem + rep, | |
| total_votes = total_votes * sum_pct, | |
| dem = dem /sum_pct, | |
| rep = rep/sum_pct) %>% | |
| mutate(dem_vote_state = sum(dem * total_votes) / sum(total_votes)) %>% | |
| mutate(dem_cd_lean = dem - dem_vote_state) %>% | |
| group_by(state,district) %>% | |
| summarise(dem_cd_lean = weighted.mean(dem_cd_lean, c(0.3,0.7))) | |
| # bind new simulation columns for the congressional districts, based on the above | |
| sim_forecast <- bind_cols( | |
| sim_forecast, | |
| tibble( | |
| ME1 = sim_forecast %>% pull(ME) + | |
| rnorm(nrow(sim_forecast), | |
| me_ne_leans[me_ne_leans$state == "ME" & me_ne_leans$district == 1,]$dem_cd_lean, | |
| .0075), | |
| ME2 = sim_forecast %>% pull(ME) + | |
| rnorm(nrow(sim_forecast), | |
| me_ne_leans[me_ne_leans$state == "ME" & me_ne_leans$district == 2,]$dem_cd_lean, | |
| .0075), | |
| NE1 = sim_forecast %>% pull(NE) + | |
| rnorm(nrow(sim_forecast), | |
| me_ne_leans[me_ne_leans$state == "NE" & me_ne_leans$district == 1,]$dem_cd_lean, | |
| .0075), | |
| NE2 = sim_forecast %>% pull(NE) + | |
| rnorm(nrow(sim_forecast), | |
| me_ne_leans[me_ne_leans$state == "NE" & me_ne_leans$district == 2,]$dem_cd_lean, | |
| .0075), | |
| NE3 = sim_forecast %>% pull(NE) + | |
| rnorm(nrow(sim_forecast), | |
| me_ne_leans[me_ne_leans$state == "NE" & me_ne_leans$district == 3,]$dem_cd_lean, | |
| .0075) | |
| ) | |
| ) | |
| sim_forecast | |
| sim_evs <- apply(sim_forecast, | |
| 1, | |
| function(x){ | |
| sum((x > 0.5 )* ev) | |
| }) | |
| colMeans(sim_forecast > 0.5) | |
| hist(sim_evs,breaks=538) | |
| median(sim_evs) | |
| mean(sim_evs) | |
| mean(sim_evs > 269) | |
| enframe(prop.table(table(sim_evs)),'dem_ev','pct') %>% arrange(desc(pct)) | |
| # final data wrangling -- this stuff getes passed into the update_prob function | |
| # mainly we just want to make sure everything is in the right order with the right names | |
| ev <- ev[colnames(sim_forecast)] | |
| Sigma <- cov(logit(sim_forecast)) | |
| Sigma | |
| mu <- colMeans(logit(sim_forecast)) | |
| names(mu) <- colnames(sim_forecast) | |
| # run --------------------------------------------------------------------- | |
| # for example... | |
| # raw prob, no constraints | |
| update_prob(biden_states = NULL, | |
| trump_states = NULL, | |
| biden_scores_list = NULL) | |
| # biden wins florida | |
| update_prob(biden_states = c("FL"), | |
| trump_states = NULL, | |
| biden_scores_list = NULL) | |
| # trump wins florida | |
| update_prob(biden_states = NULL, | |
| trump_states = c("FL"), | |
| biden_scores_list = NULL) | |
| # trump wins fl and nc, biden wins va, biden margin in MI can't be outside -10 or 10 | |
| update_prob(biden_states = c("VA"), | |
| trump_states = c("FL","NC"), | |
| biden_scores_list = list("MI" = c(45,55))) | |
| # now it's your turn.... | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment