Skip to content

Instantly share code, notes, and snippets.

@elliottmorris
Last active October 18, 2025 10:10
Show Gist options
  • Select an option

  • Save elliottmorris/c70fd4d32049c9986a45e2dfc07fb4f0 to your computer and use it in GitHub Desktop.

Select an option

Save elliottmorris/c70fd4d32049c9986a45e2dfc07fb4f0 to your computer and use it in GitHub Desktop.

Revisions

  1. G. Elliott Morris revised this gist Nov 2, 2020. 1 changed file with 0 additions and 2 deletions.
    2 changes: 0 additions & 2 deletions election_night_live_model.R
    Original file line number Diff line number Diff line change
    @@ -9,8 +9,6 @@
    #' The licences include only the data and the software authored by *The Economist*, and do not cover any *Economist* content or third-party data or content made available using the software. More information about licensing, syndication and the copyright of *Economist* content can be found [here](https://www.economist.com/rights/).



    rm(list=ls())
    library(tidyverse)
    library(mvtnorm)
    library(politicaldata)
  2. G. Elliott Morris revised this gist Nov 1, 2020. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions election_night_live_model.R
    Original file line number Diff line number Diff line change
    @@ -13,6 +13,7 @@
    rm(list=ls())
    library(tidyverse)
    library(mvtnorm)
    library(politicaldata)

    # functions first ---------------------------------------------------------
    logit <- function(x) log(x/(1-x))
  3. G. Elliott Morris revised this gist Nov 1, 2020. 1 changed file with 3 additions and 6 deletions.
    9 changes: 3 additions & 6 deletions election_night_live_model.R
    Original file line number Diff line number Diff line change
    @@ -61,10 +61,10 @@ update_prob <- function(biden_states = NULL, trump_states = NULL, biden_scores_l
    sd <- sqrt(p*(1-p)/length(ev_dist))
    if (show_all_states){
    cat("Pr(biden wins) by state, in %:\n")
    print(t(round(100*state_win)))
    print(t(round(100*state_win,1)))
    cat("--------\n")
    }
    cat(paste("Pr(biden wins the electoral college) = ", round(100*p), "%\n[nsim = ", length(ev_dist), "; se = ", round(sd*100,1), "%]", sep = ""))
    cat(paste("Pr(biden wins the electoral college) = ", round(100*p,1), "%\n[nsim = ", length(ev_dist), "; se = ", round(sd*100,1), "%]", sep = ""))
    if (show_all_states) cat("\n--------\n")
    }

    @@ -201,9 +201,7 @@ names(mu) <- colnames(sim_forecast)
    # run ---------------------------------------------------------------------
    # for example...
    # raw prob, no constraints
    update_prob(biden_states = NULL,
    trump_states = NULL,
    biden_scores_list = NULL)
    update_prob()

    # biden wins florida
    update_prob(biden_states = c("FL"),
    @@ -221,4 +219,3 @@ update_prob(biden_states = c("VA"),
    biden_scores_list = list("MI" = c(45,55)))

    # now it's your turn....

  4. G. Elliott Morris revised this gist Oct 31, 2020. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion election_night_live_model.R
    Original file line number Diff line number Diff line change
    @@ -13,7 +13,6 @@
    rm(list=ls())
    library(tidyverse)
    library(mvtnorm)
    library(lqmm)

    # functions first ---------------------------------------------------------
    logit <- function(x) log(x/(1-x))
  5. G. Elliott Morris revised this gist Oct 31, 2020. 1 changed file with 13 additions and 5 deletions.
    18 changes: 13 additions & 5 deletions election_night_live_model.R
    Original file line number Diff line number Diff line change
    @@ -1,7 +1,14 @@
    # this file runs a live election-night forecast based on The Economist's pre-election forecasting model
    # available at projects.economist.com/us-2020-forecast/president
    # it is resampling model based heavily on on https://pkremp.github.io/update_prob.html
    # this does not input any real election results -- you will have to enter your picks/constraints manually
    #' Description
    #' This file runs a live election-night forecast based on The Economist's pre-election forecasting model
    #' available at projects.economist.com/us-2020-forecast/president.
    #' It is resampling model based on https://pkremp.github.io/update_prob.html.
    #' This script does not input any real election results! You will have to enter your picks/constraints manually (scroll to the bottom of the script).
    #'
    #' Licence
    #' This software is published by *[The Economist](https://www.economist.com)* under the [MIT licence](https://opensource.org/licenses/MIT). The data generated by *The Economist* are available under the [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/).
    #' The licences include only the data and the software authored by *The Economist*, and do not cover any *Economist* content or third-party data or content made available using the software. More information about licensing, syndication and the copyright of *Economist* content can be found [here](https://www.economist.com/rights/).



    rm(list=ls())
    library(tidyverse)
    @@ -214,4 +221,5 @@ update_prob(biden_states = c("VA"),
    trump_states = c("FL","NC"),
    biden_scores_list = list("MI" = c(45,55)))

    # now it's your turn....
    # now it's your turn....

  6. G. Elliott Morris created this gist Oct 31, 2020.
    217 changes: 217 additions & 0 deletions election_night_live_model.R
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,217 @@
    # this file runs a live election-night forecast based on The Economist's pre-election forecasting model
    # available at projects.economist.com/us-2020-forecast/president
    # it is resampling model based heavily on on https://pkremp.github.io/update_prob.html
    # this does not input any real election results -- you will have to enter your picks/constraints manually

    rm(list=ls())
    library(tidyverse)
    library(mvtnorm)
    library(lqmm)

    # functions first ---------------------------------------------------------
    logit <- function(x) log(x/(1-x))


    inv_logit <- function(x) 1/(1 + exp(-x))


    draw_samples <- function(biden_states = NULL, trump_states = NULL, states = NULL,
    upper_biden = NULL, lower_biden = NULL, print_acceptance = FALSE, target_nsim = 1000){
    sim <- matrix(NA, nr = 1, nc = length(mu))
    n <- 0
    while(nrow(sim) < target_nsim){
    # randomly sample from the posterior distribution and reject when constraints are not met
    n <- n + 1
    proposals <- inv_logit(rmvnorm(1e5, mu, Sigma, method = "svd")) # "DC" is pretty much uncorrelated
    colnames(proposals) <- names(mu)
    if (!is.null(biden_states)) proposals[which(proposals[,biden_states] < .5)] <- NA
    if (!is.null( trump_states)) proposals[which(proposals[, trump_states] > .5)] <- NA
    if (!is.null( states)){
    for (s in states){
    proposals[which(proposals[, s] > upper_biden[s] |
    proposals[, s] < lower_biden[s])] <- NA
    }
    }
    reject <- apply(proposals, 1, function(x) any(is.na(x)))
    sim <- rbind(sim, proposals[!reject,])
    if (nrow(sim) < target_nsim & nrow(sim)/(nrow(proposals)*n) < 1-99.99/100){
    stop(paste("rmvnorm() is working hard... but more than 99.99% of the samples are rejected; you should relax some contraints.", sep = ""))
    }
    }
    return(list("matrix" = sim[-1,], "acceptance_rate" = nrow(sim)/(nrow(proposals)*n)))
    }


    update_prob <- function(biden_states = NULL, trump_states = NULL, biden_scores_list = NULL, target_nsim = 1000, show_all_states = TRUE){
    states <- names(biden_scores_list)
    lower_biden <- sapply(biden_scores_list, function(x) x[1]/100)
    upper_biden <- sapply(biden_scores_list, function(x) x[2]/100)
    sim <- draw_samples(biden_states = biden_states, trump_states = trump_states, states = states,
    upper_biden = upper_biden, lower_biden = lower_biden,
    target_nsim = target_nsim)
    ev_dist <- (sim[["matrix"]] > .5) %*% ev
    state_win <- colMeans(sim[["matrix"]] > .5)
    p <- mean(ev_dist >= 270)
    sd <- sqrt(p*(1-p)/length(ev_dist))
    if (show_all_states){
    cat("Pr(biden wins) by state, in %:\n")
    print(t(round(100*state_win)))
    cat("--------\n")
    }
    cat(paste("Pr(biden wins the electoral college) = ", round(100*p), "%\n[nsim = ", length(ev_dist), "; se = ", round(sd*100,1), "%]", sep = ""))
    if (show_all_states) cat("\n--------\n")
    }


    # read data ---------------------------------------------------------------
    # get simulations
    sim_forecast <- read_csv('https://cdn.economistdatateam.com/us-2020-forecast/data/president/electoral_college_simulations.csv')

    # check initial parameters
    nrow(sim_forecast)
    mean(sim_forecast$dem_ev > 269)

    # select relevant columns and make the data frae into a matrix
    sim_forecast <- sim_forecast %>% select(4:ncol(.))
    sim_forecast <- list(sim_forecast,sim_forecast,sim_forecast,sim_forecast,sim_forecast) %>% bind_rows %>% as.matrix


    # this bit is really hacky
    # it make the simulations a little less correlated by add a bunch of random noise
    # this helps our live model react to really implausible events that could happen on election night
    # but will have the result of pushing the initial pre-results forecasts back toward 50-50
    sim_forecast <- sim_forecast +
    rnorm(nrow(sim_forecast), 0, 0.01) + # national error component
    replicate(ncol(sim_forecast),rnorm(nrow(sim_forecast),0,0.02)) # state

    sim_forecast <- ifelse(sim_forecast <= 0, 0.0001, sim_forecast)
    sim_forecast <- ifelse(sim_forecast >= 1, 0.99999, sim_forecast)

    sim_forecast <- as_tibble(sim_forecast)

    # now, get electoral votes in each state
    # and make sure they're in the right order
    #ev_state <- read_csv('data/state_evs.csv')$ev
    #names(ev_state) <- read_csv('data/state_evs.csv')$state
    ev_state <- read_csv('https://raw.githubusercontent.com/TheEconomist/us-potus-model/master/data/2012.csv')$ev
    names(ev_state) <- read_csv('https://raw.githubusercontent.com/TheEconomist/us-potus-model/master/data/2012.csv')$state
    ev <- ev_state[colnames(sim_forecast)]

    # check that the EVs and state forecasts add up to the right amounts
    sim_evs <- apply(sim_forecast,
    1,
    function(x){
    sum((x > 0.5 )* ev)
    })

    hist(sim_evs,breaks=538)
    median(sim_evs)
    mean(sim_evs)
    mean(sim_evs > 269)
    enframe(prop.table(table(sim_evs)),'dem_ev','pct') %>% arrange(desc(pct))


    # adding ME1 and ME2, NE1 NE2 to sim_forecast matrix and ev vector
    # we do this by adding the average district-level two-party dem presidential vote, relative
    # to the state-level dem two-party vote, back to to our state-level forecast

    # first, split up EVs
    ev["ME"] <- 2
    ev["NE"] <- 2
    ev <- c(ev, "ME1" = 1, "ME2" = 1, "NE1" = 1, "NE2" = 1, "NE3" = 1)
    sum(ev)

    # create simulations for ME and NE districts
    me_ne_leans <- politicaldata::pres_results_by_cd %>% filter(year >= 2012, state_abb %in% c("ME","NE")) %>%
    select(-other) %>%
    rename(state = state_abb) %>%
    group_by(year,state) %>%
    mutate(sum_pct = dem + rep,
    total_votes = total_votes * sum_pct,
    dem = dem /sum_pct,
    rep = rep/sum_pct) %>%
    mutate(dem_vote_state = sum(dem * total_votes) / sum(total_votes)) %>%
    mutate(dem_cd_lean = dem - dem_vote_state) %>%
    group_by(state,district) %>%
    summarise(dem_cd_lean = weighted.mean(dem_cd_lean, c(0.3,0.7)))

    # bind new simulation columns for the congressional districts, based on the above
    sim_forecast <- bind_cols(
    sim_forecast,
    tibble(
    ME1 = sim_forecast %>% pull(ME) +
    rnorm(nrow(sim_forecast),
    me_ne_leans[me_ne_leans$state == "ME" & me_ne_leans$district == 1,]$dem_cd_lean,
    .0075),
    ME2 = sim_forecast %>% pull(ME) +
    rnorm(nrow(sim_forecast),
    me_ne_leans[me_ne_leans$state == "ME" & me_ne_leans$district == 2,]$dem_cd_lean,
    .0075),

    NE1 = sim_forecast %>% pull(NE) +
    rnorm(nrow(sim_forecast),
    me_ne_leans[me_ne_leans$state == "NE" & me_ne_leans$district == 1,]$dem_cd_lean,
    .0075),
    NE2 = sim_forecast %>% pull(NE) +
    rnorm(nrow(sim_forecast),
    me_ne_leans[me_ne_leans$state == "NE" & me_ne_leans$district == 2,]$dem_cd_lean,
    .0075),
    NE3 = sim_forecast %>% pull(NE) +
    rnorm(nrow(sim_forecast),
    me_ne_leans[me_ne_leans$state == "NE" & me_ne_leans$district == 3,]$dem_cd_lean,
    .0075)
    )
    )

    sim_forecast


    sim_evs <- apply(sim_forecast,
    1,
    function(x){
    sum((x > 0.5 )* ev)
    })

    colMeans(sim_forecast > 0.5)

    hist(sim_evs,breaks=538)
    median(sim_evs)
    mean(sim_evs)
    mean(sim_evs > 269)
    enframe(prop.table(table(sim_evs)),'dem_ev','pct') %>% arrange(desc(pct))


    # final data wrangling -- this stuff getes passed into the update_prob function
    # mainly we just want to make sure everything is in the right order with the right names
    ev <- ev[colnames(sim_forecast)]

    Sigma <- cov(logit(sim_forecast))
    Sigma

    mu <- colMeans(logit(sim_forecast))
    names(mu) <- colnames(sim_forecast)


    # run ---------------------------------------------------------------------
    # for example...
    # raw prob, no constraints
    update_prob(biden_states = NULL,
    trump_states = NULL,
    biden_scores_list = NULL)

    # biden wins florida
    update_prob(biden_states = c("FL"),
    trump_states = NULL,
    biden_scores_list = NULL)

    # trump wins florida
    update_prob(biden_states = NULL,
    trump_states = c("FL"),
    biden_scores_list = NULL)

    # trump wins fl and nc, biden wins va, biden margin in MI can't be outside -10 or 10
    update_prob(biden_states = c("VA"),
    trump_states = c("FL","NC"),
    biden_scores_list = list("MI" = c(45,55)))

    # now it's your turn....