Skip to content

Instantly share code, notes, and snippets.

@BioSciEconomist
Last active November 2, 2022 21:50
Show Gist options
  • Select an option

  • Save BioSciEconomist/d61b75805fa0ca44f138cf13fe4a6815 to your computer and use it in GitHub Desktop.

Select an option

Save BioSciEconomist/d61b75805fa0ca44f138cf13fe4a6815 to your computer and use it in GitHub Desktop.

Revisions

  1. BioSciEconomist revised this gist Nov 2, 2022. 1 changed file with 45 additions and 23 deletions.
    68 changes: 45 additions & 23 deletions basic DID.r
    Original file line number Diff line number Diff line change
    @@ -116,37 +116,59 @@ summary(mix)
    # explore parallel trends for groups
    #

    # aggregate treatment X time data using all the pre-period data from dat

    ctrl <- dat[dat$treat == 0,]
    tmp1 <- ctrl%>%
    group_by(year) %>%
    summarize(steps = mean(steps))
    tmp <- dat%>%group_by(treat,year) %>%
    summarize(steps = mean(steps),
    N = n())

    tmp1$treat = 0
    trt <- dat[dat$treat == 1,]
    tmp2 <- trt%>%
    group_by(year) %>%
    summarize(steps = mean(steps))
    tmp2$treat = 1

    # stack data
    tmp3 <- rbind(tmp1,tmp2)
    plot(tmp$year[tmp$treat == 1], tmp$steps[tmp$treat==1], type="b", col = "green",
    xlim=c(2015,2017), ylim=c(3000,4500),xlab="year", ylab="steps", xaxt = "n")
    axis(1, at = 2015:2017)
    lines(tmp$year[tmp$treat==0], tmp$steps[tmp$treat==0], col=2, type="b")
    abline(v=2016, col="blue")
    title(main="Outcomes by Year", sub="Green = Treatment | Red = Control")

    #
    # plot group trends
    #

    plot(tmp3$year[tmp3$treat== 1], tmp3$steps[tmp3$treat==1], type="b", col = "green",
    xlim=c(2015,2017), ylim=c(3300,4500),xlab="year", ylab="steps", xaxt = "n")
    #---------------------------------------
    # exploring cluster robust standard errors and pre-post paired designs
    #--------------------------------------

    axis(1, at = 2015:2017)

    lines(tmp3$year[tmp3$treat==0], tmp3$steps[tmp3$treat==0], col=2, type="b")
    tmp <- dat[dat$year %in% c(2016,2017),]
    tmp <- tmp[tmp$treat == 0,]
    tmp <- tmp[order(tmp$time,tmp$id),]

    abline(v=2016, col="blue")

    title(main="Steps by Year", sub="Green = Treatment | Red = Control")
    # paired t-test


    t.test(steps ~ time,
    data= tmp,
    paired=TRUE,
    conf.level=0.95)

    (273.5/4) #SE = 68.38

    t.test(steps ~ time,
    data= tmp,
    paired=FALSE,
    conf.level=0.95)

    (273.5/2.9) #SE = 94.31

    summary(lm(steps ~ time, data = tmp)) # SE = 93.4

    m1 <- lm(steps ~ time, data = tmp) # save model
    m1.vcovCL <- cluster.vcov(m1, tmp$id) # get variance-covariance matrix
    coeftest(m1, m1.vcovCL) # results with clustered standard errors

    # SE = 70 similar to the SE we got from the paired t-test

    library(nlme)

    mix<-lme(steps ~ time, random=(~1 | id), data = tmp)
    summary(mix)

    # SE = 68.13 similar to the SE we got from the paired t-test

  2. BioSciEconomist revised this gist Oct 29, 2022. 1 changed file with 104 additions and 103 deletions.
    207 changes: 104 additions & 103 deletions basic DID.r
    Original file line number Diff line number Diff line change
    @@ -1,10 +1,10 @@
    # *-----------------------------------------------------------------
    # | PROGRAM NAME: fun with CIs.r
    # | DATE: 7/16/22
    # | PROGRAM NAME: basic 2x2 DID.r
    # | DATE:
    # | CREATED BY: MATT BOGARD
    # | PROJECT FILE:
    # *----------------------------------------------------------------
    # | PURPOSE: basic CI simulation
    # | PURPOSE: simple example of DID and parallel trends
    # *----------------------------------------------------------------

    rm(list=ls()) # get rid of any existing data
    @@ -14,93 +14,120 @@ options("scipen" =100, "digits" = 4) # override R's tendency to use scientific n

    library(dplyr)

    # read toy data

    cards <- "id steps year time treat
    1 3700 2015 0 1
    1 3800 2016 0 1
    1 5000 2017 1 1
    2 3700 2015 0 1
    2 3800 2016 0 1
    2 4700 2017 1 1
    3 3700 2015 0 1
    3 3800 2016 0 1
    3 4450 2017 1 1
    4 3700 2015 0 1
    4 3800 2016 0 1
    4 4600 2017 1 1
    5 3700 2015 0 1
    5 3800 2016 0 1
    5 4500 2017 1 1
    6 3700 2015 0 1
    6 3800 2016 0 1
    6 4300 2017 1 1
    7 3700 2015 0 1
    7 3800 2016 0 1
    7 3800 2017 1 1
    8 3700 2015 0 1
    8 3800 2016 0 1
    8 4000 2017 1 1
    9 3700 2015 0 1
    9 3800 2016 0 1
    9 3900 2017 1 1
    10 3700 2015 0 1
    10 3800 2016 0 1
    10 4200 2017 1 1
    11 3700 2015 0 0
    11 3800 2016 0 0
    11 4300 2017 1 0
    12 3700 2015 0 0
    12 3800 2016 0 0
    12 4300 2017 1 0
    13 3700 2015 0 0
    13 3800 2016 0 0
    13 4300 2017 1 0
    14 3500 2015 0 0
    14 3600 2016 0 0
    14 3900 2017 1 0
    15 3600 2015 0 0
    15 3700 2016 0 0
    15 3700 2017 1 0
    16 3700 2015 0 0
    16 3800 2016 0 0
    16 3800 2017 1 0
    17 3650 2015 0 0
    17 3750 2016 0 0
    17 3900 2017 1 0
    18 3400 2015 0 0
    18 3500 2016 0 0
    18 3850 2017 1 0
    19 3300 2015 0 0
    19 3400 2016 0 0
    19 3825 2017 1 0
    20 3500 2015 0 0
    20 3600 2016 0 0
    20 3610 2017 1 0"
    dat <- read.table(textConnection(cards), header = TRUE)
    closeAllConnections()

    # let's start with the basic 2x2 design

    df <- dat[dat$year %in% c(2016,2017),]


    #
    # read data
    # analysis - regression
    #

    cards <- "id         steps      year       time       treat
    1             3700      2015      0             1
    1             3800      2016      0             1
    1             5000      2017      1             1
    2             3700      2015      0             1
    2             3800      2016      0             1
    2             4700      2017      1             1
    3             3700      2015      0             1
    3             3800      2016      0             1
    3             4450      2017      1             1
    4             3700      2015      0             1
    4             3800      2016      0             1
    4             4600      2017      1             1
    5             3700      2015      0             1
    5             3800      2016      0             1
    5             4500      2017      1             1
    6             3700      2015      0             1
    6             3800      2016      0             1
    6             4300      2017      1             1
    7             3700      2015      0             1
    7             3800      2016      0             1
    7             3800      2017      1             1
    8             3700      2015      0             1
    8             3800      2016      0             1
    8             4000      2017      1             1
    9             3700      2015      0             1
    9             3800      2016      0             1
    9             3900      2017      1             1
    10           3700      2015      0             1
    10           3800      2016      0             1
    10           4200      2017      1             1
    11           3700      2015      0             0
    11           3800      2016      0             0
    11           4300      2017      1             0
    12           3700      2015      0             0
    12           3800      2016      0             0
    12           4300      2017      1             0
    13           3700      2015      0             0
    13           3800      2016      0             0
    13           4300      2017      1             0
    14           3500      2015      0             0
    14           3600      2016      0             0
    14           3900      2017      1             0
    15           3600      2015      0             0
    15           3700      2016      0             0
    15           3700      2017      1             0
    16           3700      2015      0             0
    16           3800      2016      0             0
    16           3800      2017      1             0
    17           3650      2015      0             0
    17           3750      2016      0             0
    17           3900      2017      1             0
    18           3400      2015      0             0
    18           3500      2016      0             0
    18           3850      2017      1             0
    19           3300      2015      0             0
    19           3400      2016      0             0
    19           3825      2017      1             0
    20           3500      2015      0             0
    20           3600      2016      0             0
    20           3610      2017      1             0"
    df <- read.table(textConnection(cards), header = TRUE)
    closeAllConnections()

    summary(lm(steps ~ treat + time + time*treat, data = df))

    #
    # clustered standard errors
    #

    library(multiwayvcov)
    library(lmtest)

    m1 <- lm(steps ~ treat + time + time*treat, data = df) # save model
    m1.vcovCL <- cluster.vcov(m1, df$id) # get variance-covariance matrix
    coeftest(m1, m1.vcovCL) # results with clustered standard errors

    #
    # estimation using a linear mixed effects model
    #

    library(nlme)

    mix<-lme(steps ~ time*treat, random=(~1 | id), data = df)
    summary(mix)


    #
    # explore parallel trends for groups
    #

    # aggregate treatment X time data
    # aggregate treatment X time data using all the pre-period data from dat

    ctrl <- df[df$treat == 0,]
    ctrl <- dat[dat$treat == 0,]
    tmp1 <- ctrl%>%
    group_by(year) %>%
    summarize(steps = mean(steps))
    group_by(year) %>%
    summarize(steps = mean(steps))

    tmp1$treat = 0
    trt <- df[df$treat == 1,]
    trt <- dat[dat$treat == 1,]
    tmp2 <- trt%>%
    group_by(year) %>%
    summarize(steps = mean(steps))
    group_by(year) %>%
    summarize(steps = mean(steps))
    tmp2$treat = 1

    # stack data
    @@ -110,8 +137,8 @@ tmp3 <- rbind(tmp1,tmp2)
    # plot group trends
    #

    plot(tmp3$year[tmp3$treat== 1], tmp3$steps[tmp3$treat==1], type="b", col = "green",

    plot(tmp3$year[tmp3$treat== 1], tmp3$steps[tmp3$treat==1], type="b", col = "green",
    xlim=c(2015,2017), ylim=c(3300,4500),xlab="year", ylab="steps", xaxt = "n")

    axis(1, at = 2015:2017)
    @@ -122,30 +149,4 @@ abline(v=2016, col="blue")

    title(main="Steps by Year", sub="Green = Treatment | Red = Control")

    #
    # analysis - regression
    #

    summary(lm(steps ~ treat + time + time*treat, data = df))

    #
    # clustered standard errors
    #

    library(multiwayvcov)
    library(lmtest)

    m1 <- lm(steps ~ treat + time + time*treat, data = df) # save model
    m1.vcovCL <- cluster.vcov(m1, df$id) # get variance-covariance matrix
    coeftest(m1, m1.vcovCL) # results with clustered standard errors

    #
    # estimation using a linear mixed effects model
    #

    library(nlme)

    mix<-lme(steps ~ time*treat, random=(~1 | id), data = df)
    summary(mix)


  3. BioSciEconomist revised this gist Oct 26, 2022. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion basic DID.r
    Original file line number Diff line number Diff line change
    @@ -80,7 +80,7 @@ cards <- "id         steps      year       time       t
    20           3500      2015      0             0
    20           3600      2016      0             0
    20           3610      2017      1             0"
    dat <- read.table(textConnection(cards), header = TRUE)
    df <- read.table(textConnection(cards), header = TRUE)
    closeAllConnections()


  4. BioSciEconomist revised this gist Oct 10, 2022. 1 changed file with 75 additions and 2 deletions.
    77 changes: 75 additions & 2 deletions basic DID.r
    Original file line number Diff line number Diff line change
    @@ -1,16 +1,89 @@
    # *-----------------------------------------------------------------
    # | PROGRAM NAME: fun with CIs.r
    # | DATE: 7/16/22
    # | CREATED BY: MATT BOGARD
    # | PROJECT FILE:
    # *----------------------------------------------------------------
    # | PURPOSE: basic CI simulation
    # *----------------------------------------------------------------

    rm(list=ls()) # get rid of any existing data
    cat("\f") # clear console
    dev.off() # clear plots
    options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation

    library(dplyr)
    library(dplyr)


    #
    # read data
    #

    df <- read.csv("M://Matt B//MB Coders Corner//DID toy dat2.csv")
    cards <- "id         steps      year       time       treat
    1             3700      2015      0             1
    1             3800      2016      0             1
    1             5000      2017      1             1
    2             3700      2015      0             1
    2             3800      2016      0             1
    2             4700      2017      1             1
    3             3700      2015      0             1
    3             3800      2016      0             1
    3             4450      2017      1             1
    4             3700      2015      0             1
    4             3800      2016      0             1
    4             4600      2017      1             1
    5             3700      2015      0             1
    5             3800      2016      0             1
    5             4500      2017      1             1
    6             3700      2015      0             1
    6             3800      2016      0             1
    6             4300      2017      1             1
    7             3700      2015      0             1
    7             3800      2016      0             1
    7             3800      2017      1             1
    8             3700      2015      0             1
    8             3800      2016      0             1
    8             4000      2017      1             1
    9             3700      2015      0             1
    9             3800      2016      0             1
    9             3900      2017      1             1
    10           3700      2015      0             1
    10           3800      2016      0             1
    10           4200      2017      1             1
    11           3700      2015      0             0
    11           3800      2016      0             0
    11           4300      2017      1             0
    12           3700      2015      0             0
    12           3800      2016      0             0
    12           4300      2017      1             0
    13           3700      2015      0             0
    13           3800      2016      0             0
    13           4300      2017      1             0
    14           3500      2015      0             0
    14           3600      2016      0             0
    14           3900      2017      1             0
    15           3600      2015      0             0
    15           3700      2016      0             0
    15           3700      2017      1             0
    16           3700      2015      0             0
    16           3800      2016      0             0
    16           3800      2017      1             0
    17           3650      2015      0             0
    17           3750      2016      0             0
    17           3900      2017      1             0
    18           3400      2015      0             0
    18           3500      2016      0             0
    18           3850      2017      1             0
    19           3300      2015      0             0
    19           3400      2016      0             0
    19           3825      2017      1             0
    20           3500      2015      0             0
    20           3600      2016      0             0
    20           3610      2017      1             0"
    dat <- read.table(textConnection(cards), header = TRUE)
    closeAllConnections()



    #
    # explore parallel trends for groups
  5. BioSciEconomist created this gist Oct 10, 2022.
    78 changes: 78 additions & 0 deletions basic DID.r
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,78 @@
    rm(list=ls()) # get rid of any existing data
    cat("\f") # clear console
    dev.off() # clear plots
    options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation

    library(dplyr)


    #
    # read data
    #

    df <- read.csv("M://Matt B//MB Coders Corner//DID toy dat2.csv")

    #
    # explore parallel trends for groups
    #

    # aggregate treatment X time data

    ctrl <- df[df$treat == 0,]
    tmp1 <- ctrl%>%
    group_by(year) %>%
    summarize(steps = mean(steps))

    tmp1$treat = 0
    trt <- df[df$treat == 1,]
    tmp2 <- trt%>%
    group_by(year) %>%
    summarize(steps = mean(steps))
    tmp2$treat = 1

    # stack data
    tmp3 <- rbind(tmp1,tmp2)

    #
    # plot group trends
    #

    plot(tmp3$year[tmp3$treat== 1], tmp3$steps[tmp3$treat==1], type="b", col = "green",

    xlim=c(2015,2017), ylim=c(3300,4500),xlab="year", ylab="steps", xaxt = "n")

    axis(1, at = 2015:2017)

    lines(tmp3$year[tmp3$treat==0], tmp3$steps[tmp3$treat==0], col=2, type="b")

    abline(v=2016, col="blue")

    title(main="Steps by Year", sub="Green = Treatment | Red = Control")

    #
    # analysis - regression
    #

    summary(lm(steps ~ treat + time + time*treat, data = df))

    #
    # clustered standard errors
    #

    library(multiwayvcov)
    library(lmtest)

    m1 <- lm(steps ~ treat + time + time*treat, data = df) # save model
    m1.vcovCL <- cluster.vcov(m1, df$id) # get variance-covariance matrix
    coeftest(m1, m1.vcovCL) # results with clustered standard errors

    #
    # estimation using a linear mixed effects model
    #

    library(nlme)

    mix<-lme(steps ~ time*treat, random=(~1 | id), data = df)
    summary(mix)