Skip to content

Instantly share code, notes, and snippets.

@BioSciEconomist
Last active November 2, 2022 21:50
Show Gist options
  • Select an option

  • Save BioSciEconomist/d61b75805fa0ca44f138cf13fe4a6815 to your computer and use it in GitHub Desktop.

Select an option

Save BioSciEconomist/d61b75805fa0ca44f138cf13fe4a6815 to your computer and use it in GitHub Desktop.
Basic difference in differences in R
# *-----------------------------------------------------------------
# | PROGRAM NAME: fun with CIs.r
# | DATE: 7/16/22
# | CREATED BY: MATT BOGARD
# | PROJECT FILE:
# *----------------------------------------------------------------
# | PURPOSE: basic CI simulation
# *----------------------------------------------------------------
rm(list=ls()) # get rid of any existing data
cat("\f") # clear console
dev.off() # clear plots
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation
library(dplyr)
#
# read data
#
cards <- "id         steps      year       time       treat
1             3700      2015      0             1
1             3800      2016      0             1
1             5000      2017      1             1
2             3700      2015      0             1
2             3800      2016      0             1
2             4700      2017      1             1
3             3700      2015      0             1
3             3800      2016      0             1
3             4450      2017      1             1
4             3700      2015      0             1
4             3800      2016      0             1
4             4600      2017      1             1
5             3700      2015      0             1
5             3800      2016      0             1
5             4500      2017      1             1
6             3700      2015      0             1
6             3800      2016      0             1
6             4300      2017      1             1
7             3700      2015      0             1
7             3800      2016      0             1
7             3800      2017      1             1
8             3700      2015      0             1
8             3800      2016      0             1
8             4000      2017      1             1
9             3700      2015      0             1
9             3800      2016      0             1
9             3900      2017      1             1
10           3700      2015      0             1
10           3800      2016      0             1
10           4200      2017      1             1
11           3700      2015      0             0
11           3800      2016      0             0
11           4300      2017      1             0
12           3700      2015      0             0
12           3800      2016      0             0
12           4300      2017      1             0
13           3700      2015      0             0
13           3800      2016      0             0
13           4300      2017      1             0
14           3500      2015      0             0
14           3600      2016      0             0
14           3900      2017      1             0
15           3600      2015      0             0
15           3700      2016      0             0
15           3700      2017      1             0
16           3700      2015      0             0
16           3800      2016      0             0
16           3800      2017      1             0
17           3650      2015      0             0
17           3750      2016      0             0
17           3900      2017      1             0
18           3400      2015      0             0
18           3500      2016      0             0
18           3850      2017      1             0
19           3300      2015      0             0
19           3400      2016      0             0
19           3825      2017      1             0
20           3500      2015      0             0
20           3600      2016      0             0
20           3610      2017      1             0"
dat <- read.table(textConnection(cards), header = TRUE)
closeAllConnections()
 
#
# explore parallel trends for groups
#
# aggregate treatment X time data
ctrl <- df[df$treat == 0,]
tmp1 <- ctrl%>%
group_by(year) %>%
summarize(steps = mean(steps))
tmp1$treat = 0
trt <- df[df$treat == 1,]
tmp2 <- trt%>%
group_by(year) %>%
summarize(steps = mean(steps))
tmp2$treat = 1
# stack data
tmp3 <- rbind(tmp1,tmp2)
#
# plot group trends
#
plot(tmp3$year[tmp3$treat== 1], tmp3$steps[tmp3$treat==1], type="b", col = "green",
xlim=c(2015,2017), ylim=c(3300,4500),xlab="year", ylab="steps", xaxt = "n")
axis(1, at = 2015:2017)
lines(tmp3$year[tmp3$treat==0], tmp3$steps[tmp3$treat==0], col=2, type="b")
abline(v=2016, col="blue")
title(main="Steps by Year", sub="Green = Treatment | Red = Control")
#
# analysis - regression
#
summary(lm(steps ~ treat + time + time*treat, data = df))
#
# clustered standard errors
#
library(multiwayvcov)
library(lmtest)
m1 <- lm(steps ~ treat + time + time*treat, data = df) # save model
m1.vcovCL <- cluster.vcov(m1, df$id) # get variance-covariance matrix
coeftest(m1, m1.vcovCL) # results with clustered standard errors
#
# estimation using a linear mixed effects model
#
library(nlme)
mix<-lme(steps ~ time*treat, random=(~1 | id), data = df)
summary(mix)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment