BioSciEconomist · November 2, 2022 21:50
diff --git a/basic DID.r b/basic DID.r
 # *-----------------------------------------------------------------
 # | PROGRAM NAME: fun with CIs.r
 # | DATE: 7/16/22 
 # | CREATED BY: MATT BOGARD 
 # | PROJECT FILE:             
 # *----------------------------------------------------------------
 # | PURPOSE: basic CI simulation
 # *----------------------------------------------------------------

 rm(list=ls()) # get rid of any existing data
 cat("\f") # clear console
 dev.off() # clear plots
 options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation

 library(dplyr)


 #
 # read data
 #

 cards <- "id         steps      year       time       treat
 1             3700      2015      0             1
 1             3800      2016      0             1
 1             5000      2017      1             1
 2             3700      2015      0             1
 2             3800      2016      0             1
 2             4700      2017      1             1
 3             3700      2015      0             1
 3             3800      2016      0             1
 3             4450      2017      1             1
 4             3700      2015      0             1
 4             3800      2016      0             1
 4             4600      2017      1             1
 5             3700      2015      0             1
 5             3800      2016      0             1
 5             4500      2017      1             1
 6             3700      2015      0             1
 6             3800      2016      0             1
 6             4300      2017      1             1
 7             3700      2015      0             1
 7             3800      2016      0             1
 7             3800      2017      1             1
 8             3700      2015      0             1
 8             3800      2016      0             1
 8             4000      2017      1             1
 9             3700      2015      0             1
 9             3800      2016      0             1
 9             3900      2017      1             1
 10           3700      2015      0             1
 10           3800      2016      0             1
 10           4200      2017      1             1
 11           3700      2015      0             0
 11           3800      2016      0             0
 11           4300      2017      1             0
 12           3700      2015      0             0
 12           3800      2016      0             0
 12           4300      2017      1             0
 13           3700      2015      0             0
 13           3800      2016      0             0
 13           4300      2017      1             0
 14           3500      2015      0             0
 14           3600      2016      0             0
 14           3900      2017      1             0
 15           3600      2015      0             0
 15           3700      2016      0             0
 15           3700      2017      1             0
 16           3700      2015      0             0
 16           3800      2016      0             0
 16           3800      2017      1             0
 17           3650      2015      0             0
 17           3750      2016      0             0
 17           3900      2017      1             0
 18           3400      2015      0             0
 18           3500      2016      0             0
 18           3850      2017      1             0
 19           3300      2015      0             0
 19           3400      2016      0             0
 19           3825      2017      1             0
 20           3500      2015      0             0
 20           3600      2016      0             0
 20           3610      2017      1             0"
 dat <- read.table(textConnection(cards), header = TRUE)
 closeAllConnections()
  


 #
 # explore parallel trends for groups
 #

 # aggregate treatment X time data

 ctrl <- df[df$treat == 0,]
 tmp1 <- ctrl%>%
 group_by(year) %>%
 summarize(steps = mean(steps))

 tmp1$treat = 0
 trt <- df[df$treat == 1,]
 tmp2 <- trt%>%
 group_by(year) %>%
 summarize(steps = mean(steps))
 tmp2$treat = 1

 # stack data
 tmp3 <- rbind(tmp1,tmp2)

 #
 # plot group trends
 #

 plot(tmp3$year[tmp3$treat== 1], tmp3$steps[tmp3$treat==1], type="b", col = "green",

     xlim=c(2015,2017), ylim=c(3300,4500),xlab="year", ylab="steps", xaxt = "n")

 axis(1, at = 2015:2017)

 lines(tmp3$year[tmp3$treat==0], tmp3$steps[tmp3$treat==0], col=2, type="b")

 abline(v=2016, col="blue")

 title(main="Steps by Year", sub="Green = Treatment | Red = Control")

 #
 # analysis - regression
 #

 summary(lm(steps ~ treat + time + time*treat, data = df))

 #
 # clustered standard errors
 #

 library(multiwayvcov)
 library(lmtest)

 m1 <- lm(steps ~ treat + time + time*treat, data = df) # save model
 m1.vcovCL <- cluster.vcov(m1, df$id) # get variance-covariance matrix
 coeftest(m1, m1.vcovCL) # results with clustered standard errors

 #
 # estimation using a linear mixed effects model
 #

 library(nlme)

 mix<-lme(steps ~ time*treat, random=(~1 | id), data = df)
 summary(mix)
	# *-----------------------------------------------------------------
	# \| PROGRAM NAME: fun with CIs.r
	# \| DATE: 7/16/22
	# \| CREATED BY: MATT BOGARD
	# \| PROJECT FILE:
	# *----------------------------------------------------------------
	# \| PURPOSE: basic CI simulation
	# *----------------------------------------------------------------

	rm(list=ls()) # get rid of any existing data
	cat("\f") # clear console
	dev.off() # clear plots
	options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation

	library(dplyr)


	#
	# read data
	#

	cards <- "id steps year time treat
	1 3700 2015 0 1
	1 3800 2016 0 1
	1 5000 2017 1 1
	2 3700 2015 0 1
	2 3800 2016 0 1
	2 4700 2017 1 1
	3 3700 2015 0 1
	3 3800 2016 0 1
	3 4450 2017 1 1
	4 3700 2015 0 1
	4 3800 2016 0 1
	4 4600 2017 1 1
	5 3700 2015 0 1
	5 3800 2016 0 1
	5 4500 2017 1 1
	6 3700 2015 0 1
	6 3800 2016 0 1
	6 4300 2017 1 1
	7 3700 2015 0 1
	7 3800 2016 0 1
	7 3800 2017 1 1
	8 3700 2015 0 1
	8 3800 2016 0 1
	8 4000 2017 1 1
	9 3700 2015 0 1
	9 3800 2016 0 1
	9 3900 2017 1 1
	10 3700 2015 0 1
	10 3800 2016 0 1
	10 4200 2017 1 1
	11 3700 2015 0 0
	11 3800 2016 0 0
	11 4300 2017 1 0
	12 3700 2015 0 0
	12 3800 2016 0 0
	12 4300 2017 1 0
	13 3700 2015 0 0
	13 3800 2016 0 0
	13 4300 2017 1 0
	14 3500 2015 0 0
	14 3600 2016 0 0
	14 3900 2017 1 0
	15 3600 2015 0 0
	15 3700 2016 0 0
	15 3700 2017 1 0
	16 3700 2015 0 0
	16 3800 2016 0 0
	16 3800 2017 1 0
	17 3650 2015 0 0
	17 3750 2016 0 0
	17 3900 2017 1 0
	18 3400 2015 0 0
	18 3500 2016 0 0
	18 3850 2017 1 0
	19 3300 2015 0 0
	19 3400 2016 0 0
	19 3825 2017 1 0
	20 3500 2015 0 0
	20 3600 2016 0 0
	20 3610 2017 1 0"
	dat <- read.table(textConnection(cards), header = TRUE)
	closeAllConnections()



	#
	# explore parallel trends for groups
	#

	# aggregate treatment X time data

	ctrl <- df[df$treat == 0,]
	tmp1 <- ctrl%>%
	group_by(year) %>%
	summarize(steps = mean(steps))

	tmp1$treat = 0
	trt <- df[df$treat == 1,]
	tmp2 <- trt%>%
	group_by(year) %>%
	summarize(steps = mean(steps))
	tmp2$treat = 1

	# stack data
	tmp3 <- rbind(tmp1,tmp2)

	#
	# plot group trends
	#

	plot(tmp3$year[tmp3$treat== 1], tmp3$steps[tmp3$treat==1], type="b", col = "green",

	xlim=c(2015,2017), ylim=c(3300,4500),xlab="year", ylab="steps", xaxt = "n")

	axis(1, at = 2015:2017)

	lines(tmp3$year[tmp3$treat==0], tmp3$steps[tmp3$treat==0], col=2, type="b")

	abline(v=2016, col="blue")

	title(main="Steps by Year", sub="Green = Treatment \| Red = Control")

	#
	# analysis - regression
	#

	summary(lm(steps ~ treat + time + time*treat, data = df))

	#
	# clustered standard errors
	#

	library(multiwayvcov)
	library(lmtest)

	m1 <- lm(steps ~ treat + time + time*treat, data = df) # save model
	m1.vcovCL <- cluster.vcov(m1, df$id) # get variance-covariance matrix
	coeftest(m1, m1.vcovCL) # results with clustered standard errors

	#
	# estimation using a linear mixed effects model
	#

	library(nlme)

	mix<-lme(steps ~ time*treat, random=(~1 \| id), data = df)
	summary(mix)
No results found