*************************************************************************************
Clustered SEs in Stata and R
*************************************************************************************

1. Stata code

# Loading Petersen (2006) 's dataset
webuse set http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/
webuse test_data.dta, clear

# OLS with SE clustered by firm
reg y x, vce(cluster firmid)

# OLS with SE clustered by time
reg y x, vce(cluster year)

# Setting up dataset as a panel
xtset firmid year

# FE regression with SE clustered by firm
xtreg y x, fe vce(cluster firmid)

# FE regression with SE clustered by time
xtreg y x, fe vce(cluster year) nonest

# Clustered standard errors - 2 dimensions by firm and time
cluster2 y x, fcluster(firmid) tcluster(year)

# Clustered standard errors - Fama-Macbeth
tsset firmid year fm y x, byfm(by_variable)

# Clustered standard errors - Newey-West
tsset firmid year
newey y x, lag(lag_length) force

2. R code

# Loading the required libraries
library(plm)
library(sandwich)
library(lmtest)
library(multiwayvcov)

# Loading Petersen's dataset
library(foreign)
petersen <- read.dta("http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.dta")

# Pooled OLS model
pooled.ols <- plm(formula=y~x, data=petersen, model="pooling", index=c("firmid", "year"))

# Fixed effects model
fe.firm <- plm(formula=y~x, data=petersen, model="within", index=c("firmid", "year"))

# Clustered standard errors - OLS (by firm)
coeftest(pooled.ols, vcov=vcovHC(pooled.ols, type="sss", cluster="group"))

# Clustered standard errors - OLS (by time)
coeftest(pooled.ols, vcov=vcovHC(pooled.ols, type="sss", cluster="time"))

# Clustered standard errors - OLS (by firm and time)
coeftest(pooled.ols, vcov=vcovDC(pooled.ols, type="sss"))

# Clustered standard errors - Fixed effect regression (by firm)
coeftest(fe.firm, vcov=vcovHC(fe.firm, type="sss", cluster="group"))

# Clustered standard errors - Fixed effect regression (by time)
coeftest(fe.firm, vcov=vcovHC(fe.firm, type="sss", cluster="time"))

# Clustered standard errors - Fixed effect regression (by firm and time)
coeftest(fe.firm, vcov=vcovDC(fe.firm, type="sss"))

# Clustered standard errors - Fama-Macbeth
library(plm)
summary(pmg(y ~ x, data=df.petersen, index=c("year","firmid")))

# Clustered standard errors - Newey-West
model <- lm(y~x, data=df.petersen)
coeftest(model, NeweyWest(model, lag = 1, prewhite = FALSE)) -- Newey-West '87 1 lag
coeftest(model, NeweyWest(model)) -- Newey-West '94 auto-lag
floor(bwNeweyWest(model)) # provides lag parameter used by Newey-West '94