suppressMessages({
library(stargazer)
library(hdm)
library(AER)
})
\begin{equation} (\Delta\log GDP)_i=\alpha \cdot GDP^0_i+U_i. \end{equation}
# Load the GrowthData data set into the workspace.
data(GrowthData)
`gdpsh465`: initial GDP per capita, \(GDP^0_i\).
`Outcome`: GDP per capita growth rate, \((\Delta\log GDP)_i\).
# Simple (unconditional) catching-up regression: growth rate on initial GDP
# per capita. The fit must be bound to `simple`, because that is the name
# handed to stargazer() below; binding it to `.simple` leaves `simple`
# undefined and the table call fails.
simple <- lm(Outcome ~ gdpsh465, data = GrowthData)

# Render the regression table as HTML with a custom significance note.
suppressWarnings(
  stargazer(
    simple,
    header = FALSE,
    title = "Testing the simple catching up hypothesis",
    omit.stat = "all",
    # type = "text"
    type = "html",
    notes.append = FALSE,
    notes = c("<sup>⋆</sup>p<0.1; <sup>⋆⋆</sup>p<0.05; <sup>⋆⋆⋆</sup>p<0.01")
  )
)
Dependent variable: | |
Outcome | |
gdpsh465 | 0.001 |
(0.006) | |
Constant | 0.035 |
(0.047) | |
Note: | ⋆p<0.1; ⋆⋆p<0.05; ⋆⋆⋆p<0.01 |
There are a lot of potential controls in the data:
# Size of the data set: rows first, then columns.
dim(GrowthData)
## [1] 90 63
# Column names of the data set.
colnames(GrowthData)
## [1] "Outcome" "intercept" "gdpsh465" "bmp1l" "freeop" "freetar"
## [7] "h65" "hm65" "hf65" "p65" "pm65" "pf65"
## [13] "s65" "sm65" "sf65" "fert65" "mort65" "lifee065"
## [19] "gpop1" "fert1" "mort1" "invsh41" "geetot1" "geerec1"
## [25] "gde1" "govwb1" "govsh41" "gvxdxe41" "high65" "highm65"
## [31] "highf65" "highc65" "highcm65" "highcf65" "human65" "humanm65"
## [37] "humanf65" "hyr65" "hyrm65" "hyrf65" "no65" "nom65"
## [43] "nof65" "pinstab1" "pop65" "worker65" "pop1565" "pop6565"
## [49] "sec65" "secm65" "secf65" "secc65" "seccm65" "seccf65"
## [55] "syr65" "syrm65" "syrf65" "teapri65" "teasec65" "ex1"
## [61] "im1" "xr65" "tot1"
# Outcome variable: the Outcome column as a plain vector.
y <- as.vector(GrowthData[["Outcome"]])
# Treatment variable: the gdpsh465 column as a plain vector.
D <- as.vector(GrowthData[["gdpsh465"]])
# Candidate controls: every column except the first three
# (Outcome, intercept, gdpsh465), as a matrix.
Controls <- as.matrix(GrowthData)[, -c(1, 2, 3)]
`y` = GDP per capita growth rate.
`D` = initial GDP per capita.
`-c(1,2,3)` instructs to exclude the first 3 variables in `GrowthData`: `Outcome`, `intercept`, `gdpsh465`.
OLS regression with all controls:
# OLS of the outcome on the treatment plus the full matrix of controls.
conditional <- lm(y ~ D + Controls)

# Render the regression table as HTML; terms matching "Controls" are
# omitted from the table.
suppressWarnings(
  stargazer(
    conditional,
    header = FALSE,
    title = "Testing the conditional catching up hypothesis",
    omit.stat = "all",
    omit = "Controls",
    # type = "text"
    type = "html",
    notes.append = FALSE,
    notes = c("<sup>⋆</sup>p<0.1; <sup>⋆⋆</sup>p<0.05; <sup>⋆⋆⋆</sup>p<0.01")
  )
)
Dependent variable: | |
y | |
D | -0.009 |
(0.030) | |
Constant | 0.247 |
(0.785) | |
Note: | ⋆p<0.1; ⋆⋆p<0.05; ⋆⋆⋆p<0.01 |
The standard error on the initial-GDP coefficient increases from 0.006 to 0.030.
?rlassoEffect
Usage:
`x=` specifies the matrix of controls.
`y=` specifies the outcome variable.
`d=` specifies the treatment variable (the main regressor of interest).
Effect<-rlassoEffect(x=Controls,y=y,d=D,method="double selection")
# Estimate, standard error and significance of the treatment coefficient.
summary(Effect)
## [1] "Estimates and significance testing of the effect of target variables"
## Estimate. Std. Error t value Pr(>|t|)
## d1 -0.05001 0.01579 -3.167 0.00154 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Components stored in the fitted object.
names(Effect)
## [1] "alpha" "se" "t" "pval"
## [5] "no.selected" "coefficients" "coefficient" "coefficients.reg"
## [9] "selection.index" "residuals" "call" "samplesize"
# One logical flag per control: TRUE marks a selected control.
Effect[["selection.index"]]
## bmp1l freeop freetar h65 hm65 hf65 p65 pm65
## TRUE FALSE TRUE FALSE TRUE FALSE FALSE FALSE
## pf65 s65 sm65 sf65 fert65 mort65 lifee065 gpop1
## FALSE FALSE FALSE TRUE FALSE FALSE TRUE FALSE
## fert1 mort1 invsh41 geetot1 geerec1 gde1 govwb1 govsh41
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## gvxdxe41 high65 highm65 highf65 highc65 highcm65 highcf65 human65
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## humanm65 humanf65 hyr65 hyrm65 hyrf65 no65 nom65 nof65
## FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## pinstab1 pop65 worker65 pop1565 pop6565 sec65 secm65 secf65
## FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## secc65 seccm65 seccf65 syr65 syrm65 syrf65 teapri65 teasec65
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## ex1 im1 xr65 tot1
## FALSE FALSE FALSE FALSE
Included controls:
# Number of selected controls (each TRUE counts as 1).
sum(Effect$selection.index)
## [1] 7
# Keep only the entries flagged TRUE, i.e. the selected controls.
Effect$selection.index[Effect$selection.index]
## bmp1l freetar hm65 sf65 lifee065 humanf65 pop6565
## TRUE TRUE TRUE TRUE TRUE TRUE TRUE
`bmp1l`: Log of the black market premium.
`freetar`: Measure of tariff restrictions.
`hm65`: Male gross enrollment ratio for higher education in 1965.
`sf65`: Female gross enrollment ratio for secondary education in 1965.
`lifee065`: Life expectancy at 0 in 1965.
`humanf65`: Average schooling years in the female population over age 25 in 1965.
`pop6565`: Population proportion over 65 in 1965.
?rlasso()
# rlasso fit of the Outcome column on the matrix of controls.
lasso.Y<-rlasso(Outcome~Controls,data=GrowthData)
# Components stored in the fitted object.
names(lasso.Y)
## [1] "coefficients" "beta" "intercept" "index" "lambda"
## [6] "lambda0" "loadings" "residuals" "sigma" "iter"
## [11] "call" "options" "model" "tss" "rss"
## [16] "dev"
# Estimated coefficients. In the recorded output below only the intercept
# and Controlsbmp1l are nonzero; every other control is set to zero.
coef(lasso.Y)
## (Intercept) Controlsbmp1l Controlsfreeop Controlsfreetar
## 0.05810092 -0.07556548 0.00000000 0.00000000
## Controlsh65 Controlshm65 Controlshf65 Controlsp65
## 0.00000000 0.00000000 0.00000000 0.00000000
## Controlspm65 Controlspf65 Controlss65 Controlssm65
## 0.00000000 0.00000000 0.00000000 0.00000000
## Controlssf65 Controlsfert65 Controlsmort65 Controlslifee065
## 0.00000000 0.00000000 0.00000000 0.00000000
## Controlsgpop1 Controlsfert1 Controlsmort1 Controlsinvsh41
## 0.00000000 0.00000000 0.00000000 0.00000000
## Controlsgeetot1 Controlsgeerec1 Controlsgde1 Controlsgovwb1
## 0.00000000 0.00000000 0.00000000 0.00000000
## Controlsgovsh41 Controlsgvxdxe41 Controlshigh65 Controlshighm65
## 0.00000000 0.00000000 0.00000000 0.00000000
## Controlshighf65 Controlshighc65 Controlshighcm65 Controlshighcf65
## 0.00000000 0.00000000 0.00000000 0.00000000
## Controlshuman65 Controlshumanm65 Controlshumanf65 Controlshyr65
## 0.00000000 0.00000000 0.00000000 0.00000000
## Controlshyrm65 Controlshyrf65 Controlsno65 Controlsnom65
## 0.00000000 0.00000000 0.00000000 0.00000000
## Controlsnof65 Controlspinstab1 Controlspop65 Controlsworker65
## 0.00000000 0.00000000 0.00000000 0.00000000
## Controlspop1565 Controlspop6565 Controlssec65 Controlssecm65
## 0.00000000 0.00000000 0.00000000 0.00000000
## Controlssecf65 Controlssecc65 Controlsseccm65 Controlsseccf65
## 0.00000000 0.00000000 0.00000000 0.00000000
## Controlssyr65 Controlssyrm65 Controlssyrf65 Controlsteapri65
## 0.00000000 0.00000000 0.00000000 0.00000000
## Controlsteasec65 Controlsex1 Controlsim1 Controlsxr65
## 0.00000000 0.00000000 0.00000000 0.00000000
## Controlstot1
## 0.00000000
# rlasso fit of the treatment D on the matrix of controls.
lasso.D<-rlasso(D~Controls)
# Estimated coefficients. In the recorded output below the nonzero entries
# are the intercept, freetar, hm65, sf65, lifee065, humanf65 and pop6565.
coef(lasso.D)
## (Intercept) bmp1l freeop freetar h65 hm65
## -4.48400246 0.00000000 0.00000000 -6.22415497 0.00000000 1.64599221
## hf65 p65 pm65 pf65 s65 sm65
## 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## sf65 fert65 mort65 lifee065 gpop1 fert1
## -0.16139575 0.00000000 0.00000000 2.91176774 0.00000000 0.00000000
## mort1 invsh41 geetot1 geerec1 gde1 govwb1
## 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## govsh41 gvxdxe41 high65 highm65 highf65 highc65
## 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## highcm65 highcf65 human65 humanm65 humanf65 hyr65
## 0.00000000 0.00000000 0.00000000 0.00000000 0.03873902 0.00000000
## hyrm65 hyrf65 no65 nom65 nof65 pinstab1
## 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## pop65 worker65 pop1565 pop6565 sec65 secm65
## 0.00000000 0.00000000 0.00000000 1.85116114 0.00000000 0.00000000
## secf65 secc65 seccm65 seccf65 syr65 syrm65
## 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## syrf65 teapri65 teasec65 ex1 im1 xr65
## 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## tot1
## 0.00000000
# Treatment-effect estimate using the "partialling out" method.
Effect_PO <- rlassoEffect(
  x = Controls,
  y = y,
  d = D,
  method = "partialling out"
)
summary(Effect_PO)
## [1] "Estimates and significance testing of the effect of target variables"
## Estimate. Std. Error t value Pr(>|t|)
## [1,] -0.04981 0.01394 -3.574 0.000351 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Number of selected controls (each TRUE counts as 1).
sum(Effect_PO$selection.index)
## [1] 7
# Keep only the entries flagged TRUE, i.e. the selected controls.
Effect_PO$selection.index[Effect_PO$selection.index]
## bmp1l freetar hm65 sf65 lifee065 humanf65 pop6565
## TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Step 1: residuals of the outcome after an rlasso fit on the controls.
lasso.Y <- rlasso(Outcome ~ Controls, data = GrowthData)
Ytilde <- lasso.Y[["residuals"]]
# Step 2: residuals of the treatment after an rlasso fit on the controls.
lasso.D <- rlasso(D ~ Controls, data = GrowthData)
Dtilde <- lasso.D[["residuals"]]
# Step 3: regress residual on residual without an intercept, then test the
# coefficient with an HC0 covariance estimate.
Post <- lm(Ytilde ~ -1 + Dtilde)
coeftest(Post, vcov. = vcovHC(Post, type = "HC0"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## Dtilde -0.049811 0.015219 -3.2729 0.001516 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
This data set contains many potential controls that are highly correlated with each other.
These controls are related not only to each other but also to the main regressor (treatment), the “initial” GDP per capita. As a result, including all potential controls produces insignificant estimates, because there are too many controls relative to the sample size (60 controls with only 90 observations).
It is plausible to assume that the model is sparse: only certain demographic, education, and other variables matter. This is an appropriate problem for Lasso, as we need to select a few out of many controls.
Lasso selects the important controls. The double Lasso step also selects the controls that are related to the main regressor to avoid potential omitted variables bias.
Post Lasso produces significant estimates on the main regressor. The result implies that the conditional catching up hypothesis holds: growth rates converge for countries with similar economic and demographic characteristics.