suppressMessages({
library(foreach)
library(glmnet)
library(kableExtra)
library(stargazer)
})
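
If any of these packages are missing, they can be installed once beforehand (commented out here; install.packages() is base R):

# install.packages(c("foreach", "glmnet", "kableExtra", "stargazer"))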

We illustrate Lasso using simulated data.

DGP

We use the same polynomial regression model as before:

\[\begin{align} Y_i & = \beta_0+\beta_1 X_{i}+\beta_2 X_i^2+\ldots+\beta_{p_0} X_i^{p_0}+U_i,\\ U_i &\sim N(0,1),\\ X_i &\sim N(0,1). \end{align}\]

A custom function to generate data:

DGP<-function(n,p0){
  #n = the number of observations
  #p0 = the number of polynomial terms
  X<-rnorm(n)
  U<-rnorm(n)
  Y<-U
  for (j in 1:p0){
    Y<-Y+X^j   # beta_0 = 0 and all slope coefficients beta_j are set to 1
  }
  return(list(y=Y,x=X))
}

Generate data with four polynomial terms:

n=30
p0=4
set.seed(42)
MyData<-DGP(n=n,p0=p0)
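
Optionally, a quick base R plot of the simulated data (illustration only, not part of the analysis):

plot(MyData$x, MyData$y,
     xlab = "x", ylab = "y",
     main = "Simulated data: degree-4 polynomial, n = 30")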

Lasso

One of the R functions for Lasso is glmnet() from the package glmnet. By default, glmnet() fits the model over a decreasing sequence of penalty values \(\lambda\); setting alpha=1 selects the Lasso penalty.

Let's run Lasso with 10 polynomial terms:

p=10
X=model.matrix(MyData$y ~ poly(MyData$x,p=p,raw=TRUE))[,-1]
Lasso<-glmnet(x=X,
              y=MyData$y,
              #standardize = F,
              alpha=1)
names(Lasso)
##  [1] "a0"        "beta"      "df"        "dim"       "lambda"    "dev.ratio"
##  [7] "nulldev"   "npasses"   "jerr"      "offset"    "call"      "nobs"

The matrix Lasso$beta contains one column of coefficient estimates per value of \(\lambda\). For example, the coefficients at the 10th value of the \(\lambda\) sequence:

Lasso$beta[,10]
##  poly(MyData$x, p = p, raw = TRUE)1  poly(MyData$x, p = p, raw = TRUE)2 
##                            0.000000                            3.218962 
##  poly(MyData$x, p = p, raw = TRUE)3  poly(MyData$x, p = p, raw = TRUE)4 
##                            0.000000                            0.000000 
##  poly(MyData$x, p = p, raw = TRUE)5  poly(MyData$x, p = p, raw = TRUE)6 
##                            0.000000                            0.000000 
##  poly(MyData$x, p = p, raw = TRUE)7  poly(MyData$x, p = p, raw = TRUE)8 
##                            0.000000                            0.000000 
##  poly(MyData$x, p = p, raw = TRUE)9 poly(MyData$x, p = p, raw = TRUE)10 
##                            0.000000                            0.000000
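
To see how the estimates change along the \(\lambda\) path, we can plot the coefficient paths (an optional check; plot() here is glmnet's built-in method for fitted objects):

length(Lasso$lambda)                        # number of lambda values on the path
plot(Lasso, xvar = "lambda", label = TRUE)  # coefficient paths against log(lambda)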

Let’s compare the coefficients for different values of the penalty parameter lambda. We use the kable() function from the package kableExtra to construct a table:

lambda_ind_1=1
lambda_ind_2=10
lambda_ind_3=25
lambda_ind_4=length(Lasso$lambda)

ind=seq(from=1,to=p, by=1)
cbind(ind,Lasso$beta[,lambda_ind_1],Lasso$beta[,lambda_ind_2],Lasso$beta[,lambda_ind_3],Lasso$beta[,lambda_ind_4]) %>%
  kable(digits=3,
        align=rep('c',times=5),
        caption="Estimated Lasso coefficients for different $\\lambda$ penalty parameter values",
        row.names = FALSE,
        col.names=c("Polynom. terms",paste0("$\\lambda=$ ",round(Lasso$lambda[lambda_ind_1],digits=2)),paste0("$\\lambda=$ ",round(Lasso$lambda[lambda_ind_2],digits=2)),paste0("$\\lambda=$ ",round(Lasso$lambda[lambda_ind_3],digits=2)),paste0("$\\lambda=$ ",round(Lasso$lambda[lambda_ind_4],digits=2)))
        ) %>%
  kable_classic(full_width = F, html_font = "Cambria")
Estimated Lasso coefficients for different \(\lambda\) penalty parameter values

Polynom. terms   \(\lambda=\) 11.03   \(\lambda=\) 4.77   \(\lambda=\) 1.18   \(\lambda=\) 0.03
      1                  0                  0.000               1.929               0.748
      2                  0                  3.219               2.377               2.119
      3                  0                  0.000               0.255               1.036
      4                  0                  0.000               0.549               0.494
      5                  0                  0.000               0.000               0.000
      6                  0                  0.000               0.000               0.042
      7                  0                  0.000               0.000               0.000
      8                  0                  0.000               0.000               0.002
      9                  0                  0.000               0.000               0.000
     10                  0                  0.000               0.000               0.000
With a very small penalty, e.g. \(\lambda=0.001\), Lasso keeps essentially all regressors:

Lasso_s<-glmnet(x=X,
              y=MyData$y,
              lambda=0.001,
              alpha=1)
Lasso_s$beta
## 10 x 1 sparse Matrix of class "dgCMatrix"
##                                                s0
## poly(MyData$x, p = p, raw = TRUE)1   7.248380e-01
## poly(MyData$x, p = p, raw = TRUE)2   2.259741e+00
## poly(MyData$x, p = p, raw = TRUE)3   1.066606e+00
## poly(MyData$x, p = p, raw = TRUE)4   4.657693e-01
## poly(MyData$x, p = p, raw = TRUE)5  -1.564159e-02
## poly(MyData$x, p = p, raw = TRUE)6   3.133275e-02
## poly(MyData$x, p = p, raw = TRUE)7   7.453364e-05
## poly(MyData$x, p = p, raw = TRUE)8   2.467891e-03
## poly(MyData$x, p = p, raw = TRUE)9   6.150359e-04
## poly(MyData$x, p = p, raw = TRUE)10  2.906645e-04

Lasso selection with estimation-targeted penalty parameter lambda
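
The code below sets the penalty to the estimation-targeted (plug-in) value

\[\lambda = 2\sqrt{\frac{2\log(np)}{n}},\]

which grows only logarithmically in the number of candidate regressors \(p\); with \(n=30\) and \(p=10\) this is roughly 1.23.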

Lasso_s<-glmnet(x=X,
              y=MyData$y,
              lambda=2*sqrt(2*log(n*p)/n),
              alpha=1)
Lasso_s$beta
## 10 x 1 sparse Matrix of class "dgCMatrix"
##                                            s0
## poly(MyData$x, p = p, raw = TRUE)1  1.9803763
## poly(MyData$x, p = p, raw = TRUE)2  2.4313637
## poly(MyData$x, p = p, raw = TRUE)3  0.2227161
## poly(MyData$x, p = p, raw = TRUE)4  0.5302107
## poly(MyData$x, p = p, raw = TRUE)5  .        
## poly(MyData$x, p = p, raw = TRUE)6  .        
## poly(MyData$x, p = p, raw = TRUE)7  .        
## poly(MyData$x, p = p, raw = TRUE)8  .        
## poly(MyData$x, p = p, raw = TRUE)9  .        
## poly(MyData$x, p = p, raw = TRUE)10 .
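
In applied work, \(\lambda\) is often chosen by cross-validation instead. A minimal sketch using cv.glmnet() from the same package (the object name cv_Lasso is just for illustration; this choice is not used in the rest of these notes):

cv_Lasso<-cv.glmnet(x=X,
                    y=MyData$y,
                    alpha=1,
                    nfolds=10)
cv_Lasso$lambda.min                # lambda with the smallest cross-validated error
cv_Lasso$lambda.1se                # largest lambda within one standard error of the minimum
coef(cv_Lasso, s = "lambda.min")   # coefficients at the CV-selected lambda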

MC simulations for Lasso’s accuracy

A custom function to generate data, run Lasso, and check included regressors:

Selected<-function(n,lambda){
  #n=sample size
  MyData<-DGP(n=n,p0=4)
  X=model.matrix(MyData$y ~ poly(MyData$x,p=10,raw=TRUE))[,-1]
  Lasso.mc<-glmnet(x=X,
              y=MyData$y,
              alpha=1,
              lambda=lambda,
              )
  IN_r<-sum(Lasso.mc$beta[1:4] != 0)  # how many of the 4 true regressors were selected
  IN_w<-sum(Lasso.mc$beta[5:10] != 0) # how many irrelevant regressors were selected
  return(list(right=IN_r,wrong=IN_w))
}

Let’s run the simulations 100 times:

set.seed(42)
OUT<-foreach(r=1:100, .combine='rbind') %do% Selected(n=1000,lambda=2*sqrt(log(2*n*10)/n))
cat('Ave number of correct regressors included = ',mean(unlist(lapply(OUT[,1],as.vector))),'\n')
## Ave number of correct regressors included =  4
cat('Ave number of wrong regressors included   = ',mean(unlist(lapply(OUT[,2],as.vector))))
## Ave number of wrong regressors included   =  0

Very accurate selection in large samples!
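
With more replications or larger samples, the loop can be parallelized by registering a backend for foreach and swapping %do% for %dopar%. A minimal sketch, assuming the doParallel package is installed (OUT_par and cl are illustrative names):

library(doParallel)
cl<-makeCluster(2)        # start two worker processes
registerDoParallel(cl)    # register them as the foreach backend
OUT_par<-foreach(r=1:100, .combine='rbind',
                 .packages='glmnet',                   # load glmnet on each worker
                 .export=c('Selected','DGP')) %dopar%  # ship the helper functions to the workers
  Selected(n=1000,lambda=2*sqrt(log(2*n*10)/n))
stopCluster(cl)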

Small coefficients, Lasso, bias

Let’s change the DGP:

\[\begin{align} Y_i & = \beta_0+ D_i+\beta X_{i} +U_i,\\ D_i &=X_i+ \rho\cdot N(0,1), \\ X_i &\sim N(0,1),\\ U_i &\sim N(0,0.01). \end{align}\]

A new custom function to generate data:

DGP2<-function(n,rho,beta){
  #n = number of observations
  #rho= determines the relationship between the main regressor D and control X. Small rho -> strong relationship
  #beta= coefficient on the control X.
  X<-rnorm(n)
  D<-X+rho*rnorm(n)
  U<-rnorm(n)
  Y<-D+beta*X+0.1*U   # error standard deviation 0.1, i.e. variance 0.01
  return(list(Y=Y,X=X,D=D))
}

Let’s generate data with a small \(\rho=0.1\), a small \(\beta =2/\sqrt{n}\), and run Lasso:

set.seed(42)
MyData<-DGP2(n=100,rho=.1,beta=2/sqrt(n))
X=model.matrix(MyData$Y ~ MyData$D+MyData$X)[,-1]
Lasso2<-glmnet(x=X,
              y=MyData$Y,
              alpha=1,
              lambda=2*sqrt(log(2*n*10)/n),
              )
coef(Lasso2)
## 3 x 1 sparse Matrix of class "dgCMatrix"
##                     s0
## (Intercept) 0.02361071
## MyData$D    0.46248598
## MyData$X    .

Lasso has set the coefficient on \(X_i\) to zero. Let’s see if omitting \(X_i\) causes bias in the OLS coefficient on \(D_i\):

model.long<-lm(MyData$Y~MyData$D+MyData$X)
model.short<-lm(MyData$Y~MyData$D)
stargazer(model.long,model.short,
          type="text",
          omit.stat=c("ll","ser","f","n")
)
## 
## ========================================
##                 Dependent variable:     
##             ----------------------------
##                          Y              
##                  (1)            (2)     
## ----------------------------------------
## D              1.085***      1.348***   
##                (0.113)        (0.010)   
##                                         
## X              0.265**                  
##                (0.113)                  
##                                         
## Constant        0.0002         0.003    
##                (0.010)        (0.010)   
##                                         
## ----------------------------------------
## R2              0.995          0.995    
## Adjusted R2     0.995          0.995    
## ========================================
## Note:        *p<0.1; **p<0.05; ***p<0.01

Since \(D_i\) and \(X_i\) are strongly correlated, dropping \(X_i\) shifts the OLS coefficient on \(D_i\) from 1.085 to 1.348. Even though \(\beta\) is small, the regressor that Lasso selects away still matters: omitting it produces noticeable omitted variable bias.