1 Ridge, LASSO, and elastic-net regression
We can use the ‘glmnet’ R package (Simon et al. (2011)) for Ridge, LASSO, or elastic-net regularization. The ‘glmnet’ package supports several response families, including ‘gaussian’, ‘binomial’, and ‘poisson’. The strength of the regularization is set by the ‘lambda’ argument (\(\lambda\)) and the weighting between Ridge and LASSO regularization by the ‘alpha’ parameter (\(\alpha\)):
\[ \lambda\left[(1 - \alpha)\tfrac{1}{2}\|\beta\|_2^2 + \alpha\|\beta\|_1\right] \]
Setting alpha = 0 turns off the LASSO penalty (leaving pure Ridge) and alpha = 1 turns off the Ridge penalty (leaving pure LASSO). Alphas in (0, 1) use both regularization types, turning the model into an elastic-net.
When using regularization, it is important to scale all features; otherwise the penalty acts unevenly across features, with its effective strength depending on each feature's scale.
In Python, the ‘scikit-learn’ package provides an interface for many different ML algorithms, including elastic-net regression models (Pedregosa et al. (2011)).
In Julia, the ‘MLJ’ package provides a generic interface for different ML algorithms (Blaom et al. (2019)). Elastic-net regression models can be accessed via the ‘MLJLinearModels’ package within MLJ.
1.1 Classification
Build models (for regularization it is important to scale the features):
library(glmnet)

X = scale(iris[,1:4])
Y = iris$Species

# Ridge:
ridge = glmnet(X, Y, family = "multinomial", alpha = 0, lambda = 0.01)

# LASSO:
lasso = glmnet(X, Y, family = "multinomial", alpha = 1, lambda = 0.01)

# Elastic-net:
elastic = glmnet(X, Y, family = "multinomial", alpha = 0.5, lambda = 0.01)
Make predictions (class probabilities):
head(predict(lasso, newx = X, type = "response")[,,1], n = 3)
setosa versicolor virginica
[1,] 0.9858987 0.01410131 3.438452e-09
[2,] 0.9668897 0.03311031 1.397684e-08
[3,] 0.9815369 0.01846312 5.279315e-09
In the sklearn implementation the regularization is controlled by the parameter ‘C’, which is the inverse of the regularization strength (roughly 1/λ), so smaller values of C mean stronger regularization:
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.preprocessing import scale
iris = datasets.load_iris()
X = scale(iris.data)
Y = iris.target
Models:
# Ridge:
ridge = LogisticRegression(multi_class='multinomial',
                           penalty="l2",
                           C=0.01,
                           solver="saga")
ridge.fit(X, Y)
LogisticRegression(C=0.01, multi_class='multinomial', solver='saga')

# LASSO:
lasso = LogisticRegression(multi_class='multinomial',
                           penalty="l1",
                           C=0.01,
                           solver="saga")
lasso.fit(X, Y)
LogisticRegression(C=0.01, multi_class='multinomial', penalty='l1', solver='saga')

# Elastic-net:
elastic = LogisticRegression(multi_class='multinomial',
                             penalty="elasticnet",
                             C=0.01,
                             l1_ratio=0.5,
                             solver="saga")
elastic.fit(X, Y)
LogisticRegression(C=0.01, l1_ratio=0.5, multi_class='multinomial', penalty='elasticnet', solver='saga')
Predictions (probabilities):
lasso.predict_proba(X)[0:5,:]
array([[0.27313906, 0.3878848 , 0.33897614],
[0.27313906, 0.3878848 , 0.33897614],
[0.27313906, 0.3878848 , 0.33897614],
[0.27313906, 0.3878848 , 0.33897614],
[0.27313906, 0.3878848 , 0.33897614]])
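Note that these probabilities are nearly uniform while the glmnet predictions above were quite confident: because ‘C’ is an inverse strength, C = 0.01 penalizes far more heavily than lambda = 0.01 does in glmnet. A minimal sketch of the effect, reusing X and Y from above (the value C = 100.0 is an arbitrary illustration, not part of the original example):
# Weaker regularization (larger C) should give much sharper class
# probabilities; C=100.0 is a hypothetical value chosen for illustration.
weak = LogisticRegression(multi_class='multinomial',
                          penalty='l2',
                          C=100.0,
                          solver='saga',
                          max_iter=10000)
weak.fit(X, Y)
weak.predict_proba(X)[0:5, :]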
import StatsBase;
using MLJ;
using MLJLinearModels;
@load MultinomialClassifier pkg=MLJLinearModels;
using RDatasets;
using StatsBase;
using DataFrames;
iris = dataset("datasets", "iris");
X = mapcols(StatsBase.zscore, iris[:, 1:4]);
Y = iris[:, 5];
Models:
# Ridge
ridge = fit!(machine(MultinomialClassifier(lambda = 0.01, penalty = "l2"), X, Y));

# Lasso
lasso = fit!(machine(MultinomialClassifier(lambda = 0.01, penalty = "l1"), X, Y));

# Elastic-net
elastic = fit!(machine(MultinomialClassifier(lambda = 0.01, gamma = 0.01, penalty = "en"), X, Y));
Predictions:
MLJ.predict(lasso, X)[1:5]
5-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:
UnivariateFinite{Multiclass{3}}(setosa=>0.992, versicolor=>0.00829, virginica=>2.2e-9)
UnivariateFinite{Multiclass{3}}(setosa=>0.969, versicolor=>0.031, virginica=>1.4e-8)
UnivariateFinite{Multiclass{3}}(setosa=>0.985, versicolor=>0.0149, virginica=>4.6e-9)
UnivariateFinite{Multiclass{3}}(setosa=>0.971, versicolor=>0.0293, virginica=>1.41e-8)
UnivariateFinite{Multiclass{3}}(setosa=>0.994, versicolor=>0.00635, virginica=>1.51e-9)
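To make the scaling note from the introduction concrete, here is a small self-contained Python sketch (the 1000-fold rescaling is a hypothetical manipulation, not part of the original example): the same L2 penalty yields very different coefficients once a single feature is put on a much larger scale.
# Sketch: rescaling one feature changes its fitted coefficient under an
# otherwise identical L2 penalty; this is why features are standardized first.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X_demo, y_demo = load_iris(return_X_y=True)
X_rescaled = X_demo.copy()
X_rescaled[:, 0] *= 1000.0  # hypothetical: put sepal length on a huge scale

for data, label in [(X_demo, "original"), (X_rescaled, "rescaled")]:
    model = LogisticRegression(C=1.0, max_iter=10000).fit(data, y_demo)
    print(label, np.round(model.coef_[0], 4))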
1.2 Regression
X = scale(iris[,2:4])
Y = iris[,1]

# Ridge:
ridge = glmnet(X, Y, family = gaussian(), alpha = 0, lambda = 0.01)

# LASSO:
lasso = glmnet(X, Y, family = gaussian(), alpha = 1, lambda = 0.01)

# Elastic-net:
elastic = glmnet(X, Y, family = gaussian(), alpha = 0.5, lambda = 0.01)
Make predictions:
head(predict(lasso, newx = X), n = 3)
s0
[1,] 5.006484
[2,] 4.720600
[3,] 4.781548
For regression we can use the ElasticNet model class; here, however, sklearn's ‘alpha’ corresponds to glmnet's lambda and ‘l1_ratio’ to glmnet's alpha parameter:
from sklearn.linear_model import ElasticNet
from sklearn import datasets
from sklearn.preprocessing import scale
iris = datasets.load_iris()
data = iris.data
X = scale(data[:,1:4])
Y = data[:,0]
# Ridge:
ridge = ElasticNet(alpha = 0.01,
                   l1_ratio = 0.011)
ridge.fit(X, Y)
ElasticNet(alpha=0.01, l1_ratio=0.011)

# LASSO:
lasso = ElasticNet(alpha = 0.01,
                   l1_ratio = 1.0)
lasso.fit(X, Y)
ElasticNet(alpha=0.01, l1_ratio=1.0)

# Elastic-net:
elastic = ElasticNet(alpha = 0.01,
                     l1_ratio = 0.5)
elastic.fit(X, Y)
ElasticNet(alpha=0.01)

# Make predictions:
lasso.predict(X)[0:10]
array([5.0064384 , 4.72032938, 4.78125162, 4.83107256, 5.06366021,
5.36149937, 4.93202142, 5.00273797, 4.66310758, 4.84826774])
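A practical difference between the penalties is easy to inspect here: the L1 term can drive coefficients exactly to zero, while the (almost pure) L2 model only shrinks them. A minimal sketch reusing the fitted sklearn models from above; at this weak alpha the LASSO may not zero out any feature yet, but the zeros appear as alpha grows:
# Inspect fitted coefficients: the L1 model (lasso) tends toward exact zeros
# as alpha grows, whereas the near-L2 model (ridge) only shrinks them.
print("ridge:", ridge.coef_)
print("lasso:", lasso.coef_)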
import StatsBase;
using MLJ;
using MLJLinearModels;
@load LassoRegressor pkg=MLJLinearModels;
@load RidgeRegressor pkg=MLJLinearModels;
@load ElasticNetRegressor pkg=MLJLinearModels;
using RDatasets;
using DataFrames;
iris = dataset("datasets", "iris");
X = mapcols(StatsBase.zscore, iris[:, 2:4]);
Y = iris[:, 1];
Models:
# Ridge
ridge = fit!(machine(RidgeRegressor(lambda = 0.01), X, Y));

# Lasso
lasso = fit!(machine(LassoRegressor(lambda = 0.01), X, Y));

# Elastic-net
elastic = fit!(machine(ElasticNetRegressor(lambda = 0.01, gamma = 0.01), X, Y));
Predictions:
MLJ.predict(lasso, X)[1:5]
5-element Vector{Float64}:
5.007709152258313
4.711530001523257
4.770100849643125
4.830666643844422
5.0669449824053245