Logistic regression with Elastic net and minimax concave penalties

Illustrate the modularity of skglm by using GeneralizedLinearEstimator with one datafit and one penalty.

/home/badr-moufad/Desktop/skglm-repo/skglm/examples/plot_logreg_various_penalties.py:53: MatplotlibDeprecationWarning: The 'use_line_collection' parameter of stem() was deprecated in Matplotlib 3.6 and will be removed two minor releases later. If any parameter follows 'use_line_collection', they should be passed as keyword, not positionally.
  m, s, _ = plt.stem(
/home/badr-moufad/Desktop/skglm-repo/skglm/examples/plot_logreg_various_penalties.py:61: MatplotlibDeprecationWarning: The 'use_line_collection' parameter of stem() was deprecated in Matplotlib 3.6 and will be removed two minor releases later. If any parameter follows 'use_line_collection', they should be passed as keyword, not positionally.
  m, s, _ = plt.stem(
/home/badr-moufad/Desktop/skglm-repo/skglm/examples/plot_logreg_various_penalties.py:69: MatplotlibDeprecationWarning: The 'use_line_collection' parameter of stem() was deprecated in Matplotlib 3.6 and will be removed two minor releases later. If any parameter follows 'use_line_collection', they should be passed as keyword, not positionally.
  plt.stem(

# Author: Pierre-Antoine Bannier

import numpy as np
from numpy.linalg import norm
import matplotlib.pyplot as plt

from sklearn.metrics import f1_score

from skglm import GeneralizedLinearEstimator
from skglm.datafits import Logistic
from skglm.penalties import L1_plus_L2, MCPenalty
from skglm.utils.data import make_correlated_data


# Generate a problem with twice as many features as samples, and derive
# binary labels from the sign of the generated response ``y``.
n_samples, n_features = 50, 100
X, y, w_star = make_correlated_data(
    n_samples=n_samples,
    n_features=n_features,
    random_state=0,
)
y_ind = np.sign(y)

# Standardize for MCP: rescale X in place so that every column ends up with
# a Euclidean norm of sqrt(number of rows).
column_scale = norm(X, axis=0) / np.sqrt(len(X))
X /= column_scale

# First half of the rows is the train set, second half is the test set.
n_train = n_samples // 2
X_train, X_test = X[:n_train], X[n_train:]
y_train, y_test = y_ind[:n_train], y_ind[n_train:]


# Hyper-parameters: ``alpha`` is passed to both penalties, ``l1_ratio`` only
# to the elastic net and ``gamma`` only to MCP.
alpha = 0.005
gamma = 3.0
l1_ratio = 0.3

# Logistic datafit combined with the elastic net (L1 + L2) penalty.
clf_enet = GeneralizedLinearEstimator(Logistic(), L1_plus_L2(alpha, l1_ratio))
clf_enet.fit(X_train, y_train)
y_pred_enet = clf_enet.predict(X_test)
f1_score_enet = f1_score(y_test, y_pred_enet)

# Same datafit, this time combined with the minimax concave penalty (MCP).
clf_mcp = GeneralizedLinearEstimator(Logistic(), MCPenalty(alpha, gamma))
clf_mcp.fit(X_train, y_train)
y_pred_mcp = clf_mcp.predict(X_test)
f1_score_mcp = f1_score(y_test, y_pred_mcp)


# Stem plots of the nonzero coefficients of both fitted models, drawn next to
# the nonzero entries of the true coefficient vector.
#
# NOTE: ``use_line_collection`` is deliberately not passed to ``plt.stem``.
# Matplotlib 3.6 deprecated that parameter (drawing stems as a line collection
# is already the default), and the deprecation message announces its removal,
# after which passing it fails.

# Elastic net: compute the support once and reuse it for positions and values.
enet_coef = clf_enet.coef_.ravel()
enet_support = np.flatnonzero(enet_coef)
m, s, _ = plt.stem(
    enet_support,
    enet_coef[enet_support],
    markerfmt="x",
    label="Elastic net coefficients",
)
plt.setp([m, s], color="#2ca02c")

# MCP: same treatment, different color.
mcp_coef = clf_mcp.coef_.ravel()
mcp_support = np.flatnonzero(mcp_coef)
m, s, _ = plt.stem(
    mcp_support,
    mcp_coef[mcp_support],
    markerfmt="x",
    label="MCP coefficients",
)
plt.setp([m, s], color="#ff7f0e")

# Ground truth, drawn with blue "x" markers.
plt.stem(
    np.where(w_star)[0],
    w_star[w_star != 0],
    label="true coefficients",
    markerfmt="bx",
)

plt.legend(loc="best")
plt.title("MCP F1: %.3f, Elastic Net F1: %.3f" % (f1_score_mcp, f1_score_enet))
plt.show()

Total running time of the script: (0 minutes 3.637 seconds)

Gallery generated by Sphinx-Gallery