Source code for lightning.impl.sgd

"""
===================================
Stochastic Gradient Descent Solvers
===================================

This module provides SGD solvers for a variety of loss
functions and penalties.
"""
# Author: Mathieu Blondel
# License: BSD

import warnings

import numpy as np

from sklearn.utils import check_random_state
from sklearn.utils.extmath import safe_sparse_dot
from sklearn.utils.validation import assert_all_finite

from .base import BaseClassifier
from .base import BaseRegressor

from .dataset_fast import get_dataset

from .sgd_fast import _binary_sgd
from .sgd_fast import _multiclass_sgd

from .sgd_fast import ModifiedHuber
from .sgd_fast import Hinge
from .sgd_fast import SquaredHinge
from .sgd_fast import Log
from .sgd_fast import SquaredLoss
from .sgd_fast import Huber
from .sgd_fast import EpsilonInsensitive

from .sgd_fast import MulticlassLog
from .sgd_fast import MulticlassHinge
from .sgd_fast import MulticlassSquaredHinge


class _BaseSGD(object):

    def _get_penalty(self):
        # Map the user-facing penalty name to the integer code expected
        # by the Cython solvers in sgd_fast.
        penalties = {
            "nn": -1,
            "nnl1": -1,
            "nnl2": -2,
            "l1": 1,
            "l2": 2,
            "l1/l2": 12
        }
        return penalties[self.penalty]

    def _get_learning_rate(self):
        # Same convention: the learning-rate schedule is passed to the
        # Cython solvers as an integer code.
        learning_rates = {"constant": 1, "pegasos": 2, "invscaling": 3}
        return learning_rates[self.learning_rate]

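# A minimal sketch (not part of the original module) of how the helpers
# above behave: any estimator mixing in _BaseSGD resolves its string
# options to the integer codes consumed by the Cython solvers.  Shown
# commented out because SGDClassifier is only defined further down.
#
#     opts = SGDClassifier(penalty="l1/l2", learning_rate="invscaling")
#     opts._get_penalty()        # -> 12
#     opts._get_learning_rate()  # -> 3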

class SGDClassifier(BaseClassifier, _BaseSGD):
    r"""Estimator for learning linear classifiers by SGD.

    Parameters
    ----------
    loss : str, 'hinge', 'squared_hinge', 'log', 'perceptron',
           'modified_huber', 'squared', 'huber', 'epsilon_insensitive'
        Loss function to be used.

    penalty : str, 'l2', 'l1', 'l1/l2'
        The penalty to be used.

        - l2: ridge
        - l1: lasso
        - l1/l2: group lasso

    multiclass : bool
        Whether to use a direct multiclass formulation (True) or
        one-vs-rest (False). Direct formulations are only available for
        loss='hinge', 'squared_hinge' and 'log'.

    alpha : float
        Weight of the penalty term.

    learning_rate : 'pegasos', 'constant', 'invscaling'
        Learning rate schedule to use.

    eta0 : float
        Step size.

    power_t : float
        Power to be used (when learning_rate='invscaling').

    epsilon : float
        Value to be used for the Huber and epsilon-insensitive losses.

    fit_intercept : bool
        Whether to fit the intercept or not.

    intercept_decay : float
        Value by which the intercept is multiplied (to regularize it).

    max_iter : int
        Maximum number of passes (epochs) over the training data.

    shuffle : bool
        Whether to shuffle the data.

    callback : callable
        Callback function.

    n_calls : int
        Frequency with which `callback` should be called.

    random_state : RandomState or int
        The seed of the pseudo random number generator to use.

    verbose : int
        Verbosity level.

    Examples
    --------
    >>> from sklearn.datasets import fetch_20newsgroups_vectorized
    >>> from lightning.classification import SGDClassifier
    >>> bunch = fetch_20newsgroups_vectorized(subset="all")
    >>> X, y = bunch.data, bunch.target
    >>> clf = SGDClassifier().fit(X, y)
    >>> accuracy = clf.score(X, y)
    """

    def __init__(self, loss="hinge", penalty="l2", multiclass=False,
                 alpha=0.01, learning_rate="pegasos", eta0=0.03,
                 power_t=0.5, epsilon=0.01, fit_intercept=True,
                 intercept_decay=1.0, max_iter=10, shuffle=True,
                 random_state=None, callback=None, n_calls=100,
                 verbose=0):
        self.loss = loss
        self.penalty = penalty
        self.multiclass = multiclass
        self.alpha = alpha
        self.learning_rate = learning_rate
        self.eta0 = eta0
        self.power_t = power_t
        self.epsilon = epsilon
        self.fit_intercept = fit_intercept
        self.intercept_decay = intercept_decay
        self.max_iter = max_iter
        self.shuffle = shuffle
        self.random_state = random_state
        self.callback = callback
        self.n_calls = n_calls
        self.verbose = verbose
        self.coef_ = None
        self.intercept_ = None

    def _get_loss(self):
        if self.multiclass:
            losses = {
                "log": MulticlassLog(),
                "hinge": MulticlassHinge(),
                "squared_hinge": MulticlassSquaredHinge(),
            }
        else:
            losses = {
                "modified_huber": ModifiedHuber(),
                "hinge": Hinge(1.0),
                "squared_hinge": SquaredHinge(1.0),
                "perceptron": Hinge(0.0),
                "log": Log(),
                "squared": SquaredLoss(),
                "huber": Huber(self.epsilon),
                "epsilon_insensitive": EpsilonInsensitive(self.epsilon)
            }
        return losses[self.loss]

    def fit(self, X, y):
        """Fit model according to X and y.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape = [n_samples]
            Target values.

        Returns
        -------
        self : classifier
            Returns self.
        """
        rs = check_random_state(self.random_state)

        reencode = self.multiclass
        y, n_classes, n_vectors = self._set_label_transformers(y, reencode)

        ds = get_dataset(X)
        n_samples = ds.get_n_samples()
        n_features = ds.get_n_features()
        self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64)
        self.intercept_ = np.zeros(n_vectors, dtype=np.float64)

        loss = self._get_loss()
        penalty = self._get_penalty()

        if n_vectors == 1 or not self.multiclass:
            # One-vs-rest: train one binary solver per column of the
            # binarized label matrix.
            Y = np.asfortranarray(self.label_binarizer_.fit_transform(y),
                                  dtype=np.float64)
            for i in range(n_vectors):
                _binary_sgd(self,
                            self.coef_, self.intercept_, i,
                            ds, Y[:, i], loss, penalty,
                            self.alpha,
                            self._get_learning_rate(),
                            self.eta0, self.power_t,
                            self.fit_intercept,
                            self.intercept_decay,
                            # max_iter counts epochs; the solver expects
                            # the total number of updates.
                            int(self.max_iter * n_samples),
                            self.shuffle, rs,
                            self.callback, self.n_calls,
                            self.verbose)

        elif self.multiclass:
            _multiclass_sgd(self, self.coef_, self.intercept_,
                            ds, y.astype(np.int32), loss, penalty,
                            self.alpha, self._get_learning_rate(),
                            self.eta0, self.power_t, self.fit_intercept,
                            self.intercept_decay,
                            int(self.max_iter * n_samples),
                            self.shuffle, rs, self.callback, self.n_calls,
                            self.verbose)

        else:
            raise ValueError("Wrong value for multiclass.")

        try:
            assert_all_finite(self.coef_)
        except ValueError:
            warnings.warn("coef_ contains infinite values")

        return self
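

# A minimal usage sketch (illustrative, not part of the original module):
# fit the classifier in both one-vs-rest and direct multiclass modes on a
# small synthetic problem.  Assumes scikit-learn's make_classification
# helper; the function below is hypothetical and not in the library.
def _example_classifier_usage():
    from sklearn.datasets import make_classification

    X, y = make_classification(n_samples=200, n_features=20, n_classes=3,
                               n_informative=6, random_state=0)
    ovr = SGDClassifier(multiclass=False, random_state=0).fit(X, y)
    direct = SGDClassifier(loss="hinge", multiclass=True,
                           random_state=0).fit(X, y)
    # Both estimators expose coef_ with shape (n_classes, n_features).
    return ovr.score(X, y), direct.score(X, y)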


class SGDRegressor(BaseRegressor, _BaseSGD):
    r"""Estimator for learning linear regressors by SGD.

    Parameters
    ----------
    loss : str, 'squared', 'epsilon_insensitive', 'huber'
        Loss function to be used.

    penalty : str, 'l2', 'l1', 'l1/l2'
        The penalty to be used.

        - l2: ridge
        - l1: lasso
        - l1/l2: group lasso

    alpha : float
        Weight of the penalty term.

    learning_rate : 'pegasos', 'constant', 'invscaling'
        Learning rate schedule to use.

    eta0 : float
        Step size.

    power_t : float
        Power to be used (when learning_rate='invscaling').

    epsilon : float
        Value to be used for the Huber and epsilon-insensitive losses.

    fit_intercept : bool
        Whether to fit the intercept or not.

    intercept_decay : float
        Value by which the intercept is multiplied (to regularize it).

    max_iter : int
        Maximum number of passes (epochs) over the training data.

    shuffle : bool
        Whether to shuffle the data.

    callback : callable
        Callback function.

    n_calls : int
        Frequency with which `callback` should be called.

    random_state : RandomState or int
        The seed of the pseudo random number generator to use.

    verbose : int
        Verbosity level.
    """

    def __init__(self, loss="squared", penalty="l2", alpha=0.01,
                 learning_rate="pegasos", eta0=0.03, power_t=0.5,
                 epsilon=0.01, fit_intercept=True, intercept_decay=1.0,
                 max_iter=10, shuffle=True, random_state=None,
                 callback=None, n_calls=100, verbose=0):
        self.loss = loss
        self.penalty = penalty
        self.alpha = alpha
        self.learning_rate = learning_rate
        self.eta0 = eta0
        self.power_t = power_t
        self.epsilon = epsilon
        self.fit_intercept = fit_intercept
        self.intercept_decay = intercept_decay
        self.max_iter = max_iter
        self.shuffle = shuffle
        self.random_state = random_state
        self.callback = callback
        self.n_calls = n_calls
        self.verbose = verbose
        self.coef_ = None

    def _get_loss(self):
        losses = {
            "squared": SquaredLoss(),
            "huber": Huber(self.epsilon),
            "epsilon_insensitive": EpsilonInsensitive(self.epsilon)
        }
        return losses[self.loss]

    def fit(self, X, y):
        """Fit model according to X and y.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape = [n_samples] or [n_samples, n_targets]
            Target values.

        Returns
        -------
        self : regressor
            Returns self.
        """
        rs = check_random_state(self.random_state)

        ds = get_dataset(X)
        n_samples = ds.get_n_samples()
        n_features = ds.get_n_features()

        self.outputs_2d_ = len(y.shape) == 2
        if self.outputs_2d_:
            Y = y
        else:
            Y = y.reshape(-1, 1)
        Y = np.asfortranarray(Y)
        n_vectors = Y.shape[1]

        self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64)
        self.intercept_ = np.zeros(n_vectors, dtype=np.float64)

        loss = self._get_loss()
        penalty = self._get_penalty()

        for k in range(n_vectors):
            _binary_sgd(self,
                        self.coef_, self.intercept_, k,
                        ds, Y[:, k], loss, penalty,
                        self.alpha,
                        self._get_learning_rate(),
                        self.eta0, self.power_t,
                        self.fit_intercept,
                        self.intercept_decay,
                        int(self.max_iter * n_samples),
                        self.shuffle, rs,
                        self.callback, self.n_calls,
                        self.verbose)

        try:
            assert_all_finite(self.coef_)
        except ValueError:
            warnings.warn("coef_ contains infinite values")

        return self

    def predict(self, X):
        """Perform regression on an array of test vectors X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        p : array, shape = [n_samples] or [n_samples, n_targets]
            Predicted target values for X.
        """
        try:
            assert_all_finite(self.coef_)
            pred = safe_sparse_dot(X, self.coef_.T)
            pred += self.intercept_
        except ValueError:
            # Training diverged (non-finite coefficients): fall back to
            # all-zero predictions rather than propagating NaN/inf.
            n_samples = X.shape[0]
            n_vectors = self.coef_.shape[0]
            pred = np.zeros((n_samples, n_vectors))

        if not self.outputs_2d_:
            pred = pred.ravel()

        return pred
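

# A minimal usage sketch (illustrative, not part of the original module):
# single- and multi-output regression with the epsilon-insensitive loss.
# Assumes scikit-learn's make_regression helper; the function below is
# hypothetical and not in the library.
def _example_regressor_usage():
    from sklearn.datasets import make_regression

    X, y = make_regression(n_samples=100, n_features=10, n_targets=2,
                           random_state=0)
    reg = SGDRegressor(loss="epsilon_insensitive", epsilon=0.05,
                       random_state=0).fit(X, y)
    # y is 2-dimensional here, so predictions keep shape
    # (n_samples, n_targets).
    return reg.predict(X).shape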