# Source code for lightning.impl.fista

# Author: Mathieu Blondel
# License: BSD

import numpy as np

from sklearn.utils.extmath import safe_sparse_dot

from .base import BaseClassifier, BaseRegressor

from .dataset_fast import get_dataset

from .loss_fast import Squared
from .loss_fast import SquaredHinge
from .loss_fast import MulticlassSquaredHinge
from .loss_fast import MulticlassLog

from .penalty import L1Penalty
from .penalty import L1L2Penalty
from .penalty import TracePenalty
from .penalty import SimplexConstraint
from .penalty import L1BallConstraint
from .penalty import TotalVariation1DPenalty


class _BaseFista(object):
    """Shared FISTA machinery mixed into the classifier/regressor estimators.

    Subclasses must provide ``_get_loss`` and carry the attributes read
    here: ``C``, ``alpha``, ``penalty``, ``max_iter``, ``max_steps``,
    ``eta``, ``verbose`` and ``callback``.

    The objective minimized is F(W) = C * L(W) + alpha * R(W), where L is
    the loss and R the penalty (see ``_get_regularized_objective``).
    """

    def _get_penalty(self):
        """Resolve ``self.penalty`` to a Penalty object.

        A user-supplied object (anything exposing a ``projection`` method)
        is used as-is; otherwise the string name is looked up in the
        built-in table below.  Raises KeyError for unknown names.
        """
        if hasattr(self.penalty, 'projection'):
            return self.penalty
        penalties = {
            "l1": L1Penalty(),
            "l1/l2": L1L2Penalty(),
            "trace": TracePenalty(),
            "simplex": SimplexConstraint(),
            "l1-ball": L1BallConstraint(),
            "tv1d": TotalVariation1DPenalty()
        }
        return penalties[self.penalty]

    def _get_objective(self, df, y, loss):
        """Return the data-fit term C * L(W), computed from the decision
        values ``df`` (n_samples x n_vectors) and targets ``y``."""
        return self.C * loss.objective(df, y)

    def _get_regularized_objective(self, df, y, loss, penalty, coef):
        """Return the full objective F(W) = C * L(W) + alpha * R(W)."""
        obj = self._get_objective(df, y, loss)
        obj += self.alpha * penalty.regularization(coef)
        return obj

    def _get_quad_approx(self, coefa, coefb, objb, gradb, L, penalty):
        """Quadratic model of F at ``coefa`` expanded around ``coefb``.

        Standard FISTA line-search upper bound:
        objb + <coefa - coefb, gradb> + L/2 * ||coefa - coefb||^2
        + alpha * R(coefa).  ``objb`` and ``gradb`` are the loss value and
        gradient at ``coefb``; ``L`` is the current curvature estimate.
        """
        approx = objb
        diff = coefa - coefb
        approx += np.sum(diff * gradb)
        approx += L / 2 * np.sum(diff ** 2)
        approx += self.alpha * penalty.regularization(coefa)
        return approx

    def _fit(self, X, y, n_vectors):
        """Run FISTA (accelerated proximal gradient) for ``max_iter``
        iterations and store the solution in ``self.coef_``.

        ``n_vectors`` is the number of weight vectors learned (one per
        class/output).  When ``max_steps > 0`` a backtracking line search
        estimates the step size 1/L; otherwise the Lipschitz constant is
        computed once up front.

        NOTE(review): the subclasses document a ``sigma`` sufficient-decrease
        constant, but it is not referenced in the line search below — the
        acceptance test is the plain ``obj <= approx`` model comparison.
        """
        n_samples, n_features = X.shape
        loss = self._get_loss()
        penalty = self._get_penalty()
        ds = get_dataset(X)

        # df: decision values X W^T; coef: extrapolated iterate (FISTA's
        # "y" sequence); coefx: proximal iterate; G: gradient buffer
        # reused across iterations.
        df = np.zeros((n_samples, n_vectors), dtype=np.float64)
        coef = np.zeros((n_vectors, n_features), dtype=np.float64)
        coefx = coef
        G = np.zeros((n_vectors, n_features), dtype=np.float64)

        obj = self._get_regularized_objective(df, y, loss, penalty, coef)

        if self.max_steps == 0:
            # No line search, need to use constant step size.
            L = self.C * loss.lipschitz_constant(ds, n_vectors)
        else:
            # Do not bother to compute the Lipschitz constant (expensive).
            L = 1.0

        # t is the FISTA momentum sequence (t_1 = 1).
        t = 1.0
        for it in range(self.max_iter):
            if self.verbose >= 1:
                print("Iter", it + 1, obj)

            # Save current values
            t_old = t
            coefx_old = coefx

            # Gradient
            G.fill(0.0)
            loss.gradient(df, ds, y, G)
            G *= self.C

            # Line search
            if self.max_steps > 0:
                # Loss value at the expansion point, used by the quadratic
                # model inside the backtracking loop.
                objb = self._get_objective(df, y, loss)

            for tt in range(self.max_steps):
                # Solve
                # Proximal step: gradient descent on the smooth part,
                # then the penalty's projection/prox operator.
                coefx = coef - G / L
                coefx = penalty.projection(coefx, self.alpha, L)

                dfx = safe_sparse_dot(X, coefx.T)
                obj = self._get_regularized_objective(dfx, y, loss, penalty,
                                                      coefx)
                approx = self._get_quad_approx(coefx, coef, objb, G, L, penalty)

                accepted = obj <= approx

                # Sufficient decrease condition
                if accepted:
                    if self.verbose >= 2:
                        print("Accepted at", tt + 1)
                    break
                else:
                    # Model violated: increase curvature (shrink step).
                    L *= self.eta

            if self.max_steps == 0:
                # Constant step size: single proximal step, no search.
                coefx = coef - G / L
                coefx = penalty.projection(coefx, self.alpha, L)

            # FISTA momentum update and extrapolation.
            t = (1 + np.sqrt(1 + 4 * t_old * t_old)) / 2
            coef = coefx + (t_old - 1) / t * (coefx - coefx_old)
            df = safe_sparse_dot(X, coef.T)

            # Callback might need self.coef_.
            self.coef_ = coef
            if self.callback is not None:
                ret = self.callback(self)
                if ret is not None:
                    # Any non-None return from the callback stops training.
                    break

        return self


class FistaClassifier(BaseClassifier, _BaseFista):
    r"""Estimator for learning linear classifiers by FISTA.

    The objective functions considered take the form

        minimize F(W) = C * L(W) + alpha * R(W),

    where L(W) is a loss term and R(W) is a penalty term.

    Parameters
    ----------
    loss : str, 'squared_hinge', 'log', 'modified_huber', 'squared'
        The loss function to be used.

    penalty : str or Penalty object, {'l2', 'l1', 'l1/l2', 'tv1d', 'simplex'}
        The penalty or constraint to be used.

        - l2: ridge
        - l1: lasso
        - l1/l2: group lasso
        - tv1d: 1-dimensional total variation (also known as fused lasso)
        - simplex: simplex constraint

        The method can also take an arbitrary Penalty object, i.e., an
        instance that implements the methods ``projection`` and
        ``regularization`` (see file penalty.py).

    multiclass : bool
        Whether to use a direct multiclass formulation (True) or
        one-vs-rest (False).

    C : float
        Weight of the loss term.

    alpha : float
        Weight of the penalty term.

    max_iter : int
        Maximum number of iterations to perform.

    max_steps : int
        Maximum number of steps to use during the line search.

    sigma : float
        Constant used in the line search sufficient decrease condition.

    eta : float
        Decrease factor for line-search procedure.  For example, eta=2.
        will decrease the step size by a factor of 2 at each iteration
        of the line-search routine.

    callback : callable
        Callback function.

    verbose : int
        Verbosity level.
    """

    def __init__(self, C=1.0, alpha=1.0, loss="squared_hinge", penalty="l1",
                 multiclass=False, max_iter=100, max_steps=30, eta=2.0,
                 sigma=1e-5, callback=None, verbose=0):
        self.C = C
        self.alpha = alpha
        self.loss = loss
        self.penalty = penalty
        self.multiclass = multiclass
        self.max_iter = max_iter
        self.max_steps = max_steps
        self.eta = eta
        self.sigma = sigma
        self.callback = callback
        self.verbose = verbose

    def _get_loss(self):
        # Map the ``loss`` string to a loss object.  The available losses
        # differ between the direct multiclass and one-vs-rest formulations;
        # an unsupported name raises KeyError.
        # NOTE(review): the docstring also advertises 'modified_huber' and
        # 'squared', which are not present in either table — confirm.
        if self.multiclass:
            losses = {
                "squared_hinge": MulticlassSquaredHinge(),
                "log": MulticlassLog(),
                "log_margin": MulticlassLog(margin=1),
            }
        else:
            losses = {
                "squared_hinge": SquaredHinge(),
            }
        return losses[self.loss]

    def fit(self, X, y):
        """Fit the model to the training data (X, y).

        In one-vs-rest mode the labels are binarized into an
        (n_samples, n_classes) Fortran-ordered float64 matrix; in direct
        multiclass mode the re-encoded integer labels are used as-is.
        """
        y, _, n_vectors = self._set_label_transformers(y, reencode=True)
        if not self.multiclass:
            y = np.asfortranarray(self.label_binarizer_.transform(y),
                                  dtype=np.float64)
            n_vectors = y.shape[1]
        return self._fit(X, y, n_vectors)
class FistaRegressor(BaseRegressor, _BaseFista):
    r"""Estimator for learning linear regressors by FISTA.

    The objective functions considered take the form

        minimize F(W) = C * L(W) + alpha * R(W),

    where L(W) is a loss term and R(W) is a penalty term.

    Parameters
    ----------
    penalty : str or Penalty object, {'l2', 'l1', 'l1/l2', 'tv1d', 'simplex'}
        The penalty or constraint to be used.

        - l2: ridge
        - l1: lasso
        - l1/l2: group lasso
        - tv1d: 1-dimensional total variation (also known as fused lasso)
        - simplex: simplex constraint

        The method can also take an arbitrary Penalty object, i.e., an
        instance that implements the methods ``projection`` and
        ``regularization`` (see file penalty.py).

    C : float
        Weight of the loss term.

    alpha : float
        Weight of the penalty term.

    max_iter : int
        Maximum number of iterations to perform.

    max_steps : int
        Maximum number of steps to use during the line search.

    sigma : float
        Constant used in the line search sufficient decrease condition.

    eta : float
        Decrease factor for line-search procedure.  For example, eta=2.
        will decrease the step size by a factor of 2 at each iteration
        of the line-search routine.

    callback : callable
        Callback function.

    verbose : int
        Verbosity level.
    """

    def __init__(self, C=1.0, alpha=1.0, penalty="l1", max_iter=100,
                 max_steps=30, eta=2.0, sigma=1e-5, callback=None, verbose=0):
        self.C = C
        self.alpha = alpha
        self.penalty = penalty
        self.max_iter = max_iter
        self.max_steps = max_steps
        self.eta = eta
        self.sigma = sigma
        self.callback = callback
        self.verbose = verbose

    def _get_loss(self):
        # Regression always uses the squared loss.
        return Squared()

    def fit(self, X, y):
        """Fit the model to the training data (X, y).

        1-d targets are reshaped to a single column; targets are converted
        to a Fortran-ordered float64 matrix with one column per output.
        """
        self.outputs_2d_ = len(y.shape) > 1
        Y = y.reshape(-1, 1) if not self.outputs_2d_ else y
        Y = np.asfortranarray(Y.astype(np.float64))
        n_vectors = Y.shape[1]
        return self._fit(X, Y, n_vectors)