# Author: Mathieu Blondel
# License: BSD
import numpy as np
from sklearn.utils.extmath import safe_sparse_dot
from .base import BaseClassifier, BaseRegressor
from .dataset_fast import get_dataset
from .loss_fast import Squared
from .loss_fast import SquaredHinge
from .loss_fast import MulticlassSquaredHinge
from .loss_fast import MulticlassLog
from .penalty import L1Penalty
from .penalty import L1L2Penalty
from .penalty import TracePenalty
from .penalty import SimplexConstraint
from .penalty import L1BallConstraint
from .penalty import TotalVariation1DPenalty


class _BaseFista(object):
def _get_penalty(self):
if hasattr(self.penalty, 'projection'):
return self.penalty
penalties = {
"l1": L1Penalty(),
"l1/l2": L1L2Penalty(),
"trace": TracePenalty(),
"simplex": SimplexConstraint(),
"l1-ball": L1BallConstraint(),
"tv1d": TotalVariation1DPenalty()
}
return penalties[self.penalty]
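
    # `penalty` may also be a custom object exposing the same interface as
    # the classes in penalty.py.  A minimal sketch, assuming a hypothetical
    # non-negativity constraint (not part of penalty.py):
    #
    #     class NonNegativeConstraint(object):
    #         def projection(self, coef, alpha, L):
    #             # Projection step with step size 1 / L.
    #             return np.maximum(coef, 0)
    #
    #         def regularization(self, coef):
    #             # A hard constraint adds no penalty value to the objective.
    #             return 0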
def _get_objective(self, df, y, loss):
return self.C * loss.objective(df, y)
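
    # The loss and penalty terms are combined into the full objective from
    # the docstrings below: F(W) = C * L(W) + alpha * R(W).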
def _get_regularized_objective(self, df, y, loss, penalty, coef):
obj = self._get_objective(df, y, loss)
obj += self.alpha * penalty.regularization(coef)
return obj
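
    # Quadratic upper bound on the objective used by the backtracking line
    # search (Beck & Teboulle, 2009), where f denotes the smooth loss term
    # C * L(W):
    #
    #     Q(a, b) = f(b) + <a - b, grad f(b)> + (L / 2) * ||a - b||^2
    #               + alpha * R(a)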
def _get_quad_approx(self, coefa, coefb, objb, gradb, L, penalty):
approx = objb
diff = coefa - coefb
approx += np.sum(diff * gradb)
approx += L / 2 * np.sum(diff ** 2)
approx += self.alpha * penalty.regularization(coefa)
return approx

    def _fit(self, X, y, n_vectors):
n_samples, n_features = X.shape
loss = self._get_loss()
penalty = self._get_penalty()
ds = get_dataset(X)
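        # df holds the decision values X * coef.T; coef is the point where
        # gradients are evaluated, coefx is the proximal iterate, and G is
        # a buffer for the gradient.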
df = np.zeros((n_samples, n_vectors), dtype=np.float64)
coef = np.zeros((n_vectors, n_features), dtype=np.float64)
coefx = coef
G = np.zeros((n_vectors, n_features), dtype=np.float64)
obj = self._get_regularized_objective(df, y, loss, penalty, coef)
if self.max_steps == 0:
# No line search, need to use constant step size.
L = self.C * loss.lipschitz_constant(ds, n_vectors)
else:
# Do not bother to compute the Lipschitz constant (expensive).
L = 1.0
t = 1.0
for it in range(self.max_iter):
if self.verbose >= 1:
print("Iter", it + 1, obj)
# Save current values
t_old = t
coefx_old = coefx
# Gradient
G.fill(0.0)
loss.gradient(df, ds, y, G)
G *= self.C
# Line search
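            # Backtracking: take a proximal step with the current Lipschitz
            # estimate L; if the new objective exceeds the quadratic upper
            # bound from _get_quad_approx, the step was too long, so L is
            # increased by a factor of eta and the step is retried.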
if self.max_steps > 0:
objb = self._get_objective(df, y, loss)
for tt in range(self.max_steps):
# Solve
coefx = coef - G / L
coefx = penalty.projection(coefx, self.alpha, L)
dfx = safe_sparse_dot(X, coefx.T)
obj = self._get_regularized_objective(dfx, y, loss, penalty,
coefx)
approx = self._get_quad_approx(coefx, coef, objb, G, L, penalty)
                # Sufficient decrease condition
                accepted = obj <= approx
                if accepted:
if self.verbose >= 2:
print("Accepted at", tt + 1)
break
else:
L *= self.eta
if self.max_steps == 0:
coefx = coef - G / L
coefx = penalty.projection(coefx, self.alpha, L)
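            # FISTA momentum: t_{k+1} = (1 + sqrt(1 + 4 * t_k^2)) / 2, and
            # the next gradient point extrapolates beyond the proximal
            # iterate with weight (t_k - 1) / t_{k+1}.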
t = (1 + np.sqrt(1 + 4 * t_old * t_old)) / 2
coef = coefx + (t_old - 1) / t * (coefx - coefx_old)
df = safe_sparse_dot(X, coef.T)
# Callback might need self.coef_.
self.coef_ = coef
if self.callback is not None:
ret = self.callback(self)
if ret is not None:
break
return self


class FistaClassifier(BaseClassifier, _BaseFista):
r"""Estimator for learning linear classifiers by FISTA.
    The objective functions considered take the form

        minimize F(W) = C * L(W) + alpha * R(W),

    where L(W) is a loss term and R(W) is a penalty term.
Parameters
----------
    loss : str, 'squared_hinge', 'log', 'log_margin'
        The loss function to be used. Only 'squared_hinge' is available
        when multiclass=False; 'log' and 'log_margin' additionally require
        multiclass=True.
    penalty : str or Penalty object, {'l1', 'l1/l2', 'trace', 'simplex', 'l1-ball', 'tv1d'}
        The penalty or constraint to be used.
        - l1: lasso
        - l1/l2: group lasso
        - trace: trace norm (nuclear norm)
        - simplex: simplex constraint
        - l1-ball: l1-ball constraint
        - tv1d: 1-dimensional total variation (also known as fused lasso)
        The parameter can also be an arbitrary Penalty object, i.e., an
        instance that implements `projection` and `regularization` methods
        (see penalty.py).
multiclass : bool
Whether to use a direct multiclass formulation (True) or one-vs-rest
(False).
C : float
Weight of the loss term.
alpha : float
Weight of the penalty term.
max_iter : int
Maximum number of iterations to perform.
max_steps : int
Maximum number of steps to use during the line search.
sigma : float
Constant used in the line search sufficient decrease condition.
    eta : float
        Decrease factor for the line-search procedure. For example,
        eta=2.0 halves the step size at each iteration of the
        line-search routine.
callback : callable
Callback function.
verbose : int
Verbosity level.
"""

    def __init__(self, C=1.0, alpha=1.0, loss="squared_hinge", penalty="l1",
multiclass=False, max_iter=100, max_steps=30, eta=2.0,
sigma=1e-5, callback=None, verbose=0):
self.C = C
self.alpha = alpha
self.loss = loss
self.penalty = penalty
self.multiclass = multiclass
self.max_iter = max_iter
self.max_steps = max_steps
self.eta = eta
self.sigma = sigma
self.callback = callback
self.verbose = verbose

    def _get_loss(self):
if self.multiclass:
losses = {
"squared_hinge": MulticlassSquaredHinge(),
"log": MulticlassLog(),
"log_margin": MulticlassLog(margin=1),
}
else:
losses = {
"squared_hinge": SquaredHinge(),
}
return losses[self.loss]

    def fit(self, X, y):
y, _, n_vectors = self._set_label_transformers(y, reencode=True)
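        # In one-vs-rest mode, labels are binarized into an
        # (n_samples, n_classes) matrix and each column is fit as a
        # separate weight vector.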
if not self.multiclass:
y = np.asfortranarray(self.label_binarizer_.transform(y),
dtype=np.float64)
n_vectors = y.shape[1]
return self._fit(X, y, n_vectors)
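
    # A minimal usage sketch with hypothetical data, assuming lightning's
    # public import path:
    #
    #     from lightning.classification import FistaClassifier
    #     clf = FistaClassifier(penalty="l1", alpha=0.1)
    #     clf.fit(X_train, y_train)
    #     y_pred = clf.predict(X_test)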


class FistaRegressor(BaseRegressor, _BaseFista):
    r"""Estimator for learning linear regressors by FISTA.

    The objective functions considered take the form

        minimize F(W) = C * L(W) + alpha * R(W),

    where L(W) is a loss term and R(W) is a penalty term.
Parameters
----------
    penalty : str or Penalty object, {'l1', 'l1/l2', 'trace', 'simplex', 'l1-ball', 'tv1d'}
        The penalty or constraint to be used.
        - l1: lasso
        - l1/l2: group lasso
        - trace: trace norm (nuclear norm)
        - simplex: simplex constraint
        - l1-ball: l1-ball constraint
        - tv1d: 1-dimensional total variation (also known as fused lasso)
        The parameter can also be an arbitrary Penalty object, i.e., an
        instance that implements `projection` and `regularization` methods
        (see penalty.py).
C : float
Weight of the loss term.
alpha : float
Weight of the penalty term.
max_iter : int
Maximum number of iterations to perform.
max_steps : int
Maximum number of steps to use during the line search.
sigma : float
Constant used in the line search sufficient decrease condition.
    eta : float
        Decrease factor for the line-search procedure. For example,
        eta=2.0 halves the step size at each iteration of the
        line-search routine.
callback : callable
Callback function.
verbose : int
Verbosity level.
"""

    def __init__(self, C=1.0, alpha=1.0, penalty="l1", max_iter=100,
max_steps=30, eta=2.0, sigma=1e-5, callback=None, verbose=0):
self.C = C
self.alpha = alpha
self.penalty = penalty
self.max_iter = max_iter
self.max_steps = max_steps
self.eta = eta
self.sigma = sigma
self.callback = callback
self.verbose = verbose

    def _get_loss(self):
return Squared()

    def fit(self, X, y):
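        # 1d targets are reshaped to a single column so that single- and
        # multi-output regression share the same column-wise code path.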
self.outputs_2d_ = len(y.shape) > 1
Y = y.reshape(-1, 1) if not self.outputs_2d_ else y
Y = np.asfortranarray(Y.astype(np.float64))
n_vectors = Y.shape[1]
return self._fit(X, Y, n_vectors)
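
    # A minimal usage sketch with hypothetical data, assuming lightning's
    # public import path:
    #
    #     from lightning.regression import FistaRegressor
    #     reg = FistaRegressor(penalty="l1", alpha=0.1)
    #     reg.fit(X_train, y_train)
    #     y_pred = reg.predict(X_test)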