# Source code for lightning.impl.fista

```# Author: Mathieu Blondel

import numpy as np

from sklearn.utils.extmath import safe_sparse_dot

from .base import BaseClassifier, BaseRegressor

from .dataset_fast import get_dataset

from .loss_fast import Squared
from .loss_fast import SquaredHinge
from .loss_fast import MulticlassSquaredHinge
from .loss_fast import MulticlassLog

from .penalty import L1Penalty
from .penalty import L1L2Penalty
from .penalty import TracePenalty
from .penalty import SimplexConstraint
from .penalty import L1BallConstraint
from .penalty import TotalVariation1DPenalty

class _BaseFista(object):

def _get_penalty(self):
if hasattr(self.penalty, 'projection'):
return self.penalty
penalties = {
"l1": L1Penalty(),
"l1/l2": L1L2Penalty(),
"trace": TracePenalty(),
"simplex": SimplexConstraint(),
"l1-ball": L1BallConstraint(),
"tv1d": TotalVariation1DPenalty()
}
return penalties[self.penalty]

def _get_objective(self, df, y, loss):
return self.C * loss.objective(df, y)

def _get_regularized_objective(self, df, y, loss, penalty, coef):
obj = self._get_objective(df, y, loss)
obj += self.alpha * penalty.regularization(coef)
return obj

approx = objb
diff = coefa - coefb
approx += L / 2 * np.sum(diff ** 2)
approx += self.alpha * penalty.regularization(coefa)
return approx

def _fit(self, X, y, n_vectors):
n_samples, n_features = X.shape
loss = self._get_loss()
penalty = self._get_penalty()
ds = get_dataset(X)

df = np.zeros((n_samples, n_vectors), dtype=np.float64)
coef = np.zeros((n_vectors, n_features), dtype=np.float64)
coefx = coef
G = np.zeros((n_vectors, n_features), dtype=np.float64)

obj = self._get_regularized_objective(df, y, loss, penalty, coef)

if self.max_steps == 0:
# No line search, need to use constant step size.
L = self.C * loss.lipschitz_constant(ds, n_vectors)
else:
# Do not bother to compute the Lipschitz constant (expensive).
L = 1.0

t = 1.0
for it in range(self.max_iter):
if self.verbose >= 1:
print("Iter", it + 1, obj)

# Save current values
t_old = t
coefx_old = coefx

G.fill(0.0)
G *= self.C

# Line search
if self.max_steps > 0:
objb = self._get_objective(df, y, loss)

for tt in range(self.max_steps):
# Solve
coefx = coef - G / L
coefx = penalty.projection(coefx, self.alpha, L)

dfx = safe_sparse_dot(X, coefx.T)
obj = self._get_regularized_objective(dfx, y, loss, penalty,
coefx)
approx = self._get_quad_approx(coefx, coef, objb, G, L, penalty)

accepted = obj <= approx

# Sufficient decrease condition
if accepted:
if self.verbose >= 2:
print("Accepted at", tt + 1)
break
else:
L *= self.eta

if self.max_steps == 0:
coefx = coef - G / L
coefx = penalty.projection(coefx, self.alpha, L)

t = (1 + np.sqrt(1 + 4 * t_old * t_old)) / 2
coef = coefx + (t_old - 1) / t * (coefx - coefx_old)
df = safe_sparse_dot(X, coef.T)

# Callback might need self.coef_.
self.coef_ = coef
if self.callback is not None:
ret = self.callback(self)
if ret is not None:
break

return self

[docs]class FistaClassifier(BaseClassifier, _BaseFista):
r"""Estimator for learning linear classifiers by FISTA.

The objective functions considered take the form

minimize F(W) = C * L(W) + alpha * R(W),

where L(W) is a loss term and R(W) is a penalty term.

Parameters
----------
loss : str, 'squared_hinge', 'log', 'modified_huber', 'squared'
The loss function to be used.

penalty : str or Penalty object, {'l2', 'l1', 'l1/l2', 'tv1d', 'simplex'}
The penalty or constraint to be used.

- l2: ridge
- l1: lasso
- l1/l2: group lasso
- tv1d: 1-dimensional total variation (also known as fused lasso)
- simplex: simplex constraint

The method can also take an arbitrary Penalty object, i.e., an instance
that implements methods projection regularization method (see file penalty.py)

multiclass : bool
Whether to use a direct multiclass formulation (True) or one-vs-rest
(False).

C : float
Weight of the loss term.

alpha : float
Weight of the penalty term.

max_iter : int
Maximum number of iterations to perform.

max_steps : int
Maximum number of steps to use during the line search.

sigma : float
Constant used in the line search sufficient decrease condition.

eta : float
Decrease factor for line-search procedure. For example, eta=2.
will decrease the step size by a factor of 2 at each iteration
of the line-search routine.

callback : callable
Callback function.

verbose : int
Verbosity level.
"""

def __init__(self, C=1.0, alpha=1.0, loss="squared_hinge", penalty="l1",
multiclass=False, max_iter=100, max_steps=30, eta=2.0,
sigma=1e-5, callback=None, verbose=0):
self.C = C
self.alpha = alpha
self.loss = loss
self.penalty = penalty
self.multiclass = multiclass
self.max_iter = max_iter
self.max_steps = max_steps
self.eta = eta
self.sigma = sigma
self.callback = callback
self.verbose = verbose

def _get_loss(self):
if self.multiclass:
losses = {
"squared_hinge": MulticlassSquaredHinge(),
"log": MulticlassLog(),
"log_margin": MulticlassLog(margin=1),
}
else:
losses = {
"squared_hinge": SquaredHinge(),
}

return losses[self.loss]

[docs]    def fit(self, X, y):
y, _, n_vectors = self._set_label_transformers(y, reencode=True)

if not self.multiclass:
y = np.asfortranarray(self.label_binarizer_.transform(y),
dtype=np.float64)
n_vectors = y.shape

return self._fit(X, y, n_vectors)

[docs]class FistaRegressor(BaseRegressor, _BaseFista):
r"""Estimator for learning linear classifiers by FISTA.

The objective functions considered take the form

minimize F(W) = C * L(W) + alpha * R(W),

where L(W) is a loss term and R(W) is a penalty term.

Parameters
----------
penalty : str or Penalty object, {'l2', 'l1', 'l1/l2', 'tv1d', 'simplex'}
The penalty or constraint to be used.

- l2: ridge
- l1: lasso
- l1/l2: group lasso
- tv1d: 1-dimensional total variation (also known as fussed lasso)
- simplex: simplex constraint

The method can also take an arbitrary Penalty object, i.e., an instance
that implements methods projection regularization method (see file penalty.py)

C : float
Weight of the loss term.

alpha : float
Weight of the penalty term.

max_iter : int
Maximum number of iterations to perform.

max_steps : int
Maximum number of steps to use during the line search.

sigma : float
Constant used in the line search sufficient decrease condition.

eta : float
Decrease factor for line-search procedure. For example, eta=2.
will decrease the step size by a factor of 2 at each iteration
of the line-search routine.

callback : callable
Callback function.

verbose : int
Verbosity level.
"""

def __init__(self, C=1.0, alpha=1.0, penalty="l1", max_iter=100,
max_steps=30, eta=2.0, sigma=1e-5, callback=None, verbose=0):
self.C = C
self.alpha = alpha
self.penalty = penalty
self.max_iter = max_iter
self.max_steps = max_steps
self.eta = eta
self.sigma = sigma
self.callback = callback
self.verbose = verbose

def _get_loss(self):
return Squared()

[docs]    def fit(self, X, y):
self.outputs_2d_ = len(y.shape) > 1
Y = y.reshape(-1, 1) if not self.outputs_2d_ else y
Y = np.asfortranarray(Y.astype(np.float64))
n_vectors = Y.shape
return self._fit(X, Y, n_vectors)
```