Source code for lightning.impl.primal_newton

"""
=============================
Kernel SVM by Newton's method
=============================

This module provides a kernel SVM solver using Newton's method.

"""
# Author: Mathieu Blondel
# License: BSD

import numpy as np

from scipy.sparse.linalg import cg
from scipy.linalg import solve

from sklearn.utils import safe_mask
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils import check_random_state
from sklearn.metrics.pairwise import pairwise_kernels

from .base import BaseClassifier


[docs]class KernelSVC(BaseClassifier): r"""Estimator for learning kernel SVMs by Newton's method. Parameters ---------- alpha : float Weight of the penalty term. solver : str, 'cg', 'dense' max_iter : int Maximum number of iterations to perform. tol : float Tolerance of the stopping criterion. kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed" Kernel to use. Default: "linear" degree : int, default=3 Degree for poly, rbf and sigmoid kernels. Ignored by other kernels. gamma : float, optional Kernel coefficient for rbf and poly kernels. Default: 1/n_features. Ignored by other kernels. coef0 : float, optional Independent term in poly and sigmoid kernels. Ignored by other kernels. random_state : RandomState or int The seed of the pseudo random number generator to use. verbose : int Verbosity level. n_jobs : int Number of jobs to use to compute the kernel matrix. Examples -------- >>> from sklearn.datasets import make_classification >>> from lightning.classification import KernelSVC >>> X, y = make_classification() >>> clf = KernelSVC().fit(X, y) >>> accuracy = clf.score(X, y) """ def __init__(self, alpha=1.0, solver="cg", max_iter=50, tol=1e-3, kernel="linear", gamma=0.1, coef0=1, degree=4, random_state=None, verbose=0, n_jobs=1): self.alpha = alpha self.solver = solver self.max_iter = max_iter self.tol = tol self.kernel = kernel self.gamma = gamma self.coef0 = coef0 self.degree = degree self.random_state = random_state self.verbose = verbose self.n_jobs = n_jobs def _kernel_params(self): return {"gamma": self.gamma, "degree": self.degree, "coef0": self.coef0} def _solve(self, A, b): if self.solver == "cg": x, info = cg(A, b, tol=self.tol) elif self.solver == "dense": x = solve(A, b, sym_pos=True) return x def _fit_binary(self, K, y, rs): n_samples = K.shape[0] coef = np.zeros(n_samples) if n_samples < 1000: sv = np.ones(n_samples, dtype=bool) else: sv = np.zeros(n_samples, dtype=bool) sv[:1000] = True rs.shuffle(sv) for t in range(1, self.max_iter + 1): if self.verbose: print("Iteration", t, "#SV=", np.sum(sv)) K_sv = K[sv][:, sv] I = np.diag(self.alpha * np.ones(K_sv.shape[0])) coef_sv = self._solve(K_sv + I, y[sv]) coef *= 0 coef[sv] = coef_sv pred = np.dot(K, coef) errors = 1 - y * pred last_sv = sv sv = errors > 0 if np.array_equal(last_sv, sv): if self.verbose: print("Converged at iteration", t) break return coef def _post_process(self, X): # We can't know the support vectors when using precomputed kernels. if self.kernel != "precomputed": sv = np.sum(self.coef_ != 0, axis=0, dtype=bool) if np.sum(sv) > 0: self.coef_ = np.ascontiguousarray(self.coef_[:, sv]) mask = safe_mask(X, sv) self.support_vectors_ = np.ascontiguousarray(X[mask]) self.support_indices_ = np.arange(X.shape[0], dtype=np.int32)[sv] self.n_samples_ = X.shape[0] if self.verbose >= 1: print("Number of support vectors:", np.sum(sv))
[docs] def fit(self, X, y): """Fit model according to X and y. Parameters ---------- X : array-like, shape = [n_samples, n_features] Training vectors, where n_samples is the number of samples and n_features is the number of features. y : array-like, shape = [n_samples] Target values. Returns ------- self : classifier Returns self. """ n_samples, n_features = X.shape rs = check_random_state(self.random_state) self.label_binarizer_ = LabelBinarizer(neg_label=-1, pos_label=1) Y = self.label_binarizer_.fit_transform(y) self.classes_ = self.label_binarizer_.classes_.astype(np.int32) n_vectors = Y.shape[1] if self.verbose: print("Pre-computing kernel matrix...") K = pairwise_kernels(X, filter_params=True, n_jobs=self.n_jobs, metric=self.kernel, **self._kernel_params()) coef = [self._fit_binary(K, Y[:, i], rs) for i in range(n_vectors)] self.coef_ = np.array(coef) self.intercept_ = np.zeros(n_vectors, dtype=np.float64) self._post_process(X) return self
[docs] def decision_function(self, X): """ Return the decision function for test vectors X. Parameters ---------- X : array-like, shape = [n_samples, n_features] Returns ------- P : array, shape = [n_classes, n_samples] Decision function for X """ K = pairwise_kernels(X, self.support_vectors_, filter_params=True, n_jobs=self.n_jobs, metric=self.kernel, **self._kernel_params()) return np.dot(K, self.coef_.T)