Source code for lightning.impl.prank

# Author: Mathieu Blondel
# License: BSD

import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.utils.extmath import safe_sparse_dot
from sklearn.metrics.pairwise import pairwise_kernels

from .base import BaseEstimator
from .dataset_fast import get_dataset
from .prank_fast import _prank_fit
from .prank_fast import _prank_fit_kernel
from .prank_fast import _prank_predict


class _BasePRank(BaseEstimator):

    def score(self, X, y):
        # Mean absolute difference between predicted and true ranks
        # (lower is better).
        y_pred = self.predict(X)
        return np.mean(np.abs(y - y_pred))

    @property
    def classes_(self):
        return self._label_encoder.classes_


class PRank(_BasePRank):
    r"""Online algorithm for learning an ordinal regression model.

    Parameters
    ----------
    n_iter : int
        Number of iterations to run.

    shuffle : boolean
        Whether to shuffle data.

    random_state : RandomState or int
        The seed of the pseudo random number generator to use.

    Attributes
    ----------
    coef_ : array, shape=[n_features]
        Estimated weights.

    thresholds_ : array, shape=[n_classes]
        Estimated thresholds.

    References
    ----------
    Pranking with Ranking.
    Koby Crammer, Yoram Singer.
    NIPS 2001.
    """

    def __init__(self, n_iter=10, shuffle=True, random_state=None):
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.random_state = random_state

    def fit(self, X, y):
        """Fit model according to X and y.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape = [n_samples]
            Target values.

        Returns
        -------
        self : classifier
            Returns self.
        """
        n_samples, n_features = X.shape
        rs = self._get_random_state()

        self._label_encoder = LabelEncoder()
        y = self._label_encoder.fit_transform(y).astype(np.int32)
        n_classes = len(self.classes_)

        self.coef_ = np.zeros(n_features, dtype=np.float64)
        self.thresholds_ = np.zeros(n_classes, dtype=np.float64)
        self.thresholds_[-1] = np.inf

        ds = get_dataset(X)

        _prank_fit(self.coef_, self.thresholds_, ds, y, n_classes,
                   self.n_iter, rs, self.shuffle)

        return self

    def predict(self, X):
        n_samples = X.shape[0]
        dot = safe_sparse_dot(X, self.coef_)
        out = np.zeros(n_samples, dtype=np.int32)
        _prank_predict(dot, self.thresholds_, len(self.classes_), out)
        return self._label_encoder.inverse_transform(out)
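# Illustrative usage sketch (not part of the original module): fitting PRank on
# a tiny synthetic ordinal dataset. The data generation and parameter values
# below are assumptions chosen purely for demonstration.
def _example_prank():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.randn(200, 10)
    # Cut a latent linear score into four ordered labels (0, 1, 2, 3).
    y = np.digitize(np.dot(X, rng.randn(10)), bins=[-1.0, 0.0, 1.0])
    ranker = PRank(n_iter=10, shuffle=True, random_state=0).fit(X, y)
    y_pred = ranker.predict(X)          # labels in the original encoding
    return y_pred, ranker.score(X, y)   # mean absolute rank error, lower is better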
class KernelPRank(_BasePRank):
    r"""Kernelized online algorithm for learning an ordinal regression model.

    Parameters
    ----------
    n_iter : int
        Number of iterations to run.

    shuffle : boolean
        Whether to shuffle data.

    random_state : RandomState or int
        The seed of the pseudo random number generator to use.

    kernel : "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel. Default: "linear"

    degree : int, default=3
        Degree for poly kernels. Ignored by other kernels.

    gamma : float, optional
        Kernel coefficient for rbf and poly kernels. Default: 1/n_features.
        Ignored by other kernels.

    coef0 : float, optional
        Independent term in poly and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : mapping of string to any, optional
        Parameters (keyword arguments) and values for kernel passed as
        callable object. Ignored by other kernels.

    Attributes
    ----------
    dual_coef_ : array, shape=[n_samples]
        Estimated weights.

    thresholds_ : array, shape=[n_classes]
        Estimated thresholds.

    References
    ----------
    Pranking with Ranking.
    Koby Crammer, Yoram Singer.
    NIPS 2001.
    """

    def __init__(self, n_iter=10, shuffle=True, random_state=None,
                 kernel="linear", gamma=None, degree=3, coef0=1,
                 kernel_params=None):
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.random_state = random_state
        self.kernel = kernel
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.kernel_params = kernel_params

    @property
    def _pairwise(self):
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {"gamma": self.gamma,
                      "degree": self.degree,
                      "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self.kernel,
                                filter_params=True, **params)

    def fit(self, X, y):
        """Fit model according to X and y.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape = [n_samples]
            Target values.

        Returns
        -------
        self : classifier
            Returns self.
        """
        n_samples, n_features = X.shape
        rs = self._get_random_state()

        self._label_encoder = LabelEncoder()
        y = self._label_encoder.fit_transform(y).astype(np.int32)
        n_classes = len(self.classes_)

        K = self._get_kernel(X)
        self.X_train_ = X

        self.dual_coef_ = np.zeros(n_samples, dtype=np.float64)
        self.thresholds_ = np.zeros(n_classes, dtype=np.float64)
        self.thresholds_[-1] = np.inf

        _prank_fit_kernel(self.dual_coef_, self.thresholds_, K, y, n_classes,
                          self.n_iter, rs, self.shuffle)

        return self

    def predict(self, X):
        K = self._get_kernel(X, self.X_train_)
        n_samples = X.shape[0]
        dot = np.dot(K, self.dual_coef_)
        out = np.zeros(n_samples, dtype=np.int32)
        _prank_predict(dot, self.thresholds_, len(self.classes_), out)
        return self._label_encoder.inverse_transform(out)
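# Illustrative usage sketch (not part of the original module): fitting
# KernelPRank with an RBF kernel. The synthetic data and the kernel/gamma
# settings below are assumptions made only for demonstration.
def _example_kernel_prank():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.randn(150, 5)
    # Three ordered labels cut from a latent linear score.
    y = np.digitize(np.dot(X, rng.randn(5)), bins=[-0.5, 0.5])
    ranker = KernelPRank(kernel="rbf", gamma=0.1, n_iter=10,
                         shuffle=True, random_state=0)
    ranker.fit(X, y)
    return ranker.predict(X), ranker.score(X, y)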