# Source code for metric_learn.sdml

"""
Sparse High-Dimensional Metric Learning (SDML)
"""

import warnings
import numpy as np
from sklearn.base import TransformerMixin
from scipy.linalg import pinvh
from sklearn.covariance import graphical_lasso
from sklearn.exceptions import ConvergenceWarning

from .base_metric import MahalanobisMixin, _PairsClassifierMixin
from .constraints import Constraints, wrap_pairs
from ._util import components_from_metric, _initialize_metric_mahalanobis
try:
  from inverse_covariance import quic
except ImportError:
  HAS_SKGGM = False
else:
  HAS_SKGGM = True


class _BaseSDML(MahalanobisMixin):
  """Shared implementation for the SDML family of learners.

  Builds a loss matrix from the labeled pair differences, adds it to the
  inverse of the prior, and runs a graphical-lasso solver on the result to
  obtain a sparse Mahalanobis matrix (skggm's ``quic`` when installed,
  otherwise scikit-learn's ``graphical_lasso``).
  """

  _tuple_size = 2  # constraints are pairs

  def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
               verbose=False, preprocessor=None,
               random_state=None):
    """Store the hyper-parameters; actual work happens in `_fit`.

    Parameters
    ----------
    balance_param : float
      Trade off between sparsity and the M0 prior.
    sparsity_param : float
      L1-regularization strength passed to the graphical lasso solver.
    prior : string or numpy array
      'identity', 'covariance', 'random', or an explicit PD matrix.
    verbose : bool
      If True, print progress information while learning.
    preprocessor : array-like or callable
      Used to turn indices into tuples of points.
    random_state : int, numpy.RandomState or None
      Seed/generator used when ``prior='random'``.
    """
    self.balance_param = balance_param
    self.sparsity_param = sparsity_param
    self.prior = prior
    self.verbose = verbose
    self.random_state = random_state
    super(_BaseSDML, self).__init__(preprocessor)

  def _fit(self, pairs, y):
    """Learn the sparse Mahalanobis matrix from labeled pairs.

    Parameters
    ----------
    pairs : array-like
      Pairs of points (or pair indices when a preprocessor is set).
    y : array-like
      Pair labels: 1 for similar pairs, -1 for dissimilar pairs.

    Returns
    -------
    self : object

    Raises
    ------
    RuntimeError
      If the graphical lasso solver fails, or returns a non-SPD or
      non-finite matrix.
    """
    if self.verbose:
      if HAS_SKGGM:
        print("SDML will use skggm's graphical lasso solver.")
      else:
        print("SDML will use scikit-learn's graphical lasso solver.")
    pairs, y = self._prepare_inputs(pairs, y,
                                    type_of_inputs='tuples')

    # Set up (the inverse of) the prior M. The prior must be strictly PD,
    # which `_initialize_metric_mahalanobis` enforces.
    _, prior_inv = _initialize_metric_mahalanobis(
        pairs, self.prior,
        return_inverse=True, strict_pd=True, matrix_name='prior',
        random_state=self.random_state)
    diffs = pairs[:, 0] - pairs[:, 1]
    # Signed outer-product accumulation of the pair differences.
    loss_matrix = (diffs.T * y).dot(diffs)
    emp_cov = prior_inv + self.balance_param * loss_matrix

    # Initialize the solver from a matrix sharing emp_cov's eigenvectors,
    # with eigenvalues shifted to be strictly positive (empirical choice).
    eigvals, eigvecs = np.linalg.eigh(emp_cov)
    smallest = eigvals.min()
    if smallest < 0.:
      warnings.warn("Warning, the input matrix of graphical lasso is not "
                    "positive semi-definite (PSD). The algorithm may diverge, "
                    "and lead to degenerate solutions. "
                    "To prevent that, try to decrease the balance parameter "
                    "`balance_param` and/or to set prior='identity'.",
                    ConvergenceWarning)
      # Translate the spectrum so every eigenvalue is non-negative.
      eigvals = eigvals - smallest
    # Small offset to avoid definiteness problems.
    eigvals = eigvals + 1e-10
    sigma0 = (eigvecs * eigvals).dot(eigvecs.T)

    raised_error = None
    not_spd = False     # only meaningful when the solver returned a matrix
    not_finite = False  # idem
    try:
      if HAS_SKGGM:
        theta0 = pinvh(sigma0)
        M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param,
                                msg=self.verbose,
                                Theta0=theta0, Sigma0=sigma0)
      else:
        _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param,
                               verbose=self.verbose,
                               cov_init=sigma0)
      spectrum, _ = np.linalg.eigh(M)
      not_spd = bool((spectrum < 0.).any())
      not_finite = not np.isfinite(M).all()
    except Exception as e:
      raised_error = e

    if raised_error is not None or not_spd or not_finite:
      msg = ("There was a problem in SDML when using {}'s graphical "
             "lasso solver.").format("skggm" if HAS_SKGGM else "scikit-learn")
      if not HAS_SKGGM:
        msg += (" skggm's graphical lasso can sometimes converge "
                "on non SPD cases where scikit-learn's graphical "
                "lasso fails to converge. Try to install skggm and "
                "rerun the algorithm (see the README.md for the "
                "right version of skggm).")
      if raised_error is not None:
        msg += " The following error message was thrown: {}.".format(
            raised_error)
      raise RuntimeError(msg)

    self.components_ = components_from_metric(np.atleast_2d(M))
    return self


class SDML(_BaseSDML, _PairsClassifierMixin):
  r"""Sparse Distance Metric Learning (SDML)

  SDML is an efficient sparse metric learning in high-dimensional space via
  double regularization: an L1-penalization on the off-diagonal elements of
  the Mahalanobis matrix :math:`\mathbf{M}`, and a log-determinant divergence
  between :math:`\mathbf{M}` and :math:`\mathbf{M_0}` (set as either
  :math:`\mathbf{I}` or :math:`\mathbf{\Omega}^{-1}`, where
  :math:`\mathbf{\Omega}` is the covariance matrix).

  Read more in the :ref:`User Guide <sdml>`.

  Parameters
  ----------
  balance_param : float, optional (default=0.5)
    Trade off between sparsity and M0 prior.

  sparsity_param : float, optional (default=0.01)
    Trade off between optimizer and sparseness (see graph_lasso).

  prior : string or numpy array, optional (default='identity')
    Prior to set for the metric. Possible options are 'identity',
    'covariance', 'random', and a numpy array of shape
    (n_features, n_features). For SDML, the prior should be strictly
    positive definite (PD).

    'identity'
      An identity matrix of shape (n_features, n_features).

    'covariance'
      The inverse covariance matrix.

    'random'
      The prior will be a random positive definite (PD) matrix of shape
      `(n_features, n_features)`, generated using
      `sklearn.datasets.make_spd_matrix`.

    numpy array
      A positive definite (PD) matrix of shape (n_features, n_features),
      that will be used as such to set the prior.

  verbose : bool, optional (default=False)
    If True, prints information while learning.

  preprocessor : array-like, shape=(n_samples, n_features) or callable
    The preprocessor to call to get tuples from indices. If array-like,
    tuples will be gotten like this: X[indices].

  random_state : int or numpy.RandomState or None, optional (default=None)
    A pseudo random number generator object or a seed for it if int. If
    ``prior='random'``, ``random_state`` is used to set the prior.

  Attributes
  ----------
  components_ : `numpy.ndarray`, shape=(n_features, n_features)
    The linear transformation ``L`` deduced from the learned Mahalanobis
    metric (See function `components_from_metric`.)

  threshold_ : `float`
    If the distance metric between two points is lower than this threshold,
    points will be classified as similar, otherwise they will be classified
    as dissimilar.

  Examples
  --------
  >>> from metric_learn import SDML_Supervised
  >>> from sklearn.datasets import load_iris
  >>> iris_data = load_iris()
  >>> X = iris_data['data']
  >>> Y = iris_data['target']
  >>> sdml = SDML_Supervised(num_constraints=200)
  >>> sdml.fit(X, Y)

  References
  ----------
  .. [1] Qi et al. `An efficient sparse metric learning in high-dimensional
     space via L1-penalized log-determinant regularization
     <http://www.machinelearning.org/archive/icml2009/papers/46.pdf>`_.
     ICML 2009.

  .. [2] Code adapted from https://gist.github.com/kcarnold/5439945
  """

  def fit(self, pairs, y, calibration_params=None):
    """Learn the SDML model.

    The threshold will be calibrated on the trainset using the parameters
    `calibration_params`.

    Parameters
    ----------
    pairs : array-like, shape=(n_constraints, 2, n_features) or \
          (n_constraints, 2)
      3D Array of pairs with each row corresponding to two points,
      or 2D array of indices of pairs if the metric learner uses a
      preprocessor.

    y : array-like, of shape (n_constraints,)
      Labels of constraints. Should be -1 for dissimilar pair, 1 for
      similar.

    calibration_params : `dict` or `None`
      Dictionary of parameters to give to `calibrate_threshold` for the
      threshold calibration step done at the end of `fit`. If `None` is
      given, `calibrate_threshold` will use the default parameters.

    Returns
    -------
    self : object
      Returns the instance.
    """
    calibration_params = (calibration_params if calibration_params is not
                          None else dict())
    self._validate_calibration_params(**calibration_params)
    self._fit(pairs, y)
    self.calibrate_threshold(pairs, y, **calibration_params)
    return self
class SDML_Supervised(_BaseSDML, TransformerMixin):
  """Supervised version of Sparse Distance Metric Learning (SDML)

  `SDML_Supervised` creates pairs of similar sample by taking same class
  samples, and pairs of dissimilar samples by taking different class
  samples. It then passes these pairs to `SDML` for training.

  Parameters
  ----------
  balance_param : float, optional (default=0.5)
    Trade off between sparsity and M0 prior.

  sparsity_param : float, optional (default=0.01)
    Trade off between optimizer and sparseness (see graph_lasso).

  prior : string or numpy array, optional (default='identity')
    Prior to set for the metric. Possible options are 'identity',
    'covariance', 'random', and a numpy array of shape
    (n_features, n_features). For SDML, the prior should be strictly
    positive definite (PD).

    'identity'
      An identity matrix of shape (n_features, n_features).

    'covariance'
      The inverse covariance matrix.

    'random'
      The prior will be a random SPD matrix of shape
      `(n_features, n_features)`, generated using
      `sklearn.datasets.make_spd_matrix`.

    numpy array
      A positive definite (PD) matrix of shape (n_features, n_features),
      that will be used as such to set the prior.

  num_constraints : int, optional (default=None)
    Number of constraints to generate. If None, defaults to
    `20 * num_classes**2`.

  verbose : bool, optional (default=False)
    If True, prints information while learning.

  preprocessor : array-like, shape=(n_samples, n_features) or callable
    The preprocessor to call to get tuples from indices. If array-like,
    tuples will be formed like this: X[indices].

  random_state : int or numpy.RandomState or None, optional (default=None)
    A pseudo random number generator object or a seed for it if int. If
    ``init='random'``, ``random_state`` is used to set the random prior. In
    any case, `random_state` is also used to randomly sample constraints
    from labels.

  Attributes
  ----------
  components_ : `numpy.ndarray`, shape=(n_features, n_features)
    The linear transformation ``L`` deduced from the learned Mahalanobis
    metric (See function `components_from_metric`.)

  See Also
  --------
  metric_learn.SDML : The original weakly-supervised algorithm

  :ref:`supervised_version` : The section of the project documentation
    that describes the supervised version of weakly supervised estimators.
  """

  def __init__(self, balance_param=0.5, sparsity_param=0.01,
               prior='identity', num_constraints=None, verbose=False,
               preprocessor=None, random_state=None):
    _BaseSDML.__init__(self, balance_param=balance_param,
                       sparsity_param=sparsity_param, prior=prior,
                       verbose=verbose, preprocessor=preprocessor,
                       random_state=random_state)
    self.num_constraints = num_constraints

  def fit(self, X, y):
    """Create constraints from labels and learn the SDML model.

    Parameters
    ----------
    X : array-like, shape (n, d)
      data matrix, where each row corresponds to a single instance

    y : array-like, shape (n,)
      data labels, one for each instance

    Returns
    -------
    self : object
      Returns the instance.
    """
    X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
    num_constraints = self.num_constraints
    if num_constraints is None:
      # Heuristic default: scale the number of sampled pairs with the
      # number of classes.
      num_classes = len(np.unique(y))
      num_constraints = 20 * num_classes**2

    c = Constraints(y)
    pos_neg = c.positive_negative_pairs(num_constraints,
                                        random_state=self.random_state)
    pairs, y = wrap_pairs(X, pos_neg)
    return _BaseSDML._fit(self, pairs, y)