Source code for metric_learn.lsml

"""
Metric Learning from Relative Comparisons by Minimizing Squared Residual (LSML)
"""

import numpy as np
import scipy.linalg
from sklearn.base import TransformerMixin

from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin
from .constraints import Constraints
from ._util import components_from_metric, _initialize_metric_mahalanobis
import warnings


class _BaseLSML(MahalanobisMixin):

  _tuple_size = 4  # constraints are quadruplets

  def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
               verbose=False, preprocessor=None, random_state=None):
    self.prior = prior
    self.tol = tol
    self.max_iter = max_iter
    self.verbose = verbose
    self.random_state = random_state
    super(_BaseLSML, self).__init__(preprocessor)

  def _fit(self, quadruplets, weights=None):
    quadruplets = self._prepare_inputs(quadruplets,
                                       type_of_inputs='tuples')

    # check to make sure that no two constrained vectors are identical
    vab = quadruplets[:, 0, :] - quadruplets[:, 1, :]
    vcd = quadruplets[:, 2, :] - quadruplets[:, 3, :]
    if vab.shape != vcd.shape:
      raise ValueError('Constraints must have same length')
    if weights is None:
      self.w_ = np.ones(vab.shape[0])
    else:
      self.w_ = weights
    self.w_ /= self.w_.sum()  # weights must sum to 1
    M, prior_inv = _initialize_metric_mahalanobis(
        quadruplets, self.prior,
        return_inverse=True, strict_pd=True, matrix_name='prior',
        random_state=self.random_state)

    step_sizes = np.logspace(-10, 0, 10)
    # Keep track of the best step size and the loss at that step.
    l_best = 0
    s_best = self._total_loss(M, vab, vcd, prior_inv)
    if self.verbose:
      print('initial loss', s_best)
    for it in range(1, self.max_iter + 1):
      grad = self._gradient(M, vab, vcd, prior_inv)
      grad_norm = scipy.linalg.norm(grad)
      if grad_norm < self.tol:
        break
      if self.verbose:
        print('gradient norm', grad_norm)
      M_best = None
      for step_size in step_sizes:
        step_size /= grad_norm
        new_metric = M - step_size * grad
        w, v = scipy.linalg.eigh(new_metric)
        new_metric = v.dot((np.maximum(w, 1e-8) * v).T)
        cur_s = self._total_loss(new_metric, vab, vcd, prior_inv)
        if cur_s < s_best:
          l_best = step_size
          s_best = cur_s
          M_best = new_metric
      if self.verbose:
        print('iter', it, 'cost', s_best, 'best step', l_best * grad_norm)
      if M_best is None:
        break
      M = M_best
    else:
      if self.verbose:
        print("Didn't converge after", it, "iterations. Final loss:", s_best)
    self.n_iter_ = it

    self.components_ = components_from_metric(M)
    return self

  def _comparison_loss(self, metric, vab, vcd):
    dab = np.sum(vab.dot(metric) * vab, axis=1)
    dcd = np.sum(vcd.dot(metric) * vcd, axis=1)
    violations = dab > dcd
    return self.w_[violations].dot((np.sqrt(dab[violations]) -
                                    np.sqrt(dcd[violations]))**2)

  def _total_loss(self, metric, vab, vcd, prior_inv):
    # Regularization loss
    sign, logdet = np.linalg.slogdet(metric)
    reg_loss = np.sum(metric * prior_inv) - sign * logdet
    return self._comparison_loss(metric, vab, vcd) + reg_loss

  def _gradient(self, metric, vab, vcd, prior_inv):
    dMetric = prior_inv - np.linalg.inv(metric)
    dabs = np.sum(vab.dot(metric) * vab, axis=1)
    dcds = np.sum(vcd.dot(metric) * vcd, axis=1)
    violations = dabs > dcds
    # TODO: vectorize
    for vab, dab, vcd, dcd in zip(vab[violations], dabs[violations],
                                  vcd[violations], dcds[violations]):
      dMetric += ((1 - np.sqrt(dcd / dab)) * np.outer(vab, vab) +
                  (1 - np.sqrt(dab / dcd)) * np.outer(vcd, vcd))
    return dMetric



[docs]
class LSML(_BaseLSML, _QuadrupletsClassifierMixin):
  """Least Squared-residual Metric Learning (LSML)

  `LSML` proposes a simple, yet effective, algorithm that minimizes a convex
  objective function corresponding to the sum of squared residuals of
  constraints. This algorithm uses the constraints in the form of the
  relative distance comparisons, such method is especially useful where
  pairwise constraints are not natural to obtain, thus pairwise constraints
  based algorithms become infeasible to be deployed. Furthermore, its sparsity
  extension leads to more stable estimation when the dimension is high and
  only a small amount of constraints is given.

  Read more in the :ref:`User Guide <lsml>`.

  Parameters
  ----------
  prior : string or numpy array, optional (default='identity')
    Prior to set for the metric. Possible options are
    'identity', 'covariance', 'random', and a numpy array of
    shape (n_features, n_features). For LSML, the prior should be strictly
    positive definite (PD).

    'identity'
      An identity matrix of shape (n_features, n_features).

    'covariance'
      The inverse covariance matrix.

    'random'
      The initial Mahalanobis matrix will be a random positive definite
      (PD) matrix of shape `(n_features, n_features)`, generated using
      `sklearn.datasets.make_spd_matrix`.

    numpy array
      A positive definite (PD) matrix of shape
      (n_features, n_features), that will be used as such to set the
      prior.

  tol : float, optional (default=1e-3)
    Convergence tolerance of the optimization procedure.

  max_iter : int, optional (default=1000)
    Maximum number of iteration of the optimization procedure.

  verbose : bool, optional (default=False)
    If True, prints information while learning

  preprocessor : array-like, shape=(n_samples, n_features) or callable
    The preprocessor to call to get tuples from indices. If array-like,
    tuples will be formed like this: X[indices].

  random_state : int or numpy.RandomState or None, optional (default=None)
    A pseudo random number generator object or a seed for it if int. If
    ``init='random'``, ``random_state`` is used to set the random
    prior.

  Attributes
  ----------
  n_iter_ : `int`
    The number of iterations the solver has run.

  components_ : `numpy.ndarray`, shape=(n_features, n_features)
    The linear transformation ``L`` deduced from the learned Mahalanobis
    metric (See function `components_from_metric`.)

  Examples
  --------
  >>> from metric_learn import LSML
  >>> quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]],
  >>>                [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]],
  >>>                [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]],
  >>>                [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]]
  >>> # we want to make closer points where the first feature is close, and
  >>> # further if the second feature is close
  >>> lsml = LSML()
  >>> lsml.fit(quadruplets)

  References
  ----------
  .. [1] Liu et al. `Metric Learning from Relative Comparisons by Minimizing
         Squared Residual
         <http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf>`_. ICDM 2012.

  .. [2] Code adapted from https://gist.github.com/kcarnold/5439917

  See Also
  --------
  metric_learn.LSML : The original weakly-supervised algorithm

  :ref:`supervised_version` : The section of the project documentation
    that describes the supervised version of weakly supervised estimators.
  """


[docs]
  def fit(self, quadruplets, weights=None):
    """Learn the LSML model.

    Parameters
    ----------
    quadruplets : array-like, shape=(n_constraints, 4, n_features) or \
                  (n_constraints, 4)
      3D array-like of quadruplets of points or 2D array of quadruplets of
      indicators. In order to supervise the algorithm in the right way, we
      should have the four samples ordered in a way such that:
      d(pairs[i, 0],X[i, 1]) < d(X[i, 2], X[i, 3]) for all 0 <= i <
      n_constraints.

    weights : (n_constraints,) array of floats, optional
      scale factor for each constraint

    Returns
    -------
    self : object
      Returns the instance.
    """
    return self._fit(quadruplets, weights=weights)





[docs]
class LSML_Supervised(_BaseLSML, TransformerMixin):
  """Supervised version of Least Squared-residual Metric Learning (LSML)

  `LSML_Supervised` creates quadruplets from labeled samples by taking two
  samples from the same class, and two samples from different classes.
  This way it builds quadruplets where the two first points must be more
  similar than the two last points.

  Parameters
  ----------
  tol : float, optional (default=1e-3)
    Convergence tolerance of the optimization procedure.

  max_iter : int, optional (default=1000)
    Number of maximum iterations of the optimization procedure.

  prior : string or numpy array, optional (default='identity')
    Prior to set for the metric. Possible options are
    'identity', 'covariance', 'random', and a numpy array of
    shape (n_features, n_features). For LSML, the prior should be strictly
    positive definite (PD).

    'identity'
      An identity matrix of shape (n_features, n_features).

    'covariance'
      The inverse covariance matrix.

    'random'
      The initial Mahalanobis matrix will be a random positive definite
      (PD) matrix of shape `(n_features, n_features)`, generated using
      `sklearn.datasets.make_spd_matrix`.

    numpy array
      A positive definite (PD) matrix of shape
      (n_features, n_features), that will be used as such to set the
      prior.

  n_constraints: int, optional (default=None)
    Number of constraints to generate. If None, default to `20 *
    num_classes**2`.

  weights : (n_constraints,) array of floats, optional (default=None)
    Relative weight given to each constraint. If None, defaults to uniform
    weights.

  verbose : bool, optional (default=False)
    If True, prints information while learning

  preprocessor : array-like, shape=(n_samples, n_features) or callable
    The preprocessor to call to get tuples from indices. If array-like,
    tuples will be formed like this: X[indices].

  random_state : int or numpy.RandomState or None, optional (default=None)
    A pseudo random number generator object or a seed for it if int. If
    ``init='random'``, ``random_state`` is used to set the random
    prior. In any case, `random_state` is also used to randomly sample
    constraints from labels.

  num_constraints : Renamed to n_constraints. Will be deprecated in 0.7.0

  Examples
  --------
  >>> from metric_learn import LSML_Supervised
  >>> from sklearn.datasets import load_iris
  >>> iris_data = load_iris()
  >>> X = iris_data['data']
  >>> Y = iris_data['target']
  >>> lsml = LSML_Supervised(n_constraints=200)
  >>> lsml.fit(X, Y)

  Attributes
  ----------
  n_iter_ : `int`
    The number of iterations the solver has run.

  components_ : `numpy.ndarray`, shape=(n_features, n_features)
    The linear transformation ``L`` deduced from the learned Mahalanobis
    metric (See function `components_from_metric`.)
  """


[docs]
  def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
               n_constraints=None, weights=None,
               verbose=False, preprocessor=None, random_state=None,
               num_constraints='deprecated'):
    _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior,
                       verbose=verbose, preprocessor=preprocessor,
                       random_state=random_state)
    if num_constraints != 'deprecated':
      warnings.warn('"num_constraints" parameter has been renamed to'
                    ' "n_constraints". It has been deprecated in'
                    ' version 0.6.3 and will be removed in 0.7.0'
                    '', FutureWarning)
      self.n_constraints = num_constraints
    else:
      self.n_constraints = n_constraints
    # Avoid test get_params from failing (all params passed sholud be set)
    self.num_constraints = 'deprecated'
    self.weights = weights



[docs]
  def fit(self, X, y):
    """Create constraints from labels and learn the LSML model.

    Parameters
    ----------
    X : (n x d) matrix
      Input data, where each row corresponds to a single instance.

    y : (n) array-like
      Data labels.
    """
    X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
    n_constraints = self.n_constraints
    if n_constraints is None:
      num_classes = len(np.unique(y))
      n_constraints = 20 * num_classes**2

    c = Constraints(y)
    pos_neg = c.positive_negative_pairs(n_constraints, same_length=True,
                                        random_state=self.random_state)
    return _BaseLSML._fit(self, X[np.column_stack(pos_neg)],
                          weights=self.weights)