# Source code for metric_learn.lsml

"""
Metric Learning from Relative Comparisons by Minimizing Squared Residual (LSML)
"""

import numpy as np
import scipy.linalg
from sklearn.base import TransformerMixin

from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin
from .constraints import Constraints
from ._util import components_from_metric, _initialize_metric_mahalanobis

class _BaseLSML(MahalanobisMixin):

  _tuple_size = 4  # constraints are quadruplets

  def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
               verbose=False, preprocessor=None, random_state=None):
    self.prior = prior
    self.tol = tol
    self.max_iter = max_iter
    self.verbose = verbose
    self.random_state = random_state
    super(_BaseLSML, self).__init__(preprocessor)

  def _fit(self, quadruplets, weights=None):
    quadruplets = self._prepare_inputs(quadruplets,
                                       type_of_inputs='tuples')

    # check to make sure that no two constrained vectors are identical
    vab = quadruplets[:, 0, :] - quadruplets[:, 1, :]
    vcd = quadruplets[:, 2, :] - quadruplets[:, 3, :]
    if vab.shape != vcd.shape:
      raise ValueError('Constraints must have same length')
    if weights is None:
      self.w_ = np.ones(vab.shape[0])
    else:
      self.w_ = weights
    self.w_ /= self.w_.sum()  # weights must sum to 1
    M, prior_inv = _initialize_metric_mahalanobis(
        quadruplets, self.prior,
        return_inverse=True, strict_pd=True, matrix_name='prior',
        random_state=self.random_state)

    step_sizes = np.logspace(-10, 0, 10)
    # Keep track of the best step size and the loss at that step.
    l_best = 0
    s_best = self._total_loss(M, vab, vcd, prior_inv)
    if self.verbose:
      print('initial loss', s_best)
    for it in range(1, self.max_iter + 1):
      grad = self._gradient(M, vab, vcd, prior_inv)
      grad_norm = scipy.linalg.norm(grad)
      if grad_norm < self.tol:
        break
      if self.verbose:
        print('gradient norm', grad_norm)
      M_best = None
      # line search over a fixed grid of step sizes, scaled by the
      # gradient norm
      for step_size in step_sizes:
        step_size /= grad_norm
        new_metric = M - step_size * grad
        # project back onto the PSD cone by clipping negative eigenvalues
        w, v = scipy.linalg.eigh(new_metric)
        new_metric = v.dot((np.maximum(w, 1e-8) * v).T)
        cur_s = self._total_loss(new_metric, vab, vcd, prior_inv)
        if cur_s < s_best:
          l_best = step_size
          s_best = cur_s
          M_best = new_metric
      if self.verbose:
        print('iter', it, 'cost', s_best, 'best step', l_best * grad_norm)
      if M_best is None:
        break
      M = M_best
    else:
      if self.verbose:
        print("Didn't converge after", it, "iterations. Final loss:", s_best)
    self.n_iter_ = it

    self.components_ = components_from_metric(M)
    return self

  def _comparison_loss(self, metric, vab, vcd):
    # squared Mahalanobis distances of the (a, b) and (c, d) pairs
    dab = np.sum(vab.dot(metric) * vab, axis=1)
    dcd = np.sum(vcd.dot(metric) * vcd, axis=1)
    # only constraints with d(a, b) > d(c, d) contribute to the loss
    violations = dab > dcd
    return self.w_[violations].dot((np.sqrt(dab[violations]) -
                                    np.sqrt(dcd[violations]))**2)

  def _total_loss(self, metric, vab, vcd, prior_inv):
    # LogDet regularization towards the prior: tr(prior_inv * M) - logdet(M)
    sign, logdet = np.linalg.slogdet(metric)
    reg_loss = np.sum(metric * prior_inv) - sign * logdet
    return self._comparison_loss(metric, vab, vcd) + reg_loss

  def _gradient(self, metric, vab, vcd, prior_inv):
    # gradient of the regularizer: prior_inv - M^{-1}
    dMetric = prior_inv - np.linalg.inv(metric)
    dabs = np.sum(vab.dot(metric) * vab, axis=1)
    dcds = np.sum(vcd.dot(metric) * vcd, axis=1)
    violations = dabs > dcds
    # TODO: vectorize
    for vab, dab, vcd, dcd in zip(vab[violations], dabs[violations],
                                  vcd[violations], dcds[violations]):
      dMetric += ((1 - np.sqrt(dcd / dab)) * np.outer(vab, vab) +
                  (1 - np.sqrt(dab / dcd)) * np.outer(vcd, vcd))
    return dMetric

"""Least Squared-residual Metric Learning (LSML)

LSML proposes a simple, yet effective, algorithm that minimizes a convex
objective function corresponding to the sum of squared residuals of
constraints. This algorithm uses the constraints in the form of the
relative distance comparisons, such method is especially useful where
pairwise constraints are not natural to obtain, thus pairwise constraints
based algorithms become infeasible to be deployed. Furthermore, its sparsity
extension leads to more stable estimation when the dimension is high and
only a small amount of constraints is given.

Read more in the :ref:User Guide <lsml>.
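
  Concretely, writing :math:`d_M(u, v) = (u - v)^{\top} M (u - v)` for the
  squared distance under the learned metric :math:`M`, the objective
  minimized by the code above is, up to additive constants (a compact
  summary, not the full derivation of [1]_):

  .. math::

      \sum_{i \in \mathcal{V}} w_i \left(\sqrt{d_M(x_a, x_b)} -
      \sqrt{d_M(x_c, x_d)}\right)^2 + \mathrm{tr}(M_0^{-1} M) - \log\det M

  where :math:`\mathcal{V}` indexes the currently violated quadruplets
  (those with :math:`d_M(x_a, x_b) > d_M(x_c, x_d)`) and :math:`M_0` is
  the prior matrix.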

  Parameters
  ----------
  prior : string or numpy array, optional (default='identity')
      Prior to set for the metric. Possible options are
      'identity', 'covariance', 'random', and a numpy array of
      shape (n_features, n_features). For LSML, the prior should be strictly
      positive definite (PD).

      'identity'
          An identity matrix of shape (n_features, n_features).

      'covariance'
          The inverse covariance matrix.

      'random'
          The initial Mahalanobis matrix will be a random positive definite
          (PD) matrix of shape (n_features, n_features), generated using
          sklearn.datasets.make_spd_matrix.

      numpy array
          A positive definite (PD) matrix of shape
          (n_features, n_features), that will be used as such to set the
          prior.

  tol : float, optional (default=1e-3)
      Convergence tolerance of the optimization procedure.

  max_iter : int, optional (default=1000)
      Maximum number of iterations of the optimization procedure.

  verbose : bool, optional (default=False)
      If True, prints information while learning.

  preprocessor : array-like, shape=(n_samples, n_features) or callable
      The preprocessor to call to get tuples from indices. If array-like,
      tuples will be formed like this: X[indices].

  random_state : int or numpy.RandomState or None, optional (default=None)
      A pseudo random number generator object or a seed for it if int. If
      ``prior='random'``, ``random_state`` is used to set the random
      prior.

  Attributes
  ----------
  n_iter_ : int
      The number of iterations the solver has run.

  components_ : numpy.ndarray, shape=(n_features, n_features)
      The linear transformation ``L`` deduced from the learned Mahalanobis
      metric (see function `components_from_metric`).

  Examples
  --------
  >>> from metric_learn import LSML
  >>> quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]],
  ...                [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]],
  ...                [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]],
  ...                [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]]
  >>> # we want to make points closer when their first feature is close,
  >>> # and farther when their second feature is close
  >>> lsml = LSML()
  >>> lsml.fit(quadruplets)

  References
  ----------
  .. [1] Liu et al. `Metric Learning from Relative Comparisons by Minimizing
         Squared Residual
         <http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf>`_. ICDM 2012.

  .. [2] Code adapted from https://gist.github.com/kcarnold/5439917

  See Also
  --------
  metric_learn.LSML : The original weakly-supervised algorithm.

  :ref:`supervised_version` : The section of the project documentation
      that describes the supervised version of weakly supervised estimators.
  """

"""Learn the LSML model.

Parameters
----------
quadruplets : array-like, shape=(n_constraints, 4, n_features) or \
(n_constraints, 4)
3D array-like of quadruplets of points or 2D array of quadruplets of
indicators. In order to supervise the algorithm in the right way, we
should have the four samples ordered in a way such that:
d(pairs[i, 0],X[i, 1]) < d(X[i, 2], X[i, 3]) for all 0 <= i <
n_constraints.

weights : (n_constraints,) array of floats, optional
scale factor for each constraint

Returns
-------
self : object
Returns the instance.
"""

class LSML_Supervised(_BaseLSML, TransformerMixin):
  """Supervised version of Least Squared-residual Metric Learning (LSML)

  LSML_Supervised creates quadruplets from labeled samples by taking two
  samples from the same class, and two samples from different classes.
  This way it builds quadruplets where the first two points must be more
  similar than the last two points.
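
  For instance, with labels ``y = [0, 0, 1, 1]``, one generated quadruplet
  could pair ``(X[0], X[1])`` (both labeled 0) with ``(X[1], X[2])``
  (labels 0 and 1). This is an illustrative sketch of the sampling, not
  its exact output.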

  Parameters
  ----------
  tol : float, optional (default=1e-3)
      Convergence tolerance of the optimization procedure.

  max_iter : int, optional (default=1000)
      Maximum number of iterations of the optimization procedure.

  prior : string or numpy array, optional (default='identity')
      Prior to set for the metric. Possible options are
      'identity', 'covariance', 'random', and a numpy array of
      shape (n_features, n_features). For LSML, the prior should be strictly
      positive definite (PD).

      'identity'
          An identity matrix of shape (n_features, n_features).

      'covariance'
          The inverse covariance matrix.

      'random'
          The initial Mahalanobis matrix will be a random positive definite
          (PD) matrix of shape (n_features, n_features), generated using
          sklearn.datasets.make_spd_matrix.

      numpy array
          A positive definite (PD) matrix of shape
          (n_features, n_features), that will be used as such to set the
          prior.

  num_constraints : int, optional (default=None)
      Number of constraints to generate. If None, defaults to
      20 * num_classes**2.

  weights : (num_constraints,) array of floats, optional (default=None)
      Relative weight given to each constraint. If None, defaults to uniform
      weights.

  verbose : bool, optional (default=False)
      If True, prints information while learning.

  preprocessor : array-like, shape=(n_samples, n_features) or callable
      The preprocessor to call to get tuples from indices. If array-like,
      tuples will be formed like this: X[indices].

  random_state : int or numpy.RandomState or None, optional (default=None)
      A pseudo random number generator object or a seed for it if int. If
      ``prior='random'``, ``random_state`` is used to set the random
      prior. In any case, ``random_state`` is also used to randomly sample
      constraints from labels.

  Examples
  --------
  >>> from metric_learn import LSML_Supervised
  >>> from sklearn.datasets import load_iris
  >>> iris_data = load_iris()
  >>> X = iris_data['data']
  >>> Y = iris_data['target']
  >>> lsml = LSML_Supervised(num_constraints=200)
  >>> lsml.fit(X, Y)

  Attributes
  ----------
  n_iter_ : int
      The number of iterations the solver has run.

  components_ : numpy.ndarray, shape=(n_features, n_features)
      The linear transformation ``L`` deduced from the learned Mahalanobis
      metric (see function `components_from_metric`).
  """

  def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
               num_constraints=None, weights=None,
               verbose=False, preprocessor=None, random_state=None):
    _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior,
                       verbose=verbose, preprocessor=preprocessor,
                       random_state=random_state)
    self.num_constraints = num_constraints
    self.weights = weights

  def fit(self, X, y):
    """Create constraints from labels and learn the LSML model.

    Parameters
    ----------
    X : (n x d) matrix
        Input data, where each row corresponds to a single instance.

    y : (n) array-like
        Data labels.

    Returns
    -------
    self : object
        Returns the instance.
    """
    X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
    num_constraints = self.num_constraints
    if num_constraints is None:
      num_classes = len(np.unique(y))
      num_constraints = 20 * num_classes**2

    # sample positive (same label) and negative (different label) pairs,
    # then stack them column-wise into quadruplets
    c = Constraints(y)
    pos_neg = c.positive_negative_pairs(num_constraints, same_length=True,
                                        random_state=self.random_state)
    return _BaseLSML._fit(self, X[np.column_stack(pos_neg)],
                          weights=self.weights)
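

if __name__ == '__main__':
  # Minimal usage sketch, not part of the library API: run with
  # `python -m metric_learn.lsml` (the relative imports above prevent
  # running this file directly). It fits the supervised variant on the
  # iris dataset and embeds the data with the learned metric. `load_iris`
  # comes from scikit-learn, which is already a dependency of this module.
  from sklearn.datasets import load_iris

  iris = load_iris()
  X, y = iris.data, iris.target

  lsml = LSML_Supervised(num_constraints=200, random_state=42)
  lsml.fit(X, y)

  # `transform` comes from MahalanobisMixin: Euclidean distances between
  # the embedded points equal the learned Mahalanobis distances.
  X_embedded = lsml.transform(X)
  print('embedding shape:', X_embedded.shape)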