# Source code for metric_learn.sdml

"""
Sparse High-Dimensional Metric Learning (SDML)
"""

import warnings
import numpy as np
from sklearn.base import TransformerMixin
from scipy.linalg import pinvh
from sklearn.covariance import graphical_lasso
from sklearn.exceptions import ConvergenceWarning

from .base_metric import MahalanobisMixin, _PairsClassifierMixin
from .constraints import Constraints, wrap_pairs
from ._util import components_from_metric, _initialize_metric_mahalanobis
try:
from inverse_covariance import quic
except ImportError:
HAS_SKGGM = False
else:
HAS_SKGGM = True

class _BaseSDML(MahalanobisMixin):
    """Base class holding the SDML fitting logic on labeled pairs.

    ``__init__`` only stores the hyper-parameters. All the work happens in
    ``_fit``, which runs a graphical lasso solver (skggm's ``quic`` when it
    could be imported, scikit-learn's ``graphical_lasso`` otherwise) and
    stores the result in ``components_``.
    """

    _tuple_size = 2  # constraints are pairs

    def __init__(self, balance_param=0.5, sparsity_param=0.01,
                 prior='identity', verbose=False, preprocessor=None,
                 random_state=None):
        """Store the hyper-parameters; nothing is computed here.

        Parameters
        ----------
        balance_param : float, optional (default=0.5)
            Weight of the pair-based loss matrix added to the inverse of
            the prior.

        sparsity_param : float, optional (default=0.01)
            Regularization strength handed to the graphical lasso solver
            (``lam`` for skggm's ``quic``, ``alpha`` for scikit-learn's
            ``graphical_lasso``).

        prior : string or numpy array, optional (default='identity')
            Prior for the metric, forwarded to
            ``_initialize_metric_mahalanobis`` with ``strict_pd=True``.

        verbose : bool, optional (default=False)
            If True, prints which solver is used and is forwarded to the
            solver.

        preprocessor : array-like or callable, optional (default=None)
            Forwarded unchanged to the parent class constructor.

        random_state : int or numpy.RandomState or None (default=None)
            Forwarded to ``_initialize_metric_mahalanobis``.
        """
        self.balance_param = balance_param
        self.sparsity_param = sparsity_param
        self.prior = prior
        self.verbose = verbose
        self.random_state = random_state
        super(_BaseSDML, self).__init__(preprocessor)

    def _fit(self, pairs, y):
        """Learn the metric from labeled pairs and set ``components_``.

        Parameters
        ----------
        pairs : array-like
            Pairs of points; validated by ``_prepare_inputs`` with
            ``type_of_inputs='tuples'`` before being used.

        y : array-like
            One label per pair. Each pair's difference outer product is
            weighted by its label when building the loss matrix.

        Returns
        -------
        self : object
            Returns the instance.

        Warns
        -----
        ConvergenceWarning
            If the matrix given to the graphical lasso solver has a
            negative eigenvalue.

        Raises
        ------
        RuntimeError
            If the solver raises an exception, or if the matrix it returns
            has a negative eigenvalue or contains non-finite values.
        """
        if not HAS_SKGGM:
            if self.verbose:
                print("SDML will use scikit-learn's graphical lasso solver.")
        else:
            if self.verbose:
                print("SDML will use skggm's graphical lasso solver.")
        pairs, y = self._prepare_inputs(pairs, y,
                                        type_of_inputs='tuples')

        # set up (the inverse of) the prior M; strict_pd=True is requested
        # because the inverse of the prior is needed below
        _, prior_inv = _initialize_metric_mahalanobis(
            pairs, self.prior,
            return_inverse=True, strict_pd=True, matrix_name='prior',
            random_state=self.random_state)
        diff = pairs[:, 0] - pairs[:, 1]
        # label-weighted sum of the outer products of the pair differences
        loss_matrix = (diff.T * y).dot(diff)
        emp_cov = prior_inv + self.balance_param * loss_matrix

        # our initialization will be the matrix with emp_cov's eigenvalues,
        # with a constant added so that they are all positive (plus an
        # epsilon to ensure definiteness). This is empirical.
        w, V = np.linalg.eigh(emp_cov)
        min_eigval = np.min(w)
        if min_eigval < 0.:
            warnings.warn("Warning, the input matrix of graphical lasso is "
                          "not "
                          "positive semi-definite (PSD). The algorithm may "
                          "diverge, "
                          "and lead to degenerate solutions. "
                          "To prevent that, try to decrease the balance "
                          "parameter "
                          "balance_param and/or to set prior='identity'.",
                          ConvergenceWarning)
            # we translate the eigenvalues to make them all positive
            w -= min_eigval
        w += 1e-10  # we add a small offset to avoid definiteness problems
        sigma0 = (V * w).dot(V.T)
        try:
            if HAS_SKGGM:
                theta0 = pinvh(sigma0)
                M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param,
                                        msg=self.verbose,
                                        Theta0=theta0, Sigma0=sigma0)
            else:
                _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param,
                                       verbose=self.verbose,
                                       cov_init=sigma0)
            raised_error = None
            w_mahalanobis, _ = np.linalg.eigh(M)
            not_spd = any(w_mahalanobis < 0.)
            not_finite = not np.isfinite(M).all()
        except Exception as e:
            # The solver failure is reported through the RuntimeError below,
            # together with the solver name and, when relevant, some advice.
            raised_error = e
            not_spd = False  # not applicable here so we set to False
            not_finite = False  # not applicable here so we set to False
        if raised_error is not None or not_spd or not_finite:
            msg = ("There was a problem in SDML when using {}'s graphical "
                   "lasso solver.").format(
                       "skggm" if HAS_SKGGM else "scikit-learn")
            if not HAS_SKGGM:
                skggm_advice = (" skggm's graphical lasso can sometimes "
                                "converge "
                                "on non SPD cases where scikit-learn's "
                                "graphical "
                                "lasso fails to converge. Try to install "
                                "skggm and "
                                "rerun the algorithm (see the README.md for "
                                "the "
                                "right version of skggm).")
                # The advice has to be appended to the message, otherwise
                # it is built and silently dropped.
                msg += skggm_advice
            if raised_error is not None:
                msg += " The following error message was thrown: {}.".format(
                    raised_error)
            raise RuntimeError(msg)

        self.components_ = components_from_metric(np.atleast_2d(M))
        return self

class SDML(_BaseSDML, _PairsClassifierMixin):
    r"""Sparse Distance Metric Learning (SDML)

    SDML is an efficient sparse metric learning in high-dimensional space
    via double regularization: an L1-penalization on the off-diagonal
    elements of the Mahalanobis matrix :math:`\mathbf{M}`, and a
    log-determinant divergence between :math:`\mathbf{M}` and
    :math:`\mathbf{M_0}` (set as either :math:`\mathbf{I}` or
    :math:`\mathbf{\Omega}^{-1}`, where :math:`\mathbf{\Omega}` is the
    covariance matrix).

    Read more in the :ref:`User Guide <sdml>`.

    Parameters
    ----------
    balance_param : float, optional (default=0.5)
        Trade off between sparsity and M0 prior.

    sparsity_param : float, optional (default=0.01)
        Trade off between optimizer and sparseness (see graphical_lasso).

    prior : string or numpy array, optional (default='identity')
        Prior to set for the metric. Possible options are
        'identity', 'covariance', 'random', and a numpy array of
        shape (n_features, n_features). For SDML, the prior should be
        strictly positive definite (PD).

        'identity'
            An identity matrix of shape (n_features, n_features).

        'covariance'
            The inverse covariance matrix.

        'random'
            The prior will be a random positive definite (PD) matrix of
            shape `(n_features, n_features)`, generated using
            `sklearn.datasets.make_spd_matrix`.

        numpy array
            A positive definite (PD) matrix of shape
            (n_features, n_features), that will be used as such to set the
            prior.

    verbose : bool, optional (default=False)
        If True, prints information while learning.

    preprocessor : array-like, shape=(n_samples, n_features) or callable
        The preprocessor to call to get tuples from indices. If array-like,
        tuples will be gotten like this: X[indices].

    random_state : int or numpy.RandomState or None, optional (default=None)
        A pseudo random number generator object or a seed for it if int. If
        ``prior='random'``, ``random_state`` is used to set the prior.

    Attributes
    ----------
    components_ : `numpy.ndarray`, shape=(n_features, n_features)
        The linear transformation ``L`` deduced from the learned Mahalanobis
        metric (See function `components_from_metric`.)

    threshold_ : `float`
        If the distance metric between two points is lower than this
        threshold, points will be classified as similar, otherwise they
        will be classified as dissimilar.

    Examples
    --------
    >>> from metric_learn import SDML_Supervised
    >>> X = iris_data['data']
    >>> Y = iris_data['target']
    >>> sdml = SDML_Supervised(num_constraints=200)
    >>> sdml.fit(X, Y)

    References
    ----------
    .. [1] Qi et al. `An efficient sparse metric learning in high-dimensional
           space via L1-penalized log-determinant regularization
           <http://www.machinelearning.org/archive/icml2009/papers/46.pdf>`_.
           ICML 2009.

    .. [2] Code adapted from https://gist.github.com/kcarnold/5439945
    """

    def fit(self, pairs, y, calibration_params=None):
        """Learn the SDML model.

        The threshold will be calibrated on the trainset using the
        parameters `calibration_params`.

        Parameters
        ----------
        pairs : array-like, shape=(n_constraints, 2, n_features) or \
               (n_constraints, 2)
            3D Array of pairs with each row corresponding to two points,
            or 2D array of indices of pairs if the metric learner uses a
            preprocessor.

        y : array-like, of shape (n_constraints,)
            Labels of constraints. Should be -1 for dissimilar pair, 1 for
            similar.

        calibration_params : `dict` or `None`
            Dictionary of parameters to give to `calibrate_threshold` for
            the threshold calibration step done at the end of `fit`. If
            `None` is given, `calibrate_threshold` will use the default
            parameters.

        Returns
        -------
        self : object
            Returns the instance.
        """
        # An absent dictionary means "no extra calibration arguments".
        if calibration_params is None:
            calibration_params = {}
        # Check the calibration arguments before the metric is learned.
        self._validate_calibration_params(**calibration_params)
        self._fit(pairs, y)
        self.calibrate_threshold(pairs, y, **calibration_params)
        return self

class SDML_Supervised(_BaseSDML, TransformerMixin):
    """Supervised version of Sparse Distance Metric Learning (SDML)

    `SDML_Supervised` creates pairs of similar sample by taking same class
    samples, and pairs of dissimilar samples by taking different class
    samples. It then passes these pairs to `SDML` for training.

    Parameters
    ----------
    balance_param : float, optional (default=0.5)
        Trade off between sparsity and M0 prior.

    sparsity_param : float, optional (default=0.01)
        Trade off between optimizer and sparseness (see graphical_lasso).

    prior : string or numpy array, optional (default='identity')
        Prior to set for the metric. Possible options are
        'identity', 'covariance', 'random', and a numpy array of
        shape (n_features, n_features). For SDML, the prior should be
        strictly positive definite (PD).

        'identity'
            An identity matrix of shape (n_features, n_features).

        'covariance'
            The inverse covariance matrix.

        'random'
            The prior will be a random SPD matrix of shape
            `(n_features, n_features)`, generated using
            `sklearn.datasets.make_spd_matrix`.

        numpy array
            A positive definite (PD) matrix of shape
            (n_features, n_features), that will be used as such to set the
            prior.

    num_constraints : int, optional (default=None)
        Number of constraints to generate. If None, defaults to `20 *
        num_classes**2`.

    verbose : bool, optional (default=False)
        If True, prints information while learning.

    preprocessor : array-like, shape=(n_samples, n_features) or callable
        The preprocessor to call to get tuples from indices. If array-like,
        tuples will be formed like this: X[indices].

    random_state : int or numpy.RandomState or None, optional (default=None)
        A pseudo random number generator object or a seed for it if int. If
        ``prior='random'``, ``random_state`` is used to set the random
        prior. In any case, `random_state` is also used to randomly sample
        constraints from labels.

    Attributes
    ----------
    components_ : `numpy.ndarray`, shape=(n_features, n_features)
        The linear transformation ``L`` deduced from the learned Mahalanobis
        metric (See function `components_from_metric`.)

    See Also
    --------
    metric_learn.SDML : The original weakly-supervised algorithm
    :ref:`supervised_version` : The section of the project documentation
      that describes the supervised version of weakly supervised estimators.
    """

    def __init__(self, balance_param=0.5, sparsity_param=0.01,
                 prior='identity', num_constraints=None, verbose=False,
                 preprocessor=None, random_state=None):
        # Everything except `num_constraints` is handled by the base class.
        _BaseSDML.__init__(
            self,
            balance_param=balance_param,
            sparsity_param=sparsity_param,
            prior=prior,
            verbose=verbose,
            preprocessor=preprocessor,
            random_state=random_state,
        )
        self.num_constraints = num_constraints

    def fit(self, X, y):
        """Create constraints from labels and learn the SDML model.

        Parameters
        ----------
        X : array-like, shape (n, d)
            data matrix, where each row corresponds to a single instance

        y : array-like, shape (n,)
            data labels, one for each instance

        Returns
        -------
        self : object
            Returns the instance.
        """
        X, y = self._prepare_inputs(X, y, ensure_min_samples=2)

        # Without an explicit budget, use 20 constraints per squared number
        # of distinct labels.
        if self.num_constraints is None:
            n_classes = np.unique(y).size
            n_pairs = 20 * n_classes ** 2
        else:
            n_pairs = self.num_constraints

        # Sample similar / dissimilar pairs from the labels, then turn them
        # into the (pairs, pair labels) form expected by the base fit.
        constraints = Constraints(y)
        pos_neg = constraints.positive_negative_pairs(
            n_pairs, random_state=self.random_state)
        pairs, pair_labels = wrap_pairs(X, pos_neg)
        return _BaseSDML._fit(self, pairs, pair_labels)