"""
Sparse High-Dimensional Metric Learning (SDML)
"""
import warnings
import numpy as np
from sklearn.base import TransformerMixin
from scipy.linalg import pinvh
from sklearn.covariance import graphical_lasso
from sklearn.exceptions import ConvergenceWarning
from .base_metric import MahalanobisMixin, _PairsClassifierMixin
from .constraints import Constraints, wrap_pairs
from ._util import components_from_metric, _initialize_metric_mahalanobis
try:
from inverse_covariance import quic
except ImportError:
HAS_SKGGM = False
else:
HAS_SKGGM = True
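# If skggm is installed, its QUIC solver is preferred over scikit-learn's
# graphical lasso in _BaseSDML._fit below; otherwise we fall back to
# sklearn.covariance.graphical_lasso.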
class _BaseSDML(MahalanobisMixin):
_tuple_size = 2 # constraints are pairs
def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
verbose=False, preprocessor=None,
random_state=None):
self.balance_param = balance_param
self.sparsity_param = sparsity_param
self.prior = prior
self.verbose = verbose
self.random_state = random_state
super(_BaseSDML, self).__init__(preprocessor)
def _fit(self, pairs, y):
if not HAS_SKGGM:
if self.verbose:
print("SDML will use scikit-learn's graphical lasso solver.")
else:
if self.verbose:
print("SDML will use skggm's graphical lasso solver.")
pairs, y = self._prepare_inputs(pairs, y,
type_of_inputs='tuples')
n_features = pairs.shape[2]
if n_features < 2:
raise ValueError(f"Cannot fit SDML with {n_features} feature(s)")
    # set up (the inverse of) the prior M: _initialize_metric_mahalanobis
    # checks that the prior is strictly positive definite and returns its
    # inverse
_, prior_inv = _initialize_metric_mahalanobis(
pairs, self.prior,
return_inverse=True, strict_pd=True, matrix_name='prior',
random_state=self.random_state)
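    # build the loss matrix L = sum_i y_i (x_i - x_i')(x_i - x_i')^T, where
    # y_i is +1 for a similar pair (x_i, x_i') and -1 for a dissimilar one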
diff = pairs[:, 0] - pairs[:, 1]
loss_matrix = (diff.T * y).dot(diff)
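    # the surrogate "covariance" fed to graphical lasso is
    # M0^{-1} + balance_param * L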
emp_cov = prior_inv + self.balance_param * loss_matrix
# our initialization will be the matrix with emp_cov's eigenvalues,
# with a constant added so that they are all positive (plus an epsilon
# to ensure definiteness). This is empirical.
w, V = np.linalg.eigh(emp_cov)
min_eigval = np.min(w)
    if min_eigval < 0.:
      warnings.warn("The input matrix of graphical lasso is not "
                    "positive semi-definite (PSD). The algorithm may diverge "
                    "and lead to degenerate solutions. "
                    "To prevent that, try to decrease the balance parameter "
                    "`balance_param` and/or to set prior='identity'.",
                    ConvergenceWarning)
w -= min_eigval # we translate the eigenvalues to make them all positive
w += 1e-10 # we add a small offset to avoid definiteness problems
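    # rebuild sigma0 = V diag(w) V^T, a positive definite matrix used as the
    # initial covariance guess for the solver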
sigma0 = (V * w).dot(V.T)
try:
if HAS_SKGGM:
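        # quic is warm-started with both an initial precision matrix (Theta0)
        # and an initial covariance matrix (Sigma0)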
theta0 = pinvh(sigma0)
M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param,
msg=self.verbose,
Theta0=theta0, Sigma0=sigma0)
else:
_, M = graphical_lasso(emp_cov, alpha=self.sparsity_param,
verbose=self.verbose,
cov_init=sigma0)
raised_error = None
w_mahalanobis, _ = np.linalg.eigh(M)
      not_spd = np.any(w_mahalanobis < 0.)
not_finite = not np.isfinite(M).all()
# TODO: Narrow this to the specific exceptions we expect.
except Exception as e:
raised_error = e
not_spd = False # not_spd not applicable here so we set to False
not_finite = False # not_finite not applicable here so we set to False
if raised_error is not None or not_spd or not_finite:
msg = ("There was a problem in SDML when using {}'s graphical "
"lasso solver.").format("skggm" if HAS_SKGGM else "scikit-learn")
if not HAS_SKGGM:
skggm_advice = (" skggm's graphical lasso can sometimes converge "
"on non SPD cases where scikit-learn's graphical "
"lasso fails to converge. Try to install skggm and "
"rerun the algorithm (see the README.md for the "
"right version of skggm).")
msg += skggm_advice
if raised_error is not None:
msg += " The following error message was thrown: {}.".format(
raised_error)
raise RuntimeError(msg)
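    # deduce the linear transformation L (components_) such that
    # M = L^T L (see `components_from_metric`)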
self.components_ = components_from_metric(np.atleast_2d(M))
return self
class SDML(_BaseSDML, _PairsClassifierMixin):
r"""Sparse Distance Metric Learning (SDML)
  SDML learns a sparse metric efficiently in high-dimensional space via
  double regularization: an L1-penalization on the off-diagonal elements of the
Mahalanobis matrix :math:`\mathbf{M}`, and a log-determinant divergence
between :math:`\mathbf{M}` and :math:`\mathbf{M_0}` (set as either
:math:`\mathbf{I}` or :math:`\mathbf{\Omega}^{-1}`, where
:math:`\mathbf{\Omega}` is the covariance matrix).
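  Concretely, denoting by :math:`\eta` the `balance_param`, by
  :math:`\lambda` the `sparsity_param`, and by :math:`\mathbf{L} = \sum_i
  y_i (\mathbf{x}_i - \mathbf{x}'_i)(\mathbf{x}_i - \mathbf{x}'_i)^T` the
  loss matrix built from the pairs (:math:`y_i = \pm 1`), the fit amounts to
  a graphical lasso problem on the surrogate "covariance"
  :math:`\mathbf{M_0}^{-1} + \eta \mathbf{L}`:

  .. math::

      \min_{\mathbf{M} \succ 0} \,
      \text{tr}\left((\mathbf{M_0}^{-1} + \eta \mathbf{L})\mathbf{M}\right)
      - \log\det \mathbf{M} + \lambda \|\mathbf{M}\|_{1, \text{off}}
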
Read more in the :ref:`User Guide <sdml>`.
Parameters
----------
  balance_param : float, optional (default=0.5)
    Trade-off between the pairwise-constraints loss and the M0 prior term.
  sparsity_param : float, optional (default=0.01)
    Strength of the L1-penalization enforcing sparsity (the `alpha`
    parameter of `sklearn.covariance.graphical_lasso`).
prior : string or numpy array, optional (default='identity')
Prior to set for the metric. Possible options are
'identity', 'covariance', 'random', and a numpy array of
shape (n_features, n_features). For SDML, the prior should be strictly
positive definite (PD).
'identity'
An identity matrix of shape (n_features, n_features).
'covariance'
The inverse covariance matrix.
'random'
The prior will be a random positive definite (PD) matrix of shape
`(n_features, n_features)`, generated using
`sklearn.datasets.make_spd_matrix`.
numpy array
A positive definite (PD) matrix of shape
(n_features, n_features), that will be used as such to set the
prior.
verbose : bool, optional (default=False)
If True, prints information while learning.
preprocessor : array-like, shape=(n_samples, n_features) or callable
The preprocessor to call to get tuples from indices. If array-like,
    tuples will be formed like this: X[indices].
random_state : int or numpy.RandomState or None, optional (default=None)
A pseudo random number generator object or a seed for it if int. If
``prior='random'``, ``random_state`` is used to set the prior.
Attributes
----------
components_ : `numpy.ndarray`, shape=(n_features, n_features)
The linear transformation ``L`` deduced from the learned Mahalanobis
metric (See function `components_from_metric`.)
  threshold_ : `float`
    If the distance between two points is lower than this threshold, the
    points will be classified as similar, otherwise they will be
    classified as dissimilar.
Examples
--------
>>> from metric_learn import SDML_Supervised
>>> from sklearn.datasets import load_iris
>>> iris_data = load_iris()
>>> X = iris_data['data']
>>> Y = iris_data['target']
>>> sdml = SDML_Supervised(n_constraints=200)
>>> sdml.fit(X, Y)
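
  A weakly-supervised sketch on toy pairs (values picked arbitrarily for
  illustration; on real data, convergence may require tuning
  `balance_param`):

  >>> from metric_learn import SDML
  >>> pairs = [[[1.2, 7.5], [1.3, 1.5]],
  ...          [[6.4, 2.6], [6.2, 9.7]],
  ...          [[1.3, 4.5], [3.2, 4.6]],
  ...          [[6.2, 5.5], [5.4, 5.4]]]
  >>> y_pairs = [1, 1, -1, -1]
  >>> sdml = SDML(prior='identity', balance_param=0.5)
  >>> sdml.fit(pairs, y_pairs)  # doctest: +SKIP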
References
----------
.. [1] Qi et al. `An efficient sparse metric learning in high-dimensional
space via L1-penalized log-determinant regularization
<http://www.machinelearning.org/archive/icml2009/papers/46.pdf>`_.
ICML 2009.
.. [2] Code adapted from https://gist.github.com/kcarnold/5439945
"""
def fit(self, pairs, y, calibration_params=None):
"""Learn the SDML model.
    The threshold will be calibrated on the training set using the parameters
    `calibration_params`.
Parameters
----------
pairs : array-like, shape=(n_constraints, 2, n_features) or \
(n_constraints, 2)
3D Array of pairs with each row corresponding to two points,
or 2D array of indices of pairs if the metric learner uses a
preprocessor.
y : array-like, of shape (n_constraints,)
      Labels of constraints. Should be -1 for a dissimilar pair, 1 for a
      similar pair.
calibration_params : `dict` or `None`
Dictionary of parameters to give to `calibrate_threshold` for the
threshold calibration step done at the end of `fit`. If `None` is
given, `calibrate_threshold` will use the default parameters.
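      For instance (a sketch; see `calibrate_threshold` for the accepted
      keys): ``calibration_params={'strategy': 'f_beta', 'beta': 2.}``.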
Returns
-------
self : object
Returns the instance.
"""
calibration_params = (calibration_params if calibration_params is not
None else dict())
self._validate_calibration_params(**calibration_params)
self._fit(pairs, y)
self.calibrate_threshold(pairs, y, **calibration_params)
return self
class SDML_Supervised(_BaseSDML, TransformerMixin):
"""Supervised version of Sparse Distance Metric Learning (SDML)
  `SDML_Supervised` creates pairs of similar samples by taking same-class
  samples, and pairs of dissimilar samples by taking different-class
  samples. It then passes these pairs to `SDML` for training.
Parameters
----------
  balance_param : float, optional (default=0.5)
    Trade-off between the pairwise-constraints loss and the M0 prior term.
  sparsity_param : float, optional (default=0.01)
    Strength of the L1-penalization enforcing sparsity (the `alpha`
    parameter of `sklearn.covariance.graphical_lasso`).
prior : string or numpy array, optional (default='identity')
Prior to set for the metric. Possible options are
'identity', 'covariance', 'random', and a numpy array of
shape (n_features, n_features). For SDML, the prior should be strictly
positive definite (PD).
'identity'
An identity matrix of shape (n_features, n_features).
'covariance'
The inverse covariance matrix.
'random'
      The prior will be a random positive definite (PD) matrix of shape
`(n_features, n_features)`, generated using
`sklearn.datasets.make_spd_matrix`.
numpy array
A positive definite (PD) matrix of shape
(n_features, n_features), that will be used as such to set the
prior.
n_constraints : int, optional (default=None)
Number of constraints to generate. If None, defaults to `20 *
num_classes**2`.
verbose : bool, optional (default=False)
If True, prints information while learning.
preprocessor : array-like, shape=(n_samples, n_features) or callable
The preprocessor to call to get tuples from indices. If array-like,
tuples will be formed like this: X[indices].
random_state : int or numpy.RandomState or None, optional (default=None)
A pseudo random number generator object or a seed for it if int. If
    ``prior='random'``, ``random_state`` is used to set the random
prior. In any case, `random_state` is also used to randomly sample
constraints from labels.
  num_constraints : Renamed to `n_constraints`. Deprecated since 0.6.3 and
    will be removed in 0.7.0.
Attributes
----------
components_ : `numpy.ndarray`, shape=(n_features, n_features)
The linear transformation ``L`` deduced from the learned Mahalanobis
metric (See function `components_from_metric`.)
See Also
--------
metric_learn.SDML : The original weakly-supervised algorithm
:ref:`supervised_version` : The section of the project documentation
that describes the supervised version of weakly supervised estimators.
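
  Examples
  --------
  A minimal sketch on iris, mirroring the example in `SDML` above:

  >>> from metric_learn import SDML_Supervised
  >>> from sklearn.datasets import load_iris
  >>> iris_data = load_iris()
  >>> X = iris_data['data']
  >>> Y = iris_data['target']
  >>> sdml = SDML_Supervised(n_constraints=200)
  >>> sdml.fit(X, Y)  # doctest: +SKIP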
"""
def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
n_constraints=None, verbose=False, preprocessor=None,
random_state=None, num_constraints='deprecated'):
_BaseSDML.__init__(self, balance_param=balance_param,
sparsity_param=sparsity_param, prior=prior,
verbose=verbose,
preprocessor=preprocessor, random_state=random_state)
if num_constraints != 'deprecated':
warnings.warn('"num_constraints" parameter has been renamed to'
' "n_constraints". It has been deprecated in'
' version 0.6.3 and will be removed in 0.7.0'
'', FutureWarning)
self.n_constraints = num_constraints
else:
self.n_constraints = n_constraints
    # Avoid test get_params from failing (all params passed should be set)
self.num_constraints = 'deprecated'
def fit(self, X, y):
"""Create constraints from labels and learn the SDML model.
Parameters
----------
X : array-like, shape (n, d)
data matrix, where each row corresponds to a single instance
y : array-like, shape (n,)
data labels, one for each instance
Returns
-------
self : object
Returns the instance.
"""
X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
n_constraints = self.n_constraints
if n_constraints is None:
num_classes = len(np.unique(y))
n_constraints = 20 * num_classes**2
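    # sample n_constraints similar (positive) and n_constraints dissimilar
    # (negative) pairs from the class labels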
c = Constraints(y)
pos_neg = c.positive_negative_pairs(n_constraints,
random_state=self.random_state)
pairs, y = wrap_pairs(X, pos_neg)
return _BaseSDML._fit(self, pairs, y)