Skip to content

Classification

Conformal prediction methods for classification tasks.

mapie.classification.SplitConformalClassifier

SplitConformalClassifier(
    estimator: ClassifierMixin = LogisticRegression(),
    confidence_level: Union[float, Iterable[float]] = 0.9,
    conformity_score: Union[
        str, BaseClassificationScore
    ] = "lac",
    prefit: bool = True,
    n_jobs: Optional[int] = None,
    verbose: int = 0,
    random_state: Optional[Union[int, RandomState]] = None,
)

Computes prediction sets using the split conformal classification technique:

  1. The fit method (optional) fits the base classifier to the training data.
  2. The conformalize method estimates the uncertainty of the base classifier by computing conformity scores on the conformalization set.
  3. The predict_set method predicts labels and sets of labels.
PARAMETER DESCRIPTION
estimator

The base classifier used to predict labels.

TYPE: ClassifierMixin DEFAULT: LogisticRegression()

confidence_level

The confidence level(s) for the prediction sets, indicating the desired coverage probability of the prediction sets. If a float is provided, it represents a single confidence level. If a list, multiple prediction sets for each specified confidence level are returned.

TYPE: Union[float, Iterable[float]] DEFAULT: 0.9

conformity_score

The method used to compute conformity scores.

Valid options:

  • "lac"
  • "top_k"
  • "aps"
  • "raps"
  • Any subclass of BaseClassificationScore

A custom score function inheriting from BaseClassificationScore may also be provided.

See theoretical description (classification).

TYPE: Union[str, BaseClassificationScore] DEFAULT: "lac"

prefit

If True, the base classifier must already be fitted, and the fit method must not be called.

If False, the base classifier will be fitted during the fit method.

TYPE: bool DEFAULT: True

n_jobs

The number of jobs to run in parallel when applicable.

TYPE: Optional[int] DEFAULT: None

verbose

Controls the verbosity level. Higher values increase the output details.

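TYPE: int DEFAULT: 0

random_state

A seed or random state instance to ensure reproducibility in any random operations within the classifier.

TYPE: Optional[Union[int, RandomState]] DEFAULT: None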

Examples:

>>> from mapie.classification import SplitConformalClassifier
>>> from mapie.utils import train_conformalize_test_split
>>> from sklearn.datasets import make_classification
>>> from sklearn.neighbors import KNeighborsClassifier
>>> X, y = make_classification(n_samples=500)
>>> (
...     X_train, X_conformalize, X_test,
...     y_train, y_conformalize, y_test
... ) = train_conformalize_test_split(
...     X, y, train_size=0.6, conformalize_size=0.2, test_size=0.2, random_state=1
... )
>>> mapie_classifier = SplitConformalClassifier(
...     estimator=KNeighborsClassifier(),
...     confidence_level=0.95,
...     prefit=False,
... ).fit(X_train, y_train).conformalize(X_conformalize, y_conformalize)
>>> predicted_labels, predicted_sets = mapie_classifier.predict_set(X_test)
Source code in mapie/classification.py
def __init__(
    self,
    estimator: ClassifierMixin = LogisticRegression(),
    confidence_level: Union[float, Iterable[float]] = 0.9,
    conformity_score: Union[str, BaseClassificationScore] = "lac",
    prefit: bool = True,
    n_jobs: Optional[int] = None,
    verbose: int = 0,
    random_state: Optional[Union[int, np.random.RandomState]] = None,
) -> None:
    # Run the two validating helpers first (confidence level, then
    # conformity score) so that nothing is stored if either one rejects
    # its input.
    alphas = _transform_confidence_level_to_alpha_list(confidence_level)
    selected_score = check_and_select_conformity_score(
        conformity_score, BaseClassificationScore
    )

    self._estimator = estimator
    self._alphas = alphas
    self._conformity_score = selected_score

    # The "fitted" flag starts out equal to `prefit`: in prefit mode the
    # estimator is taken as already fitted and `fit` is never expected.
    self._prefit = prefit
    self._is_fitted = prefit
    self._is_conformalized = False

    # Prediction-time parameters for the base classifier; overwritten by
    # `conformalize`.
    self._predict_params: dict = {}

    # Note to developers: to implement this v1 class without touching the
    # v0 backend, we're for now using a hack. We always set cv="prefit",
    # and we fit the estimator if needed. See the .fit method below.
    self._mapie_classifier = _MapieClassifier(
        estimator=estimator,
        cv="prefit",
        n_jobs=n_jobs,
        verbose=verbose,
        conformity_score=selected_score,
        random_state=random_state,
    )

fit

fit(
    X_train: ArrayLike,
    y_train: ArrayLike,
    fit_params: Optional[dict] = None,
) -> SplitConformalClassifier

Fits the base classifier to the training data.

PARAMETER DESCRIPTION
X_train

Training data features.

TYPE: ArrayLike

y_train

Training data targets.

TYPE: ArrayLike

fit_params

Parameters to pass to the fit method of the base classifier.

TYPE: Optional[dict] DEFAULT: None

RETURNS DESCRIPTION
Self

The fitted SplitConformalClassifier instance.

Source code in mapie/classification.py
def fit(
    self,
    X_train: ArrayLike,
    y_train: ArrayLike,
    fit_params: Optional[dict] = None,
) -> SplitConformalClassifier:
    """
    Train a clone of the base classifier on the training data.

    The estimator given at construction is left untouched: a clone is
    fitted and handed to the underlying MAPIE classifier.

    Parameters
    ----------
    X_train : ArrayLike
        Features of the training set.

    y_train : ArrayLike
        Targets of the training set.

    fit_params : Optional[dict], default=None
        Extra keyword arguments forwarded to the base classifier's
        `fit` method.

    Returns
    -------
    Self
        This SplitConformalClassifier instance, now fitted.
    """
    # Guards: `fit` is checked against prefit mode and against having
    # already been called on this instance.
    _raise_error_if_fit_called_in_prefit_mode(self._prefit)
    _raise_error_if_method_already_called("fit", self._is_fitted)

    estimator_copy = clone(self._estimator)
    extra_fit_kwargs = _prepare_params(fit_params)
    estimator_copy.fit(X_train, y_train, **extra_fit_kwargs)

    # Swap the freshly fitted clone into the backend, which was built
    # with cv="prefit" in __init__.
    self._mapie_classifier.estimator = estimator_copy
    self._is_fitted = True
    return self

conformalize

conformalize(
    X_conformalize: ArrayLike,
    y_conformalize: ArrayLike,
    predict_params: Optional[dict] = None,
) -> SplitConformalClassifier

Estimates the uncertainty of the base classifier by computing conformity scores on the conformalization set.

PARAMETER DESCRIPTION
X_conformalize

Features of the conformalization set.

TYPE: ArrayLike

y_conformalize

Targets of the conformalization set.

TYPE: ArrayLike

predict_params

Parameters to pass to the predict and predict_proba methods of the base classifier. These parameters will also be used in the predict_set and predict methods of this SplitConformalClassifier.

TYPE: Optional[dict] DEFAULT: None

RETURNS DESCRIPTION
Self

The conformalized SplitConformalClassifier instance.

Source code in mapie/classification.py
def conformalize(
    self,
    X_conformalize: ArrayLike,
    y_conformalize: ArrayLike,
    predict_params: Optional[dict] = None,
) -> SplitConformalClassifier:
    """
    Compute conformity scores on the conformalization set to estimate
    the uncertainty of the base classifier.

    Parameters
    ----------
    X_conformalize : ArrayLike
        Features of the conformalization set.

    y_conformalize : ArrayLike
        Targets of the conformalization set.

    predict_params : Optional[dict], default=None
        Parameters forwarded to the `predict` and `predict_proba` methods
        of the base classifier. They are stored and reused by the
        `predict_set` and `predict` methods of this
        SplitConformalClassifier.

    Returns
    -------
    Self
        This SplitConformalClassifier instance, now conformalized.
    """
    # Guards: the fitted flag must be set, and this method is checked
    # against having already been called.
    _raise_error_if_previous_method_not_called(
        "conformalize", "fit", self._is_fitted
    )
    _raise_error_if_method_already_called("conformalize", self._is_conformalized)

    # Keep the prepared parameters on the instance so that later
    # predictions use the same ones.
    prepared_predict_params = _prepare_params(predict_params)
    self._predict_params = prepared_predict_params

    # The backend runs with cv="prefit" (see __init__), and its `fit` is
    # the call made here with the conformalization data.
    self._mapie_classifier.fit(
        X_conformalize,
        y_conformalize,
        predict_params=prepared_predict_params,
    )

    self._is_conformalized = True
    return self

predict_set

predict_set(
    X: ArrayLike,
    conformity_score_params: Optional[dict] = None,
) -> Tuple[NDArray, NDArray]

For each sample in X, predicts a label (using the base classifier), and a set of labels.

If several confidence levels were provided during initialisation, several sets will be predicted for each sample. See the return signature.

PARAMETER DESCRIPTION
X

Features

TYPE: ArrayLike

conformity_score_params

Parameters specific to conformity scores, used at prediction time.

The only example for now is include_last_label, available for aps and raps conformity scores. For detailed information on include_last_label, see the docstring of APSConformityScore.get_prediction_sets.

TYPE: Optional[dict] DEFAULT: None

RETURNS DESCRIPTION
Tuple[NDArray, NDArray]

Two arrays:

  • Prediction labels, of shape (n_samples,)
  • Prediction sets, of shape (n_samples, n_class, n_confidence_levels)
Source code in mapie/classification.py
def predict_set(
    self,
    X: ArrayLike,
    conformity_score_params: Optional[dict] = None,
) -> Tuple[NDArray, NDArray]:
    """
    Predict, for each sample in X, a label (from the base classifier)
    together with a set of labels.

    When several confidence levels were given at initialisation, one set
    per confidence level is produced for each sample. See the return
    signature.

    Parameters
    ----------
    X : ArrayLike
        Features

    conformity_score_params : Optional[dict], default=None
        Parameters specific to conformity scores, used at prediction time.

        The only key read for now is `include_last_label` (defaults to
        True when absent), available for `aps` and `raps` conformity
        scores. For detailed information on `include_last_label`, see the
        docstring of `APSConformityScore.get_prediction_sets`.

    Returns
    -------
    Tuple[NDArray, NDArray]
        Two arrays:

        - Prediction labels, of shape `(n_samples,)`
        - Prediction sets, of shape `(n_samples, n_class, n_confidence_levels)`
    """
    _raise_error_if_previous_method_not_called(
        "predict_set", "conformalize", self._is_conformalized
    )

    score_options = _prepare_params(conformity_score_params)
    include_last_label = score_options.get("include_last_label", True)

    # The predict params stored by `conformalize` are forwarded as extra
    # keyword arguments to the backend.
    raw_predictions = self._mapie_classifier.predict(
        X,
        alpha=self._alphas,
        include_last_label=include_last_label,
        **self._predict_params,
    )
    return _cast_predictions_to_ndarray_tuple(raw_predictions)

predict

predict(X: ArrayLike) -> NDArray

For each sample in X, returns the predicted label by the base classifier.

PARAMETER DESCRIPTION
X

Features

TYPE: ArrayLike

RETURNS DESCRIPTION
NDArray

Array of predicted labels, with shape (n_samples,).

Source code in mapie/classification.py
def predict(self, X: ArrayLike) -> NDArray:
    """
    Return the label predicted by the base classifier for each sample in X.

    Parameters
    ----------
    X : ArrayLike
        Features

    Returns
    -------
    NDArray
        Array of predicted labels, with shape `(n_samples,)`.
    """
    _raise_error_if_previous_method_not_called(
        "predict", "conformalize", self._is_conformalized
    )

    # alpha=None is passed to the backend here, whereas `predict_set`
    # passes the stored alphas.
    point_predictions = self._mapie_classifier.predict(
        X, alpha=None, **self._predict_params
    )
    return _cast_point_predictions_to_ndarray(point_predictions)

mapie.classification.CrossConformalClassifier

CrossConformalClassifier(
    estimator: ClassifierMixin = LogisticRegression(),
    confidence_level: Union[float, Iterable[float]] = 0.9,
    conformity_score: Union[
        str, BaseClassificationScore
    ] = "lac",
    cv: Union[int, BaseCrossValidator] = 5,
    n_jobs: Optional[int] = None,
    verbose: int = 0,
    random_state: Optional[Union[int, RandomState]] = None,
)

Computes prediction sets using the cross conformal classification technique:

  1. The fit_conformalize method estimates the uncertainty of the base classifier in a cross-validation style. It fits the base classifier on folds of the dataset and computes conformity scores on the out-of-fold data.
  2. The predict_set method predicts labels and sets of labels.
PARAMETER DESCRIPTION
estimator

The base classifier used to predict labels.

TYPE: ClassifierMixin DEFAULT: LogisticRegression()

confidence_level

The confidence level(s) for the prediction sets, indicating the desired coverage probability of the prediction sets. If a float is provided, it represents a single confidence level. If a list, multiple prediction sets for each specified confidence level are returned.

TYPE: Union[float, Iterable[float]] DEFAULT: 0.9

conformity_score

The method used to compute conformity scores. Valid options:

  • "lac"
  • "aps"
  • Any subclass of BaseClassificationScore

A custom score function inheriting from BaseClassificationScore may also be provided.

See theoretical description (classification).

TYPE: Union[str, BaseClassificationScore] DEFAULT: "lac"

cv

The cross-validator used to compute conformity scores. Valid options:

  • integer, to specify the number of folds
  • any sklearn.model_selection.BaseCrossValidator suitable for classification, or a custom cross-validator inheriting from it.

Main variants in the cross conformal setting are:

  • sklearn.model_selection.KFold (vanilla cross conformal)
  • sklearn.model_selection.LeaveOneOut (jackknife)

TYPE: Union[int, BaseCrossValidator] DEFAULT: 5

n_jobs

The number of jobs to run in parallel when applicable.

TYPE: Optional[int] DEFAULT: None

verbose

Controls the verbosity level. Higher values increase the output details.

TYPE: int DEFAULT: 0

random_state

A seed or random state instance to ensure reproducibility in any random operations within the classifier.

TYPE: Optional[Union[int, RandomState]] DEFAULT: None

Examples:

>>> from mapie.classification import CrossConformalClassifier
>>> from sklearn.datasets import make_classification
>>> from sklearn.model_selection import train_test_split
>>> from sklearn.neighbors import KNeighborsClassifier
>>> X_full, y_full = make_classification(n_samples=500)
>>> X, X_test, y, y_test = train_test_split(X_full, y_full)
>>> mapie_classifier = CrossConformalClassifier(
...     estimator=KNeighborsClassifier(),
...     confidence_level=0.95,
...     cv=10
... ).fit_conformalize(X, y)
>>> predicted_labels, predicted_sets = mapie_classifier.predict_set(X_test)
Source code in mapie/classification.py
def __init__(
    self,
    estimator: ClassifierMixin = LogisticRegression(),
    confidence_level: Union[float, Iterable[float]] = 0.9,
    conformity_score: Union[str, BaseClassificationScore] = "lac",
    cv: Union[int, BaseCrossValidator] = 5,
    n_jobs: Optional[int] = None,
    verbose: int = 0,
    random_state: Optional[Union[int, np.random.RandomState]] = None,
) -> None:
    # Checks run in this order: cv first, then the conformity score, and
    # finally (after the backend is built) the confidence level.
    _check_cv_not_string(cv)
    selected_score = check_and_select_conformity_score(
        conformity_score,
        BaseClassificationScore,
    )

    # Backend object that receives the estimator and the cross-validator.
    self._mapie_classifier = _MapieClassifier(
        estimator=estimator,
        cv=cv,
        n_jobs=n_jobs,
        verbose=verbose,
        conformity_score=selected_score,
        random_state=random_state,
    )

    self._alphas = _transform_confidence_level_to_alpha_list(confidence_level)

    # Prediction-time parameters for the base classifier; overwritten by
    # `fit_conformalize`.
    self._predict_params: dict = {}

    # Single state flag: fitting and conformalizing happen in one step.
    self.is_fitted_and_conformalized = False

fit_conformalize

fit_conformalize(
    X: ArrayLike,
    y: ArrayLike,
    groups: Optional[ArrayLike] = None,
    fit_params: Optional[dict] = None,
    predict_params: Optional[dict] = None,
) -> CrossConformalClassifier

Estimates the uncertainty of the base classifier in a cross-validation style: fits the base classifier on different folds of the dataset and computes conformity scores on the corresponding out-of-fold data.

PARAMETER DESCRIPTION
X

Features

TYPE: ArrayLike

y

Targets

TYPE: ArrayLike

groups

Groups to pass to the cross-validator.

TYPE: Optional[ArrayLike] DEFAULT: None

fit_params

Parameters to pass to the fit method of the base classifier.

TYPE: Optional[dict] DEFAULT: None

predict_params

Parameters to pass to the predict and predict_proba methods of the base classifier. These parameters will also be used in the predict_set and predict methods of this CrossConformalClassifier.

TYPE: Optional[dict] DEFAULT: None

RETURNS DESCRIPTION
Self

This CrossConformalClassifier instance, fitted and conformalized.

Source code in mapie/classification.py
def fit_conformalize(
    self,
    X: ArrayLike,
    y: ArrayLike,
    groups: Optional[ArrayLike] = None,
    fit_params: Optional[dict] = None,
    predict_params: Optional[dict] = None,
) -> CrossConformalClassifier:
    """
    Fit and conformalize in a cross-validation style: the base classifier
    is fitted on different folds of the dataset, and conformity scores are
    computed on the corresponding out-of-fold data.

    Parameters
    ----------
    X : ArrayLike
        Features

    y : ArrayLike
        Targets

    groups: Optional[ArrayLike] of shape (n_samples,), default=None
        Groups to pass to the cross-validator.

    fit_params : Optional[dict], default=None
        Parameters forwarded to the `fit` method of the base classifier.

    predict_params : Optional[dict], default=None
        Parameters forwarded to the `predict` and `predict_proba` methods
        of the base classifier. They are stored and reused by the
        `predict_set` and `predict` methods of this
        CrossConformalClassifier.

    Returns
    -------
    Self
        This CrossConformalClassifier instance, fitted and conformalized.
    """
    _raise_error_if_method_already_called(
        "fit_conformalize", self.is_fitted_and_conformalized
    )

    # The helper hands back the fit parameters and the sample weight as
    # two separate values; the backend takes them as distinct arguments.
    estimator_fit_kwargs, weights = _prepare_fit_params_and_sample_weight(
        fit_params
    )

    # Keep the prepared predict parameters so later predictions reuse them.
    prepared_predict_params = _prepare_params(predict_params)
    self._predict_params = prepared_predict_params

    self._mapie_classifier.fit(
        X=X,
        y=y,
        sample_weight=weights,
        groups=groups,
        fit_params=estimator_fit_kwargs,
        predict_params=prepared_predict_params,
    )

    self.is_fitted_and_conformalized = True
    return self

predict_set

predict_set(
    X: ArrayLike,
    conformity_score_params: Optional[dict] = None,
    agg_scores: str = "mean",
) -> Tuple[NDArray, NDArray]

For each sample in X, predicts a label (using the base classifier), and a set of labels.

If several confidence levels were provided during initialisation, several sets will be predicted for each sample. See the return signature.

PARAMETER DESCRIPTION
X

Features

TYPE: ArrayLike

conformity_score_params

Parameters specific to conformity scores, used at prediction time.

The only example for now is include_last_label, available for aps and raps conformity scores. For detailed information on include_last_label, see the docstring of APSConformityScore.get_prediction_sets.

TYPE: Optional[dict] DEFAULT: None

agg_scores

How to aggregate conformity scores.

Each classifier fitted on different folds of the dataset is used to produce conformity scores on the test data. The agg_scores parameter controls how those scores are aggregated. Valid options:

  • "mean", takes the mean of scores.
  • "crossval", compares the scores between all training data and each test point for each label to estimate if the label must be included in the prediction set. Follows algorithm 2 of Classification with Valid and Adaptive Coverage (Romano+2020).

TYPE: str DEFAULT: "mean"

RETURNS DESCRIPTION
Tuple[NDArray, NDArray]

Two arrays:

  • Prediction labels, of shape (n_samples,)
  • Prediction sets, of shape (n_samples, n_class, n_confidence_levels)
Source code in mapie/classification.py
def predict_set(
    self,
    X: ArrayLike,
    conformity_score_params: Optional[dict] = None,
    agg_scores: str = "mean",
) -> Tuple[NDArray, NDArray]:
    """
    Predict, for each sample in X, a label (from the base classifier)
    together with a set of labels.

    When several confidence levels were given at initialisation, one set
    per confidence level is produced for each sample. See the return
    signature.

    Parameters
    ----------
    X : ArrayLike
        Features

    conformity_score_params : Optional[dict], default=None
        Parameters specific to conformity scores, used at prediction time.

        The only key read for now is `include_last_label` (defaults to
        True when absent), available for `aps` and `raps` conformity
        scores. For detailed information on `include_last_label`, see the
        docstring of `APSConformityScore.get_prediction_sets`.

    agg_scores : str, default="mean"
        How to aggregate conformity scores.

        Each classifier fitted on different folds of the dataset is used
        to produce conformity scores on the test data. The `agg_scores`
        parameter controls how those scores are aggregated. Valid options:

        - "mean", takes the mean of scores.
        - "crossval", compares the scores between all training data and each
          test point for each label to estimate if the label must be
          included in the prediction set. Follows algorithm 2 of
          Classification with Valid and Adaptive Coverage (Romano+2020).

    Returns
    -------
    Tuple[NDArray, NDArray]
        Two arrays:

        - Prediction labels, of shape `(n_samples,)`
        - Prediction sets, of shape `(n_samples, n_class, n_confidence_levels)`
    """
    _raise_error_if_previous_method_not_called(
        "predict_set", "fit_conformalize", self.is_fitted_and_conformalized
    )

    score_options = _prepare_params(conformity_score_params)
    include_last_label = score_options.get("include_last_label", True)

    # The predict params stored by `fit_conformalize` are forwarded as
    # extra keyword arguments to the backend.
    raw_predictions = self._mapie_classifier.predict(
        X,
        alpha=self._alphas,
        include_last_label=include_last_label,
        agg_scores=agg_scores,
        **self._predict_params,
    )
    return _cast_predictions_to_ndarray_tuple(raw_predictions)

predict

predict(X: ArrayLike) -> NDArray

For each sample in X, returns the predicted label by the base classifier.

PARAMETER DESCRIPTION
X

Features

TYPE: ArrayLike

RETURNS DESCRIPTION
NDArray

Array of predicted labels, with shape (n_samples,).

Source code in mapie/classification.py
def predict(self, X: ArrayLike) -> NDArray:
    """
    Return the label predicted by the base classifier for each sample in X.

    Parameters
    ----------
    X : ArrayLike
        Features

    Returns
    -------
    NDArray
        Array of predicted labels, with shape `(n_samples,)`.
    """
    _raise_error_if_previous_method_not_called(
        "predict", "fit_conformalize", self.is_fitted_and_conformalized
    )

    # alpha=None is passed to the backend here, whereas `predict_set`
    # passes the stored alphas.
    point_predictions = self._mapie_classifier.predict(
        X, alpha=None, **self._predict_params
    )
    return _cast_point_predictions_to_ndarray(point_predictions)