Classification¶

Conformal prediction methods for classification tasks.

mapie.classification.SplitConformalClassifier ¶

SplitConformalClassifier(
    estimator: ClassifierMixin = LogisticRegression(),
    confidence_level: Union[float, Iterable[float]] = 0.9,
    conformity_score: Union[
        str, BaseClassificationScore
    ] = "lac",
    prefit: bool = True,
    n_jobs: Optional[int] = None,
    verbose: int = 0,
    random_state: Optional[Union[int, RandomState]] = None,
)

Computes prediction sets using the split conformal classification technique:

The fit method (optional) fits the base classifier to the training data.
The conformalize method estimates the uncertainty of the base classifier by computing conformity scores on the conformalization set.
The predict_set method predicts labels and sets of labels.

PARAMETER	DESCRIPTION
`estimator`	The base classifier used to predict labels. TYPE: `ClassifierMixin` DEFAULT: `LogisticRegression()`
`confidence_level`	The confidence level(s) for the prediction sets, indicating the desired coverage probability of the prediction sets. If a float is provided, it represents a single confidence level. If an iterable of floats is provided, one prediction set is returned for each specified confidence level. TYPE: `Union[float, Iterable[float]]` DEFAULT: `0.9`
`conformity_score`	The method used to compute conformity scores. Valid options: "lac", "top_k", "aps", "raps", or any subclass of BaseClassificationScore. A custom score function inheriting from BaseClassificationScore may also be provided. See theoretical description (classification). TYPE: `Union[str, BaseClassificationScore]` DEFAULT: `"lac"`
`prefit`	If True, the base classifier must be fitted, and the `fit` method must be skipped. If False, the base classifier will be fitted during the `fit` method. TYPE: `bool` DEFAULT: `True`
`n_jobs`	The number of jobs to run in parallel when applicable. TYPE: `Optional[int]` DEFAULT: `None`
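`verbose`	Controls the verbosity level. Higher values increase the output details. TYPE: `int` DEFAULT: `0`
`random_state`	A seed or random state instance to ensure reproducibility in any random operations within the classifier. TYPE: `Optional[Union[int, RandomState]]` DEFAULT: `None`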

Examples:

>>> from mapie.classification import SplitConformalClassifier
>>> from mapie.utils import train_conformalize_test_split
>>> from sklearn.datasets import make_classification
>>> from sklearn.neighbors import KNeighborsClassifier

>>> X, y = make_classification(n_samples=500)
>>> (
...     X_train, X_conformalize, X_test,
...     y_train, y_conformalize, y_test
... ) = train_conformalize_test_split(
...     X, y, train_size=0.6, conformalize_size=0.2, test_size=0.2, random_state=1
... )

>>> mapie_classifier = SplitConformalClassifier(
...     estimator=KNeighborsClassifier(),
...     confidence_level=0.95,
...     prefit=False,
... ).fit(X_train, y_train).conformalize(X_conformalize, y_conformalize)

>>> predicted_labels, predicted_sets = mapie_classifier.predict_set(X_test)

Source code in mapie/classification.py

def __init__(
    self,
    estimator: ClassifierMixin = LogisticRegression(),
    confidence_level: Union[float, Iterable[float]] = 0.9,
    conformity_score: Union[str, BaseClassificationScore] = "lac",
    prefit: bool = True,
    n_jobs: Optional[int] = None,
    verbose: int = 0,
    random_state: Optional[Union[int, np.random.RandomState]] = None,
) -> None:
    """
    Record the configuration and build the wrapped `_MapieClassifier`.

    Parameters
    ----------
    estimator : ClassifierMixin, default=LogisticRegression()
        The base classifier used to predict labels.

    confidence_level : Union[float, Iterable[float]], default=0.9
        Confidence level(s); converted to the internal alpha list by
        `_transform_confidence_level_to_alpha_list`.

    conformity_score : Union[str, BaseClassificationScore], default="lac"
        Conformity score name or instance, resolved through
        `check_and_select_conformity_score`.

    prefit : bool, default=True
        Whether the estimator is already fitted. The instance is flagged
        as fitted from the start when this is True.

    n_jobs : Optional[int], default=None
        Forwarded unchanged to the wrapped `_MapieClassifier`.

    verbose : int, default=0
        Forwarded unchanged to the wrapped `_MapieClassifier`.

    random_state : Optional[Union[int, np.random.RandomState]], default=None
        Forwarded unchanged to the wrapped `_MapieClassifier`.
    """
    self._estimator = estimator
    self._alphas = _transform_confidence_level_to_alpha_list(confidence_level)
    selected_score = check_and_select_conformity_score(
        conformity_score, BaseClassificationScore
    )
    self._conformity_score = selected_score

    # Workflow state: a prefit estimator counts as fitted right away, and
    # nothing is conformalized until `conformalize` has been called.
    self._is_conformalized = False
    self._is_fitted = prefit
    self._prefit = prefit

    # Note to developers: this v1 class is implemented on top of the v0
    # backend without modifying it. As a temporary hack the backend is
    # always created with cv="prefit", and the estimator is fitted by this
    # class when required. See the .fit method below.
    backend_options = dict(
        estimator=estimator,
        cv="prefit",
        n_jobs=n_jobs,
        verbose=verbose,
        conformity_score=selected_score,
        random_state=random_state,
    )
    self._mapie_classifier = _MapieClassifier(**backend_options)
    self._predict_params: dict = {}

fit ¶

fit(
    X_train: ArrayLike,
    y_train: ArrayLike,
    fit_params: Optional[dict] = None,
) -> SplitConformalClassifier

Fits the base classifier to the training data.

PARAMETER	DESCRIPTION
`X_train`	Training data features. TYPE: `ArrayLike`
`y_train`	Training data targets. TYPE: `ArrayLike`
`fit_params`	Parameters to pass to the `fit` method of the base classifier. TYPE: `Optional[dict]` DEFAULT: `None`

RETURNS	DESCRIPTION
`Self`	The fitted SplitConformalClassifier instance.

Source code in mapie/classification.py

def fit(
    self,
    X_train: ArrayLike,
    y_train: ArrayLike,
    fit_params: Optional[dict] = None,
) -> SplitConformalClassifier:
    """
    Train a clone of the base classifier on the training set.

    The clone, not the estimator passed at construction, is fitted and
    handed to the wrapped `_MapieClassifier`. Two guard helpers run first,
    checking the prefit flag and whether `fit` was already called.

    Parameters
    ----------
    X_train : ArrayLike
        Features of the training set.

    y_train : ArrayLike
        Targets of the training set.

    fit_params : Optional[dict], default=None
        Extra keyword arguments for the base classifier's `fit` method.

    Returns
    -------
    Self
        This SplitConformalClassifier instance, now flagged as fitted.
    """
    _raise_error_if_fit_called_in_prefit_mode(self._prefit)
    _raise_error_if_method_already_called("fit", self._is_fitted)

    # Fit a copy so the estimator supplied by the user is left untouched.
    fresh_estimator = clone(self._estimator)
    extra_fit_kwargs = _prepare_params(fit_params)
    fresh_estimator.fit(X_train, y_train, **extra_fit_kwargs)

    # The backend was built around the unfitted estimator; swap in the
    # fitted clone so the later steps use it.
    self._mapie_classifier.estimator = fresh_estimator
    self._is_fitted = True

    return self

conformalize ¶

conformalize(
    X_conformalize: ArrayLike,
    y_conformalize: ArrayLike,
    predict_params: Optional[dict] = None,
) -> SplitConformalClassifier

Estimates the uncertainty of the base classifier by computing conformity scores on the conformalization set.

PARAMETER	DESCRIPTION
`X_conformalize`	Features of the conformalization set. TYPE: `ArrayLike`
`y_conformalize`	Targets of the conformalization set. TYPE: `ArrayLike`
`predict_params`	Parameters to pass to the `predict` and `predict_proba` methods of the base classifier. These parameters will also be used in the `predict_set` and `predict` methods of this SplitConformalClassifier. TYPE: `Optional[dict]` DEFAULT: `None`

RETURNS	DESCRIPTION
`Self`	The conformalized SplitConformalClassifier instance.

Source code in mapie/classification.py

def conformalize(
    self,
    X_conformalize: ArrayLike,
    y_conformalize: ArrayLike,
    predict_params: Optional[dict] = None,
) -> SplitConformalClassifier:
    """
    Compute conformity scores on the conformalization set to estimate
    the uncertainty of the base classifier.

    Parameters
    ----------
    X_conformalize : ArrayLike
        Conformalization set features.

    y_conformalize : ArrayLike
        Conformalization set targets.

    predict_params : Optional[dict], default=None
        Parameters forwarded to the `predict` and `predict_proba` methods
        of the base classifier. They are stored on the instance and reused
        by the `predict_set` and `predict` methods of this
        SplitConformalClassifier.

    Returns
    -------
    Self
        This SplitConformalClassifier instance, now flagged as conformalized.
    """
    _raise_error_if_previous_method_not_called(
        "conformalize", "fit", self._is_fitted
    )
    _raise_error_if_method_already_called("conformalize", self._is_conformalized)

    # Keep the prepared parameters: the prediction methods replay them later.
    prepared_params = _prepare_params(predict_params)
    self._predict_params = prepared_params

    # With the backend in cv="prefit" mode, its `fit` is the conformalization
    # step of this class.
    self._mapie_classifier.fit(
        X_conformalize, y_conformalize, predict_params=prepared_params
    )
    self._is_conformalized = True

    return self

predict_set ¶

predict_set(
    X: ArrayLike,
    conformity_score_params: Optional[dict] = None,
) -> Tuple[NDArray, NDArray]

For each sample in X, predicts a label (using the base classifier), and a set of labels.

If several confidence levels were provided during initialisation, several sets will be predicted for each sample. See the return signature.

PARAMETER	DESCRIPTION
`X`	Features TYPE: `ArrayLike`
`conformity_score_params`	Parameters specific to conformity scores, used at prediction time. The only example for now is `include_last_label`, available for `aps` and `raps` conformity scores. For detailed information on `include_last_label`, see the docstring of `APSConformityScore.get_prediction_sets`. TYPE: `Optional[dict]` DEFAULT: `None`

RETURNS	DESCRIPTION
`Tuple[NDArray, NDArray]`	Two arrays: the prediction labels, of shape `(n_samples,)`, and the prediction sets, of shape `(n_samples, n_class, n_confidence_levels)`.

Source code in mapie/classification.py

def predict_set(
    self,
    X: ArrayLike,
    conformity_score_params: Optional[dict] = None,
) -> Tuple[NDArray, NDArray]:
    """
    Predict, for every sample in X, a label (from the base classifier)
    together with a set of labels.

    When several confidence levels were given at initialisation, one set
    per confidence level is produced for each sample. See the return
    signature.

    Parameters
    ----------
    X : ArrayLike
        Features

    conformity_score_params : Optional[dict], default=None
        Parameters specific to conformity scores, used at prediction time.

        Only the `include_last_label` key is read here; it falls back to
        `True` when absent. It is available for the `aps` and `raps`
        conformity scores. For detailed information on
        `include_last_label`, see the docstring of
        `APSConformityScore.get_prediction_sets`.

    Returns
    -------
    Tuple[NDArray, NDArray]
        Two arrays:

        - Prediction labels, of shape `(n_samples,)`
        - Prediction sets, of shape `(n_samples, n_class, n_confidence_levels)`
    """
    _raise_error_if_previous_method_not_called(
        "predict_set", "conformalize", self._is_conformalized
    )

    score_options = _prepare_params(conformity_score_params)
    keep_last_label = score_options.get("include_last_label", True)

    # Replay the predict parameters captured during `conformalize`.
    raw_output = self._mapie_classifier.predict(
        X,
        alpha=self._alphas,
        include_last_label=keep_last_label,
        **self._predict_params,
    )
    return _cast_predictions_to_ndarray_tuple(raw_output)

predict ¶

predict(X: ArrayLike) -> NDArray

For each sample in X, returns the predicted label by the base classifier.

PARAMETER	DESCRIPTION
`X`	Features TYPE: `ArrayLike`

RETURNS	DESCRIPTION
`NDArray`	Array of predicted labels, with shape `(n_samples,)`.

Source code in mapie/classification.py

def predict(self, X: ArrayLike) -> NDArray:
    """
    Return the base classifier's predicted label for every sample in X.

    Parameters
    ----------
    X : ArrayLike
        Features

    Returns
    -------
    NDArray
        Array of predicted labels, with shape `(n_samples,)`.
    """
    _raise_error_if_previous_method_not_called(
        "predict", "conformalize", self._is_conformalized
    )

    # alpha=None requests point predictions only; the predict parameters
    # captured during `conformalize` are replayed.
    point_output = self._mapie_classifier.predict(
        X, alpha=None, **self._predict_params
    )
    return _cast_point_predictions_to_ndarray(point_output)

mapie.classification.CrossConformalClassifier ¶

CrossConformalClassifier(
    estimator: ClassifierMixin = LogisticRegression(),
    confidence_level: Union[float, Iterable[float]] = 0.9,
    conformity_score: Union[
        str, BaseClassificationScore
    ] = "lac",
    cv: Union[int, BaseCrossValidator] = 5,
    n_jobs: Optional[int] = None,
    verbose: int = 0,
    random_state: Optional[Union[int, RandomState]] = None,
)

Computes prediction sets using the cross conformal classification technique:

The fit_conformalize method estimates the uncertainty of the base classifier in a cross-validation style. It fits the base classifier on folds of the dataset and computes conformity scores on the out-of-fold data.
The predict_set method predicts labels and sets of labels.

PARAMETER	DESCRIPTION
`estimator`	The base classifier used to predict labels. TYPE: `ClassifierMixin` DEFAULT: `LogisticRegression()`
`confidence_level`	The confidence level(s) for the prediction sets, indicating the desired coverage probability of the prediction sets. If a float is provided, it represents a single confidence level. If an iterable of floats is provided, one prediction set is returned for each specified confidence level. TYPE: `Union[float, Iterable[float]]` DEFAULT: `0.9`
`conformity_score`	The method used to compute conformity scores. Valid options: "lac", "aps", or any subclass of BaseClassificationScore. A custom score function inheriting from BaseClassificationScore may also be provided. See theoretical description (classification). TYPE: `Union[str, BaseClassificationScore]` DEFAULT: `"lac"`
`cv`	The cross-validator used to compute conformity scores. Valid options: an integer, to specify the number of folds; or any `sklearn.model_selection.BaseCrossValidator` suitable for classification, or a custom cross-validator inheriting from it. Main variants in the cross conformal setting are `sklearn.model_selection.KFold` (vanilla cross conformal) and `sklearn.model_selection.LeaveOneOut` (jackknife). TYPE: `Union[int, BaseCrossValidator]` DEFAULT: `5`
`n_jobs`	The number of jobs to run in parallel when applicable. TYPE: `Optional[int]` DEFAULT: `None`
`verbose`	Controls the verbosity level. Higher values increase the output details. TYPE: `int` DEFAULT: `0`
`random_state`	A seed or random state instance to ensure reproducibility in any random operations within the classifier. TYPE: `Optional[Union[int, RandomState]]` DEFAULT: `None`

Examples:

>>> from mapie.classification import CrossConformalClassifier
>>> from sklearn.datasets import make_classification
>>> from sklearn.model_selection import train_test_split
>>> from sklearn.neighbors import KNeighborsClassifier

>>> X_full, y_full = make_classification(n_samples=500)
>>> X, X_test, y, y_test = train_test_split(X_full, y_full)

>>> mapie_classifier = CrossConformalClassifier(
...     estimator=KNeighborsClassifier(),
...     confidence_level=0.95,
...     cv=10
... ).fit_conformalize(X, y)

>>> predicted_labels, predicted_sets = mapie_classifier.predict_set(X_test)

Source code in mapie/classification.py

def __init__(
    self,
    estimator: ClassifierMixin = LogisticRegression(),
    confidence_level: Union[float, Iterable[float]] = 0.9,
    conformity_score: Union[str, BaseClassificationScore] = "lac",
    cv: Union[int, BaseCrossValidator] = 5,
    n_jobs: Optional[int] = None,
    verbose: int = 0,
    random_state: Optional[Union[int, np.random.RandomState]] = None,
) -> None:
    """
    Validate `cv`, build the wrapped `_MapieClassifier` and reset state.

    Parameters
    ----------
    estimator : ClassifierMixin, default=LogisticRegression()
        The base classifier used to predict labels.

    confidence_level : Union[float, Iterable[float]], default=0.9
        Confidence level(s); converted to the internal alpha list by
        `_transform_confidence_level_to_alpha_list`.

    conformity_score : Union[str, BaseClassificationScore], default="lac"
        Conformity score name or instance, resolved through
        `check_and_select_conformity_score`.

    cv : Union[int, BaseCrossValidator], default=5
        Cross-validation strategy, checked by `_check_cv_not_string`
        before being forwarded to the wrapped `_MapieClassifier`.

    n_jobs : Optional[int], default=None
        Forwarded unchanged to the wrapped `_MapieClassifier`.

    verbose : int, default=0
        Forwarded unchanged to the wrapped `_MapieClassifier`.

    random_state : Optional[Union[int, np.random.RandomState]], default=None
        Forwarded unchanged to the wrapped `_MapieClassifier`.
    """
    _check_cv_not_string(cv)

    resolved_score = check_and_select_conformity_score(
        conformity_score,
        BaseClassificationScore,
    )
    self._mapie_classifier = _MapieClassifier(
        estimator=estimator,
        cv=cv,
        conformity_score=resolved_score,
        n_jobs=n_jobs,
        verbose=verbose,
        random_state=random_state,
    )
    self._alphas = _transform_confidence_level_to_alpha_list(confidence_level)

    # Nothing is fitted or conformalized yet, and no predict parameters
    # have been captured.
    self._predict_params: dict = {}
    self.is_fitted_and_conformalized = False

fit_conformalize ¶

fit_conformalize(
    X: ArrayLike,
    y: ArrayLike,
    groups: Optional[ArrayLike] = None,
    fit_params: Optional[dict] = None,
    predict_params: Optional[dict] = None,
) -> CrossConformalClassifier

Estimates the uncertainty of the base classifier in a cross-validation style: fits the base classifier on different folds of the dataset and computes conformity scores on the corresponding out-of-fold data.

PARAMETER	DESCRIPTION
`X`	Features TYPE: `ArrayLike`
`y`	Targets TYPE: `ArrayLike`
`groups`	Groups to pass to the cross-validator. TYPE: `Optional[ArrayLike]` DEFAULT: `None`
`fit_params`	Parameters to pass to the `fit` method of the base classifier. TYPE: `Optional[dict]` DEFAULT: `None`
`predict_params`	Parameters to pass to the `predict` and `predict_proba` methods of the base classifier. These parameters will also be used in the `predict_set` and `predict` methods of this CrossConformalClassifier. TYPE: `Optional[dict]` DEFAULT: `None`

RETURNS	DESCRIPTION
`Self`	This CrossConformalClassifier instance, fitted and conformalized.

Source code in mapie/classification.py

def fit_conformalize(
    self,
    X: ArrayLike,
    y: ArrayLike,
    groups: Optional[ArrayLike] = None,
    fit_params: Optional[dict] = None,
    predict_params: Optional[dict] = None,
) -> CrossConformalClassifier:
    """
    Estimate the uncertainty of the base classifier in a cross-validation
    style: the base classifier is fitted on different folds of the dataset
    and conformity scores are computed on the corresponding out-of-fold data.

    Parameters
    ----------
    X : ArrayLike
        Features

    y : ArrayLike
        Targets

    groups : Optional[ArrayLike] of shape (n_samples,), default=None
        Groups to pass to the cross-validator.

    fit_params : Optional[dict], default=None
        Parameters to pass to the `fit` method of the base classifier.
        `_prepare_fit_params_and_sample_weight` derives from it the fit
        parameters and the `sample_weight` value handed to the backend.

    predict_params : Optional[dict], default=None
        Parameters forwarded to the `predict` and `predict_proba` methods
        of the base classifier. They are stored on the instance and reused
        by the `predict_set` and `predict` methods of this
        CrossConformalClassifier.

    Returns
    -------
    Self
        This CrossConformalClassifier instance, fitted and conformalized.
    """
    _raise_error_if_method_already_called(
        "fit_conformalize", self.is_fitted_and_conformalized
    )

    prepared_fit_params, weights = _prepare_fit_params_and_sample_weight(
        fit_params
    )
    # Keep the prepared parameters: the prediction methods replay them later.
    prepared_predict_params = _prepare_params(predict_params)
    self._predict_params = prepared_predict_params

    self._mapie_classifier.fit(
        X=X,
        y=y,
        groups=groups,
        sample_weight=weights,
        fit_params=prepared_fit_params,
        predict_params=prepared_predict_params,
    )
    self.is_fitted_and_conformalized = True

    return self

predict_set ¶

predict_set(
    X: ArrayLike,
    conformity_score_params: Optional[dict] = None,
    agg_scores: str = "mean",
) -> Tuple[NDArray, NDArray]

For each sample in X, predicts a label (using the base classifier), and a set of labels.

If several confidence levels were provided during initialisation, several sets will be predicted for each sample. See the return signature.

PARAMETER	DESCRIPTION
`X`	Features TYPE: `ArrayLike`
`conformity_score_params`	Parameters specific to conformity scores, used at prediction time. The only example for now is `include_last_label`, available for `aps` and `raps` conformity scores. For detailed information on `include_last_label`, see the docstring of `APSConformityScore.get_prediction_sets`. TYPE: `Optional[dict]` DEFAULT: `None`
`agg_scores`	How to aggregate conformity scores. Each classifier fitted on different folds of the dataset is used to produce conformity scores on the test data. The `agg_scores` parameter controls how those scores are aggregated. Valid options: "mean", which takes the mean of the scores; and "crossval", which compares the scores between all training data and each test point for each label to estimate if the label must be included in the prediction set (follows Algorithm 2 of Classification with Valid and Adaptive Coverage, Romano+2020). TYPE: `str` DEFAULT: `"mean"`

RETURNS	DESCRIPTION
`Tuple[NDArray, NDArray]`	Two arrays: the prediction labels, of shape `(n_samples,)`, and the prediction sets, of shape `(n_samples, n_class, n_confidence_levels)`.

Source code in mapie/classification.py

def predict_set(
    self,
    X: ArrayLike,
    conformity_score_params: Optional[dict] = None,
    agg_scores: str = "mean",
) -> Tuple[NDArray, NDArray]:
    """
    Predict, for every sample in X, a label (from the base classifier)
    together with a set of labels.

    When several confidence levels were given at initialisation, one set
    per confidence level is produced for each sample. See the return
    signature.

    Parameters
    ----------
    X : ArrayLike
        Features

    conformity_score_params : Optional[dict], default=None
        Parameters specific to conformity scores, used at prediction time.

        Only the `include_last_label` key is read here; it falls back to
        `True` when absent. It is available for the `aps` and `raps`
        conformity scores. For detailed information on
        `include_last_label`, see the docstring of
        `APSConformityScore.get_prediction_sets`.

    agg_scores : str, default="mean"
        How to aggregate conformity scores.

        Each classifier fitted on different folds of the dataset is used to
        produce conformity scores on the test data. The `agg_scores`
        parameter controls how those scores are aggregated. Valid options:

        - "mean", takes the mean of scores.
        - "crossval", compares the scores between all training data and each
          test point for each label to estimate if the label must be
          included in the prediction set. Follows algorithm 2 of
          Classification with Valid and Adaptive Coverage (Romano+2020).

    Returns
    -------
    Tuple[NDArray, NDArray]
        Two arrays:

        - Prediction labels, of shape `(n_samples,)`
        - Prediction sets, of shape `(n_samples, n_class, n_confidence_levels)`
    """
    _raise_error_if_previous_method_not_called(
        "predict_set", "fit_conformalize", self.is_fitted_and_conformalized
    )

    score_options = _prepare_params(conformity_score_params)
    keep_last_label = score_options.get("include_last_label", True)

    # Replay the predict parameters captured during `fit_conformalize`.
    raw_output = self._mapie_classifier.predict(
        X,
        alpha=self._alphas,
        include_last_label=keep_last_label,
        agg_scores=agg_scores,
        **self._predict_params,
    )
    return _cast_predictions_to_ndarray_tuple(raw_output)

predict ¶

predict(X: ArrayLike) -> NDArray

For each sample in X, returns the predicted label by the base classifier.

PARAMETER	DESCRIPTION
`X`	Features TYPE: `ArrayLike`

RETURNS	DESCRIPTION
`NDArray`	Array of predicted labels, with shape `(n_samples,)`.

Source code in mapie/classification.py

def predict(self, X: ArrayLike) -> NDArray:
    """
    Return the base classifier's predicted label for every sample in X.

    Parameters
    ----------
    X : ArrayLike
        Features

    Returns
    -------
    NDArray
        Array of predicted labels, with shape `(n_samples,)`.
    """
    _raise_error_if_previous_method_not_called(
        "predict", "fit_conformalize", self.is_fitted_and_conformalized
    )

    # alpha=None requests point predictions only; the predict parameters
    # captured during `fit_conformalize` are replayed.
    point_output = self._mapie_classifier.predict(
        X, alpha=None, **self._predict_params
    )
    return _cast_point_predictions_to_ndarray(point_output)