Note
Click here to download the full example code
Conformal Predictive Distribution¶
In this advanced analysis, we show how to use MAPIE to build a Conformal Predictive Distribution (CPD) in a few steps. The following reference papers give more information about CPD:
[1] Schweder, T., & Hjort, N. L. (2016). Confidence, likelihood, probability (Vol. 41). Cambridge University Press.
[2] Vovk, V., Shen, J., Manokhin, V., & Xie, M. G. (2017, May). Nonparametric predictive distributions based on conformal prediction. In Conformal and probabilistic prediction and applications (pp. 82-102). PMLR.
import warnings
import numpy as np
from matplotlib import pyplot as plt
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from mapie.conformity_scores import AbsoluteConformityScore, ResidualNormalisedScore
from mapie.regression import SplitConformalRegressor
from mapie.utils import train_conformalize_test_split
# Silence every warning category for the rest of the script
# (presumably to keep the rendered example output uncluttered).
warnings.filterwarnings("ignore")
# Single seed reused below for data generation, the data split and the
# normalised conformity score, so the example is reproducible.
RANDOM_STATE = 15
1. Generating toy dataset¶
Here, we simply generate data for a regression task, then split it into training, conformalization, and test sets.
# Draw a noisy, single-feature regression problem.
X, y = make_regression(
    n_samples=1000,
    n_features=1,
    noise=20,
    random_state=RANDOM_STATE,
)

# Split the samples 60% / 20% / 20% into training, conformalization and
# test subsets.
(
    X_train,
    X_conformalize,
    X_test,
    y_train,
    y_conformalize,
    y_test,
) = train_conformalize_test_split(
    X,
    y,
    train_size=0.6,
    conformalize_size=0.2,
    test_size=0.2,
    random_state=RANDOM_STATE,
)

# Quick look at the training data.
plt.scatter(X_train, y_train, alpha=0.3)
plt.xlabel("x")
plt.ylabel("y")
plt.show()

2. Defining a Conformal Predictive Distribution class with MAPIE¶
To obtain the cumulative distribution function of
a prediction with MAPIE, we subclass
SplitConformalRegressor and add a new method named
get_cumulative_distribution_function.
class MapieConformalPredictiveDistribution(SplitConformalRegressor):
    """``SplitConformalRegressor`` extended with a predictive-distribution method.

    The constructor forwards every keyword argument unchanged to
    ``SplitConformalRegressor``; the only addition is
    ``get_cumulative_distribution_function``.
    """

    def __init__(self, **kwargs) -> None:
        """Forward all keyword arguments to ``SplitConformalRegressor``."""
        super().__init__(**kwargs)

    def get_cumulative_distribution_function(self, X):
        """Return the estimated distribution of the target for each row of ``X``.

        The point predictions for ``X`` are combined with the non-NaN
        conformity scores stored on the underlying regressor through the
        conformity score's ``get_estimation_distribution`` method.

        Parameters
        ----------
        X : array-like
            Samples to predict on; passed as-is to ``predict_interval`` and
            to ``get_estimation_distribution``.

        Returns
        -------
        Whatever ``get_estimation_distribution`` returns (presumably one row
        of estimated target values per sample -- confirm against MAPIE docs).
        """
        # Only the point predictions are needed; the intervals are discarded.
        point_predictions, _intervals = self.predict_interval(X)

        # Drop NaN entries from the stored conformity scores.
        all_scores = self._mapie_regressor.conformity_scores_
        valid_scores = all_scores[~np.isnan(all_scores)]

        # Predictions are passed as a column vector.
        column_predictions = point_predictions.reshape((-1, 1))
        return self._conformity_score.get_estimation_distribution(
            column_predictions, valid_scores, X=X
        )
Now, we propose to use it with two different conformity scores -
AbsoluteConformityScore and
ResidualNormalisedScore -
in split-conformal inference.
# --- Regressor 1: asymmetric absolute residual score ---
absolute_score = AbsoluteConformityScore(sym=False)
mapie_regressor_1 = MapieConformalPredictiveDistribution(
    estimator=LinearRegression(),
    conformity_score=absolute_score,
    prefit=False,
)
mapie_regressor_1.fit(X_train, y_train)
mapie_regressor_1.conformalize(X_conformalize, y_conformalize)
# Keep only the point predictions (first element of the returned pair).
y_pred_1 = mapie_regressor_1.predict_interval(X_test)[0]
y_cdf_1 = mapie_regressor_1.get_cumulative_distribution_function(X_test)

# --- Regressor 2: asymmetric normalised residual score ---
normalised_score = ResidualNormalisedScore(
    sym=False, random_state=RANDOM_STATE
)
mapie_regressor_2 = MapieConformalPredictiveDistribution(
    estimator=LinearRegression(),
    conformity_score=normalised_score,
    prefit=False,
)
mapie_regressor_2.fit(X_train, y_train)
mapie_regressor_2.conformalize(X_conformalize, y_conformalize)
y_pred_2 = mapie_regressor_2.predict_interval(X_test)[0]
y_cdf_2 = mapie_regressor_2.get_cumulative_distribution_function(X_test)

# Test observations together with the point predictions of the first regressor.
plt.scatter(X_test, y_test, alpha=0.3)
plt.plot(X_test, y_pred_1, color="C1")
plt.xlabel("x")
plt.ylabel("y")
plt.show()

3. Visualizing the cumulative distribution function¶
We now plot the cumulative distribution functions of the predictive distributions obtained for the first test sample, in order to compare the two conformity scores.
# Number of histogram bins passed to plot_cdf when drawing each CDF below.
nb_bins = 100
def plot_cdf(data, bins, **kwargs):
    """Draw the empirical CDF of ``data`` as a histogram-based step curve.

    Parameters
    ----------
    data : array-like
        Values whose cumulative distribution is plotted.
    bins : int or sequence
        Forwarded to ``np.histogram`` as its ``bins`` argument.
    **kwargs
        Extra keyword arguments forwarded to ``plt.plot``
        (e.g. ``label``, ``alpha``).
    """
    hist, edges = np.histogram(data, bins=bins)
    cumulative = hist.cumsum() / hist.sum()

    # Build the step outline: every bin contributes its left and right edge
    # at the same cumulative height, i.e.
    #   x = [e0, e1, e1, e2, ..., e(n-1), en],  y = [c0, c0, c1, c1, ...].
    step_x = np.repeat(edges, 2)[1:-1]
    step_y = np.repeat(cumulative, 2)
    plt.plot(step_x, step_y, **kwargs)
# Overlay the CDFs obtained for the first test sample with each score.
for sample_distribution, curve_label in (
    (y_cdf_1[0], "Absolute Residual Score"),
    (y_cdf_2[0], "Normalized Residual Score"),
):
    plot_cdf(sample_distribution, bins=nb_bins, label=curve_label, alpha=0.8)

# Mark the point prediction for that same sample.
plt.vlines(
    y_pred_1[0],
    0,
    1,
    label="Prediction",
    color="C2",
    linestyles="dashed",
)
plt.legend(loc=2)
plt.show()

Total running time of the script: ( 0 minutes 0.173 seconds)
Download Python source code: plot_conformal_predictive_distribution.py
Download Jupyter notebook: plot_conformal_predictive_distribution.ipynb