import matplotlib.pyplot as plt
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
# Compare a logistic-regression classifier on the iris data before and after
# sigmoid calibration: print the log loss of both variants on a held-out test
# split and overlay histograms of their predicted class-0 probabilities.
data = load_iris()

# Three disjoint, stratified splits with a fixed seed so the reported numbers
# are reproducible and every class appears in every split:
#   train (60%) -> fit the base classifier
#   calib (20%) -> fit the calibrator
#   test  (20%) -> evaluation only
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    stratify=data.target,
    random_state=0,
)
X_train, X_calib, y_train, y_calib = train_test_split(
    X_train,
    y_train,
    test_size=0.25,  # 25% of the remaining 80% == 20% of the full data set
    stratify=y_train,
    random_state=0,
)

# Raise the iteration cap above the default so the solver has room to converge
# on the unscaled features.
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

probabilities = clf.predict_proba(X_test)
# Passing the label set explicitly keeps the probability columns aligned with
# the classes even if a class were missing from y_test.
Uncalibrated_log_loss = log_loss(y_test, probabilities, labels=clf.classes_)
print('Uncalibrated log loss:', Uncalibrated_log_loss)

# cv='prefit' tells the wrapper that clf is already trained, so fit() below only
# learns the calibration mapping.
# NOTE(review): recent scikit-learn releases deprecate cv='prefit' in favour of
# wrapping the model in FrozenEstimator - confirm against the installed version.
calibrated_classifier = CalibratedClassifierCV(clf, cv='prefit', method='sigmoid')

# The calibrator must be fitted on data that is used neither for training the
# base model nor for evaluation. Fitting it on the test split (as this script
# previously did) leaks the test labels and makes the calibrated log loss
# optimistically biased.
calibrated_classifier.fit(X_calib, y_calib)

calibrated_probabilities = calibrated_classifier.predict_proba(X_test)
calibrated_log_loss = log_loss(
    y_test, calibrated_probabilities, labels=calibrated_classifier.classes_
)
print('Calibrated log loss:', calibrated_log_loss)

# Overlay the class-0 probability distributions of both models on the test split.
plt.figure(figsize=(8, 6))
plt.hist(probabilities[:, 0], bins=20, alpha=0.5, label='Uncalibrated')
plt.hist(calibrated_probabilities[:, 0], bins=20, alpha=0.5, label='Calibrated')
plt.legend(loc='upper center')
plt.title('Histogram of Predicted Probabilities')
plt.xlabel('Predicted Probability')
plt.ylabel('Frequency')
plt.show()