import
numpy as np
import
matplotlib.pyplot as plt
from
sklearn.ensemble
import
BaggingClassifier
from
sklearn.tree
import
DecisionTreeClassifier
from
mlxtend.evaluate
import
bias_variance_decomp
from
sklearn.datasets
import
load_iris
from
sklearn.model_selection
import
train_test_split
import
numpy as np
import
matplotlib.pyplot as plt
# Load the Iris data as a (features, targets) pair rather than a Bunch object.
X, y = load_iris(return_X_y=True)

# Hold out 25% of the samples for evaluation; shuffling with a fixed seed keeps
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=True,
    random_state=104,
)
# --- Single estimator: one decision tree with library defaults ---------------
# fit() returns the estimator itself, so construction and fitting can be chained.
tree = DecisionTreeClassifier().fit(X_train, y_train)
y_hat_pop_tree = tree.predict(X_test)

# Decompose the 0-1 loss of the tree into expected loss, bias and variance.
# NOTE(review): bias_variance_decomp refits the estimator it is handed on
# resampled training data, so `tree` may no longer be the model fitted above
# once this call returns -- confirm against the mlxtend documentation.
y_error, avg_bias, avg_var = bias_variance_decomp(
    tree,
    X_train, y_train,
    X_test, y_test,
    loss='0-1_loss',
    random_seed=23,
)

# One print call, one line per item: same output as four separate prints.
print(
    'Using Single Estimator',
    'Average expected loss: %.3f' % y_error,
    'Average bias: %.3f' % avg_bias,
    'Average variance: %.3f' % avg_var,
    sep='\n',
)
# --- Ensemble: bagging classifier with library defaults ----------------------
# fit() returns the estimator itself, so construction and fitting can be chained.
bagging = BaggingClassifier().fit(X_train, y_train)
y_hat_pop_bagging = bagging.predict(X_test)

# Decompose the 0-1 loss of the bagged ensemble on the same train/test split.
# NOTE(review): random_seed here (123) differs from the seed used for the
# single-tree decomposition (23), so the two runs presumably resample the
# training set differently -- confirm whether that is intended.
by_error, bavg_bias, bavg_var = bias_variance_decomp(
    bagging,
    X_train, y_train,
    X_test, y_test,
    loss='0-1_loss',
    random_seed=123,
)

# One print call, one line per item: same output as four separate prints.
print(
    'Using Bagging',
    'Average expected loss: %.3f' % by_error,
    'Average bias: %.3f' % bavg_bias,
    'Average variance: %.3f' % bavg_var,
    sep='\n',
)
# --- Side-by-side bar charts of the two decompositions -----------------------
# The decompositions above use loss='0-1_loss'. For that loss the bias term is
# reported as a plain (unsquared) quantity, matching the 'Average bias' text
# printed earlier, so the bar is labelled 'Bias' rather than 'Bias^2'.
labels = ['Expected Loss', 'Bias', 'Variance']
tree_values = [y_error, avg_bias, avg_var]
bagging_values = [by_error, bavg_bias, bavg_var]

plt.figure(figsize=(12, 5))

# Left panel: single decision tree.
plt.subplot(1, 2, 1)
plt.bar(labels, tree_values)
plt.xlabel('Terms')
plt.ylabel('Value')
plt.title('Bias-Variance Decomposition (Decision Tree)')

# Right panel: bagging ensemble.
# NOTE(review): the two panels have independent y-axes, so bar heights are not
# directly comparable across panels -- read the tick values.
plt.subplot(1, 2, 2)
plt.bar(labels, bagging_values)
plt.xlabel('Terms')
plt.ylabel('Value')
plt.title('Bias-Variance Decomposition (Bagging)')

plt.tight_layout()
plt.show()