# Third-party imports, one statement per line (PEP 8), grouped by package.
import numpy as np
import pandas as pd

import sklearn
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Load the passenger dataset.
# NOTE(review): the path has no file extension -- confirm "gfg_data" is the
# intended filename (e.g. "gfg_data.csv") and that it is on the working path.
data = pd.read_csv("gfg_data")

# Feature matrix: the engineered columns fed to both classifiers below.
x = data[[
    'Pclass',
    'Sex',
    'Age',
    'Parch',
    'Embarked',
    'Fare',
    'Has_Cabin',
    'FamilySize',
    'title',
    'IsAlone',
]]

# Target as a 1-D Series rather than a single-column DataFrame
# (data[['Survived']]), so classifier .fit() calls do not emit a
# DataConversionWarning about a column-vector y.
y = data['Survived']
# Hold out 30% of the rows for evaluation.
# NOTE(review): random_state=None re-shuffles on every run, so every score
# below changes run to run -- pass a fixed seed if reproducibility matters.
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.3, random_state=None
)
# --- Logistic-regression baseline ----------------------------------------
# max_iter raised above the default (100) so the solver can converge on
# unscaled features instead of stopping early with a ConvergenceWarning.
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, Y_train)
Y_pred = lr.predict(X_test)

# Mean accuracy on the held-out set (2 d.p.), plus absolute/squared error
# computed on the predicted 0/1 labels (4 d.p.).
LogReg = round(lr.score(X_test, Y_test), 2)
mae_lr = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
mse_lr = round(metrics.mean_squared_error(Y_test, Y_pred), 4)
# --- k-nearest-neighbours model -------------------------------------------
knn = KNeighborsClassifier(n_neighbors=2)
knn.fit(X_train, Y_train)
Y_pred = knn.predict(X_test)

# Same metrics as the logistic-regression block; the error metrics are now
# rounded to 4 d.p. for consistency (the original left these two unrounded,
# making the comparison table's precision uneven between models).
KNN = round(knn.score(X_test, Y_test), 2)
mae_knn = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
mse_knn = round(metrics.mean_squared_error(Y_test, Y_pred), 4)
# Side-by-side summary table: one row per model, with its held-out accuracy
# and label-error metrics.
compare_models = pd.DataFrame({
    'Model': ['LogReg', 'KNN'],
    'Score': [LogReg, KNN],
    'MAE': [mae_lr, mae_knn],
    'MSE': [mse_lr, mse_knn],
})
print(compare_models)