"""Fit an OLS regression on the real-estate CSV and draw diagnostic plots.

The script reads the CSV, fits ``Y ~ X3 + X2`` with statsmodels, prints the
model summary, and then draws a 2x2 grid of diagnostics (residuals vs
fitted, normal Q-Q, scale-location, residuals vs leverage) followed by a
statsmodels influence plot.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf


def _diagnostic_scatter(axis, x, y, xlabel, ylabel, title):
    """Draw one scatter panel with a dashed grey reference line at y = 0."""
    # Plain matplotlib scatter: the file never imports seaborn, so the
    # former ``sns.scatterplot`` calls raised NameError.
    axis.scatter(x, y, edgecolor='white')
    axis.axhline(y=0, color='grey', linestyle='dashed')
    axis.set_xlabel(xlabel)
    axis.set_ylabel(ylabel)
    axis.set_title(title)


data = pd.read_csv('/content/Real estate.csv')
# A bare ``data.head()`` only displays as the last expression of a notebook
# cell; print it so the preview is visible however the script is run.
print(data.head())

model = smf.ols(formula=' Y ~ X3 + X2', data=data)
results = model.fit()
print(results.summary())

# Quantities shared by the diagnostic panels below.
residuals = results.resid
fitted_value = results.fittedvalues
stand_resids = results.resid_pearson
influence = results.get_influence()
leverage = influence.hat_matrix_diag

# matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*"
# and later removed the old names, so pick whichever this install provides.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
# Apply the style BEFORE creating the figure so it actually takes effect, and
# set the figure size afterwards so the style sheet cannot override it.
plt.style.use(style_name)
plt.rcParams["figure.figsize"] = (20, 15)

fig, ax = plt.subplots(nrows=2, ncols=2)

# Top-left: raw residuals against fitted values.
_diagnostic_scatter(
    ax[0, 0], fitted_value, residuals,
    'Fitted Values', 'Residuals', 'Residuals vs Fitted',
)

# Top-right: normal Q-Q plot of the residuals.
sm.qqplot(residuals, fit=True, line='45', ax=ax[0, 1], c='#4C72B0')
ax[0, 1].set_title('Normal Q-Q')

# Bottom-left: scale-location plot. The y-axis is sqrt(|standardized
# residuals|); the previous version plotted the raw residuals here, which
# merely duplicated the top-left panel under a different label.
_diagnostic_scatter(
    ax[1, 0], fitted_value, np.sqrt(np.abs(stand_resids)),
    'Fitted values', 'Sqrt(standardized residuals)', 'Scale-Location Plot',
)

# Bottom-right: standardized residuals against leverage (hat-matrix diagonal).
# The y-axis label now matches what is plotted (no square root is taken).
_diagnostic_scatter(
    ax[1, 1], leverage, stand_resids,
    'Leverage', 'Standardized residuals', 'Residuals vs Leverage Plot',
)

plt.tight_layout()
plt.show()

# The influence plot is drawn on its own figure; show it explicitly, since
# the earlier plt.show() has already returned by the time it is created.
influence_fig = sm.graphics.influence_plot(results, criterion="cooks")
plt.show()