Study_note(zb_data)/Machine Learning
스터디노트 (Cost Function 2)
KloudHyun
2023. 9. 28. 23:51
📌 Boston 집 값에 대해 자세하게 알아보자
# Preview the first rows of `boston` (defined earlier, outside this excerpt).
boston.head()

# Histogram of the house prices (PRICE column).
# Plotly Express now ships inside plotly as the `plotly.express` submodule;
# the standalone `plotly_express` package is only a deprecated shim, so the
# maintained module is imported instead. The `px` alias is unchanged.
import plotly.express as px

fig = px.histogram(boston, x='PRICE')
fig.show()
📌 상관 계수 구하기
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise correlation between the columns of `boston`, rounded to one
# decimal place.
corr_mat = boston.corr()
corr_mat = corr_mat.round(1)

# Draw the matrix as a heatmap with each cell annotated by its value,
# using the 'bwr' colormap.
sns.heatmap(corr_mat, cmap='bwr', annot=True)
📌 plot 그림 그리기
# Seaborn appearance: 'darkgrid' style and a (12, 6) figure size.
sns.set_style('darkgrid')
sns.set(rc={'figure.figsize': (12, 6)})

# Two side-by-side axes; each gets a regression plot of PRICE against one
# feature (RM on the left, LSTAT on the right).
fig, ax = plt.subplots(ncols=2)
for axis, feature in zip(ax, ("RM", "LSTAT")):
    sns.regplot(data=boston, x=feature, y="PRICE", ax=axis)
📌sklearn
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Features are every column except PRICE; PRICE is the regression target.
X = boston.drop('PRICE', axis=1)
y = boston['PRICE']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)

# Fit an ordinary linear regression on the training split.
reg = LinearRegression()
reg.fit(X_train, y_train)

# Predict on both splits and report the root mean squared error of each.
pred_tr = reg.predict(X_train)
pred_test = reg.predict(X_test)
rmse_tr = (np.sqrt(mean_squared_error(y_train, pred_tr)))
rmse_test = (np.sqrt(mean_squared_error(y_test, pred_test)))
print('RMSE Train data : ', rmse_tr)
print('RMSE Test data : ', rmse_test)

# Actual test prices against the predictions for the test data.
plt.scatter(y_test, pred_test)
plt.xlabel('Real ($1000)')
plt.ylabel('Predicted Prices')
# Diagonal from (0, 0) to (50, 50): a point on this line is a prediction
# equal to the actual value.
plt.plot([0, 50], [0, 50])
plt.show()
📌sklearn
- LSTAT 값도 같이 빼고 모델을 만들어보자.
# Second experiment: remove LSTAT from the features along with the target.
y = boston['PRICE']
X = boston.drop(columns=['PRICE', 'LSTAT'])

# Same 80/20 split and seed as used for the full-feature model.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=13, test_size=0.2
)

# fit() returns the estimator itself, so construction and fitting chain.
reg = LinearRegression().fit(X_train, y_train)
pred_tr, pred_test = reg.predict(X_train), reg.predict(X_test)

# Root mean squared error for the training and test splits.
mse_tr = mean_squared_error(y_train, pred_tr)
mse_test = mean_squared_error(y_test, pred_test)
rmse_tr, rmse_test = np.sqrt(mse_tr), np.sqrt(mse_test)

print('RMSE Train data : ', rmse_tr)
print('RMSE Test data : ', rmse_test)
>>>>
RMSE Train data : 5.165137874244864
RMSE Test data : 5.2955950325971655
# Scatter of the actual test prices against the model's predictions.
plt.scatter(x=y_test, y=pred_test)
plt.ylabel('Predicted Prices')
plt.xlabel('Real ($1000)')
# Straight reference line through (0, 0) and (50, 50).
plt.plot((0, 50), (0, 50))
plt.show()