💠 신용카드 부정 사용자 검출해보기 2
🔻특정 데이터를 가지고 다시 도전해보자.
# Plot the distribution of the raw transaction amounts.
# sns.distplot is deprecated and scheduled for removal from seaborn; histplot
# with a density-normalised histogram plus a KDE overlay is the documented
# replacement. bins=50 keeps the histogram coarse (distplot capped its
# automatic bin count) so a heavy-tailed column does not explode into
# thousands of bars.
plt.figure(figsize=(12, 10))
sns.histplot(raw_data['Amount'], color='r', kde=True, stat='density', bins=50)
plt.show()
🔻StandardScaler 적용
→ Amount 컬럼이 비대칭성이 너무 높다 (데이터가 한 쪽으로 쏠려 있다)
→ 따라서, 정규화를 시켜서 편향성을 바꿔보자.
→ 결과 값에 변동성이 없이 미미한 수준이다..
from sklearn.preprocessing import StandardScaler

# Standardise the raw transaction amount to zero mean / unit variance.
# StandardScaler expects a 2-D array, hence the reshape to a single column.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split, so test-set statistics leak into this feature; fit on
# the training rows only if a leakage-free evaluation is required.
scaler = StandardScaler()
amount_n = scaler.fit_transform(raw_data['Amount'].values.reshape(-1, 1))

# Drop the first column and the last two columns of raw_data
# (presumably Time, Amount and Class -- confirm against the loaded CSV).
# Take an explicit copy: an .iloc slice may be a view of raw_data, and
# adding a column to it raises SettingWithCopyWarning.
raw_data_copy = raw_data.iloc[:, 1:-2].copy()
raw_data_copy['Amount_Scaled'] = amount_n
raw_data_copy.head()
→ V1 ~ Amount_Scaled 까지 컬럼을 만든 것
from sklearn.model_selection import train_test_split

# Stratified hold-out split: 30% of the rows go to the test set, the class
# ratio of y is preserved in both partitions, and the fixed seed keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    raw_data_copy,
    y,
    test_size=0.3,
    random_state=13,
    stratify=y,
)

# Display names and classifiers, kept in the same order so they pair up
# by position.
model_names = ['Logistic Regression', 'DecisionTree', 'RandomForest', 'LightGBM']
models = [lr_clf, dt_clf, rf_clf, lgbm_clf]

# get_result_pd is defined elsewhere in this notebook; presumably it fits
# each model and tabulates its metrics -- confirm against its definition.
result = get_result_pd(models, model_names, X_train, y_train, X_test, y_test)
result
๐ปroc_curve๋ฅผ ๊ทธ๋ ค๋ณด์
from sklearn.metrics import roc_curve


def draw_roc_curve(models, model_names, X_test, y_test):
    """Plot one ROC curve per model on a single shared figure.

    Args:
        models: Sequence of classifiers exposing ``predict_proba``; they
            must already be fitted, since this function only predicts.
        model_names: Legend labels, matched to ``models`` by position.
        X_test: Feature matrix the models are scored on.
        y_test: True binary labels corresponding to ``X_test``.

    Raises:
        IndexError: If ``model_names`` has fewer entries than ``models``.
    """
    # Fail before any drawing happens instead of midway through the loop,
    # which would leave a half-populated figure behind.
    if len(model_names) < len(models):
        raise IndexError('model_names has fewer entries than models')

    plt.figure(figsize=(12, 10))

    for clf, name in zip(models, model_names):
        # Column 1 of predict_proba holds the score for the positive class.
        pred = clf.predict_proba(X_test)[:, 1]
        # The thresholds returned by roc_curve are not needed for the plot.
        fpr, tpr, _ = roc_curve(y_test, pred)
        plt.plot(fpr, tpr, label=name)

    # Diagonal reference line: the expected curve of a random classifier.
    plt.plot([0, 1], [0, 1], 'k--', label='random guess')
    plt.title('ROC')
    plt.legend()
    plt.grid()
    plt.show()


draw_roc_curve(models, model_names, X_test, y_test)
🔻log 함수를 적용 시켜보자
→ 큰 값은 상대적으로 낮은 값으로 바꿔주고, 낮은 값은 그대로 사용하도록 설정 → X가 커질수록, y 값을 억제 해주는 기능을 가지고 있다 (log scale 적용)
# log1p computes log(1 + x), so zero amounts map to 0 instead of -inf while
# large amounts are compressed.
amount_log = np.log1p(raw_data['Amount'])
# Store the log-transformed values under the existing 'Amount_Scaled'
# column name so downstream code keeps working unchanged.
raw_data_copy['Amount_Scaled'] = amount_log

# sns.distplot is deprecated and scheduled for removal from seaborn; histplot
# with a density-normalised histogram plus a KDE overlay is the documented
# replacement. bins=50 keeps the bar count bounded.
plt.figure(figsize=(12, 6))
sns.histplot(raw_data_copy['Amount_Scaled'], color='r', kde=True, stat='density', bins=50)
plt.show()
from sklearn.model_selection import train_test_split

# Split again using raw_data_copy as it stands at this point. The settings
# (30% test rows, seed 13, stratified on y) keep the class ratio equal in
# both partitions and make the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    raw_data_copy,
    y,
    test_size=0.3,
    random_state=13,
    stratify=y,
)

# Display names and classifiers, kept in the same order so they pair up
# by position.
model_names = ['Logistic Regression', 'DecisionTree', 'RandomForest', 'LightGBM']
models = [lr_clf, dt_clf, rf_clf, lgbm_clf]

# get_result_pd is defined elsewhere in this notebook; presumably it fits
# each model and tabulates its metrics -- confirm against its definition.
result = get_result_pd(models, model_names, X_train, y_train, X_test, y_test)
result

# Compare the ROC curves of the models on the new test partition.
draw_roc_curve(models, model_names, X_test, y_test)
'Study_note(zb_data) > Machine Learning' 카테고리의 다른 글
스터디노트 (자연어 처리) (0) | 2023.10.04 |
---|---|
스터디노트 (credit card data 3, 4) (1) | 2023.10.02 |
스터디노트 (credit card data 1) (0) | 2023.10.02 |
스터디노트 (kNN) (1) | 2023.10.02 |
스터디노트 (Boosting Algorithm) (1) | 2023.10.02 |