## Machine Learning
- 명시적으로 프로그래밍 하지 않아도 컴퓨터에 학습할 수 있는 능력을 부여하는 학문
- 주어진 데이터를 통해 규칙을 찾는 것이다.
๐ iris ๋ฐ์ดํฐ ์ import ํ๊ธฐ
- sklearn.datasets์์ iris ๋ฐ์ดํฐ๋ฅผ import
- ๋ฐ์ดํฐ ์ ์ ํ์ฉํ์ฌ setosa, versicolor, virginica๋ฅผ ๊ตฌ๋ถํด๋ณด์
from sklearn.datasets import load_iris
# Load the iris dataset that ships with scikit-learn.
iris = load_iris()
# List the top-level keys of the loaded dataset object (shown as cell output).
iris.keys()
>>>>
dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
# Print the species names stored under the 'target_names' key.
print(iris['target_names'])
>>>>
['setosa' 'versicolor' 'virginica']
import pandas as pd

# Build a DataFrame of the feature measurements, one column per feature name.
iris_pd = pd.DataFrame(iris.data, columns=iris['feature_names'])
# Attach the class label as a 'species' column. The box/pair/scatter plots
# further down use 'species' as their y-axis or hue variable and fail with a
# missing-column error if it is absent. Values are the integer class codes
# from iris.target (indices into iris['target_names']).
iris_pd['species'] = iris.target
# Display the resulting frame (notebook cell output).
iris_pd
๐ iris ๋ฐ์ดํฐ๋ฅผ ๊ทธ๋ํ๋ก ๊ทธ๋ ค๋ณด์
- ๋ฐ์ดํฐ๋ฅผ ํ์ธ, petal length์ petal width๋ฅผ ๊ฐ์ง๊ณ ํ์ข ์ ๊ตฌ๋ถํ ์ ์์ ๊ฒ ๊ฐ๋ค๋ ๊ฐ๋ฅ์ฑ ํ์ธ
import seaborn as sns
import matplotlib.pyplot as plt
# One horizontal box plot per feature, grouped by species, each on its own figure.
for feature in ('sepal length (cm)', 'petal length (cm)', 'petal width (cm)'):
    plt.figure(figsize=(12, 6))
    sns.boxplot(data=iris_pd, x=feature, y='species', orient='h')

# Pairwise view restricted to the two petal measurements, coloured by species.
sns.pairplot(
    data=iris_pd,
    hue='species',
    vars=['petal length (cm)', 'petal width (cm)'],
    height=4,
);

# Larger standalone scatter plot of petal length against petal width.
plt.figure(figsize=(12, 10))
sns.scatterplot(
    x='petal length (cm)',
    y='petal width (cm)',
    hue='species',
    data=iris_pd,
);
๐ Decision Tree์ ๋ถํ ๊ธฐ์ค
๐ป entropy์ ๊ฐ๋
- ์ผ๋ง๋ ์ ๋ณด๊ฐ ๋ฌด์ง์ํ๊ณ ๋ถํ์ค ํ๊ฐ? (๋ฌด์ง์์ ์ ๋๋ฅผ ๋ํ๋ธ๋ค.)
- ๋ถํ ํ๋ฉด ์ํธ๋กํผ๊ฐ ๋ฎ์์ง๋ ๊ฒ์ ํ์ธํ ์ ์๋ค.
### Gini 계수
- Gini index 혹은 불순도율
- 엔트로피의 계산량이 많아서 보다 계산량이 적은 지니계수를 사용하는 경우가 많다.
- 분할하면 지니계수가 낮아지는 것을 확인할 수 있다.
--> 엔트로피도 지니 계수도 낮을수록 좋다
๐ Scikit Learn
- iris data์ petal width, length ๋ฐ์ดํฐ๋ฅผ ํ์ฉํ์ฌ 'setosa', 'versicolor', 'virginica' ๋ฅผ ๊ตฌ๋ถํ ์ ์๋๋ก ํ์ต ์์ผ๋ณด์
# Train the model with fit(): features are columns 2 onward of iris.data
# (presumably petal length and petal width -- confirm against feature_names),
# labels are iris.target.
from sklearn.tree import DecisionTreeClassifier
iris_clf = DecisionTreeClassifier()
iris_clf.fit(iris.data[:, 2:], iris.target)
### 학습 한 데이터와 그 원래의 정답 데이터를 비교해보자.
# Predict with the trained model, using the same feature columns it was fitted on.
from sklearn.metrics import accuracy_score
y_pred_tr = iris_clf.predict(iris.data[:, 2:])
# Display the predicted class codes (notebook cell output).
y_pred_tr
>>>>
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
# Display the ground-truth class codes for comparison with y_pred_tr.
iris.target
>>>>
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
### 정확도를 알아보자
## Check how accurate the predictions are
# NOTE: y_pred_tr was predicted on the same rows the model was fitted on, so
# this is training accuracy, not an estimate on unseen data.
accuracy_score(iris.target, y_pred_tr)
>>>>
0.9933333333333333
'Study_note(zb_data) > Machine Learning' 카테고리의 다른 글
스터디노트 (ML6_Wine) (0) | 2023.09.22 |
---|---|
스터디노트 (ML5) (0) | 2023.09.21 |
스터디노트 (ML4) (0) | 2023.09.21 |
스터디노트 (ML3) (0) | 2023.09.21 |
스터디 노트 (ML2) (0) | 2023.09.19 |