Study_note(zb_data)/Machine Learning

스터디노트 (ML5)

KloudHyun 2023. 9. 21. 23:08

📌 LabelEncoder

import pandas as pd

# Small example frame: 'A' holds string categories to encode, 'B' holds integers.
df = pd.DataFrame({
    'A': ['a', 'b', 'c', 'a', 'b'],
    'B': [1, 2, 3, 1, 0],
})
df

📌 Label_encoder

- fit -> transform

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
le.fit(df['A'])
le.classes_
>>>>
array(['a', 'b', 'c'], dtype=object)
le.transform(df['A'])
>>>>
array([0, 1, 2, 0, 1])
le.fit_transform(df['A'])
>>>>
array([0, 1, 2, 0, 1])
le.inverse_transform(df['le_A'])
>>>>
array(['a', 'b', 'c', 'a', 'b'], dtype=object)
# Numeric sample data for the scaling examples that follow.
df = pd.DataFrame( {
    'A' : [10, 20, -10, 0, 25],
    'B' : [1, 2, 3, 1, 0]
})
df
>>>>

📌 min-max scaling

- 데이터 정규화

출처 : 제로베이스 데이터 스쿨

-> df

from sklearn.preprocessing import MinMaxScaler

mms = MinMaxScaler()
mms.fit(df)
mms.data_max_, mms.data_min_, mms.data_range_
>>>>
(array([25.,  3.]), array([-10.,   0.]), array([35.,  3.]))
mms.inverse_transform(df_mms)
>>>>
array([[ 10.,   1.],
       [ 20.,   2.],
       [-10.,   3.],
       [  0.,   1.],
       [ 25.,   0.]])
df_mms = mms.transform(df)
df_mms
>>>>
array([[0.57142857, 0.33333333],
       [0.85714286, 0.66666667],
       [0.        , 1.        ],
       [0.28571429, 0.33333333],
       [1.        , 0.        ]])
       
       # ๊ฐ๊ฐ์˜ ๋งฅ์‹œ๋ฉˆ์ด 1๋กœ ๋ฐ˜ํ™˜

📌 Standard Scaler

- 데이터 표준화

출처 : 제로베이스 데이터 스쿨

from sklearn.preprocessing import StandardScaler

ss = StandardScaler()
ss.fit(df)
# Per-column mean and standard deviation learned by fit
ss.mean_, ss.scale_
>>>>
(array([9. , 1.4]), array([12.80624847,  1.0198039 ]))
df_ss = ss.transform(df)
df_ss
>>>>
array([[ 0.07808688, -0.39223227],
       [ 0.85895569,  0.58834841],
       [-1.48365074,  1.56892908],
       [-0.70278193, -0.39223227],
       [ 1.2493901 , -1.37281295]])
       
       # Replaces the original data with data whose mean is 0 and standard deviation is 1 (per column)

📌 Robust Scaler

# Nine values in a single column; 5 sits far away from the rest.
df = pd.DataFrame({'A': [-0.1, 0., 0.1, 0.2, 0.3, 0.4, 1.0, 1.1, 5]})
df
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler

mm, ss, rs = MinMaxScaler(), StandardScaler(), RobustScaler()

# Start from a copy so the raw column 'A' stays next to the scaled ones.
df_scaler = df.copy()
for column, scaler in (('MinMax', mm), ('Standard', ss), ('Robust', rs)):
    # Each scaler is fitted on the original single-column frame.
    df_scaler[column] = scaler.fit_transform(df)

import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme(style='whitegrid')
plt.figure(figsize=(16, 6))
# Horizontal box plots, one per column of df_scaler.
sns.boxplot(data=df_scaler, orient='h')