스터디 노트 (tensorflow, LeNET)
๐ Classify Wearing Mask
➡️ Data Source
→ https://www.kaggle.com/datasets/ashishjangra27/face-mask-12k-images-dataset
➡️ Find files in the current path using the ls command
→ ls 명령어로 현재 경로에 존재하는 파일을 알 수 있다.
ls
>>>
2023-11-01 오후 01:37 <DIR> .
2023-10-30 오후 04:04 <DIR> ..
2023-10-31 오후 09:59 <DIR> .ipynb_checkpoints
2023-10-31 오후 09:57 3,172,365 1. Beginning of Deeplearning.ipynb
2023-10-31 오후 09:56 1,298,479 2. Deep Learning from scratch.ipynb
2023-11-01 오후 01:37 167,739 3. Dive to cnn.ipynb
2023-11-01 오후 01:59 <DIR> data
2023-10-31 오후 02:57 37,918,144 MNIST_CNN_model.h5
➡️ Deal with zip files
→ zipfile 명령어를 사용하여 알집 파일의 압축을 풀 수 있다.
import zipfile

# Extract the downloaded archive into ./data.
# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile("./data/archive.zip") as content_zip:
    content_zip.extractall("./data")
➡️ Organizing Data files
→ listdir 명령어를 사용하여, 해당 경로의 폴더 파일을 확인할 수 있다.
import os
import glob
# Dataset root directory; list the entries directly under it.
path = "./data/Face Mask Dataset/"
os.listdir(path)
>>>>
['Test', 'Train', 'Validation']
# List the class folders found inside the Train split.
path = "./data/Face Mask Dataset/"
train_dir = f"{path}/Train"
os.listdir(train_dir)
>>>>
['WithMask', 'WithoutMask']
import pandas as pd

# Walk <root>/<split>/<label>/*.png and record, for every image found,
# its file path, the label folder it sits in, and the split folder.
path = "./data/Face Mask Dataset/"
records = {"image_path": [], "mask_status": [], "where": []}

for split_name in os.listdir(path):
    split_dir = path + "/" + split_name
    for label_name in os.listdir(split_dir):
        pattern = path + split_name + "/" + label_name + "/" + "*.png"
        matches = glob.glob(pattern)
        records["image_path"].extend(matches)
        records["mask_status"].extend([label_name] * len(matches))
        records["where"].extend([split_name] * len(matches))

# One row per image, columns: image_path, mask_status, where.
dataset = pd.DataFrame(records)
dataset.head()
๐ปConfirm is data ratio
→ DataFrame์ value_counts ๋ช ๋ น์ด๋ฅผ ์ฌ์ฉํ๊ณ column์ ๋ฃ์ด์ฃผ๋ฉด ํด๋น ์ปฌ๋ผ์ value_counts๋ฅผ ์ ์ ์๋ค.
# Count how many rows carry each distinct mask_status value.
dataset.value_counts("mask_status")
>>>>
mask_status
WithoutMask 5909
WithMask 5883
# value_counts() orders its result by frequency, so positions 0 and 1 are
# not tied to a fixed label. Look each count up by its label instead, so the
# printed text always matches the number next to it.
mask_counts = dataset["mask_status"].value_counts()
print("With Mask:", mask_counts.get("WithMask", 0))
print("Without Mask:", mask_counts.get("WithoutMask", 0))

# Bar chart of the same label distribution.
sns.countplot(x=dataset["mask_status"])
plt.show()
>>>>
With Mask: 5909
Without Mask: 5883
💻 Check images at random
import cv2

# Draw a 3x3 grid of randomly chosen images, each titled with its label.
plt.figure(figsize=(15, 10))
for i in range(9):
    # Pick a random row of the dataset. The lower bound is 0 so that the
    # first row can be selected as well.
    rand_idx = np.random.randint(0, len(dataset))
    # Place the image in the i-th cell of the 3x3 grid.
    plt.subplot(3, 3, i + 1)
    # cv2.imread returns channels in BGR order while matplotlib expects RGB,
    # so convert before displaying to keep the colours correct.
    bgr_image = cv2.imread(dataset.loc[rand_idx, "image_path"])
    plt.imshow(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))
    # Title each image with its mask status.
    plt.title(dataset.loc[rand_idx, "mask_status"], size=15)
    # Hide the axis ticks by passing empty lists.
    plt.xticks([])
    plt.yticks([])
plt.show()
๐ปArrange Data
→ train data๋ง ์ฌ์ฉํ ๊ฒ์ด๊ธฐ ๋๋ฌธ์, train_df๋ฅผ ๋ค์ ๋ง๋ ๋ค.
# Split the rows by the folder ("where") they were read from.
split_column = dataset["where"]
train_df = dataset[split_column == "Train"]
test_df = dataset[split_column == "Test"]
valid_df = dataset[split_column == "Validation"]

# Renumber the training rows from 0 and discard the old index values.
train_df = train_df.reset_index(drop=True)
train_df.head()
💻 Check Data ratio
→ 각각 데이터 셋을 확인 해보니, 거의 동일한 비율을 나타내고 있다.
# Plot the label distribution of the three splits side by side.
# Create the figure once: calling plt.figure() before every subplot starts a
# new figure each time, which leaves one subplot per figure instead of a
# single 1x3 row.
plt.figure(figsize=(15, 5))
split_frames = [
    ("Training Dataset", train_df),
    ("Test Dataset", test_df),
    ("Validation Dataset", valid_df),
]
for position, (title, frame) in enumerate(split_frames, start=1):
    plt.subplot(1, 3, position)
    sns.countplot(x=frame["mask_status"])
    plt.title(title, size=10)
plt.show()
๐ปPreprocessing image data
→ image๋ฅผ grayscale๋ก ์ฝ์ ๋ค์, resizing
→ ์ด ๋ฐ์ดํฐ๋ฅผ ๋ฆฌ์คํธ์ append
from tqdm.notebook import tqdm
# Build a list of [pixels, label] pairs from the training rows:
# pixels is the grayscale image resized to image_size x image_size,
# label is 1 for "WithMask" and 0 otherwise.
data = []
image_size = 150
for image_path, mask_status in zip(train_df["image_path"], train_df["mask_status"]):
    # Read the image as grayscale.
    img_array = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img_array is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # read; fail here with the offending path rather than inside resize.
        raise ValueError(f"Could not read image: {image_path}")
    # Resize to a fixed square so every sample has the same shape.
    new_image_array = cv2.resize(img_array, (image_size, image_size))
    label = 1 if mask_status == "WithMask" else 0
    data.append([new_image_array, label])
➡️ Why shuffle the dataset
→ 데이터가 순차적으로 입력이 되었기 때문에, 한 번 섞는 것이 좋다.
# Shuffle the [pixels, label] pairs in place.
np.random.shuffle(data)
💻 Check the resized images
# Preview six preprocessed samples in a 2x3 grid, titled with their labels.
fig, ax = plt.subplots(2, 3, figsize=(10, 6))
for row in range(2):
    for col in range(3):
        # Row 0 shows samples 0-2, row 1 shows samples 100-102.
        image_index = row * 100 + col
        pixels, label = data[image_index]
        ax[row, col].axis("off")
        # "gray" is the canonical colormap name; the "grey" spelling is not
        # accepted by every Matplotlib release (NOTE(review): confirm the
        # minimum version if the alias is preferred).
        ax[row, col].imshow(pixels, cmap="gray")
        ax[row, col].set_title("With Mask" if label else "Without Mask")
๐ปData Setting (Arrange)
→ Pixel Data๋ฅผ X, Labeling Data๋ฅผ y์ append
from sklearn.model_selection import train_test_split

# Separate each [pixels, label] pair into a feature array X and a label array y.
X = np.array([pixels for pixels, _ in data])
y = np.array([label for _, label in data])

# Hold out 20% of the samples for validation; the seed is fixed for repeatability.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=13,
)
💻 Modeling
import tensorflow as tf
from tensorflow.keras import Sequential, layers, models
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPool2D

# Small CNN for binary classification of 150x150 single-channel images.
# `layers` and `tf` are imported above because the definition below refers
# to both of them.
model = models.Sequential([
    # Conv2D with 32 channels and a 5x5 kernel (commonly used alongside 3x3);
    # strides of 1x1 (slide one pixel at a time), with "same" padding.
    layers.Conv2D(32, kernel_size=(5, 5), strides=(1, 1), padding="same", activation="relu", input_shape=(150, 150, 1)),
    # Pool size and strides are usually kept equal.
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    # Pooling shrank the feature map, so widen the channels to pick up more features.
    layers.Conv2D(64, (2, 2), activation="relu", padding="same"),
    # Same pooling setup as above.
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    # Dropout at a 25% rate to reduce overfitting.
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(1000, activation="relu"),
    # Output layer: a single sigmoid unit.
    layers.Dense(1, activation="sigmoid"),
])

model.compile(
    optimizer="adam",
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=["accuracy"],
)
💻 Fit
# Give every sample an explicit trailing channel axis, (150, 150, 1),
# letting the first dimension be inferred from the number of samples.
cnn_input_shape = (-1, 150, 150, 1)
X_train = X_train.reshape(cnn_input_shape)
X_val = X_val.reshape(cnn_input_shape)

# Train for 4 epochs in mini-batches of 32 and keep the returned history.
hist = model.fit(X_train, y_train, batch_size=32, epochs=4)
💻 Evaluate
→ 성능 평가
# Evaluate the trained model on the held-out validation split.
model.evaluate(X_val, y_val)
>>>>
63/63 [==============================] - 3s 54ms/step - loss: 0.1177 - accuracy: 0.9740
[0.11772098392248154, 0.9739999771118164]
💻 Predict
# Imported here because these names are used below but are not brought
# into scope anywhere else in this note.
from sklearn.metrics import classification_report, confusion_matrix

# Threshold the model outputs at 0.5 to get hard 0/1 predictions.
prediction = (model.predict(X_val) > 0.5).astype("int32")

# Per-class precision/recall/F1, followed by the confusion matrix.
print(classification_report(y_val, prediction))
print(confusion_matrix(y_val, prediction))
>>>>
63/63 [==============================] - 3s 47ms/step
precision recall f1-score support
0 0.98 0.97 0.97 982
1 0.97 0.98 0.97 1018
accuracy 0.97 2000
macro avg 0.97 0.97 0.97 2000
weighted avg 0.97 0.97 0.97 2000
[[950 32]
[ 20 998]]
💻 Showing misclassified images
→ 예측이 틀린 데이터 확인
→ 제대로 얼굴 앞면이 안나온 사진도 있고... 이상한(?) 마스크를 쓰고 있는 사진도 있는 듯 하다.
# Positions in the validation set where the prediction differs from the true label.
wrong_result = [
    position
    for position in range(len(y_val))
    if prediction[position] != y_val[position]
]
len(wrong_result)
>>>>
52
import random

# Draw up to six distinct misclassified validation samples. random.sample
# picks without replacement, so the same image is not shown twice, and the
# min() guard keeps it valid when fewer than six errors exist.
samples = random.sample(wrong_result, k=min(6, len(wrong_result)))

plt.figure(figsize=(14, 12))
# Iterate over `samples`, the list defined just above.
for idx, n in enumerate(samples):
    plt.subplot(2, 3, idx + 1)
    # Drop the channel axis for display and render with a gray colormap so
    # the single-channel image is not pseudo-coloured.
    plt.imshow(X_val[n].reshape(150, 150), cmap="gray", interpolation="nearest")
    # Title each image with the model's (wrong) prediction.
    plt.title(prediction[n])
    plt.axis("off")
plt.show()