https://www.tensorflow.org/tutorials/keras/classification?hl=ko

1. Import modules

import tensorflow as tf
from tensorflow.keras.datasets.fashion_mnist import load_data
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import plot_model

from sklearn.model_selection import train_test_split

import numpy as np
import matplotlib.pyplot as plt

 

 

2. Load the dataset

tf.random.set_seed(111)

(x_train_full, y_train_full), (x_test, y_test) = load_data()

x_train, x_val, y_train, y_val = train_test_split(x_train_full, y_train_full, test_size = 0.3, random_state = 111)

print("학습 데이터: {}\t레이블: {}".format(x_train_full.shape, y_train_full.shape))
print("학습 데이터: {}\t레이블: {}".format(x_train.shape, y_train.shape))
print("검증 데이터: {}\t레이블: {}".format(x_val.shape, y_val.shape))
print("테스트 데이터: {}\t레이블: {}".format(x_test.shape, y_test.shape))

# Output
Full training set: (60000, 28, 28)	Labels: (60000,)
Training set: (42000, 28, 28)	Labels: (42000,)
Validation set: (18000, 28, 28)	Labels: (18000,)
Test set: (10000, 28, 28)	Labels: (10000,)
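
If the class balance should be preserved across the train/validation split, train_test_split also accepts a stratify argument; a minimal variant of the split above (an optional alternative, not part of the original tutorial):

# Optional: a stratified split keeps all 10 class ratios identical in both subsets
x_train, x_val, y_train, y_val = train_test_split(
    x_train_full, y_train_full, test_size = 0.3,
    random_state = 111, stratify = y_train_full)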

 

 

3. Inspect the data

# The set of class labels
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

# Check the label of the first training sample
class_names[y_train[0]]

# Output
'Pullover'

plt.figure()
plt.imshow(x_train[0])
plt.colorbar()
plt.grid()
plt.show()

# Randomly sample four images and display them
num_sample = 4
random_idxs = np.random.randint(60000, size = num_sample)
plt.figure(figsize = (15, 10))
for i, idx in enumerate(random_idxs):
    image = x_train_full[idx, :]
    label = y_train_full[idx]

    plt.subplot(1, len(random_idxs), i+1)
    plt.imshow(image)
    plt.title("Index: {}, Label: {}".format(idx, class_names[label]))
plt.show()

 

 

4. Data preprocessing

  • Normalization
  • Flatten
  • loss = 'sparse_categorical_crossentropy' (see the sketch after the code below)
# Flatten each 28x28 image into a 784-dim vector and scale pixel values to [0, 1]
x_train = (x_train.reshape(-1, 28*28)) / 255.
x_val = (x_val.reshape(-1, 28*28)) / 255.
x_test = (x_test.reshape(-1, 28*28)) / 255.
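
The loss choice in the third bullet follows from the label format: y_train holds integer class indices (0 to 9), so sparse_categorical_crossentropy applies without one-hot encoding the labels. For reference, a minimal sketch of the one-hot alternative (not part of the original flow; to_categorical is the standard Keras utility also used in section 11):

# Sketch: with one-hot labels, categorical_crossentropy would be used instead
from tensorflow.keras.utils import to_categorical
y_train_onehot = to_categorical(y_train)    # (42000,) -> (42000, 10)
# model.compile(loss = 'categorical_crossentropy', ...)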

 

 

5. Build the model (Functional API)

inputs = Input(shape = (784, ), name = 'input')
hidden1 = Dense(256, activation = 'relu', name = 'hidden1')(inputs)
hidden2 = Dense(128, activation = 'relu', name = 'hidden2')(hidden1)
hidden3 = Dense(64, activation = 'relu', name = 'hidden3')(hidden2)
hidden4 = Dense(32, activation = 'relu', name = 'hidden4')(hidden3)
output = Dense(10, activation = 'softmax', name = 'output')(hidden4)
model = Model(inputs = [inputs], outputs = [output])
model.summary()

# Output
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input (InputLayer)          [(None, 784)]             0         
                                                                 
 hidden1 (Dense)             (None, 256)               200960    
                                                                 
 hidden2 (Dense)             (None, 128)               32896     
                                                                 
 hidden3 (Dense)             (None, 64)                8256      
                                                                 
 hidden4 (Dense)             (None, 32)                2080      
                                                                 
 output (Dense)              (None, 10)                330       
                                                                 
=================================================================
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
_________________________________________________________________

plot_model(model)
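
Sequential is imported in section 1 but never used; for reference, a sketch of the same stack built with the Sequential API (same layers, same 244,522 parameters):

# Sketch: equivalent model with the Sequential API
seq_model = Sequential([
    Input(shape = (784, ), name = 'input'),
    Dense(256, activation = 'relu', name = 'hidden1'),
    Dense(128, activation = 'relu', name = 'hidden2'),
    Dense(64, activation = 'relu', name = 'hidden3'),
    Dense(32, activation = 'relu', name = 'hidden4'),
    Dense(10, activation = 'softmax', name = 'output'),
])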

 

 

6. Compile the model

model.compile(loss = 'sparse_categorical_crossentropy',
              optimizer = RMSprop(learning_rate = 0.01),
              metrics = ['acc'])
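
The linked TensorFlow tutorial computes the same loss from raw logits instead; a sketch of that variant, where the output Dense layer has no softmax and the loss object applies it internally:

# Sketch: logits variant - drop activation = 'softmax' on the output layer, then:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True)
# model.compile(loss = loss_fn, optimizer = RMSprop(learning_rate = 0.01), metrics = ['acc'])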

 

 

7. Train the model

  • Store the training history in the history variable so it can be visualized later
history = model.fit(x_train, y_train,
                    epochs = 10,
                    batch_size = 128,
                    validation_data = (x_val, y_val))

# Output
Epoch 1/10
329/329 [==============================] - 16s 33ms/step - loss: 0.8969 - acc: 0.6897 - val_loss: 0.5580 - val_acc: 0.7997
Epoch 2/10
329/329 [==============================] - 6s 19ms/step - loss: 0.5179 - acc: 0.8132 - val_loss: 0.5554 - val_acc: 0.8124
Epoch 3/10
329/329 [==============================] - 5s 15ms/step - loss: 0.4643 - acc: 0.8321 - val_loss: 0.7202 - val_acc: 0.7992
Epoch 4/10
329/329 [==============================] - 5s 14ms/step - loss: 0.4484 - acc: 0.8414 - val_loss: 0.5157 - val_acc: 0.7810
Epoch 5/10
329/329 [==============================] - 5s 15ms/step - loss: 0.4242 - acc: 0.8497 - val_loss: 0.5527 - val_acc: 0.8212
Epoch 6/10
329/329 [==============================] - 5s 15ms/step - loss: 0.4175 - acc: 0.8523 - val_loss: 0.6034 - val_acc: 0.8197
Epoch 7/10
329/329 [==============================] - 5s 15ms/step - loss: 0.4107 - acc: 0.8566 - val_loss: 0.6612 - val_acc: 0.8046
Epoch 8/10
329/329 [==============================] - 5s 15ms/step - loss: 0.4029 - acc: 0.8594 - val_loss: 0.6940 - val_acc: 0.7671
Epoch 9/10
329/329 [==============================] - 5s 14ms/step - loss: 0.3955 - acc: 0.8603 - val_loss: 0.5032 - val_acc: 0.8444
Epoch 10/10
329/329 [==============================] - 5s 14ms/step - loss: 0.3969 - acc: 0.8653 - val_loss: 0.5266 - val_acc: 0.8257

 

 

8. Visualize the training results

history_dict = history.history

loss = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(loss) + 1)
fig = plt.figure(figsize = (10, 5))

ax1 = fig.add_subplot(1, 2, 1)
ax1.plot(epochs, loss, color = 'blue', label = 'train_loss')
ax1.plot(epochs, val_loss, color = 'red', label = 'val_loss')
ax1.set_title('Train and Validation Loss')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Loss')
ax1.grid()
ax1.legend()

acc = history_dict['acc']
val_acc = history_dict['val_acc']

ax2 = fig.add_subplot(1, 2, 2)
ax2.plot(epochs, acc, color = 'blue', label = 'train_acc')
ax2.plot(epochs, val_acc, color = 'red', label = 'val_acc')
ax2.set_title('Train and Validation Accuracy')
ax2.set_xlabel('Epochs')
ax2.set_ylabel('Accuracy')
ax2.grid()
ax2.legend()

  • The validation metrics (val_loss, val_acc) fluctuate instead of improving steadily
  • Re-run with a different optimizer
    • First repeat the earlier steps: load the dataset - preprocess - build the model
from tensorflow.keras.optimizers import SGD

model.compile(loss = 'sparse_categorical_crossentropy',
              optimizer = SGD(learning_rate = 0.01),
              metrics = ['acc'])

history2 = model.fit(x_train, y_train,
                     epochs = 10,
                     batch_size = 128,
                     validation_data = (x_val, y_val))

# Output
Epoch 1/10
329/329 [==============================] - 13s 32ms/step - loss: 0.3495 - acc: 0.8706 - val_loss: 0.3795 - val_acc: 0.8644
Epoch 2/10
329/329 [==============================] - 9s 27ms/step - loss: 0.3172 - acc: 0.8811 - val_loss: 0.3691 - val_acc: 0.8689
Epoch 3/10
329/329 [==============================] - 6s 19ms/step - loss: 0.3072 - acc: 0.8848 - val_loss: 0.3621 - val_acc: 0.8713
Epoch 4/10
329/329 [==============================] - 8s 25ms/step - loss: 0.3017 - acc: 0.8864 - val_loss: 0.3590 - val_acc: 0.8728
Epoch 5/10
329/329 [==============================] - 7s 23ms/step - loss: 0.2977 - acc: 0.8880 - val_loss: 0.3572 - val_acc: 0.8728
Epoch 6/10
329/329 [==============================] - 7s 21ms/step - loss: 0.2950 - acc: 0.8888 - val_loss: 0.3548 - val_acc: 0.8733
Epoch 7/10
329/329 [==============================] - 4s 12ms/step - loss: 0.2925 - acc: 0.8896 - val_loss: 0.3542 - val_acc: 0.8756
Epoch 8/10
329/329 [==============================] - 3s 11ms/step - loss: 0.2903 - acc: 0.8904 - val_loss: 0.3526 - val_acc: 0.8756
Epoch 9/10
329/329 [==============================] - 3s 10ms/step - loss: 0.2887 - acc: 0.8911 - val_loss: 0.3520 - val_acc: 0.8757
Epoch 10/10
329/329 [==============================] - 3s 11ms/step - loss: 0.2870 - acc: 0.8915 - val_loss: 0.3526 - val_acc: 0.8756

# Visualize again
history_dict = history2.history

loss = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(loss) + 1)
fig = plt.figure(figsize = (10, 5))

ax1 = fig.add_subplot(1, 2, 1)
ax1.plot(epochs, loss, color = 'blue', label = 'train_loss')
ax1.plot(epochs, val_loss, color = 'red', label = 'val_loss')
ax1.set_title('Train and Validation Loss')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Loss')
ax1.grid()
ax1.legend()

acc = history_dict['acc']
val_acc = history_dict['val_acc']

ax2 = fig.add_subplot(1, 2, 2)
ax2.plot(epochs, acc, color = 'blue', label = 'train_acc')
ax2.plot(epochs, val_acc, color = 'red', label = 'val_acc')
ax2.set_title('Train and Validation Accuracy')
ax2.set_xlabel('Epochs')
ax2.set_ylabel('Accuracy')
ax2.grid()
ax2.legend()

  • The training loss and accuracy look different from the validation loss and accuracy on the plots, but numerically the gap is small
  • The largest loss gap is about 0.06 and the largest accuracy gap is about 0.025
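
The fluctuation seen with RMSprop could also have been contained with early stopping, instead of (or alongside) switching optimizers; a minimal sketch using the standard Keras EarlyStopping callback (the patience value here is an arbitrary choice):

from tensorflow.keras.callbacks import EarlyStopping

# Sketch: stop when val_loss stops improving and roll back to the best weights
early_stop = EarlyStopping(monitor = 'val_loss', patience = 3,
                           restore_best_weights = True)
# model.fit(..., validation_data = (x_val, y_val), callbacks = [early_stop])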

 

 

9. Evaluate the model (1)

  • The model trained with the SGD() optimizer
  • evaluate()
model.evaluate(x_test, y_test)

# Output
313/313 [==============================] - 2s 6ms/step - loss: 0.3862 - acc: 0.8661
[0.38618436455726624, 0.866100013256073]

 

 

10. Predict with the trained model

pred_ys = model.predict(x_test)

print(pred_ys.shape)
np.set_printoptions(precision = 7)
print(pred_ys[0])

# Output
# The probability that the sample belongs to each of the 10 classes
(10000, 10)
[4.2854483e-21 1.0930411e-15 1.6151620e-17 3.9182383e-11 2.9266587e-15
 3.3629590e-03 4.9878759e-17 1.0700015e-03 2.2493745e-13 9.9556702e-01]

# Take the class with the highest probability as the prediction and check the result
arg_pred_y = np.argmax(pred_ys, axis = 1)
plt.imshow(x_test[0].reshape(28, 28))
plt.title('Predicted Class: {}'.format(class_names[arg_pred_y[0]]))
plt.show()
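
As a sanity check, the argmax predictions can be scored against y_test directly; the result should be near the 0.8661 accuracy reported by evaluate() in section 9:

# Sanity check: accuracy recomputed from the argmax predictions
print(np.mean(arg_pred_y == y_test))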

# Display an image with its predicted and true labels
def plot_image(i, pred_ys, y_test, img):
    pred_ys, y_test, img = pred_ys[i], y_test[i], img[i]
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    
    plt.imshow(img, cmap = plt.cm.binary)

    predicted_label = np.argmax(pred_ys)
    if predicted_label == y_test:
        color = 'blue'
    else:
        color = 'red'
    
    plt.xlabel("{} {:2.0f}% ({})".format(class_names[predicted_label],
                                         100 * np.max(pred_ys),
                                         class_names[y_test]),
               color = color)

# Plot the predicted probability for every class in the label set
def plot_value_array(i, pred_ys, true_label):
    pred_ys, true_label = pred_ys[i], true_label[i]
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    thisplot = plt.bar(range(10), pred_ys, color = '#777777')
    plt.ylim([0, 1])
    predicted_label = np.argmax(pred_ys)

    thisplot[predicted_label].set_color('red')
    thisplot[true_label].set_color('blue')

# Check the prediction for the first sample
i = 0
plt.figure(figsize = (8, 4))
plt.subplot(1, 2, 1)
plot_image(i, pred_ys, y_test, x_test.reshape(-1, 28, 28))
plt.subplot(1, 2, 2)
plot_value_array(i, pred_ys, y_test)
plt.show()

# Randomly sample images and check their predictions
num_rows = 5
num_cols = 3
num_images = num_rows * num_cols

random_num = np.random.randint(10000, size = num_images)
plt.figure(figsize = (2 * 2 * num_cols, 2 * num_rows))
for idx, num in enumerate(random_num):
    plt.subplot(num_rows, 2 * num_cols, 2 * idx + 1)
    plot_image(num, pred_ys, y_test, x_test.reshape(-1, 28, 28))
    plt.subplot(num_rows, 2 * num_cols, 2 * idx + 2)
    plot_value_array(num, pred_ys, y_test)

plt.show()

 

 

11. Evaluate the model (2)

  • The model trained with the SGD() optimizer
  • Confusion matrix
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
from tensorflow.keras.utils import to_categorical

y_test_che = to_categorical(y_test)
plt.figure(figsize = (8, 8))
cm2 = confusion_matrix(np.argmax(y_test_che, axis = 1), np.argmax(pred_ys, axis = -1))
sns.heatmap(cm2, annot = True, fmt = 'd', cmap = 'Blues')
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
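
Since to_categorical followed by argmax simply recovers the original integer labels, the one-hot round trip above is unnecessary; an equivalent, simpler call:

# Equivalent: y_test already holds integer labels, so no one-hot round trip is needed
cm_direct = confusion_matrix(y_test, np.argmax(pred_ys, axis = 1))    # identical to cm2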

 

 

12. Evaluate the model (3)

  • The model trained with the SGD() optimizer
  • Classification report
print(classification_report(np.argmax(y_test_che, axis = -1), np.argmax(pred_ys, axis = -1)))

# Output
              precision    recall  f1-score   support

           0       0.78      0.85      0.81      1000
           1       0.99      0.96      0.98      1000
           2       0.75      0.81      0.78      1000
           3       0.86      0.88      0.87      1000
           4       0.77      0.75      0.76      1000
           5       0.97      0.95      0.96      1000
           6       0.68      0.57      0.62      1000
           7       0.93      0.96      0.94      1000
           8       0.96      0.97      0.96      1000
           9       0.96      0.95      0.96      1000

    accuracy                           0.87     10000
   macro avg       0.86      0.87      0.86     10000
weighted avg       0.86      0.87      0.86     10000
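
classification_report also accepts target_names, so the rows can show the Fashion-MNIST class names instead of bare indices:

# Same report with readable class names
print(classification_report(y_test, np.argmax(pred_ys, axis = -1),
                            target_names = class_names))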
