● Keras Transfer Learning

https://medium.com/the-official-integrate-ai-blog/transfer-learning-explained-7d275c1e34e2

  • Reuse a previously trained model when building a new one
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, BatchNormalization, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import *


# As an example, load a VGG16 model pretrained on ImageNet
vgg16 = VGG16(weights = 'imagenet',
              input_shape = (32, 32, 3), include_top = False)

model = Sequential()
model.add(vgg16)

model.add(Flatten())
model.add(Dense(256))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(10, activation = 'softmax'))

model.summary()

# Output
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 vgg16 (Functional)          (None, 1, 1, 512)         14714688  
                                                                 
 flatten (Flatten)           (None, 512)               0         
                                                                 
 dense (Dense)               (None, 256)               131328    
                                                                 
 batch_normalization (BatchN  (None, 256)              1024      
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 10)                2570      
                                                                 
=================================================================
Total params: 14,849,610
Trainable params: 14,849,098
Non-trainable params: 512
_________________________________________________________________
  • Besides VGG16, models such as MobileNet, ResNet50, and Xception can also be used for transfer learning (see the sketch below)
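
The same pattern works with the other keras.applications models. Below is a minimal sketch, assuming MobileNetV2 as the base and a 96 × 96 input; both choices are illustrative rather than taken from these notes.

# Hypothetical example: MobileNetV2 as a frozen base (reuses the imports above)
from tensorflow.keras.applications import MobileNetV2

mobilenet = MobileNetV2(weights = 'imagenet',
                        input_shape = (96, 96, 3), include_top = False)
mobilenet.trainable = False   # keep the pretrained weights fixed

model = Sequential()
model.add(mobilenet)
model.add(Flatten())
model.add(Dense(10, activation = 'softmax'))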

 

1. Example: Dogs vs Cats

 

  - modules import

import tensorflow as tf
from tensorflow.keras.preprocessing.image import array_to_img, img_to_array, load_img, ImageDataGenerator
from tensorflow.keras.layers import Conv2D, Flatten, MaxPool2D, Input, Dropout, Dense
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

import os
import zipfile
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

 

  - Data loading

# Download the dataset from an external source
import wget

wget.download("https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip")


# Unzip the archive
local_zip = 'cats_and_dogs_filtered.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
# Extract into the current directory
zip_ref.extractall()
zip_ref.close()


# Use the extracted folder as the base path and access its train and validation subfolders
base_dir = 'cats_and_dogs_filtered'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')


# Access the train/validation cat and dog subfolders
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')

validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')

train_cat_frames = os.listdir(train_cats_dir)
train_dog_frames = os.listdir(train_dogs_dir)

 

  - Checking the augmented images

# Define the ImageDataGenerator
datagen = ImageDataGenerator(
    rotation_range = 40,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,
    fill_mode = 'nearest'
)


# Load a sample image
img_path = os.path.join(train_cats_dir, train_cat_frames[2])
img = load_img(img_path, target_size = (150, 150))
x = img_to_array(img)
x = x.reshape((1, ) + x.shape)

i = 0
for batch in datagen.flow(x, batch_size = 1):
    plt.figure(i)
    imgplot = plt.imshow(array_to_img(batch[0]))
    i += 1
    if i % 5 == 0:
        break

 

  - Data generators for the training and validation sets

train_datagen = ImageDataGenerator(
    rescale = 1. / 255,
    rotation_range = 40,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True
)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size = (150, 150),
    batch_size = 20,
    class_mode = 'binary'
)

val_datagen = ImageDataGenerator(rescale = 1. / 255)

validation_generator = val_datagen.flow_from_directory(
    validation_dir,
    target_size = (150, 150),
    batch_size = 20,
    class_mode = 'binary'
)


# Output
Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.

 

  - Model construction and compilation

model = Sequential()
model.add(Conv2D(32, (3, 3), activation = 'relu', input_shape = (150, 150, 3)))
model.add(MaxPool2D(2, 2))
model.add(Conv2D(64, (3, 3), activation = 'relu'))
model.add(MaxPool2D(2, 2))
model.add(Conv2D(128, (3, 3), activation = 'relu'))
model.add(MaxPool2D(2, 2))
model.add(Conv2D(128, (3, 3), activation = 'relu'))
model.add(MaxPool2D(2, 2))
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(512, activation = 'relu'))
model.add(Dense(1, activation = 'sigmoid'))

model.compile(loss = 'binary_crossentropy',
              optimizer = Adam(learning_rate = 1e-4),
              metrics = ['acc'])

model.summary()

# Output
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 74, 74, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 72, 72, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 36, 36, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 34, 34, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 17, 17, 128)      0         
 2D)                                                             
                                                                 
 conv2d_3 (Conv2D)           (None, 15, 15, 128)       147584    
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 7, 7, 128)        0         
 2D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 6272)              0         
                                                                 
 dropout (Dropout)           (None, 6272)              0         
                                                                 
 dense_2 (Dense)             (None, 512)               3211776   
                                                                 
 dense_3 (Dense)             (None, 1)                 513       
                                                                 
=================================================================
Total params: 3,453,121
Trainable params: 3,453,121
Non-trainable params: 0
_________________________________________________________________

 

  - Model training and visualization of the training process

# the batch size is set by the generator (20), so batch_size is not passed to fit()
history = model.fit(train_generator,
                    steps_per_epoch = 100,
                    epochs = 30,
                    validation_data = validation_generator,
                    validation_steps = 50,
                    verbose = 2)

# Visualization
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))

plt.plot(epochs, loss, 'b--', label = 'Train Loss')
plt.plot(epochs, val_loss, 'r:', label = 'Validation Loss')
plt.grid()
plt.legend()
plt.show()

plt.plot(epochs, acc, 'b--', label = 'Train Accuracy')
plt.plot(epochs, val_acc, 'r:', label = 'Validation Accuracy')
plt.grid()
plt.legend()

plt.show()

 

  - Saving the model

model.save('cats_and_dogs_model.h5')

 

  - Using a pretrained model

from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.applications import VGG16

conv_base = VGG16(weights = 'imagenet',
                  input_shape = (150, 150, 3), include_top = False)

def build_model_with_pretrained(conv_base):
    model = Sequential()
    model.add(conv_base)
    model.add(Flatten())
    model.add(Dense(256, activation = 'relu'))
    model.add(Dense(1, activation = 'sigmoid'))

    model.compile(loss = 'binary_crossentropy',
                  optimizer = RMSprop(learning_rate = 2e-5),
                  metrics = ['accuracy'])
    return model
  • Checking the number of parameters
model = build_model_with_pretrained(conv_base)
model.summary()

# Output
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 vgg16 (Functional)          (None, 4, 4, 512)         14714688  
                                                                 
 flatten_2 (Flatten)         (None, 8192)              0         
                                                                 
 dense_4 (Dense)             (None, 256)               2097408   
                                                                 
 dense_5 (Dense)             (None, 1)                 257       
                                                                 
=================================================================
Total params: 16,812,353
Trainable params: 16,812,353
Non-trainable params: 0
_________________________________________________________________

 

  - Freezing layers

  • Before training, freeze the convolutional base so that its weights are not updated
# Before freezing
print(len(model.trainable_weights))

# Output
30


# After freezing
conv_base.trainable = False
print(len(model.trainable_weights))

# Output
4
  • The four remaining trainable weights are the kernels and biases of the two Dense layers in the new head

 

  - Compiling the model

  • Because the trainable attribute was changed, the model must be compiled again
model.compile(loss = 'binary_crossentropy',
              optimizer = RMSprop(learning_rate = 2e-5),
              metrics = ['accuracy'])
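
A common follow-up, not performed in these notes, is fine-tuning: after the new classifier head has been trained, unfreeze only the top block of the convolutional base and recompile with an even smaller learning rate. A minimal sketch, assuming VGG16's block naming:

# hypothetical fine-tuning sketch: unfreeze only block5 of VGG16
conv_base.trainable = True
for layer in conv_base.layers:
    # block5_conv1 ... block5_pool become trainable; everything below stays frozen
    layer.trainable = layer.name.startswith('block5')

# recompile so the new trainable settings take effect
model.compile(loss = 'binary_crossentropy',
              optimizer = RMSprop(learning_rate = 1e-5),
              metrics = ['accuracy'])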

 

  - Image generators

train_datagen = ImageDataGenerator(
    rescale = 1. / 255,
    rotation_range = 40,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True
)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size = (150, 150),
    batch_size = 20,
    class_mode = 'binary'
)

val_datagen = ImageDataGenerator(rescale = 1. / 255)

validation_generator = val_datagen.flow_from_directory(
    validation_dir,
    target_size = (150, 150),
    batch_size = 20,
    class_mode = 'binary'
)

# Output
Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.

 

  - Retraining the model

# as above, the batch size comes from the generator
history2 = model.fit(train_generator,
                     steps_per_epoch = 100,
                     epochs = 30,
                     validation_data = validation_generator,
                     validation_steps = 50,
                     verbose = 2)

acc = history2.history['accuracy']
val_acc = history2.history['val_accuracy']
loss = history2.history['loss']
val_loss = history2.history['val_loss']
epochs = range(len(acc))

plt.plot(epochs, loss, 'b--', label = 'Train Loss')
plt.plot(epochs, val_loss, 'r:', label = 'Validation Loss')
plt.grid()
plt.legend()
plt.show()

plt.plot(epochs, acc, 'b--', label = 'Train Accuracy')
plt.plot(epochs, val_acc, 'r:', label = 'Validation Accuracy')
plt.grid()
plt.legend()

plt.show()

 

  - Saving the model

model.save('cats_and_dogs_with_pretrained_model.h5')

 

 

2. Feature Map Visualization

  - Model construction

import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image


# Load the saved model
model = load_model('cats_and_dogs_model.h5')
model.summary()

# Output
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 74, 74, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 72, 72, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 36, 36, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 34, 34, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 17, 17, 128)      0         
 2D)                                                             
                                                                 
 conv2d_3 (Conv2D)           (None, 15, 15, 128)       147584    
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 7, 7, 128)        0         
 2D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 6272)              0         
                                                                 
 dropout (Dropout)           (None, 6272)              0         
                                                                 
 dense_2 (Dense)             (None, 512)               3211776   
                                                                 
 dense_3 (Dense)             (None, 1)                 513       
                                                                 
=================================================================
Total params: 3,453,121
Trainable params: 3,453,121
Non-trainable params: 0
_________________________________________________________________
img_path = 'cats_and_dogs_filtered/validation/dogs/dog.2000.jpg'

img = image.load_img(img_path, target_size = (150, 150))
img_tensor = image.img_to_array(img)
img_tensor = img_tensor[np.newaxis, ...]
img_tensor /= 255.
print(img_tensor.shape)

# Output
(1, 150, 150, 3)
plt.imshow(img_tensor[0])
plt.show()

# Collect the outputs of the first 8 layers only
conv_output = [layer.output for layer in model.layers[:8]]
conv_output

# Output
[<KerasTensor: shape=(None, 148, 148, 32) dtype=float32 (created by layer 'conv2d')>,
 <KerasTensor: shape=(None, 74, 74, 32) dtype=float32 (created by layer 'max_pooling2d')>,
 <KerasTensor: shape=(None, 72, 72, 64) dtype=float32 (created by layer 'conv2d_1')>,
 <KerasTensor: shape=(None, 36, 36, 64) dtype=float32 (created by layer 'max_pooling2d_1')>,
 <KerasTensor: shape=(None, 34, 34, 128) dtype=float32 (created by layer 'conv2d_2')>,
 <KerasTensor: shape=(None, 17, 17, 128) dtype=float32 (created by layer 'max_pooling2d_2')>,
 <KerasTensor: shape=(None, 15, 15, 128) dtype=float32 (created by layer 'conv2d_3')>,
 <KerasTensor: shape=(None, 7, 7, 128) dtype=float32 (created by layer 'max_pooling2d_3')>]
activation_model = Model(inputs = [model.input], outputs = conv_output)
activations = activation_model.predict(img_tensor)
len(activations)

# Output
8

 

  - Visualization

print(activations[0].shape)
plt.matshow(activations[0][0, :, :, 7], cmap = 'viridis')
plt.show()

# Output
(1, 148, 148, 32)

print(activations[0].shape)
plt.matshow(activations[0][0, :, :, 10], cmap = 'viridis')
plt.show()

# Output
(1, 148, 148, 32)

 

  - Visualizing all intermediate activations

# Visualize how the image is transformed at each layer
layer_names = []
for layer in model.layers[:8]:
    layer_names.append(layer.name)

images_per_row = 16

for layer_name, layer_activation in zip(layer_names, activations):
    num_features = layer_activation.shape[-1]

    size = layer_activation.shape[1]

    num_cols = num_features // images_per_row
    display_grid = np.zeros((size * num_cols, size * images_per_row))

    for col in range(num_cols):
        for row in range(images_per_row):
            channel_image = layer_activation[0, :, :, col * images_per_row + row]
            channel_image -= channel_image.mean()
            channel_image /= channel_image.std()
            channel_image *= 64
            channel_image += 128
            channel_image = np.clip(channel_image, 0, 255).astype('uint8')
            display_grid[col * size : (col + 1) * size, row * size : (row + 1) * size] = channel_image
        
    scale = 1. / size

    plt.figure(figsize = (scale * display_grid.shape[1],
                          scale * display_grid.shape[0]))
    plt.title(layer_name)
    plt.grid(False)
    plt.imshow(display_grid, aspect = 'auto', cmap = 'viridis')

plt.show()

● CIFAR-10

  • 50,000 training images and 10,000 test images
  • The data is considerably more complex than MNIST
    • which makes it harder for a network to detect features

1. modules import

import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Input, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import numpy as np

 

 

2. Data loading and preprocessing

(x_train_full, y_train_full), (x_test, y_test) = cifar10.load_data()
print(x_train_full.shape, y_train_full.shape)
print(x_test.shape, y_test.shape)

# Output
(50000, 32, 32, 3) (50000, 1)
(10000, 32, 32, 3) (10000, 1)


# The targets are integer class labels (3 corresponds to 'cat')
print(y_test[0])

# Output
[3]


# Sample images
np.random.seed(777)

class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

sample_size = 9
# x_train_full has 50,000 images, so sample indices from that range
random_idx = np.random.randint(50000, size = sample_size)

plt.figure(figsize = (5, 5))
for i, idx in enumerate(random_idx):
    plt.subplot(3, 3, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(x_train_full[idx])
    plt.xlabel(class_names[int(y_train_full[idx])])

plt.show()

  • The 32 × 32 images are low resolution
# Standardize the x data (per-channel mean and standard deviation)
x_mean = np.mean(x_train_full, axis = (0, 1, 2))
x_std = np.std(x_train_full, axis = (0, 1, 2))
x_train_full = (x_train_full - x_mean) / x_std
x_test = (x_test - x_mean) / x_std


# Split into training and validation sets
x_train, x_val, y_train, y_val = train_test_split(x_train_full, y_train_full, test_size = 0.3)


# Shapes after preprocessing
print(x_train.shape)
print(y_train.shape)

print(x_val.shape)
print(y_val.shape)

print(x_test.shape)
print(y_test.shape)

# Output
(35000, 32, 32, 3)
(35000, 1)
(15000, 32, 32, 3)
(15000, 1)
(10000, 32, 32, 3)
(10000, 1)

 

 

3. Model construction and compilation

def model_build():
    input = Input(shape = (32, 32, 3))

    output = Conv2D(filters = 32, kernel_size = 3, padding = 'same', activation = 'relu')(input)
    output = MaxPool2D(pool_size = (2, 2), strides = 2, padding = 'same')(output)

    output = Conv2D(filters = 64, kernel_size = 3, padding = 'same', activation = 'relu')(output)
    output = MaxPool2D(pool_size = (2, 2), strides = 2, padding = 'same')(output)

    output = Conv2D(filters = 128, kernel_size = 3, padding = 'same', activation = 'relu')(output)
    output = MaxPool2D(pool_size = (2, 2), strides = 2, padding = 'same')(output)

    output = Flatten()(output)
    output = Dense(256, activation = 'relu')(output)
    output = Dense(128, activation = 'relu')(output)
    output = Dense(10, activation = 'softmax')(output)

    model = Model(inputs = [input], outputs = [output])

    model.compile(optimizer = Adam(learning_rate = 1e-4),
                  loss = 'sparse_categorical_crossentropy',
                  metrics = ['accuracy'])
    return model
model = model_build()
model.summary()

# Output
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_3 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 conv2d_3 (Conv2D)           (None, 32, 32, 32)        896       
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 16, 16, 32)       0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 16, 16, 64)        18496     
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 8, 8, 64)         0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 8, 8, 128)         73856     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 4, 4, 128)        0         
 2D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 2048)              0         
                                                                 
 dense_3 (Dense)             (None, 256)               524544    
                                                                 
 dense_4 (Dense)             (None, 128)               32896     
                                                                 
 dense_5 (Dense)             (None, 10)                1290      
                                                                 
=================================================================
Total params: 651,978
Trainable params: 651,978
Non-trainable params: 0
_________________________________________________________________

 

 

4. Model training and evaluation

history = model.fit(x_train, y_train,
                    epochs = 30,
                    batch_size = 256,
                    validation_data = (x_val, y_val))

 

 

5. Visualizing the training process

plt.figure(figsize = (12, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], 'b--', label = 'loss')
plt.plot(history.history['val_loss'], 'r:', label = 'val_loss')
plt.xlabel('Epochs')
plt.grid()
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], 'b--', label = 'accuracy')
plt.plot(history.history['val_accuracy'], 'r:', label = 'val_accuracy')
plt.xlabel('Epochs')
plt.grid()
plt.legend()

  - This model's performance is poor

  - Techniques that prevent overfitting, such as regularization and dropout, are needed

def model_build2():
    input = Input(shape = (32, 32, 3))

    output = Conv2D(filters = 32, kernel_size = 3, padding = 'same', activation = 'relu')(input)
    output = BatchNormalization()(output)
    output = MaxPool2D(pool_size = (2, 2), strides = 2, padding = 'same')(output)

    output = Conv2D(filters = 64, kernel_size = 3, padding = 'same', activation = 'relu')(output)
    output = BatchNormalization()(output)
    output = MaxPool2D(pool_size = (2, 2), strides = 2, padding = 'same')(output)

    output = Conv2D(filters = 128, kernel_size = 3, padding = 'same', activation = 'relu')(output)
    output = BatchNormalization()(output)
    output = MaxPool2D(pool_size = (2, 2), strides = 2, padding = 'same')(output)
    output = Dropout(0.5)(output)

    output = Flatten()(output)
    output = Dense(256, activation = 'relu')(output)
    output = Dropout(0.5)(output)
    output = Dense(128, activation = 'relu')(output)
    output = Dense(10, activation = 'softmax')(output)

    model = Model(inputs = [input], outputs = [output])

    model.compile(optimizer = Adam(learning_rate = 1e-4),
                  loss = 'sparse_categorical_crossentropy',
                  metrics = ['accuracy'])
    return model
model2 = model_build2()
model2.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_4 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 conv2d_6 (Conv2D)           (None, 32, 32, 32)        896       
                                                                 
 batch_normalization (BatchN  (None, 32, 32, 32)       128       
 ormalization)                                                   
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 16, 16, 32)       0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 16, 16, 64)        18496     
                                                                 
 batch_normalization_1 (Batc  (None, 16, 16, 64)       256       
 hNormalization)                                                 
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 8, 8, 64)         0         
 2D)                                                             
                                                                 
 conv2d_8 (Conv2D)           (None, 8, 8, 128)         73856     
                                                                 
 batch_normalization_2 (Batc  (None, 8, 8, 128)        512       
 hNormalization)                                                 
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 4, 4, 128)        0         
 2D)                                                             
                                                                 
 dropout (Dropout)           (None, 4, 4, 128)         0         
                                                                 
 flatten_2 (Flatten)         (None, 2048)              0         
                                                                 
 dense_6 (Dense)             (None, 256)               524544    
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense_7 (Dense)             (None, 128)               32896     
                                                                 
 dense_8 (Dense)             (None, 10)                1290      
                                                                 
=================================================================
Total params: 652,874
Trainable params: 652,426
Non-trainable params: 448
_________________________________________________________________

 

 

6. Model training and evaluation

history2 = model2.fit(x_train, y_train,
                      epochs = 30,
                      batch_size = 256,
                      validation_data = (x_val, y_val))

 

 

7. Visualizing the training process

plt.figure(figsize = (12, 4))

plt.subplot(1, 2, 1)
plt.plot(history2.history['loss'], 'b--', label = 'loss')
plt.plot(history2.history['val_loss'], 'r:', label = 'val_loss')
plt.xlabel('Epochs')
plt.grid()
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history2.history['accuracy'], 'b--', label = 'accuracy')
plt.plot(history2.history['val_accuracy'], 'r:', label = 'val_accuracy')
plt.xlabel('Epochs')
plt.grid()
plt.legend()

  • The validation results improve considerably

● Fashion MNIST

1. modules import

%load_ext tensorboard
import datetime
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets.fashion_mnist import load_data
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Dropout, Input, Flatten

 

 

2. Data loading and preprocessing

(x_train, y_train), (x_test, y_test) = load_data()

x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]

x_train = x_train / 255.
x_test = x_test / 255.

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

# Output
(60000, 28, 28, 1)
(60000,)
(10000, 28, 28, 1)
(10000,)
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

 

3. Model construction and compilation

def build_model():
    input = Input(shape = (28, 28, 1))
    output = Conv2D(filters = 32, kernel_size = (3, 3))(input)
    output = Conv2D(filters = 64, kernel_size = (3, 3))(output)
    output = Conv2D(filters = 64, kernel_size = (3, 3))(output)
    output = Flatten()(output)
    output = Dense(units = 128, activation = 'relu')(output)
    output = Dense(units = 64, activation = 'relu')(output)
    output = Dense(units = 10, activation = 'softmax')(output)

    model = Model(inputs = [input], outputs = [output])

    model.compile(optimizer = 'adam',
                  loss = 'sparse_categorical_crossentropy',
                  metrics = ['acc'])
    return model

model_1 = build_model()
model_1.summary()

# Output
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 24, 24, 64)        18496     
                                                                 
 conv2d_2 (Conv2D)           (None, 22, 22, 64)        36928     
                                                                 
 flatten (Flatten)           (None, 30976)             0         
                                                                 
 dense (Dense)               (None, 128)               3965056   
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dense_2 (Dense)             (None, 10)                650       
                                                                 
=================================================================
Total params: 4,029,706
Trainable params: 4,029,706
Non-trainable params: 0
_________________________________________________________________

 

 

4. Model training

hist_1 = model_1.fit(x_train, y_train,
                     epochs = 25,
                     validation_split = 0.3,
                     batch_size = 128)

 

 

5. Visualizing the training results

plt.figure(figsize = (12, 4))
plt.subplot(1, 2, 1)
plt.plot(hist_1.history['loss'], 'b--', label = 'loss')
plt.plot(hist_1.history['val_loss'], 'r:', label = 'val_loss')
plt.xlabel('Epochs')
plt.grid()
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(hist_1.history['acc'], 'b--', label = 'accuracy')
plt.plot(hist_1.history['val_acc'], 'r:', label = 'val_accuracy')
plt.xlabel('Epochs')
plt.grid()
plt.legend()

 

 

6. Model evaluation

model_1.evaluate(x_test, y_test)

# Output
loss: 1.1168 - acc: 0.8566
[1.116817831993103, 0.8565999865531921]

 

 

7. Rebuilding the model (comparing trainable parameter counts)

def build_model_2():
    input = Input(shape = (28, 28, 1))
    output = Conv2D(filters = 32, kernel_size = (3, 3))(input)
    output = MaxPool2D(strides = (2, 2))(output)
    output = Conv2D(filters = 64, kernel_size = (3, 3))(output)
    output = MaxPool2D(strides = (2, 2))(output)
    output = Conv2D(filters = 64, kernel_size = (3, 3))(output)
    output = MaxPool2D(strides = (2, 2))(output)
    output = Flatten()(output)
    output = Dense(units = 128, activation = 'relu')(output)
    output = Dropout(0.3)(output)
    output = Dense(units = 64, activation = 'relu')(output)
    output = Dropout(0.3)(output)
    output = Dense(units = 10, activation = 'softmax')(output)

    model = Model(inputs = [input], outputs = [output])

    model.compile(optimizer = 'adam',
                  loss = 'sparse_categorical_crossentropy',
                  metrics = ['acc'])
    return model

model_2 = build_model_2()
model_2.summary()

# Output
Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_3 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d_6 (Conv2D)           (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 32)       0         
 )                                                               
                                                                 
 conv2d_7 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 5, 5, 64)         0         
 2D)                                                             
                                                                 
 conv2d_8 (Conv2D)           (None, 3, 3, 64)          36928     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 1, 1, 64)         0         
 2D)                                                             
                                                                 
 flatten_2 (Flatten)         (None, 64)                0         
                                                                 
 dense_6 (Dense)             (None, 128)               8320      
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_7 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_8 (Dense)             (None, 10)                650       
                                                                 
=================================================================
Total params: 72,970
Trainable params: 72,970
Non-trainable params: 0
_________________________________________________________________
  • The number of trainable parameters drops sharply (from about 4.03M to about 73K)
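  • Most of the savings come from the first Dense layer after Flatten: 30976 × 128 + 128 = 3,965,056 parameters in the first model versus 64 × 128 + 128 = 8,320 here, because the added max pooling shrinks the flattened feature vector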

 

 

8. Retraining the model

hist_2 = model_2.fit(x_train, y_train,
                     epochs = 25,
                     validation_split = 0.3,
                     batch_size = 128)

# Visualize the retraining results
plt.figure(figsize = (12, 4))
plt.subplot(1, 2, 1)
plt.plot(hist_2.history['loss'], 'b--', label = 'loss')
plt.plot(hist_2.history['val_loss'], 'r:', label = 'val_loss')
plt.xlabel('Epochs')
plt.grid()
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(hist_2.history['acc'], 'b--', label = 'accuracy')
plt.plot(hist_2.history['val_acc'], 'r:', label = 'val_accuracy')
plt.xlabel('Epochs')
plt.grid()
plt.legend()

  • Overfits the training data less than the first model

 

9. Re-evaluating the model

model_2.evaluate(x_test, y_test)

# Output
loss: 0.4026 - acc: 0.8830
[0.4026452302932739, 0.8830000162124634]

 

 

10. Improving performance (stacking more layers)

from tensorflow.keras.layers import BatchNormalization, ReLU

def build_model_3():
    input = Input(shape = (28, 28, 1))
    output = Conv2D(filters = 32, kernel_size = 3, activation = 'relu', padding = 'same')(input)
    output = Conv2D(filters = 64, kernel_size = 3, activation = 'relu', padding = 'valid')(output)
    output = MaxPool2D(strides = (2, 2))(output)
    output = Dropout(0.5)(output)

    output = Conv2D(filters = 128, kernel_size = 3, activation = 'relu', padding = 'same')(output)
    output = Conv2D(filters = 256, kernel_size = 3, activation = 'relu', padding = 'valid')(output)
    output = MaxPool2D(strides = (2, 2))(output)
    output = Dropout(0.5)(output)

    output = Flatten()(output)
    output = Dense(units = 256, activation = 'relu')(output)
    output = Dropout(0.5)(output)
    output = Dense(units = 100, activation = 'relu')(output)
    output = Dropout(0.5)(output)
    output = Dense(units = 10, activation = 'softmax')(output)

    model = Model(inputs = [input], outputs = [output])

    model.compile(optimizer = 'adam',
                  loss = 'sparse_categorical_crossentropy',
                  metrics = ['acc'])
    return model

model_3 = build_model_3()
model_3.summary()

# Output
Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_4 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d_9 (Conv2D)           (None, 28, 28, 32)        320       
                                                                 
 conv2d_10 (Conv2D)          (None, 26, 26, 64)        18496     
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 13, 13, 64)       0         
 2D)                                                             
                                                                 
 dropout_2 (Dropout)         (None, 13, 13, 64)        0         
                                                                 
 conv2d_11 (Conv2D)          (None, 13, 13, 128)       73856     
                                                                 
 conv2d_12 (Conv2D)          (None, 11, 11, 256)       295168    
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 5, 5, 256)        0         
 2D)                                                             
                                                                 
 dropout_3 (Dropout)         (None, 5, 5, 256)         0         
                                                                 
 flatten_3 (Flatten)         (None, 6400)              0         
                                                                 
 dense_9 (Dense)             (None, 256)               1638656   
                                                                 
 dropout_4 (Dropout)         (None, 256)               0         
                                                                 
 dense_10 (Dense)            (None, 100)               25700     
                                                                 
 dropout_5 (Dropout)         (None, 100)               0         
                                                                 
 dense_11 (Dense)            (None, 10)                1010      
                                                                 
=================================================================
Total params: 2,053,206
Trainable params: 2,053,206
Non-trainable params: 0
_________________________________________________________________

 

  - Model training and result visualization

hist_3 = model_3.fit(x_train, y_train,
                     epochs = 25,
                     validation_split = 0.3,
                     batch_size = 128)

  - The model does not overfit, and stacking more layers can still achieve good performance

plt.figure(figsize = (12, 4))
plt.subplot(1, 2, 1)
plt.plot(hist_3.history['loss'], 'b--', label = 'loss')
plt.plot(hist_3.history['val_loss'], 'r:', label = 'val_loss')
plt.xlabel('Epochs')
plt.grid()
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(hist_3.history['acc'], 'b--', label = 'accuracy')
plt.plot(hist_3.history['val_acc'], 'r:', label = 'val_accuracy')
plt.xlabel('Epochs')
plt.grid()
plt.legend()

model_3.evaluate(x_test, y_test)

# Output
loss: 0.2157 - acc: 0.9261
[0.21573999524116516, 0.9261000156402588]

 

 

11. Improving performance (Image Augmentation)

from tensorflow.keras.preprocessing.image import ImageDataGenerator

image_generator = ImageDataGenerator(
    rotation_range = 10,
    zoom_range = 0.2,
    shear_range = 0.6,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    horizontal_flip = True,
    vertical_flip = False
)

augment_size = 200

print(x_train.shape)
print(x_train[0].shape)

# Output
(60000, 28, 28, 1)
(28, 28, 1)
x_augment = image_generator.flow(np.tile(x_train[0].reshape(28 * 28), augment_size).reshape(-1, 28, 28, 1),
                                 np.zeros(augment_size), batch_size = augment_size, shuffle = False).next()[0]

plt.figure(figsize = (10, 10))
for i in range(1, 101):
    plt.subplot(10, 10, i)
    plt.axis('off')
    plt.imshow(x_augment[i - 1].reshape(28, 28), cmap = 'gray')

  • Use the code above to add extra data for training
from tensorflow.keras.preprocessing.image import ImageDataGenerator

image_generator = ImageDataGenerator(
    rotation_range = 15,
    zoom_range = 0.1,
    shear_range = 0.6,
    width_shift_range = 0.15,
    height_shift_range = 0.1,
    horizontal_flip = True,
    vertical_flip = False
)

augment_size = 30000

random_mask = np.random.randint(x_train.shape[0], size = augment_size)
x_augmented = x_train[random_mask].copy()
y_augmented = y_train[random_mask].copy()

x_augmented = image_generator.flow(x_augmented, np.zeros(augment_size),
                                   batch_size = augment_size, shuffle = False).next()[0]
x_train = np.concatenate((x_train, x_augmented))
y_train = np.concatenate((y_train, y_augmented))

# The 30,000 generated augmented samples have been appended
print(x_train.shape)

# Output
(90000, 28, 28, 1)

 

  - Model training and result visualization

model_4 = build_model_3()
model_4.summary()

# Output
Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_5 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d_13 (Conv2D)          (None, 28, 28, 32)        320       
                                                                 
 conv2d_14 (Conv2D)          (None, 26, 26, 64)        18496     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 13, 13, 64)       0         
 2D)                                                             
                                                                 
 dropout_6 (Dropout)         (None, 13, 13, 64)        0         
                                                                 
 conv2d_15 (Conv2D)          (None, 13, 13, 128)       73856     
                                                                 
 conv2d_16 (Conv2D)          (None, 11, 11, 256)       295168    
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 5, 5, 256)        0         
 2D)                                                             
                                                                 
 dropout_7 (Dropout)         (None, 5, 5, 256)         0         
                                                                 
 flatten_4 (Flatten)         (None, 6400)              0         
                                                                 
 dense_12 (Dense)            (None, 256)               1638656   
                                                                 
 dropout_8 (Dropout)         (None, 256)               0         
                                                                 
 dense_13 (Dense)            (None, 100)               25700     
                                                                 
 dropout_9 (Dropout)         (None, 100)               0         
                                                                 
 dense_14 (Dense)            (None, 10)                1010      
                                                                 
=================================================================
Total params: 2,053,206
Trainable params: 2,053,206
Non-trainable params: 0
_________________________________________________________________
hist_4 = model_4.fit(x_train, y_train,
                     epochs = 25,
                     validation_split = 0.3,
                     batch_size = 128)

plt.figure(figsize = (12, 4))
plt.subplot(1, 2, 1)
plt.plot(hist_4.history['loss'], 'b--', label = 'loss')
plt.plot(hist_4.history['val_loss'], 'r:', label = 'val_loss')
plt.xlabel('Epochs')
plt.grid()
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(hist_4.history['acc'], 'b--', label = 'accuracy')
plt.plot(hist_4.history['val_acc'], 'r:', label = 'val_accuracy')
plt.xlabel('Epochs')
plt.grid()
plt.legend()

model_4.evaluate(x_test, y_test)

# Output
loss: 0.2023 - acc: 0.9313
[0.2023032009601593, 0.9312999844551086]

 

  - Training with different hyperparameters should give even better results; one possible approach is sketched below
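
One way to explore this, shown here only as an illustrative sketch: let standard Keras callbacks adjust the run automatically. EarlyStopping and ReduceLROnPlateau are real Keras callbacks, but the patience/factor values and the model_5 / hist_5 names are assumptions.

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# stop when val_loss stalls and halve the learning rate on plateaus
callbacks = [
    EarlyStopping(monitor = 'val_loss', patience = 5, restore_best_weights = True),
    ReduceLROnPlateau(monitor = 'val_loss', factor = 0.5, patience = 2)
]

model_5 = build_model_3()
hist_5 = model_5.fit(x_train, y_train,
                     epochs = 50,
                     validation_split = 0.3,
                     batch_size = 128,
                     callbacks = callbacks)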

1. Main layers

  - Conv2D

  • tensorflow.keras.layers.Conv2D
  • tf.nn.conv2d
import tensorflow as tf
from tensorflow.keras.layers import Conv2D

import matplotlib.pyplot as plt

import numpy as np
from sklearn.datasets import load_sample_image

china = load_sample_image('china.jpg') / 255.
print(china.dtype)
print(china.shape)

# Output
float64
(427, 640, 3)


plt.imshow(china)
plt.show()

flower = load_sample_image('flower.jpg') / 255.
print(flower.dtype)
print(flower.shape)

# Output
float64
(427, 640, 3)


plt.imshow(flower)
plt.show()

images = np.array([china, flower])
batch_size, height, width, channels = images.shape
print(images.shape)

# Output
(2, 427, 640, 3)
# Create two 7 × 7 filters
filters = np.zeros(shape = (7, 7, channels, 2), dtype = np.float32)
# vertical line filter
filters[:, 3, :, 0] = 1
# horizontal line filter
filters[3, :, :, 1] = 1

print(filters.shape)

# Output
(7, 7, 3, 2)
# Using conv2d through the low-level TensorFlow API
outputs = tf.nn.conv2d(images, filters, strides = 1, padding = 'SAME')
print(outputs.shape)
plt.imshow(outputs[0, :, :, 1], cmap = 'gray')
plt.show()

# Output
(2, 427, 640, 2)

plt.imshow(outputs[0, :, :, 0], cmap = 'gray')
plt.show()

# Using Conv2D through the Keras API
conv = Conv2D(filters = 32, kernel_size = 3, strides = 1,
              padding = 'same', activation = 'relu')

 

  - MaxPool2D

  • Low-level TensorFlow API
    • tf.nn.max_pool
    • the user must work out the sizes manually
    • to use it as a layer in a Keras model, it must be wrapped in a Lambda layer
  • High-level Keras API
    • keras.layers.MaxPool2D
import tensorflow as tf
from tensorflow.keras.layers import MaxPool2D, Lambda

# Using max pool through the low-level TensorFlow API (here pooling depth-wise along the channel axis)
output = tf.nn.max_pool(images,
                        ksize = (1, 1, 1, 3),
                        strides = (1, 1, 1, 3),
                        padding = 'VALID')

# Wrapping the same call in a Lambda layer so it can be used inside a Keras model
output_keras = Lambda(
    lambda X: tf.nn.max_pool(X, ksize = (1, 1, 1, 3), strides = (1, 1, 1, 3), padding = 'VALID')
)


# Using max pool through the Keras API
max_pool = MaxPool2D(pool_size = 2)
flower = load_sample_image('flower.jpg') / 255.
print(flower.dtype)
print(flower.shape)

# Output
float64
(427, 640, 3)


# Add a batch dimension
flower = np.expand_dims(flower, axis = 0)
flower.shape

# Output
(1, 427, 640, 3)


# With pool_size = 2, max pooling halves the spatial dimensions
output = Conv2D(filters = 32, kernel_size = 3, strides = 1, padding = 'SAME', activation = 'relu')(flower)
output = MaxPool2D(pool_size = 2)(output)
output.shape

# Output
TensorShape([1, 213, 320, 32])
plt.imshow(output[0, :, :, 8], cmap = 'gray')
plt.show()

The resolution is lower than the original because the size has been reduced

 

  - AvgPool2D

  • Low-level TensorFlow API
    • tf.nn.avg_pool
  • High-level Keras API
    • keras.layers.AvgPool2D
from tensorflow.keras.layers import AvgPool2D

# Original shape
flower.shape

# Output
(1, 427, 640, 3)


# Apply average pooling (spatial dimensions halved)
output = Conv2D(filters = 32, kernel_size = 3, strides = 1, padding = 'SAME', activation = 'relu')(flower)
output = AvgPool2D(pool_size = 2)(output)
output.shape

# Output
TensorShape([1, 213, 320, 32])
plt.imshow(output[0, :, :, 8], cmap = 'gray')
plt.show()

 

  - GlobalAvgPool2D (global average pooling layer)

  • keras.layers.GlobalAvgPool2D()
  • Because it outputs only the mean of each feature map, most of the information in the feature maps is lost
  • It can still be useful as an output layer (see the check below)
from tensorflow.keras.layers import GlobalAvgPool2D

output = Conv2D(filters = 32, kernel_size = 3, strides = 1, padding = 'SAME', activation = 'relu')(flower)
output = GlobalAvgPool2D()(output)
output.shape

# Output
TensorShape([1, 32])
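
As a quick check (my addition, not in the original notes): global average pooling is simply a mean over the two spatial axes, so tf.reduce_mean reproduces it exactly. The conv_out / pooled / manual names are hypothetical.

conv_out = Conv2D(filters = 32, kernel_size = 3, strides = 1, padding = 'SAME', activation = 'relu')(flower)
pooled = GlobalAvgPool2D()(conv_out)
manual = tf.reduce_mean(conv_out, axis = [1, 2])
print(np.allclose(pooled.numpy(), manual.numpy()))   # True: identical (1, 32) values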

 

 

2. CNN structure and training by example

● A typical architecture

  - modules import

%load_ext tensorboard

import datetime
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, AvgPool2D, Dropout

from tensorflow.keras import datasets
from tensorflow.keras.utils import to_categorical, plot_model

 

  - Data loading and preprocessing

(x_train, y_train), (x_test, y_test) = datasets.fashion_mnist.load_data()

# Original data shapes
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

# Output
(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


# Add a channel axis to the x data
x_train = x_train[:, :, :, np.newaxis]
x_test = x_test[:, :, :, np.newaxis]
print(x_train.shape)
print(x_test.shape)

# Output
(60000, 28, 28, 1)
(10000, 28, 28, 1)


# One-hot encode the y data
num_classes = 10

y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)
print(y_train.shape)
print(y_test.shape)

# Output
(60000, 10)
(10000, 10)


# Scale the x data to the [0, 1] range
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255.
x_test /= 255.

 

  - A simple CNN model

def build():
    model = Sequential([Conv2D(64, 7, activation = 'relu', padding = 'same', input_shape = [28, 28, 1]),
                        MaxPool2D(pool_size = 2),
                        Conv2D(128, 3, activation = 'relu', padding = 'same'),
                        MaxPool2D(pool_size = 2),
                        Conv2D(256, 3, activation = 'relu', padding = 'SAME'),
                        MaxPool2D(pool_size = 2),
                        Flatten(),
                        Dense(128, activation = 'relu'),
                        Dropout(0.5),
                        Dense(64, activation = 'relu'),
                        Dropout(0.5),
                        Dense(10, activation = 'softmax')])
    return model

 

  - Model compilation

model = build()
model.compile(optimizer = 'adam',
              loss = 'categorical_crossentropy',
              metrics = ['accuracy'])
model.summary()

# Output
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d_11 (Conv2D)          (None, 28, 28, 64)        3200      
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 14, 14, 64)       0         
 2D)                                                             
                                                                 
 conv2d_12 (Conv2D)          (None, 14, 14, 128)       73856     
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 7, 7, 128)        0         
 g2D)                                                            
                                                                 
 conv2d_13 (Conv2D)          (None, 7, 7, 256)         295168    
                                                                 
 max_pooling2d_11 (MaxPoolin  (None, 3, 3, 256)        0         
 g2D)                                                            
                                                                 
 flatten_2 (Flatten)         (None, 2304)              0         
                                                                 
 dense_6 (Dense)             (None, 128)               295040    
                                                                 
 dropout_4 (Dropout)         (None, 128)               0         
                                                                 
 dense_7 (Dense)             (None, 64)                8256      
                                                                 
 dropout_5 (Dropout)         (None, 64)                0         
                                                                 
 dense_8 (Dense)             (None, 10)                650       
                                                                 
=================================================================
Total params: 676,170
Trainable params: 676,170
Non-trainable params: 0
_________________________________________________________________
plot_model(model)

 

  - Hyper Parameters

callbacks = [tf.keras.callbacks.TensorBoard(log_dir = './logs')]
EPOCHS = 20
BATCH_SIZE = 200
VERBOSE = 1

 

  - Model training (GPU recommended)

  • validation_split carves a validation set out of the training data
hist = model.fit(x_train, y_train,
                 epochs = EPOCHS,
                 batch_size = BATCH_SIZE,
                 validation_split = 0.3,
                 callbacks = callbacks,
                 verbose = VERBOSE)

 

  • Inspecting with TensorBoard
log_dir = './logs/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
%tensorboard --logdir logs/

 

● LeNet-5 (code source: https://datahacker.rs/lenet-5-implementation-tensorflow-2-0/)

  • One of the earliest CNN models
  • Designed for handwritten character recognition

https://www.researchgate.net/figure/The-LeNet-5-Architecture-a-convolutional-neural-network_fig4_321586653

  - modules import

import datetime
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, AvgPool2D, Dropout

from tensorflow.keras import datasets
from tensorflow.keras.utils import to_categorical, plot_model

from sklearn.model_selection import train_test_split

 

  - Data loading and preprocessing

(x_train_full, y_train_full), (x_test, y_test) = datasets.mnist.load_data()

x_train, x_val, y_train, y_val = train_test_split(x_train_full, y_train_full, test_size = 0.3, random_state = 777)

x_train = x_train[..., np.newaxis]
x_val = x_val[..., np.newaxis]
x_test = x_test[..., np.newaxis]

num_classes = 10
y_train = to_categorical(y_train, num_classes)
y_val = to_categorical(y_val, num_classes)
y_test = to_categorical(y_test, num_classes)

x_train = x_train.astype('float32')
x_val = x_val.astype('float32')
x_test = x_test.astype('float32')

x_train /= 255.
x_val /= 255.
x_test /= 255.

print(x_train.shape)
print(y_train.shape)
print(x_val.shape)
print(y_val.shape)
print(x_test.shape)
print(y_test.shape)

# Output
(42000, 28, 28, 1)
(42000, 10)
(18000, 28, 28, 1)
(18000, 10)
(10000, 28, 28, 1)
(10000, 10)

 

  - Model construction and compilation

class LeNet(Sequential):
    def __init__(self, input_shape, nb_classes):
        super().__init__()

        self.add(Conv2D(6, kernel_size = (5, 5), strides = (1, 1), activation = 'tanh', input_shape = input_shape, padding = 'SAME'))
        self.add(AvgPool2D(pool_size = (2, 2), strides = (2, 2), padding = 'valid'))
        self.add(Conv2D(16, kernel_size = (5, 5), strides = (1, 1), activation = 'tanh', padding = 'valid'))
        self.add(AvgPool2D(pool_size = (2, 2), strides = (2, 2), padding = 'valid'))
        self.add(Flatten())
        self.add(Dense(120, activation = 'tanh'))
        self.add(Dense(84, activation = 'tanh'))
        self.add(Dense(nb_classes, activation = 'softmax'))

        self.compile(optimizer = 'adam',
                     loss = 'categorical_crossentropy',
                     metrics = ['accuracy'])

model = LeNet(input_shape = (28, 28, 1), nb_classes = 10)
model.summary()

# Output
Model: "le_net_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d_17 (Conv2D)          (None, 28, 28, 6)         156       
                                                                 
 average_pooling2d_3 (Averag  (None, 14, 14, 6)        0         
 ePooling2D)                                                     
                                                                 
 conv2d_18 (Conv2D)          (None, 10, 10, 16)        2416      
                                                                 
 average_pooling2d_4 (Averag  (None, 5, 5, 16)         0         
 ePooling2D)                                                     
                                                                 
 flatten_3 (Flatten)         (None, 400)               0         
                                                                 
 dense_9 (Dense)             (None, 120)               48120     
                                                                 
 dense_10 (Dense)            (None, 84)                10164     
                                                                 
 dense_11 (Dense)            (None, 10)                850       
                                                                 
=================================================================
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
_________________________________________________________________
plot_model(model, show_shapes = True)

 

  - Hyper Parameters

EPOCHS = 20
BATCH_SIZE = 128
VERBOSE = 1

 

  - Train the model

hist = model.fit(x_train, y_train,
                 epochs = EPOCHS,
                 batch_size = BATCH_SIZE,
                 validation_data = (x_val, y_val),
                 verbose = VERBOSE)

 

  - Visualize the training curves

plt.figure(figsize = (12, 6))

plt.subplot(1, 2, 1)
plt.plot(hist.history['loss'], 'b-', label = 'loss')
plt.plot(hist.history['val_loss'], 'm--', label = 'val_loss')
plt.xlabel('Epochs')
plt.grid()
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(hist.history['accuracy'], 'g-', label = 'accuracy')
plt.plot(hist.history['val_accuracy'], 'r-', label = 'val_accuracy')
plt.xlabel('Epochs')
plt.grid()
plt.legend()

plt.show()

 

  - Evaluate the model

model.evaluate(x_test, y_test)

# Output
313/313 [==============================] - 3s 7ms/step - loss: 0.0564 - accuracy: 0.9854
[0.0564129501581192, 0.9854000210762024]

1. Data API

 

Reference: Module tf.data (TensorFlow v2.12.0), the tf.data.Dataset API for input pipelines: www.tensorflow.org

  • tensorflow_datasets (tfds)

 

  - tensorflow_datasets (tfds)

import tensorflow as tf
import tensorflow_datasets as tfds

# list the available dataset builders
builders = tfds.list_builders()
print(builders)

# Output
['abstract_reasoning',
'accentdb',
'aeslc',
'aflw2k3d',
'ag_news_subset',
...
'yelp_polarity_reviews',
'yes_no',
'youtube_vis']
# load the MNIST dataset
data, info = tfds.load('mnist', with_info = True)
train_data, test_data = data['train'], data['test']

print(info)

# Output
tfds.core.DatasetInfo(
    name='mnist',
    full_name='mnist/3.0.1',
    description="""
    The MNIST database of handwritten digits.
    """,
    homepage='http://yann.lecun.com/exdb/mnist/',
    data_path='C:\\Users\\YONG\\tensorflow_datasets\\mnist\\3.0.1',
    file_format=tfrecord,
    download_size=11.06 MiB,
    dataset_size=21.00 MiB,
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=uint8),
        'label': ClassLabel(shape=(), dtype=int64, num_classes=10),
    }),
    supervised_keys=('image', 'label'),
    disable_shuffling=False,
    splits={
        'test': <SplitInfo num_examples=10000, num_shards=1>,
        'train': <SplitInfo num_examples=60000, num_shards=1>,
    },
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist},
      volume={2},
      year={2010}
    }""",
)

 

  - tf.data

  • Creation
    • from_tensor_slices(): takes one or more NumPy arrays and slices them along the first axis; supports batching (contrast with from_tensors in the sketch after this list)
    • from_tensors(): wraps the whole input as a single element, so it does not support batching
    • from_generator(): takes its input from a generator function
  • Transformation
    • batch(): splits the dataset sequentially into batches of the given size
    • repeat(): repeats the data
    • shuffle(): shuffles the data randomly
    • map(): applies a function to every element
    • filter(): keeps only the elements you want
  • Iteration
    • use next_batch = iterator.get_next()
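A quick sketch contrasting from_tensor_slices and from_tensors (illustrative values, not from the original notebook):

import numpy as np
import tensorflow as tf

data = np.arange(6).reshape(3, 2)

# from_tensor_slices: slices along the first axis -> 3 elements of shape (2,)
sliced = tf.data.Dataset.from_tensor_slices(data)
print(sliced.element_spec)  # e.g. TensorSpec(shape=(2,), ...)

# from_tensors: the whole array becomes a single element of shape (3, 2)
whole = tf.data.Dataset.from_tensors(data)
print(whole.element_spec)   # e.g. TensorSpec(shape=(3, 2), ...)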

 

  - from_tensor_slices

import numpy as np

num_items = 20
num_list = np.arange(num_items)

num_list_dataset = tf.data.Dataset.from_tensor_slices(num_list)
num_list_dataset

# Output
# elements are scalars, so there is no shape yet
<TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>
for item in num_list_dataset:
    print(item)

# Output
# 20 scalar tensors
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(7, shape=(), dtype=int32)
tf.Tensor(8, shape=(), dtype=int32)
tf.Tensor(9, shape=(), dtype=int32)
tf.Tensor(10, shape=(), dtype=int32)
tf.Tensor(11, shape=(), dtype=int32)
tf.Tensor(12, shape=(), dtype=int32)
tf.Tensor(13, shape=(), dtype=int32)
tf.Tensor(14, shape=(), dtype=int32)
tf.Tensor(15, shape=(), dtype=int32)
tf.Tensor(16, shape=(), dtype=int32)
tf.Tensor(17, shape=(), dtype=int32)
tf.Tensor(18, shape=(), dtype=int32)
tf.Tensor(19, shape=(), dtype=int32)

 

  - from_generator()

  • This class method builds a dataset from a generator function
  • output_types and output_shapes must specify the output dtype and shape
import itertools

# i counts up by 1; yield i together with a list of i ones
def gen():
    for i in itertools.count(1):
        yield(i, [1] * i)

# the generator defined above
# output dtype: int64
# output shapes: TensorShape([]) and TensorShape([None])
dataset = tf.data.Dataset.from_generator(
    gen,
    (tf.int64, tf.int64),
    (tf.TensorShape([]), tf.TensorShape([None]))
)
list(dataset.take(3).as_numpy_iterator())

# Output
[(1, array([1], dtype=int64)),
 (2, array([1, 1], dtype=int64)),
 (3, array([1, 1, 1], dtype=int64))]
# without a stop condition, the gen above runs forever;
# this version breaks out of the loop once i reaches stop
def gen(stop):
    for i in itertools.count(1):
        if i < stop:
            yield(i, [1] * i)
        else:
            break

dataset = tf.data.Dataset.from_generator(
    gen, args = [10],
    output_types = (tf.int64,  tf.int64),
    output_shapes = (tf.TensorShape([]), tf.TensorShape([None]))
)

list(dataset.take(5).as_numpy_iterator())

# Output
[(1, array([1], dtype=int64)),
 (2, array([1, 1], dtype=int64)),
 (3, array([1, 1, 1], dtype=int64)),
 (4, array([1, 1, 1, 1], dtype=int64)),
 (5, array([1, 1, 1, 1, 1], dtype=int64))]

 

  - batch, repeat

  • batch(): the batch size
  • repeat(): the number of repetitions
# batch size 7, repeated 3 times
dataset = num_list_dataset.repeat(3).batch(7)
for item in dataset:
    print(item)

# Output
# the batch size is 7, so the data is split into groups of 7,
# and the whole sequence is repeated 3 times
tf.Tensor([0 1 2 3 4 5 6], shape=(7,), dtype=int32)
tf.Tensor([ 7  8  9 10 11 12 13], shape=(7,), dtype=int32)
tf.Tensor([14 15 16 17 18 19  0], shape=(7,), dtype=int32)
tf.Tensor([1 2 3 4 5 6 7], shape=(7,), dtype=int32)
tf.Tensor([ 8  9 10 11 12 13 14], shape=(7,), dtype=int32)
tf.Tensor([15 16 17 18 19  0  1], shape=(7,), dtype=int32)
tf.Tensor([2 3 4 5 6 7 8], shape=(7,), dtype=int32)
tf.Tensor([ 9 10 11 12 13 14 15], shape=(7,), dtype=int32)
tf.Tensor([16 17 18 19], shape=(4,), dtype=int32)
# to keep only full batches, with no leftover elements at the end,
# set drop_remainder = True
dataset = num_list_dataset.repeat(3).batch(7, drop_remainder = True)

for item in dataset:
    print(item)

# Output
# the final 4-element batch is dropped
tf.Tensor([0 1 2 3 4 5 6], shape=(7,), dtype=int32)
tf.Tensor([ 7  8  9 10 11 12 13], shape=(7,), dtype=int32)
tf.Tensor([14 15 16 17 18 19  0], shape=(7,), dtype=int32)
tf.Tensor([1 2 3 4 5 6 7], shape=(7,), dtype=int32)
tf.Tensor([ 8  9 10 11 12 13 14], shape=(7,), dtype=int32)
tf.Tensor([15 16 17 18 19  0  1], shape=(7,), dtype=int32)
tf.Tensor([2 3 4 5 6 7 8], shape=(7,), dtype=int32)
tf.Tensor([ 9 10 11 12 13 14 15], shape=(7,), dtype=int32)

 

  - map, filter

  • Applied at the preprocessing stage to transform data or drop unwanted elements
  • Both operate on tf.Tensor values
# apply a map function
from tensorflow.data import Dataset

# the list [1, 2, 3, 4, 5]
dataset = Dataset.range(1, 6)
# multiply every value by 2 via map
dataset = dataset.map(lambda x: x * 2)
list(dataset.as_numpy_iterator())

# Output
[2, 4, 6, 8, 10]


# printing the dataset directly, without as_numpy_iterator()
dataset = Dataset.range(5)
result = dataset.map(lambda x: x + 1)
result

# Output
<MapDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
# map can also be used to preprocess and extract only the fields you want
elements = [(1, 'one'), (2, 'two'), (3, 'three')]
dataset = Dataset.from_generator(lambda: elements, (tf.int32, tf.string))
result = dataset.map(lambda x_int, y_str: x_int)
list(result.as_numpy_iterator())

# Output
[1, 2, 3]
dataset = Dataset.range(3)

# 1. basic declaration
def g(x):
    return tf.constant(10.5), tf.constant(['One', 'Two', 'Three'])

result = dataset.map(g)
# check the spec of each element
result.element_spec

# Output
(TensorSpec(shape=(), dtype=tf.float32, name=None),
 TensorSpec(shape=(3,), dtype=tf.string, name=None))
 
 
# 2. even without tf.constant, TensorFlow dtypes are inferred automatically
def h(x):
    return 10.5, ['One', 'Two', 'Three'], np.array([1., 2.], dtype = np.float64)

result = dataset.map(h)
result.element_spec

# Output
(TensorSpec(shape=(), dtype=tf.float32, name=None),
 TensorSpec(shape=(3,), dtype=tf.string, name=None),
 TensorSpec(shape=(2,), dtype=tf.float64, name=None))
 
 
# 3. nested list-like structures are also allowed
def i(x):
    return (10.5, [12.5, 11.1]), "One", "Two"

result = dataset.map(i)
result.element_spec

# Output
((TensorSpec(shape=(), dtype=tf.float32, name=None),
  TensorSpec(shape=(2,), dtype=tf.float32, name=None)),
 TensorSpec(shape=(), dtype=tf.string, name=None),
 TensorSpec(shape=(), dtype=tf.string, name=None))
# 1. specify a condition with filter
dataset = Dataset.from_tensor_slices([1, 2, 3])
dataset = dataset.filter(lambda x: x < 3)
list(dataset.as_numpy_iterator())

# Output
[1, 2]


# 2. the filter can also be a named function
# keep only the elements equal to 1
def filter_fn(x):
    return tf.math.equal(x, 1)

dataset = dataset.filter(filter_fn)
list(dataset.as_numpy_iterator())

# Output
[1]

 

  - shuffle, take

# fetch the data
dataset, info = tfds.load('imdb_reviews', with_info = True, as_supervised = True)

train_dataset = dataset['train']
# group into batches of 5, shuffle those batches with a buffer of 5, and take 2 batches
train_dataset = train_dataset.batch(5).shuffle(5).take(2)

for data in train_dataset:
    print(data)

# Output (5 movie reviews per batch, shuffled; 2 batches shown)
(<tf.Tensor: shape=(5,), dtype=string, numpy=
array([b'It was disgusting and painful. What a waste of a cast! I swear, the audience (1/2 full) laughed TWICE in 90 minutes. This is not a lie. Do not even rent it.<br /><br />Zeta Jones was just too mean to be believable.<br /><br />Cusack was OK. Just OK. I felt sorry for him (the actor) in case people remember this mess.<br /><br />Roberts was the same as she always is. Charming and sweet, but with no purpose. The "romance" with John was completely unbelievable.',
       b'This is a straight-to-video movie, so it should go without saying that it\'s not going to rival the first Lion King, but that said, this was downright good.<br /><br />My kids loved this, but that\'s a given, they love anything that\'s a cartoon. The big shock was that *I* liked it too, it was laugh out loud funny at some parts (even the fart jokes*), had lots of rather creative tie-ins with the first movie, and even some jokes that you had to be older to understand (but without being risqu\xc3\xa9 like in Shrek ["do you think he\'s compensating for something?"]).<br /><br />A special note on the fart jokes, I was surprised to find that none of the jokes were just toilet noises (in fact there were almost no noises/imagery at all, the references were actually rather subtle), they actually had a setup/punchline/etc, and were almost in good taste. I\'d like my kids to think that there\'s more to humor than going to the bathroom, and this movie is fine in those regards.<br /><br />Hmm what else? The music was so-so, not nearly as creative as in the first or second movie, but plenty of fun for the kids. No painfully corny moments, which was a blessing for me. A little action but nothing too scary (the Secret of NIMH gave my kids nightmares, not sure a G rating was appropriate for that one...)<br /><br />All in all I\'d say this is a great movie for kids of any age, one that\'s 100% safe to let them watch (I try not to be overly sensitive but I\'ve had to jump up and turn off the TV during a few movies that were less kid-appropriate than expected) - but you\'re safe to leave the room during this one. I\'d say stick around anyway though, you might find that you enjoy it too :)',
       b'Finally, Timon and Pumbaa in their own film...<br /><br />\'The Lion King 1 1/2: Hakuna Matata\' is an irreverent new take on a classic tale. Which classic tale, you ask? Why, \'The Lion King\' of course!<br /><br />Yep, if there\'s one thing that Disney is never short of, it\'s narcissism.<br /><br />But that doesn\'t mean that this isn\'t a good film. It\'s basically the events of \'The Lion King\' as told from Timon and Pumbaa\'s perspective. And it\'s because of this that you\'ll have to know the story of \'The Lion King\' by heart to see where they\'re coming from.<br /><br />Anyway, at one level I was watching this and thinking "Oh my god this is so lame..." and on another level I was having a ball. Much of the humour is predictable - I mean, when Pumbaa makes up two beds, a big one for himself and a small one for Timon, within the first nanosecond we all know that Timon is going to take the big one. But that doesn\'t stop it from being hilarious, which, IMO, is \'Hakuna Matata\' in a nutshell. It\'s not what happens, it\'s how.<br /><br />And a note of warning: there are also some fart jokes. Seriously, did you expect anything else in a film where Pumbaa takes centre stage? But as fart jokes go, these are especially good, and should satisfy even the most particular connoisseur.<br /><br />The returning voice talent is great. I\'m kinda surprised that some of the actors were willing to return, what with most of them only having two or three lines (if they\'re lucky). Whoopi Goldberg is particularly welcome.<br /><br />The music is also great. From \'Digga Tunnah\' at the start to \'That\'s all I need\', an adaption of \'Warthog Rhapsody\' (a song that was cut from \'The Lion King\' and is frankly much improved in this incarnation), the music leaves me with nothing to complain about whatsoever.<br /><br />In the end, Timon and Pumbaa are awesome characters, and while it may be argued that \'Hakuna Matata\' is simply an excuse to see them in various fun and assorted compromising situations then so be it. It\'s rare to find characters that you just want to spend time with.<br /><br />Am I starting to sound creepy?<br /><br />Either way, \'The Lion King 1 1/2\' is great if you\'ve seen \'The Lion King\' far too many times. Especially if you are right now thinking "Don\'t be silly, there\'s no such thing as seeing \'The Lion King\' too many times!"',
       b'Indian Directors have it tough, They have to compete with movies like "Laggan" where 11 henpecked,Castrated males defend their village and half of them are certifiable idiots. "Devdas", a hapless, fedar- festooned foreign return drinking to oblivion, with characters running in endless corridors oblivious to any one\'s feelings or sentiments-alas they live in an ornate squalor of red tapestry and pageantry. But to make a good movie, you have to tight-rope walk to appease the frontbenchers who are the quentessential gapers who are mesmerized with Split skirts and Dishum-Dishum fights preferably involving a nitwit "Bollywood" leading actor who is marginally handsome. So you can connect with a director who wants to tell a tale of Leonine village head who in own words "defending his Village" this is considered a violent movie or too masculine for a male audience. There are very few actors who can convey the anger and pathos like Nana Patekar (Narasimhan). Nana Patekar lets you in his courtyard and watch him beret and mock the Politician when his loyal admirers burst in laughter with every word of satire thrown at him, meanwhile his daughter is bathing his Grandson.This is as authentic a scene you can get in rural India. Nana Patekar is the essential actor who belongs to the old school of acting which is a disappearing breed in Hindi Films. The violence depicted is an intricate part of storytelling with Song&Dances thrown in for the gawkers without whom movies won\'t sell, a sad but true state of affairs. Faster this changes better for "Bollywood". All said and done this is one good Movie.',
       b"Nathan Detroit runs illegal craps games for high rollers in NYC, but the heat is on and he can't find a secure location. He bets chronic gambler Sky Masterson that Sky can't make a prim missionary, Sarah Brown, go out to dinner with him. Sky takes up the challenge, but both men have some surprises in store \xc2\x85<br /><br />This is one of those expensive fifties MGM musicals in splashy colour, with big sets, loud music, larger-than-life roles and performances to match; Broadway photographed for the big screen if you like that sort of thing, which I don't. My main problem with these type of movies is simply the music. I like all kinds of music, from Albinoni to ZZ Top, but Broadway show tunes in swing time with never-ending pah-pah-tah-dah trumpet flourishes at the end of every fourth bar aren't my cup of tea. This was written by the tag team of Frank Loesser, Mankiewicz, Jo Swerling and Abe Burrows (based on a couple of Damon Runyon stories), and while the plot is quite affable the songs are weak. Blaine's two numbers for example are identical, unnecessary, don't advance the plot and grate on the ears (and are also flagrantly misogynistic if that sort of thing bothers you). There are only two memorable tunes, Luck Be A Lady (sung by Brando, not Sinatra as you might expect) and Sit Down, You're Rockin' The Boat (nicely performed by Kaye) but you have to sit through two hours to get to them. The movie's trump card is a young Brando giving a thoughtful, laid-back performance; he also sings quite well and even dances a little, and is evenly matched with the always interesting Simmons. The sequence where the two of them escape to Havana for the night is a welcome respite from all the noise, bustle and vowel-murdering of Noo Yawk. Fans of musicals may dig this, but in my view a musical has to do something more than just film the stage show."],
      dtype=object)>, <tf.Tensor: shape=(5,), dtype=int64, numpy=array([0, 1, 1, 1, 0], dtype=int64)>)
(<tf.Tensor: shape=(5,), dtype=string, numpy=
array([b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it.",
       b'I have been known to fall asleep during films, but this is usually due to a combination of things including, really tired, being warm and comfortable on the sette and having just eaten a lot. However on this occasion I fell asleep because the film was rubbish. The plot development was constant. Constantly slow and boring. Things seemed to happen, but with no explanation of what was causing them or why. I admit, I may have missed part of the film, but i watched the majority of it and everything just seemed to happen of its own accord without any real concern for anything else. I cant recommend this film at all.',
       b'Mann photographs the Alberta Rocky Mountains in a superb fashion, and Jimmy Stewart and Walter Brennan give enjoyable performances as they always seem to do. <br /><br />But come on Hollywood - a Mountie telling the people of Dawson City, Yukon to elect themselves a marshal (yes a marshal!) and to enforce the law themselves, then gunfighters battling it out on the streets for control of the town? <br /><br />Nothing even remotely resembling that happened on the Canadian side of the border during the Klondike gold rush. Mr. Mann and company appear to have mistaken Dawson City for Deadwood, the Canadian North for the American Wild West.<br /><br />Canadian viewers be prepared for a Reefer Madness type of enjoyable howl with this ludicrous plot, or, to shake your head in disgust.',
       b'This is the kind of film for a snowy Sunday afternoon when the rest of the world can go ahead with its own business as you descend into a big arm-chair and mellow for a couple of hours. Wonderful performances from Cher and Nicolas Cage (as always) gently row the plot along. There are no rapids to cross, no dangerous waters, just a warm and witty paddle through New York life at its best. A family film in every sense and one that deserves the praise it received.',
       b'As others have mentioned, all the women that go nude in this film are mostly absolutely gorgeous. The plot very ably shows the hypocrisy of the female libido. When men are around they want to be pursued, but when no "men" are around, they become the pursuers of a 14 year old boy. And the boy becomes a man really fast (we should all be so lucky at this age!). He then gets up the courage to pursue his true love.'],
      dtype=object)>, <tf.Tensor: shape=(5,), dtype=int64, numpy=array([0, 0, 0, 1, 1], dtype=int64)>)

 

  - get_next()

dataset = Dataset.range(2)
for element in dataset:
    print(element)

# Output
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
dataset = Dataset.range(2)
iterator = iter(dataset)

print(dataset)
# advance to the next element
print(iterator.get_next())
print(iterator.get_next())

# Output
<RangeDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
a = np.random.randint(0, 10, size = (2, 3))
print(a)

dataset = Dataset.from_tensor_slices(a)
iterator = iter(dataset)

print(iterator.get_next())
print(iterator.get_next())

# Output
# each call to get_next() returns the next row of the original 2-row array a
[[0 7 2]
 [6 1 4]]
tf.Tensor([0 7 2], shape=(3,), dtype=int32)
tf.Tensor([6 1 4], shape=(3,), dtype=int32)

 

2. Fashion-MNIST classification with tf.data

  - modules import

import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Flatten, Dropout, Activation, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.datasets.fashion_mnist import load_data

 

  - Load the data

(x_train, y_train), (x_test, y_test) = load_data()

# check the data shapes
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

# Output
(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)

 

  - Preprocess the data

x_train = x_train / 255.
x_test = x_test / 255.

 

  - Build the input pipeline with tf.data

train_ds = Dataset.from_tensor_slices((x_train, y_train))
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.batch(32)

test_ds = Dataset.from_tensor_slices((x_test, y_test))
test_ds = test_ds.batch(32)

 

  - Inspect the data

class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

for image, label in train_ds.take(2):
    plt.title("{}".format(class_names[label[0]]))
    plt.imshow(image[0, :, :], cmap = 'gray')
    plt.show()

 

  - Build the model

  • An arbitrary model
def build_model():
    input = Input(shape = (28, 28), name = 'input')
    flatten = Flatten(input_shape = [28, 28], name = 'flatten')(input)
    hidden1 = Dense(256, kernel_initializer = 'he_normal', name = 'hidden1')(flatten)
    hidden1 = BatchNormalization()(hidden1)
    hidden1 = Activation('relu')(hidden1)
    dropout1 = Dropout(0.5)(hidden1)

    hidden2 = Dense(100, kernel_initializer = 'he_normal', name = 'hidden2')(dropout1)
    hidden2 = BatchNormalization()(hidden2)
    hidden2 = Activation('relu')(hidden2)
    dropout2 = Dropout(0.5)(hidden2)

    hidden3 = Dense(100, kernel_initializer = 'he_normal', name = 'hidden3')(dropout2)
    hidden3 = BatchNormalization()(hidden3)
    hidden3 = Activation('relu')(hidden3)
    dropout3 = Dropout(0.5)(hidden3)

    hidden4 = Dense(50, kernel_initializer = 'he_normal', name = 'hidden4')(dropout3)
    hidden4 = BatchNormalization()(hidden4)
    hidden4 = Activation('relu')(hidden4)
    dropout4 = Dropout(0.5)(hidden4)

    output = Dense(10, activation = 'softmax', name = 'output')(dropout4)

    model = Model(inputs = [input], outputs = [output])

    return model
model = build_model()

model.summary()

# Output
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input (InputLayer)          [(None, 28, 28)]          0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 hidden1 (Dense)             (None, 256)               200960    
                                                                 
 batch_normalization (BatchN  (None, 256)              1024      
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 256)               0         
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 hidden2 (Dense)             (None, 100)               25700     
                                                                 
 batch_normalization_1 (Batc  (None, 100)              400       
 hNormalization)                                                 
                                                                 
 activation_1 (Activation)   (None, 100)               0         
                                                                 
 dropout_1 (Dropout)         (None, 100)               0         
                                                                 
 hidden3 (Dense)             (None, 100)               10100     
                                                                 
 batch_normalization_2 (Batc  (None, 100)              400       
 hNormalization)                                                 
                                                                 
 activation_2 (Activation)   (None, 100)               0         
                                                                 
 dropout_2 (Dropout)         (None, 100)               0         
                                                                 
 hidden4 (Dense)             (None, 50)                5050      
                                                                 
 batch_normalization_3 (Batc  (None, 50)               200       
 hNormalization)                                                 
                                                                 
 activation_3 (Activation)   (None, 50)                0         
                                                                 
 dropout_3 (Dropout)         (None, 50)                0         
                                                                 
 output (Dense)              (None, 10)                510       
                                                                 
=================================================================
Total params: 244,344
Trainable params: 243,332
Non-trainable params: 1,012
_________________________________________________________________

 

  - Compile the model

  • An alternative way to compute metrics
    • tf.keras.metrics.Mean
    • tf.keras.metrics.SparseCategoricalAccuracy
  • These two smooth the reported loss by averaging over all batches seen
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name = 'train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name = 'train_accuracy')

test_loss = tf.keras.metrics.Mean(name = 'test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name = 'test_accuracy')

 

  - Train the model

  • Once training starts, tf.function compiles the step into a graph, which makes it fast
# tf.function enables autograph compilation for better performance
@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = loss_object(labels, predictions)
    
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)

@tf.function
def test_step(images, labels):
    predictions = model(images)
    t_loss = loss_object(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)

epochs = 20

for epoch in range(epochs):
    for images, labels in train_ds:
        train_step(images, labels)
    
    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)

    template = "Epochs: {:3d}\tLoss: {:.4f}\tAccuracy: {:.4f}\tTest Loss: {:.4f}\tTest Accuracy: {:.4f}"
    print(template.format(epoch + 1,
                          train_loss.result(),
                          train_accuracy.result() * 100,
                          test_loss.result(),
                          test_accuracy.result() * 100))

# Output
Epochs:   1	Loss: 0.3975	Accuracy: 85.4906	Test Loss: 0.3896	Test Accuracy: 85.6400
Epochs:   2	Loss: 0.3756	Accuracy: 86.2650	Test Loss: 0.3840	Test Accuracy: 85.9050
Epochs:   3	Loss: 0.3586	Accuracy: 86.8523	Test Loss: 0.3768	Test Accuracy: 86.2340
Epochs:   4	Loss: 0.3450	Accuracy: 87.3364	Test Loss: 0.3706	Test Accuracy: 86.4583
Epochs:   5	Loss: 0.3333	Accuracy: 87.7414	Test Loss: 0.3684	Test Accuracy: 86.6014
Epochs:   6	Loss: 0.3232	Accuracy: 88.0877	Test Loss: 0.3648	Test Accuracy: 86.7925
Epochs:   7	Loss: 0.3144	Accuracy: 88.3983	Test Loss: 0.3639	Test Accuracy: 86.8289
Epochs:   8	Loss: 0.3066	Accuracy: 88.6765	Test Loss: 0.3618	Test Accuracy: 87.0010
Epochs:   9	Loss: 0.2994	Accuracy: 88.9215	Test Loss: 0.3595	Test Accuracy: 87.1400
Epochs:  10	Loss: 0.2927	Accuracy: 89.1588	Test Loss: 0.3595	Test Accuracy: 87.1833
Epochs:  11	Loss: 0.2864	Accuracy: 89.3894	Test Loss: 0.3573	Test Accuracy: 87.3015
Epochs:  12	Loss: 0.2808	Accuracy: 89.5865	Test Loss: 0.3570	Test Accuracy: 87.3336
Epochs:  13	Loss: 0.2753	Accuracy: 89.7777	Test Loss: 0.3583	Test Accuracy: 87.4113
Epochs:  14	Loss: 0.2703	Accuracy: 89.9568	Test Loss: 0.3577	Test Accuracy: 87.4900
Epochs:  15	Loss: 0.2654	Accuracy: 90.1251	Test Loss: 0.3583	Test Accuracy: 87.5524
Epochs:  16	Loss: 0.2609	Accuracy: 90.2880	Test Loss: 0.3615	Test Accuracy: 87.5750
Epochs:  17	Loss: 0.2565	Accuracy: 90.4376	Test Loss: 0.3626	Test Accuracy: 87.6426
Epochs:  18	Loss: 0.2525	Accuracy: 90.5751	Test Loss: 0.3634	Test Accuracy: 87.6910
Epochs:  19	Loss: 0.2484	Accuracy: 90.7171	Test Loss: 0.3651	Test Accuracy: 87.7324
Epochs:  20	Loss: 0.2446	Accuracy: 90.8512	Test Loss: 0.3667	Test Accuracy: 87.7555
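Note that tf.keras.metrics.Mean and SparseCategoricalAccuracy accumulate over every call, and the loop above never resets them, so each printed line is a running average over all epochs so far (which is also why the curves look so smooth). A sketch of a per-epoch variant, reusing the metric objects and step functions defined above (reset_state() is the TF 2.x method name; very old versions spell it reset_states()):

for epoch in range(epochs):
    # clear the accumulated state so each epoch reports its own average
    for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_state()

    for images, labels in train_ds:
        train_step(images, labels)

    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)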

 

  - Training, method 2: the Keras fit API

from sklearn.model_selection import train_test_split

(x_train_full, y_train_full), (x_test, y_test) = load_data()

x_train, x_val, y_train, y_val = train_test_split(x_train_full, y_train_full, test_size = 0.3, random_state = 777)

x_train = x_train / 255.
x_val = x_val / 255.
x_test = x_test / 255.

print(x_train.shape)
print(y_train.shape)
print(x_val.shape)
print(y_val.shape)
print(x_test.shape)
print(y_test.shape)

model = build_model()
model.compile(optimizer = 'sgd',
              loss = 'sparse_categorical_crossentropy',
              metrics = ['accuracy'])

model.summary()

# Output
(42000, 28, 28)
(42000,)
(18000, 28, 28)
(18000,)
(10000, 28, 28)
(10000,)
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input (InputLayer)          [(None, 28, 28)]          0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 hidden1 (Dense)             (None, 256)               200960    
                                                                 
 batch_normalization_4 (Batc  (None, 256)              1024      
 hNormalization)                                                 
                                                                 
 activation_4 (Activation)   (None, 256)               0         
                                                                 
 dropout_4 (Dropout)         (None, 256)               0         
                                                                 
 hidden2 (Dense)             (None, 100)               25700     
                                                                 
 batch_normalization_5 (Batc  (None, 100)              400       
 hNormalization)                                                 
                                                                 
 activation_5 (Activation)   (None, 100)               0         
                                                                 
 dropout_5 (Dropout)         (None, 100)               0         
                                                                 
 hidden3 (Dense)             (None, 100)               10100     
                                                                 
 batch_normalization_6 (Batc  (None, 100)              400       
 hNormalization)                                                 
                                                                 
 activation_6 (Activation)   (None, 100)               0         
                                                                 
 dropout_6 (Dropout)         (None, 100)               0         
                                                                 
 hidden4 (Dense)             (None, 50)                5050      
                                                                 
 batch_normalization_7 (Batc  (None, 50)               200       
 hNormalization)                                                 
                                                                 
 activation_7 (Activation)   (None, 50)                0         
                                                                 
 dropout_7 (Dropout)         (None, 50)                0         
                                                                 
 output (Dense)              (None, 10)                510       
                                                                 
=================================================================
Total params: 244,344
Trainable params: 243,332
Non-trainable params: 1,012
_________________________________________________________________
from tensorflow.keras.callbacks import EarlyStopping

early_stopping_cb = EarlyStopping(patience = 3, monitor = 'val_loss',
                                  restore_best_weights = True)
history = model.fit(x_train, y_train,
                    batch_size = 256,
                    epochs = 200,
                    shuffle = True,
                    validation_data = (x_val, y_val),
                    callbacks = [early_stopping_cb])

  - Evaluate the model

model.evaluate(x_test, y_test, batch_size = 100)

# Output
loss: 0.4427 - accuracy: 0.8464
[0.44270941615104675, 0.8464000225067139]

 

  - Check the results

# result for the first test sample
test_img = x_test[0, :, :]
plt.title(class_names[y_test[0]])
plt.imshow(test_img, cmap = 'gray')
plt.show()

pred = model.predict(test_img.reshape(1, 28, 28))
pred.shape

# Output
(1, 10)


pred

# Output
array([[8.9198991e-05, 3.5745958e-05, 7.4570953e-06, 1.5882608e-05,
        8.0741156e-06, 3.3398017e-02, 4.0778108e-05, 1.1560775e-01,
        7.1698561e-04, 8.5008013e-01]], dtype=float32)


# report the class with the highest probability as the prediction
class_names[np.argmax(pred)]

# Output
'Ankle boot'

 

  - Test Batch Dataset

test_batch = x_test[:32, :, :]
test_batch_y = y_test[:32]
print(test_batch.shape)

# Output
(32, 28, 28)
preds = model.predict(test_batch)
preds.shape

# Output
(32, 10)
pred_arg = np.argmax(preds, -1)

num_rows = 8
num_cols = 4
num_images = num_rows * num_cols

plt.figure(figsize = (16, 10))

for idx in range(1, 33, 1):
    plt.subplot(num_rows, num_cols, idx)
    plt.title('Predicted: {}, True: {}'.format(class_names[pred_arg[idx - 1]],
                                               class_names[test_batch_y[idx - 1]]))
    plt.imshow(test_batch[idx - 1], cmap = 'gray')

plt.show()

● Ways to prevent overfitting and underfitting

  • Reduce the model size
  • Weight initialization
  • Optimizers
  • Batch normalization
  • Regularization

 

1. Reducing the model size

  • The simplest approach
  • Reducing the model size means reducing the number of learnable parameters
# prepare the data
from tensorflow.keras.datasets import imdb
import numpy as np

(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words = 10000)

# multi-hot encode each review into a 10000-dim 0/1 vector
def vectorize_seq(seqs, dim = 10000):
    results = np.zeros((len(seqs), dim))
    for i, seq in enumerate(seqs):
        # set the positions of the word indices in this review to 1
        results[i, seq] = 1.

    return results

x_train = vectorize_seq(train_data)
x_test = vectorize_seq(test_data)

y_train = np.asarray(train_labels).astype('float32')
y_test = np.asarray(test_labels).astype('float32')
# model 1
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model_1 = Sequential([Dense(16, activation = 'relu', input_shape = (10000, ), name = 'input'),
                      Dense(16, activation = 'relu', name = 'hidden'),
                      Dense(1, activation = 'sigmoid', name = 'output')])
model_1.summary()

# Output
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input (Dense)               (None, 16)                160016    
                                                                 
 hidden (Dense)              (None, 16)                272       
                                                                 
 output (Dense)              (None, 1)                 17        
                                                                 
=================================================================
Total params: 160,305
Trainable params: 160,305
Non-trainable params: 0
_________________________________________________________________
# model 2
model_2 = Sequential([Dense(7, activation = 'relu', input_shape = (10000, ), name = 'input2'),
                      Dense(7, activation = 'relu', name = 'hidden2'),
                      Dense(1, activation = 'sigmoid', name = 'output2')])
model_2.summary()

# Output
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input2 (Dense)              (None, 7)                 70007     
                                                                 
 hidden2 (Dense)             (None, 7)                 56        
                                                                 
 output2 (Dense)             (None, 1)                 8         
                                                                 
=================================================================
Total params: 70,071
Trainable params: 70,071
Non-trainable params: 0
_________________________________________________________________
  • The only difference between model 1 and model 2 is capacity
# train both models
model_1.compile(optimizer = 'rmsprop',
                loss = 'binary_crossentropy',
                metrics = ['acc'])
model_2.compile(optimizer = 'rmsprop',
                loss = 'binary_crossentropy',
                metrics = ['acc'])

model_1_hist = model_1.fit(x_train, y_train,
                           epochs = 20,
                           batch_size = 512,
                           validation_data = (x_test, y_test))
model_2_hist = model_2.fit(x_train, y_train,
                           epochs = 20,
                           batch_size = 512,
                           validation_data = (x_test, y_test))
# compare
epochs = range(1, 21)
model_1_val_loss = model_1_hist.history['val_loss']
model_2_val_loss = model_2_hist.history['val_loss']

import matplotlib.pyplot as plt

plt.plot(epochs, model_1_val_loss, 'r+', label = 'Model_1')
plt.plot(epochs, model_2_val_loss, 'bo', label = 'Model_2')
plt.xlabel('Epochs')
plt.ylabel('Validation Loss')
plt.legend()
plt.grid()
plt.show()

  • model_2 (the smaller model) starts to overfit a little later

 

 

2. Reducing the model size (2)

# build the model
model_3 = Sequential([Dense(1024, activation = 'relu', input_shape = (10000, ), name = 'input3'),
                      Dense(1024, activation = 'relu', name = 'hidden3'),
                      Dense(1, activation = 'sigmoid', name = 'output3')])

model_3.compile(optimizer = 'rmsprop',
                loss = 'binary_crossentropy',
                metrics = ['acc'])

model_3.summary()

# Output
Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input3 (Dense)              (None, 1024)              10241024  
                                                                 
 hidden3 (Dense)             (None, 1024)              1049600   
                                                                 
 output3 (Dense)             (None, 1)                 1025      
                                                                 
=================================================================
Total params: 11,291,649
Trainable params: 11,291,649
Non-trainable params: 0
_________________________________________________________________
# train the model
model_3_hist = model_3.fit(x_train, y_train,
                           epochs = 20,
                           batch_size = 512,
                           validation_data = (x_test, y_test))
# visualize (distinct styles per model so the curves are distinguishable)
model_3_val_loss = model_3_hist.history['val_loss']

plt.plot(epochs, model_1_val_loss, 'r+', label = 'Model_1')
plt.plot(epochs, model_2_val_loss, 'bo', label = 'Model_2')
plt.plot(epochs, model_3_val_loss, 'g--', label = 'Model_3')
plt.xlabel('Epochs')
plt.ylabel('Validation Loss')
plt.legend()
plt.grid()
plt.show()

  • The larger the network, the faster it can model the training data (training loss drops quickly)
  • But it also becomes more sensitive to overfitting
  • You can see this by comparing the training and validation losses
# also compare the training-set loss values
model_1_train_loss = model_1_hist.history['loss']
model_2_train_loss = model_2_hist.history['loss']
model_3_train_loss = model_3_hist.history['loss']

plt.plot(epochs, model_1_train_loss, 'r+', label = 'Model_1')
plt.plot(epochs, model_2_train_loss, 'bo', label = 'Model_2')
plt.plot(epochs, model_3_train_loss, 'g--', label = 'Model_3')
plt.xlabel('Epochs')
plt.ylabel('Training Loss')
plt.legend()
plt.grid()
plt.show()

 

 

3. Weight initialization

  - Initialization strategies

  • Glorot initialization (Xavier)
    • Activation functions:
      • none
      • tanh
      • sigmoid
      • softmax
  • He initialization
    • Activation functions:
      • ReLU
      • LeakyReLU
      • ELU, etc.
from tensorflow.keras.layers import Dense, LeakyReLU, Activation
from tensorflow.keras.models import Sequential

model = Sequential([Dense(30, kernel_initializer = 'he_normal', input_shape = [10, 10]),
                    LeakyReLU(alpha = 0.2),
                    Dense(1, kernel_initializer = 'he_normal'),
                    Activation('softmax')])
model.summary()

# Output
Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense (Dense)               (None, 10, 30)            330       
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 10, 30)            0         
                                                                 
 dense_1 (Dense)             (None, 10, 1)             31        
                                                                 
 activation (Activation)     (None, 10, 1)             0         
                                                                 
=================================================================
Total params: 361
Trainable params: 361
Non-trainable params: 0
_________________________________________________________________

 

 

4. Fast optimizers

  - Momentum optimization

$$ v \leftarrow \alpha v - \gamma \frac{\partial L}{\partial W} $$

$$ W \leftarrow W + v $$

  • \(\alpha\): momentum coefficient
  • \(v\): velocity
  • \(\gamma\): learning rate
  • \(\frac{\partial L}{\partial W}\): gradient of the loss with respect to \(W\)
import tensorflow as tf
from tensorflow.keras.optimizers import SGD

# the momentum argument is the momentum coefficient (alpha above)
optimizer = SGD(learning_rate = 0.001, momentum = 0.9)
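To connect the formula to the code, here is a minimal NumPy sketch of a single momentum update (the values are made up for illustration):

import numpy as np

alpha = 0.9                      # momentum coefficient (the momentum argument)
lr = 0.001                       # learning rate (gamma in the formula)

W = np.array([1.0, -2.0])        # parameters
v = np.zeros_like(W)             # velocity, initialized to zero
grad = np.array([0.5, -0.3])     # dL/dW, as if it came from backprop

v = alpha * v - lr * grad        # v <- alpha * v - gamma * dL/dW
W = W + v                        # W <- W + v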

 

  - Nesterov momentum

  • Computes the gradient of the loss slightly ahead in the direction of the momentum
  • Tends to reach the minimum a little faster over time
    \(m \leftarrow \beta m - \eta \bigtriangledown_{\theta}J(\theta + \beta m)\)
    \(\theta \leftarrow \theta + m\)
  • \(m\): momentum vector, \(\beta\): momentum coefficient
  • \(\eta\): learning rate
  • \(\bigtriangledown_{\theta}J(\theta)\): gradient of \(J\) with respect to \(\theta\)

http://cs231n.stanford.edu/slides/2019/cs231n_2019_lecture08.pdf

optimizer = SGD(learning_rate = 0.001, momentum = 0.9, nesterov = True)

 

  - AdaGrad

  • Can work well for simple models, but is generally not used for deep networks (it has been found to stop learning too early)
    \(h \leftarrow h+\frac{\partial L}{\partial W} \odot \frac{\partial L}{\partial W}\)
    \(W \leftarrow W-\gamma \frac{1}{\sqrt{h}} \frac{\partial L}{\partial W}\)
  • \(h\): accumulated sum of squared gradients
  • \(\gamma\): learning rate
  • \(\frac{\partial L}{\partial W}\): gradient of the loss with respect to \(W\)
from tensorflow.keras.optimizers import Adagrad

optimizer = Adagrad(learning_rate = 0.001)

 

  - RMSprop

$$ s \leftarrow \beta s+(1-\beta)\bigtriangledown_{\theta}J(\theta) \otimes \bigtriangledown_{\theta}J(\theta) $$

$$ \theta \leftarrow \theta - \eta \bigtriangledown_{\theta}J(\theta)\oslash \sqrt{s+\epsilon} $$

  • \(s\): exponentially decayed sum of squared gradients (decay rate \(\beta\))
  • \(\eta\): learning rate
  • \(\bigtriangledown_{\theta}J(\theta)\): gradient of the loss
from tensorflow.keras.optimizers import RMSprop

optimizer = RMSprop(learning_rate = 0.001, rho = 0.9)

 

  - Adam

$$ m \leftarrow \beta_{1}m-(1-\beta_{1})\frac{\partial L}{\partial W} $$

$$ s \leftarrow \beta_{2}s+(1-\beta_{2})\frac{\partial L}{\partial W}\odot\frac{\partial L}{\partial W} $$

$$ \hat{m} \leftarrow \frac{m}{1-\beta^{t}_{1}} $$

$$ \hat{s} \leftarrow \frac{s}{1-\beta^{t}_{2}} $$

$$ W \leftarrow W+\gamma \hat{m} \oslash \sqrt{\hat{s}+\epsilon} $$

  • \(\beta_{1}, \beta_{2}\): update coefficients for the exponential averages
  • \(\gamma\): learning rate
  • \(\beta_{1} \approx 0.9, \beta_{2} \approx 0.999 \)
  • \( \frac{\partial L}{\partial W} \): gradient of the loss with respect to \(W\)
from tensorflow.keras.optimizers import Adam

# the beta_1 and beta_2 values below are the defaults, which have proven to work well in practice
optimizer = Adam(learning_rate = 0.001, beta_1 = 0.9, beta_2 = 0.999)

 

 

5. Batch normalization

  • Keeps the samples fed into each layer on a uniform scale
  • Helps the trained model generalize to new data
  • Normalizing only during preprocessing gives no guarantee the inputs are still normalized by the time they reach deeper layers
  • Usually placed after a Dense or Conv2D layer, before the activation function
from tensorflow.keras.layers import BatchNormalization, Dense, Activation
from tensorflow.keras.utils import plot_model

model = Sequential()
model.add(Dense(32, input_shape = (28 * 28, ), kernel_initializer = 'he_normal'))
model.add(BatchNormalization())
model.add(Activation('relu'))

model.summary()
plot_model(model, show_shapes = True)

# Output
Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense_4 (Dense)             (None, 32)                25120     
                                                                 
 batch_normalization_1 (Batc  (None, 32)               128       
 hNormalization)                                                 
                                                                 
 activation_2 (Activation)   (None, 32)                0         
                                                                 
=================================================================
Total params: 25,248
Trainable params: 25,184
Non-trainable params: 64
_________________________________________________________________

 

 

6. Regularization

  • The more complex the network, the more its capacity should be constrained so the weights stay small
  • This makes the weight distribution more uniform
  • Adds a cost associated with large weights to the network's loss function
    • L1 regularization: adds a cost proportional to the absolute value of the weights
    • L2 regularization: adds a cost proportional to the square of the weights (often called weight decay)
    • The two can also be combined, as sketched below
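Concretely, kernel_regularizer = l2(0.001) adds 0.001 * sum(w**2) over that layer's kernel to the total loss. A tiny NumPy sketch of the penalties (illustrative weights):

import numpy as np

w = np.array([0.5, -1.2, 0.3])           # a layer's kernel weights
l2_penalty = 0.001 * np.sum(w ** 2)      # what l2(0.001) contributes
l1_penalty = 0.001 * np.sum(np.abs(w))   # what l1(0.001) contributes
l1_l2_penalty = l1_penalty + l2_penalty  # what l1_l2(0.001, 0.001) contributes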
# build the l2 model
from tensorflow.keras.regularizers import l1, l2, l1_l2

l2_model = Sequential([Dense(16, kernel_regularizer = l2(0.001), activation = 'relu', input_shape = (10000, )),
                       Dense(16, kernel_regularizer = l2(0.001), activation = 'relu'),
                       Dense(1, activation = 'sigmoid')])
l2_model.compile(optimizer = 'rmsprop',
                 loss = 'binary_crossentropy',
                 metrics = ['acc'])
l2_model.summary()
plot_model(l2_model, show_shapes = True)

# Output
Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense_5 (Dense)             (None, 16)                160016    
                                                                 
 dense_6 (Dense)             (None, 16)                272       
                                                                 
 dense_7 (Dense)             (None, 1)                 17        
                                                                 
=================================================================
Total params: 160,305
Trainable params: 160,305
Non-trainable params: 0
_________________________________________________________________

# train the l2 model
l2_model_hist = l2_model.fit(x_train, y_train,
                             epochs = 20,
                             batch_size = 512,
                             validation_data = (x_test, y_test))
# visualize the l2 model
l2_model_val_loss = l2_model_hist.history['val_loss']

epochs = range(1, 21)
plt.plot(epochs, model_1_val_loss, 'r+', label = 'Model_1')
plt.plot(epochs, l2_model_val_loss, 'bo', label = 'Model_L2-regularized')
plt.xlabel('Epochs')
plt.ylabel('Validation Loss')
plt.legend()
plt.grid()
plt.show()

 

# build the l1 model
l1_model = Sequential([Dense(16, kernel_regularizer = l1(0.001), activation = 'relu', input_shape = (10000, )),
                       Dense(16, kernel_regularizer = l1(0.001), activation = 'relu'),
                       Dense(1, activation = 'sigmoid')])
l1_model.compile(optimizer = 'rmsprop',
                 loss = 'binary_crossentropy',
                 metrics = ['acc'])
l1_model.summary()
plot_model(l1_model, show_shapes = True)


# Output
Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense_14 (Dense)            (None, 16)                160016    
                                                                 
 dense_15 (Dense)            (None, 16)                272       
                                                                 
 dense_16 (Dense)            (None, 1)                 17        
                                                                 
=================================================================
Total params: 160,305
Trainable params: 160,305
Non-trainable params: 0
_________________________________________________________________

# train the l1 model
l1_model_hist = l1_model.fit(x_train, y_train,
                             epochs = 20,
                             batch_size = 512,
                             validation_data = (x_test, y_test))
# visualize the l1 model
l1_model_val_loss = l1_model_hist.history['val_loss']

epochs = range(1, 21)
plt.plot(epochs, model_1_val_loss, 'r+', label = 'Model_1')
plt.plot(epochs, l1_model_val_loss, 'bo', label = 'Model_L1-regularized')
plt.plot(epochs, l2_model_val_loss, 'g--', label = 'Model_L2-regularized')
plt.xlabel('Epochs')
plt.ylabel('Validation Loss')
plt.legend()
plt.grid()
plt.show()

 

# build the l1_l2 model
l1_l2_model = Sequential([Dense(16, kernel_regularizer = l1_l2(l1 = 0.0001, l2 = 0.0001), activation = 'relu', input_shape = (10000, )),
                          Dense(16, kernel_regularizer = l1_l2(l1 = 0.0001, l2 = 0.0001), activation = 'relu'),
                          Dense(1, activation = 'sigmoid')])
l1_l2_model.compile(optimizer = 'rmsprop',
                    loss = 'binary_crossentropy',
                    metrics = ['acc'])
l1_l2_model.summary()
plot_model(l1_l2_model, show_shapes = True)


 

# train the l1_l2 model
l1_l2_model_hist = l1_l2_model.fit(x_train, y_train,
                                   epochs = 20,
                                   batch_size = 512,
                                   validation_data = (x_test, y_test))
# visualize the l1_l2 model
l1_l2_model_val_loss = l1_l2_model_hist.history['val_loss']

epochs = range(1, 21)
plt.plot(epochs, model_1_val_loss, 'r+', label = 'Model_1')
plt.plot(epochs, l1_l2_model_val_loss, 'ko', label = 'Model_L1_L2-regularized')
plt.plot(epochs, l1_model_val_loss, 'bo', label = 'Model_L1-regularized')
plt.plot(epochs, l2_model_val_loss, 'g--', label = 'Model_L2-regularized')
plt.xlabel('Epochs')
plt.ylabel('Validation Loss')
plt.legend()
plt.grid()
plt.show()

 

 

7. Dropout

  • One of the most effective and widely used regularization techniques for neural networks
  • Applying dropout to a layer randomly drops some of the layer's features (nodes) during training
    • For example, applying dropout to the vector [1.0, 3.2, 0.6, 0.8, 1.1] randomly zeroes entries
      (e.g. it becomes [0, 3.2, 0.6, 0.8, 0])
    • The rate is usually set between 0.2 and 0.5
  • At test time no nodes are dropped
    • Classically the layer's outputs are scaled down by the dropout rate instead; Keras uses the equivalent "inverted" dropout, scaling the surviving activations up during training (see the sketch below)
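A minimal NumPy sketch of inverted dropout, assuming a rate of 0.5 and the example vector above:

import numpy as np

rng = np.random.default_rng(0)
rate = 0.5
x = np.array([1.0, 3.2, 0.6, 0.8, 1.1])

mask = rng.random(x.shape) >= rate    # keep each node with probability (1 - rate)
train_out = x * mask / (1.0 - rate)   # scale survivors up at training time
test_out = x                          # at test time the layer is the identity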
# build the model
from tensorflow.keras.layers import Dropout

dropout_model = Sequential([Dense(16, activation = 'relu', input_shape = (10000, )),
                             Dropout(0.5),
                             Dense(16, activation = 'relu'),
                             Dropout(0.5),
                             Dense(1, activation = 'sigmoid')])
dropout_model.compile(optimizer = 'rmsprop',
                      loss = 'binary_crossentropy',
                      metrics = ['acc'])
dropout_model.summary()
plot_model(dropout_model, show_shapes = True)

# Output
Model: "sequential_19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense_20 (Dense)            (None, 16)                160016    
                                                                 
 dropout (Dropout)           (None, 16)                0         
                                                                 
 dense_21 (Dense)            (None, 16)                272       
                                                                 
 dropout_1 (Dropout)         (None, 16)                0         
                                                                 
 dense_22 (Dense)            (None, 1)                 17        
                                                                 
=================================================================
Total params: 160,305
Trainable params: 160,305
Non-trainable params: 0
_________________________________________________________________

# train the model
dropout_model_hist = dropout_model.fit(x_train, y_train,
                                       epochs = 20,
                                       batch_size = 512,
                                       validation_data = (x_test, y_test))
# visualize
dropout_model_val_loss = dropout_model_hist.history['val_loss']

epochs = range(1, 21)
plt.plot(epochs, model_1_val_loss, 'r+', label = 'Model_1')
plt.plot(epochs, dropout_model_val_loss, 'co', label = 'Model_Dropout')
plt.xlabel('Epochs')
plt.ylabel('Validation Loss')
plt.legend()
plt.grid()
plt.show()

https://www.tensorflow.org/tutorials/keras/classification?hl=ko

1. modules import

import tensorflow as tf
from tensorflow.keras.datasets.fashion_mnist import load_data
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import models
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import plot_model

from sklearn.model_selection import train_test_split

import numpy as np
import matplotlib.pyplot as plt

 

 

2. Load the dataset

tf.random.set_seed(111)

(x_train_full, y_train_full), (x_test, y_test) = load_data()

x_train, x_val, y_train, y_val = train_test_split(x_train_full, y_train_full, test_size = 0.3, random_state = 111)

print("학습 데이터: {}\t레이블: {}".format(x_train_full.shape, y_train_full.shape))
print("학습 데이터: {}\t레이블: {}".format(x_train.shape, y_train.shape))
print("검증 데이터: {}\t레이블: {}".format(x_val.shape, y_val.shape))
print("테스트 데이터: {}\t레이블: {}".format(x_test.shape, y_test.shape))

# Output
Full training data: (60000, 28, 28)	Labels: (60000,)
Training data: (42000, 28, 28)	Labels: (42000,)
Validation data: (18000, 28, 28)	Labels: (18000,)
Test data: (10000, 28, 28)	Labels: (10000,)
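Since the split is random, the class proportions in the train and validation sets can drift slightly. train_test_split's stratify argument (optional, not used above) keeps the label proportions identical in both splits:

x_train, x_val, y_train, y_val = train_test_split(x_train_full, y_train_full,
                                                  test_size = 0.3, random_state = 111,
                                                  stratify = y_train_full)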

 

 

3. Inspect the data

# The set of class labels
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

# Check the label of the first training sample
class_names[y_train[0]]

# Output
'Pullover'
plt.figure()
plt.imshow(x_train[0])
plt.colorbar()
plt.grid()
plt.show()

# Randomly sample four images and display them
num_sample = 4
random_idxs = np.random.randint(60000, size = num_sample)
plt.figure(figsize = (15, 10))
for i, idx in enumerate(random_idxs):
    image = x_train_full[idx, :]
    label = y_train_full[idx]

    plt.subplot(1, len(random_idxs), i+1)
    plt.imshow(image)
    plt.title("Index: {}, Label: {}".format(idx, class_names[label]))

 

 

4. Data preprocessing

  • Normalization
  • Flatten
  • loss = 'sparse_categorical_crossentropy' (integer labels are used as-is; see the sketch below)
# Normalization
x_train = (x_train.reshape(-1, 28*28)) / 255.
x_val = (x_val.reshape(-1, 28*28)) / 255.
x_test = (x_test.reshape(-1, 28*28)) / 255.
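Why loss = 'sparse_categorical_crossentropy': the labels stay as integers (0-9), so no one-hot encoding step is needed. A small sketch of the equivalence (the numbers are illustrative, not from the model):

import numpy as np
import tensorflow as tf

probs = np.full((1, 10), 0.05, dtype = 'float32')
probs[0, 2] = 0.55                                   # pretend the model favors class 2

y_int = np.array([2])                                # integer label, used as-is
y_onehot = tf.keras.utils.to_categorical(y_int, 10)  # one-hot label

# Both losses compute -log(0.55); only the label format differs
print(tf.keras.losses.sparse_categorical_crossentropy(y_int, probs).numpy())
print(tf.keras.losses.categorical_crossentropy(y_onehot, probs).numpy())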

 

 

5. Build the model (functional API)

input = Input(shape = (784, ), name = 'input')
hidden1 = Dense(256, activation = 'relu', name = 'hidden1')(input)
hidden2 = Dense(128, activation = 'relu', name = 'hidden2')(hidden1)
hidden3 = Dense(64, activation = 'relu', name = 'hidden3')(hidden2)
hidden4 = Dense(32, activation = 'relu', name = 'hidden4')(hidden3)
output = Dense(10, activation = 'softmax', name = 'output')(hidden4)
model = Model(inputs = [input], outputs = [output])
model.summary()

# Output
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input (InputLayer)          [(None, 784)]             0         
                                                                 
 hidden1 (Dense)             (None, 256)               200960    
                                                                 
 hidden2 (Dense)             (None, 128)               32896     
                                                                 
 hidden3 (Dense)             (None, 64)                8256      
                                                                 
 hidden4 (Dense)             (None, 32)                2080      
                                                                 
 output (Dense)              (None, 10)                330       
                                                                 
=================================================================
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
_________________________________________________________________
plot_model(model)
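For comparison, the same stack built with the Sequential API (a sketch; the rest of this section keeps using the functional model above):

seq_model = Sequential([Dense(256, activation = 'relu', input_shape = (784, )),
                        Dense(128, activation = 'relu'),
                        Dense(64, activation = 'relu'),
                        Dense(32, activation = 'relu'),
                        Dense(10, activation = 'softmax')])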

 

 

6. Compile the model

model.compile(loss = 'sparse_categorical_crossentropy',
              optimizer = RMSprop(learning_rate = 0.01),
              metrics = ['acc'])

 

 

7. Train the model

  • Save the training history in a history variable for later visualization
history = model.fit(x_train, y_train,
                    epochs = 10,
                    batch_size = 128,
                    validation_data = (x_val, y_val))

# Output
Epoch 1/10
329/329 [==============================] - 16s 33ms/step - loss: 0.8969 - acc: 0.6897 - val_loss: 0.5580 - val_acc: 0.7997
Epoch 2/10
329/329 [==============================] - 6s 19ms/step - loss: 0.5179 - acc: 0.8132 - val_loss: 0.5554 - val_acc: 0.8124
Epoch 3/10
329/329 [==============================] - 5s 15ms/step - loss: 0.4643 - acc: 0.8321 - val_loss: 0.7202 - val_acc: 0.7992
Epoch 4/10
329/329 [==============================] - 5s 14ms/step - loss: 0.4484 - acc: 0.8414 - val_loss: 0.5157 - val_acc: 0.7810
Epoch 5/10
329/329 [==============================] - 5s 15ms/step - loss: 0.4242 - acc: 0.8497 - val_loss: 0.5527 - val_acc: 0.8212
Epoch 6/10
329/329 [==============================] - 5s 15ms/step - loss: 0.4175 - acc: 0.8523 - val_loss: 0.6034 - val_acc: 0.8197
Epoch 7/10
329/329 [==============================] - 5s 15ms/step - loss: 0.4107 - acc: 0.8566 - val_loss: 0.6612 - val_acc: 0.8046
Epoch 8/10
329/329 [==============================] - 5s 15ms/step - loss: 0.4029 - acc: 0.8594 - val_loss: 0.6940 - val_acc: 0.7671
Epoch 9/10
329/329 [==============================] - 5s 14ms/step - loss: 0.3955 - acc: 0.8603 - val_loss: 0.5032 - val_acc: 0.8444
Epoch 10/10
329/329 [==============================] - 5s 14ms/step - loss: 0.3969 - acc: 0.8653 - val_loss: 0.5266 - val_acc: 0.8257

 

 

8. Visualize the training results

history_dict = history.history

loss = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(loss) + 1)
fig = plt.figure(figsize = (10, 5))

ax1 = fig.add_subplot(1, 2, 1)
ax1.plot(epochs, loss, color = 'blue', label = 'train_loss')
ax1.plot(epochs, val_loss, color = 'red', label = 'val_loss')
ax1.set_title('Train and Validation Loss')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Loss')
ax1.grid()
ax1.legend()

acc = history_dict['acc']
val_acc = history_dict['val_acc']

ax2 = fig.add_subplot(1, 2, 2)
ax2.plot(epochs, acc, color = 'blue', label = 'train_acc')
ax2.plot(epochs, val_acc, color = 'red', label = 'val_acc')
ax2.set_title('Train and Validation Accuracy')
ax2.set_xlabel('Epochs')
ax2.set_ylabel('Accuracy')
ax2.grid()
ax2.legend()

  • The validation metrics (val_loss, val_acc) fluctuate from epoch to epoch instead of improving steadily
  • Retrain with a different optimizer
    • Re-run the dataset loading, preprocessing, and model construction steps first
from tensorflow.keras.optimizers import SGD

model.compile(loss = 'sparse_categorical_crossentropy',
              optimizer = SGD(learning_rate = 0.01),
              metrics = ['acc'])

history2 = model.fit(x_train, y_train,
                     epochs = 10,
                     batch_size = 128,
                     validation_data = (x_val, y_val))

# Output
Epoch 1/10
329/329 [==============================] - 13s 32ms/step - loss: 0.3495 - acc: 0.8706 - val_loss: 0.3795 - val_acc: 0.8644
Epoch 2/10
329/329 [==============================] - 9s 27ms/step - loss: 0.3172 - acc: 0.8811 - val_loss: 0.3691 - val_acc: 0.8689
Epoch 3/10
329/329 [==============================] - 6s 19ms/step - loss: 0.3072 - acc: 0.8848 - val_loss: 0.3621 - val_acc: 0.8713
Epoch 4/10
329/329 [==============================] - 8s 25ms/step - loss: 0.3017 - acc: 0.8864 - val_loss: 0.3590 - val_acc: 0.8728
Epoch 5/10
329/329 [==============================] - 7s 23ms/step - loss: 0.2977 - acc: 0.8880 - val_loss: 0.3572 - val_acc: 0.8728
Epoch 6/10
329/329 [==============================] - 7s 21ms/step - loss: 0.2950 - acc: 0.8888 - val_loss: 0.3548 - val_acc: 0.8733
Epoch 7/10
329/329 [==============================] - 4s 12ms/step - loss: 0.2925 - acc: 0.8896 - val_loss: 0.3542 - val_acc: 0.8756
Epoch 8/10
329/329 [==============================] - 3s 11ms/step - loss: 0.2903 - acc: 0.8904 - val_loss: 0.3526 - val_acc: 0.8756
Epoch 9/10
329/329 [==============================] - 3s 10ms/step - loss: 0.2887 - acc: 0.8911 - val_loss: 0.3520 - val_acc: 0.8757
Epoch 10/10
329/329 [==============================] - 3s 11ms/step - loss: 0.2870 - acc: 0.8915 - val_loss: 0.3526 - val_acc: 0.8756
# Visualize again
history_dict = history2.history

loss = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(loss) + 1)
fig = plt.figure(figsize = (10, 5))

ax1 = fig.add_subplot(1, 2, 1)
ax1.plot(epochs, loss, color = 'blue', label = 'train_loss')
ax1.plot(epochs, val_loss, color = 'red', label = 'val_loss')
ax1.set_title('Train and Validation Loss')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Loss')
ax1.grid()
ax1.legend()

acc = history_dict['acc']
val_acc = history_dict['val_acc']

ax2 = fig.add_subplot(1, 2, 2)
ax2.plot(epochs, acc, color = 'blue', label = 'train_acc')
ax2.plot(epochs, val_acc, color = 'red', label = 'val_acc')
ax2.set_title('Train and Validation Accuracy')
ax2.set_xlabel('Epochs')
ax2.set_ylabel('Accuracy')
ax2.grid()
ax2.legend()

  • The training loss and accuracy look different from the validation loss and accuracy, but numerically the gap is small
  • The largest gap in loss is about 0.06, and the largest gap in accuracy is about 0.025
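For reference, SGD also takes a momentum argument, which often speeds up convergence further; a variant worth trying (not used in this run):

model.compile(loss = 'sparse_categorical_crossentropy',
              optimizer = SGD(learning_rate = 0.01, momentum = 0.9),
              metrics = ['acc'])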

 

 

9. Evaluate the model (1)

  • Model trained with the SGD() optimizer
  • evaluate()
model.evaluate(x_test, y_test)

# Output
313/313 [==============================] - 2s 6ms/step - loss: 0.3862 - acc: 0.8661
[0.38618436455726624, 0.866100013256073]
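evaluate() returns the loss followed by each metric declared in compile(), so the values can be unpacked directly (a small usage sketch):

test_loss, test_acc = model.evaluate(x_test, y_test, verbose = 0)
print("test_acc: {:.4f}".format(test_acc))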

 

 

10. Predict with the trained model

pred_ys = model.predict(x_test)

print(pred_ys.shape)
np.set_printoptions(precision = 7)
print(pred_ys[0])

# Output
# The probability assigned to each of the 10 classes
(10000, 10)
[4.2854483e-21 1.0930411e-15 1.6151620e-17 3.9182383e-11 2.9266587e-15
 3.3629590e-03 4.9878759e-17 1.0700015e-03 2.2493745e-13 9.9556702e-01]
# Take the class with the highest probability as the prediction and check the result
arg_pred_y = np.argmax(pred_ys, axis = 1)
plt.imshow(x_test[0].reshape(-1, 28))
plt.title('Predicted Class: {}'.format(class_names[arg_pred_y[0]]))
plt.show()

# Display an image with its prediction
def plot_image(i, pred_ys, y_test, img):
    pred_ys, y_test, img = pred_ys[i], y_test[i], img[i]
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    
    plt.imshow(img, cmap = plt.cm.binary)

    predicted_label = np.argmax(pred_ys)
    if predicted_label == y_test:
        color = 'blue'
    else:
        color = 'red'
    
    plt.xlabel("{} {:2.0f}% ({})".format(class_names[predicted_label],
                                         100 * np.max(pred_ys),
                                         class_names[y_test]),
                                         color = color)

# Plot the probability the model assigned to each of the 10 classes
def plot_value_array(i, pred_ys, true_label):
    pred_ys, true_label = pred_ys[i], true_label[i]
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    thisplot = plt.bar(range(10), pred_ys, color = '#777777')
    plt.ylim([0, 1])
    predicted_label = np.argmax(pred_ys)

    thisplot[predicted_label].set_color('red')
    thisplot[true_label].set_color('blue')
# Check the prediction for the first test sample
i = 0
plt.figure(figsize = (8, 4))
plt.subplot(1, 2, 1)
plot_image(i, pred_ys, y_test, x_test.reshape(-1, 28, 28))
plt.subplot(1, 2, 2)
plot_value_array(i, pred_ys, y_test)
plt.show()

# Check predictions for randomly sampled images
num_rows = 5
num_cols = 3
num_images = num_rows * num_cols

random_num = np.random.randint(10000, size = num_images)
plt.figure(figsize = (2 * 2 * num_cols, 2 * num_rows))
for idx, num in enumerate(random_num):
    plt.subplot(num_rows, 2 * num_cols, 2 * idx + 1)
    plot_image(num, pred_ys, y_test, x_test.reshape(-1, 28, 28))
    plt.subplot(num_rows, 2 * num_cols, 2 * idx + 2)
    plot_value_array(num, pred_ys, y_test)

plt.show()

 

 

11. Evaluate the model (2)

  • Model trained with the SGD() optimizer
  • Confusion matrix
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
from tensorflow.keras.utils import to_categorical

y_test_che = to_categorical(y_test)
plt.figure(figsize = (8, 8))
cm2 = confusion_matrix(np.argmax(y_test_che, axis = 1), np.argmax(pred_ys, axis = -1))
sns.heatmap(cm2, annot = True, fmt = 'd', cmap = 'Blues')
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
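Since y_test already holds integer class labels, the to_categorical / argmax round trip above is not strictly necessary; an equivalent call (a sketch):

cm = confusion_matrix(y_test, np.argmax(pred_ys, axis = 1))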

 

 

12. Evaluate the model (3)

  • Model trained with the SGD() optimizer
  • Classification report
print(classification_report(np.argmax(y_test_che, axis = -1), np.argmax(pred_ys, axis = -1)))

# Output
              precision    recall  f1-score   support

           0       0.78      0.85      0.81      1000
           1       0.99      0.96      0.98      1000
           2       0.75      0.81      0.78      1000
           3       0.86      0.88      0.87      1000
           4       0.77      0.75      0.76      1000
           5       0.97      0.95      0.96      1000
           6       0.68      0.57      0.62      1000
           7       0.93      0.96      0.94      1000
           8       0.96      0.97      0.96      1000
           9       0.96      0.95      0.96      1000

    accuracy                           0.87     10000
   macro avg       0.86      0.87      0.86     10000
weighted avg       0.86      0.87      0.86     10000

1. modules import

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.models import Model
from tensorflow.keras.utils import get_file, plot_model

 

 

2. Load the data

# Download the data from the given URL
dataset_path = get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")

# Specify the column names
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight','Acceleration','Model Year', 'Origin']

# Load the data into a pandas DataFrame using the specified column names
raw_dataset = pd.read_csv(dataset_path, names = column_names,
                          na_values = '?', comment = '\t',
                          sep = ' ', skipinitialspace = True)

 

 

3. Inspect the data

# Work on a copy() instead of modifying the raw data directly
dataset = raw_dataset.copy()
dataset

 

 

4. Data preprocessing

  • Some values in this dataset are missing
dataset.isna().sum()

# Output
MPG             0
Cylinders       0
Displacement    0
Horsepower      6
Weight          0
Acceleration    0
Model Year      0
Origin          0
dtype: int64
  • Drop the rows with missing values
# Horsepower has six missing values, so drop those rows
dataset = dataset.dropna()
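Dropping rows is reasonable here since only six are affected; an alternative (not used in this example) is to impute, e.g. with the column median:

dataset['Horsepower'] = dataset['Horsepower'].fillna(dataset['Horsepower'].median())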
  • 'Origin' is a categorical column
    • One-hot encode it (see the pandas alternative below)
origin = dataset.pop('Origin')
dataset['USA'] = (origin == 1) * 1.0
dataset['Europe'] = (origin == 2) * 1.0
dataset['Japan'] = (origin == 3) * 1.0
dataset
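An equivalent one-hot encoding using pandas get_dummies, shown as an alternative to the manual columns above (it must run before 'Origin' is popped, so use one approach or the other; the resulting dtypes may be integer rather than float):

origin_names = dataset['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})
dataset = pd.concat([dataset.drop(columns = 'Origin'), pd.get_dummies(origin_names)], axis = 1)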

 

 

4-1. Split the data into train and test sets

# Sample 80% of the data for training
# Drop the training rows from the full dataset; the remainder becomes the test set
train_dataset = dataset.sample(frac = 0.8, random_state = 0)
test_dataset = dataset.drop(train_dataset.index)

 

 

4-2. Explore the data

sns.pairplot(train_dataset[['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight']], diag_kind = 'kde')

# Summary statistics of the training data
train_stats = train_dataset.describe()
# MPG is the label, so exclude it from the statistics
train_stats.pop("MPG")
train_stats = train_stats.transpose()
train_stats

 

 

4-3. Separate features and labels

train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')

 

 

4-4. Normalize the data

# Normalize by subtracting the mean and dividing by the standard deviation taken from the training statistics
def normalization(x):
    return (x - train_stats['mean']) / train_stats['std']

normed_train_data = normalization(train_dataset)
normed_test_data = normalization(test_dataset)
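# Note: the test set is normalized with the *training* statistics on purpose,
# so both splits share the same scale and no test information leaks into training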

 

 

5. Build the model

def build_model():
    input = Input(shape = (len(train_dataset.keys()), ), name = 'input')
    hidden1 = Dense(64, activation = 'relu', name = 'dense1')(input)
    hidden2 = Dense(64, activation = 'relu', name = 'dense2')(hidden1)
    output = Dense(1, name = 'output')(hidden2)

    model = Model(inputs = [input], outputs = [output])

    model.compile(loss = 'mse',
                  optimizer = RMSprop(0.001),
                  metrics = ['mae', 'mse'])
    
    return model
model = build_model()
model.summary()

# Output
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input (InputLayer)          [(None, 9)]               0         
                                                                 
 dense1 (Dense)              (None, 64)                640       
                                                                 
 dense2 (Dense)              (None, 64)                4160      
                                                                 
 output (Dense)              (None, 1)                 65        
                                                                 
=================================================================
Total params: 4,865
Trainable params: 4,865
Non-trainable params: 0
_________________________________________________________________
plot_model(model)

 

 

6. Check a sample batch

sample_batch = normed_train_data[:10]
sample_result = model.predict(sample_batch)
sample_batch

  • The values confirm that the normalization worked correctly

 

 

7. Train the model

epochs = 1000
history = model.fit(normed_train_data, train_labels,
                    epochs = epochs, validation_split = 0.2)

 

 

8. Visualize training

# Store the loss, MAE, and MSE from every training epoch in a DataFrame
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist

# Use that DataFrame to build a single visualization function
def plot_history(history):
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    plt.figure(figsize = (12, 6))
    
    plt.subplot(1, 2, 1)
    plt.xlabel('Epochs')
    plt.ylabel('MPG Mean Absolute Error')
    plt.plot(hist['epoch'], hist['mae'], label = 'Train Error')
    plt.plot(hist['epoch'], hist['val_mae'], label = 'Val Error')
    plt.ylim([0, 5])
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.xlabel('Epochs')
    plt.ylabel('MPG Mean Squared Error')
    plt.plot(hist['epoch'], hist['mse'], label = 'Train Error')
    plt.plot(hist['epoch'], hist['val_mse'], label = 'Val Error')
    plt.ylim([0, 20])
    plt.legend()

    plt.show()

plot_history(history)

  • The validation error (Val Error) stops falling below a certain level for both MAE and MSE
  • Training further is pointless once the validation error stops improving;
    if only the training error keeps shrinking and the gap widens, the model overfits the training data

 

 

9. Regularization with EarlyStopping

from tensorflow.keras.callbacks import EarlyStopping

model = build_model()

# Stop if validation performance has not improved over the last 10 epochs
early_stop = EarlyStopping(monitor = 'val_loss', patience = 10)

history = model.fit(normed_train_data, train_labels, epochs = epochs,
                    validation_split = 0.2, callbacks = [early_stop])

  • Instead of running all 1000 epochs, training stopped at epoch 91 when no further improvement was detected
plot_history(history)
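A common refinement (not used above) is restore_best_weights = True, which rolls the model back to the weights from the best validation epoch when training stops:

early_stop = EarlyStopping(monitor = 'val_loss', patience = 10,
                           restore_best_weights = True)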

 

 

10. Evaluate the model

# Evaluate on the test data and store the resulting loss, MAE, and MSE
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose = 2)
print(mae)

# Output
# Predictions fall within an error of about 1.88 MPG
3/3 - 0s - loss: 5.7125 - mae: 1.8831 - mse: 5.7125 - 61ms/epoch - 20ms/step
1.8831140995025635

 

 

11. Predict with the trained model

# Predict
test_pred = model.predict(normed_test_data).flatten()

# Scatter plot of predicted vs. true values to check that the relationship is linear
plt.scatter(test_labels, test_pred)
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.axis('equal')
plt.axis('square')
plt.grid()
plt.xlim([0, plt.xlim()[1]])
plt.ylim([0, plt.ylim()[1]])
plt.plot([-50, 50], [-50, 50])
plt.show()

# Visualize how large the prediction errors are
error = test_pred - test_labels
plt.hist(error, bins = 30)
plt.xlabel('Prediction Error')
plt.grid()
plt.ylabel('Count')
plt.show()
