● MNIST Classification Practice

  • Module Import
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict

 

  • Load Data
np.random.seed(42)
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
num_classes = 10

 

  • Data Preprocessing
np.random.seed(42)
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
num_classes = 10

# Reduce the dataset size since the full set is quite large
x_train = x_train[:10000]
x_test = x_test[:3000]

y_train = y_train[:10000]
y_test = y_test[:3000]

# flatten
x_train, x_test = x_train.reshape(-1, 28*28).astype(np.float32), x_test.reshape(-1, 28*28).astype(np.float32)

x_train = x_train / 255.
x_test = x_test / 255.

# Convert y_train to one-hot vectors (y_test keeps its integer labels)
y_train = np.eye(num_classes)[y_train]

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

# Output
(10000, 784)
(10000, 10)
(3000, 784)
(3000,)
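
For reference, np.eye(num_classes)[y_train] builds the one-hot matrix by using the label values to index rows of the identity matrix; only y_train is converted, while y_test keeps its integer labels (the accuracy() method defined below compares argmax predictions against them directly). A small illustrative example with made-up labels:
# Indexing the identity matrix with label values picks out one-hot rows
labels = np.array([3, 0])    # hypothetical example labels
print(np.eye(10)[labels])
# [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
#  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]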

 

  • Hyper Parameter
epochs = 1000
learning_rate = 1e-2
batch_size = 256
train_size = x_train.shape[0]
iter_per_epoch = max(train_size / batch_size, 1)

 

  • Util Functions
def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis = 0)
        y = np.exp(x) / np.sum(np.exp(x), axis = 0)
        return y.T
    
    x = x - np.max(x)
    return np.exp(x) / np.sum(np.exp(x))

def mean_squared_error(y, t):
    return 0.5 * np.sum((y - t)**2)

def cross_entropy_error(pred_y, true_y):
    if pred_y.ndim == 1:
        true_y = true_y.reshape(1, true_y.size)
        pred_y = pred_y.reshape(1, pred_y.size)
    
    if true_y.size == pred_y.size:
        true_y = true_y.argmax(axis = 1)
    
    batch_size = pred_y.shape[0]
    return -np.sum(np.log(pred_y[np.arange(batch_size), true_y] + 1e-7)) / batch_size
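
cross_entropy_error above accepts both target formats used in this post: integer class labels and one-hot vectors (when the sizes match, it argmaxes the one-hot targets back to labels). A quick sanity check, illustrative only and not part of the original code, with made-up scores:
# Illustrative check: softmax rows sum to 1, and cross_entropy_error
# returns the same loss for integer labels and for one-hot labels
sample_scores = np.array([[2.0, 1.0, 0.1],
                          [0.5, 2.5, 0.3]])
probs = softmax(sample_scores)
print(probs.sum(axis = 1))                             # ~[1. 1.]
print(cross_entropy_error(probs, np.array([0, 1])))    # integer labels
print(cross_entropy_error(probs, np.eye(3)[[0, 1]]))   # one-hot labels, same value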

 

  • Util Classes
  • ReLU
class ReLU:
    def __init__(self):
        self.mask = None

    def forward(self, input_data):
        self.mask = (input_data <= 0)
        out = input_data.copy()
        out[self.mask] = 0

        return out
    
    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout

        return dx

 

  • Sigmoid
class Sigmoid:
    def __init__(self):
        self.out = None
    
    def forward(self, input_data):
        out = 1 / (1 + np.exp(-input_data))
        self.out = out
        return out
    
    def backward(self, dout):
        dx = dout * (1.0 - self.out) * self.out
        return dx

 

  • Layer
class Layer:
    def __init__(self, W, b):
        self.W = W
        self.b = b

        self.input_data = None
        self.input_data_shape = None

        self.dW = None
        self.db = None

    def forward(self, input_data):
        self.input_data_shape = input_data.shape

        input_data = input_data.reshape(input_data.shape[0], -1)
        self.input_data = input_data
        out = np.dot(self.input_data, self.W) + self.b

        return out
    
    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.input_data.T, dout)
        self.db = np.sum(dout, axis = 0)

        dx = dx.reshape(*self.input_data_shape)
        return dx
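
To sanity-check the backward pass of the affine Layer, its analytic dW can be compared with a finite-difference estimate. The snippet below is illustrative only and not part of the original post; it uses a made-up 2x4 input, a 4x3 weight matrix, and "sum of the outputs" as the loss, so the upstream gradient is all ones.
# Illustrative gradient check for Layer.backward (not in the original post)
np.random.seed(0)
check_layer = Layer(W = 0.1 * np.random.randn(4, 3), b = np.zeros(3))
x_check = np.random.randn(2, 4)
dout_check = np.ones((2, 3))          # dLoss/dOut for Loss = sum(out)

check_layer.forward(x_check)
check_layer.backward(dout_check)      # fills check_layer.dW analytically

eps = 1e-4
numerical_dW = np.zeros_like(check_layer.W)
for i in range(check_layer.W.shape[0]):
    for j in range(check_layer.W.shape[1]):
        original = check_layer.W[i, j]
        check_layer.W[i, j] = original + eps
        loss_plus = np.sum(check_layer.forward(x_check))
        check_layer.W[i, j] = original - eps
        loss_minus = np.sum(check_layer.forward(x_check))
        check_layer.W[i, j] = original
        numerical_dW[i, j] = (loss_plus - loss_minus) / (2 * eps)

# The maximum absolute difference should be close to 0
print(np.max(np.abs(numerical_dW - check_layer.dW)))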

 

  • Batch Normalization
class BatchNormalization:
    def __init__(self, gamma, beta, momentum = 0.9, running_mean = None, running_var = None):
        self.gamma = gamma
        self.beta = beta
        self.momentum = momentum
        self.input_shape = None
        
        self.running_mean = running_mean
        self.running_var = running_var
        
        self.batch_size = None
        self.xc = None
        self.xn = None
        self.std = None
        self.dgamma = None
        self.dbeta = None
    
    def forward(self, input_data, is_train = True):
        self.input_shape = input_data.shape
        if input_data.ndim != 2:
            N, C, H, W = input_data.shape
            input_data = input_data.reshape(N, -1)

        out = self.__forward(input_data, is_train)

        return out.reshape(*self.input_shape)
    
    def __forward(self, input_data, is_train):
        if self.running_mean is None:
            N, D = input_data.shape
            self.running_mean = np.zeros(D)
            self.running_var = np.zeros(D)
        
        if is_train:
            mu = input_data.mean(axis = 0)
            xc = input_data - mu
            var = np.mean(xc**2, axis = 0)
            std = np.sqrt(var + 10e-7)
            xn = xc / std

            self.batch_size = input_data.shape[0]
            self.xc = xc
            self.xn = xn    # cache the normalized input; needed for dgamma in backward
            self.std = std
            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mu 
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var
        
        else:
            xc = input_data - self.running_mean
            xn = xc / ((np.sqrt(self.running_var + 10e-7)))
        
        out = self.gamma * xn + self.beta
        return out
    
    def backward(self, dout):
        if dout.ndim != 2:
            N, C, H, W = dout.shape
            dout = dout.reshape(N, -1)
        
        dx = self.__backward(dout)

        dx = dx.reshape(*self.input_shape)
        return dx
    
    def __backward(self, dout):
        dbeta = dout.sum(axis = 0)
        dgamma = np.sum(self.xn * dout, axis = 0)
        dxn = self.gamma * dout
        dxc = dxn / self.std
        dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis = 0)
        dvar = 0.5 * dstd / self.std
        dxc += (2.0 / self.batch_size) * self.xc * dvar
        dmu = np.sum(dxc, axis = 0)
        dx = dxc - dmu / self.batch_size

        self.dgamma = dgamma
        self.dbeta = dbeta

        return dx

 

  • Dropout
class Dropout:
    def __init__(self, dropout_ratio = 0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, input_data, is_train = True):
        if is_train:
            self.mask = np.random.rand(*input_data.shape) > self.dropout_ratio
            return input_data * self.mask
        else:
            return input_data * (1.0 - self.dropout_ratio)
        
    def backward(self, dout):
        return dout * self.mask

 

  • Softmax
class Softmax:
    def __init__(self):
        self.loss = None
        self.y = None
        self.t = None

    def forward(self, input_data, t):
        self.t = t
        self.y = softmax(input_data)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss
    
    def backward(self, dout = 1):
        batch_size = self.t.shape[0]

        if self.t.size == self.y.size:
            dx = (self.y - self.t) / batch_size
        else:
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size

        return dx
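
 

  • Optimizers (SGD / Adam)
The training loops below call SGD(learning_rate = ...) and Adam(learning_rate = ...), but neither class is defined in the post. The sketch below is a minimal assumption of what they look like: standard SGD and Adam update rules exposed through the update(params, grads) interface the loops use; the Adam defaults (beta1 = 0.9, beta2 = 0.999) are the usual values, not ones stated in the post.
class SGD:
    def __init__(self, learning_rate = 0.01):
        self.learning_rate = learning_rate

    def update(self, params, grads):
        # In-place update: param <- param - lr * grad
        for key in params.keys():
            params[key] -= self.learning_rate * grads[key]


class Adam:
    def __init__(self, learning_rate = 0.001, beta1 = 0.9, beta2 = 0.999):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None    # first-moment (mean) estimates
        self.v = None    # second-moment (uncentered variance) estimates

    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = {}, {}
            for key, val in params.items():
                self.m[key] = np.zeros_like(val)
                self.v[key] = np.zeros_like(val)

        self.iter += 1
        # Bias-corrected step size
        lr_t = self.learning_rate * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)

        for key in params.keys():
            self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
            self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)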

 

  • Model
class MyModel:
    def __init__(self, input_size, hidden_size_list, output_size,
                 activation = 'relu', decay_lambda = 0,
                 use_dropout = False, dropout_ratio = 0.5, use_batchnorm = False):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.use_dropout = use_dropout
        self.decay_lambda = decay_lambda
        self.use_batchnorm = use_batchnorm
        self.params = {}

        self.__init_weight(activation)

        activation_layer = {'sigmoid': Sigmoid, 'relu': ReLU}
        self.layers = OrderedDict()
        for idx in range(1, self.hidden_layer_num + 1):
            self.layers['Layer' + str(idx)] = Layer(self.params['W' + str(idx)],
                                                    self.params['b' + str(idx)])
            if self.use_batchnorm:
                self.params['gamma' + str(idx)] = np.ones(hidden_size_list[idx - 1])
                self.params['beta' + str(idx)] = np.zeros(hidden_size_list[idx - 1])  # shift parameter is conventionally initialized to zeros
                self.layers['BatchNorm' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)], self.params['beta' + str(idx)])
            
            self.layers['Activation_function' + str(idx)] = activation_layer[activation]()

            if self.use_dropout:
                self.layers['Dropout' + str(idx)] = Dropout(dropout_ratio)
        
        idx = self.hidden_layer_num + 1
        self.layers['Layer' + str(idx)] = Layer(self.params['W' + str(idx)], self.params['b' + str(idx)])
        self.last_layer = Softmax()

    def __init_weight(self, activation):
        all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]

        for idx in range(1, len(all_size_list)):
            scale = None
            if activation.lower() == 'relu':
                scale = np.sqrt(2.0 / all_size_list[idx - 1])   # He initialization (fan-in)
            elif activation.lower() == 'sigmoid':
                scale = np.sqrt(1.0 / all_size_list[idx - 1])   # Xavier initialization (fan-in)
            
            self.params['W' + str(idx)] = scale * np.random.randn(all_size_list[idx - 1], all_size_list[idx])
            self.params['b' + str(idx)] = np.zeros(all_size_list[idx])

    
    def predict(self, x, is_train = False):
        for key, layer in self.layers.items():
            if 'Dropout' in key or 'BatchNorm' in key:
                x = layer.forward(x, is_train)
            else:
                x = layer.forward(x)
            
        return x
    
    def loss(self, x, t, is_train = False):
        y = self.predict(x, is_train)

        weight_decay = 0
        for idx in range(1, self.hidden_layer_num + 2):
            W = self.params['W' + str(idx)]
            # Apply L2 regularization (weight decay) to each weight matrix
            weight_decay += 0.5 * self.decay_lambda * np.sum(W**2)
        
        return self.last_layer.forward(y, t) + weight_decay
    
    def accuracy(self, x, t):
        y = self.predict(x, is_train = False)
        y = np.argmax(y, axis = 1)
        if t.ndim != 1:
            t = np.argmax(t, axis = 1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy
    
    def gradient(self, x, t):
        self.loss(x, t, is_train = True)

        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        # Traverse the layers in reverse order for backpropagation
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
        
        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            grads['W' + str(idx)] = self.layers['Layer' + str(idx)].dW + self.decay_lambda * self.params['W' + str(idx)]
            grads['b' + str(idx)] = self.layers['Layer' + str(idx)].db

            if self.use_batchnorm and idx != self.hidden_layer_num + 1:
                grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma
                grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta
        
        return grads

 

  • Model Creation and Training (1)

  - Techniques used

  • Training data size: 10,000
  • Hidden Layers: 4 [256, 100, 64, 32]
  • SGD
  • EPOCHS: 1000
  • Learning rate: 1e-2 (0.01)
  • Batch size: 256
  • Batch Normalization
  • Regularization (weight decay): 0.1
decay_lambda = 0.1
model_1 = MyModel(input_size = 784, hidden_size_list = [256, 100, 64, 32], output_size = 10,
                  decay_lambda = decay_lambda, use_batchnorm = True)

optimizer = SGD(learning_rate = learning_rate)

model_1_train_loss_list = []
model_1_train_acc_list = []
model_1_test_acc_list = []

for epoch in range(epochs):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    y_batch = y_train[batch_mask]

    grads = model_1.gradient(x_batch, y_batch)
    optimizer.update(model_1.params, grads)

    loss = model_1.loss(x_batch, y_batch)
    model_1_train_loss_list.append(loss)

    train_acc = model_1.accuracy(x_train, y_train)
    test_acc = model_1.accuracy(x_test, y_test)
    model_1_train_acc_list.append(train_acc)
    model_1_test_acc_list.append(test_acc)

    if epoch % 50 == 0:
        print("[Model 1]  Epoch: {}  Train Loss: {:.4f}  Train Accuracy: {:.4f}  Test Accuracy: {:.4f}".format(epoch+1, loss, train_acc, test_acc))

# Output
[Model 1]  Epoch: 1  Train Loss: 137.5669  Train Accuracy: 0.1000  Test Accuracy: 0.1020
[Model 1]  Epoch: 51  Train Loss: 112.5705  Train Accuracy: 0.6919  Test Accuracy: 0.6257
[Model 1]  Epoch: 101  Train Loss: 101.5959  Train Accuracy: 0.7885  Test Accuracy: 0.7303
[Model 1]  Epoch: 151  Train Loss: 91.9510  Train Accuracy: 0.8327  Test Accuracy: 0.7677
[Model 1]  Epoch: 201  Train Loss: 83.1132  Train Accuracy: 0.8590  Test Accuracy: 0.7963
[Model 1]  Epoch: 251  Train Loss: 75.2112  Train Accuracy: 0.8741  Test Accuracy: 0.8127
[Model 1]  Epoch: 301  Train Loss: 68.0901  Train Accuracy: 0.8852  Test Accuracy: 0.8243
[Model 1]  Epoch: 351  Train Loss: 61.6642  Train Accuracy: 0.8969  Test Accuracy: 0.8347
[Model 1]  Epoch: 401  Train Loss: 55.9115  Train Accuracy: 0.9010  Test Accuracy: 0.8450
[Model 1]  Epoch: 451  Train Loss: 50.6766  Train Accuracy: 0.9085  Test Accuracy: 0.8533
[Model 1]  Epoch: 501  Train Loss: 45.8550  Train Accuracy: 0.9132  Test Accuracy: 0.8573
[Model 1]  Epoch: 551  Train Loss: 41.5136  Train Accuracy: 0.9185  Test Accuracy: 0.8613
[Model 1]  Epoch: 601  Train Loss: 37.5357  Train Accuracy: 0.9221  Test Accuracy: 0.8667
[Model 1]  Epoch: 651  Train Loss: 34.0123  Train Accuracy: 0.9255  Test Accuracy: 0.8720
[Model 1]  Epoch: 701  Train Loss: 30.7791  Train Accuracy: 0.9269  Test Accuracy: 0.8747
[Model 1]  Epoch: 751  Train Loss: 27.9667  Train Accuracy: 0.9301  Test Accuracy: 0.8800
[Model 1]  Epoch: 801  Train Loss: 25.3409  Train Accuracy: 0.9313  Test Accuracy: 0.8823
[Model 1]  Epoch: 851  Train Loss: 23.0407  Train Accuracy: 0.9345  Test Accuracy: 0.8830
[Model 1]  Epoch: 901  Train Loss: 20.8816  Train Accuracy: 0.9363  Test Accuracy: 0.8867
[Model 1]  Epoch: 951  Train Loss: 18.8845  Train Accuracy: 0.9387  Test Accuracy: 0.8903
  • Visualization
# Plot accuracy
x = np.arange(len(model_1_train_acc_list))

plt.plot(x, model_1_train_acc_list, 'bo', label = 'train', markersize = 3)
plt.plot(x, model_1_test_acc_list, 'rv', label = 'test', markersize = 1)
plt.xlabel("Epochs")
plt.ylabel('Accuracy')
plt.grid()
plt.ylim(0, 1.0)
plt.legend()
plt.show()

# Plot loss
x = np.arange(len(model_1_train_loss_list))

plt.plot(x, model_1_train_loss_list, 'g--', label = 'train', markersize = 3)
plt.xlabel("Epochs")
plt.ylabel('Loss')
plt.grid()
plt.legend()
plt.show()

 

  • Model Creation and Training (2)

  - Techniques used

  • Training data size: 10,000
  • Hidden Layers: 4 [100, 100, 100, 100]
  • Adam
  • EPOCHS: 1000
  • Learning rate: 1e-3 (0.001)
  • Batch size: 100
  • Dropout: 0.5
  • Batch Normalization
  • Regularization (weight decay): 0.15
# Load and preprocess data
np.random.seed(42)

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
num_classes = 10

x_train = x_train[:10000]
x_test = x_test[:3000]

y_train = y_train[:10000]
y_test = y_test[:3000]

# flatten
x_train, x_test = x_train.reshape(-1, 28*28).astype(np.float32), x_test.reshape(-1, 28*28).astype(np.float32)

x_train = x_train / 255.
x_test = x_test / 255.

# Convert y_train to one-hot vectors (y_test keeps its integer labels)
y_train = np.eye(num_classes)[y_train]

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)


# Hyperparameters
epochs = 1000
learning_rate = 1e-3
batch_size = 100
train_size = x_train.shape[0]
iter_per_epoch = max(train_size / batch_size, 1)

decay_lambda_2 = 0.15
model_2 = MyModel(input_size = 784, hidden_size_list = [100, 100, 100, 100], decay_lambda = decay_lambda_2,
                  output_size = 10, use_dropout = True, dropout_ratio = 0.5, use_batchnorm = True)

optimizer = Adam(learning_rate = learning_rate)

model_2_train_loss_list = []
model_2_train_acc_list = []
model_2_test_acc_list = []


# Model training
for epoch in range(epochs):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    y_batch = y_train[batch_mask]

    grads = model_2.gradient(x_batch, y_batch)
    optimizer.update(model_2.params, grads)

    loss = model_2.loss(x_batch, y_batch)
    model_2_train_loss_list.append(loss)

    train_acc = model_2.accuracy(x_train, y_train)
    test_acc = model_2.accuracy(x_test, y_test)
    model_2_train_acc_list.append(train_acc)
    model_2_test_acc_list.append(test_acc)

    if epoch % 50 == 0:
        print("[Model 1]  Epoch: {}  Train Loss: {:.4f}  Train Accuracy: {:.4f}  Test Accuracy: {:.4f}".format(epoch+1, loss, train_acc, test_acc))

# Output
[Model 2]  Epoch: 1  Train Loss: 189.7545  Train Accuracy: 0.0730  Test Accuracy: 0.0750
[Model 2]  Epoch: 51  Train Loss: 110.1612  Train Accuracy: 0.2698  Test Accuracy: 0.2470
[Model 2]  Epoch: 101  Train Loss: 69.2994  Train Accuracy: 0.5468  Test Accuracy: 0.5150
[Model 2]  Epoch: 151  Train Loss: 44.7758  Train Accuracy: 0.5966  Test Accuracy: 0.5520
[Model 2]  Epoch: 201  Train Loss: 29.6832  Train Accuracy: 0.6948  Test Accuracy: 0.6287
[Model 2]  Epoch: 251  Train Loss: 20.2380  Train Accuracy: 0.7174  Test Accuracy: 0.6733
[Model 2]  Epoch: 301  Train Loss: 14.4343  Train Accuracy: 0.7739  Test Accuracy: 0.7323
[Model 2]  Epoch: 351  Train Loss: 10.3112  Train Accuracy: 0.7837  Test Accuracy: 0.7340
[Model 2]  Epoch: 401  Train Loss: 7.9462  Train Accuracy: 0.8494  Test Accuracy: 0.7950
[Model 2]  Epoch: 451  Train Loss: 6.2215  Train Accuracy: 0.8380  Test Accuracy: 0.7767
[Model 2]  Epoch: 501  Train Loss: 4.9697  Train Accuracy: 0.8574  Test Accuracy: 0.8087
[Model 2]  Epoch: 551  Train Loss: 4.3279  Train Accuracy: 0.8439  Test Accuracy: 0.7980
[Model 2]  Epoch: 601  Train Loss: 3.6755  Train Accuracy: 0.8670  Test Accuracy: 0.8337
[Model 2]  Epoch: 651  Train Loss: 3.1388  Train Accuracy: 0.8588  Test Accuracy: 0.8090
[Model 2]  Epoch: 701  Train Loss: 2.8542  Train Accuracy: 0.8635  Test Accuracy: 0.8040
[Model 2]  Epoch: 751  Train Loss: 2.5575  Train Accuracy: 0.8723  Test Accuracy: 0.8247
[Model 2]  Epoch: 801  Train Loss: 2.3355  Train Accuracy: 0.8722  Test Accuracy: 0.8247
[Model 2]  Epoch: 851  Train Loss: 2.3049  Train Accuracy: 0.8755  Test Accuracy: 0.8163
[Model 2]  Epoch: 901  Train Loss: 2.1523  Train Accuracy: 0.8509  Test Accuracy: 0.8027
  • Visualization
# Plot accuracy
x = np.arange(len(model_2_train_acc_list))

plt.plot(x, model_2_train_acc_list, 'bo', label = 'train', markersize = 3)
plt.plot(x, model_2_test_acc_list, 'rv', label = 'test', markersize = 1)
plt.xlabel("Epochs")
plt.ylabel('Accuracy')
plt.grid()
plt.ylim(0, 1.0)
plt.legend()
plt.show()

# Plot loss
x = np.arange(len(model_2_train_loss_list))

plt.plot(x, model_2_train_loss_list, 'g--', label = 'train', markersize = 3)
plt.xlabel("Epochs")
plt.ylabel('Loss')
plt.grid()
plt.legend()
plt.show()

 

  • Model Creation and Training (3)

  - Techniques used

  • Training data size: 20,000
  • Hidden Layers: 3 [256, 100, 100]
  • Adam
  • EPOCHS: 1000
  • Learning rate: 1e-2 (0.01)
  • Batch size: 100
  • Batch Normalization
# Load and preprocess data
np.random.seed(42)

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
num_classes = 10

x_train = x_train[:20000]
x_test = x_test[:3000]

y_train = y_train[:20000]
y_test = y_test[:3000]

# flatten
x_train, x_test = x_train.reshape(-1, 28*28).astype(np.float32), x_test.reshape(-1, 28*28).astype(np.float32)

x_train = x_train / 255.
x_test = x_test / 255.

# Convert y_train to one-hot vectors (y_test keeps its integer labels)
y_train = np.eye(num_classes)[y_train]


# Hyperparameters
epochs = 1000
learning_rate = 1e-2
batch_size = 100
train_size = x_train.shape[0]
iter_per_epoch = max(train_size / batch_size, 1)

decay_lambda_3 = 0
model_3 = MyModel(input_size = 784, hidden_size_list = [256, 100, 100], decay_lambda = decay_lambda_3,
                  output_size = 10, use_batchnorm = True)

optimizer = Adam(learning_rate = learning_rate)

model_3_train_loss_list = []
model_3_train_acc_list = []
model_3_test_acc_list = []


# Model training
for epoch in range(epochs):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    y_batch = y_train[batch_mask]

    grads = model_3.gradient(x_batch, y_batch)
    optimizer.update(model_3.params, grads)

    loss = model_3.loss(x_batch, y_batch)
    model_3_train_loss_list.append(loss)

    train_acc = model_3.accuracy(x_train, y_train)
    test_acc = model_3.accuracy(x_test, y_test)
    model_3_train_acc_list.append(train_acc)
    model_3_test_acc_list.append(test_acc)

    if epoch % 50 == 0:
        print("[Model 1]  Epoch: {}  Train Loss: {:.4f}  Train Accuracy: {:.4f}  Test Accuracy: {:.4f}".format(epoch+1, loss, train_acc, test_acc))


# Output
[Model 3]  Epoch: 1  Train Loss: 11.1115  Train Accuracy: 0.2633  Test Accuracy: 0.2520
[Model 3]  Epoch: 51  Train Loss: 0.3368  Train Accuracy: 0.8868  Test Accuracy: 0.8573
[Model 3]  Epoch: 101  Train Loss: 0.3627  Train Accuracy: 0.9221  Test Accuracy: 0.8937
[Model 3]  Epoch: 151  Train Loss: 0.1413  Train Accuracy: 0.9246  Test Accuracy: 0.8897
[Model 3]  Epoch: 201  Train Loss: 0.1724  Train Accuracy: 0.9344  Test Accuracy: 0.8950
[Model 3]  Epoch: 251  Train Loss: 0.2378  Train Accuracy: 0.9447  Test Accuracy: 0.9123
[Model 3]  Epoch: 301  Train Loss: 0.1957  Train Accuracy: 0.9496  Test Accuracy: 0.9133
[Model 3]  Epoch: 351  Train Loss: 0.0789  Train Accuracy: 0.9612  Test Accuracy: 0.9300
[Model 3]  Epoch: 401  Train Loss: 0.1396  Train Accuracy: 0.9544  Test Accuracy: 0.9150
[Model 3]  Epoch: 451  Train Loss: 0.0557  Train Accuracy: 0.9593  Test Accuracy: 0.9223
[Model 3]  Epoch: 501  Train Loss: 0.0462  Train Accuracy: 0.9615  Test Accuracy: 0.9250
[Model 3]  Epoch: 551  Train Loss: 0.0584  Train Accuracy: 0.9661  Test Accuracy: 0.9340
[Model 3]  Epoch: 601  Train Loss: 0.1176  Train Accuracy: 0.9692  Test Accuracy: 0.9323
[Model 3]  Epoch: 651  Train Loss: 0.0956  Train Accuracy: 0.9679  Test Accuracy: 0.9300
[Model 3]  Epoch: 701  Train Loss: 0.0324  Train Accuracy: 0.9703  Test Accuracy: 0.9377
[Model 3]  Epoch: 751  Train Loss: 0.0896  Train Accuracy: 0.9640  Test Accuracy: 0.9317
[Model 3]  Epoch: 801  Train Loss: 0.0107  Train Accuracy: 0.9813  Test Accuracy: 0.9413
[Model 3]  Epoch: 851  Train Loss: 0.1093  Train Accuracy: 0.9795  Test Accuracy: 0.9450
[Model 3]  Epoch: 901  Train Loss: 0.0329  Train Accuracy: 0.9755  Test Accuracy: 0.9353
[Model 3]  Epoch: 951  Train Loss: 0.0891  Train Accuracy: 0.9759  Test Accuracy: 0.9357
  • Visualization
# Plot accuracy
x = np.arange(len(model_3_train_acc_list))

plt.plot(x, model_3_train_acc_list, 'bo', label = 'train', markersize = 3)
plt.plot(x, model_3_test_acc_list, 'rv', label = 'test', markersize = 1)
plt.xlabel("Epochs")
plt.ylabel('Accuracy')
plt.grid()
plt.ylim(0, 1.0)
plt.legend()
plt.show()

# Plot loss
x = np.arange(len(model_3_train_loss_list))

plt.plot(x, model_3_train_loss_list, 'g--', label = 'train', markersize = 3)
plt.xlabel("Epochs")
plt.ylabel('Loss')
plt.grid()
plt.legend()
plt.show()

 

  • Comparing the Three Models
    • Because the amount of training data was deliberately limited for all three models above, they are likely to learn poorly,
      so several training techniques were applied to compensate.
x = np.arange(len(model_3_train_acc_list))

plt.plot(x, model_1_train_acc_list, 'b--', label = 'Model 1 train', markersize = 3)
plt.plot(x, model_2_train_acc_list, 'r:', label = 'Model 2 train', markersize = 3)
plt.plot(x, model_3_train_acc_list, 'go', label = 'Model 3 train', markersize = 3)
plt.xlabel("Epochs")
plt.ylabel('Accuracy')
plt.grid()
plt.ylim(0, 1.0)
plt.legend()
plt.show()
