● MNIST Classification Practice
- Module Import
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict
- Load Data
np.random.seed(42)
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
num_classes = 10
- Data Preprocessing
np.random.seed(42)
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
num_classes = 10
# The full dataset is large, so use a smaller subset
x_train = x_train[:10000]
x_test = x_test[:3000]
y_train = y_train[:10000]
y_test = y_test[:3000]
# flatten
x_train, x_test = x_train.reshape(-1, 28*28).astype(np.float32), x_test.reshape(-1, 28*28).astype(np.float32)
x_train = x_train / 255.0
x_test = x_test / 255.0
# Convert y_train to one-hot vectors (y_test keeps its integer labels)
y_train = np.eye(num_classes)[y_train]
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
# Output
(10000, 784)
(10000, 10)
(3000, 784)
(3000,)
- Hyperparameters
epochs = 1000
learning_rate = 1e-2
batch_size = 256
train_size = x_train.shape[0]
iter_per_epoch = max(train_size / batch_size, 1)
- Util Functions
def softmax(x):
if x.ndim == 2:
x = x.T
x = x - np.max(x, axis = 0)
y = np.exp(x) / np.sum(np.exp(x), axis = 0)
return y.T
x = x - np.max(x)
return np.exp(x) / np.sum(np.exp(x))
def mean_squared_error(y, t):
return 0.5 * np.sum((y - t)**2)
def cross_entropy_error(pred_y, true_y):
if pred_y.ndim == 1:
true_y = true_y.reshape(1, true_y.size)
pred_y = pred_y.reshape(1, pred_y.size)
if true_y.size == pred_y.size:
true_y = true_y.argmax(axis = 1)
batch_size = pred_y.shape[0]
return -np.sum(np.log(pred_y[np.arange(batch_size), true_y] + 1e-7)) / batch_size
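As a quick sanity check of these two utilities, here is a toy example (the logits and targets are illustrative, not taken from MNIST):
logits = np.array([[2.0, 1.0, 0.1], [0.5, 2.5, 0.3]])
targets = np.eye(3)[[0, 1]]                 # one-hot targets for classes 0 and 1
probs = softmax(logits)                     # each row sums to 1
print(probs.sum(axis = 1))                  # -> [1. 1.]
print(cross_entropy_error(probs, targets))  # average negative log-likelihood over the batch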
- Util Classes
- ReLU
class ReLU:
def __init__(self):
self.mask = None
def forward(self, input_data):
self.mask = (input_data <= 0)
out = input_data.copy()
out[self.mask] = 0
return out
def backward(self, dout):
dout[self.mask] = 0
dx = dout
return dx
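A quick check of the ReLU layer on a toy array (illustrative values): the mask saved during forward zeroes the same positions in backward.
relu = ReLU()
x = np.array([[1.0, -0.5], [-2.0, 3.0]])
print(relu.forward(x))                   # [[1. 0.] [0. 3.]] - negatives clipped to 0
print(relu.backward(np.ones_like(x)))    # [[1. 0.] [0. 1.]] - gradient blocked where input <= 0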
- Sigmoid
class Sigmoid:
def __init__(self):
self.out = None
def forward(self, input_data):
out = 1 / (1 + np.exp(-input_data))
self.out = out
return out
def backward(self, dout):
dx = dout * (1.0 - self.out) * self.out
return dx
- Layer
class Layer:
def __init__(self, W, b):
self.W = W
self.b = b
self.input_data = None
self.input_data_shape = None
self.dW = None
self.db = None
def forward(self, input_data):
self.input_data_shape = input_data.shape
input_data = input_data.reshape(input_data.shape[0], -1)
self.input_data = input_data
out = np.dot(self.input_data, self.W) + self.b
return out
def backward(self, dout):
dx = np.dot(dout, self.W.T)
self.dW = np.dot(self.input_data.T, dout)
self.db = np.sum(dout, axis = 0)
dx = dx.reshape(*self.input_data_shape)
return dx
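The Layer class above is a fully connected (affine) layer. A small shape check with assumed toy dimensions (batch of 2, 4 input features, 3 output units):
np.random.seed(0)                          # illustrative seed
W = np.random.randn(4, 3)
b = np.zeros(3)
affine = Layer(W, b)
x = np.random.randn(2, 4)
out = affine.forward(x)                    # shape (2, 3)
dx = affine.backward(np.ones_like(out))    # shape (2, 4); also fills affine.dW and affine.db
print(out.shape, dx.shape, affine.dW.shape, affine.db.shape)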
- Batch Normalization
class BatchNormalization:
def __init__(self, gamma, beta, momentum = 0.9, running_mean = None, running_var = None):
self.gamma = gamma
self.beta = beta
self.momentum = momentum
self.input_shape = None
self.running_mean = running_mean
self.running_var = running_var
self.batch_size = None
self.xc = None
self.xn = None
self.std = None
self.dgamma = None
self.dbeta = None
def forward(self, input_data, is_train = True):
self.input_shape = input_data.shape
if input_data.ndim != 2:
N, C, H, W = input_data.shape
input_data = input_data.reshape(N, -1)
out = self.__forward(input_data, is_train)
return out.reshape(*self.input_shape)
def __forward(self, input_data, is_train):
if self.running_mean is None:
N, D = input_data.shape
self.running_mean = np.zeros(D)
self.running_var = np.zeros(D)
if is_train:
mu = input_data.mean(axis = 0)
xc = input_data - mu
var = np.mean(xc**2, axis = 0)
std = np.sqrt(var + 10e-7)
xn = xc / std
self.batch_size = input_data.shape[0]
self.xc = xc
self.xn = xn
self.std = std
self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mu
self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var
else:
xc = input_data - self.running_mean
xn = xc / ((np.sqrt(self.running_var + 10e-7)))
out = self.gamma * xn + self.beta
return out
def backward(self, dout):
if dout.ndim != 2:
N, C, H, W = dout.shape
dout = dout.reshape(N, -1)
dx = self.__backward(dout)
dx = dx.reshape(*self.input_shape)
return dx
def __backward(self, dout):
dbeta = dout.sum(axis = 0)
dgamma = np.sum(self.xn * dout, axis = 0)
dxn = self.gamma * dout
dxc = dxn / self.std
dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis = 0)
dvar = 0.5 * dstd / self.std
dxc += (2.0 / self.batch_size) * self.xc * dvar
dmu = np.sum(dxc, axis = 0)
dx = dxc - dmu / self.batch_size
self.dgamma = dgamma
self.dbeta = dbeta
return dx
- Dropout
class Dropout:
def __init__(self, dropout_ratio = 0.5):
self.dropout_ratio = dropout_ratio
self.mask = None
def forward(self, input_data, is_train = True):
if is_train:
self.mask = np.random.rand(*input_data.shape) > self.dropout_ratio
return input_data * self.mask
else:
return input_data * (1.0 - self.dropout_ratio)
def backward(self, dout):
return dout * self.mask
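Note that this Dropout variant zeroes units during training and scales activations by (1 - dropout_ratio) at test time, rather than the "inverted dropout" form that rescales during training. A small illustrative check:
np.random.seed(0)                          # illustrative seed
drop = Dropout(dropout_ratio = 0.5)
x = np.ones((2, 4))
print(drop.forward(x, is_train = True))    # roughly half the entries zeroed
print(drop.forward(x, is_train = False))   # every entry scaled to 0.5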
- Softmax
class Softmax:
def __init__(self):
self.loss = None
self.y = None
self.t = None
def forward(self, input_data, t):
self.t = t
self.y = softmax(input_data)
self.loss = cross_entropy_error(self.y, self.t)
return self.loss
def backward(self, dout = 1):
batch_size = self.t.shape[0]
if self.t.size == self.y.size:
dx = (self.y - self.t) / batch_size
else:
dx = self.y.copy()
dx[np.arange(batch_size), self.t] -= 1
dx = dx / batch_size
return dx
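The Softmax class is a softmax-with-loss layer: forward returns the cross-entropy loss, and for one-hot targets backward reduces to the combined gradient (y - t) / batch_size. A toy check with illustrative values:
sm = Softmax()
scores = np.array([[2.0, 1.0, 0.1], [0.5, 2.5, 0.3]])
t = np.eye(3)[[0, 1]]                      # one-hot targets
loss = sm.forward(scores, t)
dscores = sm.backward()                    # equals (softmax(scores) - t) / 2
print(loss, dscores.shape)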
- Model
class MyModel:
def __init__(self, input_size, hidden_size_list, output_size,
activation = 'relu', decay_lambda = 0,
use_dropout = False, dropout_ratio = 0.5, use_batchnorm = False):
self.input_size = input_size
self.output_size = output_size
self.hidden_size_list = hidden_size_list
self.hidden_layer_num = len(hidden_size_list)
self.use_dropout = use_dropout
self.decay_lambda = decay_lambda
self.use_batchnorm = use_batchnorm
self.params = {}
self.__init_weight(activation)
activation_layer = {'sigmoid': Sigmoid, 'relu': ReLU}
self.layers = OrderedDict()
for idx in range(1, self.hidden_layer_num + 1):
self.layers['Layer' + str(idx)] = Layer(self.params['W' + str(idx)],
self.params['b' + str(idx)])
if self.use_batchnorm:
self.params['gamma' + str(idx)] = np.ones(hidden_size_list[idx - 1])
self.params['beta' + str(idx)] = np.zeros(hidden_size_list[idx - 1])
self.layers['BatchNorm' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)], self.params['beta' + str(idx)])
self.layers['Activation_function' + str(idx)] = activation_layer[activation]()
if self.use_dropout:
self.layers['Dropout' + str(idx)] = Dropout(dropout_ratio)
idx = self.hidden_layer_num + 1
self.layers['Layer' + str(idx)] = Layer(self.params['W' + str(idx)], self.params['b' + str(idx)])
self.last_layer = Softmax()
def __init_weight(self, activation):
all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
for idx in range(1, len(all_size_list)):
scale = None
if activation.lower() == 'relu':
scale = np.sqrt(2.0 / all_size_list[idx - 1])
elif activation.lower() == 'sigmoid':
scale = np.sqrt(1.0 / all_size_list[idx - 1])
self.params['W' + str(idx)] = scale * np.random.randn(all_size_list[idx - 1], all_size_list[idx])
self.params['b' + str(idx)] = np.zeros(all_size_list[idx])
def predict(self, x, is_train = False):
for key, layer in self.layers.items():
if 'Dropout' in key or 'BatchNorm' in key:
x = layer.forward(x, is_train)
else:
x = layer.forward(x)
return x
def loss(self, x, t, is_train = False):
y = self.predict(x, is_train)
weight_decay = 0
for idx in range(1, self.hidden_layer_num + 2):
W = self.params['W' + str(idx)]
# Apply L2 regularization (weight decay)
weight_decay += 0.5 * self.decay_lambda * np.sum(W**2)
return self.last_layer.forward(y, t) + weight_decay
def accuracy(self, x, t):
y = self.predict(x, is_train = False)
y = np.argmax(y, axis = 1)
if t.ndim != 1:
t = np.argmax(t, axis = 1)
accuracy = np.sum(y == t) / float(x.shape[0])
return accuracy
def gradient(self,x, t):
self.loss(x, t, is_train = True)
dout = 1
dout = self.last_layer.backward(dout)
layers = list(self.layers.values())
# Reverse the layer order to traverse it backward
layers.reverse()
for layer in layers:
dout = layer.backward(dout)
grads = {}
for idx in range(1, self.hidden_layer_num + 2):
grads['W' + str(idx)] = self.layers['Layer' + str(idx)].dW + self.decay_lambda * self.params['W' + str(idx)]
grads['b' + str(idx)] = self.layers['Layer' + str(idx)].db
if self.use_batchnorm and idx != self.hidden_layer_num + 1:
grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma
grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta
return grads
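- Optimizers (SGD / Adam)
The training loops below create SGD and Adam objects and call optimizer.update(params, grads), but the two classes are not defined in this post (they were covered in the earlier post on deep learning training techniques). A minimal sketch of both, assuming only that update(params, grads) interface and the standard update rules, so the code runs end to end:
class SGD:
    def __init__(self, learning_rate = 0.01):
        self.learning_rate = learning_rate
    def update(self, params, grads):
        # vanilla gradient descent step, in place so the layers see the updated weights
        for key in params.keys():
            params[key] -= self.learning_rate * grads[key]

class Adam:
    def __init__(self, learning_rate = 0.001, beta1 = 0.9, beta2 = 0.999):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None        # first-moment estimates
        self.v = None        # second-moment estimates
    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = {}, {}
            for key, val in params.items():
                self.m[key] = np.zeros_like(val)
                self.v[key] = np.zeros_like(val)
        self.iter += 1
        # bias-corrected step size
        lr_t = self.learning_rate * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
        for key in params.keys():
            self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
            self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)
Both classes accept the learning_rate keyword used when they are instantiated in the sections below.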
- Model Creation and Training (1)
- Techniques used
- Training samples: 10,000
- Hidden layers: 4 [256, 100, 64, 32]
- Optimizer: SGD
- Epochs: 1000
- Learning rate: 1e-2 (0.01)
- Batch size: 256
- Batch normalization
- L2 regularization (decay_lambda): 0.1
decay_lambda = 0.1
model_1 = MyModel(input_size = 784, hidden_size_list = [256, 100, 64, 32], output_size = 10,
decay_lambda = decay_lambda, use_batchnorm = True)
optimizer = SGD(learning_rate = learning_rate)
model_1_train_loss_list = []
model_1_train_acc_list = []
model_1_test_acc_list = []
for epoch in range(epochs):
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
y_batch = y_train[batch_mask]
grads = model_1.gradient(x_batch, y_batch)
optimizer.update(model_1.params, grads)
loss = model_1.loss(x_batch, y_batch)
model_1_train_loss_list.append(loss)
train_acc = model_1.accuracy(x_train, y_train)
test_acc = model_1.accuracy(x_test, y_test)
model_1_train_acc_list.append(train_acc)
model_1_test_acc_list.append(test_acc)
if epoch % 50 == 0:
print("[Model 1] Epoch: {} Train Loss: {:.4f} Train Accuracy: {:.4f} Test Accuracy: {:.4f}".format(epoch+1, loss, train_acc, test_acc))
# Output
[Model 1] Epoch: 1 Train Loss: 137.5669 Train Accuracy: 0.1000 Test Accuracy: 0.1020
[Model 1] Epoch: 51 Train Loss: 112.5705 Train Accuracy: 0.6919 Test Accuracy: 0.6257
[Model 1] Epoch: 101 Train Loss: 101.5959 Train Accuracy: 0.7885 Test Accuracy: 0.7303
[Model 1] Epoch: 151 Train Loss: 91.9510 Train Accuracy: 0.8327 Test Accuracy: 0.7677
[Model 1] Epoch: 201 Train Loss: 83.1132 Train Accuracy: 0.8590 Test Accuracy: 0.7963
[Model 1] Epoch: 251 Train Loss: 75.2112 Train Accuracy: 0.8741 Test Accuracy: 0.8127
[Model 1] Epoch: 301 Train Loss: 68.0901 Train Accuracy: 0.8852 Test Accuracy: 0.8243
[Model 1] Epoch: 351 Train Loss: 61.6642 Train Accuracy: 0.8969 Test Accuracy: 0.8347
[Model 1] Epoch: 401 Train Loss: 55.9115 Train Accuracy: 0.9010 Test Accuracy: 0.8450
[Model 1] Epoch: 451 Train Loss: 50.6766 Train Accuracy: 0.9085 Test Accuracy: 0.8533
[Model 1] Epoch: 501 Train Loss: 45.8550 Train Accuracy: 0.9132 Test Accuracy: 0.8573
[Model 1] Epoch: 551 Train Loss: 41.5136 Train Accuracy: 0.9185 Test Accuracy: 0.8613
[Model 1] Epoch: 601 Train Loss: 37.5357 Train Accuracy: 0.9221 Test Accuracy: 0.8667
[Model 1] Epoch: 651 Train Loss: 34.0123 Train Accuracy: 0.9255 Test Accuracy: 0.8720
[Model 1] Epoch: 701 Train Loss: 30.7791 Train Accuracy: 0.9269 Test Accuracy: 0.8747
[Model 1] Epoch: 751 Train Loss: 27.9667 Train Accuracy: 0.9301 Test Accuracy: 0.8800
[Model 1] Epoch: 801 Train Loss: 25.3409 Train Accuracy: 0.9313 Test Accuracy: 0.8823
[Model 1] Epoch: 851 Train Loss: 23.0407 Train Accuracy: 0.9345 Test Accuracy: 0.8830
[Model 1] Epoch: 901 Train Loss: 20.8816 Train Accuracy: 0.9363 Test Accuracy: 0.8867
[Model 1] Epoch: 951 Train Loss: 18.8845 Train Accuracy: 0.9387 Test Accuracy: 0.8903
- Visualization
# Plot accuracy
x = np.arange(len(model_1_train_acc_list))
plt.plot(x, model_1_train_acc_list, 'bo', label = 'train', markersize = 3)
plt.plot(x, model_1_test_acc_list, 'rv', label = 'test', markersize = 1)
plt.xlabel("Epochs")
plt.ylabel('Accuracy')
plt.grid()
plt.ylim(0, 1.0)
plt.legend()
plt.show()
# Plot loss
x = np.arange(len(model_1_train_loss_list))
plt.plot(x, model_1_train_loss_list, 'g--', label = 'train', markersize = 3)
plt.xlabel("Epochs")
plt.ylabel('Loss')
plt.grid()
plt.legend()
plt.show()
- Model Creation and Training (2)
- Techniques used
- Training samples: 10,000
- Hidden layers: 4 [100, 100, 100, 100]
- Optimizer: Adam
- Epochs: 1000
- Learning rate: 1e-3 (0.001)
- Batch size: 100
- Dropout: 0.5
- Batch normalization
- L2 regularization (decay_lambda): 0.15
# Load and preprocess data
np.random.seed(42)
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
num_classes = 10
x_train = x_train[:10000]
x_test = x_test[:3000]
y_train = y_train[:10000]
y_test = y_test[:3000]
# flatten
x_train, x_test = x_train.reshape(-1, 28*28).astype(np.float32), x_test.reshape(-1, 28*28).astype(np.float32)
x_train = x_train / 255.0
x_test = x_test / 255.0
# Convert y_train to one-hot vectors
y_train = np.eye(num_classes)[y_train]
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
# Hyperparameters
epochs = 1000
learning_rate = 1e-3
batch_size = 100
train_size = x_train.shape[0]
iter_per_epoch = max(train_size / batch_size, 1)
decay_lambda_2 = 0.15
model_2 = MyModel(input_size = 784, hidden_size_list = [100, 100, 100, 100], decay_lambda = decay_lambda_2,
output_size = 10, use_dropout = True, dropout_ratio = 0.5, use_batchnorm = True)
optimizer = Adam(learning_rate = learning_rate)
model_2_train_loss_list = []
model_2_train_acc_list = []
model_2_test_acc_list = []
# Create and train the model
for epoch in range(epochs):
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
y_batch = y_train[batch_mask]
grads = model_2.gradient(x_batch, y_batch)
optimizer.update(model_2.params, grads)
loss = model_2.loss(x_batch, y_batch)
model_2_train_loss_list.append(loss)
train_acc = model_2.accuracy(x_train, y_train)
test_acc = model_2.accuracy(x_test, y_test)
model_2_train_acc_list.append(train_acc)
model_2_test_acc_list.append(test_acc)
if epoch % 50 == 0:
print("[Model 1] Epoch: {} Train Loss: {:.4f} Train Accuracy: {:.4f} Test Accuracy: {:.4f}".format(epoch+1, loss, train_acc, test_acc))
# 출력 결과
[Model 1] Epoch: 1 Train Loss: 189.7545 Train Accuracy: 0.0730 Test Accuracy: 0.0750
[Model 1] Epoch: 51 Train Loss: 110.1612 Train Accuracy: 0.2698 Test Accuracy: 0.2470
[Model 1] Epoch: 101 Train Loss: 69.2994 Train Accuracy: 0.5468 Test Accuracy: 0.5150
[Model 1] Epoch: 151 Train Loss: 44.7758 Train Accuracy: 0.5966 Test Accuracy: 0.5520
[Model 1] Epoch: 201 Train Loss: 29.6832 Train Accuracy: 0.6948 Test Accuracy: 0.6287
[Model 1] Epoch: 251 Train Loss: 20.2380 Train Accuracy: 0.7174 Test Accuracy: 0.6733
[Model 1] Epoch: 301 Train Loss: 14.4343 Train Accuracy: 0.7739 Test Accuracy: 0.7323
[Model 1] Epoch: 351 Train Loss: 10.3112 Train Accuracy: 0.7837 Test Accuracy: 0.7340
[Model 1] Epoch: 401 Train Loss: 7.9462 Train Accuracy: 0.8494 Test Accuracy: 0.7950
[Model 1] Epoch: 451 Train Loss: 6.2215 Train Accuracy: 0.8380 Test Accuracy: 0.7767
[Model 1] Epoch: 501 Train Loss: 4.9697 Train Accuracy: 0.8574 Test Accuracy: 0.8087
[Model 1] Epoch: 551 Train Loss: 4.3279 Train Accuracy: 0.8439 Test Accuracy: 0.7980
[Model 1] Epoch: 601 Train Loss: 3.6755 Train Accuracy: 0.8670 Test Accuracy: 0.8337
[Model 1] Epoch: 651 Train Loss: 3.1388 Train Accuracy: 0.8588 Test Accuracy: 0.8090
[Model 1] Epoch: 701 Train Loss: 2.8542 Train Accuracy: 0.8635 Test Accuracy: 0.8040
[Model 1] Epoch: 751 Train Loss: 2.5575 Train Accuracy: 0.8723 Test Accuracy: 0.8247
[Model 1] Epoch: 801 Train Loss: 2.3355 Train Accuracy: 0.8722 Test Accuracy: 0.8247
[Model 1] Epoch: 851 Train Loss: 2.3049 Train Accuracy: 0.8755 Test Accuracy: 0.8163
[Model 1] Epoch: 901 Train Loss: 2.1523 Train Accuracy: 0.8509 Test Accuracy: 0.8027
- Visualization
# Plot accuracy
x = np.arange(len(model_2_train_acc_list))
plt.plot(x, model_2_train_acc_list, 'bo', label = 'train', markersize = 3)
plt.plot(x, model_2_test_acc_list, 'rv', label = 'test', markersize = 1)
plt.xlabel("Epochs")
plt.ylabel('Accuracy')
plt.grid()
plt.ylim(0, 1.0)
plt.legend()
plt.show()
# Plot loss
x = np.arange(len(model_2_train_loss_list))
plt.plot(x, model_2_train_loss_list, 'g--', label = 'train', markersize = 3)
plt.xlabel("Epochs")
plt.ylabel('Loss')
plt.grid()
plt.legend()
plt.show()
- Model Creation and Training (3)
- Techniques used
- Training samples: 20,000
- Hidden layers: 3 [256, 100, 100]
- Optimizer: Adam
- Epochs: 1000
- Learning rate: 1e-2 (0.01)
- Batch size: 100
- Batch normalization
# Load and preprocess data
np.random.seed(42)
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
num_classes = 10
x_train = x_train[:20000]
x_test = x_test[:3000]
y_train = y_train[:20000]
y_test = y_test[:3000]
# flatten
x_train, x_test = x_train.reshape(-1, 28*28).astype(np.float32), x_test.reshape(-1, 28*28).astype(np.float32)
x_train = x_train / 255.0
x_test = x_test / 255.0
# Convert y_train to one-hot vectors
y_train = np.eye(num_classes)[y_train]
# Hyperparameters
epochs = 1000
learning_rate = 1e-2
batch_size = 100
train_size = x_train.shape[0]
iter_per_epoch = max(train_size / batch_size, 1)
decay_lambda_3 = 0
model_3 = MyModel(input_size = 784, hidden_size_list = [256, 100, 100], decay_lambda = decay_lambda_3,
output_size = 10, use_batchnorm = True)
optimizer = Adam(learning_rate = learning_rate)
model_3_train_loss_list = []
model_3_train_acc_list = []
model_3_test_acc_list = []
# Create and train the model
for epoch in range(epochs):
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
y_batch = y_train[batch_mask]
grads = model_3.gradient(x_batch, y_batch)
optimizer.update(model_3.params, grads)
loss = model_3.loss(x_batch, y_batch)
model_3_train_loss_list.append(loss)
train_acc = model_3.accuracy(x_train, y_train)
test_acc = model_3.accuracy(x_test, y_test)
model_3_train_acc_list.append(train_acc)
model_3_test_acc_list.append(test_acc)
if epoch % 50 == 0:
print("[Model 1] Epoch: {} Train Loss: {:.4f} Train Accuracy: {:.4f} Test Accuracy: {:.4f}".format(epoch+1, loss, train_acc, test_acc))
# 출력 결과
[Model 1] Epoch: 1 Train Loss: 11.1115 Train Accuracy: 0.2633 Test Accuracy: 0.2520
[Model 1] Epoch: 51 Train Loss: 0.3368 Train Accuracy: 0.8868 Test Accuracy: 0.8573
[Model 1] Epoch: 101 Train Loss: 0.3627 Train Accuracy: 0.9221 Test Accuracy: 0.8937
[Model 1] Epoch: 151 Train Loss: 0.1413 Train Accuracy: 0.9246 Test Accuracy: 0.8897
[Model 1] Epoch: 201 Train Loss: 0.1724 Train Accuracy: 0.9344 Test Accuracy: 0.8950
[Model 1] Epoch: 251 Train Loss: 0.2378 Train Accuracy: 0.9447 Test Accuracy: 0.9123
[Model 1] Epoch: 301 Train Loss: 0.1957 Train Accuracy: 0.9496 Test Accuracy: 0.9133
[Model 1] Epoch: 351 Train Loss: 0.0789 Train Accuracy: 0.9612 Test Accuracy: 0.9300
[Model 1] Epoch: 401 Train Loss: 0.1396 Train Accuracy: 0.9544 Test Accuracy: 0.9150
[Model 1] Epoch: 451 Train Loss: 0.0557 Train Accuracy: 0.9593 Test Accuracy: 0.9223
[Model 1] Epoch: 501 Train Loss: 0.0462 Train Accuracy: 0.9615 Test Accuracy: 0.9250
[Model 1] Epoch: 551 Train Loss: 0.0584 Train Accuracy: 0.9661 Test Accuracy: 0.9340
[Model 1] Epoch: 601 Train Loss: 0.1176 Train Accuracy: 0.9692 Test Accuracy: 0.9323
[Model 1] Epoch: 651 Train Loss: 0.0956 Train Accuracy: 0.9679 Test Accuracy: 0.9300
[Model 1] Epoch: 701 Train Loss: 0.0324 Train Accuracy: 0.9703 Test Accuracy: 0.9377
[Model 1] Epoch: 751 Train Loss: 0.0896 Train Accuracy: 0.9640 Test Accuracy: 0.9317
[Model 1] Epoch: 801 Train Loss: 0.0107 Train Accuracy: 0.9813 Test Accuracy: 0.9413
[Model 1] Epoch: 851 Train Loss: 0.1093 Train Accuracy: 0.9795 Test Accuracy: 0.9450
[Model 1] Epoch: 901 Train Loss: 0.0329 Train Accuracy: 0.9755 Test Accuracy: 0.9353
[Model 1] Epoch: 951 Train Loss: 0.0891 Train Accuracy: 0.9759 Test Accuracy: 0.9357
- Visualization
# Plot accuracy
x = np.arange(len(model_3_train_acc_list))
plt.plot(x, model_3_train_acc_list, 'bo', label = 'train', markersize = 3)
plt.plot(x, model_3_test_acc_list, 'rv', label = 'test', markersize = 1)
plt.xlabel("Epochs")
plt.ylabel('Accuracy')
plt.grid()
plt.ylim(0, 1.0)
plt.legend()
plt.show()
# Plot loss
x = np.arange(len(model_3_train_loss_list))
plt.plot(x, model_3_train_loss_list, 'g--', label = 'train', markersize = 3)
plt.xlabel("Epochs")
plt.ylabel('Loss')
plt.grid()
plt.legend()
plt.show()
- Comparing the Three Models
- All three models above were trained on a deliberately reduced subset of the training data, so they are likely to have trouble learning well; several training techniques were therefore applied to compensate.
x = np.arange(len(model_3_train_acc_list))
plt.plot(x, model_1_train_acc_list, 'b--', label = 'Model 1 train', markersize = 3)
plt.plot(x, model_2_train_acc_list, 'r:', label = 'Model 2 train', markersize = 3)
plt.plot(x, model_3_train_acc_list, 'go', label = 'Model 3 train', markersize = 3)
plt.xlabel("Epochs")
plt.ylabel('Accuracy')
plt.grid()
plt.ylim(0, 1.0)
plt.legend()
plt.show()
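The plot above compares only the training accuracies; the held-out test accuracies collected in the same loops can be compared the same way (a sketch reusing the lists defined above):
x = np.arange(len(model_3_test_acc_list))
plt.plot(x, model_1_test_acc_list, 'b--', label = 'Model 1 test')
plt.plot(x, model_2_test_acc_list, 'r:', label = 'Model 2 test')
plt.plot(x, model_3_test_acc_list, 'g-', label = 'Model 3 test')
plt.xlabel("Epochs")
plt.ylabel('Accuracy')
plt.grid()
plt.ylim(0, 1.0)
plt.legend()
plt.show()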