import tensorflow as tf
from tensorflow.keras.datasets.boston_housing import load_data
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
# Independent variables (features) of the first training sample
print(x_train_full[0])
# Output
[2.8750e-02 2.8000e+01 1.5040e+01 0.0000e+00 4.6400e-01 6.2110e+00
2.8900e+01 3.6659e+00 4.0000e+00 2.7000e+02 1.8200e+01 3.9633e+02
6.2100e+00]
# Dependent variable (target) of the first training sample
print(y_train_full[0])
# Output
25.0
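As a quick sketch, assuming the standard Boston Housing column order (CRIM, ZN, INDUS, CHAS, NOX, RM, AGE, DIS, RAD, TAX, PTRATIO, B, LSTAT), each printed value can be paired with its feature name:
# Sketch: pair the first sample's values with the (assumed) standard feature names
feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
for name, value in zip(feature_names, x_train_full[0]):
    print('{:>8}: {}'.format(name, value))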
3. Checking the data
print('Training data: {}\tLabels: {}'.format(x_train_full.shape, y_train_full.shape))
print('Test data: {}\tLabels: {}'.format(x_test.shape, y_test.shape))
# Output
Training data: (404, 13)    Labels: (404,)
Test data: (102, 13)    Labels: (102,)
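The target is the median house price in units of $1,000, so a quick look at its range helps put the later MAE values in context. A minimal sketch using the already-loaded arrays:
# Sketch: summary statistics of the target (median house price, in $1,000s)
print('min: {:.1f}, max: {:.1f}, mean: {:.1f}'.format(
    y_train_full.min(), y_train_full.max(), y_train_full.mean()))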
from sklearn.model_selection import KFold
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
tf.random.set_seed(111)
(x_train_full, y_train_full), (x_test, y_test) = load_data(path = 'boston_housing.npz',
                                                            test_split = 0.2,
                                                            seed = 111)
mean = np.mean(x_train_full, axis = 0)
std = np.std(x_train_full, axis = 0)
x_train_preprocessed = (x_train_full - mean) / std
x_test = (x_test - mean) / std
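The test set is scaled with the training-set mean and standard deviation, so only the training features are guaranteed to end up with mean 0 and standard deviation 1. A quick sanity-check sketch:
# Sketch: the standardized training features should have mean ~0 and std ~1 per column
print(np.allclose(x_train_preprocessed.mean(axis = 0), 0))
print(np.allclose(x_train_preprocessed.std(axis = 0), 1))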
# Create a KFold splitter that divides the data into 3 folds
k = 3
kfold = KFold(n_splits = k, random_state = 111, shuffle = True)
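With k = 3 and 404 training samples, each split uses roughly 269 samples for training and 135 for validation. A short sketch to confirm the fold sizes before training:
# Sketch: check how KFold partitions the 404 training samples
for fold, (train_idx, val_idx) in enumerate(kfold.split(x_train_preprocessed), start = 1):
    print('Fold {}: train = {}, val = {}'.format(fold, len(train_idx), len(val_idx)))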
# Build the model
def build_model():
    input = Input(shape = (13, ), name = 'input')
    hidden1 = Dense(100, activation = 'relu', name = 'dense1')(input)
    hidden2 = Dense(64, activation = 'relu', name = 'dense2')(hidden1)
    hidden3 = Dense(32, activation = 'relu', name = 'dense3')(hidden2)
    output = Dense(1, name = 'output')(hidden3)
    model = Model(inputs = [input], outputs = [output])
    model.compile(loss = 'mse',
                  optimizer = 'adam',
                  metrics = ['mae'])
    return model
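Since plot_model is imported above, the architecture can be inspected before cross-validation starts. A sketch; the file name boston_model.png is arbitrary, and plot_model additionally requires pydot and graphviz to be installed:
# Sketch: build one model just to inspect its architecture
sample_model = build_model()
sample_model.summary()
plot_model(sample_model, to_file = 'boston_model.png', show_shapes = True)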
# List to store the MAE values
mae_list = []
# Train on each fold
for train_idx, val_idx in kfold.split(x_train_preprocessed):
    x_train_fold, x_val_fold = x_train_preprocessed[train_idx], x_train_preprocessed[val_idx]
    y_train_fold, y_val_fold = y_train_full[train_idx], y_train_full[val_idx]
    model = build_model()
    model.fit(x_train_fold, y_train_fold, epochs = 300,
              validation_data = (x_val_fold, y_val_fold))
    _, test_mae = model.evaluate(x_test, y_test)
    mae_list.append(test_mae)
print(mae_list)
print(np.mean(mae_list))
# Output
# Since the target is in units of $1,000, a mean MAE of about 8.9 means the predictions are off by roughly $8,900 on average
[9.665495872497559, 8.393745422363281, 8.736763954162598]
8.932001749674479
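matplotlib is imported but not used above; as a sketch, the per-fold results can also be summarized with their spread and a simple bar chart (labels are arbitrary):
# Sketch: summarize the 3-fold cross-validation results
print('Mean MAE: {:.2f} (+/- {:.2f})'.format(np.mean(mae_list), np.std(mae_list)))
plt.bar(range(1, k + 1), mae_list)
plt.xlabel('Fold')
plt.ylabel('Test MAE ($1,000s)')
plt.title('3-fold cross-validation')
plt.show()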