Cracking Keras (3): Training a Model
by sangjae kang
In Keras, model training is done through the high-level API model.fit. You specify the model's loss function and optimizer, pass in the data, and the model's weights are updated to fit that data. It is very convenient, but if you don't know what is happening underneath, it is also hard to tune. Let's walk through it with a simple example.
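Before the MNIST example, here is a minimal, self-contained sketch of that compile-then-fit workflow. The toy data, toy_model, and layer sizes below are made up purely for illustration; the network we actually train follows in the next section.

import numpy as np
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# Toy data: 128 samples, 4 features, 3 classes (purely illustrative values)
toy_x = np.random.rand(128, 4).astype(np.float32)
toy_y = np.random.randint(0, 3, size=(128,))

toy_model = Sequential([Dense(8, activation='relu', input_shape=(4,)),
                        Dense(3, activation='softmax')])

toy_model.compile(optimizer='sgd',                         # how the weights get updated
                  loss='sparse_categorical_crossentropy',  # what is being minimized
                  metrics=['accuracy'])                     # what is reported during training
toy_model.fit(toy_x, toy_y, batch_size=32, epochs=1)        # one pass over the toy data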
Quickly Writing the MNIST Code
Loading the Data
from tensorflow.keras.datasets.mnist import load_data
(x_train, y_train), (x_test, y_test) = load_data()
print("train image shape : ", x_train.shape)
print("train label shape : ", y_train.shape)
print("test image shape : ", x_test.shape)
print("test label shape : ", y_test.shape)
Output :
train image shape : (60000, 28, 28)
train label shape : (60000,)
test image shape : (10000, 28, 28)
test label shape : (10000,)
Building the Model
Let's build a deep learning model with six layers: four convolutional layers followed by two fully connected layers.
from tensorflow.keras.layers import Input, Reshape
from tensorflow.keras.layers import Conv2D, Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.models import Model
inputs = Input((28,28))
reshaped = Reshape((28,28,1))(inputs) # add a channel axis
# Convolution Layers
hidden = Conv2D(16, (3,3), activation='relu')(reshaped)
hidden = Conv2D(16, (3,3), strides=(2,2), activation='relu')(hidden)
hidden = Conv2D(16, (3,3), strides=(2,2), activation='relu')(hidden)
hidden = Conv2D(16, (3,3), strides=(2,2), activation='relu')(hidden)
# Fully Connected Layers
flatten = Flatten()(hidden)
fc = Dense(20, activation='relu')(flatten)
outputs = Dense(10, activation='softmax')(fc)
model = Model(inputs, outputs)
model.summary()
Output :
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 28, 28)] 0
_________________________________________________________________
reshape (Reshape) (None, 28, 28, 1) 0
_________________________________________________________________
conv2d (Conv2D) (None, 26, 26, 16) 160
_________________________________________________________________
conv2d_1 (Conv2D) (None, 12, 12, 16) 2320
_________________________________________________________________
conv2d_2 (Conv2D) (None, 5, 5, 16) 2320
_________________________________________________________________
conv2d_3 (Conv2D) (None, 2, 2, 16) 2320
_________________________________________________________________
flatten (Flatten) (None, 64) 0
_________________________________________________________________
dense (Dense) (None, 20) 1300
_________________________________________________________________
dense_1 (Dense) (None, 10) 210
=================================================================
Total params: 8,630
Trainable params: 8,630
Non-trainable params: 0
_________________________________________________________________
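As a quick sanity check on the Param # column: a Conv2D layer has kernel_height × kernel_width × input_channels × filters weights plus one bias per filter, and a Dense layer has inputs × units weights plus one bias per unit. A small sketch reproducing the numbers above (the helper functions are just for illustration):

def conv2d_params(kh, kw, in_ch, filters):
    # weights (kh * kw * in_ch per filter) + one bias per filter
    return kh * kw * in_ch * filters + filters

def dense_params(in_features, units):
    # weights + one bias per unit
    return in_features * units + units

print(conv2d_params(3, 3, 1, 16))   # 160  : first conv, single input channel
print(conv2d_params(3, 3, 16, 16))  # 2320 : each of the remaining three convs
print(dense_params(64, 20))         # 1300 : Flatten yields 2*2*16 = 64 features
print(dense_params(20, 10))         # 210  : output layer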
Compiling the Model
Since the labels are plain integer indices rather than one-hot vectors, we need to use sparse_categorical_crossentropy.
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.metrics import SparseCategoricalAccuracy
model.compile(
    optimizer=SGD(1e-2),
    loss=SparseCategoricalCrossentropy(),
    metrics=[SparseCategoricalAccuracy()])
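To see why the sparse variant is the right choice here: y_train holds integer class indices (e.g. 5), not one-hot vectors. The following small sketch, with made-up probabilities for a hypothetical 3-class problem, shows that the two losses agree once the labels are encoded accordingly:

import tensorflow as tf
from tensorflow.keras.losses import (CategoricalCrossentropy,
                                     SparseCategoricalCrossentropy)

# Hypothetical softmax output for one sample of a 3-class problem
probs = tf.constant([[0.1, 0.7, 0.2]])
y_int = tf.constant([1])                 # label as an integer index
y_onehot = tf.constant([[0., 1., 0.]])   # the same label, one-hot encoded

print(SparseCategoricalCrossentropy()(y_int, probs).numpy())  # ~0.357 (= -log 0.7)
print(CategoricalCrossentropy()(y_onehot, probs).numpy())     # same value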
Training the Model
We'll train with a batch size of 64 for just 3 epochs.
model.fit(x=x_train, y=y_train,
          batch_size=64, epochs=3,
          validation_data=(x_test, y_test));
Output :
Train on 60000 samples, validate on 10000 samples
Epoch 1/3
60000/60000 [==============================] - 12s 194us/sample - loss: 0.5621 - sparse_categorical_accuracy: 0.8195 - val_loss: 0.2037 - val_sparse_categorical_accuracy: 0.9347
Epoch 2/3
60000/60000 [==============================] - 11s 189us/sample - loss: 0.1718 - sparse_categorical_accuracy: 0.9471 - val_loss: 0.1300 - val_sparse_categorical_accuracy: 0.9589
Epoch 3/3
60000/60000 [==============================] - 11s 187us/sample - loss: 0.1269 - sparse_categorical_accuracy: 0.9611 - val_loss: 0.1184 - val_sparse_categorical_accuracy: 0.9619
Writing It with the Keras Low-Level API
Let's write code for the same model, this time without .fit.
inputs = Input((28,28))
reshaped = Reshape((28,28,1))(inputs) # add a channel axis
# Convolution Layers
hidden = Conv2D(16, (3,3), activation='relu')(reshaped)
hidden = Conv2D(16, (3,3), strides=(2,2), activation='relu')(hidden)
hidden = Conv2D(16, (3,3), strides=(2,2), activation='relu')(hidden)
hidden = Conv2D(16, (3,3), strides=(2,2), activation='relu')(hidden)
# Fully Connected Layers
flatten = Flatten()(hidden)
fc = Dense(20, activation='relu')(flatten)
outputs = Dense(10, activation='softmax')(fc)
model = Model(inputs, outputs)
model.summary()
Output :
Model: "model_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_3 (InputLayer) [(None, 28, 28)] 0
_________________________________________________________________
reshape_2 (Reshape) (None, 28, 28, 1) 0
_________________________________________________________________
conv2d_8 (Conv2D) (None, 26, 26, 16) 160
_________________________________________________________________
conv2d_9 (Conv2D) (None, 12, 12, 16) 2320
_________________________________________________________________
conv2d_10 (Conv2D) (None, 5, 5, 16) 2320
_________________________________________________________________
conv2d_11 (Conv2D) (None, 2, 2, 16) 2320
_________________________________________________________________
flatten_2 (Flatten) (None, 64) 0
_________________________________________________________________
dense_4 (Dense) (None, 20) 1300
_________________________________________________________________
dense_5 (Dense) (None, 10) 210
=================================================================
Total params: 8,630
Trainable params: 8,630
Non-trainable params: 0
_________________________________________________________________
Writing the Training Loop Without fit
If we write the code in raw form, without Keras's loss function / optimizer / metric objects, it looks like the following.
from sklearn.utils import shuffle
from tqdm import tqdm
import tensorflow as tf
import numpy as np

epochs = 3
batch_size = 64
lr = 1e-2

for epoch in range(epochs):
    # Shuffle the training set at the start of every epoch
    x_train, y_train = shuffle(x_train, y_train)
    for i in tqdm(range(len(x_train)//batch_size)):
        # Slice out one mini-batch
        x_batch = x_train[i*batch_size:(i+1)*batch_size]
        x_batch = x_batch.astype(np.float32)
        y_batch = y_train[i*batch_size:(i+1)*batch_size]
        with tf.GradientTape() as tape:
            # Forward pass
            y_pred = model(x_batch)
            # Sparse categorical crossentropy, written out by hand
            y_pred = tf.clip_by_value(y_pred, 1e-7, 1 - 1e-7)
            loss = tf.reduce_mean(
                -tf.reduce_sum(
                    tf.one_hot(y_batch, 10) * tf.math.log(y_pred), axis=-1))
        # Backpropagation: gradients of the loss w.r.t. the weights
        grads = tape.gradient(loss, model.weights)
        # Stochastic gradient descent update
        for weight, grad in zip(model.weights, grads):
            weight.assign_sub(lr * grad)
    # Evaluate on the test set after each epoch
    y_pred = model(x_test.astype(np.float32))
    accuracy = np.mean(np.argmax(y_pred, axis=-1) == y_test)
    print(f"{epoch}th epoch test accuracy : {accuracy:.3%}")
Output :
100%|██████████| 937/937 [00:24<00:00, 38.77it/s]
0%| | 4/937 [00:00<00:26, 35.36it/s]
0th epoch test accuracy : 94.830%
100%|██████████| 937/937 [00:22<00:00, 42.20it/s]
0%| | 4/937 [00:00<00:27, 33.82it/s]
1th epoch test accuracy : 96.690%
100%|██████████| 937/937 [00:22<00:00, 41.87it/s]
2th epoch test accuracy : 97.160%
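For reference, the same loop becomes shorter if the Keras loss and optimizer objects are kept while still skipping fit. The following is a sketch of a single epoch under the same assumptions as above (model, x_train, y_train already defined); SparseCategoricalCrossentropy replaces the hand-written formula and optimizer.apply_gradients replaces the manual assign_sub update.

import numpy as np
import tensorflow as tf
from sklearn.utils import shuffle

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.SGD(1e-2)
batch_size = 64

# One epoch of the same training loop, with Keras objects doing the bookkeeping
x_shuf, y_shuf = shuffle(x_train, y_train)
for i in range(len(x_shuf) // batch_size):
    x_batch = x_shuf[i*batch_size:(i+1)*batch_size].astype(np.float32)
    y_batch = y_shuf[i*batch_size:(i+1)*batch_size]
    with tf.GradientTape() as tape:
        y_pred = model(x_batch)
        loss = loss_fn(y_batch, y_pred)            # replaces the manual crossentropy formula
    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))  # replaces assign_sub

This is also roughly what fit does internally: shuffle, batch, forward pass, gradient computation, and an optimizer step, plus the metric and callback bookkeeping on top.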