CNN :: 컴퓨터 비전 - mindscale
Skip to content

CNN

합성곱

import numpy as np
from PIL import Image

# Read the sample picture, scale it to 256x256 and reduce it to a single
# grayscale channel (PIL mode 'L').
img = Image.open('balloon.webp')
img = img.resize((256, 256))
img = img.convert('L')
img
# Convert to a float32 array and add a leading batch axis plus a trailing
# channel axis, giving the (1, 256, 256, 1) layout the conv layer expects.
img = np.array(img, dtype='float32')
img = img[None, :, :, None]
img.shape
(1, 256, 256, 1)
from tensorflow.keras.layers import Conv2D
# A convolution layer holding a single 3x3 filter. Building it up front for
# 256x256 single-channel input lets its weights be overwritten by hand
# before the layer is ever called.
conv = Conv2D(filters=1, kernel_size=(3, 3))
conv.build(
    input_shape=(None, 256, 256, 1),
)
# Hand-written 3x3 kernels. Exactly one `filter = [...]` should be active;
# the others are kept commented out as alternatives to try.
#
# Blur
# filter = [
#     [1/8, 1/8, 1/8],
#     [1/8,   0, 1/8],
#     [1/8, 1/8, 1/8]]
# Edges (outline)
# filter = [
#     [-1/8, -1/8, -1/8],
#     [-1/8,    1, -1/8],
#     [-1/8, -1/8, -1/8]]
# Horizontal lines
# filter = [
#     [-1/4, -2/4, -1/4],
#     [   0,    0,    0],
#     [ 1/4,  2/4,  1/4]]
# Vertical lines
# NOTE: the name `filter` shadows the builtin; it is kept unchanged here.
filter = [
    [1/4, 0, -1/4],
    [2/4, 0, -2/4],
    [1/4, 0, -1/4],
]

# Append two singleton axes so the 3x3 grid becomes a (3, 3, 1, 1) array,
# matching the shape of the layer's first weight.
filter = np.asarray(filter, dtype='float32')[..., np.newaxis, np.newaxis]
# Trailing semicolon presumably suppresses the notebook echo of the result.
conv.weights[0].assign(filter);
# Run the hand-set kernel over the image; take the single batch item and the
# single output channel as a 2-D response map.
feature = conv(img)
# The kernel contains negative weights, so the response contains negative
# values. Casting a negative float straight to uint8 does not saturate (it
# wraps around or is platform-dependent), which corrupts the picture, so
# clamp to the displayable 0..255 range before converting. Negative
# responses are therefore shown as black.
Image.fromarray(
    np.clip(feature[0, ..., 0].numpy(), 0, 255).astype('uint8')
)

MNIST 데이터셋

import tensorflow as tf
# Load MNIST as a (train, test) pair, each an (images, labels) tuple.
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
x_train, y_train = mnist_train
x_test, y_test = mnist_test
  • x_train의 형태는 (N, 28, 28)
  • 컬러 이미지의 경우 삼원색의 이미지가 겹쳐져서 (N, 28, 28, 3) 형태
  • CNN의 경우 입력에서 마지막 차원이 색상의 수를 나타냄
  • 흑백 이미지이므로 (N, 28, 28, 1) 형태로 바꿔줌
import numpy as np
# Append a trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1).
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]

CNN 모델 정의

from tensorflow.keras.layers import *
# Small CNN classifier assembled layer by layer.
model = tf.keras.Sequential()
# Scale raw pixel values by 1/255.
model.add(Rescaling(1/255))
# Two convolution + max-pooling stages (32, then 64 filters).
model.add(Conv2D(32, kernel_size=(3, 3), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(3, 3), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Flatten the feature maps, apply dropout, then a 10-way softmax output.
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(10, activation="softmax"))

훈련

설정

# Labels are passed to fit/evaluate as-is (no one-hot step), hence the
# sparse variant of categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

훈련

# Train for 10 passes over the training set.
model.fit(
    x=x_train,
    y=y_train,
    epochs=10,
)

테스트

# Score the trained model on the held-out test set; the captured output
# below shows the result as [loss, accuracy].
model.evaluate(x=x_test, y=y_test)
313/313 [==============================] - 1s 3ms/step - loss: 0.0238 - accuracy: 0.9919
[0.023816542699933052, 0.9919000267982483]

CIFAR10

10 종류의 32x32 크기 컬러 이미지 데이터

훈련용 5만개, 테스트용 1만개

컬러 이미지이므로 데이터 로딩 후 차원을 추가할 필요 없음

# Load CIFAR-10, rebinding the train/test names used for MNIST above.
(x_train, y_train), (x_test, y_test) = (
    tf.keras.datasets.cifar10.load_data()
)
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
170498071/170498071 [==============================] - 24s 0us/step