[텍스트 분석] 정칙화
L1 및 L2 정칙화
임포트
import tensorflow as tf
from tensorflow.keras.layers import *
모델 정의
L1
L2
L1L2
# Single-layer classifier: a 20-unit softmax layer whose kernel carries an
# L1 penalty with coefficient 0.001.
model = tf.keras.Sequential()
model.add(
    Dense(
        20,
        activation='softmax',
        kernel_regularizer=tf.keras.regularizers.L1(0.001),
    )
)
훈련
# Configure the loss and the reported metric; no optimizer is passed, so the
# framework default is used.
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# NOTE(review): `.A` presumably densifies a sparse/matrix feature container --
# confirm against where x_train is built.
model.fit(x=x_train.A, y=y_train, epochs=10)
평가
# Score the trained model on the test split (loss plus the compiled metric).
model.evaluate(x=x_test.A, y=y_test)
얼리 스토핑 (Early Stopping)
# Fresh, unregularised 20-way softmax classifier for the early-stopping run.
model = tf.keras.Sequential(
    layers=[
        Dense(20, activation='softmax'),
    ]
)
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Up to 100 epochs, holding out 10% of the training data for validation.
# EarlyStopping is used with its default settings.
model.fit(
    x=x_train.A,
    y=y_train,
    epochs=100,
    validation_split=0.1,
    callbacks=[tf.keras.callbacks.EarlyStopping()],
)
자동 저장
# Continue training while checkpointing the best weights seen so far.
# Keras 3 rejects a weights-only checkpoint path that does not end in
# ".weights.h5"; the same suffix is also accepted by older tf.keras, where it
# selects the HDF5 weights format. The save and load paths below must match.
model.fit(
    x_train.A, y_train, epochs=10, validation_split=0.1,
    callbacks=[tf.keras.callbacks.ModelCheckpoint(
        'checkpoints/best.weights.h5',  # weights file inside the checkpoints folder
        monitor="val_accuracy",  # compare epochs by validation accuracy
        save_best_only=True,  # only save when the result beats the previous best
        save_weights_only=True,  # store parameters only, not the whole model
    )])
불러오기
# Restore the best weights written by the checkpoint callback above.
model.load_weights('checkpoints/best.weights.h5')
드롭아웃
드롭아웃 레이어 추가
# Hidden ReLU layer (128 units) followed by dropout at rate 0.5, then the
# 20-way softmax output.
model = tf.keras.Sequential()
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(20, activation='softmax'))
활성화 함수 이전에 Dropout 넣기
# Variant that applies dropout to the pre-activation output: the hidden Dense
# layer has no activation of its own, and ReLU is added as a separate layer
# after Dropout.
model = tf.keras.Sequential()
model.add(Dense(128))
model.add(Dropout(0.5))
model.add(ReLU())
model.add(Dense(20, activation='softmax'))
Normalization
Batch Normalization
# Hidden ReLU layer followed by batch normalisation, then the softmax output.
model = tf.keras.Sequential(
    layers=[
        Dense(128, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        Dense(20, activation='softmax'),
    ]
)
Layer Normalization
# Same architecture as the batch-norm variant, but with layer normalisation
# between the hidden layer and the softmax output.
model = tf.keras.Sequential()
model.add(Dense(128, activation='relu'))
model.add(LayerNormalization())
model.add(Dense(20, activation='softmax'))
Label Smoothing
원 핫 인코딩된 형태로 변환
# Expand the training labels into one-hot vectors of length 20.
y_train_one_hot = tf.one_hot(
    indices=y_train,
    depth=20,
)
# Bare expression so the notebook displays the resulting tensor.
y_train_one_hot
<tf.Tensor: shape=(9051, 20), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[1., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 1., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>
손실함수
# Categorical cross-entropy with label smoothing set to 0.2; it is paired
# with the one-hot targets rather than integer labels.
loss_fn = tf.keras.losses.CategoricalCrossentropy(
    label_smoothing=0.2,
)
훈련
# Fresh 20-way softmax classifier trained against the label-smoothing loss.
model = tf.keras.Sequential(
    layers=[
        Dense(20, activation='softmax'),
    ]
)
model.compile(
    loss=loss_fn,
    metrics=['accuracy'],
)
# Targets are the one-hot labels; no epoch count is given, so the framework
# default applies.
model.fit(x=x_train.A, y=y_train_one_hot)
평가
# Evaluate on the test split, one-hot encoding the test labels (depth 20) to
# match the categorical loss.
model.evaluate(
    x=x_test.A,
    y=tf.one_hot(y_test, depth=20),
)