TensorFlow Certificate Prep - CNN (SIGN LANGUAGE MNIST hand-sign classification)
jinmc
2022. 3. 6. 15:59
1. Import libraries
import csv
import string
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img
2. Download the data and set up variables
# sign_mnist_train.csv
!gdown --id 1z0DkA9BytlLxO1C0BAWzknLyQmZAp0HR
# sign_mnist_test.csv
!gdown --id 1z1BIj4qmri59GWBG4ivMNFtpZ4AXIbzg
TRAINING_FILE = './sign_mnist_train.csv'
VALIDATION_FILE = './sign_mnist_test.csv'
3. Take a look at the first two lines of the file
with open(TRAINING_FILE) as training_file:
    line = training_file.readline()
    print(f"First line (header) looks like this:\n{line}")
    line = training_file.readline()
    print(f"Each subsequent line (data points) look like this:\n{line}")
The output looks roughly like this:
First line (header) looks like this:
label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8 ...
Each subsequent line (data points) look like this:
3,107,118,127,134,139,143,146,150,153,156,158,160,163,165,...
4. Parse the data from the input
Use next to skip the first row (the header), then take the first item of each row as the label and reshape the remaining 784 items into (28, 28).
# GRADED FUNCTION: parse_data_from_input
def parse_data_from_input(filename):
    with open(filename) as file:
        ### START CODE HERE
        # Use csv.reader, passing in the appropriate delimiter
        # Remember that csv.reader can be iterated and returns one line in each iteration
        csv_reader = csv.reader(file, delimiter=",")
        labels = []
        images = []
        next(csv_reader, None)
        for row in csv_reader:
            label = row[0]
            img = row[1:]
            img = np.array(img).reshape((28, 28))
            images.append(img)
            labels.append(label)
        images = np.array(images).astype(float)
        labels = np.array(labels).astype(float)
        ### END CODE HERE
    return images, labels
Note that if you skip the astype(float) calls here, you will run into an error later, so be careful!
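The later steps use training_images, training_labels, validation_images, and validation_labels, so call the function on both CSV files first. A minimal sketch, assuming the file paths defined in step 2 (the print lines are just an optional sanity check):
training_images, training_labels = parse_data_from_input(TRAINING_FILE)
validation_images, validation_labels = parse_data_from_input(VALIDATION_FILE)
# Optional sanity check of shapes and dtypes
print(f"Training images has shape: {training_images.shape} and dtype: {training_images.dtype}")
print(f"Validation images has shape: {validation_images.shape} and dtype: {validation_images.dtype}")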
5. Check the data
# Plot a sample of 10 images from the training set
def plot_categories(training_images, training_labels):
    fig, axes = plt.subplots(1, 10, figsize=(16, 15))
    axes = axes.flatten()
    letters = list(string.ascii_lowercase)
    for k in range(10):
        img = training_images[k]
        img = np.expand_dims(img, axis=-1)
        img = array_to_img(img)
        ax = axes[k]
        ax.imshow(img, cmap="Greys_r")
        ax.set_title(f"{letters[int(training_labels[k])]}")
        ax.set_axis_off()
    plt.tight_layout()
    plt.show()
plot_categories(training_images, training_labels)
6. Prepare the data with ImageDataGenerator
Use rescale to normalize the pixel values, and use the flow method to set the inputs, labels, and batch_size.
Since the generator expects the images to have an extra channel dimension, add it with the np.expand_dims function.
def train_val_generators(training_images, training_labels, validation_images, validation_labels):
    ### START CODE HERE
    # In this section you will have to add another dimension to the data
    # So, for example, if your array is (10000, 28, 28)
    # You will need to make it (10000, 28, 28, 1)
    # Hint: np.expand_dims
    training_images = np.expand_dims(training_images, axis=-1)
    validation_images = np.expand_dims(validation_images, axis=-1)
    # Instantiate the ImageDataGenerator class
    # Don't forget to normalize pixel values
    # and set arguments to augment the images (if desired)
    train_datagen = ImageDataGenerator(rescale=1. / 255)
    # Pass in the appropriate arguments to the flow method
    train_generator = train_datagen.flow(x=training_images,
                                         y=training_labels,
                                         batch_size=32)
    # Instantiate the ImageDataGenerator class (don't forget to set the rescale argument)
    # Remember that validation data should not be augmented
    validation_datagen = ImageDataGenerator(rescale=1. / 255)
    # Pass in the appropriate arguments to the flow method
    validation_generator = validation_datagen.flow(x=validation_images,
                                                   y=validation_labels,
                                                   batch_size=32)
    ### END CODE HERE
    return train_generator, validation_generator
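The generators used in the next step come from calling this function on the arrays parsed in step 4; a minimal sketch:
train_generator, validation_generator = train_val_generators(training_images, training_labels,
                                                              validation_images, validation_labels)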
7. Build and train the CNN model
def create_model():
    ### START CODE HERE
    # Define the model
    # Use no more than 2 Conv2D and 2 MaxPooling2D
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(26, activation='softmax')])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    ### END CODE HERE
    return model
# Save your model
model = create_model()
# Train your model
history = model.fit(train_generator,
                    epochs=15,
                    validation_data=validation_generator)
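The "# Save your model" comment above only assigns the model to a variable; if you also want the trained model written to disk (as the certificate exam typically expects), model.save can be used. A minimal sketch, where the file name is just a placeholder:
# Save the trained model to an HDF5 file (file name is a placeholder)
model.save('sign_mnist_cnn.h5')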
8. Check the metrics
# Plot the chart for accuracy and loss on both training and validation
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'r', label='Training Loss')
plt.plot(epochs, val_loss, 'b', label='Validation Loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()