import os
import pathlib
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
print("numpy :", np.__version__)
print("pandas :", pd.__version__)
print("tensorflow :", tf.__version__)
print("matplotlib :", matplotlib.__version__)
モデルの学習曲線(後述)を描画できるように,CSVファイルを作成しましょう.
# Path of the CSV file that will collect per-epoch training metrics
# (consumed later to draw the learning curves).
CSV_FILE_PATH = "trainlog.csv"
# Create the file if it does not exist yet. Path.touch(exist_ok=True)
# replaces the racy "check then touch" pair: it is a single call and is
# a no-op when the file is already there.
pathlib.Path(CSV_FILE_PATH).touch(exist_ok=True)
Tensorflowから実行環境(このファイル)へMNIST Datasetを読み込みましょう.
MNIST Dataset:
# Load the MNIST handwritten-digit dataset shipped with tensorflow.
# load_data() returns (train, test) tuples of (images, labels).
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
# Print the array shapes to confirm what was loaded.
for _name, _array in (("X_train : ", X_train), ("y_train : ", y_train),
                      ("X_test : ", X_test), ("y_test : ", y_test)):
    print(_name, _array.shape)
MNIST Datasetのサンプル
X(白黒画像)とy(ラベル)を見てみましょう.
# Inspect a few training samples: print the label, then render the
# corresponding 28x28 grayscale image.
for idx in (1, 10, 100):
    tag = "(i=" + str(idx) + "): "
    print("y_train", tag, y_train[idx])
    print("X_train", tag)
    plt.imshow(X_train[idx], cmap='gray')
    plt.show()
一般に,「(扱う)データの値を,何らかの基準に基づいて整える」処理を正規化(Normalization, Scaling)といいます.代表的な正規化手法としては以下2つが挙げられます.
MNIST Datasetに含まれる画像データでは,各画素の値が「0以上255以下」の8bit整数で表現されています.今回は,これにmin-max normalizationを適用することで,値の範囲を「0~1」に限定させます.
print("X_train min", X_train.min())
print("X_train max", X_train.max())
# Min-Max Normalization
X_train, X_test = X_train/255.0, X_test/255.0
print("X_train min", X_train.min())
print("X_train max", X_train.max())
# Model: a 4-layer fully-connected classifier for 28x28 grayscale digits,
# assembled incrementally with model.add().
model = tf.keras.models.Sequential()
# (None, 28, 28) -> (None, 784)
model.add(tf.keras.layers.Flatten(input_shape=(28, 28), name='input'))
# Layer1: Linear mapping (None, 784) -> (None, 512), then ReLU
model.add(tf.keras.layers.Dense(512, name='fc_1'))
model.add(tf.keras.layers.Activation(tf.nn.relu, name='relu_1'))
# Layer2: Linear mapping (None, 512) -> (None, 256), then ReLU
model.add(tf.keras.layers.Dense(256, name='fc_2'))
model.add(tf.keras.layers.Activation(tf.nn.relu, name='relu_2'))
# Layer3: Linear mapping (None, 256) -> (None, 256), then ReLU
model.add(tf.keras.layers.Dense(256, name='fc_3'))
model.add(tf.keras.layers.Activation(tf.nn.relu, name='relu_3'))
# Layer4: Linear mapping (None, 256) -> (None, 10), softmax turns the
# 10 class scores into probabilities.
model.add(tf.keras.layers.Dense(10, name='dense_3'))
model.add(tf.keras.layers.Activation(tf.nn.softmax, name='softmax'))
# View model architecture
model.summary()
# Compiling
# Bind the optimizer, loss, and reported metrics to the model
# (sets up the training computation on the available CPU/GPU).
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Callbacks invoked during training: append per-epoch metrics to the CSV log.
callbacks = [tf.keras.callbacks.CSVLogger(CSV_FILE_PATH)]
モデルにtrainデータを与えて,学習(training)させましょう.
- (X_train, y_train): モデル(model)の学習(training)に用いる.
- (X_test, y_test): モデル(model)の性能検証(validation)に用いる.
学習は次のコードセル(history = model.fit(...))で実行します.
なお,ニューラルネットワーク$f_{\theta}: X \mapsto Y$において,「推論」「学習」とは以下の計算処理を指します.
# Train model
# - batch_size=100: gradient updates on mini-batches of 100 samples
# - epochs=30: 30 full passes over the training data
# - verbose=1: per-epoch progress output
# Per-epoch metrics are also streamed to CSV_FILE_PATH by the CSVLogger
# in `callbacks`.
# NOTE(review): the test set is passed as validation_data, so it doubles
# as the validation set here — fine for a tutorial, but a separate
# held-out split would be cleaner practice.
history = model.fit(X_train, y_train,
batch_size=100,
epochs=30,
verbose=1,
validation_data=(X_test, y_test),
callbacks=callbacks)
学習済みモデル(model)のtestデータ(X_test, y_test)に対する正答率(accuracy)と損失関数の値(loss)を確認しましょう.
# Model evaluation
# evaluate() returns (loss, accuracy) averaged over the given dataset;
# "{:.4}" formats each number to 4 significant digits.
# Train-set metrics (how well the model fits the data it saw):
train_loss, train_acc = model.evaluate(X_train, y_train, verbose=1)
print("loss(train): {:.4}".format(train_loss))
print("accuracy(train): {:.4}".format(train_acc))
print()
# Test-set metrics (generalization to unseen data):
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=1)
print("loss(test): {:.4}".format(test_loss))
print("accuracy(test): {:.4}".format(test_acc))
用意しておいたCSVファイルを元に,学習曲線(モデルに対する評価指標の経過を表す)を描画してみましょう.
# Load the per-epoch metrics written by the CSVLogger callback.
df = pd.read_csv(CSV_FILE_PATH)
df.head()
# CSVLogger's accuracy column name depends on the Keras version:
# older versions write "acc"/"val_acc", Keras >= 2.3 (TF2) writes
# "accuracy"/"val_accuracy". Pick whichever is present so this cell
# works on both.
acc_col = "acc" if "acc" in df.columns else "accuracy"
epochs = df["epoch"].values
train_acc = df[acc_col].values
train_loss = df["loss"].values
test_acc = df["val_" + acc_col].values
test_loss = df["val_loss"].values
損失関数の値
# Learning curve: loss per epoch for both datasets.
for _series, _label in ((train_loss, "train data"), (test_loss, "test data")):
    plt.plot(epochs, _series, label=_label)
plt.xlabel("epochs")
plt.ylabel("loss\n(categorical crossentropy)")
plt.legend(loc="upper right")
plt.show()
画像分類の正答率
# Learning curve: classification accuracy per epoch for both datasets.
for _series, _label in ((train_acc, "train data"), (test_acc, "test data")):
    plt.plot(epochs, _series, label=_label)
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.legend(loc="lower right")
plt.show()
学習済みモデルを用いて,testデータに対する推論計算を行い,分類結果をみてみましょう.
# Run inference on a few test images and compare the prediction with
# the ground-truth label.
for i in [0, 1, 2]:
    y_true = y_test[i]
    # `Sequential.predict_classes` was removed in TF 2.6; taking the
    # argmax over the softmax outputs is the equivalent, version-safe
    # way to get the predicted class index.
    y_pred = model.predict(X_test[i].reshape(1, 28, 28)).argmax(axis=-1)[0]
    print("y_test_pred", "(i="+str(i)+"): ", y_pred)
    print("y_test_true", "(i="+str(i)+"): ", y_true)
    print("X_test", "(i="+str(i)+"): ")
    plt.imshow(X_test[i], cmap='gray')
    plt.show()
# Visualize the first ROW*COLUMN test images in a grid, titling each
# with its index, whether the model was correct, and the true/predicted
# labels.
fig = plt.figure(figsize=(12, 8))
ROW = 4
COLUMN = 5
for i in range(ROW * COLUMN):
    y_true = y_test[i]
    # argmax over the softmax outputs replaces `predict_classes`,
    # which was removed in TF 2.6.
    y_pred = model.predict(X_test[i].reshape(1, 28, 28)).argmax(axis=-1)[0]
    if y_true == y_pred:
        result = "True"  # Correct answer from the model
    else:
        result = "False"  # Incorrect answer from the model
    plt.subplot(ROW, COLUMN, i+1)
    plt.imshow(X_test[i], cmap='gray')
    plt.title("No.{} - {}\ny_true:{}, y_pred:{}".format(i, result, y_true, y_pred))
    plt.axis("off")
fig.tight_layout()
fig.show()
Kerasでは,全てのニューラルネットワークモデルが keras.models.Model() クラスのインスタンスとなっています.学習済みモデル model に対して model.save() を実行することで「モデルの保存」が完了します.
# Save model as a Keras instance (HDF5 file with architecture + weights).
# NOTE: in the flattened source this cell was collapsed onto one comment
# line, so the save never executed; restored to three statements.
ins_path = 'trained_model_v0.h5'
model.save(ins_path)