I am new to machine learning and Python, so apologies for a newbie question.
I am using the MNIST dataset to study a simple CNN model. First, I saved the MNIST data as PNG files.
import cv2
import os
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
img_root = './images'
train = os.path.join(img_root, 'train')
test = os.path.join(img_root, 'test')
if not os.path.exists(img_root):
    os.mkdir(img_root)
if not os.path.exists(train):
    os.mkdir(train)
if not os.path.exists(test):
    os.mkdir(test)
# Save Train images
for i in range(x_train.shape[0]):
    img_dir = os.path.join(train, str(y_train[i]))
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)
    image_out = os.path.join(img_dir, str(i) + ".png")
    cv2.imwrite(image_out, x_train[i])
# Save Test images
for i in range(x_test.shape[0]):
    img_dir = os.path.join(test, str(y_test[i]))
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)
    image_out = os.path.join(img_dir, str(i) + ".png")
    cv2.imwrite(image_out, x_test[i])
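(As an aside, the repeated exists()/mkdir() checks above can be collapsed with os.makedirs(..., exist_ok=True); a minimal sketch using the same directory layout as above:

import os

img_root = './images'
# makedirs creates intermediate directories and, with exist_ok=True,
# silently does nothing if the directory already exists.
for sub in ('train', 'test'):
    os.makedirs(os.path.join(img_root, sub), exist_ok=True)

)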
Then I loaded those images using three different methods and trained and tested the model with each.
Method 1 “cv_only”: load all images into a numpy array using cv2.imread(), without tf.data. Results are
Epoch 48/50 469/469 [==============================] - 2s 5ms/step - loss: 0.0025 - accuracy: 0.9992 - val_loss: 0.0490 - val_accuracy: 0.9937
Epoch 49/50 469/469 [==============================] - 2s 5ms/step - loss: 0.0042 - accuracy: 0.9990 - val_loss: 0.0477 - val_accuracy: 0.9924
Epoch 50/50 469/469 [==============================] - 2s 5ms/step - loss: 0.0033 - accuracy: 0.9991 - val_loss: 0.0485 - val_accuracy: 0.9914
Learning time: 199[s]
Method 2 “tf_cv”: use tf.data.Dataset.from_tensor_slices(), map() and batch() to build the datasets. Inside the map() function, cv2.imread() is used to read the images. Results are
Epoch 48/50 469/469 [==============================] - 32s 68ms/step - loss: 5.8638e-07 - accuracy: 1.0000 - val_loss: 4.7863 - val_accuracy: 0.4507
Epoch 49/50 469/469 [==============================] - 32s 68ms/step - loss: 4.6416e-07 - accuracy: 1.0000 - val_loss: 4.8436 - val_accuracy: 0.4514
Epoch 50/50 469/469 [==============================] - 32s 69ms/step - loss: 3.6748e-07 - accuracy: 1.0000 - val_loss: 4.8742 - val_accuracy: 0.4517
Learning time: 1638[s]
Method 3 “tf_only”: use tf.data.Dataset.from_tensor_slices(), map() and batch() to build the datasets (same as Method 2), but inside the map() function, tf.io.read_file() is used to read the images. Results are
Epoch 48/50 469/469 [==============================] - 16s 33ms/step - loss: 8.5148e-07 - accuracy: 1.0000 - val_loss: 5.4608 - val_accuracy: 0.4065
Epoch 49/50 469/469 [==============================] - 16s 34ms/step - loss: 6.7230e-07 - accuracy: 1.0000 - val_loss: 5.4721 - val_accuracy: 0.4085
Epoch 50/50 469/469 [==============================] - 15s 33ms/step - loss: 5.3065e-07 - accuracy: 1.0000 - val_loss: 5.4845 - val_accuracy: 0.4087
Learning time: 700[s]
What am I doing wrong in Methods 2 and 3? Please help.
This is the full code for the model I used.
import numpy as np
import cv2
import glob
import datetime
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Dense, Flatten
from keras.utils import to_categorical
# Valid Options are "cv_only", "tf_cv" and "tf_only"
data_load_method = "tf_cv"
train_images = "./images/train/"
test_images = "./images/test/"
# network parameters
batch_size = 128
categories = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
num_classes = len(categories)
def make_sample_imgs(files):
    global X, Y
    X = []
    Y = []
    for cat, fname in files:
        X.append(cv2.imread(fname, cv2.IMREAD_GRAYSCALE))
        Y.append(cat)
    return np.array(X), np.array(Y)

def make_sample(files):
    global X, Y
    X = []
    Y = []
    for cat, fname in files:
        X.append(fname)
        Y.append(cat)
    return np.array(X), np.array(Y)

def process_path_cv2(image_path, label):
    def load_image(path):
        image_gray = cv2.imread(path.decode("utf-8"), cv2.IMREAD_GRAYSCALE)
        image_gray = image_gray.astype('float32') / 255
        image_gray = tf.expand_dims(image_gray, axis=-1)
        return image_gray
    image = tf.numpy_function(load_image, [image_path], tf.float32)
    return image, label

def process_path_tf(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_image(image, channels=1)
    image = tf.image.convert_image_dtype(image, tf.float32)
    return image, label
start_time = datetime.datetime.now()
train = []
test = []
for idx, cat in enumerate(categories):
    image_dir = train_images + cat
    files = glob.glob(image_dir + "/*.png")
    for f in files:
        train.append((idx, f))
    print("Train ", image_dir, "append ", len(files), "files!")
for idx, cat in enumerate(categories):
    image_dir = test_images + cat
    files = glob.glob(image_dir + "/*.png")
    for f in files:
        test.append((idx, f))
    print("Test ", image_dir, "append ", len(files), "files!")
if data_load_method == "cv_only":
x_train, y_train = make_sample_imgs(train)
x_test, y_test = make_sample_imgs(test)
# convert to one-hot vector
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
elif data_load_method == "tf_cv" or data_load_method == "tf_only":
x_train, y_train = make_sample(train)
x_test, y_test = make_sample(test)
# convert to one-hot vector
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
train_datasets = tf.data.Dataset.from_tensor_slices((x_train, y_train))
test_datasets = tf.data.Dataset.from_tensor_slices((x_test, y_test))
if data_load_method == "tf_cv":
train_datasets = train_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)
test_datasets = test_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)
else:
train_datasets = train_datasets.map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)
test_datasets = test_datasets.map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)
# train_datasets = train_datasets.cache()
# test_datasets = test_datasets.cache()
train_datasets = train_datasets.batch(batch_size)
test_datasets = test_datasets.batch(batch_size)
train_datasets = train_datasets.prefetch(tf.data.AUTOTUNE)
test_datasets = test_datasets.prefetch(tf.data.AUTOTUNE)
else:
print("Method not defined!")
exit()
model = Sequential()
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", activation="relu", input_shape=(28, 28, 1)))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(BatchNormalization())
model.add(Conv2D(filters=256, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(512, activation="relu"))
model.add(Dense(10, activation="softmax"))
model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
if data_load_method == "cv_only":
    model.fit(x_train, y_train, epochs=50, batch_size=batch_size, validation_data=(x_test, y_test))
elif data_load_method == "tf_cv" or data_load_method == "tf_only":
    model.fit(train_datasets, epochs=50, batch_size=batch_size, validation_data=test_datasets)
end_time = datetime.datetime.now()
time_diff = (end_time - start_time)
learning_time = time_diff.total_seconds()
print(f'Learning time: {int(learning_time)}[s]')
If I use
train_datasets = train_datasets.cache()
test_datasets = test_datasets.cache()
then the time can be reduced to around 150 seconds, but the accuracy also drops. I am using Python 3.10, Keras 2.10.0 and tensorflow-gpu 2.10.0.
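(For reference, a commonly recommended ordering of tf.data stages is map, then cache, then shuffle, then batch, then prefetch, so that images are decoded once, the cached elements are reshuffled every epoch, and batches are prepared ahead of time. A minimal sketch of that ordering, reusing the train_datasets and process_path_tf names from the code above:

# Sketch only: decode once, cache the decoded tensors, reshuffle each epoch,
# then batch and prefetch.
train_datasets = (train_datasets
                  .map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)
                  .cache()
                  .shuffle(15000)
                  .batch(batch_size)
                  .prefetch(tf.data.AUTOTUNE))

)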
Sorry about the messy code. I am still learning to code.
Solved it myself. The summary is:
1. Changed tf.image.decode_image() to tf.image.decode_png(), because I am using PNG files.
2. Added train_datasets = train_datasets.shuffle(15000) and test_datasets = test_datasets.shuffle(4000); the previous code had no shuffling.
Now all three methods give almost the same val_loss and val_accuracy.
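In short, the changed lines in the tf_only path are just these (a focused excerpt; variable names match the full code below):

# Change 1: decode the PNG files explicitly instead of using decode_image()
image = tf.image.decode_png(image, channels=1)

# Change 2: shuffle both datasets before batching (the old code never shuffled)
train_datasets = train_datasets.shuffle(15000)
test_datasets = test_datasets.shuffle(4000)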
The new code is as follows.
import numpy as np
import cv2
import glob
import datetime
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Dense, Flatten
from keras.utils import to_categorical
# Valid Options are "cv_only", "tf_cv" and "tf_only"
data_load_method = "cv_only"
train_images = "./images/train/"
test_images = "./images/test/"
# network parameters
batch_size = 128
categories = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
num_classes = len(categories)
def make_sample_imgs(files):
    global X, Y
    X = []
    Y = []
    for cat, fname in files:
        X.append(cv2.imread(fname, cv2.IMREAD_GRAYSCALE))
        Y.append(cat)
    return np.array(X), np.array(Y)

def make_sample(files):
    global X, Y
    X = []
    Y = []
    for cat, fname in files:
        X.append(fname)
        Y.append(cat)
    return np.array(X), np.array(Y)

def process_path_cv2(image_path, label):
    def load_image(path):
        image_gray = cv2.imread(path.decode("utf-8"), cv2.IMREAD_GRAYSCALE)
        image_gray = image_gray.astype('float32') / 255
        image_gray = tf.expand_dims(image_gray, axis=-1)
        return image_gray
    image = tf.numpy_function(load_image, [image_path], tf.float32)
    return image, label

def process_path_tf(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_png(image, channels=1)
    image = tf.image.resize(image, [28, 28])  # Must use this, increases the val_accuracy
    image = tf.image.convert_image_dtype(image, tf.float32)
    image /= 255.0
    return image, label
start_time = datetime.datetime.now()
train = []
test = []
for idx, cat in enumerate(categories):
    image_dir = train_images + cat
    files = glob.glob(image_dir + "/*.png")
    for f in files:
        train.append((idx, f))
    print("Train ", image_dir, "append ", len(files), "files!")
for idx, cat in enumerate(categories):
    image_dir = test_images + cat
    files = glob.glob(image_dir + "/*.png")
    for f in files:
        test.append((idx, f))
    print("Test ", image_dir, "append ", len(files), "files!")
if data_load_method == "cv_only":
x_train, y_train = make_sample_imgs(train)
x_test, y_test = make_sample_imgs(test)
# convert to one-hot vector
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
elif data_load_method == "tf_cv" or data_load_method == "tf_only":
x_train, y_train = make_sample(train)
x_test, y_test = make_sample(test)
# convert to one-hot vector
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
train_datasets = tf.data.Dataset.from_tensor_slices((x_train, tf.cast(y_train, tf.int64)))
test_datasets = tf.data.Dataset.from_tensor_slices((x_test, tf.cast(y_test, tf.int64)))
if data_load_method == "tf_cv":
train_datasets = train_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)
test_datasets = test_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)
else:
train_datasets = train_datasets.map(process_path_tf, num_parallel_calls=tf.data.experimental.AUTOTUNE)
test_datasets = test_datasets.map(process_path_tf, num_parallel_calls=tf.data.experimental.AUTOTUNE)
train_datasets = train_datasets.cache()
test_datasets = test_datasets.cache()
train_datasets = train_datasets.shuffle(15000)
test_datasets = test_datasets.shuffle(4000)
train_datasets = train_datasets.batch(batch_size)
test_datasets = test_datasets.batch(batch_size)
test_datasets = test_datasets.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
else:
print("Method not defined!")
exit()
model = Sequential()
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", activation="relu", input_shape=(28, 28, 1)))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(BatchNormalization())
model.add(Conv2D(filters=256, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(512, activation="relu"))
model.add(Dense(10, activation="softmax"))
model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
if data_load_method == "cv_only":
    model.fit(x_train, y_train, epochs=50, batch_size=batch_size, validation_data=(x_test, y_test))
elif data_load_method == "tf_cv" or data_load_method == "tf_only":
    model.fit(train_datasets, epochs=50, validation_data=test_datasets)
end_time = datetime.datetime.now()
time_diff = (end_time - start_time)
learning_time = time_diff.total_seconds()
print(f'Learning time: {int(learning_time)}[s]')