Tags: python, tensorflow, keras, mnist, opencv

Model with MNIST gives poor accuracy and long training time when the data is loaded using tf.data


I am new to machine learning and Python, so sorry for the newbie question.

I am using the MNIST data set to study a simple CNN model. First, I saved the MNIST data to PNG files.

import cv2
import os

from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

img_root = './images'
train = os.path.join(img_root, 'train')
test = os.path.join(img_root, 'test')

if not os.path.exists(img_root):
    os.mkdir(img_root)

if not os.path.exists(train):
    os.mkdir(train)

if not os.path.exists(test):
    os.mkdir(test)

# Save Train images
for i in range(x_train.shape[0]):
    img_dir = os.path.join(train, str(y_train[i]))
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)
    image_out = os.path.join(img_dir, str(i) + ".png")
    cv2.imwrite(image_out, x_train[i])

# Save Test images
for i in range(x_test.shape[0]):
    img_dir = os.path.join(test, str(y_test[i]))
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)
    image_out = os.path.join(img_dir, str(i) + ".png")
    cv2.imwrite(image_out, x_test[i])
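
The three existence checks can also be collapsed with os.makedirs(), which creates missing parent directories and, with exist_ok=True, skips ones that already exist; a minimal sketch:

import os

# exist_ok=True makes this a no-op when the directory is already there,
# and the parent ./images is created automatically
os.makedirs('./images/train', exist_ok=True)
os.makedirs('./images/test', exist_ok=True)

# inside the save loops, the per-digit check can likewise become:
# os.makedirs(img_dir, exist_ok=True)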

Then I loaded those images using three different methods and trained and tested the model with each.

Method 1 “cv_only”: load all the images into a NumPy array using cv2.imread(), without tf.data. Results:

Epoch 48/50 469/469 [==============================] - 2s 5ms/step - loss: 0.0025 - accuracy: 0.9992 - val_loss: 0.0490 - val_accuracy: 0.9937

Epoch 49/50 469/469 [==============================] - 2s 5ms/step - loss: 0.0042 - accuracy: 0.9990 - val_loss: 0.0477 - val_accuracy: 0.9924

Epoch 50/50 469/469 [==============================] - 2s 5ms/step - loss: 0.0033 - accuracy: 0.9991 - val_loss: 0.0485 - val_accuracy: 0.9914

Learning time: 199[s]

Method 2 “tf_cv”: use tf.data.Dataset.from_tensor_slices(), map() and batch() to build the datasets; inside the map() function, cv2.imread() reads the images. Results:

Epoch 48/50 469/469 [==============================] - 32s 68ms/step - loss: 5.8638e-07 - accuracy: 1.0000 - val_loss: 4.7863 - val_accuracy: 0.4507

Epoch 49/50 469/469 [==============================] - 32s 68ms/step - loss: 4.6416e-07 - accuracy: 1.0000 - val_loss: 4.8436 - val_accuracy: 0.4514

Epoch 50/50 469/469 [==============================] - 32s 69ms/step - loss: 3.6748e-07 - accuracy: 1.0000 - val_loss: 4.8742 - val_accuracy: 0.4517

Learning time: 1638[s]

Method 3 “tf_only”: use tf.data.Dataset.from_tensor_slices(), map() and batch() to build the datasets (same as Method 2), but inside the map() function, tf.io.read_file() reads the images. Results:

Epoch 48/50 469/469 [==============================] - 16s 33ms/step - loss: 8.5148e-07 - accuracy: 1.0000 - val_loss: 5.4608 - val_accuracy: 0.4065

Epoch 49/50 469/469 [==============================] - 16s 34ms/step - loss: 6.7230e-07 - accuracy: 1.0000 - val_loss: 5.4721 - val_accuracy: 0.4085

Epoch 50/50 469/469 [==============================] - 15s 33ms/step - loss: 5.3065e-07 - accuracy: 1.0000 - val_loss: 5.4845 - val_accuracy: 0.4087

Learning time: 700[s]

What am I doing wrong in Methods 2 and 3? Please help.

This is the full code I used.


import numpy as np
import cv2
import glob
import datetime

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Dense, Flatten
from keras.utils import to_categorical

# Valid Options are "cv_only", "tf_cv" and "tf_only"
data_load_method = "tf_cv"

train_images = "./images/train/"
test_images = "./images/test/"

# network parameters
batch_size = 128

categories = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
num_classes = len(categories)


def make_sample_imgs(files):
    global X, Y
    X = []
    Y = []
    for cat, fname in files:
        X.append(cv2.imread(fname, cv2.IMREAD_GRAYSCALE))
        Y.append(cat)
    return np.array(X), np.array(Y)


def make_sample(files):
    global X, Y
    X = []
    Y = []
    for cat, fname in files:
        X.append(fname)
        Y.append(cat)
    return np.array(X), np.array(Y)


def process_path_cv2(image_path, label):

    def load_image(path):
        image_gray = cv2.imread(path.decode("utf-8"), cv2.IMREAD_GRAYSCALE)
        image_gray = image_gray.astype('float32')/255
        image_gray = tf.expand_dims(image_gray, axis=-1)
        return image_gray

    image = tf.numpy_function(load_image, [image_path], tf.float32)
    return image, label


def process_path_tf(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_image(image, channels=1)
    image = tf.image.convert_image_dtype(image, tf.float32)
    return image, label


start_time = datetime.datetime.now()

train = []
test = []

for idx, cat in enumerate(categories):
    image_dir = train_images + cat
    files = glob.glob(image_dir + "/*.png")
    for f in files:
        train.append((idx, f))
    print("Train ", image_dir, "append ", len(files), "files!")

for idx, cat in enumerate(categories):
    image_dir = test_images + cat
    files = glob.glob(image_dir + "/*.png")
    for f in files:
        test.append((idx, f))
    print("Test ", image_dir, "append ", len(files), "files!")


if data_load_method == "cv_only":
    x_train, y_train = make_sample_imgs(train)
    x_test, y_test = make_sample_imgs(test)

    # convert to one-hot vector
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    x_train = x_train.reshape(-1, 28, 28, 1)
    x_test = x_test.reshape(-1, 28, 28, 1)

    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255

elif data_load_method == "tf_cv" or data_load_method == "tf_only":
    x_train, y_train = make_sample(train)
    x_test, y_test = make_sample(test)

    # convert to one-hot vector
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    train_datasets = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    test_datasets = tf.data.Dataset.from_tensor_slices((x_test, y_test))

    if data_load_method == "tf_cv":
        train_datasets = train_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)
        test_datasets = test_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)

    else:
        train_datasets = train_datasets.map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)
        test_datasets = test_datasets.map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)

    # train_datasets = train_datasets.cache()
    # test_datasets = test_datasets.cache()

    train_datasets = train_datasets.batch(batch_size)
    test_datasets = test_datasets.batch(batch_size)
    train_datasets = train_datasets.prefetch(tf.data.AUTOTUNE)
    test_datasets = test_datasets.prefetch(tf.data.AUTOTUNE)

else:
    print("Method not defined!")
    exit()


model = Sequential()

model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", activation="relu", input_shape=(28, 28, 1)))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(BatchNormalization())

model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(BatchNormalization())

model.add(Conv2D(filters=256, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())

model.add(Flatten())
model.add(Dense(512, activation="relu"))

model.add(Dense(10, activation="softmax"))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

if data_load_method == "cv_only":
    model.fit(x_train, y_train, epochs=50, batch_size=batch_size, validation_data=(x_test, y_test))

elif data_load_method == "tf_cv" or data_load_method == "tf_only":
    model.fit(train_datasets, epochs=50, batch_size=batch_size, validation_data=test_datasets)

end_time = datetime.datetime.now()
time_diff = (end_time - start_time)
learning_time = time_diff.total_seconds()
print(f'Learning time: {int(learning_time)}[s]')
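
For the two tf.data methods, a quick way to check what the pipeline actually feeds the model is to pull one batch and print its shapes and labels; a minimal sketch, assuming train_datasets has been built as above:

for images, labels in train_datasets.take(1):
    print(images.shape)                    # e.g. (128, 28, 28, 1)
    print(tf.argmax(labels, axis=1)[:20])  # which digits ended up in the first batch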

If I use

train_datasets = train_datasets.cache()

test_datasets = test_datasets.cache()

then the training time can be reduced to around 150 seconds, but the accuracy also drops. I am using Python 3.10, Keras 2.10.0 and tensorflow-gpu 2.10.0.
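
For the timing side, the tf.data performance guide generally recommends caching right after the decode/map step and before batch() and prefetch(), so each file is read and decoded only once; a minimal sketch using the names from the code above:

train_datasets = train_datasets.map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)
train_datasets = train_datasets.cache()       # decoded images stay in memory after the first epoch
train_datasets = train_datasets.batch(batch_size)
train_datasets = train_datasets.prefetch(tf.data.AUTOTUNE)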

Sorry about the messy code; I am still learning to code.


Solution

  • Solved it myself. The summary is:

    1. Changed tf.image.decode_image() to tf.image.decode_png(), because I am using PNG files.
    2. Shuffled the data: train_datasets = train_datasets.shuffle(15000) and test_datasets = test_datasets.shuffle(4000). The previous code did no shuffling, so with the file lists built class by class, every batch contained only a single digit and the model generalized very poorly to the validation set.

    Now all three methods give almost the same val_loss and val_accuracy.
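
    Shuffling the list of (label, path) tuples once, before from_tensor_slices(), would also break up the class ordering; a minimal sketch:

    import random

    random.shuffle(train)   # train/test are the lists of (label, path) tuples built above
    random.shuffle(test)
    x_train, y_train = make_sample(train)
    x_test, y_test = make_sample(test)

    Dataset.shuffle() is still preferable, though, because it reshuffles the data every epoch.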

    The new code is as follows.

    import numpy as np
    import cv2
    import glob
    import datetime
    
    import tensorflow as tf
    from keras.models import Sequential
    from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
    from keras.layers import Dense, Flatten
    from keras.utils import to_categorical
    
    # Valid Options are "cv_only", "tf_cv" and "tf_only"
    data_load_method = "cv_only"
    
    train_images = "./images/train/"
    test_images = "./images/test/"
    
    # network parameters
    batch_size = 128
    
    categories = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
    num_classes = len(categories)
    
    
    def make_sample_imgs(files):
        global X, Y
        X = []
        Y = []
        for cat, fname in files:
            X.append(cv2.imread(fname, cv2.IMREAD_GRAYSCALE))
            Y.append(cat)
        return np.array(X), np.array(Y)
    
    
    def make_sample(files):
        global X, Y
        X = []
        Y = []
        for cat, fname in files:
            X.append(fname)
            Y.append(cat)
        return np.array(X), np.array(Y)
    
    
    def process_path_cv2(image_path, label):
    
        def load_image(path):
            image_gray = cv2.imread(path.decode("utf-8"), cv2.IMREAD_GRAYSCALE)
            image_gray = image_gray.astype('float32')/255
            image_gray = tf.expand_dims(image_gray, axis=-1)
            return image_gray
    
        image = tf.numpy_function(load_image, [image_path], tf.float32)
        return image, label
    
    
    def process_path_tf(image_path, label):
        image = tf.io.read_file(image_path)
        image = tf.image.decode_png(image, channels=1)  # decode_png instead of decode_image, since the files are PNGs
        image = tf.image.resize(image, [28, 28])    # must use this, it increases the val_accuracy
        image = tf.image.convert_image_dtype(image, tf.float32)
        image /= 255.0
        return image, label
    
    
    start_time = datetime.datetime.now()
    
    train = []
    test = []
    
    for idx, cat in enumerate(categories):
        image_dir = train_images + cat
        files = glob.glob(image_dir + "/*.png")
        for f in files:
            train.append((idx, f))
        print("Train ", image_dir, "append ", len(files), "files!")
    
    for idx, cat in enumerate(categories):
        image_dir = test_images + cat
        files = glob.glob(image_dir + "/*.png")
        for f in files:
            test.append((idx, f))
        print("Test ", image_dir, "append ", len(files), "files!")
    
    
    if data_load_method == "cv_only":
        x_train, y_train = make_sample_imgs(train)
        x_test, y_test = make_sample_imgs(test)
    
        # convert to one-hot vector
        y_train = to_categorical(y_train)
        y_test = to_categorical(y_test)
    
        x_train = x_train.reshape(-1, 28, 28, 1)
        x_test = x_test.reshape(-1, 28, 28, 1)
    
        x_train = x_train.astype('float32') / 255
        x_test = x_test.astype('float32') / 255
    
    elif data_load_method == "tf_cv" or data_load_method == "tf_only":
        x_train, y_train = make_sample(train)
        x_test, y_test = make_sample(test)
    
        # convert to one-hot vector
        y_train = to_categorical(y_train)
        y_test = to_categorical(y_test)
    
        train_datasets = tf.data.Dataset.from_tensor_slices((x_train, tf.cast(y_train, tf.int64)))
        test_datasets = tf.data.Dataset.from_tensor_slices((x_test, tf.cast(y_test, tf.int64)))
    
        if data_load_method == "tf_cv":
            train_datasets = train_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)
            test_datasets = test_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)
    
        else:
            train_datasets = train_datasets.map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)
            test_datasets = test_datasets.map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)
    
        train_datasets = train_datasets.cache()
        test_datasets = test_datasets.cache()
        train_datasets = train_datasets.shuffle(15000)
        test_datasets = test_datasets.shuffle(4000)
        train_datasets = train_datasets.batch(batch_size)
        test_datasets = test_datasets.batch(batch_size)
        test_datasets = test_datasets.prefetch(buffer_size=tf.data.AUTOTUNE)
    
    else:
        print("Method not defined!")
        exit()
    
    
    model = Sequential()
    
    model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", activation="relu", input_shape=(28, 28, 1)))
    model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
    model.add(BatchNormalization())
    
    model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
    model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
    model.add(BatchNormalization())
    
    model.add(Conv2D(filters=256, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    
    model.add(Flatten())
    model.add(Dense(512, activation="relu"))
    
    model.add(Dense(10, activation="softmax"))
    
    model.summary()
    
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    
    if data_load_method == "cv_only":
        model.fit(x_train, y_train, epochs=50, batch_size=batch_size, validation_data=(x_test, y_test))
    
    elif data_load_method == "tf_cv" or data_load_method == "tf_only":
        model.fit(train_datasets, epochs=50, validation_data=test_datasets)
    
    end_time = datetime.datetime.now()
    time_diff = (end_time - start_time)
    learning_time = time_diff.total_seconds()
    print(f'Learning time: {int(learning_time)}[s]')