pythontensorflowmachine-learning-model

Tensorflow keras, Shapes (None, 6, 36) and (None, 216) are incompatible


i'm new to ML and AI, and i'm trying to build a model for reading text images and I'm kind of lost here. this code is what i'm using to build the model but i'm not able to.

import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Sequential

# Defines the path to your images folder
folder_path = r"C:\Users\cuell.DESKTOP-1DJM07S\Desktop\MLTU\train"

# Defines the characters that can appear in the images
characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

# Define the number of images to use for training
num_train_images = 85

# Define the size of the images
image_width, image_height, num_channels = 180, 40, 3

# Defines the number of characters in each image
num_characters = 6

# Defines the batch size and number of epochs to use for training
batch_size, num_epochs = 10, 1

# Load the images and their labels
def load_images(folder_path):
    images, labels = [], []
    for filename in os.listdir(folder_path):
        if filename.endswith(".jpg"):
            image_path = os.path.join(folder_path, filename)
            image = tf.io.decode_png(tf.io.read_file(image_path), channels=num_channels)
            label = filename.split(".")[0]
            images.append(image)
            labels.append(label)
    return images, labels

images, labels = load_images(folder_path)
# Convert the images and labels to numpy arrays
def convert_to_arrays(images, labels):
    images_array = np.array(images) / 255.0
    labels_array = np.zeros((num_train_images, num_characters, len(characters)))
    for i, label in enumerate(labels):
        for j, char in enumerate(label):
            labels_array[i, j, characters.index(char)] = 1
    return images_array, labels_array

images_array, labels_array = convert_to_arrays(images, labels)

# Shuffle the images and labels
def shuffle_arrays(images_array, labels_array):
    permutation = np.random.permutation(len(images_array))
    return images_array[permutation], labels_array[permutation]

images_array, labels_array = shuffle_arrays(images_array, labels_array)


# Split the images and labels into training and validation sets
def split_arrays(images_array, labels_array):
    num_train = int(num_train_images * 0.8)
    train_images, train_labels = images_array[:num_train], labels_array[:num_train]
    val_images, val_labels = images_array[num_train:], labels_array[num_train:]
    return train_images, train_labels, val_images, val_labels

train_images, train_labels, val_images, val_labels = split_arrays(images_array, labels_array)

# Define the TensorFlow model
model = Sequential([
    Conv2D(32, (3, 3), activation="relu", input_shape=(40, 180, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation="relu"),
    Dense(num_characters * len(characters), activation="relu")
])

# Compile the TensorFlow model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])


# Train the TensorFlow model
model.fit(train_images, train_labels, batch_size=batch_size, epochs=num_epochs, validation_data=(val_images, val_labels))

#model.save("text_model.h5")

Once i hit model.fit, i get the answer of ValueError: Shapes (None, 6, 36) and (None, 216) are incompatible, My guess it is that i'm doing something wrong on the split of the 20% of the testing and training, and the dimensions of them change but i'm still unsure.

I'm expecting to have my first text reading model running. :)


Solution

  • You define

    labels_array = np.zeros((num_train_images, num_characters, len(characters)))
    

    After splitting into batches, your labels have shape (batch_size, 6, 36).

    Meanwhile, the last layer of your model is

    Dense(num_characters * len(characters), activation="relu")
    

    which has shape (batch_size, 216). This mismatch causes the error.

    To fix this, you need to reshape the output of your model. Add this extra layer to the end of your Sequential:

    Reshape(shape=(num_characters, len(characters)))
    

    Edit: Also import Reshape from tensorflow.keras.layers.