[SOLVED] ValueError: Data cardinality is ambiguous. Make sure all arrays contain the same number of samples.'x' sizes: 8 'y' sizes: 3

ValueError: Data cardinality is ambiguous. Make sure all arrays contain the same number of samples.'x' sizes: 8 'y' sizes: 3

Whenever I try to run this code, it raises this ValueError and I don't know why. I checked the lengths of the images and labels lists and they are equal, but x_train and y_train differ in length. Note that I can't use tensorflow.keras because, for some reason, it raises an error, so I use only keras.

import numpy as np
import os
import keras
import tensorflow as tf
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import cv2 as cv

# Build the dataset: one sub-folder of DIR per person, every detected face in
# every image becomes one 128x128 sample labelled with the person's index.
people = ['H', 'J']
# Raw string so the backslash in the Windows path is never parsed as an escape
# sequence (the value is unchanged, but the invalid-escape warning goes away).
DIR = r'C:\AI'
images = []
labels = []
haar_cascade = cv.CascadeClassifier('haar_face.xml')

# The position of a name in `people` is used as its numeric class label.
for label, person in enumerate(people):
    path = os.path.join(DIR, person)
    for img in os.listdir(path):
        img_path = os.path.join(path, img)
        img_array = cv.imread(img_path)
        if img_array is None:
            # cv.imread returns None for files it cannot decode (non-image
            # files, corrupt images); skip them instead of crashing in cvtColor.
            continue
        gray = cv.cvtColor(img_array, cv.COLOR_BGR2GRAY)
        face_rect = haar_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=6)
        # An image may yield zero or several detections; each detected face is
        # appended together with its label so both lists stay the same length.
        for (x, y, w, h) in face_rect:
            face_roi = cv.resize(img_array[y:y + h, x:x + w], (128, 128))
            images.append(face_roi)
            labels.append(label)



#images = np.array(images, dtype='float')/255.0
#labels = np.array(labels, dtype='float')/255.0

# NOTE: this unpacking order is what triggers the ValueError asked about here.
# train_test_split returns (x_train, x_test, y_train, y_test), so the name
# `y_train` below actually receives the held-out images, not the training
# labels. The line is kept as posted because it is the subject of the question.
x_train, y_train, x_test, y_test = train_test_split(images, labels, test_size=0.2, random_state=4)

# Scale the image data by 1/255 so pixel intensities fall in [0, 1].
x_train = np.array(x_train, dtype='float') / 255.0
# Labels are class indices taken from `people` (0 or 1), not pixel data, so
# they are only converted to a float array. Dividing them by 255 would shrink
# label 1 to roughly 0.004 and make both classes indistinguishable from 0.
y_train = np.array(y_train, dtype='float')
# Sanity check: both arrays must contain the same number of samples.
print(len(x_train), ' ', len(y_train))

# Small CNN for the two-class face classifier: a single conv/pool stage
# followed by a dense head that ends in one sigmoid unit.
model = keras.models.Sequential()
model.add(keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(128, 128, 3)))
model.add(keras.layers.MaxPool2D(pool_size=(2, 2)))
model.add(keras.layers.BatchNormalization(axis=-1))
# Dropout's first positional argument is the drop rate. Writing `0, 2` would
# pass rate=0 (no dropout at all) and 2 as noise_shape, so the rate is 0.2.
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(512, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))
# A single sigmoid output trained on 0/1 labels needs binary cross-entropy;
# categorical cross-entropy expects one-hot targets over several output units.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# x_train and y_train are already NumPy arrays at this point, so they are
# passed straight to fit() without another np.array() copy.
hist = model.fit(x_train, y_train, epochs=5, batch_size=64)

Solution

The example in the documentation of sklearn.model_selection.train_test_split reads:

# Return order: the train/test pair for X comes first, then the train/test pair for y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

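The code you provided unpacks the returned splits in the wrong order: the variable you named y_train actually receives the test images (the second returned value), not the training labels. As a result, the .fit() call of your model is given the training inputs (8 samples) together with the test inputs (3 samples) instead of the desired training outputs, which is exactly the size mismatch reported in the error. Try the following: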

# Unpack in the order train_test_split returns: both image splits first, then both label splits.
x_train, x_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=4)