python numpy tensorflow deep-learning

TypeError: Only integers, slices, ellipsis, tf.newaxis and scalar tf.int32/tf.int64 tensors are valid indices


As an exercise, I am trying to train a classification model using TensorFlow v1 without using Keras, sklearn, or any other library that would greatly ease my life... lol.

I keep getting this error though, and I can't figure out what's wrong with my code:

# imports
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O

import os,shutil, cv2, itertools

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior() # use Tensorflow v1

For simplicity I resized every image to 64 x 64.

Since there are 3 channels, the size of each input vector (N_x) is 64 x 64 x 3 (= 12288).

To reduce computation time I only use 3000 images from the training data.

# Flattened length of one image: 64 x 64 pixels times 3 colour channels.
N_x = 64 * 64 * 3

# Location of the training images, one sub-directory per class.
TRAIN_DATA_DIR_PREFIX = '/kaggle/input/cat-and-dog/training_set/training_set/'
TRAIN_DATA_DOG_DIR = TRAIN_DATA_DIR_PREFIX + 'dogs/'
TRAIN_DATA_CAT_DIR = TRAIN_DATA_DIR_PREFIX + 'cats/'

# (path, label) pairs for dogs (label 1): the first 1500 .jpg files listed.
TRAIN_DOG = [
    (TRAIN_DATA_DOG_DIR + file_name, 1)
    for file_name in os.listdir(TRAIN_DATA_DOG_DIR)
    if file_name.endswith(".jpg")
][:1500]

# (path, label) pairs for cats (label 0): the first 1500 .jpg files listed.
TRAIN_CAT = [
    (TRAIN_DATA_CAT_DIR + file_name, 0)
    for file_name in os.listdir(TRAIN_DATA_CAT_DIR)
    if file_name.endswith(".jpg")
][:1500]

# Dogs first, then cats.
TRAIN_DATA = TRAIN_DOG + TRAIN_CAT

print('TRAIN_DATA :', len(TRAIN_DATA))

def read_image(file_path):
    """Read an image file in colour mode and resize it to 64 x 64 pixels.

    Args:
        file_path: Path of the image file to load.

    Returns:
        The image resized to 64 x 64 with bicubic interpolation, as
        returned by ``cv2.resize``.

    Raises:
        OSError: If OpenCV could not read or decode ``file_path``.
    """
    image = cv2.imread(file_path, cv2.IMREAD_COLOR)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque error inside cv2.resize.
    if image is None:
        raise OSError("Could not read image: {}".format(file_path))
    return cv2.resize(image, (64, 64), interpolation=cv2.INTER_CUBIC)

def prepare(data):
    """Load every image in ``data`` into a feature matrix and a label row.

    Args:
        data: Sequence of ``(image_path, label)`` pairs.

    Returns:
        A tuple ``(X, Y)``. ``X`` is a uint8 array of shape
        ``(N_x, len(data))`` holding one flattened image per column; ``Y`` is
        a float array of shape ``(1, len(data))`` holding the labels.
    """
    # Size the arrays from the argument itself, not from the global
    # TRAIN_DATA, so the function works for any list passed in.
    n_samples = len(data)
    X = np.zeros((N_x, n_samples), dtype=np.uint8)
    print ("X shape is {}".format(X.shape))
    Y = np.zeros((1, n_samples))
    print ("Y shape is {}".format(Y.shape))
    for i, (image_path, y) in enumerate(data):
        Y[0, i] = y
        # Flatten the image into a single column of length N_x.
        X[:, i] = read_image(image_path).reshape(N_x)
    return X, Y

# Build the (N_x, m) feature matrix and the (1, m) label row from the file list.
X,Y = prepare(TRAIN_DATA)
# NOTE(review): tf.transpose returns tf.Tensor objects, so after these two
# lines X and Y are no longer NumPy arrays. The index error shown in the
# traceback further down is raised by TensorFlow's tensor slicing
# (_slice_helper), not by NumPy.
Y = tf.transpose(Y)
X = tf.transpose(X)
print ("after transpose X shape is {}".format(X.shape))
print ("after transpose Y shape is {}".format(Y.shape))

Output:

TRAIN_DATA : 3000
X shape is (12288, 3000)
Y shape is (1, 3000)
after transpose X shape is (3000, 12288)
after transpose Y shape is (3000, 1)

Then I define my tf_train function:

def tf_train(X, Y, batch_size=20):
    """Build a one-layer sigmoid classifier and train it on random mini-batches.

    Args:
        X: Feature data, indexed by sample as ``X[idx]``.
        Y: Label data, indexed as ``Y[0, idx]``.
        batch_size: Number of samples drawn per training step.

    Prints the mini-batch loss every 100 steps; returns nothing.
    """
    # Dataset (inputs and labels)
    
    # Number of input features per sample.
    # NOTE(review): N_x is defined as 64 * 64 * 3 = 12288 elsewhere in this
    # script; 12880 looks like a typo — confirm.
    D=12880
    
    # Placeholders for one mini-batch of features and labels.
    x = tf.placeholder(tf.float32, [batch_size, D])
    y_ = tf.placeholder(tf.float32, [batch_size, 1])

    # random variable
    # Weight column, initialised from a normal distribution with stddev 0.1.
    W = tf.Variable(tf.random_normal([D, 1],stddev=0.1))
    
    # Logits: linear combination of the inputs (no bias term).
    z=tf.matmul(x,W)
    
    
    # Define loss and optimizer
    # Mean sigmoid cross-entropy over the batch.
    cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=z, labels=y_))
    
    # Plain gradient descent with a learning rate of 1e-2.
    train_step = tf.train.GradientDescentOptimizer(1e-2).minimize(cross_entropy)
    
    # NOTE(review): the session is never closed inside this function; the
    # "interactive session is already active" warning in the output below is
    # consistent with calling it more than once.
    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run() # initializes the variables (W)
    
    # Train
    # Each iteration is one mini-batch step, despite the loop variable's name.
    for epoch in range (3000):
        # Draw batch_size distinct sample indices (a NumPy integer array).
        idx = np.random.choice(len(TRAIN_DATA), batch_size, replace=False)
        #idx = np.random.permutation(np.arange(X.shape[1])) [:batch_size]
        # NOTE(review): this is the line the traceback points at. X and Y are
        # indexed here with the NumPy array idx. Also, Y[0, idx] reads from
        # row 0, while the printed shape of Y after the transpose is
        # (3000, 1) and y_ expects [batch_size, 1] — confirm the intended
        # indexing.
        _, l = sess.run([train_step, cross_entropy], feed_dict={x: X[idx], y_: Y[0,idx]})
        if epoch%100 == 0:
            print('loss: '+str(l))

And then when I start training my model :

# Start training on the X and Y built above, using the default batch_size of 20.
tf_train(X,Y)

I get the following output:

/opt/conda/lib/python3.7/site-packages/tensorflow/python/client/session.py:1766: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call `InteractiveSession.close()` to release resources held by the other session(s).
  warnings.warn('An interactive session is already active. This can '
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/tmp/ipykernel_33/1897119706.py in <module>
----> 1 tf_train(X,Y)

/tmp/ipykernel_33/3528951873.py in tf_train(X, Y, batch_size)
     29         idx = np.random.choice(len(TRAIN_DATA), batch_size, replace=False)
     30         #idx = np.random.permutation(np.arange(X.shape[1])) [:batch_size]
---> 31         _, l = sess.run([train_step, cross_entropy], feed_dict={x: X[idx], y_: Y[0,idx]})
     32         if epoch%100 == 0:
     33             print('loss: '+str(l))

/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
    204     """Call target, and fall back on dispatchers if there is a TypeError."""
    205     try:
--> 206       return target(*args, **kwargs)
    207     except (TypeError, ValueError):
    208       # Note: convert_to_eager_tensor currently raises a ValueError, not a

/opt/conda/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py in _slice_helper(tensor, slice_spec, var)
   1012       new_axis_mask |= (1 << index)
   1013     else:
-> 1014       _check_index(s)
   1015       begin.append(s)
   1016       end.append(s + 1)

/opt/conda/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py in _check_index(idx)
    886     # TODO(slebedev): IndexError seems more appropriate here, but it
    887     # will break `_slice_helper` contract.
--> 888     raise TypeError(_SLICE_TYPE_ERROR + ", got {!r}".format(idx))
    889 
    890 

TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got array([2282, 1114, 1884, 2812,  887,  381, 1723, 2031,  820, 2989,  314,
       1800,  372, 2219, 1937, 2313, 2264, 2154, 2168,  283])

Apparently the error is caused by

_, l = sess.run([train_step, cross_entropy], feed_dict={x: X[idx], y_: Y[0,idx]})

Any idea what's wrong with it ?


Solution

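  • idx is a NumPy array of indices, but X and Y are tf.Tensor objects (they were produced by tf.transpose), and a tensor cannot be indexed with an array, so X[idx] fails. Keep X and Y as NumPy arrays (e.g. use np.transpose instead of tf.transpose) and select the batch rows from them. Try something like this: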

    %tensorflow_version 1.x
    import tensorflow as tf
    import numpy as np
    
    def tf_train(X, Y, batch_size=20):
        """Train a one-layer sigmoid classifier with mini-batch gradient descent.

        Args:
            X: NumPy array of shape (num_samples, num_features).
            Y: NumPy array of shape (num_samples, 1) with targets in [0, 1].
            batch_size: Number of distinct samples drawn for each step.

        Raises:
            TypeError: If X or Y is a tf.Tensor; batches are selected with a
                NumPy index array, which tensors do not support.

        Prints the mini-batch loss every 100 steps; returns nothing.
        """
        if isinstance(X, tf.Tensor) or isinstance(Y, tf.Tensor):
            raise TypeError(
                "X and Y must be NumPy arrays, not tf.Tensor; "
                "use np.transpose instead of tf.transpose")

        # Read the sizes from the data instead of hard-coding them, so the
        # placeholders always match what is fed.
        num_samples, D = X.shape

        # Placeholders for one mini-batch of features and labels.
        x = tf.placeholder(tf.float32, [batch_size, D])
        y_ = tf.placeholder(tf.float32, [batch_size, 1])

        # Weight column, initialised from a normal distribution with stddev 0.1.
        W = tf.Variable(tf.random_normal([D, 1], stddev=0.1))

        # Logits: linear combination of the inputs (no bias term).
        z = tf.matmul(x, W)

        # Define loss and optimizer
        cross_entropy = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=z, labels=y_))
        train_step = tf.train.GradientDescentOptimizer(1e-2).minimize(cross_entropy)

        # The context manager closes the session on exit, so repeated calls
        # do not leave sessions open.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            # Train: each iteration is one step on a freshly drawn mini-batch.
            for step in range(3000):
                idx = np.random.choice(num_samples, batch_size, replace=False)
                # Index arrays select whole rows, giving (batch_size, D) and
                # (batch_size, 1) batches.
                _, l = sess.run([train_step, cross_entropy],
                                feed_dict={x: X[idx], y_: Y[idx]})
                if step % 100 == 0:
                    print('loss: '+str(l))

    # Random stand-in data: 3000 samples with 64 * 64 * 3 features each.
    X = np.random.random((3000, 64 * 64 * 3))
    Y = np.random.random((3000, 1))

    tf_train(X, Y)
    
    loss: 2.0646608
    loss: 3.5250945
    loss: 4.343132
    loss: 4.4199777
    loss: 2.8261409
    loss: 3.0091796
    loss: 2.010383
    loss: 2.4877071
    loss: 3.520267
    loss: 4.089248
    loss: 8.244058
    loss: 1.5667378