pythonpandasnumpyindex-error

IndexError: index 0 is out of bounds for axis 0 with size 0? see detail in output1111


#count the number of fake and real videos
def number_of_real_and_fake_videos(data_list):
    header_list = ["file","label"]
    lab = pd.read_csv('/content/drive/My Drive/Gobal_metadata.csv',names=header_list)
    fake = 0
    real = 0
    for i in data_list:
    temp_video = i.split('/')[-1]
    label = lab.iloc[(labels.loc[labels["file"] == temp_video].index.values[0]),1]
    if(label == 'FAKE'):
        fake+=1
    if(label == 'REAL'):
        real+=1
    return real,fake
# load the labels and video in data loader
import random
import pandas as pd
from sklearn.model_selection import train_test_split

header_list = ["file","label"]
labels = pd.read_csv('/content/drive/My Drive/Gobal_metadata.csv',names=header_list)
#print(labels)
train_videos = video_files[:int(0.8*len(video_files))]
valid_videos = video_files[int(0.8*len(video_files)):]
print("train : " , len(train_videos))
print("test : " , len(valid_videos))
# train_videos,valid_videos = train_test_split(data,test_size = 0.2)
# print(train_videos)

print("TRAIN: ", "Real:",number_of_real_and_fake_videos(train_videos)[0]," Fake:",number_of_real_and_fake_videos(train_videos)[1])
print("TEST: ", "Real:",number_of_real_and_fake_videos(valid_videos)[0]," Fake:",number_of_real_and_fake_videos(valid_videos)[1])


im_size = 112
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

train_transforms = transforms.Compose([
                                        transforms.ToPILImage(),
                                        transforms.Resize((im_size,im_size)),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean,std)])

test_transforms = transforms.Compose([
                                        transforms.ToPILImage(),
                                        transforms.Resize((im_size,im_size)),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean,std)])
train_data = video_dataset(train_videos,labels,sequence_length = 10,transform = train_transforms)
#print(train_data)
val_data = video_dataset(valid_videos,labels,sequence_length = 10,transform = train_transforms)
train_loader = DataLoader(train_data,batch_size = 4,shuffle = True,num_workers = 2)
valid_loader = DataLoader(val_data,batch_size = 4,shuffle = True,num_workers = 2)
image,label = train_data[0]
im_plot(image[0,:,:,:])

Output:

train :  8720
test :  2180
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-32-7ad703495b44> in <module>()
     14 # print(train_videos)
     15 
---> 16 print("TRAIN: ", "Real:",number_of_real_and_fake_videos(train_videos)[0]," Fake:",number_of_real_and_fake_videos(train_videos)[1])
     17 print("TEST: ", "Real:",number_of_real_and_fake_videos(valid_videos)[0]," Fake:",number_of_real_and_fake_videos(valid_videos)[1])
     18 

<ipython-input-29-8723d4941fd5> in number_of_real_and_fake_videos(data_list)
      7   for i in data_list:
      8     temp_video = i.split('/')[-1]
----> 9     label = lab.iloc[(labels.loc[labels["file"] == temp_video].index.values[0]),1]
     10     if(label == 'FAKE'):
     11       fake+=1

IndexError: index 0 is out of bounds for axis 0 with size 0

Solution

  • These 2 lines of code are accessing a list index that may not exist

    print("TRAIN: ", "Real:",number_of_real_and_fake_videos(train_videos)[0]," Fake:",number_of_real_and_fake_videos(train_videos)[1])
    print("TEST: ", "Real:",number_of_real_and_fake_videos(valid_videos)[0]," Fake:",number_of_real_and_fake_videos(valid_videos)[1])
    

    Maybe try a safer alternative

    if len(number_of_real_and_fake_videos(train_videos)) > 1:
        print("TRAIN: ", "Real:",number_of_real_and_fake_videos(train_videos)[0]," Fake:",number_of_real_and_fake_videos(train_videos)[1])
    

    Same for the other one

    if len(number_of_real_and_fake_videos(valid_videos)) > 1:
        print("TEST: ", "Real:",number_of_real_and_fake_videos(valid_videos)[0]," Fake:",number_of_real_and_fake_videos(valid_videos)[1])
    

    Regarding why it happens, we would need the data, etc. But this a good starting point to find out what's causing the issue, trying printing the data, etc.