I am trying to train a YOLOv8 detection model. Since I couldn't find a dataset that fits my project's purpose, I've decided to create my own dataset by overlaying images on top of a background image. To achieve this, I used Python's PIL package. However, a few errors consistently show up when training the YOLO model.
Below is the code I used to generate images.
from PIL import Image, ImageCms
from glob import glob
from tqdm import tqdm
import numpy as np
import os
import shutil
import random
import sys
def overlay_and_resize_pillow(background_image_path, overlay_image_path, output_image_path, white):
# Open the background and overlay images
if white:
background = Image.new(mode="RGBA", size=(random.randint(512, 1024), random.randint(512, 1024)), color="white")
else:
background = Image.open(background_image_path).resize((random.randint(512, 1024), random.randint(512, 1024)))
overlay = Image.open(overlay_image_path)
profile = ImageCms.createProfile("sRGB")
# Generate a random scale factor for resizing the overlay image
scale_factor = random.uniform(0.5, 1.2)
# Resize the overlay image based on the scale factor
overlay_resized = overlay.resize((int(overlay.width * scale_factor), int(overlay.height * scale_factor)))
# Get the dimensions of the resized overlay image
overlay_width, overlay_height = overlay_resized.size
# Calculate the valid range for overlay position to ensure it fits within the background
max_x_offset = background.width - overlay_width
max_y_offset = background.height - overlay_height
# Calculate the position to overlay the resized image on the background
x_offset = random.randint(0, max_x_offset)
y_offset = random.randint(0, max_y_offset)
# Create a copy of the background to overlay the resized image
background_copy = background.copy()
# Paste the resized overlay image onto the background copy
background_copy.paste(overlay_resized, (x_offset, y_offset), overlay_resized)
# Save the output image
background_copy.save(output_image_path, icc_profile=ImageCms.ImageCmsProfile(profile).tobytes())
# background_copy.save(output_image_path)
name = output_image_path.split('/')[-1].split('.')[0]
cl = '0'
width = overlay_width / background.width
height = overlay_height / background.height
x_center = (x_offset + overlay_width / 2) / background.width
y_center = (y_offset + overlay_height / 2) / background.height
with open(f'./chartdetection/labels/train/{name}.txt', 'w+') as f:
f.write(' '.join(map(str, [cl, x_center, y_center, width, height])))
When training, I keep getting these errors
How should I modify my code?
Convert your image to RGB before trying to save it with a color profile:
background_copy = background_copy.convert("RGB")
# Save the output image
...