I'm trying to do predictions on the webcam and display it within my web application. I want the webcam frame to have a specific size but when I resize it the model stops working correctly and bounding boxes looks very bad.
here's the function I'm using
def RunYOLOWebcam(path_x):
# Start webcam
cap = cv2.VideoCapture(path_x)
desired_width = 540
desired_height = 300
# Model
model = YOLO("best.pt")
# Object classes
classNames = [""] * 26 # Create an array with 26 empty strings
for i in range(26):
classNames[i] = chr(65 + i) # Fill the array with uppercase letters (A-Z)
while True:
success, img = cap.read()
if not success:
break
# Perform YOLO detection on the original image
results = model(img, stream=True)
# Save bounding box coordinates
bounding_boxes = []
for r in results:
boxes = r.boxes
for box in boxes:
# Scale bounding box coordinates to match original image size
x1, y1, x2, y2 = box.xyxy[0]
x1, y1, x2, y2 = int(x1 * img.shape[1] / desired_width), int(y1 * img.shape[0] / desired_height), \
int(x2 * img.shape[1] / desired_width), int(y2 * img.shape[0] / desired_height)
bounding_boxes.append((x1, y1, x2, y2))
# Resize the image to the desired resolution
img_resized = cv2.resize(img, (desired_width, desired_height))
# Resize the bounding boxes to match the resized image
resized_bounding_boxes = []
for box in bounding_boxes:
x1, y1, x2, y2 = box
x1_resized, y1_resized, x2_resized, y2_resized = int(x1 * desired_width / img.shape[1]), \
int(y1 * desired_height / img.shape[0]), \
int(x2 * desired_width / img.shape[1]), \
int(y2 * desired_height / img.shape[0])
resized_bounding_boxes.append((x1_resized, y1_resized, x2_resized, y2_resized))
# Draw bounding boxes on the resized image
for box in resized_bounding_boxes:
x1, y1, x2, y2 = box
cv2.rectangle(img_resized, (x1, y1), (x2, y2), (255, 0, 255), 3)
yield img_resized
cv2.destroyAllWindows()`
I tried to resize the boxes according to the new resized frame but it's still not working.
It looks like you are a little confused about the coordinates you have. You don't need to scale yolov8 box xyxy coordinates to the original image size, they are already scaled to it.
For convenience, I have used box.xyxyn instead of box.xyxy: it returns the boxes in xyxy format normalized by original image size (x1 and x2 divided by the original image width, y1 and y2 divided by the original image height). Normalized coordinates are easily scaled to different image sizes: you just need to multiply them to the desired image width and height respectively.
for r in results:
boxes = r.boxes
for box in boxes:
x1, y1, x2, y2 = box.xyxyn[0]
bounding_boxes.append((x1, y1, x2, y2))
# Resize the image to the desired resolution
img_resized = cv2.resize(img, (desired_width, desired_height))
# Resize the bounding boxes to match the resized image
resized_bounding_boxes = []
for box in bounding_boxes:
x1, y1, x2, y2 = box
x1_resized, y1_resized, x2_resized, y2_resized = int(x1 * desired_width), \
int(y1 * desired_height), \
int(x2 * desired_width), \
int(y2 * desired_height)
resized_bounding_boxes.append((x1_resized, y1_resized, x2_resized, y2_resized))
Available yolov8 boxes coordinates formats are listed here.