I've trained a YOLOv5 model and it works well on new images with YOLOv5's detect.py.
I've exported the model to ONNX and now I'm trying to load the ONNX model and run inference on a new image. My code runs, but I don't get the correct bounding boxes.
I need to get the area of the bounding boxes etc. so I can't just use detect.py
Also, I used YOLOv5's non_max_suppression to prune the list of bounding boxes, but I don't know if it's a good solution.
What I have done so far:
# Run a YOLOv5 ONNX export on one image, keep the NMS-filtered boxes,
# compute their areas, and draw them on the original picture.
import cv2
import numpy as np
import onnxruntime as ort
import torch
from PIL import Image, ImageDraw, ImageFont
from yolov5.utils.general import non_max_suppression

MODEL_SIZE = 640  # edge length of the square the image is resized to

# Load image and preprocessing
# IMREAD_COLOR always yields a 3-channel BGR array; IMREAD_UNCHANGED could
# return grayscale or BGRA data, which breaks the HWC -> CHW transpose below.
img = cv2.imread("image.jpg", cv2.IMREAD_COLOR)
if img is None:  # cv2.imread signals failure by returning None
    raise FileNotFoundError("could not read image.jpg")

# Factors that map coordinates in the resized model input back to the
# original image (the image is stretched, not letterboxed).
img_h, img_w = img.shape[:2]
w_ratio = img_w / MODEL_SIZE
h_ratio = img_h / MODEL_SIZE

# OpenCV loads BGR; YOLOv5's detect.py feeds RGB scaled to [0, 1].
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
resized = cv2.resize(rgb, (MODEL_SIZE, MODEL_SIZE), interpolation=cv2.INTER_AREA)
resized = resized.astype(np.float32) / 255.0
resized = np.ascontiguousarray(resized.transpose((2, 0, 1)))  # HWC -> CHW
resized = np.expand_dims(resized, axis=0)  # Add batch dimension

# run session on ONNX (fall back to CPU when CUDA is not available)
ort_session = ort.InferenceSession(
    "yolov5.onnx",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)

# compute ONNX Runtime output prediction
ort_inputs = {ort_session.get_inputs()[0].name: resized}
ort_outs = ort_session.run(None, ort_inputs)

# Keep only interesting bounding boxes
# run() returns a list with one array per model output; take the first one
# instead of wrapping the whole list in an extra leading dimension.
# NOTE(review): assumes the first output is the (batch, boxes, 5 + classes)
# prediction tensor - confirm against the exported model.
output = torch.from_numpy(ort_outs[0])
out = non_max_suppression(output, conf_thres=0.2, iou_thres=0.5)[0]

# Rescale the boxes from model-input pixels to original-image pixels.
# The rows stay in corner format (x1, y1, x2, y2, score, class): xyxy2xywh
# returns *centre* x/y, so treating its result as a top-left corner shifts
# every box by half its width and height.
out[:, :4] *= torch.tensor([w_ratio, h_ratio, w_ratio, h_ratio])

# Width, height and area of every kept box, in original-image pixels.
widths = out[:, 2] - out[:, 0]
heights = out[:, 3] - out[:, 1]
areas = widths * heights

# Show bbox
tmp_image = Image.fromarray(rgb)  # PIL expects RGB channel order
draw = ImageDraw.Draw(tmp_image)
fnt = ImageFont.load_default()  # loop-invariant, so load it once

# class_list maps class name -> class id; invert it once, keeping the first
# name for each id exactly as the original list(...).index(...) lookup did.
id_to_name = {}
for name, idx in class_list.items():
    id_to_name.setdefault(idx, name)

for x1, y1, x2, y2, score, class_id in out.tolist():
    class_string = id_to_name[round(class_id)]
    color = CLASS_RECTANGLE_COLORS[class_string]
    draw.rectangle((x1, y1, x2, y2), outline=color, width=4)
    draw.multiline_text((x1 + 8, y1 + 8), f"{class_string} {score*100:.2f}%", font=fnt, fill=color)
Image with my algorithm vs detect.py : https://i.sstatic.net/ZxNLJ.jpg
Can anyone help?
I decided to give up and use this code instead:
import cv2
import torch
from PIL import Image
# Edge length shared by the manual resize and the inference call.
INPUT_SIZE = 640

# Build the detector from the local YOLOv5 checkout, using the ONNX weights.
model = torch.hub.load(
    path_to_yolo_library,
    'custom',
    path=onnx_path,
    source='local',
)

# Open the picture with PIL and stretch it to the square input size.
img = Image.open(image_path).resize((INPUT_SIZE, INPUT_SIZE))

# Forward pass; NMS is already applied to what comes back.
results = model(img, size=INPUT_SIZE)

# Report the detections: console summary, on-screen preview, saved files
# (results1.jpg, results2.jpg, ...).
results.print()
results.show()
results.save()

# Predictions for the first image.
print('\n', results.xyxy[0])