pythonopencvobject-detectionyolo

YOLOv8 get predicted bounding box


I want to integrate OpenCV with YOLOv8 from ultralytics, so I want to obtain the bounding box coordinates from the model prediction. How do I do this?

from ultralytics import YOLO
import cv2

model = YOLO('yolov8n.pt')
cap = cv2.VideoCapture(0)
cap.set(3, 640)
cap.set(4, 480)

while True:
    _, frame = cap.read()
    
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    results = model.predict(img)

    for r in results:
        for c in r.boxes.cls:
            print(model.names[int(c)])

    cv2.imshow('YOLO V8 Detection', frame)
    if cv2.waitKey(1) & 0xFF == ord(' '):
        break

cap.release()
cv2.destroyAllWindows()

I want to display the YOLO annotated image in OpenCV. I know I can use the stream parameter in model.predict(source='0', show=True). But I want to continuously monitor the predicted class names for my program, at the same time displaying the image output.


Solution

  • This will:

    1. Loop through each frame in the video
    2. Pass each frame to Yolov8 which will generate bounding boxes
    3. Draw the bounding boxes on the frame using the built in ultralytics' annotator:
    
    from ultralytics import YOLO
    import cv2
    from ultralytics.utils.plotting import Annotator  # ultralytics.yolo.utils.plotting is deprecated
    
    model = YOLO('yolov8n.pt')
    cap = cv2.VideoCapture(0)
    cap.set(3, 640)
    cap.set(4, 480)
    
    while True:
        _, img = cap.read()
        
        # BGR to RGB conversion is performed under the hood
        # see: https://github.com/ultralytics/ultralytics/issues/2575
        results = model.predict(img)
    
        for r in results:
            
            annotator = Annotator(img)
            
            boxes = r.boxes
            for box in boxes:
                
                b = box.xyxy[0]  # get box coordinates in (left, top, right, bottom) format
                c = box.cls
                annotator.box_label(b, model.names[int(c)])
              
        img = annotator.result()  
        cv2.imshow('YOLO V8 Detection', img)     
        if cv2.waitKey(1) & 0xFF == ord(' '):
            break
    
    cap.release()
    cv2.destroyAllWindows()