machine-learning computer-vision artificial-intelligence yolov8 ultralytics

I have a custom-trained model (best.pt) that detects two classes, person and headlight. Now I want the output according to these conditions


Can you please help me? I have a custom-trained model (best.pt) that detects two classes: person and headlight. I want the output to follow these conditions: 1. If the model detects only a headlight, return 0. 2. If the model detects only a person, return 1. 3. If the model detects both a headlight and a person, return 0.

import cv2
from ultralytics import YOLO

# Script purpose: run a YOLO model on every frame of a video and print one
# code per frame: 0 when a headlight is flagged (with or without a person),
# 1 when only a person is flagged, -1 when neither is flagged. Each frame is
# also copied to an output video and shown in a preview window.
video_path = 'data/video1.mp4'
video_out_path = 'out.mp4'

cap = cv2.VideoCapture(video_path)

# Check if the video file is opened successfully
if not cap.isOpened():
    print("Error: Could not open the video file.")
    exit()

# Prime the loop: the first frame is read here, later frames are read at the
# bottom of the `while ret` loop.
ret, frame = cap.read()

# Check if the first frame is read successfully
if not ret:
    print("Error: Could not read the first frame from the video.")
    # NOTE(review): this path exits without calling cap.release().
    exit()

# The writer reuses the input's FPS and frame size so frames can be written
# back unchanged.
cap_out = cv2.VideoWriter(video_out_path, cv2.VideoWriter_fourcc(*'MP4V'), cap.get(cv2.CAP_PROP_FPS),
                          (int(cap.get(3)), int(cap.get(4))))  # Use cap.get(3) and cap.get(4) for width and height

# NOTE(review): the surrounding text calls the weights file "best.pt", while
# this loads "bestall5.pt" — confirm which file is intended.
model = YOLO("bestall5.pt")

# Detections must score strictly above this value to set a flag.
detection_threshold = 0.5
while ret:
    # Run inference on the current frame; the return value is iterated below
    # as a list of per-image results.
    results_list = model(frame)

    # Per-frame flags, reset on every iteration.
    headlight_detected = False
    person_detected = False

    # Iterate through the list of results
    for results in results_list:
        # Check if the current result has the necessary attributes
        # NOTE(review): the result object appears to expose detections under
        # `.boxes` rather than `.xyxy`, so this check looks like it is never
        # true; both flags would then stay False and the output would always
        # be -1. Confirm against the Ultralytics Results documentation.
        if hasattr(results, 'xyxy'):
            for result in results.xyxy:
                # Each row is expected to unpack into exactly six values:
                # box corners, confidence score and class id.
                x1, y1, x2, y2, score, class_id = result.tolist()
                # The integer corners are not used further in this script.
                x1, x2, y1, y2 = int(x1), int(x2), int(y1), int(y2)

                # Assuming class_id is the index of the class in the model's class list
                # NOTE(review): class_id comes from .tolist() and is not cast
                # to int here; model.names is presumably keyed by int — verify.
                class_name = model.names[class_id]

                if class_name == "headlight" and score > detection_threshold:
                    headlight_detected = True
                elif class_name == "person" and score > detection_threshold:
                    person_detected = True

    # Output based on the specified conditions
    # A headlight takes priority: both branches that involve one yield 0.
    if headlight_detected and person_detected:
        output = 0
    elif headlight_detected:
        output = 0
    elif person_detected:
        output = 1
    else:
        output = -1  # No person or headlight detected

    print("Output:", output)

    # The frame is written and displayed as read; nothing is drawn on it.
    cap_out.write(frame)

    cv2.imshow('Object Detection', frame)
    
    # Break the loop if 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    # Fetch the next frame; `ret` becomes False when no frame is returned,
    # which ends the loop.
    ret, frame = cap.read()

# Release the reader and writer and close the preview window.
cap.release()
cap_out.release()
cv2.destroyAllWindows()

I tried this, but I only get -1 as output even though my video contains both headlights and people.


Solution

  • The condition if hasattr(results, 'xyxy') is always False here, so the inner loop never runs, both flags stay False, and the output is always -1. The attributes actually available on each results object are:

    orig_img, orig_shape, boxes, masks, probs, keypoints, obb, speed, names, path
    

    To get the xyxy box coordinates, score, and class_id, use results.boxes instead. The available properties of the boxes are:

    xyxy, conf, cls, id, xywh, xyxyn, xywhn
    

    All of them are returned as torch.Tensor objects. To get the raw values you can do the following:

    # Iterate through the list of results
    for results in results_list:
        # Check that the result exposes `boxes` (used here in place of the
        # `xyxy` check from the question's code)
        if hasattr(results, 'boxes'):
            # Each iteration handles one detected box.
            for box in results.boxes:
                # box.xyxy is a tensor; .tolist()[0] takes its first row
                # (presumably the only row for a single box — TODO confirm).
                x1, y1, x2, y2 = box.xyxy.tolist()[0]
                # Truncate the corner coordinates to integers.
                x1, x2, y1, y2 = int(x1), int(x2), int(y1), int(y2)
                # .item() pulls the single value out of the tensor.
                score = box.conf.item()
                # Cast to int, presumably so it can index model.names.
                class_id = int(box.cls.item())