python, deep-learning, pytorch, conv-neural-network, yolo

Real-time object detection with YOLO model not working


I have trained a custom YOLO model to detect square slots on a board, and it works with more than 95% accuracy on images.

But as soon as I switch to video detection, it does not seem to detect a single thing.

I am using the following code to run real-time object detection:

import cv2
import numpy as np

cap = cv2.VideoCapture('../video/1st/output.mp4')
while cap.isOpened():
    ret, frame = cap.read()

    results = model(frame)
    final_img = np.squeeze(results.render())

    cv2.imshow("YOLO", final_img)
    if cv2.waitKey(10) & 0xFF == ord("q"):
        break
cap.release()
cv2.destroyAllWindows()

I load the model using this code

import torch

model = torch.hub.load("ultralytics/yolov5", "custom",
                       path="yolov5/runs/train/exp36/weights/best.pt", force_reload=True)
model.conf = 0.6
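
On single images the same model detects fine. For example (the image path here is illustrative):

results = model('board_photo.jpg')  # any test image of the board
results.print()  # print a text summary of the detections
results.show()   # display the annotated image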

I have even tried splitting the video into JPEGs, running the model on the individual images, saving the outputs, and then merging the output images into a new video file.

That works perfectly, so the model is detecting something.

But as soon as I switch back to video, the detections disappear.
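
Roughly, that workaround looks like this (a minimal sketch; the frames/ and detected/ directory names, merged.mp4, and the 30 fps are illustrative, and model is the one loaded above):

import glob
import os

import cv2
import numpy as np

os.makedirs('frames', exist_ok=True)
os.makedirs('detected', exist_ok=True)

# Step 1: dump every frame of the video as a JPEG
cap = cv2.VideoCapture('../video/1st/output.mp4')
i = 0
while True:
    ret, frame = cap.read()
    if not ret:
        break
    cv2.imwrite(f'frames/frame_{i:05d}.jpg', frame)
    i += 1
cap.release()

# Step 2: run the model on each image and save the rendered result
for i, path in enumerate(sorted(glob.glob('frames/*.jpg'))):
    results = model(cv2.imread(path))
    cv2.imwrite(f'detected/frame_{i:05d}.jpg', np.squeeze(results.render()))

# Step 3: merge the annotated images back into a new video
paths = sorted(glob.glob('detected/*.jpg'))
h, w = cv2.imread(paths[0]).shape[:2]
out = cv2.VideoWriter('merged.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 30, (w, h))
for path in paths:
    out.write(cv2.imread(path))
out.release()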


Solution

  • Yes, you cannot see anything because a None-type object comes back from results.render(). You can draw the boxes yourself instead; change the script like this:

    cap = cv2.VideoCapture('../video/1st/output.mp4')
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:  # stop when the video runs out of frames
            break

        results = model(frame)
        bboxes = results.xyxy[0].cpu().tolist()
        for bbox in bboxes:
            conf = f'{bbox[4]:.4f}'  # confidence of that prediction
            bbox = list(map(int, bbox))  # convert floats to integers
            class_id = bbox[5]  # class id
            bbox = bbox[:4]
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                          color=(255, 255, 255), thickness=3)
        cv2.imshow("YOLO", frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()
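
    If you also want the confidence and class name drawn on each box, the same loop extends naturally (cv2.putText is standard OpenCV, and results.names is the class-name mapping YOLOv5 attaches to its results; the "slot" label in the comment is just an example):

    for bbox in results.xyxy[0].cpu().tolist():
        x1, y1, x2, y2 = map(int, bbox[:4])
        conf = bbox[4]
        class_id = int(bbox[5])
        label = f"{results.names[class_id]} {conf:.2f}"  # e.g. "slot 0.87"
        cv2.rectangle(frame, (x1, y1), (x2, y2), color=(255, 255, 255), thickness=3)
        cv2.putText(frame, label, (x1, max(y1 - 10, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)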
    

    To also write the frames to a video file, the full code should look like this:

    import cv2
    import torch

    input_video_path = '...'  # enter your video path here
    cap = cv2.VideoCapture(input_video_path)
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    output_video_path = 'output_video.mp4'  # output video path
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # 'mp4v' matches the .mp4 container; 'XVID' is meant for .avi
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    # Load your model here, e.g. as in the question:
    model = torch.hub.load("ultralytics/yolov5", "custom",
                           path="yolov5/runs/train/exp36/weights/best.pt")
    model.conf = 0.6

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        results = model(frame)
        bboxes = results.xyxy[0].cpu().tolist()
        for bbox in bboxes:
            conf = f'{bbox[4]:.4f}'  # confidence of that prediction
            bbox = list(map(int, bbox))  # convert floats to integers
            class_id = bbox[5]  # class id
            bbox = bbox[:4]
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                          color=(255, 255, 255), thickness=3)
        out.write(frame)
        # cv2.imshow("YOLO", frame)  # uncomment to preview while writing
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    cap.release()
    out.release()

    # Close all OpenCV windows
    cv2.destroyAllWindows()
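
    Once out.release() has run, a quick sanity check (standard OpenCV calls, reusing output_video_path from above) confirms the output file actually contains frames:

    check = cv2.VideoCapture(output_video_path)
    print("opened:", check.isOpened())
    print("frames:", int(check.get(cv2.CAP_PROP_FRAME_COUNT)))
    check.release()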
    

    References:

    https://docs.ultralytics.com/
    https://docs.ultralytics.com/yolov5/
    https://docs.ultralytics.com/yolov5/tutorials/pytorch_hub_model_loading/