python, python-3.x, rtsp, yolov8

RTSP stream keeps reconnecting and is unstable when using YOLOv8 with Python 3


I am conducting an experiment involving YOLOv8, Python 3, and an RTSP stream as the video source.

When designing the program, I expected the RTSP stream to remain stable without frequent reconnections. In reality, however, the stream keeps reconnecting and is unstable. The larger the model I use, the worse the reconnection issue becomes.

I have conducted an in-depth hardware analysis:

  1. My UTP connection is very stable with extremely low latency, as confirmed by various tools.
  2. My computer has more than enough processing power and uses CUDA, so I do not believe the issue is caused by the computer.
  3. I have already tried both TCP and UDP transport for RTSP (see the sketch after this list), but the results remain the same.
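
For reference, this is how the transport can be switched between UDP and TCP via OpenCV's FFmpeg capture options; a minimal sketch of the TCP variant (the UDP variant appears in the full program below). The environment variable must be set before the VideoCapture is created:

import os
import cv2

# Sketch: force RTSP over TCP instead of UDP; the URL is the same HIKVISION source used below
os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;tcp"
camera = cv2.VideoCapture("rtsp://admin:gcc12345@192.168.6.7:554/Streaming/Channels/101/", cv2.CAP_FFMPEG)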

My suspicion is that OpenCV's RTSP handling in Python 3 is slow and buggy, so it cannot cope with a large video delay. Indeed, the object-detection output shows considerable delay.
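
One way to test this suspicion (a minimal diagnostic sketch, separate from the program below, reusing the same source and weights) is to time the frame grab separately from the YOLOv8 inference:

import time
import cv2
from ultralytics import YOLO

# Minimal timing check: same RTSP source and weights as the full program below
rtsp_source = "rtsp://admin:gcc12345@192.168.6.7:554/Streaming/Channels/101/"
model = YOLO("../Yolo-Weights/yolov8x.pt")

cap = cv2.VideoCapture(rtsp_source, cv2.CAP_FFMPEG)
for _ in range(100):
    t0 = time.time()
    ok, frame = cap.read()           # time spent waiting for / decoding one frame
    t1 = time.time()
    if not ok:
        print("read failed")
        break
    model(frame, verbose=False)      # time spent on YOLOv8 inference
    t2 = time.time()
    print(f"read: {t1 - t0:.3f}s  inference: {t2 - t1:.3f}s")
cap.release()

If inference consistently takes longer than the camera's frame interval, the capture buffer can back up and the stream can stall or drop frames.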

Below is the program I am using:

import numpy as np
from ultralytics import YOLO
import cv2
import cvzone
import time, math, datetime, os
from sort import *

# os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp"
os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp,max_delay=10000000"


output_folder = "Captured"
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

error_detection = 0

int_counting = 0

# RTSP Source
rtsp_source = "rtsp://admin:gcc12345@192.168.6.7:554/Streaming/Channels/101/" # HIKVISION
# rtsp_source = "rtsp://admin:admin12345@192.168.6.2:554/cam/realmonitor?channel=3&subtype=0" # DAHUA
# rtsp_source = "rtsp://admin:admin12345@192.168.6.2:554/cam/realmonitor?channel=2&subtype=0" # DAHUA
# rtsp_source = "rtsp://admin:admin12345@192.168.6.2:554/cam/realmonitor?channel=4&subtype=0" # DAHUA

# rtsp_source = "../Videos/cars.mp4" # VIDEO

mask = cv2.imread("mask.png")

# Tracking
tracker = Sort(max_age=20, min_hits=3, iou_threshold=0.5)

# limits = [300, 350, 673, 350]

limits = [550, 220, 550, 670]

# Initialize the YOLO model
model = YOLO("../Yolo-Weights/yolov8x.pt")

className = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter',
 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

# Function to reset the number of reconnection attempts
def reset_attempts():
    return 50

# Function to process the video and handle reconnects
def process_video(attempts):
    global error_detection, tracker, int_counting
    totalCount = []
    # Initialize time for FPS calculation
    prev_time = time.time()
    # Time of the last periodic tracker reset
    start_time = time.time()

    while True:
        success, img = camera.read()

        if not success:
            error_detection += 1
            print("[ERROR] Gagal mendapatkan frame, mencoba reconnect..." + datetime.datetime.now().strftime("%m-%d-%Y %I:%M:%S%p"))
            camera.release()

            if attempts > 0:
                time.sleep(min(5, (50 - attempts) / 10))
                return True
            else:
                return False

        # Reset the tracker periodically
        if time.time() - start_time > 300:  # Reset every 5 minutes (300 seconds)
            tracker = Sort(max_age=20, min_hits=3, iou_threshold=0.3)
            totalCount.clear()
            start_time = time.time()  # Reset the start time
            print("[INFO] Tracker reset periodically.")

        # Get the current time
        current_time = time.time()

        # Calculate FPS
        fps = 1 / (current_time - prev_time)
        prev_time = current_time

        print(img)

        img_resized = cv2.resize(img, (1300, 700))  # Resize frame

        # img_resized = img

        # Display FPS on the frame
        cv2.putText(img_resized, f"FPS: {int(fps)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        img_region = cv2.bitwise_and(img_resized, mask)

        # Run YOLO detection
        results = model(img_region, stream=True)

        # Tracker
        detections = np.empty((0,5))
        for r in results:
            boxes = r.boxes
            for box in boxes:
                # Bounding Box
                x1, y1, x2, y2 = box.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
                w, h = x2 - x1, y2 - y1

                # Confidence rounded to two decimals
                conf = math.ceil((box.conf[0] * 100)) / 100

                # Class Name
                cls = int(box.cls[0])

                currentClass = className[cls]

                if currentClass == "car" or currentClass == "truck" or currentClass == "bus" or currentClass == "motorcycle" and conf > 0.5:
                    # cvzone.putTextRect(img_resized, f'{className[cls]} {conf}', (max(0, x1), max(35, y1)), scale=0.7, thickness=1, offset=3)
                    # cvzone.cornerRect(img_resized, (x1, y1, w, h), l=5, rt=5)
                    currentArray = np.array([x1,y1,x2,y2,conf])
                    detections = np.vstack((detections,currentArray))

        # Tracker
        resultsTracker = tracker.update(detections)
        cv2.line(img_resized,(limits[0], limits[1]), (limits[2], limits[3]),(0, 0, 255), 5)
        # cv2.line(img_region, (limits[0], limits[1]), (limits[2], limits[3]), (0, 0, 255), 5)

        for result in resultsTracker:
            x1, y1, x2, y2, Id = result
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            # print(result)
            w, h = x2 - x1, y2 - y1
            cvzone.cornerRect(img_resized, (x1, y1, w, h), l=9, rt=2, colorR=(255,0,255))
            cvzone.putTextRect(img_resized, f'{int(Id)}', (max(0, x1), max(35, y1)), scale=2, thickness=3, offset=10)

            cx, cy = x1+w//2, y1+h//2
            cv2.circle(img_resized,(cx,cy),5,(255,0,255),cv2.FILLED)

            if limits[0] - 20 < cx < limits[0] + 20 and limits[1]  < cy < limits[3]:
                if totalCount.count(Id) == 0:
                    totalCount.append(Id)

                    int_counting += 1

                    # Capture an image when the object crosses the line
                    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
                    file_name = f"capture_{Id}_{timestamp}.png"
                    file_path = os.path.join(output_folder, file_name)

                    # Save the captured image to the output folder
                    cv2.imwrite(file_path, img_resized)
                    print(f"Image saved: {file_path}")

        cvzone.putTextRect(img_resized,f'Count: {int_counting} Error Detection: {int(error_detection)}',(50, 650))

        # Display the resulting frame
        cv2.imshow("Image", img_resized)
        # cv2.imshow("ImageRegion", img_region)

        # Press 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        # cv2.waitKey(0)

    return False

# Initialize the recall flag for reconnecting and attempts for the number of retries
recall = True
attempts = reset_attempts()

while recall:
    # Open the RTSP stream
    camera = cv2.VideoCapture(rtsp_source, cv2.CAP_FFMPEG)
    camera.set(cv2.CAP_PROP_BUFFERSIZE, 1)
    camera.set(cv2.CAP_PROP_FPS, 10)

    if camera.isOpened():
        print("[INFO] Camera connected at " +
              datetime.datetime.now().strftime("%m-%d-%Y %I:%M:%S%p"))
        attempts = reset_attempts()  # Reset attempts after a successful connection
        recall = process_video(attempts)  # Process the video
    else:
        print("[ERROR] Camera not opened " +
              datetime.datetime.now().strftime("%m-%d-%Y %I:%M:%S%p"))
        camera.release()
        attempts -= 1
        print(f"Remaining attempts: {attempts}")

        # Wait before retrying
        time.sleep(5)
        continue

# Close windows and the stream
cv2.destroyAllWindows()

Environment:

absl-py==2.1.0
antlr4-python3-runtime==4.9.3
asttokens==2.4.1
certifi==2024.8.30
charset-normalizer==3.3.2
colorama==0.4.6
contourpy==1.3.0
cvzone==1.5.6
cycler==0.12.1
decorator==5.1.1
exceptiongroup==1.2.2
executing==2.1.0
filelock==3.16.1
filterpy==1.4.5
fonttools==4.54.1
fsspec==2024.9.0
grpcio==1.66.1
hydra-core==1.3.2
idna==3.10
imageio==2.35.1
ipython==8.27.0
jedi==0.19.1
Jinja2==3.1.4
kiwisolver==1.4.7
lap==0.4.0
Markdown==3.7
MarkupSafe==2.1.5
matplotlib==3.9.2
matplotlib-inline==0.1.7
mpmath==1.3.0
networkx==3.3
numpy==1.26.4
omegaconf==2.3.0
opencv-python==4.10.0.84
packaging==24.1
pandas==2.2.3
parso==0.8.4
pillow==10.4.0
prompt_toolkit==3.0.47
protobuf==5.28.2
psutil==6.0.0
pure_eval==0.2.3
py-cpuinfo==9.0.0
Pygments==2.18.0
pyparsing==3.1.4
python-dateutil==2.9.0.post0
pytz==2024.2
PyWavelets==1.7.0
PyYAML==6.0.2
requests==2.32.3
scikit-image==0.19.3
scipy==1.14.1
seaborn==0.13.2
sentry-sdk==2.14.0
six==1.16.0
stack-data==0.6.3
sympy==1.13.3
tensorboard==2.17.1
tensorboard-data-server==0.7.2
thop==0.1.1.post2209072238
tifffile==2024.9.20
torch==1.13.1+cu116
torchaudio==0.13.1+cu116
torchvision==0.14.1+cu116
tqdm==4.66.5
traitlets==5.14.3
typing_extensions==4.12.2
tzdata==2024.2
ultralytics==8.2.101
ultralytics-thop==2.0.8
urllib3==2.2.3
wcwidth==0.2.13
Werkzeug==3.0.4

Can anyone help me solve this RTSP issue?


Solution

  • The problem is that you perform object detection inside the frame-read loop. Decouple frame reading from detection: push frames from a reader thread into a buffer and pull the latest frame from a detection thread, as in the example below.

    import av
    import time
    import logging
    from threading import Event, Thread, Lock
    from collections import deque
    
    logging.getLogger('libav').setLevel(logging.ERROR)
    
    
    class RtspFrameProducer:
        def __init__(self, url: str, frame_buffer_size: int = 25, transport: str = "tcp"):
            self.url = url
            self.frame_buffer = deque(maxlen=frame_buffer_size)  
            self.transport = transport
            self.stop_event = Event()
            self.lock = Lock()
    
        def _decode_frames(self):
            try:
                logging.info(f"Connecting to RTSP stream: {self.url}")
                container = av.open(self.url, 'r', options={'rtsp_transport': self.transport})
                for packet in container.demux(video=0):  # demux only the video stream
                    for frame in packet.decode():
                        if self.stop_event.is_set():
                            logging.info(f"Stopping RTSP connection: {self.url}")
                            container.close()
                            return
    
                        with self.lock:
                            ts = time.time()  # Unix timestamp
                            frame_image = frame.to_ndarray(format='bgr24')
                            self.frame_buffer.append((ts, frame_image))
            except Exception as e:
                logging.error(f"Error in RTSP connection: {e}")
    
        def start(self):
            self.stop_event.clear()
            self.thread = Thread(target=self._decode_frames, daemon=True)
            self.thread.start()
    
        def stop(self):
            self.stop_event.set()
            self.thread.join()
    
    
    class FrameConsumer:
        def __init__(self, producer: RtspFrameProducer, fps: int = 1):
            self.producer = producer
            self.fps = fps
            self.stop_event = Event()
    
        def _consume_frames(self):
            interval = 1 / self.fps
            while not self.stop_event.is_set():
                time.sleep(interval) 
                with self.producer.lock:
                    if self.producer.frame_buffer:
                        ts, frame = self.producer.frame_buffer[-1]  # Get the latest frame
                        self._process_frame(ts, frame)
    
        def _process_frame(self, timestamp, frame):
            # Here you can perform your yolo stuff
            # like detector.inference(frame)
            logging.info(f"Processing frame at {timestamp} {frame.shape}")
            time.sleep(0.1)
    
        def start(self):
            self.stop_event.clear()
            self.thread = Thread(target=self._consume_frames, daemon=True)
            self.thread.start()
    
        def stop(self):
            self.stop_event.set()
            self.thread.join()
    
    
    # Example Usage
    if __name__ == "__main__":
        logging.basicConfig(level=logging.INFO)
    
        # Parameters
        rtsp_url = "rtsp://admin:gcc12345@192.168.6.7:554/Streaming/Channels/101/"
        frame_buffer_size = 25
        fps = 1
    
        # RTSP producer
        producer = RtspFrameProducer(url=rtsp_url, frame_buffer_size=frame_buffer_size)
        producer.start()
    
        # Frame consumer
        consumer = FrameConsumer(producer=producer, fps=fps)
        consumer.start()
    
        try:
            # Run for 30 seconds as an example
            time.sleep(30)
        finally:
            producer.stop()
            consumer.stop()
    

    The output should look like this:

    INFO:root:Connecting to RTSP stream: rtsp://xxx:xxx@10.0.0.47/axis-media/media.amp?videocodec=h264
    INFO:root:Processing frame at 1735813608.9273415 (1080, 1920, 3)
    INFO:root:Processing frame at 1735813610.0382552 (1080, 1920, 3)
    INFO:root:Processing frame at 1735813611.1204984 (1080, 1920, 3)
    INFO:root:Processing frame at 1735813612.2402372 (1080, 1920, 3)
    INFO:root:Processing frame at 1735813613.319367 (1080, 1920, 3)
    INFO:root:Processing frame at 1735813614.4371312 (1080, 1920, 3)
    INFO:root:Processing frame at 1735813615.5169122 (1080, 1920, 3)
    INFO:root:Processing frame at 1735813616.637501 (1080, 1920, 3)
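
    To plug the question's YOLOv8 model into this skeleton, one option (a sketch; it assumes the detector accepts a BGR ndarray, which the Ultralytics YOLO call does) is to subclass FrameConsumer and override _process_frame:

    from ultralytics import YOLO

    class YoloFrameConsumer(FrameConsumer):
        def __init__(self, producer: RtspFrameProducer, fps: int = 1,
                     weights: str = "../Yolo-Weights/yolov8x.pt"):
            super().__init__(producer, fps)
            self.model = YOLO(weights)  # load the model once, reuse it for every frame

        def _process_frame(self, timestamp, frame):
            # Inference runs only on the newest buffered frame, so a slow model
            # can no longer stall the RTSP reader thread
            results = self.model(frame, verbose=False)
            for r in results:
                logging.info(f"{timestamp}: {len(r.boxes)} boxes detected")

    The producer keeps decoding at camera speed no matter how long inference takes; the consumer simply skips frames it cannot keep up with, so a heavier model only lowers the effective detection rate instead of breaking the RTSP connection.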