I am conducting an experiment involving YOLOv8, Python3, and RTSP as the video source.
When designing the program, I expected the RTSP stream to remain stable without frequent reconnections. In practice, however, the RTSP connection is unstable and keeps reconnecting. The larger the YOLO model I use, the worse the reconnection problem becomes.
I have conducted an in-depth hardware analysis:
My suspicion is that the RTSP protocol in OpenCV running on Python3 is rather slow and contains bugs, making it unable to handle significant video delays. Indeed, the results obtained during object detection processing exhibit considerable delay.
Below is the program I am using:
import numpy as np
from ultralytics import YOLO
import cv2
import cvzone
import time, math, datetime, os
from sort import *
# FFmpeg capture options read by OpenCV's FFmpeg backend.
# Format: "key;value" pairs joined with "|" (not "," / "=").
# os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp"
os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp|max_delay;10000000"

# Folder that receives the snapshots written when an object crosses the line.
output_folder = "Captured"
os.makedirs(output_folder, exist_ok=True)

# Counters shared with process_video() through `global`.
error_detection = 0
int_counting = 0

# RTSP Source
# NOTE(review): credentials are embedded in the URL; consider loading them from
# the environment instead of committing them to source.
rtsp_source = "rtsp://admin:gcc12345@192.168.6.7:554/Streaming/Channels/101/" # HIKVISION
# rtsp_source = "rtsp://admin:admin12345@192.168.6.2:554/cam/realmonitor?channel=3&subtype=0" # DAHUA
# rtsp_source = "rtsp://admin:admin12345@192.168.6.2:554/cam/realmonitor?channel=2&subtype=0" # DAHUA
# rtsp_source = "rtsp://admin:admin12345@192.168.6.2:554/cam/realmonitor?channel=4&subtype=0" # DAHUA
# rtsp_source = "../Videos/cars.mp4" # VIDEO

# Region-of-interest mask, AND-ed with every resized frame in process_video().
mask = cv2.imread("mask.png")
if mask is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("mask.png could not be read")

# Tracking
tracker = Sort(max_age=20, min_hits=3, iou_threshold=0.5)

# Counting line as [x1, y1, x2, y2] in resized-frame pixel coordinates.
# limits = [300, 350, 673, 350]
limits = [550, 220, 550, 670]

# Initialise the YOLO model
model = YOLO("../Yolo-Weights/yolov8x.pt")

# Class labels, indexed by the integer class id reported for each detection.
className = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
             'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter',
             'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
             'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
             'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
             'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
             'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
             'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
             'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
             'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
             'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
             'scissors', 'teddy bear', 'hair drier', 'toothbrush']
# Function that resets the number of reconnection attempts
def reset_attempts():
    """Return the starting budget of reconnection attempts (always 50)."""
    initial_budget = 50
    return initial_budget
# Function that processes the video and tells the caller whether to reconnect
def process_video(attempts):
    """Read frames from the global ``camera``, detect and track vehicles, count line crossings.

    For every frame: resize to 1300x700, AND with the global ``mask``, run the
    global YOLO ``model``, feed confident vehicle boxes to the global SORT
    ``tracker``, and count each track id once when its centre lies within
    20 px of the vertical counting line described by ``limits``. A snapshot is
    written to ``output_folder`` for every counted object.

    Args:
        attempts: Remaining reconnection budget supplied by the caller.

    Returns:
        True if a frame could not be read and ``attempts`` is positive
        (the caller should reconnect); False if the read failed with no
        attempts left, or if the user pressed 'q'.
    """
    global error_detection, tracker, int_counting

    vehicle_classes = {"car", "truck", "bus", "motorcycle"}
    # Track ids already counted; a set gives O(1) membership tests.
    totalCount = set()

    # Timestamps for the FPS computation and for the periodic tracker reset.
    # The reset timer must live outside the loop, otherwise it restarts on
    # every frame and the reset never triggers.
    prev_time = time.time()
    last_tracker_reset = prev_time

    while True:
        success, img = camera.read()
        if not success:
            error_detection += 1
            print("[ERROR] Gagal mendapatkan frame, mencoba reconnect..." + datetime.datetime.now().strftime("%m-%d-%Y %I:%M:%S%p"))
            camera.release()
            if attempts > 0:
                time.sleep(min(5, (50 - attempts) / 10))
                return True
            return False

        current_time = time.time()

        # Reset the tracker every 5 minutes (300 seconds)
        if current_time - last_tracker_reset > 300:
            # NOTE(review): the module-level tracker uses iou_threshold=0.5;
            # confirm whether 0.3 here is intentional.
            tracker = Sort(max_age=20, min_hits=3, iou_threshold=0.3)
            totalCount.clear()
            last_tracker_reset = current_time
            print("[INFO] Tracker di-reset secara berkala.")

        # Compute FPS, guarding against a zero interval between two reads
        elapsed = current_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0
        prev_time = current_time

        img_resized = cv2.resize(img, (1300, 700))  # Resize frame
        # img_resized = img

        # Draw the FPS on the frame
        cv2.putText(img_resized, f"FPS: {int(fps)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # assumes mask has the same size and channel count as img_resized — TODO confirm
        img_region = cv2.bitwise_and(img_resized, mask)

        # Detection with YOLO
        results = model(img_region, stream=True)

        # Rows of [x1, y1, x2, y2, conf] handed to the tracker
        detections = np.empty((0, 5))
        for r in results:
            boxes = r.boxes
            for box in boxes:
                # Bounding box
                x1, y1, x2, y2 = box.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                # Confidence rounded up to two decimals
                conf = math.ceil((box.conf[0] * 100)) / 100

                # Class name
                cls = int(box.cls[0])
                currentClass = className[cls]

                # The confidence threshold applies to every vehicle class
                # (the original `a or b or c or d and e` only gated the last one).
                if currentClass in vehicle_classes and conf > 0.5:
                    # cvzone.putTextRect(img_resized, f'{className[cls]} {conf}', (max(0, x1), max(35, y1)), scale=0.7, thickness=1, offset=3)
                    # cvzone.cornerRect(img_resized, (x1, y1, w, h), l=5, rt=5)
                    currentArray = np.array([x1, y1, x2, y2, conf])
                    detections = np.vstack((detections, currentArray))

        # Tracker
        resultsTracker = tracker.update(detections)

        cv2.line(img_resized, (limits[0], limits[1]), (limits[2], limits[3]), (0, 0, 255), 5)
        # cv2.line(img_region, (limits[0], limits[1]), (limits[2], limits[3]), (0, 0, 255), 5)

        for result in resultsTracker:
            x1, y1, x2, y2, Id = result
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            # print(result)
            w, h = x2 - x1, y2 - y1
            cvzone.cornerRect(img_resized, (x1, y1, w, h), l=9, rt=2, colorR=(255, 0, 255))
            cvzone.putTextRect(img_resized, f'{int(Id)}', (max(0, x1), max(35, y1)), scale=2, thickness=3, offset=10)

            cx, cy = x1 + w // 2, y1 + h // 2
            cv2.circle(img_resized, (cx, cy), 5, (255, 0, 255), cv2.FILLED)

            # Centre within 20 px of the line's x and between its two y ends
            if limits[0] - 20 < cx < limits[0] + 20 and limits[1] < cy < limits[3]:
                if Id not in totalCount:
                    totalCount.add(Id)
                    int_counting += 1

                    # Capture an image when an object crosses the line
                    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
                    file_name = f"capture_{Id}_{timestamp}.png"
                    file_path = os.path.join(output_folder, file_name)

                    # Save the captured image into the folder
                    cv2.imwrite(file_path, img_resized)
                    print(f"Gambar disimpan: {file_path}")

        cvzone.putTextRect(img_resized, f'Count: {int_counting} Error Detection: {int(error_detection)}', (50, 650))

        # Show the resulting frame
        cv2.imshow("Image", img_resized)
        # cv2.imshow("ImageRegion", img_region)

        # Press 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        # cv2.waitKey(0)

    return False
# Initialise `recall` (keep reconnecting while True) and `attempts`
# (how many more failed opens are tolerated before giving up).
recall = True
attempts = reset_attempts()
while recall:
    # Open the RTSP stream
    camera = cv2.VideoCapture(rtsp_source, cv2.CAP_FFMPEG)
    camera.set(cv2.CAP_PROP_BUFFERSIZE, 1)
    camera.set(cv2.CAP_PROP_FPS, 10)
    if camera.isOpened():
        print("[INFO] Camera connected at " +
              datetime.datetime.now().strftime("%m-%d-%Y %I:%M:%S%p"))
        attempts = reset_attempts()  # Reset attempts after a successful connection
        recall = process_video(attempts)  # Process the video
    else:
        print("[ERROR] Camera not opened " +
              datetime.datetime.now().strftime("%m-%d-%Y %I:%M:%S%p"))
        camera.release()
        attempts -= 1
        print(f"Remaining attempts: {attempts}")
        if attempts <= 0:
            # Without this check the counter went negative and the loop never ended.
            print("[ERROR] No attempts left, giving up.")
            break
        # Pause before trying again
        time.sleep(5)

# Close the stream (not released on the 'q' exit path) and the windows
camera.release()
cv2.destroyAllWindows()
Environment:
absl-py==2.1.0
antlr4-python3-runtime==4.9.3
asttokens==2.4.1
certifi==2024.8.30
charset-normalizer==3.3.2
colorama==0.4.6
contourpy==1.3.0
cvzone==1.5.6
cycler==0.12.1
decorator==5.1.1
exceptiongroup==1.2.2
executing==2.1.0
filelock==3.16.1
filterpy==1.4.5
fonttools==4.54.1
fsspec==2024.9.0
grpcio==1.66.1
hydra-core==1.3.2
idna==3.10
imageio==2.35.1
ipython==8.27.0
jedi==0.19.1
Jinja2==3.1.4
kiwisolver==1.4.7
lap==0.4.0
Markdown==3.7
MarkupSafe==2.1.5
matplotlib==3.9.2
matplotlib-inline==0.1.7
mpmath==1.3.0
networkx==3.3
numpy==1.26.4
omegaconf==2.3.0
opencv-python==4.10.0.84
packaging==24.1
pandas==2.2.3
parso==0.8.4
pillow==10.4.0
prompt_toolkit==3.0.47
protobuf==5.28.2
psutil==6.0.0
pure_eval==0.2.3
py-cpuinfo==9.0.0
Pygments==2.18.0
pyparsing==3.1.4
python-dateutil==2.9.0.post0
pytz==2024.2
PyWavelets==1.7.0
PyYAML==6.0.2
requests==2.32.3
scikit-image==0.19.3
scipy==1.14.1
seaborn==0.13.2
sentry-sdk==2.14.0
six==1.16.0
stack-data==0.6.3
sympy==1.13.3
tensorboard==2.17.1
tensorboard-data-server==0.7.2
thop==0.1.1.post2209072238
tifffile==2024.9.20
torch==1.13.1+cu116
torchaudio==0.13.1+cu116
torchvision==0.14.1+cu116
tqdm==4.66.5
traitlets==5.14.3
typing_extensions==4.12.2
tzdata==2024.2
ultralytics==8.2.101
ultralytics-thop==2.0.8
urllib3==2.2.3
wcwidth==0.2.13
Werkzeug==3.0.4
Can anyone help me solve this RTSP issue?
The problem is that you perform object detection inside the frame-read loop, so frames are not read while the model is busy. Decouple frame reading from detection: have a reader thread push frames into a queue (or ring buffer) and have the detection thread pull the latest frame from it. See the example below:
import av
import time
import logging
from threading import Event, Thread, Lock
from collections import deque
# Raise the 'libav' logger's threshold to ERROR so lower-severity records are dropped.
logging.getLogger('libav').setLevel(logging.ERROR)
class RtspFrameProducer:
    """Decode an RTSP stream on a background thread into a bounded frame buffer.

    Decoded frames are stored as ``(unix_timestamp, bgr_ndarray)`` tuples in
    ``frame_buffer``, a ``deque`` that drops the oldest entry once
    ``frame_buffer_size`` is reached. Readers should hold ``lock`` while
    accessing the buffer.
    """

    def __init__(self, url: str, frame_buffer_size: int = 25, transport: str = "tcp"):
        """Store the connection settings; no connection is opened until start().

        Args:
            url: RTSP URL to open.
            frame_buffer_size: Maximum number of frames kept in the buffer.
            transport: Value passed as the ``rtsp_transport`` option.
        """
        self.url = url
        self.frame_buffer = deque(maxlen=frame_buffer_size)
        self.transport = transport
        self.stop_event = Event()
        self.lock = Lock()
        # Set by start(); kept as None so stop() is safe before start().
        self.thread = None

    def _decode_frames(self):
        """Thread body: open the stream and append decoded frames until stopped.

        Any exception is logged and ends the thread; the container is always
        closed on the way out.
        """
        container = None
        try:
            logging.info(f"Connecting to RTSP stream: {self.url}")
            container = av.open(self.url, 'r', options={'rtsp_transport': self.transport})
            # Restrict demuxing to the first video stream: packets from an audio
            # track cannot be converted with to_ndarray(format='bgr24').
            for packet in container.demux(video=0):
                for frame in packet.decode():
                    if self.stop_event.is_set():
                        logging.info(f"Stopping RTSP connection: {self.url}")
                        return
                    ts = time.time()  # Unix timestamp
                    # Convert outside the lock so readers are not blocked
                    # for the duration of the pixel-format conversion.
                    frame_image = frame.to_ndarray(format='bgr24')
                    with self.lock:
                        self.frame_buffer.append((ts, frame_image))
        except Exception as e:
            logging.error(f"Error in RTSP connection: {e}")
        finally:
            if container is not None:
                container.close()

    def start(self):
        """Clear the stop flag and launch the decoding thread (daemon)."""
        self.stop_event.clear()
        self.thread = Thread(target=self._decode_frames, daemon=True)
        self.thread.start()

    def stop(self):
        """Signal the decoding thread to stop and wait for it to finish.

        Does nothing beyond setting the flag if start() was never called.
        """
        self.stop_event.set()
        if self.thread is not None:
            self.thread.join()
class FrameConsumer:
    """Periodically take the newest frame from a producer and process it.

    The producer must expose ``lock`` and ``frame_buffer`` (a sequence of
    ``(timestamp, frame)`` tuples, newest last).
    """

    def __init__(self, producer: "RtspFrameProducer", fps: int = 1):
        """Store the producer and polling rate; nothing runs until start().

        Args:
            producer: Source of buffered frames.
            fps: How many times per second the buffer is polled.

        Raises:
            ValueError: If ``fps`` is not positive.
        """
        if fps <= 0:
            raise ValueError("fps must be positive")
        self.producer = producer
        self.fps = fps
        self.stop_event = Event()
        # Set by start(); kept as None so stop() is safe before start().
        self.thread = None
        # Timestamp of the last frame handed to _process_frame.
        self._last_ts = None

    def _take_latest(self):
        """Return the newest ``(timestamp, frame)`` not seen yet, else None.

        The producer lock is held only while reading the buffer, never while
        the frame is being processed.
        """
        with self.producer.lock:
            if not self.producer.frame_buffer:
                return None
            ts, frame = self.producer.frame_buffer[-1]  # Get the latest frame
        if ts == self._last_ts:
            # No new frame since the previous poll; avoid processing it twice.
            return None
        self._last_ts = ts
        return ts, frame

    def _consume_frames(self):
        """Thread body: poll the producer every ``1 / fps`` seconds until stopped."""
        interval = 1 / self.fps
        # Event.wait doubles as an interruptible sleep, so stop() returns promptly.
        while not self.stop_event.wait(interval):
            latest = self._take_latest()
            if latest is not None:
                self._process_frame(*latest)

    def _process_frame(self, timestamp, frame):
        """Handle one frame; placeholder that logs and sleeps 0.1 s."""
        # Here you can perform your yolo stuff
        # like detector.inference(frame)
        logging.info(f"Processing frame at {timestamp} {frame.shape}")
        time.sleep(0.1)

    def start(self):
        """Clear the stop flag and launch the consumer thread (daemon)."""
        self.stop_event.clear()
        self.thread = Thread(target=self._consume_frames, daemon=True)
        self.thread.start()

    def stop(self):
        """Signal the consumer thread to stop and wait for it to finish.

        Does nothing beyond setting the flag if start() was never called.
        """
        self.stop_event.set()
        if self.thread is not None:
            self.thread.join()
# Example Usage
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Parameters
    rtsp_url = "rtsp://admin:gcc12345@192.168.6.7:554/Streaming/Channels/101/"
    frame_buffer_size = 25
    fps = 1

    # RTSP producer
    producer = RtspFrameProducer(url=rtsp_url, frame_buffer_size=frame_buffer_size)
    producer.start()

    # Frame consumer
    consumer = FrameConsumer(producer=producer, fps=fps)
    consumer.start()

    try:
        # Run for 30 seconds as an example
        time.sleep(30)
    finally:
        # Stop the consumer first (it depends on the producer), and make sure
        # the producer is stopped even if stopping the consumer raises.
        try:
            consumer.stop()
        finally:
            producer.stop()
The output should look like this:
INFO:root:Connecting to RTSP stream: rtsp://xxx:xxx@10.0.0.47/axis-media/media.amp?videocodec=h264
INFO:root:Processing frame at 1735813608.9273415 (1080, 1920, 3)
INFO:root:Processing frame at 1735813610.0382552 (1080, 1920, 3)
INFO:root:Processing frame at 1735813611.1204984 (1080, 1920, 3)
INFO:root:Processing frame at 1735813612.2402372 (1080, 1920, 3)
INFO:root:Processing frame at 1735813613.319367 (1080, 1920, 3)
INFO:root:Processing frame at 1735813614.4371312 (1080, 1920, 3)
INFO:root:Processing frame at 1735813615.5169122 (1080, 1920, 3)
INFO:root:Processing frame at 1735813616.637501 (1080, 1920, 3)