python, intel-realsense

How to save Intel RealSense images in a list (pyrealsense2)


I'm trying to save both the depth and color images of the Intel RealSense D435i camera in a list of 300 images, and then use multiprocessing to save this chunk of 300 images onto my disk. But every time I try, the program successfully appends 15 images to the list and then I get this error:

    Frame didn't arrive within 5000

I made sure I have the 64-bit version of Python 3.6 installed, and the camera streams perfectly well when I do not try to save the images in a list. The RealSense Viewer works great too. I also tried different resolutions and frame rates, but that doesn't seem to help either. What is interesting is that if I only save the color images, I do not get the same error; instead I get the same color image over and over in the list.

import numpy as np
import pyrealsense2 as rs

if __name__ == '__main__':
    pipeline = rs.pipeline()
    config = rs.config()

    config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
    config.enable_stream(rs.stream.color, 1280, 720, rs.format.bgr8, 30)
    profile = pipeline.start(config)

    depth_sensor = profile.get_device().first_depth_sensor()
    depth_sensor.set_option(
        rs.option.visual_preset, 3
    )  # Set high accuracy preset for the depth sensor
    depth_scale = depth_sensor.get_depth_scale()

    align_to = rs.stream.color
    align = rs.align(align_to)

    # Init variables
    im_count = 0
    image_chunk = []
    image_chunk2 = []
    try:
        while True:
            # Wait for a coherent pair of frames: depth and color
            frames = pipeline.wait_for_frames()
            aligned_frames = align.process(frames)
            aligned_depth_frame = aligned_frames.get_depth_frame()
            color_frame = aligned_frames.get_color_frame()

            if not aligned_depth_frame or not color_frame:
                print("problem here")
                raise RuntimeError("Could not acquire depth or color frames.")

            depth_image = np.asanyarray(aligned_depth_frame.get_data())
            color_image = np.asanyarray(color_frame.get_data())

            image_chunk.append(color_image)
            image_chunk2.append(depth_image)

    except Exception as e:
        print(e)

    finally:
        # Stop streaming
        pipeline.stop()

I simply need it to save 300 images in a row, that's all, so I am quite puzzled as to what is causing this issue.


Solution

  • Holding onto the frame locks its memory inside the SDK's frame pool, and once the pool's limit is reached (which matches the ~15 images you manage to append) the pipeline can no longer acquire new images. Even though you are creating a numpy array, its data still lives in the frame's buffer. You need to clone the image after you create it to release the link to the frame's memory; that the un-copied arrays merely reference frame memory is also the likely reason you saw the same color image over and over.

    depth_image = np.asanyarray(aligned_depth_frame.get_data())
    color_image = np.asanyarray(color_frame.get_data())
    
    depth_image = depth_image.copy()
    color_image = color_image.copy()
    
    image_chunk.append(color_image)
    image_chunk2.append(depth_image)
    

    Read more on frames and memory management here: https://dev.intelrealsense.com/docs/frame-management
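
    As an alternative, the SDK's frame.keep() marks a frame so its memory is not recycled, though memory then grows with every kept frame, so it only suits bounded captures. Below is a sketch of the copy-based fix applied to your stated goal of 300 images plus a multiprocessing hand-off to disk; the save_chunk worker and the .npy file naming are illustrative assumptions, not part of your original code:

    import multiprocessing as mp

    import numpy as np
    import pyrealsense2 as rs


    def save_chunk(colors, depths, prefix):
        # Illustrative worker: persist each image pair to disk as .npy files
        for i, (c, d) in enumerate(zip(colors, depths)):
            np.save('{}_color_{:04d}.npy'.format(prefix, i), c)
            np.save('{}_depth_{:04d}.npy'.format(prefix, i), d)


    if __name__ == '__main__':
        pipeline = rs.pipeline()
        config = rs.config()
        config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
        config.enable_stream(rs.stream.color, 1280, 720, rs.format.bgr8, 30)
        pipeline.start(config)

        align = rs.align(rs.stream.color)
        image_chunk, image_chunk2 = [], []
        try:
            while len(image_chunk) < 300:
                frames = pipeline.wait_for_frames()
                aligned_frames = align.process(frames)
                aligned_depth_frame = aligned_frames.get_depth_frame()
                color_frame = aligned_frames.get_color_frame()
                if not aligned_depth_frame or not color_frame:
                    continue
                # .copy() detaches the arrays from the SDK's frame memory
                image_chunk.append(np.asanyarray(color_frame.get_data()).copy())
                image_chunk2.append(np.asanyarray(aligned_depth_frame.get_data()).copy())
        finally:
            pipeline.stop()

        # Hand the completed chunk to a separate process for disk I/O
        worker = mp.Process(target=save_chunk, args=(image_chunk, image_chunk2, 'chunk0'))
        worker.start()
        worker.join()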

    I created a wrapper class to extract the various elements out of the frame set that can't be recreated later. It's a bit heavy, but it shows some common operations that might be helpful to others:

    # import the necessary packages
    import logging
    
    import cv2 as cv2
    import numpy as np
    import pyrealsense2 as rs
    
    # create local logger to allow adjusting verbosity at the module level
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    
    colorizer = None
    align_to_depth = None
    align_to_color = None
    pointcloud = None
    
    
    class IntelD435ImagePacket:
        """
        Class that contains image and associated processing data.
        """
    
        @property
        def frame_id(self):
            return self._frame_id
    
        @property
        def timestamp(self):
            return self._timestamp
    
        @property
        def image_color(self):
            return self._image_color
    
        @property
        def image_depth(self):
            return self._image_depth
    
        @property
        def image_color_aligned(self):
            if self._image_color_aligned is None:
                self._image_color_aligned = self._align_color_to_depth(self.image_color, self.depth_intrinsics,
                                                                       self.color_intrinsics,
                                                                       self.color_to_depth_extrinsics)
            return self._image_color_aligned
    
        @property
        def image_depth_aligned(self):
            if self._image_depth_aligned is None:
                self._image_depth_aligned = self._align_depth_to_color(self.image_depth, self.depth_intrinsics,
                                                                       self.color_intrinsics,
                                                                       self.depth_to_color_extrinsics)
            return self._image_depth_aligned
    
        @property
        def image_depth_colorized(self):
            # NOTE: properties cannot take arguments in Python, so this uses the
            # defaults; call _colorize(...) directly for a custom colormap or range.
            if self._image_depth_colorized is None:
                self._image_depth_colorized = self._colorize(self.image_depth)
            return self._image_depth_colorized
    
        @property
        def image_depth_8U(self):
            # NOTE: properties cannot take arguments in Python, so the range is
            # always derived from the image; call _convert_to_GRAY_8U(...) to customize.
            min_value = np.amin(self.image_depth)
            max_value = np.amax(self.image_depth)
            if (self._image_depth_8U_min != min_value) or (self._image_depth_8U_max != max_value):
                self._image_depth_8U = None
            if self._image_depth_8U is None:
                self._image_depth_8U_min = min_value
                self._image_depth_8U_max = max_value
                self._image_depth_8U = self._convert_to_GRAY_8U(self.image_depth, min_value, max_value)
            return self._image_depth_8U
    
        @property
        def depth_intrinsics(self):
            return self._depth_intrinsics
    
        @property
        def color_intrinsics(self):
            return self._color_intrinsics
    
        @property
        def depth_to_color_extrinsics(self):
            return self._depth_to_color_extrinsics
    
        @property
        def color_to_depth_extrinsics(self):
            return self._color_to_depth_extrinsics
    
        @property
        def pointcloud(self):
            if self._pointcloud is None:
                self._pointcloud = self._create_pointcloud(self.image_depth, self.depth_intrinsics).reshape(-1, 3)
            return self._pointcloud
    
        @property
        def pointcloud_texture(self):
            if self._pointcloud is None:
                self._pointcloud_texture = self._create_pointcloud_texture(self.image_color, self.color_intrinsics,
                                                                           self.color_to_depth_extrinsics).reshape(-1, 3)
            return self._pointcloud_texture
    
        @staticmethod
        def _align_color_to_depth(image_color, depth_intrinsics, color_intrinsics, color_to_depth_extrinsics):
            # NOTE: a single homography is only an approximation of depth-aware
            # alignment (exact for a planar scene); rs.align reprojects per pixel.
            convert_3x3_to_4x4 = np.eye(3, 4)
            convert_4x4_to_3x3 = np.eye(4, 3)
            H = depth_intrinsics @ convert_3x3_to_4x4 @ color_to_depth_extrinsics @ convert_4x4_to_3x3 @ np.linalg.inv(
                color_intrinsics)
            H = H / H[2][2]
            return cv2.warpPerspective(image_color, H, (image_color.shape[1], image_color.shape[0]))
    
        @staticmethod
        def _align_depth_to_color(image_depth, depth_intrinsics, color_intrinsics, depth_to_color_extrinsics):
            convert_3x3_to_4x4 = np.eye(3, 4)
            convert_4x4_to_3x3 = np.eye(4, 3)
            H = color_intrinsics @ convert_3x3_to_4x4 @ depth_to_color_extrinsics @ convert_4x4_to_3x3 @ np.linalg.inv(
                depth_intrinsics)
            H = H / H[2][2]
            return cv2.warpPerspective(image_depth, H, (image_depth.shape[1], image_depth.shape[0]))
    
        def _convert_to_GRAY_8U(self, image, min_value=None, max_value=None, dynamic_range=None):
            if dynamic_range is None:
                dynamic_range = False
    
            # Perform grayscale conversion if needed
            image_gray = None
            if len(image.shape) == 3:
                image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            elif len(image.shape) == 2:
                image_gray = image
            else:
                raise ValueError('image was an unknown format.  Expected len(image.shape) = 2 or 3')
    
            # Check if further processing is needed
            if not dynamic_range and min_value is None and max_value is None and image_gray.dtype == np.uint8:
                return image_gray
    
            # Determine range of normalization
            if min_value is None or max_value is None:
                if dynamic_range:
                    # assume full range based on min/max pixel values
                    if min_value is None:
                        min_value = np.amin(image_gray)
                    if max_value is None:
                        max_value = np.amax(image_gray)
                else:
                    # assume full range based on data type
                    if np.issubdtype(image_gray.dtype, np.integer):
                        # int range is the full range of the value type
                        dtype_info = np.iinfo(image_gray.dtype)
                        if min_value is None:
                            min_value = dtype_info.min
                        if max_value is None:
                            max_value = dtype_info.max
                    else:
                        # float range is 0.0-1.0
                        if min_value is None:
                            min_value = 0.0
                        if max_value is None:
                            max_value = 1.0
            # return cv2.normalize(image_gray, None, min_value, max_value, cv2.NORM_MINMAX, cv2.CV_8U)
            if max_value == min_value:
                return np.zeros_like(image_gray, dtype=np.uint8)
            range_scale_factor = 255.0 / (max_value - min_value)
            return np.round((image_gray - min_value) * range_scale_factor).clip(0, 255).astype(np.uint8)
    
        def _colorize(self, image, colormap=None, min_value=None, max_value=None, dynamic_range=None):
            if colormap is None:
                colormap = cv2.COLORMAP_JET
            if dynamic_range is None:
                dynamic_range = True
            scaled = self._convert_to_GRAY_8U(image, min_value, max_value, dynamic_range)
            return cv2.applyColorMap(scaled, colormap)
    
        @staticmethod
        def _rs_colorize(depth_frame, colormap=None, min_value=None, max_value=None, visual_preset=None):
            global colorizer
    
            if colormap is None:
                colormap = 0
            if visual_preset is None:
                if min_value is not None and max_value is not None:
                    visual_preset = 1
                elif min_value is not None:
                    visual_preset = 2
                elif max_value is not None:
                    visual_preset = 3
                else:
                    visual_preset = 0

            # Apply options to colorizer (explicit None checks so 0.0 is a valid bound)
            if min_value is not None:
                colorizer.set_option(rs.option.min_distance, min_value)
            if max_value is not None:
                colorizer.set_option(rs.option.max_distance, max_value)
            # colorizer.set_option(rs.option.histogram_equalization_enabled, 0)
            colorizer.set_option(rs.option.visual_preset, visual_preset)  # 0=Dynamic, 1=Fixed, 2=Near, 3=Far
            colorizer.set_option(rs.option.color_scheme,
                                 colormap)  # 0=Jet, 1=Classic, 2=WhiteToBlack, 3=BlackToWhite, 4=Bio, 5=Cold, 6=Warm, 7=Quantized, 8=Pattern
    
            return colorizer.colorize(depth_frame)
    
        @staticmethod
        def _convert_rs_intrinsics_to_opencv_matrix(rs_intrinsics):
            fx = rs_intrinsics.fx
            fy = rs_intrinsics.fy
            cx = rs_intrinsics.ppx
            cy = rs_intrinsics.ppy
            s = 0  # skew
            return np.array([[fx, s, cx],
                             [0.0, fy, cy],
                             [0.0, 0.0, 1.0]]).copy()
    
        @staticmethod
        def _convert_rs_extrinsics_to_opencv_matrix(rs_extrinsics):
            # https://www.brainvoyager.com/bv/doc/UsersGuide/CoordsAndTransforms/SpatialTransformationMatrices.html
            extrinsics_rotation = np.asanyarray(rs_extrinsics.rotation).reshape(3, 3).copy()
            extrinsics_translation = np.asanyarray(rs_extrinsics.translation).reshape(1, 3).copy()
            extrinsics_projection = np.concatenate((extrinsics_rotation, extrinsics_translation.T), axis=1)
            extrinsics_projection = np.concatenate((extrinsics_projection, np.array([[0, 0, 0, 1]])), axis=0)
            return extrinsics_projection  # , extrinsics_rotation, extrinsics_translation
    
        @staticmethod
        def _create_rs_pointcloud(pointcloud, depth_frame):
            points = pointcloud.calculate(depth_frame)
            vtx = np.asanyarray(points.get_vertices())
            return vtx.view(np.float32).reshape(vtx.shape + (-1,)).copy(), points
    
        @staticmethod
        def _create_rs_pointcloud_texture(pointcloud, color_frame, points):
            pointcloud.map_to(color_frame)
            tex = np.asanyarray(points.get_texture_coordinates())
            return tex.view(np.float32).reshape(tex.shape + (-1,)).copy()
    
        @staticmethod
        def _create_pointcloud(image_depth, depth_intrinsics):
            # NOTE: assumes the D435's default depth scale of 0.001 m per unit;
            # query depth_sensor.get_depth_scale() for the device's actual scale
            arr_depth = image_depth * 0.001
            cx = depth_intrinsics[0, 2]
            cy = depth_intrinsics[1, 2]
            fx = depth_intrinsics[0, 0]
            fy = depth_intrinsics[1, 1]
            arr = np.indices(arr_depth.shape).transpose(1, 2, 0).astype(arr_depth.dtype)
            arr = arr[..., ::-1]
            arr[:, :, 0] = (arr[:, :, 0] - cx) / fx * arr_depth
            arr[:, :, 1] = (arr[:, :, 1] - cy) / fy * arr_depth
            return np.dstack((arr, arr_depth))
    
        def _create_pointcloud_texture(self, image_color, color_intrinsics, color_to_depth_extrinsics):
            raise NotImplementedError()
    
        def __init__(self, frame_set, frame_id=None, timestamp=None, use_frame_blocking_methods=True, *args, **kwargs):
            # The frame must be cloned and released to allow the next acquisition to occur.
            # Frames are not serializable, so the main goal is to capture the color/depth
            # images and their intrinsics/extrinsics for downstream processing.
            # It's unfortunate that the D435 doesn't provide a non-blocking way to do this,
            # as the built-in functions are efficient but limit performance to 15 FPS.
            # https://dev.intelrealsense.com/docs/projection-texture-mapping-and-occlusion-with-intel-realsense-depth-cameras
            self._use_frame_blocking_methods = use_frame_blocking_methods
    
            # Extract color image
            color_frame = frame_set.get_color_frame()
            self._image_color = np.asanyarray(color_frame.get_data()).copy()
    
            # Extract depth image
            depth_frame = frame_set.get_depth_frame()
            self._image_depth = np.asanyarray(depth_frame.get_data()).copy()
    
            self._pointcloud = None
            self._pointcloud_texture = None
            self._image_color_aligned = None
            self._image_depth_aligned = None
            self._image_depth_colorized = None
            self._image_depth_aligned_colorized = None
            self._image_depth_8U = None
            self._image_depth_8U_min = None
            self._image_depth_8U_max = None
            if frame_id:
                self._frame_id = frame_id
            else:
                self._frame_id = frame_set.frame_number
            if timestamp:
                self._timestamp = timestamp
            else:
                self._timestamp = frame_set.timestamp
            self.__dict__.update(kwargs)
    
            # Intrinsics map between a camera's 3D space and its pixel coordinates
            # Extrinsics map between the 3D spaces of two cameras
            # i.e., to get from depth pixels to color pixels: deproject with the depth
            # intrinsics, transform with the depth-to-color extrinsics, then project
            # with the color intrinsics
    
            # Get depth intrinsics
            depth_profile = frame_set.get_depth_frame().get_profile()
            depth_video_stream_profile = depth_profile.as_video_stream_profile()
            rs_depth_intrinsics = depth_video_stream_profile.get_intrinsics()
            self._depth_intrinsics = self._convert_rs_intrinsics_to_opencv_matrix(rs_depth_intrinsics)
    
            # Get color intrinsics
            color_profile = frame_set.get_color_frame().get_profile()
            color_video_stream_profile = color_profile.as_video_stream_profile()
            rs_color_intrinsics = color_video_stream_profile.get_intrinsics()
            self._color_intrinsics = self._convert_rs_intrinsics_to_opencv_matrix(rs_color_intrinsics)
    
            # Get depth_to_color extrinsics
            rs_depth_to_color_extrinsics = depth_video_stream_profile.get_extrinsics_to(color_video_stream_profile)
            self._depth_to_color_extrinsics = self._convert_rs_extrinsics_to_opencv_matrix(rs_depth_to_color_extrinsics)
    
            # Get color_to_depth extrinsics
            rs_color_to_depth_extrinsics = color_video_stream_profile.get_extrinsics_to(depth_video_stream_profile)
            self._color_to_depth_extrinsics = self._convert_rs_extrinsics_to_opencv_matrix(rs_color_to_depth_extrinsics)
    
            # https://homepages.inf.ed.ac.uk/rbf/CVonline/LOCAL_COPIES/EPSRC_SSAZ/node3.html
            # I = intrinsic matrix, 3x3
            # E = extrinsic matrix, 4x4 (as constructed above)
            # project 3D points to images
            # cv2.Rodrigues(rotation1, rvec1); // 3x3 rot matrix to 3x1
            # image_pixel_points = cv2.projectPoints(1xN 3dpoints vector, np.zeros(3), np.zeros(3), cameraMatrix, None)
            # image_pixels_array = image_pixels_array.reshape(-1, 2).astype(np.uint8)
    
            # Project 3D points to images
            # image1_pixel_points = cv2.perspectiveTransform(depth image, 4x4 perspective transform matrix)
    
            if use_frame_blocking_methods:
                # Get pointcloud
                global pointcloud
                if pointcloud is None:
                    pointcloud = rs.pointcloud()
                self._pointcloud, points = self._create_rs_pointcloud(pointcloud, depth_frame)
    
                # Get pointcloud texture mapping
                self._pointcloud_texture = self._create_rs_pointcloud_texture(pointcloud, color_frame, points)
    
                # Align the color frame to depth frame and extract color image
                global align_to_depth
                if align_to_depth is None:
                    align_to_depth = rs.align(rs.stream.depth)
                color_frame_aligned = align_to_depth.process(frame_set).get_color_frame()
                self._image_color_aligned = np.asanyarray(color_frame_aligned.get_data()).copy()
    
                # Align the depth frame to color frame and extract depth image
                global align_to_color
                if align_to_color is None:
                    align_to_color = rs.align(rs.stream.color)
                depth_frame_aligned = align_to_color.process(frame_set).get_depth_frame()
                self._image_depth_aligned = np.asanyarray(depth_frame_aligned.get_data()).copy()
    
                # Colorize depth images
                global colorizer
                if colorizer is None:
                    colorizer = rs.colorizer()
                depth_frame_colorized = self._rs_colorize(depth_frame)
                self._image_depth_colorized = np.asanyarray(depth_frame_colorized.get_data()).copy()
                depth_frame_aligned_colorized = self._rs_colorize(depth_frame_aligned)
                self._image_depth_aligned_colorized = np.asanyarray(depth_frame_aligned_colorized.get_data()).copy()
    

    You use it by passing the frame_set from the data stream like so:

    # First import the library
    import pyrealsense2 as rs
    
    # Create a context object. This object owns the handles to all connected realsense devices
    pipeline = rs.pipeline()

    # Configure streams
    config = rs.config()
    config.enable_stream(rs.stream.depth, 1280, 720, rs.format.z16, 30)
    config.enable_stream(rs.stream.color, 1280, 720, rs.format.bgr8, 30)

    # Start streaming
    pipeline.start(config)

    packets = []
    try:
        while len(packets) < 300:
            # This call waits until a new coherent set of frames is available on a device.
            # Calls to get_frame_data(...) and get_frame_timestamp(...) on a device will
            # return stable values until wait_for_frames(...) is called again
            frames = pipeline.wait_for_frames()
            packets.append(IntelD435ImagePacket(frames))
    except Exception as e:
        print(e)
    finally:
        # Stop streaming
        pipeline.stop()
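
    Each packet is fully detached from the SDK's frame pool, so it is safe to queue, pass to another process, or save later. For example (property names as defined in the class above):

    pkt = packets[0]
    color = pkt.image_color                   # HxWx3 uint8 BGR image
    depth = pkt.image_depth                   # HxW uint16 image, in depth units
    depth_aligned = pkt.image_depth_aligned   # depth aligned to the color camera
    cloud = pkt.pointcloud                    # Nx3 float32 XYZ points, in meters
    K = pkt.color_intrinsics                  # 3x3 OpenCV-style camera matrix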