python, point-clouds, kitti

KITTI dataset: get RGB data for each point in the point cloud from the corresponding image


I am working with the KITTI dataset and I am trying to fuse the point cloud and the image at an early stage for a 3D object detection model. Each point in the point cloud has the following feature vector [x, y, z, r], where x, y, z are coordinates and r is the intensity. I need to extend this vector with the R, G, B values from the corresponding image, so that I end up with a vector [x, y, z, r, R, G, B] for each point. Can someone share a function (Python or another language) that does this properly?

I understand that I need to use the calibration file to project the points onto the image plane and then read the RGB values there, but I have no clue how to do that. There is also the problem of how to filter the point cloud so that only points within the image's field of view are kept. I have found this code:

# https://github.com/azureology/kitti-velo2cam/blob/master/proj_velo2cam.py
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

name = ''    # frame id (placeholder)
img = ''     # path to the corresponding left color image (placeholder)
binary = ''  # path to the velodyne .bin file (placeholder)
with open(f'./testing/calib/{name}.txt','r') as f:
    calib = f.readlines()
# P2 (3 x 4) for left eye
P2 = np.array([float(x) for x in calib[2].strip('\n').split(' ')[1:]]).reshape(3, 4)
R0_rect = np.array([float(x) for x in calib[4].strip('\n').split(' ')[1:]]).reshape(3, 3)
# Add a 1 in bottom-right, reshape to 4 x 4
R0_rect = np.insert(R0_rect, 3, values=[0, 0, 0], axis=0)
R0_rect = np.insert(R0_rect, 3, values=[0, 0, 0, 1], axis=1)
Tr_velo_to_cam = np.array([float(x) for x in calib[5].strip('\n').split(' ')[1:]]).reshape(3, 4)
Tr_velo_to_cam = np.insert(Tr_velo_to_cam, 3, values=[0, 0, 0, 1], axis=0)

# read raw data from binary
scan = np.fromfile(binary, dtype=np.float32).reshape((-1, 4))
points = scan[:, 0:3]  # lidar xyz (front, left, up)
# TODO: use fov filter?
velo = np.insert(points, 3, 1, axis=1).T   # homogeneous coordinates, shape (4, N)
# keep only points in front of the lidar (x >= 0)
velo = np.delete(velo, np.where(velo[0, :] < 0), axis=1)
cam = P2.dot(R0_rect.dot(Tr_velo_to_cam.dot(velo)))
# drop points that project behind the camera
cam = np.delete(cam, np.where(cam[2, :] < 0), axis=1)
# normalise by depth to get pixel coordinates u, v (z stays as depth)
cam[:2] /= cam[2, :]
# do the projection plot
plt.figure(figsize=(12, 5), dpi=96, tight_layout=True)
png = mpimg.imread(img)
IMG_H, IMG_W, _ = png.shape
# restrict canvas in range
plt.axis([0, IMG_W, IMG_H, 0])
plt.imshow(png)
# filter out points that fall outside the image canvas
u, v, z = cam
u_out = np.logical_or(u < 0, u > IMG_W)
v_out = np.logical_or(v < 0, v > IMG_H)
outlier = np.logical_or(u_out, v_out)
cam = np.delete(cam, np.where(outlier), axis=1)
# generate color map from depth
u, v, z = cam
plt.scatter([u], [v], c=[z], cmap='rainbow_r', alpha=0.5, s=2)
plt.title(name)
plt.savefig(f'{name}.png', bbox_inches='tight')
plt.show()
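
For reference, my rough understanding of the projection this code performs (based on the KITTI devkit readme, please correct me if I am wrong) is that each lidar point is written in homogeneous coordinates, multiplied through the calibration matrices, and divided by depth to get pixel coordinates, roughly like this for a single point:

x_velo = np.array([10.0, 2.0, -0.5, 1.0])       # example point [x, y, z, 1] in lidar coordinates
y = P2 @ R0_rect @ Tr_velo_to_cam @ x_velo      # P2 is 3x4, the other two padded to 4x4 as above
u, v, z = y[0] / y[2], y[1] / y[2], y[2]        # pixel coordinates and depth in the camera frame
# the point is usable only if z > 0 and 0 <= u < IMG_W and 0 <= v < IMG_H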

Solution

Building on the projection code you already have: once the points are projected onto the image plane, sample the image at each projected (u, v) pixel and attach those RGB values to the points.

    R0_rect = np.array([float(x) for x in calib[4].strip('\n').split(' ')[1:]]).reshape(3, 3)
    # Add a 1 in bottom-right, reshape to 4 x 4
    R0_rect = np.insert(R0_rect, 3, values=[0, 0, 0], axis=0)
    R0_rect = np.insert(R0_rect, 3, values=[0, 0, 0, 1], axis=1)
    Tr_velo_to_cam = np.array([float(x) for x in calib[5].strip('\n').split(' ')[1:]]).reshape(3, 4)
    Tr_velo_to_cam = np.insert(Tr_velo_to_cam, 3, values=[0, 0, 0, 1], axis=0)
    
    # read raw data from binary
    scan = np.fromfile(binary, dtype=np.float32).reshape((-1, 4))
    points = scan[:, 0:3]  # lidar xyz (front, left, up)
    # TODO: use fov filter?
    velo = np.insert(points, 3, 1, axis=1).T   # homogeneous coordinates, shape (4, N)
    # keep only points in front of the lidar (x >= 0)
    velo = np.delete(velo, np.where(velo[0, :] < 0), axis=1)
    cam = P2.dot(R0_rect.dot(Tr_velo_to_cam.dot(velo)))
    # drop points that project behind the camera; drop them from velo as well
    # so the 3D points stay aligned with their projections
    behind = np.where(cam[2, :] < 0)
    cam = np.delete(cam, behind, axis=1)
    velo = np.delete(velo, behind, axis=1)
    # normalise by depth to get pixel coordinates u, v (z stays as depth)
    cam[:2] /= cam[2, :]
    # set up the projection plot
    plt.figure(figsize=(12, 5), dpi=96, tight_layout=True)
    png = mpimg.imread(img)
    IMG_H, IMG_W, _ = png.shape
    # restrict canvas to the image extent
    plt.axis([0, IMG_W, IMG_H, 0])
    # plt.imshow(png)
    # filter out points that fall outside the image
    u, v, z = cam
    u_out = np.logical_or(u < 0, u > IMG_W)
    v_out = np.logical_or(v < 0, v > IMG_H)
    outlier = np.where(np.logical_or(u_out, v_out))
    cam = np.delete(cam, outlier, axis=1)
    velo = np.delete(velo, outlier, axis=1)
    u, v, z = cam
    
    # Adding rgb data: sample the image at every projected pixel
    rgb_values = []
    for u_coord, v_coord in zip(u, v):
        u_int, v_int = int(round(u_coord)), int(round(v_coord))
        if 0 <= u_int < IMG_W and 0 <= v_int < IMG_H:
            rgb_value = png[v_int, u_int][:3]  # drop an alpha channel if present
            rgb_values.append(rgb_value)
        else:
            # rounding can push a pixel just outside the image
            rgb_values.append((0, 0, 0))

    plt.scatter([u], [v], c=rgb_values, alpha=0.5, s=2)
    plt.title(name)
    plt.savefig(f'{name}.png', bbox_inches='tight')
    plt.show()
    
    
    
    import open3d as o3d

    # Create an Open3D point cloud from the points that are visible in the image
    # (velo was filtered in step with cam above, so it lines up with rgb_values)
    point_cloud = o3d.geometry.PointCloud()
    point_cloud.points = o3d.utility.Vector3dVector(velo[:3, :].T)
    point_cloud.colors = o3d.utility.Vector3dVector(np.asarray(rgb_values, dtype=np.float64))

    # Visualize the colored point cloud
    o3d.visualization.draw_geometries([point_cloud], window_name='Point Cloud with RGB Data')
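
If you need the actual fused array [x, y, z, r, R, G, B] you asked for (not just the visualization), the same steps can be wrapped into one function. This is only a sketch: it assumes the usual object-detection calib layout used above (P2 on calib[2], R0_rect on calib[4], Tr_velo_to_cam on calib[5]) and PNG images, and the function name fuse_lidar_with_image is just my own choice:

    import numpy as np
    import matplotlib.image as mpimg

    def fuse_lidar_with_image(calib_path, bin_path, img_path):
        """Return an (M, 7) array [x, y, z, r, R, G, B] for the lidar points
        that are visible in the left color image (M <= number of scanned points)."""
        # calibration, same layout as above: P2 = calib[2], R0_rect = calib[4], Tr = calib[5]
        with open(calib_path, 'r') as f:
            calib = f.readlines()
        P2 = np.array([float(x) for x in calib[2].split()[1:]]).reshape(3, 4)
        R0_rect = np.eye(4)
        R0_rect[:3, :3] = np.array([float(x) for x in calib[4].split()[1:]]).reshape(3, 3)
        Tr_velo_to_cam = np.eye(4)
        Tr_velo_to_cam[:3, :4] = np.array([float(x) for x in calib[5].split()[1:]]).reshape(3, 4)

        # lidar scan: x, y, z, reflectance
        scan = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 4)

        # image; mpimg.imread returns floats in [0, 1] for PNG
        png = mpimg.imread(img_path)[:, :, :3]
        img_h, img_w, _ = png.shape

        # project every point onto the image plane
        xyz1 = np.hstack([scan[:, :3], np.ones((scan.shape[0], 1))]).T   # (4, N)
        cam = P2 @ R0_rect @ Tr_velo_to_cam @ xyz1                       # (3, N)
        z = cam[2]
        z_safe = np.where(z > 0, z, 1.0)            # dummy depth for points behind the camera
        u = np.round(cam[0] / z_safe).astype(int)
        v = np.round(cam[1] / z_safe).astype(int)

        # keep only points in front of the camera that land inside the image
        visible = (z > 0) & (u >= 0) & (u < img_w) & (v >= 0) & (v < img_h)

        rgb = png[v[visible], u[visible]]                           # (M, 3) in [0, 1]
        return np.hstack([scan[visible], rgb.astype(np.float32)])  # (M, 7)

Usage would look something like this (the paths are placeholders for your own KITTI folder layout). Note that R, G, B come out as floats in [0, 1] because mpimg.imread normalizes PNGs; multiply by 255 if your model expects 8-bit values:

    fused = fuse_lidar_with_image(f'./testing/calib/{name}.txt',
                                  f'./testing/velodyne/{name}.bin',
                                  f'./testing/image_2/{name}.png')
    print(fused.shape)   # (M, 7): x, y, z, intensity, R, G, B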