Speed-up RANSAC Plane Detetction in 3D Pointcloud

Right now I am working to do plane segmentation of 3D point cloud data using RANSAC. Basic flowchart of my code is:

Select 3 random points then create a candidate plane
Check all other points within certain distance threshold to the plane. If there are many point below the threshold then I save the plane.
If the covered point is below the threshold, then select another RANSAC point (back to no 1)

What make me frustrating is, how to speed up the process? Imagine if I have 1 mio points then I have to check all the point to my candidate plane, if do not meet the threshold then I select another random point and check with 1 mio point over and over. For any suggestion or even correction to my code, I would be very thankful

Here is my code, and the dataset can be download here. For this sample, I use small portion of my real data.

import numpy as np
import pandas as pd
import math
from scipy.spatial import cKDTree
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import random

data = pd.read_csv('data_sample.txt', usecols=[0,1,2], header=None, delimiter=' ')
df_points = pd.DataFrame(data)

%matplotlib qt


#Change parameter
distance_threshold = 0.06
minimum_point_threshold = 400
sampling_resolution = 0.06

#search nearest neighbor
kdtree = cKDTree(df_points)

temp_pair_point = []
list_pair_point = []
list_candidates = []

#Select random points
for h in df_points.index:
    pair_point = kdtree.query_ball_point(np.array(df_points.iloc[h]),r=sampling_resolution, return_sorted=False)
    temp_pair_point.append(pair_point)


for k in range(0,len(temp_pair_point)):
    pairsize = len(temp_pair_point[k])
    if pairsize > 10:
        list_pair_point.append(temp_pair_point[k])

for m in range(0,len(list_pair_point)):
    candidates = random.choices(list_pair_point[m], k=3)
    list_candidates.append(candidates)




#select random point from nearest neighbor
list_sample_point = []

for i in range(0,len(list_candidates)):
    list_test_distance = []


    p1 = np.array(data.iloc[list_candidates[i][0]])
    p2 = np.array(data.iloc[list_candidates[i][1]])
    p3 = np.array(data.iloc[list_candidates[i][2]])
    sample = [p1,p2,p3]

    #create plane
    x1,y1,z1 =np.ravel(p1)
    x2,y2,z2 =np.ravel(p2)
    x3,y3,z3 =np.ravel(p3)

    m_u = [x2-x1,y2-y1,z2-z1]
    m_v = [x3-x2,y3-y2,z3-z2]

    m_normal = np.cross(m_u,m_v)
    m_pos = p1
    m_dist = np.dot(-m_normal, m_pos)

    for j in data.index:
        test_point = np.array(data.iloc[j])
        test_distance = np.dot(-m_normal,test_point)
        #print('Distance ={}'.format(test_distance))
        if abs(test_distance) < distance_threshold:
            list_test_distance.append(test_distance)

    if len(list_test_distance) > minimum_point_threshold:
        list_sample_point.append(sample)



#Check candidate plane
list_u = []
list_v = []
list_normal = []
list_distance = []
list_min_max = []

for i in range(0,len(list_sample_point)):
    psample = list_sample_point[i]
    psample1 = np.array(psample[0])
    psample2 = np.array(psample[1])
    psample3 = np.array(psample[2])
    min_max = [float(min(np.transpose(psample)[0])), float(max(np.transpose(psample)[0])),float(min(np.transpose(psample)[1])), float(max(np.transpose(psample)[1])),float(min(np.transpose(psample)[2])), float(max(np.transpose(psample)[2]))]
    list_min_max.append(min_max)

    u = [psample2[0]-psample1[0], psample2[1]-psample1[1], psample2[2]-psample1[2]]
    v = [psample3[0]-psample2[0], psample3[1]-psample2[1], psample3[2]-psample2[2]]
    normal = np.cross(u,v)
    d = np.dot(-normal,psample1)

    list_u.append(u)
    list_v.append(v)
    list_normal.append(normal)
    list_distance.append(d)




#Plot candidate plane  
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
#ax.scatter3D(xs=plot_x, ys=plot_y, zs=plot_z, s=30, c='red')
ax.set_xlim3d(-25,-20)
ax.set_ylim3d(5,10)
ax.set_xlim3d(5,10)
ax.scatter3D(xs=df_points[0], ys=df_points[1], zs=df_points[2], s=1, c='green')

for j in range(0,len(list_normal)):
    if list_normal[j][2] != 0:
        print('Normal plane to Z axis')
        xx, yy = np.meshgrid(np.arange(list_min_max[j][0],list_min_max[j][1]+(list_min_max[j][1]-list_min_max[j][0]),(list_min_max[j][1]-list_min_max[j][0])), np.arange(list_min_max[j][2],list_min_max[j][3]+(list_min_max[j][3]-list_min_max[j][2]),(list_min_max[j][3]-list_min_max[j][2])), sparse=True)
        z = (-list_normal[j][0] * xx - list_normal[j][1] * yy -list_distance[j]) / list_normal[j][2]    
        ax.plot_surface(xx, yy, z, color=np.random.rand(3,), alpha=0.5)

    elif list_normal[j][0] != 0:
        print('Normal plane to X axis')
        yy, zz, = np.meshgrid(np.arange(list_min_max[j][2],list_min_max[j][3]+(list_min_max[j][3]-list_min_max[j][2]),(list_min_max[j][3]-list_min_max[j][2])), np.arange(list_min_max[j][4],list_min_max[j][5]+(list_min_max[j][5]-list_min_max[j][4]),(list_min_max[j][5]-list_min_max[j][4])), sparse=True)
        x = (-list_normal[j][1] * yy - list_normal[j][2] * zz -list_distance[j]) / list_normal[j][0]
        ax.plot_surface(x, yy, zz,  color=np.random.rand(3,), alpha=0.5)

    elif list_normal[j][1] !=0:
        print('Normal plane to Y axis')
        xx, zz, = np.meshgrid(np.arange(list_min_max[j][0],list_min_max[j][1]+(list_min_max[j][1]-list_min_max[j][0]),(list_min_max[j][1]-list_min_max[j][0])), np.arange(list_min_max[j][4],list_min_max[j][5]+(list_min_max[j][5]-list_min_max[j][4]),(list_min_max[j][5]-list_min_max[j][4])), sparse=True)
        y = (-list_normal[j][0] * xx - list_normal[j][2] * zz -list_distance[j]) / list_normal[j][1]
        ax.plot_surface(xx, y, zz, color=np.random.rand(3,), alpha=0.5)

Solution

First of all a RANSAC is not made to be run on every point there is. Using your code it calculates 1729 planes out of 1945 points that you have. Usually you calculate a number of iterations you want to run the RANSAC for before starting it. This depends on the number of inliers and outliers of the expected plane:

k = log(1-p)/log(1-(1-e)^s)

p is your desired probability, that the RANSAC results in an outlier free plane, for example 99.99% probability. e is the percentage of outliers e.g. for 80% outlier e = 0.8. This results in 1147 iterations. However usually you should be able to assume a lower outlier probability, lets say 20% due to choosing neighbors (as you do) now it suddenly is only 13 iterations to detect a single plane with a probability of 99.99%, so only 13(!) instead of 1729(!). If you would want to find lets say 100 planes it would still reduce your number of iterations by about 25%.

Secondly you can fasten up some parts of your code, if you still want to run it on every point: First of all you are using two for loops for something that could be written in one and you are continuing to calculate the test_distance even if len(list_test_distance) > minimum_point_threshold might already be true. By changing the code to this:

#select random point from nearest neighbor
list_sample_point = []
list_u = []
list_v = []
list_normal = []
list_distance = []
list_min_max = []


for i in range(0,len(list_candidates)):
    list_test_distance = []


    p1 = np.array(data.iloc[list_candidates[i][0]])
    p2 = np.array(data.iloc[list_candidates[i][1]])
    p3 = np.array(data.iloc[list_candidates[i][2]])
    sample = [p1,p2,p3]

    #create plane
    x1,y1,z1 =np.ravel(p1)
    x2,y2,z2 =np.ravel(p2)
    x3,y3,z3 =np.ravel(p3)

    m_u = [x2-x1,y2-y1,z2-z1]
    m_v = [x3-x2,y3-y2,z3-z2]

    m_normal = np.cross(m_u,m_v)
    m_pos = p1
    m_dist = np.dot(-m_normal, m_pos)

    for j in data.index:
        test_point = np.array(data.iloc[j])
        test_distance = np.dot(-m_normal,test_point)
        #print('Distance ={}'.format(test_distance))
        if abs(test_distance) < distance_threshold:
            list_test_distance.append(test_distance)

        if len(list_test_distance) > minimum_point_threshold:
            #list_sample_point.append(sample) I think you don't need this one any more as well
            psample = sample
            psample1 = np.array(psample[0])
            psample2 = np.array(psample[1])
            psample3 = np.array(psample[2])
            min_max = [float(min(np.transpose(psample)[0])), float(max(np.transpose(psample)[0])),float(min(np.transpose(psample)[1])), float(max(np.transpose(psample)[1])),float(min(np.transpose(psample)[2])), float(max(np.transpose(psample)[2]))]
            list_min_max.append(min_max)

            u = [psample2[0]-psample1[0], psample2[1]-psample1[1], psample2[2]-psample1[2]]
            v = [psample3[0]-psample2[0], psample3[1]-psample2[1], psample3[2]-psample2[2]]
            normal = np.cross(u,v)
            d = np.dot(-normal,psample1)

            list_u.append(u)
            list_v.append(v)
            list_normal.append(normal)
            list_distance.append(d)
            break

I changed the runtime on my laptop from 7 min 11 s down to 2 min 35 sec making it more than 2.5 times faster. The results should still be the same (correct me, if I'm wrong)