Right now I am working to do plane segmentation of 3D point cloud data using RANSAC. Basic flowchart of my code is:
What make me frustrating is, how to speed up the process? Imagine if I have 1 mio points then I have to check all the point to my candidate plane, if do not meet the threshold then I select another random point and check with 1 mio point over and over. For any suggestion or even correction to my code, I would be very thankful
Here is my code, and the dataset can be download here. For this sample, I use small portion of my real data.
import numpy as np
import pandas as pd
import math
from scipy.spatial import cKDTree
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import random
data = pd.read_csv('data_sample.txt', usecols=[0,1,2], header=None, delimiter=' ')
df_points = pd.DataFrame(data)
%matplotlib qt
#Change parameter
distance_threshold = 0.06
minimum_point_threshold = 400
sampling_resolution = 0.06
#search nearest neighbor
kdtree = cKDTree(df_points)
temp_pair_point = []
list_pair_point = []
list_candidates = []
#Select random points
for h in df_points.index:
pair_point = kdtree.query_ball_point(np.array(df_points.iloc[h]),r=sampling_resolution, return_sorted=False)
temp_pair_point.append(pair_point)
for k in range(0,len(temp_pair_point)):
pairsize = len(temp_pair_point[k])
if pairsize > 10:
list_pair_point.append(temp_pair_point[k])
for m in range(0,len(list_pair_point)):
candidates = random.choices(list_pair_point[m], k=3)
list_candidates.append(candidates)
#select random point from nearest neighbor
list_sample_point = []
for i in range(0,len(list_candidates)):
list_test_distance = []
p1 = np.array(data.iloc[list_candidates[i][0]])
p2 = np.array(data.iloc[list_candidates[i][1]])
p3 = np.array(data.iloc[list_candidates[i][2]])
sample = [p1,p2,p3]
#create plane
x1,y1,z1 =np.ravel(p1)
x2,y2,z2 =np.ravel(p2)
x3,y3,z3 =np.ravel(p3)
m_u = [x2-x1,y2-y1,z2-z1]
m_v = [x3-x2,y3-y2,z3-z2]
m_normal = np.cross(m_u,m_v)
m_pos = p1
m_dist = np.dot(-m_normal, m_pos)
for j in data.index:
test_point = np.array(data.iloc[j])
test_distance = np.dot(-m_normal,test_point)
#print('Distance ={}'.format(test_distance))
if abs(test_distance) < distance_threshold:
list_test_distance.append(test_distance)
if len(list_test_distance) > minimum_point_threshold:
list_sample_point.append(sample)
#Check candidate plane
list_u = []
list_v = []
list_normal = []
list_distance = []
list_min_max = []
for i in range(0,len(list_sample_point)):
psample = list_sample_point[i]
psample1 = np.array(psample[0])
psample2 = np.array(psample[1])
psample3 = np.array(psample[2])
min_max = [float(min(np.transpose(psample)[0])), float(max(np.transpose(psample)[0])),float(min(np.transpose(psample)[1])), float(max(np.transpose(psample)[1])),float(min(np.transpose(psample)[2])), float(max(np.transpose(psample)[2]))]
list_min_max.append(min_max)
u = [psample2[0]-psample1[0], psample2[1]-psample1[1], psample2[2]-psample1[2]]
v = [psample3[0]-psample2[0], psample3[1]-psample2[1], psample3[2]-psample2[2]]
normal = np.cross(u,v)
d = np.dot(-normal,psample1)
list_u.append(u)
list_v.append(v)
list_normal.append(normal)
list_distance.append(d)
#Plot candidate plane
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
#ax.scatter3D(xs=plot_x, ys=plot_y, zs=plot_z, s=30, c='red')
ax.set_xlim3d(-25,-20)
ax.set_ylim3d(5,10)
ax.set_xlim3d(5,10)
ax.scatter3D(xs=df_points[0], ys=df_points[1], zs=df_points[2], s=1, c='green')
for j in range(0,len(list_normal)):
if list_normal[j][2] != 0:
print('Normal plane to Z axis')
xx, yy = np.meshgrid(np.arange(list_min_max[j][0],list_min_max[j][1]+(list_min_max[j][1]-list_min_max[j][0]),(list_min_max[j][1]-list_min_max[j][0])), np.arange(list_min_max[j][2],list_min_max[j][3]+(list_min_max[j][3]-list_min_max[j][2]),(list_min_max[j][3]-list_min_max[j][2])), sparse=True)
z = (-list_normal[j][0] * xx - list_normal[j][1] * yy -list_distance[j]) / list_normal[j][2]
ax.plot_surface(xx, yy, z, color=np.random.rand(3,), alpha=0.5)
elif list_normal[j][0] != 0:
print('Normal plane to X axis')
yy, zz, = np.meshgrid(np.arange(list_min_max[j][2],list_min_max[j][3]+(list_min_max[j][3]-list_min_max[j][2]),(list_min_max[j][3]-list_min_max[j][2])), np.arange(list_min_max[j][4],list_min_max[j][5]+(list_min_max[j][5]-list_min_max[j][4]),(list_min_max[j][5]-list_min_max[j][4])), sparse=True)
x = (-list_normal[j][1] * yy - list_normal[j][2] * zz -list_distance[j]) / list_normal[j][0]
ax.plot_surface(x, yy, zz, color=np.random.rand(3,), alpha=0.5)
elif list_normal[j][1] !=0:
print('Normal plane to Y axis')
xx, zz, = np.meshgrid(np.arange(list_min_max[j][0],list_min_max[j][1]+(list_min_max[j][1]-list_min_max[j][0]),(list_min_max[j][1]-list_min_max[j][0])), np.arange(list_min_max[j][4],list_min_max[j][5]+(list_min_max[j][5]-list_min_max[j][4]),(list_min_max[j][5]-list_min_max[j][4])), sparse=True)
y = (-list_normal[j][0] * xx - list_normal[j][2] * zz -list_distance[j]) / list_normal[j][1]
ax.plot_surface(xx, y, zz, color=np.random.rand(3,), alpha=0.5)
First of all a RANSAC is not made to be run on every point there is. Using your code it calculates 1729 planes out of 1945 points that you have. Usually you calculate a number of iterations you want to run the RANSAC for before starting it. This depends on the number of inliers and outliers of the expected plane:
k = log(1-p)/log(1-(1-e)^s)
p is your desired probability, that the RANSAC results in an outlier free plane, for example 99.99% probability. e is the percentage of outliers e.g. for 80% outlier e = 0.8
. This results in 1147 iterations. However usually you should be able to assume a lower outlier probability, lets say 20% due to choosing neighbors (as you do) now it suddenly is only 13 iterations to detect a single plane with a probability of 99.99%, so only 13(!) instead of 1729(!). If you would want to find lets say 100 planes it would still reduce your number of iterations by about 25%.
Secondly you can fasten up some parts of your code, if you still want to run it on every point:
First of all you are using two for loops for something that could be written in one and you are continuing to calculate the test_distance
even if len(list_test_distance) > minimum_point_threshold
might already be true. By changing the code to this:
#select random point from nearest neighbor
list_sample_point = []
list_u = []
list_v = []
list_normal = []
list_distance = []
list_min_max = []
for i in range(0,len(list_candidates)):
list_test_distance = []
p1 = np.array(data.iloc[list_candidates[i][0]])
p2 = np.array(data.iloc[list_candidates[i][1]])
p3 = np.array(data.iloc[list_candidates[i][2]])
sample = [p1,p2,p3]
#create plane
x1,y1,z1 =np.ravel(p1)
x2,y2,z2 =np.ravel(p2)
x3,y3,z3 =np.ravel(p3)
m_u = [x2-x1,y2-y1,z2-z1]
m_v = [x3-x2,y3-y2,z3-z2]
m_normal = np.cross(m_u,m_v)
m_pos = p1
m_dist = np.dot(-m_normal, m_pos)
for j in data.index:
test_point = np.array(data.iloc[j])
test_distance = np.dot(-m_normal,test_point)
#print('Distance ={}'.format(test_distance))
if abs(test_distance) < distance_threshold:
list_test_distance.append(test_distance)
if len(list_test_distance) > minimum_point_threshold:
#list_sample_point.append(sample) I think you don't need this one any more as well
psample = sample
psample1 = np.array(psample[0])
psample2 = np.array(psample[1])
psample3 = np.array(psample[2])
min_max = [float(min(np.transpose(psample)[0])), float(max(np.transpose(psample)[0])),float(min(np.transpose(psample)[1])), float(max(np.transpose(psample)[1])),float(min(np.transpose(psample)[2])), float(max(np.transpose(psample)[2]))]
list_min_max.append(min_max)
u = [psample2[0]-psample1[0], psample2[1]-psample1[1], psample2[2]-psample1[2]]
v = [psample3[0]-psample2[0], psample3[1]-psample2[1], psample3[2]-psample2[2]]
normal = np.cross(u,v)
d = np.dot(-normal,psample1)
list_u.append(u)
list_v.append(v)
list_normal.append(normal)
list_distance.append(d)
break
I changed the runtime on my laptop from 7 min 11 s down to 2 min 35 sec making it more than 2.5 times faster. The results should still be the same (correct me, if I'm wrong)