pythonnumpygeometrypattern-matchingpoint-clouds

Identifying 4-connected point patterns in a grid of 3D points with noise


I have a set of points in 3D space represented by the following NumPy array:

import numpy as np
points = np.array([4.5403791090466825, -6.474122845743137, 1.720865155131852
                   4.544710813420152, -6.224218513611945, 1.7088861199877527
                   4.537233620491136, -5.98484530658863, 1.699488873354222
                   4.521937968342778, -5.721674150789189, 1.6849114580360904
                   4.4999241714099405, -5.4938430240669724, 1.679752942910385
                   4.830182546259025, -5.657936979701614, 1.6888307295378522
                   5.121468843368871, -5.803097135994744, 1.6954809688529893
                   5.439088364842268, -5.965512825953125, 1.6981638740242355
                   5.704276912211997, -6.100013441509505, 1.69575210534024
                   5.674604213881624, -6.316464211693753, 1.696360866592035
                   5.6375373276155125, -6.568875993817544, 1.7173100480278745
                   5.601324312047108, -6.791416283459123, 1.7222351265798983
                   5.556256817028301, -7.025669295257478, 1.7240530742321507
                   5.3173007670429975, -6.898939280431394, 1.7278595480033725
                   5.046628449981028, -6.753703318879904, 1.725804884872875
                   4.803244289601083, -6.620716409453152, 1.726377963879765
                   4.822340852273528, -6.379078541501187, 1.7032476446943
                   4.830532452723338, -6.144069893351172, 1.7047976672875338
                   4.834760563421453, -5.884305796074045, 1.6876475718597206
                   5.110719812888194, -6.028142616316405, 1.687525680260671
                   5.417740311618597, -6.184558989903391, 1.7124527178431916
                   5.385270875729216, -6.438008203433672, 1.712366908533943
                   5.355507693509876, -6.661093026054448, 1.729038450873722
                   5.07348022798482, -6.513010696655368, 1.726279452092121
                   5.090180621637709, -6.283446799878667, 1.7091940453643182])

A plot of these points (and their labels) in 3D space is shown in the image below.

In this image, certain points are circled in red, referred to as the 4-connected points to the point of interest. However, my attempt to solve this problem by finding the closest 4 points (euclidian distance) to the point of interest doesn't work in all cases (points drawn in blue).

enter image description here

It should be noted that the 3D points are contained in a plane or a curved surface. However, the presence of noise in the data readings means that while we can approximate the points to be on or near a surface, they are not precisely on top of it.

Additionally, the grid of points can sometimes be slightly deformed, not forming straight lines, as shown in this cloud of points (raw data).

enter image description here

Do you have any suggestions on how to determine these 4-connected points in 3D space?

Edit 04/04/2024:

Two possible solutions that look very similar in terms of relative angles when the grid is very skewed:

enter image description here


Solution

  • You can approach this in a few separate steps. First, extract a plane of best fit:

    import itertools
    
    import matplotlib.pyplot as plt
    import numpy as np
    import scipy.cluster.vq
    
    
    def load_points() -> np.ndarray:
        points = np.array((
            (4.5403791090466825, -6.474122845743137, 1.720865155131852 ),
            (4.544710813420152, -6.224218513611945, 1.7088861199877527 ),
            (4.537233620491136, -5.98484530658863, 1.699488873354222   ),
            (4.521937968342778, -5.721674150789189, 1.6849114580360904 ),
            (4.4999241714099405, -5.4938430240669724, 1.679752942910385),
            (4.830182546259025, -5.657936979701614, 1.6888307295378522 ),
            (5.121468843368871, -5.803097135994744, 1.6954809688529893 ),
            (5.439088364842268, -5.965512825953125, 1.6981638740242355 ),
            (5.704276912211997, -6.100013441509505, 1.69575210534024   ),
            (5.674604213881624, -6.316464211693753, 1.696360866592035  ),
            (5.6375373276155125, -6.568875993817544, 1.7173100480278745),
            (5.601324312047108, -6.791416283459123, 1.7222351265798983 ),
            (5.556256817028301, -7.025669295257478, 1.7240530742321507 ),
            (5.3173007670429975, -6.898939280431394, 1.7278595480033725),
            (5.046628449981028, -6.753703318879904, 1.725804884872875  ),
            (4.803244289601083, -6.620716409453152, 1.726377963879765  ),
            (4.822340852273528, -6.379078541501187, 1.7032476446943    ),
            (4.830532452723338, -6.144069893351172, 1.7047976672875338 ),
            (4.834760563421453, -5.884305796074045, 1.6876475718597206 ),
            (5.110719812888194, -6.028142616316405, 1.687525680260671  ),
            (5.417740311618597, -6.184558989903391, 1.7124527178431916 ),
            (5.385270875729216, -6.438008203433672, 1.712366908533943  ),
            (5.355507693509876, -6.661093026054448, 1.729038450873722  ),
            (5.07348022798482, -6.513010696655368, 1.726279452092121   ),
            (5.090180621637709, -6.283446799878667, 1.7091940453643182 ),
        ))
        return points
    
    
    def plane_fit(points: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        # Over-determined plane of best fit in form ax + by + cz = 1
        normal, residuals, rank, singular = np.linalg.lstsq(
            a=points, b=np.ones_like(points[:, 0]), rcond=None,
        )
    
        # Arbitrary reference point in middle of locus, exactly on plane of best fit
        reference_point = np.empty_like(points[0])
        reference_point[:-1] = points[:, :-1].mean(axis=0)
        reference_point[-1] = (
            1 - reference_point[:-1].dot(normal[:-1])
        ) / normal[-1]
    
        return normal, reference_point
    
    
    def project_planar_r3(
        points: np.ndarray,
        normal: np.ndarray,
        reference_point: np.ndarray,
    ) -> np.ndarray:
        # Project points onto plane, still in original xyz coordinate system
        planar = points - np.outer((points - reference_point)@normal, normal)
        return planar
    
    
    def plot_planar(planar: np.ndarray) -> plt.Figure:
        fig = plt.figure()
        fig.suptitle('Planar projection')
        plt.scatter(planar[:, 0], planar[:, 1])
        return fig
    

    planar

    Perform k-means to find where likely offsets are:

    def cartesian_neighbours(
        planar: np.ndarray,
        tol: float,
    ):
        px, py = planar[:, :-1].T
        return np.vstack([
            planar[
                  (px > x)
                & (py > y - tol)
                & (px <= x + tol)
                & (py <= y + tol),
                :,
            ] - (x, y, z)
            for x, y, z in planar
        ])
    
    
    def cluster(neighbours: np.ndarray) -> list:
        codebook, distortion = scipy.cluster.vq.kmeans(
            obs=neighbours, k_or_guess=3,
        )
        return codebook
    
    
    def plot_neighbours(neighbours: np.ndarray) -> plt.Figure:
        fig = plt.figure()
        fig.suptitle('Neighbour distribution')
        plt.scatter(neighbours[:, 0], neighbours[:, 1], marker='+')
        return fig
    

    k-means

    Pick two basis vectors from the neighbours. From these, form an affine correction matrix. This is a little involved and I didn't get it working well, but for your current data an affine matrix can look like

    np.array((
        (  3.5,  2.1),
        (  0.0,  4.0),
        (  0.0,  0.0),
        (-15.8, 16.4),
    ))
    

    applied like

    corrected = np.hstack((
        planar, np.ones_like(planar[:, :1]),
    )) @ affine
    

    regularised

    These corrected points can then be fairly easily assigned as "connected" based on proximity to integer coordinates.

    As an alternative (that still requires a more robust cluster selection heuristic),

    import matplotlib.pyplot as plt
    import numpy as np
    import scipy.cluster.vq
    
    
    def load_points() -> np.ndarray:
        points = np.array((
            (4.5403791090466825, -6.474122845743137, 1.720865155131852 ),
            (4.544710813420152, -6.224218513611945, 1.7088861199877527 ),
            (4.537233620491136, -5.98484530658863, 1.699488873354222   ),
            (4.521937968342778, -5.721674150789189, 1.6849114580360904 ),
            (4.4999241714099405, -5.4938430240669724, 1.679752942910385),
            (4.830182546259025, -5.657936979701614, 1.6888307295378522 ),
            (5.121468843368871, -5.803097135994744, 1.6954809688529893 ),
            (5.439088364842268, -5.965512825953125, 1.6981638740242355 ),
            (5.704276912211997, -6.100013441509505, 1.69575210534024   ),
            (5.674604213881624, -6.316464211693753, 1.696360866592035  ),
            (5.6375373276155125, -6.568875993817544, 1.7173100480278745),
            (5.601324312047108, -6.791416283459123, 1.7222351265798983 ),
            (5.556256817028301, -7.025669295257478, 1.7240530742321507 ),
            (5.3173007670429975, -6.898939280431394, 1.7278595480033725),
            (5.046628449981028, -6.753703318879904, 1.725804884872875  ),
            (4.803244289601083, -6.620716409453152, 1.726377963879765  ),
            (4.822340852273528, -6.379078541501187, 1.7032476446943    ),
            (4.830532452723338, -6.144069893351172, 1.7047976672875338 ),
            (4.834760563421453, -5.884305796074045, 1.6876475718597206 ),
            (5.110719812888194, -6.028142616316405, 1.687525680260671  ),
            (5.417740311618597, -6.184558989903391, 1.7124527178431916 ),
            (5.385270875729216, -6.438008203433672, 1.712366908533943  ),
            (5.355507693509876, -6.661093026054448, 1.729038450873722  ),
            (5.07348022798482, -6.513010696655368, 1.726279452092121   ),
            (5.090180621637709, -6.283446799878667, 1.7091940453643182 ),
        ))
        return points
    
    
    def plane_fit(points: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        # Over-determined plane of best fit in form ax + by + cz = 1
        normal, residuals, rank, singular = np.linalg.lstsq(
            a=points, b=np.ones_like(points[:, 0]), rcond=None,
        )
    
        # Arbitrary reference point in middle of locus, exactly on plane of best fit
        reference_point = np.empty_like(points[0])
        reference_point[:-1] = points[:, :-1].mean(axis=0)
        reference_point[-1] = (
            1 - reference_point[:-1].dot(normal[:-1])
        ) / normal[-1]
    
        return normal, reference_point
    
    
    def project_planar_r3(
        points: np.ndarray,
        normal: np.ndarray,
        reference_point: np.ndarray,
    ) -> np.ndarray:
        # Project points onto plane, still in original xyz coordinate system
        planar = points - np.outer((points - reference_point)@normal, normal)
        return planar
    
    
    def plot_planar(planar: np.ndarray, title: str) -> plt.Figure:
        fig = plt.figure()
        fig.suptitle(title)
        plt.scatter(planar[:, 0], planar[:, 1])
        return fig
    
    
    def cartesian_neighbours(
        planar: np.ndarray, tol: float,
    ):
        px, py = planar[:, :-1].T
        return np.vstack([
            planar[
                  (px > x)
                & (py > y - tol)
                & (px <= x + tol)
                & (py <= y + tol),
                :,
            ] - (x, y, z)
            for x, y, z in planar
        ])
    
    
    def plot_neighbours(neighbours: np.ndarray, u: np.ndarray, v: np.ndarray) -> plt.Figure:
        fig = plt.figure()
        fig.suptitle('Neighbour distribution')
        plt.scatter(neighbours[:, 0], neighbours[:, 1], marker='+')
        plt.plot(
            [0, u[0]],
            [0, u[1]],
        )
        plt.plot(
            [0, v[0]],
            [0, v[1]],
        )
        return fig
    
    
    def cluster(neighbours: np.ndarray) -> tuple[
        np.ndarray,  # neighbour centroids, k*3
        np.ndarray,  # label indices, n
    ]:
        centroids, labels = scipy.cluster.vq.kmeans2(
            data=neighbours, k=8, seed=0,
        )
        return centroids, labels
    
    
    def deskew_points(
        points: np.ndarray,
        u: np.ndarray,
        v: np.ndarray,
    ) -> np.ndarray:
        target = points.copy()
        target -= target.min(axis=0)
        de_uv, residuals, rank, singular = np.linalg.lstsq(
            a=np.stack((u, v)), b=np.eye(2), rcond=None,
        )
        target = target @ de_uv
        offset = (
            target[target[:, 0] - target[:, 0].min() < 0.5, 0].mean(),
            target[target[:, 1] - target[:, 1].min() < 0.5, 1].mean(),
        )
        target -= offset
        return target
    
    
    def affine_from_deskewed(
        points: np.ndarray,
        deskewed: np.ndarray,
    ) -> np.ndarray:
        deskewed = deskewed.round()
    
        affine, residuals, rank, singular = np.linalg.lstsq(
            a=np.hstack((
                points,
                np.ones_like(points[:, :1]),
            )),
            b=deskewed, rcond=None,
        )
        return affine
    
    
    def project_uv(
        points: np.ndarray,
        affine: np.ndarray,
    ):
        return np.hstack((
            points, np.ones_like(points[:, :1]),
        )) @ affine
    
    
    def plot_corrected(
        deskewed: np.ndarray,
        corrected: np.ndarray,
    ) -> plt.Figure:
        fig = plt.figure()
        plt.scatter(*deskewed.T, label='Deskewed')
        plt.scatter(*corrected.T, label='Corrected')
        plt.legend()
        return fig
    
    
    def main() -> None:
        points = load_points()
        normal, reference_point = plane_fit(points=points)
        print('Planar normal:', normal)
        print('Reference point:', reference_point)
    
        planar = project_planar_r3(points=points, normal=normal, reference_point=reference_point)
        plot_planar(planar=planar, title='Planar projection')
    
        neighbours = cartesian_neighbours(planar=planar, tol=0.5)
    
        centroids, labels = cluster(neighbours)
        print('Neighbour cluster centroids:')
        print(centroids)
    
        # You'll need to develop a heuristic to choose these neighbour labels
        u_label = 2
        v_label = 7
    
        u = centroids[u_label]
        v = centroids[v_label]
        print('Inferred basis u, v:')
        print(u)
        print(v)
        plot_neighbours(neighbours=neighbours, u=u, v=v)
    
        deskewed = deskew_points(points=points, u=u, v=v)
    
        affine = affine_from_deskewed(points=points, deskewed=deskewed)
        print('Affine correction:')
        print(affine)
    
        corrected = project_uv(points=points, affine=affine)
        plot_corrected(deskewed=deskewed, corrected=corrected)
    
        plt.show()
    
    
    if __name__ == '__main__':
        main()
    

    deskew