pythonarraysnumpyconnected-components

Python implementation for connected components labeling for arrays creates wrong results


This is my implementation of the connected components algorithm (https://en.wikipedia.org/wiki/Connected-component_labeling). What is the reason for my wrong results? Where is my mistake and how can I fix it? I appreciate any ideas!

import matplotlib.pyplot as plt
from matplotlib import colors
import numpy as np


def resolve_equivalent_labels_dict(input_dict, labels):
    # merge lists
    for key in reversed(input_dict.keys()):
        input_dict[key] = list(set(sorted(input_dict[key])))
        for key2, lis2 in input_dict.items():
            if key != key2 and key in lis2:
                input_dict[key2].extend(input_dict[key])
                input_dict[key] = []
    
    # Fix problem, where a label is part of multiple lists.
    for label_i in labels:
        key_list = []
        for key, lis in input_dict.items():
            if label_i in lis:
                key_list.append(key)
        if len(key_list) > 1:
            key_list.remove(min(key_list))
            for key in key_list:
                input_dict[key].remove(label_i)
    
    for key in input_dict.keys():
        if input_dict[key] == []:
            input_dict.pop(key)
        else:
            input_dict[key] = sorted(list(set(input_dict[key])))
    return input_dict


def CCL(arr):
    # Write labels
    offsets = [(0,-1),(-1,0),(-1,-1),(-1,1)]
    equivalent_labels = {}
    current_label = 1
    labels = [current_label]
    for row_i in range(len(arr)):
        for col_i in range(len(arr[0])):
            if arr[row_i, col_i] == 1:
                
                # Read labels from neighbors
                vals_neighbors = []
                for (row_off, col_off) in offsets:
                    v = arr[row_i+row_off, col_i+col_off]
                    if v != 0:
                        vals_neighbors.append(v)

                # Write labels and fill equivalent_label dict
                if len(vals_neighbors) == 0: # no neighbor, new label
                    current_label+=1
                    labels.append(current_label)
                    equivalent_labels[current_label] = []
                    arr[row_i, col_i] = current_label
                elif len(vals_neighbors) == 1: # one neighbor
                    arr[row_i, col_i] = vals_neighbors[0]
                elif len(vals_neighbors) > 1: # multiple neighbors
                    vals_neighbors = list(set(vals_neighbors))
                    first = min(vals_neighbors)
                    vals_neighbors.remove(first)
                    arr[row_i, col_i] = first
                    if first in equivalent_labels.keys():
                        equivalent_labels[first].extend(vals_neighbors)
                    else:
                        equivalent_labels[first] = vals_neighbors

    # Merge lists
    print(equivalent_labels)    
    equivalent_labels = resolve_equivalent_labels_dict(equivalent_labels, labels)
    print("")
    print(equivalent_labels)  

    # Rename labels
    for key in reversed(equivalent_labels.keys()):   
        lis = list(np.unique(equivalent_labels[key]))
        for elem in lis:
            arr = np.where(arr == elem, key, arr)

    return arr
                

# create data
gridlength = 20
input_array = np.zeros((gridlength+2, gridlength+2))
input_array[1:gridlength+1, 1:1+gridlength] = np.random.randint(2, size=(gridlength,gridlength))
print(input_array)

# Plot
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10))
ax1.pcolormesh(input_array, cmap=colors.ListedColormap(["white", "red"]))
output_array = CCL(input_array)
output_array[output_array==0] == np.nan
ax2.pcolormesh(output_array)
ax1.set_xlim([0,gridlength+2])
ax2.set_xlim([0,gridlength+2])
ax1.set_ylim([0,gridlength+2])
ax2.set_ylim([0,gridlength+2])
for (j,i),label in np.ndenumerate(output_array):
    ax2.text(i,j,int(label),ha='center',va='center')
plt.savefig("code/connectivity/ccl.png", dpi=300, bbox_inches="tight")

with input_array as

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0.]
 [0. 0. 1. 1. 0. 0. 1. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 0. 1. 1. 0.]
 [0. 1. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1. 0. 1. 1. 0. 0.]
 [0. 1. 1. 1. 1. 1. 1. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 1. 1. 0.]
 [0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1. 1. 1. 0. 1. 0.]
 [0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 1. 1. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0.]
 [0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0.]
 [0. 0. 1. 0. 1. 1. 0. 0. 1. 1. 0. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 0.]
 [0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 1. 0. 1. 1. 0. 1. 0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0.]
 [0. 1. 1. 1. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 1. 0. 0.]
 [0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 1. 1. 0.]
 [0. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 0.]
 [0. 1. 1. 1. 1. 0. 0. 0. 0. 1. 1. 0. 1. 1. 1. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 0. 0. 0. 1. 1. 0.]
 [0. 0. 1. 0. 0. 0. 1. 0. 1. 1. 1. 0. 1. 1. 1. 0. 0. 1. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]

gives this result where you can see that "3" and "5" areas are not merged: result


Solution

  • Is there a reason you want to implement this from scratch? There are implementations in scipy and skimage that will do this for you, where you can also define the connectivity.

    Example using the scipy implementation.

    import numpy as np
    from ast import literal_eval
    import re
    
    from scipy.ndimage import label
    import matplotlib.pyplot as plt
    
    # Working jupyterlab so enable plotting in the notebook.
    %matplotlib inline
    
    # Transforming your pasted output into a usable array
    raw_data = """
    [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.],
     [0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0.],
     [0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0.],
     [0. 0. 1. 1. 0. 0. 1. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 0. 1. 1. 0.],
     [0. 1. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1. 0. 1. 1. 0. 0.],
     [0. 1. 1. 1. 1. 1. 1. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 1. 1. 0.],
     [0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1. 1. 1. 0. 1. 0.],
     [0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0.],
     [0. 0. 0. 0. 1. 0. 0. 1. 1. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0.],
     [0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0.],
     [0. 0. 1. 0. 1. 1. 0. 0. 1. 1. 0. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 0.],
     [0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.],
     [0. 1. 0. 1. 1. 0. 1. 0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0.],
     [0. 1. 1. 1. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0.],
     [0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 1. 0. 0.],
     [0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 1. 1. 0.],
     [0. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 0.],
     [0. 1. 1. 1. 1. 0. 0. 0. 0. 1. 1. 0. 1. 1. 1. 0. 0. 1. 0. 0. 0. 0.],
     [0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0.],
     [0. 0. 1. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 0. 0. 0. 1. 1. 0.],
     [0. 0. 1. 0. 0. 0. 1. 0. 1. 1. 1. 0. 1. 1. 1. 0. 0. 1. 0. 1. 0. 0.],
     [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
    """
    
    input_array = np.array(literal_eval(re.sub("\.", ",", raw_data))).astype(float)
    
    # label returns the labelled array and the number of components created
    component_array, num_components = label(input_array, structure=np.ones((3,3)))
    
    print(num_components)
    # 4
    
    # I had to vertical flip your input data to get it to display how it was in your expected result figure.
    plt.imshow(np.flipud(component_array))
    

    connected components

    I haven't really answered your question, but you might want to consider an implementation that already exists if you were not aware of them yet.