This is my implementation of the connected components algorithm (https://en.wikipedia.org/wiki/Connected-component_labeling). What is the reason for my wrong results? Where is my mistake and how can I fix it? I appreciate any ideas!
import matplotlib.pyplot as plt
from matplotlib import colors
import numpy as np
def resolve_equivalent_labels_dict(input_dict, labels):
# merge lists
for key in reversed(input_dict.keys()):
input_dict[key] = list(set(sorted(input_dict[key])))
for key2, lis2 in input_dict.items():
if key != key2 and key in lis2:
input_dict[key2].extend(input_dict[key])
input_dict[key] = []
# Fix problem, where a label is part of multiple lists.
for label_i in labels:
key_list = []
for key, lis in input_dict.items():
if label_i in lis:
key_list.append(key)
if len(key_list) > 1:
key_list.remove(min(key_list))
for key in key_list:
input_dict[key].remove(label_i)
for key in input_dict.keys():
if input_dict[key] == []:
input_dict.pop(key)
else:
input_dict[key] = sorted(list(set(input_dict[key])))
return input_dict
def CCL(arr):
# Write labels
offsets = [(0,-1),(-1,0),(-1,-1),(-1,1)]
equivalent_labels = {}
current_label = 1
labels = [current_label]
for row_i in range(len(arr)):
for col_i in range(len(arr[0])):
if arr[row_i, col_i] == 1:
# Read labels from neighbors
vals_neighbors = []
for (row_off, col_off) in offsets:
v = arr[row_i+row_off, col_i+col_off]
if v != 0:
vals_neighbors.append(v)
# Write labels and fill equivalent_label dict
if len(vals_neighbors) == 0: # no neighbor, new label
current_label+=1
labels.append(current_label)
equivalent_labels[current_label] = []
arr[row_i, col_i] = current_label
elif len(vals_neighbors) == 1: # one neighbor
arr[row_i, col_i] = vals_neighbors[0]
elif len(vals_neighbors) > 1: # multiple neighbors
vals_neighbors = list(set(vals_neighbors))
first = min(vals_neighbors)
vals_neighbors.remove(first)
arr[row_i, col_i] = first
if first in equivalent_labels.keys():
equivalent_labels[first].extend(vals_neighbors)
else:
equivalent_labels[first] = vals_neighbors
# Merge lists
print(equivalent_labels)
equivalent_labels = resolve_equivalent_labels_dict(equivalent_labels, labels)
print("")
print(equivalent_labels)
# Rename labels
for key in reversed(equivalent_labels.keys()):
lis = list(np.unique(equivalent_labels[key]))
for elem in lis:
arr = np.where(arr == elem, key, arr)
return arr
# create data
gridlength = 20
input_array = np.zeros((gridlength+2, gridlength+2))
input_array[1:gridlength+1, 1:1+gridlength] = np.random.randint(2, size=(gridlength,gridlength))
print(input_array)
# Plot
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10))
ax1.pcolormesh(input_array, cmap=colors.ListedColormap(["white", "red"]))
output_array = CCL(input_array)
output_array[output_array==0] == np.nan
ax2.pcolormesh(output_array)
ax1.set_xlim([0,gridlength+2])
ax2.set_xlim([0,gridlength+2])
ax1.set_ylim([0,gridlength+2])
ax2.set_ylim([0,gridlength+2])
for (j,i),label in np.ndenumerate(output_array):
ax2.text(i,j,int(label),ha='center',va='center')
plt.savefig("code/connectivity/ccl.png", dpi=300, bbox_inches="tight")
with input_array
as
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0.]
[0. 0. 1. 1. 0. 0. 1. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 0. 1. 1. 0.]
[0. 1. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1. 0. 1. 1. 0. 0.]
[0. 1. 1. 1. 1. 1. 1. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 1. 1. 0.]
[0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1. 1. 1. 0. 1. 0.]
[0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 1. 0. 0. 1. 1. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0.]
[0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0.]
[0. 0. 1. 0. 1. 1. 0. 0. 1. 1. 0. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 0.]
[0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 1. 0. 1. 1. 0. 1. 0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0.]
[0. 1. 1. 1. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0.]
[0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 1. 0. 0.]
[0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 1. 1. 0.]
[0. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 0.]
[0. 1. 1. 1. 1. 0. 0. 0. 0. 1. 1. 0. 1. 1. 1. 0. 0. 1. 0. 0. 0. 0.]
[0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0.]
[0. 0. 1. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 0. 0. 0. 1. 1. 0.]
[0. 0. 1. 0. 0. 0. 1. 0. 1. 1. 1. 0. 1. 1. 1. 0. 0. 1. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
gives this result where you can see that "3" and "5" areas are not merged: result
Is there a reason you want to implement this from scratch? There are implementations in scipy
and skimage
that will do this for you, where you can also define the connectivity.
Example using the scipy
implementation.
import numpy as np
from ast import literal_eval
import re
from scipy.ndimage import label
import matplotlib.pyplot as plt
# Working jupyterlab so enable plotting in the notebook.
%matplotlib inline
# Transforming your pasted output into a usable array
raw_data = """
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.],
[0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0.],
[0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0.],
[0. 0. 1. 1. 0. 0. 1. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 0. 1. 1. 0.],
[0. 1. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1. 0. 1. 1. 0. 0.],
[0. 1. 1. 1. 1. 1. 1. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 1. 1. 0.],
[0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1. 1. 1. 0. 1. 0.],
[0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0.],
[0. 0. 0. 0. 1. 0. 0. 1. 1. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0.],
[0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0.],
[0. 0. 1. 0. 1. 1. 0. 0. 1. 1. 0. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 0.],
[0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.],
[0. 1. 0. 1. 1. 0. 1. 0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0.],
[0. 1. 1. 1. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0.],
[0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 1. 0. 0.],
[0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 1. 1. 0.],
[0. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 0.],
[0. 1. 1. 1. 1. 0. 0. 0. 0. 1. 1. 0. 1. 1. 1. 0. 0. 1. 0. 0. 0. 0.],
[0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0.],
[0. 0. 1. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 0. 0. 0. 1. 1. 0.],
[0. 0. 1. 0. 0. 0. 1. 0. 1. 1. 1. 0. 1. 1. 1. 0. 0. 1. 0. 1. 0. 0.],
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
"""
input_array = np.array(literal_eval(re.sub("\.", ",", raw_data))).astype(float)
# label returns the labelled array and the number of components created
component_array, num_components = label(input_array, structure=np.ones((3,3)))
print(num_components)
# 4
# I had to vertical flip your input data to get it to display how it was in your expected result figure.
plt.imshow(np.flipud(component_array))
I haven't really answered your question, but you might want to consider an implementation that already exists if you were not aware of them yet.