Separation of Braille characters inside of an image

I'm making a project that transforms Braille to text. I have written the code for identifying the Braille dots in the image, but I can't figure out how to segment the dots into cells.

This part identifies the blobs in the image (smaller, low-quality images don't work right now):

import cv2
import numpy as np
from sklearn.cluster import KMeans

# Load the image as a single-channel grayscale array
image_path = "braille.jpg"
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread reports a missing or unreadable file by returning None instead of
# raising, so fail here with a message that names the file.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Set up SimpleBlobDetector
params = cv2.SimpleBlobDetector_Params()

# Filter by area (size of the blob)
params.filterByArea = True
params.minArea = 100  # Adjust based on dot size
params.maxArea = 1000

# Filter by circularity
params.filterByCircularity = True
params.minCircularity = 0.9  # Adjust for shape of the dots

# Convexity filter is switched off; the threshold is still stored on params
params.filterByConvexity = False
params.minConvexity = 0.7

# Filter by inertia (roundness)
params.filterByInertia = True
params.minInertiaRatio = 0.95

# Create a detector with the parameters
detector = cv2.SimpleBlobDetector_create(params)

# Detect blobs
keypoints = detector.detect(image)

# Draw detected blobs as red circles ((0, 0, 255) in the BGR copy of the image)
output_image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
output_image = cv2.drawKeypoints(output_image, keypoints, np.array([]),
                                 (0, 0, 255), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

# Show the annotated image and wait for a key press before closing the window
print("output image")
cv2.imshow("outputimage",output_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

print(f"Number of blobs detected: {len(keypoints)}")

The code below plots the coordinates of the blobs on a graph (I thought it might be easier to work with them this way):

#convert image into graph

import matplotlib.pyplot as plt
import numpy

# Centre of every detected blob, rounded to whole pixels
blob_coords = np.array([kp.pt for kp in keypoints])
rounded_coords = np.round(blob_coords).astype(int)

x_coords = rounded_coords[:, 0]
y_coords = rounded_coords[:, 1]

# Proximity-based grouping idea: dots whose x distance and y distance are both
# below some minimum distance belong together, so their coordinates would be
# stored as one group.

# Smallest pairwise x gap that is larger than 15 px (threshold for cell width).
# 10000 is the starting value and is kept when no pair qualifies.
x_gaps = np.abs(x_coords[:, None] - x_coords[None, :])
x_gaps = x_gaps[(x_gaps > 15) & (x_gaps <= 10000)]
minx = x_gaps.min() if x_gaps.size else 10000

# Same for the y axis (threshold for cell height).
y_gaps = np.abs(y_coords[:, None] - y_coords[None, :])
y_gaps = y_gaps[(y_gaps > 15) & (y_gaps <= 10000)]
miny = y_gaps.min() if y_gaps.size else 10000

print(f"Smallest x difference: {minx}, Smallest y difference: {miny}",)

# Scatter plot of the blob centres; the y axis is flipped so the plot has the
# same orientation as the image.
fig, ax = plt.subplots()
ax.scatter(x_coords, y_coords, color="blue")
ax.invert_yaxis()
plt.title("Braille Cell Detection")
plt.show()

I tried to separate them via proximity (the dots that are in close proximity to each other get grouped), but I couldn't figure out the logic for it. I also tried clustering (K-means), but it isn't very accurate and it wouldn't work for images with a different number of characters, because it always needs to know in advance how many clusters are to be formed.

# Trying out the k-means clustering method.
# K-means doesn't work here (the number of clusters can't be figured out from the image);
# it could work if n_clusters could be determined.

import math
from sklearn.cluster import KMeans

# (x, y) centre of each detected blob, rounded to whole pixels for simplicity
blob_coords = np.array([kp.pt for kp in keypoints])
rounded_coords = np.round(blob_coords).astype(int)

x_coords, y_coords = rounded_coords[:, 0], rounded_coords[:, 1]

# Plot the raw blob positions; the y axis is inverted for image-like coordinates
fig, ax = plt.subplots()
ax.scatter(x_coords, y_coords, color="blue")
ax.invert_yaxis()
plt.title("Braille Cell Detection")
plt.show()

inertias = []

# Cluster the dots into a fixed 26 groups and colour each dot by its cluster label
kmeans = KMeans(n_clusters=26)
kmeans.fit(rounded_coords)

plt.scatter(x_coords, y_coords, c=kmeans.labels_)
plt.show()

Solution

Here's a PoC for translating Braille to text from a well-defined image. Real images can be more complicated, especially for handwritten Braille, since the dot/cell spacing is not constant. Also, this image is of the uncontracted type (Grade I), so translating contracted Braille (Grade II) could require a significantly larger mapping dictionary and a more elaborate algorithm to identify cell indexes.

Key points of the algorithm:

Extract and sort coordinates of detected keypoints.
Find the x,y differences between consecutive dots in the sorted list. A negative x difference means the current dot begins a new row of dots: either the second/third row of the cells in the same line, or the start of a new line. E.g.: previous point p0=(520, 69), current point p1=(69, 140),
so xydiff = (-451, 71). xdiff is negative and ydiff is greater than the vertical cell size --> the current dot starts a new line.
Find cell parameters: Min/max x/y coord., x min cell spacing, y min cell spacing.
Group coordinates by line into lists (group_by_lines()).
Find the dot indexes of each cell in the line, e.g. the cell `. .` --> (1,4) --> 'c'.
Map the tuple to a text character.

import sys
import cv2
import numpy as np

# Lookup table from a sorted tuple of dot indexes to the lowercase letter it
# encodes. Dots are numbered 1-2-3 down the left column of a cell and 4-5-6
# down the right column. Only the letters a-z are present; any other dot
# combination is not in the table.
cell_map = {
    (1,): 'a', (1,2): 'b', (1,4): 'c', (1,4,5): 'd', (1,5): 'e',
    (1,2,4): 'f', (1,2,4,5): 'g', (1,2,5): 'h', (2,4): 'i', (2,4,5): 'j',
    (1,3): 'k', (1,2,3): 'l', (1,3,4): 'm', (1,3,4,5): 'n', (1,3,5): 'o',
    (1,2,3,4): 'p', (1,2,3,4,5): 'q', (1,2,3,5): 'r', (2,3,4): 's', (2,3,4,5): 't',
    (1,3,6): 'u', (1,2,3,6): 'v', (2,4,5,6): 'w', (1,3,4,6): 'x', (1,3,4,5,6): 'y', (1,3,5,6): 'z'
    }

def get_build_detector_params():
    """Build the SimpleBlobDetector parameters used to find the Braille dots.

    Returns:
        A ``cv2.SimpleBlobDetector_Params`` object with the area, circularity
        and inertia filters enabled and the convexity filter disabled.
    """
    settings = (
        # Area filter (size of the blob); adjust based on dot size.
        ("filterByArea", True),
        ("minArea", 10),
        ("maxArea", 1000),
        # Circularity filter; adjust for the shape of the dots.
        ("filterByCircularity", True),
        ("minCircularity", 0.9),
        # Convexity filter is switched off; its threshold is still stored.
        ("filterByConvexity", False),
        ("minConvexity", 0.7),
        # Inertia filter (roundness).
        ("filterByInertia", True),
        ("minInertiaRatio", 0.95),
    )
    params = cv2.SimpleBlobDetector_Params()
    for attribute, value in settings:
        setattr(params, attribute, value)
    return params

def show_detection(image, detected_lines, xcell, xsep, xmin, ymax):
    """Show the image with the keypoints of each detected line in one colour.

    This is a visual debugging aid: dots are coloured by the line they were
    assigned to, so wrongly grouped lines stand out. Four colours are used and
    the sequence repeats every four lines.

    Args:
        image: Grayscale image the keypoints were detected in.
        detected_lines: List of lines, each a list of keypoints.
        xcell, xsep, xmin, ymax: Accepted for the caller's convenience;
            not used by the drawing code.
    """
    palette = [(0, 0, 255), (0, 255, 0), (255, 0, 0), (178, 102, 255)]

    # Colour drawing needs a 3-channel copy of the grayscale input.
    canvas = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    for line_no, line_keypoints in enumerate(detected_lines):
        canvas = cv2.drawKeypoints(
            canvas,
            line_keypoints,
            np.array([]),
            palette[line_no % len(palette)],
            cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
        )

    print("output image")
    cv2.imshow("outputimage", canvas)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def _rounded_min(values, lower_bound, what):
    """Return the smallest rounded value strictly above lower_bound as an int."""
    candidates = [round(v) for v in values if v > lower_bound]
    if not candidates:
        raise ValueError(
            f"cannot determine {what}: no value greater than {lower_bound} "
            "among the detected dots"
        )
    return int(min(candidates))


def get_cell_parameters(blob_coords, xydiff):
    """Derive the grid measurements used to split detected dots into cells.

    Args:
        blob_coords: Sequence of (x, y) dot coordinates.
        xydiff: Sequence of (dx, dy) differences between consecutive entries
            of ``blob_coords``.

    Returns:
        A tuple ``(ycell, ymin, xmin, xcell, xsep, xmax)`` of plain ints:
        ``xcell`` / ``ycell`` are the smallest x / y differences above 1
        (dot spacing inside a cell), ``xsep`` is the smallest x difference
        above ``1.5 * xcell`` (spacing between cells), and ``xmin``, ``xmax``,
        ``ymin`` are the extreme coordinates, ignoring coordinates <= 1.

    Raises:
        ValueError: If any of the measurements has no qualifying value, e.g.
            when there are no dots or no gap wider than ``1.5 * xcell``.
    """
    x_steps = [xy[0] for xy in xydiff]
    y_steps = [xy[1] for xy in xydiff]
    # x / y separation between dots in a cell
    xcell = _rounded_min(x_steps, 1, "x dot spacing (xcell)")
    ycell = _rounded_min(y_steps, 1, "y dot spacing (ycell)")

    # Coordinates at or below 1 are ignored, as in the difference filters above.
    xcoords = [xy[0] for xy in blob_coords if xy[0] > 1]
    ycoords = [xy[1] for xy in blob_coords if xy[1] > 1]
    if not xcoords or not ycoords:
        raise ValueError("cannot determine image extent: no detected dots with coordinates > 1")
    # minimum x in the whole image
    xmin = int(round(min(xcoords)))
    # max x in the whole image. Represents last dot in a line.
    xmax = int(round(max(xcoords)))
    # minimum y in the whole image
    ymin = int(round(min(ycoords)))

    # x separation between cells: smallest step clearly wider than a dot step
    xsep = _rounded_min(x_steps, xcell + xcell / 2, "x cell separation (xsep)")
    return ycell, ymin, xmin, xcell, xsep, xmax

def group_by_lines(kp_map, blob_coords, xydiff, ycell):
    """Split the sorted dots into lines of text.

    Args:
        kp_map: Mapping from an ``(x, y)`` coordinate pair to its keypoint.
        blob_coords: Dot coordinates in processing order.
        xydiff: ``(dx, dy)`` for each dot after the first, relative to the
            dot before it.
        ycell: Vertical spacing between dots in a cell.

    Returns:
        A list of lines, each a list of the ``kp_map`` values for that line.
    """
    first_x, first_y = blob_coords[0][0], blob_coords[0][1]
    detected_lines = [[kp_map[first_x, first_y]]]
    print(f"new line at: {int(first_x)},{int(first_y)}")

    # A dot starts a new line when it jumps back to the left (negative dx)
    # and down by at least three dot rows.
    line_gap = ycell * 3
    for idx, step in enumerate(xydiff, start=1):
        point = blob_coords[idx]
        if step[0] < 0 and step[1] >= line_gap:
            print(f"new line at: {point}, curr xdiff: {step}, {line_gap}, previous: {blob_coords[idx - 1]}")
            detected_lines.append([])
        detected_lines[-1].append(kp_map[point[0], point[1]])

    return detected_lines

def char_to_tuple(ycell, offset, cur_char):
    """Convert the dots of one cell into a sorted tuple of dot indexes.

    The result is meant to be looked up in the cell_map dict. Indexes are
    laid out as

        1 4
        2 5
        3 6

    so, for example, a full left column plus the bottom-right dot gives
    (1, 2, 3, 6), which maps to 'v'.

    Columns and rows are measured from the smallest x and y found among the
    dots passed in, so the cell is assumed to have a dot in its left column
    and in its top row.

    Args:
        ycell: Vertical spacing between dots in a cell.
        offset: Tolerance added to the column and row boundaries.
        cur_char: Sequence of (x, y) coordinates of the dots in the cell.

    Returns:
        Sorted tuple of the dot indexes found.
    """
    # Boundaries: x <= x1 is the left column, y <= y1 the top row,
    # y1 < y <= y2 the middle row, anything lower the bottom row.
    x1 = np.array([dot[0] for dot in cur_char]).min() + offset
    y1 = np.array([dot[1] for dot in cur_char]).min() + offset
    y2 = y1 + ycell + offset

    cell = []
    for dot in cur_char:
        dot_x, dot_y = dot[0], dot[1]

        if dot_x <= x1:
            column = 0
        elif dot_x >= x1:
            column = 1
        else:
            # Only reachable for values that do not compare (e.g. NaN).
            column = None

        if dot_y <= y1:
            row = 1
        elif dot_y <= y2:
            row = 2
        elif dot_y >= y2:
            row = 3
        else:
            row = None

        if column is None or row is None:
            cell_idx = None
        else:
            cell_idx = 3 * column + row

        if cell_idx is None or cell_idx in cell:
            print(f"WARNING. cell_idx duplicate or not found: {cell_idx}, {dot}", x1, y1, y2)
        if not cell and cell_idx == 3:
            print("ERROR. First cell_idx can't be 3")
        cell.append(cell_idx)
    return tuple(sorted(cell))

text = ''
image_path = "braille.jpg"
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread returns None instead of raising when the file cannot be read.
if image is None:
    sys.exit(f"ERROR. Could not read image: {image_path}")

params = get_build_detector_params()
# Create a detector with the parameters
detector = cv2.SimpleBlobDetector_create(params)
# Detect blobs
keypoints = detector.detect(image)
# Everything below indexes into the coordinate array, so stop early if it would be empty.
if not keypoints:
    sys.exit(f"ERROR. No dots detected in image: {image_path}")
# map of keypoints coordinates (rounded to 2 decimals) to keypoints
kp_map = { (round(kp.pt[0], 2), round(kp.pt[1], 2)): kp for kp in keypoints}

# all dots coordinates, sorted by y and then by x to help find lines.
blob_coords = np.array(list(kp_map.keys()))
blob_coords = blob_coords[np.lexsort((blob_coords[:,0], blob_coords[:,1]))]

# x,y differences between contiguous dots. Negative values mean second/third rows in a cell and the start of a line.
# e.g.: previous point p0=(520,69), current point =(69, 140). xydiff = (-451, 71).
# xdiff is negative, ydiff is greater than vertical cell size --> current dot is starting a line.
xydiff = np.array([ (kp[0] - blob_coords[i-1][0], kp[1] - blob_coords[i-1][1]) for i,kp in enumerate(blob_coords) if i > 0 ])

print(f"blob_coords: {len(blob_coords)}, xydiff: {len(xydiff)}")
ycell, ymin, xmin, xcell, xsep, xmax = get_cell_parameters(blob_coords, xydiff)

print(f"x params: xcell {xcell}, xmin {xmin}, xsep {xsep}, xmax {xmax}")
print(f"y params: ycell {ycell}, ymin {ymin}")
print(f"max cells per line: {round((xmax)/(xcell + xsep))}")
offset = 2

# List of list of cells by line
detected_lines = group_by_lines(kp_map, blob_coords, xydiff, ycell)

# blob_coords is in the same order as the concatenated lines, so each line is
# the slice [p0:p1] of it.
p0 = 0
for j, line in enumerate(detected_lines):
    p1 = p0 + len(line)
    print(f"\nSTARTING line: {j}, p0: {p0}, p1: {p1} {len(xydiff[p0:p1])}/{len(line)}")

    # coordinates of a line
    cur_coor = blob_coords[p0:p1]
    # Slide an x window of one cell pitch across the line, from xmin up to xmax.
    xchar1 = xmin
    xchar2 = xmin + xcell + xsep
    while xchar1 <= xmax:
        cur_char = cur_coor[ (xchar1 <= cur_coor[:,0]) & (cur_coor[:,0] <= xchar2) ]
        if len(cur_char) == 0:
            # No coordinates found at the x-range. It's a space so shift x range and move to next cell.
            # NOTE: this also applies to empty windows to the right of the last
            # character of a line, which are therefore emitted as spaces too.
            print(f"WARNING: No char found. adding space to text. {xchar1}, {xchar2}")
            text += ' '
            xchar1 += xcell + xsep
            xchar2 = xchar1 + xcell + xsep
            continue
        # order the dots of the cell by x
        cur_char = cur_char[np.lexsort((cur_char[:,0], ))]
        # build a cell indexes tuple to finally decode the cell to text
        cell = char_to_tuple(ycell, offset, cur_char)

        if cell in cell_map:
            print(cell_map[cell], ': ', cell )
            text += cell_map[cell]
        else:
            print('ERROR. Cell to text mapping not found: ', cell, cur_char)
            text += '?'

        xchar1 += xcell + xsep
        xchar2 = xchar1 + xcell + xsep
        if xchar2 > xmax:
            xchar2 = xmax

    p0 = p1

print(f"\nFound text:\n'{text}'")
show_detection(image, detected_lines, xcell, xsep, xmin, 400)
sys.exit()

Text decoded from the image below (known bug: there is an extra space at the end).

'abcdefghijklmnopqrstu vwxyz '