I'm making a project that transforms Braille to text. I have written the code for identifying the Braille dots in the image, but I can't figure out how to segment the Braille into cells.
This part identifies the blobs in the image (smaller, low-quality images don't work right now):
import cv2
import numpy as np
from sklearn.cluster import KMeans
# Load the image as grayscale.
image_path = "braille.jpg"
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread does not raise for a missing/unreadable file, it returns None.
# Fail here with a clear message instead of a confusing error in detect().
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Set up SimpleBlobDetector
params = cv2.SimpleBlobDetector_Params()

# Filter by area (size of the blob, in pixels)
params.filterByArea = True
params.minArea = 100  # Adjust based on dot size
params.maxArea = 1000

# Filter by circularity
params.filterByCircularity = True
params.minCircularity = 0.9  # Adjust for shape of the dots

# Convexity filter is switched off; minConvexity only matters if it is enabled.
params.filterByConvexity = False
params.minConvexity = 0.7

# Filter by inertia (roundness)
params.filterByInertia = True
params.minInertiaRatio = 0.95

# Create a detector with the parameters
detector = cv2.SimpleBlobDetector_create(params)

# Detect blobs
keypoints = detector.detect(image)

# Draw detected blobs as red circles on a BGR copy of the grayscale image
output_image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
output_image = cv2.drawKeypoints(
    output_image,
    keypoints,
    np.array([]),
    (0, 0, 255),
    cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
)
print("output image")
cv2.imshow("outputimage", output_image)
cv2.waitKey(0)  # block until a key is pressed
cv2.destroyAllWindows()
print(f"Number of blobs detected: {len(keypoints)}")
The code below plots the coordinates of the blobs on a graph (I thought it might be easier to work with them this way):
#convert image into graph
import matplotlib.pyplot as plt
import numpy
# (x, y) centre of every detected blob. reshape(-1, 2) keeps the array
# two-dimensional even when no blobs were detected, so the column slicing
# below does not raise IndexError on an empty result.
blob_coords = np.array([kp.pt for kp in keypoints]).reshape(-1, 2)
rounded_coords = np.round(blob_coords).astype(int)  # rounded coords
x_coords = rounded_coords[:, 0]
y_coords = rounded_coords[:, 1]

# PROXIMITY BASED GROUPING (idea, not implemented yet):
#   if the x distance is less than the minimum distance
#   and the y distance is less than the minimum distance,
#   store the x and y coordinates in the same group.


def _smallest_gap(values, min_gap=15, default=10000):
    """Return the smallest pairwise absolute difference greater than min_gap.

    Args:
        values: 1-D sequence of integer coordinates.
        min_gap: differences less than or equal to this are ignored.
        default: value returned when no pair is further apart than min_gap.

    Builds an n x n difference matrix, so it is meant for a modest number
    of dots.
    """
    values = np.asarray(values)
    gaps = np.abs(values[:, None] - values[None, :])
    gaps = gaps[gaps > min_gap]
    return gaps.min() if gaps.size else default


# Smallest x and y differences above the threshold (trying for proximity based)
minx = _smallest_gap(x_coords)  # Threshold for cell width
miny = _smallest_gap(y_coords)  # Threshold for cell height
print(f"Smallest x difference: {minx}, Smallest y difference: {miny}")

# Plotting
fig, ax = plt.subplots()
ax.scatter(x_coords, y_coords, color="blue")  # Plot the blobs
ax.invert_yaxis()  # image coordinates grow downwards
plt.title("Braille Cell Detection")
plt.show()
I tried to separate them via proximity (the blobs that are close to each other get grouped), but I couldn't figure out the logic for it. I also tried clustering (k-means), but it isn't very accurate, and it wouldn't work for images with different numbers of characters because it needs to be told in advance how many clusters to form.
# trying out kmeans clustering method
# kmeans dont work (can't figure out number of clusters from image)
# could work if nclusters can be figured out
import math
from sklearn.cluster import KMeans
# Extract (x, y) positions of blobs. reshape(-1, 2) keeps the array
# two-dimensional even when nothing was detected.
blob_coords = np.array([kp.pt for kp in keypoints]).reshape(-1, 2)
rounded_coords = np.round(blob_coords).astype(int)  # Round coordinates for simplicity
x_coords = rounded_coords[:, 0]
y_coords = rounded_coords[:, 1]

fig, ax = plt.subplots()
ax.scatter(x_coords, y_coords, color="blue")  # Plot the blobs
ax.invert_yaxis()  # Invert Y-axis for image-like coordinates
plt.title("Braille Cell Detection")
plt.show()

# NOTE(review): never filled in below; presumably reserved for an
# elbow-method sweep over the number of clusters.
inertias = []

# KMeans cannot form more clusters than there are samples, so cap the
# requested 26 clusters at the number of detected blobs and skip clustering
# entirely when there is nothing to cluster.
n_clusters = min(26, len(rounded_coords))
if n_clusters > 0:
    kmeans = KMeans(n_clusters=n_clusters)
    kmeans.fit(rounded_coords)
    plt.scatter(x_coords, y_coords, c=kmeans.labels_)
    plt.show()
else:
    print("No blobs detected; skipping k-means clustering.")
Here's a PoC for translating Braille to text from a well-defined image. Real images can be more complicated, especially for handwritten Braille, since dot/cell spacing is not constant. Also, this image is of the uncontracted type (Grade I), so translating contracted Braille (Grade II) could require a much larger mapping dictionary and a more elaborate algorithm to identify cell indexes.
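Key points of the algorithm:
- Dot coordinates are sorted by row (y) and then by x, and the x/y differences between consecutive dots (xydiff) are computed. These differences are used to detect where a new line starts. E.g.: previous point p0=(520, 69), current point p1=(69, 140), so xydiff = (-451, 71). xdiff is negative and ydiff is greater than the vertical cell size --> the current dot is starting a line.
- Each cell is converted into a sorted tuple of dot indexes, which is looked up in a mapping dictionary to get the text character. E.g.: a cell with two dots side by side in its top row (. .) --> (1,4) --> 'c'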
import sys
import cv2
import numpy as np
# Grade 1 (uncontracted) Braille letters. Each letter is written as the string
# of raised-dot indexes in its cell (left column is 1-2-3 from top to bottom,
# right column is 4-5-6) and then converted to the sorted-tuple key format
# that char_to_tuple() returns.
_LETTER_DOTS = {
    'a': '1', 'b': '12', 'c': '14', 'd': '145', 'e': '15',
    'f': '124', 'g': '1245', 'h': '125', 'i': '24', 'j': '245',
    'k': '13', 'l': '123', 'm': '134', 'n': '1345', 'o': '135',
    'p': '1234', 'q': '12345', 'r': '1235', 's': '234', 't': '2345',
    'u': '136', 'v': '1236', 'w': '2456', 'x': '1346', 'y': '13456',
    'z': '1356',
}
cell_map = {
    tuple(int(digit) for digit in dots): letter
    for letter, dots in _LETTER_DOTS.items()
}
def get_build_detector_params(*, min_area=10, max_area=1000,
                              min_circularity=0.9, min_inertia_ratio=0.95):
    """Build the SimpleBlobDetector parameters used to find Braille dots.

    All arguments are keyword-only and default to the values this script
    has always used, so calling the function without arguments is unchanged.

    Args:
        min_area: smallest blob area to keep (adjust based on dot size).
        max_area: largest blob area to keep.
        min_circularity: minimum circularity (adjust for the shape of the dots).
        min_inertia_ratio: minimum inertia ratio (roundness).

    Returns:
        A configured cv2.SimpleBlobDetector_Params instance.
    """
    params = cv2.SimpleBlobDetector_Params()

    # Filter by area (size of the blob)
    params.filterByArea = True
    params.minArea = min_area
    params.maxArea = max_area

    # Filter by circularity
    params.filterByCircularity = True
    params.minCircularity = min_circularity

    # Convexity filter is switched off; minConvexity only matters if it is
    # enabled.
    params.filterByConvexity = False
    params.minConvexity = 0.7

    # Filter by inertia (roundness)
    params.filterByInertia = True
    params.minInertiaRatio = min_inertia_ratio

    return params
def show_detection(image, detected_lines, xcell, xsep, xmin, ymax):
    """Display the detected dots, circled in a color chosen per line.

    Visual debugging aid to check that lines were split correctly: every dot
    of a line is drawn in the same color, and the colors repeat every four
    lines. Dots that are not circled in color were not part of any entry in
    detected_lines.

    Args:
        image: grayscale image the dots were detected on.
        detected_lines: list of lines, each a list of keypoints.
        xcell, xsep, xmin, ymax: currently unused; kept so existing callers
            keep working.

    Blocks until a key is pressed in the preview window.
    """
    palette = [(0, 0, 255), (0, 255, 0), (255, 0, 0), (178, 102, 255)]

    output_image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    for i, line in enumerate(detected_lines):
        # Cycle through the palette instead of growing it to match the
        # number of lines.
        color = palette[i % len(palette)]
        output_image = cv2.drawKeypoints(
            output_image,
            line,
            np.array([]),
            color,
            cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
        )

    print("output image")
    cv2.imshow("outputimage", output_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def get_cell_parameters(blob_coords, xydiff):
    """Derive the cell geometry from the detected dot coordinates.

    Args:
        blob_coords: array of (x, y) dot coordinates.
        xydiff: array of (dx, dy) differences between consecutive dots of
            blob_coords.

    Returns:
        Tuple (ycell, ymin, xmin, xcell, xsep, xmax) of ints:
            ycell: smallest rounded y step greater than 1 (y separation
                between dots in a cell).
            ymin: minimum y in the whole image.
            xmin: minimum x in the whole image.
            xcell: smallest rounded x step greater than 1 (x separation
                between dots in a cell).
            xsep: smallest rounded x step greater than 1.5 * xcell
                (x separation between cells).
            xmax: maximum x in the whole image; represents the last dot
                in a line.

    Raises:
        ValueError: if the input does not contain the steps or coordinates
            needed to compute one of the values.
    """
    coords = np.asarray(blob_coords, dtype=float).reshape(-1, 2)
    steps = np.asarray(xydiff, dtype=float).reshape(-1, 2)

    # Only forward steps larger than one pixel are considered; zero and
    # negative steps (same row/column, or jumps back) are ignored.
    x_steps = steps[:, 0]
    y_steps = steps[:, 1]
    forward_x = x_steps[x_steps > 1]
    forward_y = y_steps[y_steps > 1]
    if forward_x.size == 0 or forward_y.size == 0:
        raise ValueError(
            "Not enough dots to measure the spacing inside a cell "
            "(no x or y step greater than 1 pixel)."
        )
    xcell = int(np.round(forward_x.min()))
    ycell = int(np.round(forward_y.min()))

    # Coordinates at or below 1 pixel are excluded from the extremes.
    xcoords = coords[:, 0][coords[:, 0] > 1]
    ycoords = coords[:, 1][coords[:, 1] > 1]
    if xcoords.size == 0 or ycoords.size == 0:
        raise ValueError("No dot coordinates greater than 1 pixel were found.")
    xmin = int(np.round(xcoords.min()))
    xmax = int(np.round(xcoords.max()))
    ymin = int(np.round(ycoords.min()))

    # Steps clearly wider than the in-cell spacing are taken as cell gaps.
    cell_gaps = x_steps[x_steps > xcell + xcell / 2]
    if cell_gaps.size == 0:
        raise ValueError(
            "Could not measure the separation between cells "
            "(no x step greater than 1.5 times the in-cell spacing)."
        )
    xsep = int(np.round(cell_gaps.min()))

    return ycell, ymin, xmin, xcell, xsep, xmax
def group_by_lines(kp_map, blob_coords, xydiff, ycell):
    """Group keypoints into lines of text.

    Args:
        kp_map: dict mapping an (x, y) coordinate tuple to its keypoint. The
            coordinates in blob_coords must be exactly the keys of this dict.
        blob_coords: array of (x, y) dot coordinates, in the order in which
            they should be walked.
        xydiff: array of (dx, dy) differences; xydiff[i] is the step from
            blob_coords[i] to blob_coords[i + 1].
        ycell: y separation between dots in a cell.

    Returns:
        A list of lines, each a list of keypoints in walking order. Empty
        when blob_coords is empty.
    """
    if len(blob_coords) == 0:
        return []

    first = blob_coords[0]
    detected_lines = [[kp_map[first[0], first[1]]]]
    print(f"new line at: {int(first[0])},{int(first[1])}")

    # A dot starts a new line when x jumps back to the left and y moves down
    # by at least three in-cell row steps.
    line_gap = ycell * 3
    for i, d in enumerate(xydiff):
        curr_pt = blob_coords[i + 1]
        if d[0] < 0 and d[1] >= line_gap:
            print(f"new line at: {curr_pt}, curr xdiff: {d}, {line_gap}, previous: {blob_coords[i]}")
            detected_lines.append([kp_map[curr_pt[0], curr_pt[1]]])
        else:
            detected_lines[-1].append(kp_map[curr_pt[0], curr_pt[1]])
    return detected_lines
def char_to_tuple(ycell, offset, cur_char):
    """Return a sorted tuple with the dot indexes present in one cell.

    The tuple should map to a text character in the cell_map dict.

    Indexes are
        1 4
        2 5
        3 6

    Example:
        Cell      Indexes          Text
        .
        .
        . .   --> (1, 2, 3, 6) --> 'v'

    Rows and columns are measured from the smallest x and y found in
    cur_char, so the cell is expected to have at least one dot in its left
    column and one in its top row.

    Args:
        ycell: y separation between dots in a cell.
        offset: tolerance, in pixels, added to the row/column limits.
        cur_char: array of (x, y) coordinates of the dots of one cell.

    Returns:
        Sorted tuple of dot indexes; empty when cur_char is empty.
    """
    if len(cur_char) == 0:
        return ()

    # Right edge of the left column and lower edges of the first two rows.
    x1 = np.array([cc[0] for cc in cur_char]).min() + offset
    y1 = np.array([cc[1] for cc in cur_char]).min() + offset
    y2 = y1 + ycell + offset

    cell = []
    for cc in cur_char:
        # Left column holds indexes 1-3, right column holds 4-6.
        column_base = 0 if cc[0] <= x1 else 3
        if cc[1] <= y1:
            row = 1
        elif cc[1] <= y2:
            row = 2
        else:
            row = 3
        cell_idx = column_base + row

        if cell_idx in cell:
            print(f"WARNING. cell_idx duplicate or not found: {cell_idx}, {cc}", x1, y1, y2)
        if not cell and cell_idx == 3:
            print("ERROR. First cell_idx can't be 3")
        # Duplicates are still recorded so that the resulting tuple does not
        # match any entry of the mapping.
        cell.append(cell_idx)
    return tuple(sorted(cell))
image_path = "braille.jpg"
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread does not raise for a missing/unreadable file, it returns None.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

params = get_build_detector_params()
# Create a detector with the parameters
detector = cv2.SimpleBlobDetector_create(params)
# Detect blobs
keypoints = detector.detect(image)

# map of keypoints coordinates to keypoints
kp_map = {(round(kp.pt[0], 2), round(kp.pt[1], 2)): kp for kp in keypoints}
if len(kp_map) < 2:
    # The cell geometry is measured from the steps between dots, so a
    # single dot (or none) cannot be decoded.
    sys.exit(f"Need at least two Braille dots to decode; detected {len(kp_map)}.")

# all dots coordinates, sorted by y and then by x to help find lines.
# NOTE: dots of one row are only ordered by x when their rounded y values
# are exactly equal.
blob_coords = np.array(list(kp_map.keys()))
blob_coords = blob_coords[np.lexsort((blob_coords[:, 0], blob_coords[:, 1]))]

# x,y differences between contiguous dots. Negative x values mean the
# second/third rows in a cell or the start of a line.
# e.g.: previous point p0=(520,69), current point p1=(69,140).
#       xydiff = (-451, 71).
# xdiff is negative, ydiff is greater than vertical cell size
#   --> current dot is starting a line.
xydiff = np.diff(blob_coords, axis=0)
print(f"blob_coords: {len(blob_coords)}, xydiff: {len(xydiff)}")

ycell, ymin, xmin, xcell, xsep, xmax = get_cell_parameters(blob_coords, xydiff)
print(f"x params: xcell {xcell}, xmin {xmin}, xsep {xsep}, xmax {xmax}")
print(f"y params: ycell {ycell}, ymin {ymin}")
print(f"max cells per line: {round((xmax)/(xcell + xsep))}")

offset = 2
# List of list of keypoints by line
detected_lines = group_by_lines(kp_map, blob_coords, xydiff, ycell)

# Horizontal distance the scanning window advances for every cell.
pitch = xcell + xsep
decoded = []
p0 = 0
# process each line
for j, line in enumerate(detected_lines):
    # blob_coords is in the same order as the grouped keypoints, so each
    # line is the next len(line) coordinates.
    p1 = p0 + len(line)
    print(f"\nSTARTING line: {j}, p0: {p0}, p1: {p1} {len(xydiff[p0:p1])}/{len(line)}")
    # coordinates of a line
    cur_coor = blob_coords[p0:p1]

    xchar1 = xmin
    xchar2 = xmin + pitch
    while xchar1 <= xmax:
        in_window = (xchar1 <= cur_coor[:, 0]) & (cur_coor[:, 0] <= xchar2)
        cur_char = cur_coor[in_window]
        if len(cur_char) == 0:
            # No coordinates found in the x-range. It's a space, so shift
            # the x range and move to the next cell.
            print(f"WARNING: No char found. adding space to text. {xchar1}, {xchar2}")
            decoded.append(' ')
            xchar1 += pitch
            xchar2 = xchar1 + pitch
            continue

        # order the dots of the cell by x (stable, so y order is kept)
        cur_char = cur_char[np.lexsort((cur_char[:, 0],))]
        # build a cell indexes tuple to finally decode the cell to text
        cell = char_to_tuple(ycell, offset, cur_char)
        if cell in cell_map:
            print(cell_map[cell], ': ', cell)
            decoded.append(cell_map[cell])
        else:
            print('ERROR. Cell to text mapping not found: ', cell, cur_char)
            decoded.append('?')

        xchar1 += pitch
        xchar2 = min(xchar1 + pitch, xmax)

    p0 = p1

# Empty cell positions after the last letter are decoded as spaces; strip
# them so the result does not end with spurious blanks.
text = ''.join(decoded).rstrip(' ')
print(f"\nFound text:\n'{text}'")
show_detection(image, detected_lines, xcell, xsep, xmin, 400)
sys.exit()
Text decoded from the image below (known bug: an extra space is appended at the end):
'abcdefghijklmnopqrstu vwxyz '