I have a JPG image that contain mobile brand names:
Now I want to detect each word first character by python script
I wrote following python script for this:
import cv2
import numpy as np
from tkinter import Tk, Canvas, Frame, Scrollbar, BOTH, VERTICAL, HORIZONTAL
from PIL import Image, ImageTk
# Function to draw rectangles around shapes and display using Tkinter
def draw_rectangles(image_path):
# Create a Tkinter window to display the image
root = Tk()
root.title("Image with Left-Most Rectangles Only")
# Load the image
image = cv2.imread(image_path)
# Convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Apply adaptive thresholding to get better separation of text
thresh = cv2.adaptiveThreshold(
gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
)
# Find contours in the binary image
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Dictionary to store contours grouped by Y-coordinate ranges
contours_by_y = {}
# Sort contours by X-coordinate to ensure we pick the left-most character first
sorted_contours = sorted(contours, key=lambda c: cv2.boundingRect(c)[0])
# Group contours by their Y coordinate to keep only the left-most rectangle per Y range
for contour in sorted_contours:
x, y, w, h = cv2.boundingRect(contour)
if w > 15 and h > 15: # Adjust the size filter to remove small artifacts
aspect_ratio = w / float(h)
# Ensure the aspect ratio is within the typical range of letters
if 0.2 < aspect_ratio < 5:
y_range = y // 20 # Group by a smaller Y coordinate range for better separation
# Check if the current rectangle is more left-most in X within its Y range
if y_range not in contours_by_y:
contours_by_y[y_range] = (x, y, w, h) # Store the first contour found in this range
else:
# Compare and keep the left-most (smallest X) rectangle
current_x, _, _, _ = contours_by_y[y_range]
# Check distance between new contour and the existing one to avoid close detection
if x < current_x and (x - current_x) > 20: # Distance threshold to filter out close contours
contours_by_y[y_range] = (x, y, w, h)
# Draw only the left-most rectangles
for (x, y, w, h) in contours_by_y.values():
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2) # Red color in BGR
# Convert the image to RGB (OpenCV uses BGR by default)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Convert the image to a format Tkinter can use
image_pil = Image.fromarray(image_rgb)
image_tk = ImageTk.PhotoImage(image_pil)
# Create a frame for the Canvas and scrollbars
frame = Frame(root)
frame.pack(fill=BOTH, expand=True)
# Create a Canvas widget to display the image
canvas = Canvas(frame, width=image_tk.width(), height=image_tk.height())
canvas.pack(side="left", fill="both", expand=True)
# Add scrollbars to the Canvas
v_scrollbar = Scrollbar(frame, orient=VERTICAL, command=canvas.yview)
v_scrollbar.pack(side="right", fill="y")
h_scrollbar = Scrollbar(frame, orient=HORIZONTAL, command=canvas.xview)
h_scrollbar.pack(side="bottom", fill="x")
canvas.configure(yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set)
canvas.create_image(0, 0, anchor="nw", image=image_tk)
canvas.config(scrollregion=canvas.bbox("all"))
# Keep a reference to the image to prevent garbage collection
canvas.image = image_tk
root.mainloop()
# Path to your image
image_path = r"E:\Desktop\mobile_brands\ORG_027081-Recovered.jpg"
# Call the function
draw_rectangles(image_path)
But I don't know why it not working good. The accuracy of this script is 90%. for example in above image it detect "a" character in "Samsung"
where is my script problem?
How can I fix this problem?
maybe by Y and X coordinate can not detect left-most boxes in image.
Note that I don't want to use OCR
I will first sort those contours by y-coordinate, then group the sorted contours by rows. Finally draw the left-most one in each row:
# Sort contours by Y-coordinate
sorted_contours = sorted(contours, key=lambda c: cv2.boundingRect(c)[1])
# group contours by row
last_y = None
rows = []
for contour in sorted_contours:
x, y, w, h = cv2.boundingRect(contour)
if w > 15 or h > 15:
if last_y is None or abs(last_y-y) > 50: # 50 is for provided image, it may not work for other image
# this is a new row
rows.append([])
last_y = y # save the reference y-coordinate
rows[-1].append((x, y, w, h)) # append item into current row
for row in rows:
# show left-most one in current row
x, y, w, h = sorted(row, key=lambda x: x[0])[0]
cv2.rectangle(image, (x, y), (x+w, y+h), (0, 0, 255), 2)
Note that I have an assumption that difference of y-coordinate between rows is larger than 50 pixels.
Result: