I am trying to detect text and remove the thick letters. The goal is the same as the one proposed in the following question: How to detect text on an X-Ray image with OpenCV ("to extract the oriented bounding boxes as a matrix").
Two sample images: Sample Image #1 Sample Image #2
Here, the letters L, J, C, O (from the 1st image) and L, D, A, N and the circle shape (from the 2nd image) must be detected. I tried the method proposed in the link above (How to detect text on an X-Ray image with OpenCV); however, it fails to detect the text in the background.
Original Image --> Binary image
Morph close --> Detected text
Morph close detected texts (nothing)
As you can see, it fails to detect the text in the background. It just returns a black image. I don't know what went wrong.
The code:
import cv2
import numpy as np
# Load image, create mask, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('HandImage.png')
if image is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError("cv2.imread could not load 'HandImage.png'")
original = image.copy()
blank = np.zeros(image.shape[:2], dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# Merge text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)

# Find contours. findContours returns 2 or 3 values depending on the
# OpenCV version; the contour list is picked out in either case.
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

for c in cnts:
    # Filter using contour area and aspect ratio
    _, _, w, h = cv2.boundingRect(c)
    area = cv2.contourArea(c)
    ar = w / float(h)
    # The aspect-ratio alternatives are parenthesised so that the area
    # bounds apply to both of them. Without the parentheses `and` binds
    # tighter than `or`, and contours with 1.4 < ar < 4 would pass
    # regardless of their area.
    if ((1.4 < ar < 4) or ar < .85) and 10 < area < 500:
        # Find rotated bounding box
        rect = cv2.minAreaRect(c)
        box = cv2.boxPoints(rect)
        # np.int0 was removed in NumPy 2.0; np.intp is the same integer type.
        box = np.intp(box)
        cv2.drawContours(image, [box], 0, (36, 255, 12), 2)
        cv2.drawContours(blank, [box], 0, (255, 255, 255), -1)

# Bitwise operations to isolate text
extract = cv2.bitwise_and(thresh, blank)
extract = cv2.bitwise_and(original, original, mask=extract)

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.imshow('close', close)
cv2.imshow('extract', extract)
cv2.waitKey()
I am new to computer vision and image manipulation... Please help!
Update:
I have figured out the error in my code. I had to identify and adjust the size/area range of the bounding boxes so that the filter selects the text.
The code:
# Find contours
# NOTE(review): `dilate`, `image` and `blank` are not defined in this
# fragment; they are presumably produced by the earlier preprocessing steps.
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
# Visit contours ordered by the x coordinate of their bounding rectangle.
cnts = sorted(cnts, key=lambda cnt: cv2.boundingRect(cnt)[0])

for c in cnts:
    # Filter on bounding-box size (x1 = width, y1 = height)
    x, y, x1, y1 = cv2.boundingRect(c)
    if (30 < x1 < 200 or 350 < x1 < 500) and 50 < y1 < 350:
        # Rotated bounding box of the accepted contour
        rect = cv2.minAreaRect(c)
        box = cv2.boxPoints(rect)
        # Enlarge the box about its own centre: shift the centre to the
        # origin, scale, then shift back.
        scale = 2  # 2 is scale factor
        center = np.mean(box, axis=0)
        scaled_box = (box - center) * scale + center
        # np.int0 was removed in NumPy 2.0; np.intp is the same integer type.
        box = np.intp(scaled_box)
        cv2.drawContours(image, [box], 0, (36, 255, 12), 2)
        cv2.drawContours(blank, [box], 0, (255, 255, 255), -1)
But I also liked the suggestion by @Mikel B, where he used a neural network method.
This is not an OpenCV solution as requested, but I am leaving it here in case a neural-network-based solution is of interest to anybody.
First of all install the package by:
pip install craft-text-detector
Then copy and paste the following:
from craft_text_detector import Craft
# import craft functions
from craft_text_detector import (
read_image,
load_craftnet_model,
load_refinenet_model,
get_prediction,
export_detected_regions,
export_extra_results,
empty_cuda_cache
)
# Input image and the folder that receives every exported artifact.
image_path = '/path/to/JP6x0.png'  # can be filepath, PIL image or numpy array
output_dir = 'outputs/'

# Load the image.
image = read_image(image_path)

# Load the refiner and the CRAFT network with the same CUDA setting.
use_cuda = False
refine_net = load_refinenet_model(cuda=use_cuda)
craft_net = load_craftnet_model(cuda=use_cuda)

# Detection settings, kept together so they are easy to tune.
detection_options = {
    "text_threshold": 0.5,
    "link_threshold": 0.1,
    "low_text": 0.1,
    "cuda": use_cuda,
    "long_size": 1280,
}

# Run the detector.
prediction_result = get_prediction(
    image=image,
    craft_net=craft_net,
    refine_net=refine_net,
    **detection_options,
)
boxes = prediction_result["boxes"]

# Export the detected text regions.
exported_file_paths = export_detected_regions(
    image=image,
    regions=boxes,
    output_dir=output_dir,
    rectify=True,
)

# Export the heatmap, detection points and box visualization.
export_extra_results(
    image=image,
    regions=boxes,
    heatmaps=prediction_result["heatmaps"],
    output_dir=output_dir,
)

# Unload models from gpu.
empty_cuda_cache()
My results with these parameters: