How to use opencv to perfectly extract the digits text from the below image? The color of the text are dynamic.
It's simple. The follow code can be highly optimized (I did it fast - @Silencer could do this) and work (tested) also with other images (for some you will have to tweak some values).
import cv2
import numpy as np
# import image
image = cv2.imread('image.png')
cv2.imshow('original', image)
cv2.waitKey(0)
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
cv2.imshow('hsv', hsv[:, :, 1])
cv2.waitKey(0)
# this calculate the histogram of the image you input
# if this is under/below a certain value (which depend of the colors in the image), a certain thresh will be choosed among another
hist, bins = np.histogram(hsv.ravel(), 256, [0, 256])
print(hist[-1])
if hist[-1] > 15000:
# binary
ret, thresh = cv2.threshold(hsv[:, :, 0], 55, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
cv2.imshow('second', thresh)
cv2.waitKey(0)
# dilation
kernel = np.ones((1, 1), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
cv2.imshow('dilated', img_dilation)
cv2.waitKey(0)
# find contours
im2, ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# sort contours
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
for i, ctr in enumerate(sorted_ctrs):
# Get bounding box
x, y, w, h = cv2.boundingRect(ctr)
# Getting ROI
roi = image[y:y + h, x:x + w]
# show ROI
# cv2.imshow('segment no:'+str(i),roi)
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
# cv2.waitKey(0)
if w > 15 and h > 15:
cv2.imwrite('roi{}.png'.format(i), roi)
cv2.imshow('marked areas', image)
cv2.waitKey(0)
else:
# binary
ret, thresh = cv2.threshold(hsv[:, :, 0], 55, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
cv2.imshow('second', thresh)
cv2.waitKey(0)
# dilation
kernel = np.ones((1, 1), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
cv2.imshow('dilated', img_dilation)
cv2.waitKey(0)
# find contours
im2, ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# sort contours
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
for i, ctr in enumerate(sorted_ctrs):
# Get bounding box
x, y, w, h = cv2.boundingRect(ctr)
# Getting ROI
roi = image[y:y + h, x:x + w]
# show ROI
# cv2.imshow('segment no:'+str(i),roi)
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
# cv2.waitKey(0)
if w > 15 and h > 15:
cv2.imwrite('roi{}.png'.format(i), roi)
cv2.imshow('marked areas', image)
cv2.waitKey(0)