I need help processing a captcha image. I wrote a test algorithm to process some images that come from a website's captcha, and for some image types I was able to clean them up and use Tesseract to extract the text successfully! However, one of the images follows a different pattern and I have not been able to make much progress with it: I need to remove these diagonal lines so that I can extract the text. My code is below, along with the images before and after processing. Do you have any ideas on how to deal with the noise that still remains at this stage?
import pytesseract
import cv2
from PIL import Image
import numpy as np
def limpar_imagem(imagem: Image.Image) -> Image.Image:
    """Remove isolated dark pixels (thin diagonal noise) from a captcha image.

    A dark pixel is kept only when both of its horizontal neighbours are also
    dark (i.e. it is part of a horizontal run, typically a letter stroke);
    otherwise it is replaced with white.  Pixels in the first and last column
    are left untouched because they have no neighbour on one side.

    :param imagem: input image; converted to RGB internally.
    :return: a new cleaned-up PIL image (the input is not modified).
    """
    imagem = imagem.convert('RGB')
    largura, altura = imagem.size
    array = np.array(imagem)

    def rgb_to_hex_val(pixel):
        # Cast to int first: the channels are numpy uint8 scalars, and
        # shifting those directly can wrap instead of building 0xRRGGBB.
        return (int(pixel[0]) << 16) + (int(pixel[1]) << 8) + int(pixel[2])

    # Colour range treated as "ink" (anything darker than 0x999999).
    escuro_min = 0x000000
    escuro_max = 0x999999
    branco = (255, 255, 255)

    resultado = array.copy()
    for x in range(1, largura - 1):  # skip borders: both neighbours must exist
        for y in range(altura):
            cor = rgb_to_hex_val(array[y, x])
            if escuro_min <= cor <= escuro_max:
                viz_esq = rgb_to_hex_val(array[y, x - 1])
                viz_dir = rgb_to_hex_val(array[y, x + 1])
                # BUG FIX: the original assigned resultado[y, x] = array[y, x]
                # when BOTH neighbours were dark, which is a no-op (resultado
                # is already a copy), so the function returned the input
                # unchanged.  Instead, whiten dark pixels that are NOT part
                # of a horizontal dark run — those are line/noise pixels.
                if not (escuro_min <= viz_esq <= escuro_max
                        and escuro_min <= viz_dir <= escuro_max):
                    resultado[y, x] = branco
    return Image.fromarray(resultado)
import os

# Path to the Tesseract binary on Windows.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Thresholding variants to try; each result is saved for visual inspection.
metodos = [
    cv2.THRESH_BINARY,
    cv2.THRESH_BINARY_INV,
    cv2.THRESH_TRUNC,
    cv2.THRESH_TOZERO,
    cv2.THRESH_TOZERO_INV,
]

imagem = cv2.imread('image.png')
# BUG FIX: cv2.imread returns BGR channel order, not RGB, so the correct
# conversion code is COLOR_BGR2GRAY (the original used COLOR_RGB2GRAY).
imagem_cinza = cv2.cvtColor(imagem, cv2.COLOR_BGR2GRAY)

for index, metodo in enumerate(metodos):
    # BUG FIX: OpenCV threshold flags combine with bitwise `|`, not boolean
    # `or`.  `metodo or cv2.THRESH_OTSU` silently replaced THRESH_BINARY
    # (which is 0 and therefore falsy) with bare THRESH_OTSU and never
    # applied Otsu to the other methods.  With THRESH_OTSU set, the fixed
    # 127 threshold is ignored and the optimal value is computed instead.
    _, imagem_tratada = cv2.threshold(imagem_cinza, 127, 255,
                                      metodo | cv2.THRESH_OTSU)
    cv2.imwrite(f'imagem_tratada_{index}.png', imagem_tratada)

# Open the third treated image (the THRESH_TRUNC variant) for further cleanup.
imagem = Image.open("imagem_tratada_2.png")
imagem_array = np.array(imagem)

# Grayscale range considered background noise ("gray" tones).
limiar_inferior = 100
limiar_superior = 255
# Mask of pixels inside the gray range...
mascara_cinza = (imagem_array >= limiar_inferior) & (imagem_array <= limiar_superior)
# ...and force them to white.
imagem_array[mascara_cinza] = 255
# Blank the first few rows (top-border artefacts).
imagem_array[0:6, :] = 255

limiar_preto = 30  # adjustable: anything at or below this is considered ink
# Mask of the (near-)black pixels and snap them to pure black.
mascara_preto = imagem_array <= limiar_preto
imagem_array[mascara_preto] = 0

# BUG FIX: PIL's save() fails if the target directory does not exist.
os.makedirs('teste', exist_ok=True)
imagem_resultado = Image.fromarray(imagem_array)
imagem_resultado.save('teste/imagemFinal.png')

imagem = Image.open('teste/imagemFinal.png')
img = limpar_imagem(imagem)
img.save("saida_2.png")

# --psm 7: treat the image as a single line of text.
texto = pytesseract.image_to_string(Image.open("saida_2.png"), config='--psm 7')
print("Texto reconhecido:", texto)
image to manipulate:
image result:
Focusing on the problem presented, which is removing repeating patterns from an image, this can be solved by cycling through all the rows or columns and looking for repeating pixel values. I have a sample implementation here which works well on a grayscale image, but could be adapted to work on multi-channel images if necessary.
testimage.png
is a direct download of your original image above.
import PIL.Image
import numpy as np
import matplotlib.pyplot as plt
def get_max_channel_img(img: np.ndarray | PIL.Image.Image) -> np.ndarray:
    """Reduce a colour image to a single channel.

    Each output pixel is the maximum of that pixel's colour channels.
    PIL images are converted to arrays first; arrays are used as-is.
    """
    data = np.array(img) if type(img) is PIL.Image.Image else img
    return np.max(data, axis=2)
def get_gray_img(img: np.ndarray | PIL.Image.Image) -> np.ndarray:
    """Return a single-channel version of *img*.

    PIL input uses Pillow's own luminance conversion ('L' mode); ndarray
    input is reduced with the per-pixel Euclidean norm of the channels.
    NOTE(review): the two paths are not numerically equivalent — the norm
    of an 8-bit white pixel is ~441.7, outside the usual 0-255 range.
    """
    if type(img) is not PIL.Image.Image:
        return np.linalg.norm(img, axis=2)
    return np.array(img.convert('L'))
def filter_repeats(img: np.ndarray) -> np.ndarray:
    """Blank out horizontally periodic pixel patterns in a grayscale image.

    For every row, every candidate period and every phase offset within that
    period, the pixels sampled at that stride are compared against their
    mean.  When nearly all of them agree (each within CLOSENESS_RATIO of the
    mean, for at least MIN_THRESHOLD of the samples) the whole stride is
    assumed to be a repeating background pattern and painted white (255).

    The input array is not modified; a cleaned copy is returned.
    """
    MIN_REPEATS = 8        # pattern must repeat at least this many times
    MIN_STEP = 2           # smallest period (in pixels) considered
    CLOSENESS_RATIO = 0.2  # max deviation from the mean, as a fraction of 255
    MIN_THRESHOLD = 0.95   # fraction of samples that must be "close"

    # Largest period that still leaves room for MIN_REPEATS repetitions.
    max_period = img.shape[1] // MIN_REPEATS
    cleaned = img.copy()

    for row in range(img.shape[0]):
        for period in range(MIN_STEP, max_period):
            # The pattern may not start at column 0, so try every phase.
            for phase in range(period):
                # All pixels of this row sampled at the current stride
                # (equivalent to the explicit column list, without ever
                # running past the image width).
                samples = img[row, phase::period].astype(int)
                mean_val = samples.mean()
                close = np.abs(samples - mean_val) / 255 < CLOSENESS_RATIO
                if close.mean() > MIN_THRESHOLD:
                    # Enough agreement: treat the stride as background.
                    cleaned[row, phase::period] = 255
    return cleaned
def _main():
    """Demo: load the captcha, strip the repeating pattern, threshold,
    and plot the three processing stages stacked vertically."""
    source_path = 'testimage.png'
    captcha = PIL.Image.open(source_path)

    gray = get_max_channel_img(captcha)
    filtered = filter_repeats(gray)
    thresholded = filtered.copy()
    thresholded[thresholded > 230] = 255  # drop near-white speckles

    fig, axes = plt.subplots(3, 1)
    for axis, stage in zip(axes, (gray, filtered, thresholded)):
        axis.imshow(stage, cmap='gray', vmin=0, vmax=255)
    plt.savefig('OUTPUT.PNG')
    plt.show()


if __name__ == '__main__':
    _main()
Here is the plot it produces:
The top image is the original but grayscaled, the middle one is filtered for patterns and the bottom one is filtered for patterns then with a basic threshold which removed a bunch of near-white pixels scattered throughout the image, leaving behind a fairly pristine view of the letters.
The variables with all-caps names are just general values I picked without much testing. You should tune them to work well with your other images; they just happened to work well on the sample you provided, so if your other images all look like that one, these values will probably be fine.
In the example code I showed only row-based filtering, since your image is much wider than it is tall, but the function could be adjusted to do column-based filtering if you need it.
Let me know if you have any questions.