Tags: python, python-3.x, image-processing, python-imaging-library, image-conversion

How to extract number from an image?


I have this image

input image

I want to convert all of the text in the image to a single colour, and then extract the number from the image as a string.

Here's my code for what I have done so far

import numpy as np
import cv2
import matplotlib.pyplot as plt


def downloadImage(URL, timeout=10):
    """Download the image at ``URL`` and return it as a cv2-style BGR array.

    Args:
        URL: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A three-channel NumPy array whose channels are ordered
        blue, green, red.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    from io import BytesIO
    from PIL import Image as PIL_Image
    import requests

    # Without a timeout, requests can block forever on an unresponsive server.
    response = requests.get(URL, timeout=timeout)
    # Fail here on 4xx/5xx instead of handing an error page to PIL.
    response.raise_for_status()

    image = PIL_Image.open(BytesIO(response.content))
    # Normalise palette, grayscale and RGBA images to three RGB channels so
    # the channel swap below always receives the layout it expects.
    rgb = np.array(image.convert("RGB"))
    # PIL yields RGB while cv2 works in BGR, so the conversion is RGB -> BGR.
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)


URL = "https://i.imgur.com/prvJaK3.jpg"

# Read image
colorImage = downloadImage(URL)

# downloadImage returns the pixels in cv2's BGR order, so blue is channel 0
# and red is channel 2.
BLUE, GREEN, RED = 0, 1, 2

# Mask of pixels with a lot of GREEN and little RED and BLUE
greenImage = (
      (colorImage[:, :, RED] < 50)
    & (colorImage[:, :, GREEN] > 100)
    & (colorImage[:, :, BLUE] < 50)
)

# Mask of dark pixels: every channel is low
blackImage = (
      (colorImage[:, :, RED] < 60)
    & (colorImage[:, :, GREEN] < 100)
    & (colorImage[:, :, BLUE] < 60)
)

# Display the boolean mask of the dark pixels
plt.imshow(blackImage)
plt.show()

It produces the following output:

output image

How can I convert the image so that every digit has the same colour, and then extract the number from the image as a string?


Solution

  • output: 6479

    enter image description here

    #!/usr/bin/env python3
    
    import cv2
    import numpy as np
    import pytesseract
    
    im_path = "./"
    im_name = "input.jpg"

    # Read the image. cv2.imread returns None (it does not raise) when the
    # file is missing or cannot be decoded, so check before slicing.
    img = cv2.imread(im_path + im_name)
    if img is None:
        raise FileNotFoundError("Could not read image: " + im_path + im_name)

    # Crop the borders: drop 3 rows at the top, 2 at the bottom and
    # 5 columns on the left.
    img = img[3:-2, 5:]

    # Threshold on the red channel (index 2 in cv2's BGR layout): every pixel
    # whose red value exceeds `th` is set to black in all channels.
    th = 185
    img[img[..., 2] > th] = 0

    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Binarize; with THRESH_OTSU the threshold value is chosen automatically
    # and the 0 passed here is ignored.
    binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # Enlarge the image, as the original is small
    scale_ = 5  # multiple of the original size
    width = int(img.shape[1] * scale_)
    height = int(img.shape[0] * scale_)
    dim = (width, height)
    resized = cv2.resize(binary, dim, interpolation=cv2.INTER_CUBIC)

    # Morphological erosion with the default kernel
    resized = cv2.erode(resized, None)

    # OCR: treat the image as a single raw text line and use the digits config
    print(pytesseract.image_to_string(resized, config='--psm 13 outputbase digits'))

    # Visualization: show the processed image until a key is pressed
    cv2.namedWindow("output", cv2.WINDOW_NORMAL)
    cv2.imshow("output", resized)
    cv2.waitKey(0)