pythonocrtesseract

Python to Automate mini game. Tesseract unable to find single character


I'm still new to Python, and I've been using ChatGPT as my tutor. I'm trying to automate a mini game: the script needs to read the single character (letter or number) on my screen and then, when the timing is right, press that key. I've gone through 3 different versions trying to find a solution, but I simply can't get it to recognize the character. I really hope someone can help me move forward!

Images:

  1. game
  2. cut out
  3. processed
import pyautogui
import pytesseract
from PIL import Image, ImageEnhance
import time
import re

def preprocess_image(image):
    """
    Prepare a screenshot for OCR by keeping only its brightest pixels.

    The image is converted to grayscale, its contrast is boosted by a
    factor of 5, and the result is binarized: pixel levels above 200
    become white (255) and all other levels become black (0).

    Args:
        image (PIL.Image.Image): The input image.

    Returns:
        PIL.Image.Image: The binarized grayscale image.
    """
    def binarize(level):
        # Only near-white pixels survive; the rest is treated as background.
        return 255 if level > 200 else 0

    grayscale = image.convert("L")
    high_contrast = ImageEnhance.Contrast(grayscale).enhance(5.0)
    return high_contrast.point(binarize)

def analyze_region(region, region_name):
    """
    Capture one screen region and run OCR on it.

    Both the raw capture and its preprocessed version are written to
    PNG files in the current working directory so the OCR input can be
    inspected afterwards. The raw OCR text and the filtered characters
    are also printed for debugging.

    Args:
        region (tuple): The region to capture (x, y, width, height).
        region_name (str): Label used in the debug file names and output.

    Returns:
        str: Every uppercase letter and digit found in the OCR output,
        concatenated in order; an empty string when none were found.
    """
    capture = pyautogui.screenshot(region=region)
    capture.save(f"scanned_region_{region_name}_original.png")

    cleaned = preprocess_image(capture)
    cleaned.save(f"scanned_region_{region_name}_processed.png")

    # --psm 10 asks Tesseract to treat the image as a single character;
    # the whitelist restricts recognition to capital letters and digits.
    tesseract_options = '--psm 10 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    ocr_text = pytesseract.image_to_string(cleaned, config=tesseract_options)

    # Filter once and reuse the matches for both the debug line and the result.
    matches = re.findall(r'[A-Z0-9]', ocr_text)
    print(f"Raw OCR output from {region_name}: {ocr_text.strip()}\nFiltered characters: {matches}")  # Debugging output

    return ''.join(matches)

def _find_pixel(image, target_color):
    """Return (x, y) of the first pixel equal to target_color, scanning row by row, or None."""
    width, height = image.size
    pixels = image.load()
    for y in range(height):
        for x in range(width):
            if pixels[x, y] == target_color:
                return x, y
    return None


def find_color_on_screen(target_color):
    """
    Continuously scans the screen for a specific color, then OCRs two regions.

    Full-screen screenshots are taken in a loop until one contains a pixel
    that exactly matches ``target_color``. Once found, the top-bar and
    right-bar regions are analyzed up to 10 times; the function reports the
    first non-empty result (top bar takes precedence), waits for Enter and
    returns.

    Args:
        target_color (tuple): RGB values of the target color (e.g., (255, 0, 0) for red).

    Outputs:
        Prints a message when the color is found and analyzes specific regions to identify a character.
    """
    # Fixed screen regions of the two progress bars, as (x, y, width, height).
    top_bar_region = (1010, 103, 530, 77)
    right_bar_region = (1840, 120, 75, 695)

    # Pixels are returned as tuples, so a list argument would never compare equal.
    wanted = tuple(target_color)

    print(f"Scanning for color: {target_color} (RGB)...")
    while True:
        # Screenshots may include an alpha channel on some platforms; a
        # 4-component pixel would never equal a 3-component RGB target,
        # so normalise to RGB before comparing.
        screenshot = pyautogui.screenshot().convert("RGB")

        location = _find_pixel(screenshot, wanted)
        if location is None:
            continue

        x, y = location
        print(f"Color {target_color} found at pixel ({x}, {y})!")

        # Up to 10 attempts, pausing 0.2 s between them. Capture and OCR
        # time is added on top, so the total can exceed 2 seconds.
        for _ in range(10):
            top_bar_result = analyze_region(top_bar_region, "top_bar")
            right_bar_result = analyze_region(right_bar_region, "right_bar")

            if top_bar_result:
                print(f"Detected character(s) in top bar: {top_bar_result}")
                input("Press Enter to acknowledge and exit...")
                return

            if right_bar_result:
                print(f"Detected character(s) in right bar: {right_bar_result}")
                input("Press Enter to acknowledge and exit...")
                return

            time.sleep(0.2)

        print("No valid capital letters or numbers detected within the time frame.")
        input("Press Enter to exit...")
        return

# Example usage: run the scanner when this file is executed as a script.
if __name__ == "__main__":
    # RGB components of the color to detect, written in hex (#008D92);
    # replace them to look for a different color.
    target_color = (0x00, 0x8D, 0x92)
    find_color_on_screen(target_color)

Command Prompt:

Raw OCR output from top_bar: Filtered characters: [] Raw OCR output from right_bar: Filtered characters: [] No valid capital letters or numbers detected within the time frame. Press Enter to exit...


Solution

  • I did some testing with pytesseract and found out that it didn't like the white borders around the letter.

    I rewrote preprocess_image() to extract the letter only. It's probably not the best way of doing it, but it's just to give an understanding of what pytesseract needs:

    def preprocess_image(image: Image.Image, top_right: bool):
        """
        Redraw the pure-white pixels of an image as black on a white canvas,
        dropping the two outermost white lines that frame the character.

        Args:
            image: The captured region.
            top_right: When true, white pixels in the first and last white
                column (min/max x) are discarded; otherwise those in the
                first and last white row (min/max y) are discarded.
                Presumably true is meant for the top bar and false for the
                right bar; confirm against the caller.

        Returns:
            A new RGB image of the same size: black where the input was
            pure white (excluding the discarded border lines), white
            everywhere else.
        """
        # Local import keeps the snippet self-contained; the surrounding
        # script does not import numpy at module level.
        import numpy as np

        npimage = np.array(image.convert('RGB'))

        # Coordinates of every pure-white pixel (row indices first).
        Y, X = np.where(np.all(npimage == [255, 255, 255], axis=2))

        output = Image.new('RGB', image.size, (255, 255, 255))

        # No white pixels: nothing to draw, and min()/max() would fail.
        if X.size == 0:
            return output

        # Find the border positions once instead of once per pixel.
        axis = X if top_right else Y
        keep = (axis != axis.min()) & (axis != axis.max())

        for x, y in zip(X[keep], Y[keep]):
            output.putpixel((int(x), int(y)), (0, 0, 0))

        return output
    

    It's taking the positions of all the white pixels in the image ([255, 255, 255]) and checking that it isn't the first or last x position, which corresponds to the white bars around the number (if x != max(X) and x != min(X):).

    Here's an image of what it returns: code output

    Sources :
    Find the coordinates in an image where a specified colour is detected