python opencv image-processing handwriting-recognition

Detecting handwritten boxes using OpenCV


I have the following image:

enter image description here

I want to extract the boxed diagrams like so:

enter image description here

enter image description here

Here's what I've attempted:

import cv2
import matplotlib.pyplot as plt

# Load the image. cv2.imread returns None (it does not raise) when the file
# is missing or cannot be decoded, so check the result explicitly.
image = cv2.imread('diagram.jpg')
if image is None:
    raise FileNotFoundError("Could not read 'diagram.jpg'")

# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Apply inverted thresholding: pixels above 127 become 0, all others 255
_, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

# Find the outermost contours only (RETR_EXTERNAL)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Draw all contours on the image in red (OpenCV color tuples are BGR)
cv2.drawContours(image, contours, -1, (0, 0, 255), 2)

# Show the final image. Matplotlib expects RGB channel order, so convert from
# OpenCV's BGR first; otherwise red and blue are swapped on screen.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.show()

However, I've realized it'll be difficult to extract the diagrams because the contours aren't closed:

enter image description here

I've tried using morphological closing to close the gaps in the box edges:

# 5x5 rectangular structuring element used for the closing operation
kernel = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(5, 5))

# Morphological closing of the binary image, intended to bridge the gaps in the box edges
closed = cv2.morphologyEx(src=thresh, op=cv2.MORPH_CLOSE, kernel=kernel)

But this changes almost nothing. How should I approach this problem?


Solution

  • We can replace the morphological closing with a dilate followed by an erode, filling the contours between the two steps.

    For filling the gaps, the kernel size should be much larger than 5x5 (I used 51x51).


    Assuming the handwritten boxes are colored, we may convert from BGR to HSV, and apply the threshold on the saturation channel of HSV:

    # Switch from BGR to HSV so the saturation channel can be thresholded
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # Channel index 1 of the HSV image is saturation; it is used here as the "gray" input
    gray = hsv[..., 1]
    # Otsu's method chooses the threshold automatically, so the 0 passed as the threshold is not used
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    

    Apply dilate with large kernel, and use drawContours for filling the contours:

    # A relatively large (51x51) rectangular kernel so the dilation can close the gaps
    kernel = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(51, 51))
    dilated = cv2.dilate(src=thresh, kernel=kernel)

    # Find the outer contours of the dilated mask and paint their interiors white in place
    contours, hierarchy = cv2.findContours(dilated, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(dilated, contours, contourIdx=-1, color=255, thickness=cv2.FILLED)
    

    Apply erode after filling the contours. Erode after dilate is equivalent to closing, but here we fill the contours between the dilate and the erode.

    # Erode the filled mask with the same kernel that was used for the dilation
    closed = cv2.erode(src=dilated, kernel=kernel)
    

    Code sample:

    import cv2
    import numpy as np
    
    # Load the image. cv2.imread returns None (it does not raise) when the file
    # is missing or cannot be decoded, so check the result explicitly.
    image = cv2.imread('diagram.png')
    if image is None:
        raise FileNotFoundError("Could not read 'diagram.png'")

    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)  # Convert from BGR to HSV color space

    # Use the saturation channel of HSV as "gray" (instead of a BGR2GRAY conversion).
    gray = hsv[:, :, 1]

    # Apply automatic (Otsu) thresholding to create a binary image.
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)

    # Add zero padding on every side (required due to the large dilate kernel).
    # The same width is cropped off again after the erode step.
    pad = 100
    thresh = np.pad(thresh, ((pad, pad), (pad, pad)))

    # Define a relatively large rectangular kernel for closing the gaps.
    kernel_size = (51, 51)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)

    dilated = cv2.dilate(thresh, kernel)  # Dilate with the large kernel

    # Fill the outer contours (thickness -1 means filled) before applying erode.
    contours, hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(dilated, contours, -1, 255, -1)

    closed = cv2.erode(dilated, kernel)  # Apply erode after filling the contours.

    closed = closed[pad:-pad, pad:-pad]  # Remove the padding.

    # Find the outer contours of the closed mask
    contours, hierarchy = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Draw the contours on the original image (color tuple is in BGR order)
    cv2.drawContours(image, contours, -1, (255, 0, 0), 2)

    # Show images for testing.
    # Note: 'thresh' and 'dilated' still include the padding; 'closed' does not.
    cv2.imshow('gray', gray)
    cv2.imshow('thresh', thresh)
    cv2.imshow('dilated', dilated)
    cv2.imshow('closed', closed)
    cv2.imshow('image', image)
    cv2.waitKey()
    cv2.destroyAllWindows()
    

    Result:
    enter image description here

    gray (saturation channel):
    enter image description here

    thresh:
    enter image description here

    dilated (after filling):
    enter image description here

    closed:
    enter image description here