I have created a CNN model using the MNIST dataset. I want to make predictions for the sequence of numbers present in the images. The technique involves segmenting each image and feeding it into the model, but I am facing difficulties in segmenting numbers from the images because there are two different types of images present. I need a robust technique that removes all the noise and shadows present in the images and segments each number separately. I am sharing the images here as well. I am looking for the robust technique and the code.
I was trying this code and technique, but it doesn't work for the attached images.
def segment_and_display_digits(image_path):
    """Segment individual digits from an image and display them.

    Reads the image at *image_path*, binarizes it with adaptive
    thresholding (robust to uneven lighting), keeps contours whose area
    and aspect ratio look digit-like, then shows the original image with
    bounding boxes plus each cropped digit.

    Parameters
    ----------
    image_path : str
        Path to the image file to segment.
    """
    # Read image; imread returns None (not an exception) on failure
    img = cv2.imread(image_path)
    if img is None:
        print(f"Could not read image: {image_path}")
        return
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Get image dimensions
    height, width = gray.shape
    total_area = height * width

    # Apply adaptive thresholding; a local threshold copes with shadows
    # and gradients that defeat a single global threshold
    thresh = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        21,  # Block size
        10,  # C constant
    )

    # Find external contours of the (now white) ink regions
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Filter contours based on area and aspect ratio
    valid_contours = []
    min_area = total_area * 0.001  # Minimum 0.1% of image area
    max_area = total_area * 0.5    # Maximum 50% of image area
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if min_area < area < max_area:
            x, y, w, h = cv2.boundingRect(cnt)
            aspect_ratio = w / float(h)
            # Check if aspect ratio is reasonable for a digit (not too wide or tall)
            if 0.2 < aspect_ratio < 2:
                valid_contours.append(cnt)

    # Sort contours from left to right so digits come out in reading order
    valid_contours = sorted(valid_contours, key=lambda c: cv2.boundingRect(c)[0])

    # Extract digit crops, with a little context around each box
    digits = []
    padding = int(min(height, width) * 0.02)  # Adaptive padding based on image size
    for cnt in valid_contours:
        x, y, w, h = cv2.boundingRect(cnt)
        # Add padding while keeping within image bounds
        x1 = max(0, x - padding)
        y1 = max(0, y - padding)
        x2 = min(width, x + w + padding)
        y2 = min(height, y + h + padding)
        digits.append(img[y1:y2, x1:x2])

    # Display results
    if digits:
        # Visualization of original image with detected digit boxes
        img_with_boxes = img.copy()
        for cnt in valid_contours:
            x, y, w, h = cv2.boundingRect(cnt)
            cv2.rectangle(img_with_boxes, (x, y), (x + w, y + h), (0, 255, 0), 2)

        plt.figure(figsize=(15, 5))
        # Top row: original image with boxes
        plt.subplot(2, 1, 1)
        plt.imshow(cv2.cvtColor(img_with_boxes, cv2.COLOR_BGR2RGB))
        plt.title('Detected Digits')
        plt.axis('off')
        # Bottom row: one subplot per segmented digit
        # (no placeholder subplot needed; each call below creates its axes)
        for i, digit in enumerate(digits):
            plt.subplot(2, len(digits), len(digits) + i + 1)
            plt.imshow(cv2.cvtColor(digit, cv2.COLOR_BGR2RGB))
            plt.axis('off')
            plt.title(f'Digit {i+1}')
        plt.tight_layout()
        plt.show()
    else:
        print("No digits found in the image")
To compensate for uneven illumination, a standard technique is to first estimate illumination and then divide.
The background is a white sheet of paper, so that's great. I'll estimate illumination with a median blur. The kernel size must be large enough such that no part of the foreground (written text) remains.
This will also, coincidentally, correct the white balance, because each channel is normalized by its own background estimate. If the text were colored, it would still be colored after compensation.
im = cv.imread("KifRNuGy.jpg")
# Estimate the illumination field with a large median blur; the kernel
# must be wider than any pen stroke so only the paper background survives.
illumination = cv.medianBlur(im, 101)
# Divide in float, flooring the divisor at 1: uint8 / uint8 with a zero
# in the illumination estimate would emit divide-by-zero warnings and
# produce inf/nan pixels before the clip.
compensated = im.astype(np.float32) / np.maximum(illumination.astype(np.float32), 1)
# arbitrary 0.8 to keep the bright background within range
compensated = (0.8 * 255 * np.clip(compensated, 0, 1)).astype(np.uint8)
For the segmentation, perform morphological "closing". That will erase the fine lines. Now you can get connected components from that, get their bounding boxes, but take the images from the source, because all that will have distorted the handwritten digits.
Or with a fixed 256x256 region: