import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
def generate_square_image(size, square_size, noise_level=0.0):
    """
    Generates an image with a white square in the center.

    Args:
        size (int): The size of the image (size x size).
        square_size (int): The size of the square. Must satisfy
            0 <= square_size <= size.
        noise_level (float): Standard deviation of Gaussian noise.
            Values <= 0 leave the image noise-free.

    Returns:
        tuple: ``(img, mask, bbox)`` where
            img (numpy.ndarray): The mask with noise added, clipped to [0, 1].
            mask (numpy.ndarray): 1 inside the square, 0 everywhere else.
            bbox (tuple): Bounding box (x_min, y_min, width, height) in
                array-index units.

    Raises:
        ValueError: If square_size is negative or larger than size.
    """
    # Without this guard an oversized square yields a negative start index;
    # the slice below would then wrap around and silently mark the wrong
    # region while the bounding box reports a negative origin.
    if not 0 <= square_size <= size:
        raise ValueError(
            f"square_size must be between 0 and size ({size}), got {square_size}"
        )

    # create mask
    mask = np.zeros((size, size))
    start = (size - square_size) // 2
    end = start + square_size
    mask[start:end, start:end] = 1

    # create bounding box; the square is centered on both axes, so the same
    # offset serves as x_min and y_min
    bbox = (start, start, square_size, square_size)

    # create noisy image (a copy, so the mask itself stays clean)
    img = mask.copy()
    if noise_level > 0:
        noise = np.random.normal(0, noise_level, img.shape)
        img = np.clip(img + noise, 0, 1)
    return img, mask, bbox
# Example usage:
size = 100
square_size = 40
img, mask, bbox = generate_square_image(size, square_size, noise_level=0.1)

# One row of three panels: noisy image, clean mask, noisy image plus box.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))
panels = (
    (img, 'Generated Image'),
    (mask, 'Mask'),
    (img, 'Image with Bounding Box'),
)
for a, (picture, title) in zip(ax, panels):
    a.imshow(picture, cmap='gray')
    a.set_title(title)

# Overlay the bounding box on the third panel.
# NOTE: the rectangle is anchored at the raw (x, y) index. imshow centers
# each pixel on its integer coordinate, so this anchor lies at the center of
# the square's first pixel rather than on that pixel's outer edge.
x, y, width, height = bbox
rect = Rectangle((x, y), width, height, linewidth=2, edgecolor='r', facecolor='none')
ax[2].add_patch(rect)

# Force the y-limits of every panel to run from `size` (bottom) to 0 (top).
for a in ax:
    a.set_ylim(size, 0)
plt.tight_layout()
plt.show()
Question: What is the right code to align the box properly?
Adding a Rectangle patch seems to be the most straightforward way to draw the box; is that right?
As you can see, I have already tried to get this to work, but even that fix (accounting for the difference in coordinate systems, which seems to be the one thing to explore here) does not work either.
Fixes:
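1. Subtract 0.5 from x and y in Rectangle().
imshow() centers each pixel on its integer index, so the pixel at row i, column j covers the data range [j - 0.5, j + 0.5] horizontally and [i - 0.5, i + 0.5] vertically. A Rectangle anchored at (x, y) therefore starts at the center of the first pixel instead of at its outer edge; shifting the anchor by -0.5 in both directions aligns the box with the pixel edges.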
2. Passing origin='upper' (which is already imshow's default) makes it explicit that row 0 is drawn at the top, consistent with NumPy's top-left origin.
3. Hiding axis ticks makes visualization clearer.
The full corrected code is provided below:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
def generate_square_image(size, square_size, noise_level=0.0):
    """
    Generates an image with a white square in the center.

    Args:
        size (int): The size of the image (size x size).
        square_size (int): The size of the square; it has to fit inside
            the image, i.e. 0 <= square_size <= size.
        noise_level (float): Standard deviation of Gaussian noise. No noise
            is added unless this is greater than zero.

    Returns:
        tuple: A 3-tuple ``(img, mask, bbox)``:
            numpy.ndarray: The image (mask plus noise, clipped to [0, 1]).
            numpy.ndarray: The mask (1 inside the square, 0 outside).
            tuple: Bounding box (x_min, y_min, width, height), expressed
                in array indices.

    Raises:
        ValueError: If the square does not fit inside the image or has a
            negative size.
    """
    # Reject squares that cannot fit: a negative start offset would make the
    # slice assignment wrap around and produce a mask that disagrees with
    # the reported bounding box.
    if square_size < 0 or square_size > size:
        raise ValueError(
            f"square_size must be between 0 and size ({size}), got {square_size}"
        )

    # Create mask
    start = (size - square_size) // 2
    end = start + square_size
    mask = np.zeros((size, size))
    mask[start:end, start:end] = 1

    # Create bounding box (x_min, y_min, width, height)
    bbox = (start, start, square_size, square_size)

    # Create noisy image; work on a copy so the returned mask stays binary
    img = mask.copy()
    if noise_level > 0:
        noise = np.random.normal(0, noise_level, img.shape)
        img = np.clip(img + noise, 0, 1)
    return img, mask, bbox
# Example usage:
size = 100
square_size = 40
img, mask, bbox = generate_square_image(size, square_size, noise_level=0.1)

# One row of three panels: noisy image, clean mask, noisy image plus box.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))
panels = (
    (img, 'Generated Image'),
    (mask, 'Mask'),
    (img, 'Image with Bounding Box'),
)
for a, (picture, title) in zip(ax, panels):
    a.imshow(picture, cmap='gray', origin='upper')
    a.set_title(title)
    # Hide the tick marks; the pixel indices add nothing to the comparison.
    a.set_xticks([])
    a.set_yticks([])

# Overlay the bounding box on the third panel. imshow centers each pixel on
# its integer coordinate, so the anchor is moved back by half a pixel to put
# the rectangle's edges on the pixel edges rather than the pixel centers.
x, y, width, height = bbox
rect = Rectangle((x - 0.5, y - 0.5), width, height, linewidth=2, edgecolor='r', facecolor='none')
ax[2].add_patch(rect)

plt.tight_layout()
plt.show()
Output: