I am tasked with the problem of finding the colors on the face of a stickerless Rubik's cube using OpenCV but I can't figure out what the best way might be to do so.
I tried to look at various solutions for the same problem online, but most of them rely on Canny edge detection, which works great if the Rubik's cube has a face with stickers on a black background like this one:
However, this fails miserably when presented with faces of a stickerless Rubik's cube like this one:
So my question is, how do I use OpenCV in order to detect the colors on the faces for this stickerless Rubik's cube?
This is what I have tried till now:
import cv2 as cv
import numpy as np
from google.colab.patches import cv2_imshow
# Edge-based attempt: find roughly square contours and draw their bounding
# boxes on a copy of the input image.

# Set to True to keep only contours whose polygon approximation has exactly
# four vertices. The original attempt had this filter switched off
# (`len(approx) == 4 or True`), so it stays off by default.
REQUIRE_QUADRILATERAL = False

image = cv.imread('cube.png')
if image is None:
    # cv.imread reports a missing or unreadable file by returning None.
    raise FileNotFoundError("could not read 'cube.png'")

# Pre-processing: greyscale -> denoise -> light blur -> edges -> thick edges.
grey_frame = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
noiseless_frame = cv.fastNlMeansDenoising(grey_frame, None, 20, 7, 7)
blurred_frame = cv.blur(noiseless_frame, (3, 3))
# The aperture size has to be passed by keyword: the fourth positional
# parameter of the Python binding is the optional output array.
canny_frame = cv.Canny(blurred_frame, 30, 60, apertureSize=3)
dilated_frame = cv.dilate(
    canny_frame, cv.getStructuringElement(cv.MORPH_RECT, (9, 9))
)
contours, _ = cv.findContours(
    dilated_frame, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE
)

# Keep contours whose bounding box is near-square, 30-80 px wide and whose
# approximated polygon covers at least 900 px^2.
square_contours = []
for contour in contours:
    approx = cv.approxPolyDP(contour, 0.1 * cv.arcLength(contour, True), True)
    if REQUIRE_QUADRILATERAL and len(approx) != 4:
        continue
    x, y, w, h = cv.boundingRect(approx)
    ratio = float(w) / h
    area = cv.contourArea(approx)
    if 0.8 <= ratio <= 1.2 and 30 <= w <= 80 and area >= 900:
        square_contours.append({"x": x, "y": y, "w": w, "h": h})

# Draw the surviving boxes in green on a copy so `image` stays untouched.
new_img = image.copy()
for contour in square_contours:
    x, y, w, h = contour["x"], contour["y"], contour["w"], contour["h"]
    cv.rectangle(new_img, (x, y), (x + w, y + h), (0, 255, 0), 2)

# `cv2` is never imported here (OpenCV is bound to `cv`); the Colab helper
# imported at the top of the snippet is the display function to use.
cv2_imshow(new_img)
However, this fails to detect the cubes on the faces of a stickerless cube most of the time.
Furthermore, sometimes the boundary between two adjacent cubes which have the same color is not very distinct. This leads to it detecting both of them as part of the same contour.
Here is another image of a stickerless Rubik's cube: (Stickerless cube)
In particular, I trained a YOLOv11 segmentation model to detect the positions of Rubik's cubes.
First of all, the data has to be prepared in the YOLOv11 dataset format, and a data.yaml file has to be created:
train: ../train/images
val: ../valid/images
test: ../test/images
nc: 1
names: ['Cube']
Then, install ultralytics and train the model
!pip install ultralytics
from ultralytics import YOLO
# Load the weights stored in 'best.pt' as the starting point.
# NOTE(review): 'best.pt' looks like the output of an earlier training run;
# confirm that continuing from it (rather than from a stock pretrained
# segmentation checkpoint) is what is intended here.
model = YOLO('best.pt')
# Train for 100 epochs with a batch size of 64 on the CUDA device, using the
# dataset description in ./data/data.yaml.
model.train(data='./data/data.yaml', epochs=100, batch=64, device='cuda')
After running the segmentation model on a frame, I do some checks to see whether the detected object is a Rubik's cube or not:
import cv2
import numpy as np
from ultralytics import YOLO
def is_patch_cube(patch, epsilon=0.2):
    """Return True when *patch* is roughly square.

    Args:
        patch: Array whose first two dimensions are height and width.
        epsilon: Allowed deviation from 1 for both height/width and
            width/height.

    Returns:
        True if both aspect ratios lie within ``[1 - epsilon, 1 + epsilon]``;
        False otherwise, including when the patch has zero height or width.
    """
    h, w = patch.shape[:2]
    if h == 0 or w == 0:
        # An empty crop has no aspect ratio; dividing below would raise.
        return False
    ratio, inverse = h / w, w / h
    lower, upper = 1 - epsilon, 1 + epsilon
    # Both directions are checked, so the accepted band is the intersection
    # of the two ranges rather than a single symmetric interval.
    return lower <= ratio <= upper and lower <= inverse <= upper
def is_patch_mostly_colored(patch, threshold=0.85):
    """Return True when most pixels of *patch* are not pure black.

    A pixel counts as coloured when at least one of its channels is non-zero.
    Counting individual channel values instead would under-count saturated
    colours: a pure red BGR pixel ``(0, 0, 255)`` has only one non-zero
    channel out of three.

    Args:
        patch: Image array of shape ``(h, w)`` or ``(h, w, channels)``.
        threshold: Minimum fraction of coloured pixels, compared strictly.

    Returns:
        True if the fraction of coloured pixels exceeds *threshold*; False
        otherwise, including for an empty patch.
    """
    h, w = patch.shape[:2]
    num_pixels = h * w
    if num_pixels == 0:
        # Nothing to measure; also avoids dividing by zero.
        return False
    colored = patch > 0
    if colored.ndim > 2:
        # Collapse the channel axis: any non-zero channel marks the pixel.
        colored = colored.any(axis=-1)
    return bool(np.count_nonzero(colored) / num_pixels > threshold)
def check_homogenous_color(patch, color, threshold):
    """Tell whether enough of *patch* falls inside the HSV range of *color*.

    The range is looked up in the module-level ``color_ranges`` mapping as a
    ``(lower, upper)`` pair. A colour that is not in the mapping yields False.

    Args:
        patch: BGR image patch.
        color: Key into ``color_ranges``.
        threshold: Minimum fraction of in-range pixels, compared strictly.

    Returns:
        True when the fraction of pixels inside the range exceeds *threshold*.
    """
    if color in color_ranges:
        rows, cols = patch.shape[:2]
        hsv_patch = cv2.cvtColor(patch, cv2.COLOR_BGR2HSV)
        low_bound, high_bound = color_ranges[color]
        # inRange gives a single-channel mask that is non-zero where the
        # pixel lies within the bounds.
        in_range_mask = cv2.inRange(
            hsv_patch, np.array(low_bound), np.array(high_bound)
        )
        matching_fraction = np.count_nonzero(in_range_mask) / (rows * cols)
        return matching_fraction > threshold
    return False
def find_segments(seg_model: YOLO, image):
    """Run *seg_model* on *image* with verbose output off; return its results."""
    segmentation_results = seg_model(image, verbose=False)
    return segmentation_results
def get_face(results, n, homogenity_thres=0.6):
    """Pick the first segmented object that looks like an n x n cube face.

    Each mask in *results* is resized to the image if needed, simplified,
    applied to the original image and cropped to its bounding box. The crop
    is accepted when it is roughly square, mostly non-black, and enough of
    its n x n cells have a homogeneous colour.

    Args:
        results: Iterable of segmentation results exposing ``orig_img`` and
            ``masks`` (with ``masks.data``), as returned by ``find_segments``.
        n: Number of cells per side of the face.
        homogenity_thres: Minimum fraction of cells that must be homogeneous.

    Returns:
        ``(colors, cropped_object, mask_np, box)`` for the first accepted
        object, or ``(None, None, None, None)`` when none qualifies.
    """
    for r in results:
        if r.masks is None:
            continue
        original_img = r.orig_img
        img_h, img_w = original_img.shape[:2]
        for mask_tensor in r.masks.data:
            mask_np = (mask_tensor.cpu().numpy() * 255).astype(np.uint8)
            if mask_np.shape[0] != img_h or mask_np.shape[1] != img_w:
                # Bring the mask to image resolution; nearest-neighbour
                # keeps it binary.
                mask_np = cv2.resize(
                    mask_np, (img_w, img_h), interpolation=cv2.INTER_NEAREST
                )
            # simplify_mask is defined elsewhere; it is unpacked below as a
            # mask plus an (x, y, w, h) box.
            mask_np, box = simplify_mask(mask_np, eps=0.005)
            obj = cv2.bitwise_and(original_img, original_img, mask=mask_np)

            # Clamp the box to the image before cropping.
            x1, y1, w, h = box
            x2 = min(img_w, x1 + w)
            y2 = min(img_h, y1 + h)
            x1 = max(0, x1)
            y1 = max(0, y1)
            if x2 <= x1 or y2 <= y1:
                # Degenerate box: nothing to crop, and the checks below
                # would be handed an empty patch.
                continue
            cropped_object = obj[y1:y2, x1:x2]

            if not is_patch_cube(cropped_object):
                continue
            if not is_patch_mostly_colored(cropped_object):
                continue

            # find_colors takes exactly (patch, n).
            colors, homogenity = find_colors(cropped_object, n)
            homogeneous_cells = sum(sum(row) for row in homogenity)
            total_cells = len(homogenity) * len(homogenity[0])
            if homogeneous_cells < homogenity_thres * total_cells:
                continue
            return colors, cropped_object, mask_np, box
    return None, None, None, None
def find_colors(patch, n):
    """Split *patch* into an n x n grid and classify the colour of each cell.

    Cells are ``h // n`` by ``w // n`` pixels, so any remainder rows and
    columns at the bottom and right edges are left out.

    Args:
        patch: Three-dimensional image array ``(h, w, channels)``.
        n: Number of cells per side.

    Returns:
        ``(colors, homogenity)``: two n x n nested lists holding, per cell,
        the detected colour and whether ``check_homogenous_color`` accepted
        it at a threshold of 0.5.
    """
    height, width, _ = patch.shape
    cell_h = height // n
    cell_w = width // n
    colors = []
    homogenity = []
    for row in range(n):
        top = row * cell_h
        color_row = []
        flag_row = []
        for col in range(n):
            left = col * cell_w
            cell = patch[top:top + cell_h, left:left + cell_w]
            # Any colour classifier can be swapped in here.
            label = find_best_matching_color_legacy(
                get_median_color(cell), tpe='bgr')
            color_row.append(label)
            flag_row.append(check_homogenous_color(cell, label, threshold=0.5))
        colors.append(color_row)
        homogenity.append(flag_row)
    return colors, homogenity
We can use this as follows:
# Run the segmentation model on the current frame.
results = find_segments(model, self.current_frame)
# get_face returns (colors, cropped object, mask, box) for the first object
# that passes its checks, or four Nones when no object qualifies.
face, obj, mask, box = get_face(results, n=self.n, homogenity_thres=0.6)
Thanks to @ChristophRackwitz for recommending usage of semantic segmentation models