python opencv computer-vision object-detection

Extracting the colors on the face of a stickerless Rubik's cube

I am tasked with the problem of finding the colors on the face of a stickerless Rubik's cube using OpenCV but I can't figure out what the best way might be to do so.

I looked at various solutions to the same problem online, but most of them rely on Canny edge detection, which works great if the Rubik's cube has a face with stickers on a black background like this one:

stickered Rubik's cube

However, this fails miserably when presented with faces of a stickerless Rubik's cube like this one:

stickerless Rubik's cube

So my question is, how do I use OpenCV in order to detect the colors on the faces for this stickerless Rubik's cube?

This is what I have tried till now:

import cv2 as cv
import numpy as np
from google.colab.patches import cv2_imshow


# Load the input picture. cv.imread returns None (it does not raise) when the
# file is missing or unreadable, so fail early with a clear message.
image = cv.imread('cube.png')
if image is None:
    raise FileNotFoundError("cube.png could not be read")


# Pre-processing: grayscale -> denoise -> blur -> Canny edges -> dilate,
# so that thin edge fragments merge into closed outlines.
grey_frame = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
noiseless_frame = cv.fastNlMeansDenoising(grey_frame, None, 20, 7, 7)
blurred_frame = cv.blur(noiseless_frame, (3, 3))
# The fourth positional parameter of cv.Canny is the output array `edges`,
# so the aperture size has to be passed by keyword.
canny_frame = cv.Canny(blurred_frame, 30, 60, apertureSize=3)
dilation_kernel = cv.getStructuringElement(cv.MORPH_RECT, (9, 9))
dilated_frame = cv.dilate(canny_frame, dilation_kernel)

contours, _ = cv.findContours(dilated_frame, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)

# Bounding boxes of contours that look like one square of the cube face.
square_contours = []

for contour in contours:

    approx = cv.approxPolyDP(contour, 0.1 * cv.arcLength(contour, True), True)

    # NOTE: the previous `len(approx) == 4 or True` test was always true, i.e.
    # the four-vertex filter was disabled. Candidates are therefore filtered
    # only by bounding-box aspect ratio, width and contour area.
    x, y, w, h = cv.boundingRect(approx)
    ratio = float(w) / h
    area = cv.contourArea(approx)

    if 0.8 <= ratio <= 1.2 and 30 <= w <= 80 and area >= 900:
        square_contours.append({"x": x, "y": y, "w": w, "h": h})


# Draw every accepted bounding box in green on a copy of the input.
new_img = image.copy()
for contour in square_contours:

    x, y, w, h = contour["x"], contour["y"], contour["w"], contour["h"]
    cv.rectangle(new_img, (x, y), (x + w, y + h), (0, 255, 0), 2)


# OpenCV is imported as `cv`, so `cv2.imshow` would raise NameError; use the
# Colab display helper imported at the top of the snippet.
cv2_imshow(new_img)

However, this fails to detect the cubes on the faces of a stickerless cube most of the time.

Furthermore, sometimes the boundary between two adjacent cubes that have the same color is not very distinct, so both of them are detected as part of the same contour.

Here is another image of a stickerless Rubik's cube: Stickerless cube

Solution

Personally, I trained a YOLOv11 segmentation model to detect the positions of Rubik's cubes.

First of all, the data has to be prepared in the YOLOv11 dataset format, and a data.yaml file has to be created:

# Image folders for each split.
train: ../train/images
val: ../valid/images
test: ../test/images

# Number of classes; must equal the number of entries in `names`.
nc: 1
names: ['Cube']

Then, install ultralytics and train the model:

!pip install ultralytics
from ultralytics import YOLO

# Load the starting weights, then train on the dataset described by data.yaml.
# NOTE(review): 'best.pt' is presumably an existing checkpoint to continue
# from -- confirm it is the intended starting point.
model = YOLO('best.pt')

train_settings = {
    'data': './data/data.yaml',
    'epochs': 100,
    'batch': 64,
    'device': 'cuda',
}
model.train(**train_settings)

After running the segmentation model on a frame, I do some checks to see whether each detected object is a Rubik's cube or not:

import cv2
import numpy as np
from ultralytics import YOLO

def is_patch_cube(patch, epsilon=0.2):
    """Return True when *patch* is roughly square.

    Both the height/width ratio and its inverse must lie within
    ``[1 - epsilon, 1 + epsilon]``.

    Args:
        patch: Array-like object whose ``shape`` starts with (height, width).
        epsilon: Allowed deviation of either ratio from 1.

    Returns:
        False for an empty patch (zero height or width) or when either ratio
        falls outside the tolerance, True otherwise.
    """
    h, w = patch.shape[:2]

    # An empty crop cannot be a cube face; this also avoids dividing by zero.
    if h == 0 or w == 0:
        return False

    lower, upper = 1 - epsilon, 1 + epsilon
    return lower <= h / w <= upper and lower <= w / h <= upper

def is_patch_mostly_colored(patch, threshold=0.85):
    """Tell whether most channel values of *patch* are non-zero.

    The fraction is taken over every individual channel value
    (height * width * channels), not over whole pixels.

    Args:
        patch: Three-dimensional array (height, width, channels).
        threshold: Fraction of positive values that must be exceeded.

    Returns:
        True when the share of values greater than zero is above *threshold*.
    """
    rows, cols, channels = patch.shape
    total_values = rows * cols * channels
    positive_values = (patch > 0).sum()
    return positive_values / total_values > threshold

def check_homogenous_color(patch, color, threshold):
    """Check whether enough of *patch* falls inside the HSV range of *color*.

    Args:
        patch: BGR image region (it is converted with ``cv2.COLOR_BGR2HSV``).
        color: Key into the module-level ``color_ranges`` mapping, whose
            values are ``(lower, upper)`` bounds.
        threshold: Fraction of pixels that must lie inside the range.

    Returns:
        False when *color* is not a key of ``color_ranges``; otherwise whether
        the share of in-range pixels is greater than *threshold*.
    """
    if color not in color_ranges:
        return False

    rows, cols = patch.shape[:2]
    hsv_patch = cv2.cvtColor(patch, cv2.COLOR_BGR2HSV)
    lower_bound, upper_bound = color_ranges[color]
    in_range_mask = cv2.inRange(hsv_patch, np.array(lower_bound), np.array(upper_bound))

    matching_fraction = np.count_nonzero(in_range_mask) / (rows * cols)
    return matching_fraction > threshold


def find_segments(seg_model: YOLO, image):
    """Run *seg_model* on *image* with verbose output disabled.

    Args:
        seg_model: Callable model; it is invoked as
            ``seg_model(image, verbose=False)``.
        image: Input handed to the model unchanged.

    Returns:
        Whatever the model call returns.
    """
    predictions = seg_model(image, verbose=False)
    return predictions


def get_face(results, n, homogenity_thres=0.6):
    """Return the first segmented object that passes the cube-face checks.

    Every mask of every result is cropped to its bounding box and accepted
    only if the crop is roughly square (``is_patch_cube``), mostly non-black
    (``is_patch_mostly_colored``) and a sufficient share of its ``n x n``
    cells is colour-homogeneous according to ``find_colors``.

    Args:
        results: Iterable of result objects exposing ``orig_img`` and
            ``masks`` (``masks`` may be None; otherwise ``masks.data`` is
            iterated).
        n: Number of cells per side of the face grid passed to
            ``find_colors``.
        homogenity_thres: Minimum fraction of cells that must be flagged
            homogeneous.

    Returns:
        ``(colors, cropped_object, mask_np, box)`` for the first accepted
        object, or ``(None, None, None, None)`` when nothing qualifies.
    """
    for r in results:
        original_img = r.orig_img
        img_h, img_w = original_img.shape[:2]

        if r.masks is None:
            continue

        for mask_tensor in r.masks.data:
            mask_np = (mask_tensor.cpu().numpy() * 255).astype(np.uint8)

            # Bring the mask to the resolution of the original frame.
            if mask_np.shape[:2] != (img_h, img_w):
                mask_np = cv2.resize(mask_np, (img_w, img_h), interpolation=cv2.INTER_NEAREST)

            # simplify_mask is defined elsewhere; its box is unpacked below
            # as (x, y, width, height).
            mask_np, box = simplify_mask(mask_np, eps=0.005)
            obj = cv2.bitwise_and(original_img, original_img, mask=mask_np)

            # Clamp the bounding box to the image borders.
            x1, y1, w, h = box
            x2 = min(img_w, x1 + w)
            y2 = min(img_h, y1 + h)
            x1 = max(0, x1)
            y1 = max(0, y1)

            cropped_object = obj[y1:y2, x1:x2]

            # A degenerate box yields an empty crop, which cannot be a face
            # and would break the ratio checks below.
            if cropped_object.size == 0:
                continue

            if not is_patch_cube(cropped_object):
                continue

            if not is_patch_mostly_colored(cropped_object):
                continue

            # find_colors takes exactly (patch, n); the former third argument
            # `color_detection_model` was undefined and raised at call time.
            colors, homogenity = find_colors(cropped_object, n)

            num_cells = sum(len(row) for row in homogenity)
            num_homogeneous = sum(sum(row) for row in homogenity)
            if num_homogeneous < homogenity_thres * num_cells:
                continue

            return colors, cropped_object, mask_np, box

    return None, None, None, None



def find_colors(patch, n):
    """Classify the colour of each cell of an ``n x n`` grid laid over *patch*.

    The patch is split into cells of ``height // n`` by ``width // n`` pixels;
    any remainder rows/columns at the bottom/right are not covered.

    Args:
        patch: Three-dimensional BGR image region.
        n: Number of cells per side.

    Returns:
        A pair ``(colors, homogenity)`` of ``n x n`` nested lists: the colour
        label chosen for each cell, and the result of
        ``check_homogenous_color`` for that cell and label.
    """
    rows, cols, _ = patch.shape
    cell_h, cell_w = rows // n, cols // n

    colors = []
    homogenity = []

    for row_idx in range(n):
        top = row_idx * cell_h
        row_colors = []
        row_flags = []

        for col_idx in range(n):
            left = col_idx * cell_w
            cell = patch[top:top + cell_h, left:left + cell_w]

            # Any colour classifier can be substituted here.
            median_bgr = get_median_color(cell)
            label = find_best_matching_color_legacy(median_bgr, tpe='bgr')

            row_colors.append(label)
            row_flags.append(check_homogenous_color(cell, label, threshold=0.5))

        colors.append(row_colors)
        homogenity.append(row_flags)

    return colors, homogenity

We can use this as follows:

# Run the segmentation model on the current frame (verbose output disabled).
results = find_segments(model, self.current_frame)

# get_face returns (colors, cropped_object, mask_np, box) for the first object
# that passes its checks; all four values are None when nothing qualifies.
face, obj, mask, box = get_face(results, n=self.n, homogenity_thres=0.6)

Thanks to @ChristophRackwitz for recommending the use of semantic segmentation models.