python c++opencv computer-vision camera-calibration

Blending an inflated equirectilinear image with another camera image

I've been struggling with this problem for a week and can't find a solution, or even an approach, and I'm starting to wonder whether it is impossible. I have provided all the information I have. I have the following fisheye-distorted image, and I'm trying to enhance the fisheye inflated equirectilinear image.

The fisheye-distorted image is shown first, followed by the fisheye inflated equirectilinear image. The purpose: I'm working with NeRF, and the output for the second image lacks detail and contains a lot of bad pixels, as shown in the figure. So the goal is to blend the two images (or use any other technique) to get a better image in terms of detail and clarity. The engine that is used: https://github.com/fbriggs/lifecast_public/blob/main/nerf/source/lifecast_nerf_lib.cc#L1069 The paper that describes their approach: https://lifecast.ai/baking_nerf_to_ldi.pdf

The equation for generating their distorted image:

This is the output from the script provided in the post

My approach: I tried to warp the camera image so that it has a distortion similar to the target image, then matched features between the two and blended them, but the result is not promising. This is the output:

I'm looking for a C++ or a Python solution (Python is fine for a trial). Here is my attempt as an MCVE:

import cv2
import numpy as np

# Load the real camera frame and the NeRF-rendered layer that should be enhanced.
# cv2.imread returns None (it does not raise) when a file cannot be read, so
# fail here with a clear message instead of on the first attribute access.
camera_image = cv2.imread('camera_0001 (1).tif')
if camera_image is None:
    raise FileNotFoundError("could not read 'camera_0001 (1).tif'")
undistorted_layer = cv2.imread('fused_bgra_000001.jpg')
if undistorted_layer is None:
    raise FileNotFoundError("could not read 'fused_bgra_000001.jpg'")

# Bring the rendered layer to the camera image's resolution.
height, width = camera_image.shape[:2]
undistorted_layer = cv2.resize(undistorted_layer, (width, height))

# Parameters of the inflated equiangular curve.
beta = 0.7
gamma = 3
S = 1.7  # Scaling factor
r90 = S * (width / 2)  # Adjust r90 with scaling

# Build the remap tables for every destination pixel at once; this evaluates
# the same formula as a per-pixel loop but without the Python-level overhead.
grid_y, grid_x = np.indices((height, width), dtype=np.float64)
nx = (grid_x - width / 2) / r90
ny = (grid_y - height / 2) / r90
r = np.sqrt(nx**2 + ny**2)

# Apply the inflated equiangular transformation
phi = (np.pi / 2) * (beta * r + (1 - beta) * (r ** gamma))

# phi / r is undefined at the exact centre; leaving the ratio at 0 there makes
# the centre pixel map to (width / 2, height / 2) because nx == ny == 0.
ratio = np.divide(phi, r, out=np.zeros_like(r), where=r != 0)
map_x = (ratio * nx * r90 + width / 2).astype(np.float32)
map_y = (ratio * ny * r90 + height / 2).astype(np.float32)

# Resample the camera image through the map; pixels that land outside the
# source are filled by the constant border.
destination_image = cv2.remap(camera_image, map_x, map_y, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)
cv2.imwrite('destination_image.png', destination_image)

# Detect SIFT features in the rendered layer and in the warped camera image.
sift = cv2.SIFT_create()

keypoints1, descriptors1 = sift.detectAndCompute(undistorted_layer, None)
keypoints2, descriptors2 = sift.detectAndCompute(destination_image, None)

# detectAndCompute yields no descriptors for a featureless image; matching
# cannot proceed in that case.
if descriptors1 is None or descriptors2 is None:
    raise RuntimeError("no SIFT features found in one of the images")

FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)

flann = cv2.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(descriptors1, descriptors2, k=2)

# Apply Lowe's ratio test. Entries with fewer than two neighbours are skipped
# because the ratio test needs both the best and second-best candidate.
good_matches = []
for pair in matches:
    if len(pair) < 2:
        continue
    m, n = pair
    if m.distance < 0.7 * n.distance:
        good_matches.append(m)

# Write the match visualisation before validating, so it is available for
# debugging even when alignment is not possible.
matched_image = cv2.drawMatches(undistorted_layer, keypoints1, destination_image, keypoints2, good_matches, None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
cv2.imwrite('matched_image.png', matched_image)

# A homography has 8 degrees of freedom and needs at least 4 correspondences.
if len(good_matches) < 4:
    raise RuntimeError(
        f"need at least 4 good matches to estimate a homography, got {len(good_matches)}"
    )

points1 = np.float32([keypoints1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
points2 = np.float32([keypoints2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)

H, mask = cv2.findHomography(points1, points2, cv2.RANSAC, 5.0)
if H is None:
    raise RuntimeError("homography estimation failed (RANSAC found no consistent model)")

# Warp the rendered layer into the frame of the warped camera image.
aligned_layer = cv2.warpPerspective(undistorted_layer, H, (destination_image.shape[1], destination_image.shape[0]))

cv2.imwrite('aligned_layer.png', aligned_layer)

# Blend the images with equal weight
blended = cv2.addWeighted(destination_image, 0.5, aligned_layer, 0.5, 0)

# Save and display the result
cv2.imwrite('blended_image.png', blended)
cv2.imshow('Blended Image', blended)
cv2.waitKey(0)
cv2.destroyAllWindows()

This image is the one that I want to correct and to have more details using the camera image (the next image)

This is the input image

Expected output: notice the clarity and detail of the image compared to the one synthesized by NeRF.

This is the expected output, which consists of two layers; I only need one of the two. As you can see, the image has more detail and nothing is missing.

Solution

Here is some Python/OpenCV code to transform a perspective image into a fisheye image. Currently my code follows the equations from Wikipedia (https://en.wikipedia.org/wiki/Fisheye_lens), which need both the input and output FOV. A circular output the size of the image only occurs if both FOVs are equal. But the output loses quality when the input perspective FOV is near 180 deg (as is expected, since perspective geometry fails at 180 deg).

Input:

import numpy as np
import cv2
import math

# Read the perspective (source) image. cv2.imread returns None rather than
# raising when the file cannot be read, so check explicitly.
img = cv2.imread("camera_0001.png")
if img is None:
    raise FileNotFoundError('could not read "camera_0001.png"')

# get dimensions of input (source)
# output will be same dimensions
h, w = img.shape[:2]

# set input (source) perspective fov in degrees; must be less than 180 and
# gets very distorted as it approaches 180
ifov = 160

# set output (destination) fisheye fov in degrees
ofov = 180

# center of perspective and fisheye assumed to be center of image
xcent = w / 2
ycent = h / 2

# set background -- choices are "color" or "transparency"
background = "color"  # otherwise specify "transparency"

# set background color (for background=color)
bgcolor = (0,0,0)

# set up the x and y maps as float32, plus a uint8 mask of valid output pixels
map_X = np.zeros((h, w), np.float32)
map_Y = np.zeros((h, w), np.float32)
mask = np.zeros((h, w), np.uint8)

# create map with the perspective and linear fisheye
#
# see https://en.wikipedia.org/wiki/Fisheye_lens
# note phi=fov/2 in the formulas below; fov=field of view (aperture)
# note r=radius from dimension of image=N/2; N=width
# perspective: r=f*tan(phi); f=r/tan(phi); f=(N/2)/tan((fov/2)*(pi/180))=N/(2*tan(fov*pi/360))
# linear (equidistant) fisheye: r=f*phi; f=r/phi; f=(N/2)/((fov/2)*(pi/180))=N*180/(fov*pi)
# X,Y is source (perspective) and x,y is destination (fisheye)

# compute input (source) focal length from perspective equation and ifov
ifoc = w/(2*math.tan(ifov*math.pi/360))

# compute inverse of output (fisheye) focal length using ofov
ofocinv = (ofov*math.pi)/(w*180)

# Build the destination -> source lookup maps for all pixels at once.
# xd, yd are destination pixel offsets from the image centre and rd is the
# corresponding radius in pixels.
yy, xx = np.indices((h, w), dtype=np.float64)
xd = xx - xcent
yd = yy - ycent
rd = np.hypot(xd, yd)

# angle from the optical axis for each destination (fisheye) pixel; the same
# angle is then used on the input (perspective) side
phiang = ofocinv*rd

# A pixel is valid when it lies inside the fisheye circle of radius xcent AND
# its angle is below 90 degrees. At or beyond 90 degrees the perspective
# projection is undefined: tan() blows up and then turns negative, which would
# sample mirrored content from the opposite side of the source image.
inside = (rd <= xcent) & (phiang < math.pi/2)

# radius in the input (perspective) image from ifoc and phi; invalid pixels
# are evaluated at angle 0 so tan() stays finite
rs = ifoc*np.tan(np.where(inside, phiang, 0.0))

# rs/rd is undefined at the exact centre; a ratio of 0 there maps the centre
# pixel onto (xcent, ycent) because xd == yd == 0
ratio = np.divide(rs, rd, out=np.zeros_like(rd), where=rd > 0)

# source (perspective) X,Y for every destination (fisheye) x,y; invalid pixels
# keep the identity mapping and are flagged 0 in the mask
# note: this radial scaling is good only if there is no rotation of the fisheye output
map_X = np.where(inside, ratio*xd + xcent, xx).astype(np.float32)
map_Y = np.where(inside, ratio*yd + ycent, yy).astype(np.float32)
mask = np.where(inside, 255, 0).astype(np.uint8)

# Resample the source image through the lookup maps (Lanczos interpolation,
# black fill wherever the map points outside the source).
result = cv2.remap(
    img,
    map_X,
    map_Y,
    interpolation=cv2.INTER_LANCZOS4,
    borderMode=cv2.BORDER_CONSTANT,
    borderValue=(0, 0, 0),
)

if background != "color":
    # transparency: add an alpha channel and use the validity mask as alpha
    result = cv2.cvtColor(result, cv2.COLOR_BGR2BGRA)
    result[..., 3] = mask
else:
    # paint every pixel flagged invalid in the mask with the background color
    result[mask == 0] = bgcolor

# Write the fisheye image to disk.
cv2.imwrite("camera_0001_fisheye.png", result)

# Show the input, the mask and the output, then wait for a key press.
for title, image in (('img', img), ('mask', mask), ('result', result)):
    cv2.imshow(title, image)
cv2.waitKey(0)
cv2.destroyAllWindows()

Fisheye for input FOV=120 and output FOV=180:

Fisheye for input FOV=160 and output FOV=180:

Fisheye for input FOV=160 and output FOV=160:

ADDITION

Here is the modified code for the Phi computation with the distortion arguments: scale (S), beta, gamma from the reference in the question.

import numpy as np
import cv2
import math

# Read the perspective (source) image. cv2.imread returns None rather than
# raising when the file cannot be read, so check explicitly.
img = cv2.imread("camera_0001.png")
if img is None:
    raise FileNotFoundError('could not read "camera_0001.png"')

# get dimensions of input (source)
# output will be same dimensions
h, w = img.shape[:2]

# set input (source) perspective fov in degrees; must be less than 180 deg and
# gets very distorted as it approaches 180
# note output (destination) fisheye fixed at 180
ifov = 160

# set phi angle distortion parameters
scale = 1.15
beta = 0.5
gamma = 3

# center of perspective and fisheye assumed to be center of image
xcent = w / 2
ycent = h / 2

# set background -- choices are "color" or "transparency"
background = "color"  # otherwise specify "transparency"

# set background color (for background=color)
bgcolor = (0,0,0)

# set up the x and y maps as float32, plus a uint8 mask of valid output pixels
map_X = np.zeros((h, w), np.float32)
map_Y = np.zeros((h, w), np.float32)
mask = np.zeros((h, w), np.uint8)

# create map with the perspective and linear fisheye
# see https://en.wikipedia.org/wiki/Fisheye_lens

# note phi=fov/2 in the formulas below for phi corresponding to fov; fov=field of view (aperture)
# note r=radius from dimension of image=N/2; N=width
# perspective: r=f*tan(phi); f=r/tan(phi); f=(N/2)/tan((fov/2)*(pi/180))=N/(2*tan(fov*pi/360))
# linear: r=f*phi; f=r/phi; f=(N/2)/((fov/2)*(pi/180))=N*180/(fov*pi)
# X,Y is source (perspective) and x,y is destination (fisheye)

# see https://stackoverflow.com/questions/78952264/blending-an-inflated-equirectilinear-image-with-another-camera-image?noredirect=1#comment139232795_78952264
# phi = (pi/2)*(beta*rprime + (1-beta)*rprime**gamma)
# where rprime = r/r90 and r90 = scale*width/2 and output FOV fixed at 180
# for beta=1 and scale=1, r90 = w/2, so phi = (pi/2)*r/(w/2) = pi*r/w

# compute input (source) focal length from perspective equation and ifov
ifoc = w/(2*math.tan(ifov*math.pi/360))

# compute r90 = scale*w/2 for use later in rprime = rd/r90
r90 = scale*w/2

# Build the destination -> source lookup maps for all pixels at once.
# xd, yd are destination pixel offsets from the image centre and rd is the
# corresponding radius in pixels.
yy, xx = np.indices((h, w), dtype=np.float64)
xd = xx - xcent
yd = yy - ycent
rd = np.hypot(xd, yd)

# angle from the optical axis for each destination pixel, using the inflated
# curve phi = (pi/2)*(beta*rprime + (1-beta)*rprime**gamma), rprime = rd/r90
rprime = rd/r90
phiang = (math.pi/2) * ( beta*rprime + (1-beta)*(rprime)**gamma )

# A pixel is valid when it lies inside the circle of radius xcent AND its
# angle is below 90 degrees. At or beyond 90 degrees (reachable e.g. with
# scale < 1) the perspective projection is undefined: tan() blows up and then
# turns negative, which would sample mirrored content from the source image.
inside = (rd <= xcent) & (phiang < math.pi/2)

# radius in the input (perspective) image from ifoc and phi; invalid pixels
# are evaluated at angle 0 so tan() stays finite
rs = ifoc*np.tan(np.where(inside, phiang, 0.0))

# rs/rd is undefined at the exact centre; a ratio of 0 there maps the centre
# pixel onto (xcent, ycent) because xd == yd == 0
ratio = np.divide(rs, rd, out=np.zeros_like(rd), where=rd > 0)

# source (perspective) X,Y for every destination (fisheye) x,y; invalid pixels
# point at (0, 0) and are flagged 0 in the mask
# note: this radial scaling is good only if there is no rotation of the fisheye output
map_X = np.where(inside, ratio*xd + xcent, 0.0).astype(np.float32)
map_Y = np.where(inside, ratio*yd + ycent, 0.0).astype(np.float32)
mask = np.where(inside, 255, 0).astype(np.uint8)

# Resample the source image through the lookup maps (Lanczos interpolation,
# black fill wherever the map points outside the source).
result = cv2.remap(
    img,
    map_X,
    map_Y,
    interpolation=cv2.INTER_LANCZOS4,
    borderMode=cv2.BORDER_CONSTANT,
    borderValue=(0, 0, 0),
)

if background != "color":
    # transparency: add an alpha channel and use the validity mask as alpha
    result = cv2.cvtColor(result, cv2.COLOR_BGR2BGRA)
    result[..., 3] = mask
else:
    # paint every pixel flagged invalid in the mask with the background color
    result[mask == 0] = bgcolor

# Write the fisheye image to disk.
cv2.imwrite("camera_0001_fisheye2.png", result)

# Show the input, the mask and the output, then wait for a key press.
for title, image in (('img', img), ('mask', mask), ('result', result)):
    cv2.imshow(title, image)
cv2.waitKey(0)
cv2.destroyAllWindows()

Fisheye for ifov = 160, scale = 1.15, beta = 0.5, gamma = 3: