i need to convert a frame of the video (which is a nparray) to a pytorch tensor, do some particular actions with it and convert it back but i'm struggling
so, i have a frame returned from video_capture.read() and, as i understood, it's a np array. firstly, i convert it to a tensor and checks if looks correctly (sorry i can't add photos for some reason) then i analyze it (no mistakes), try to rotate it and here's a problem.
can somebody please help me to fix this, i'm so exhausted, chatgpt confuses me even more and don't understand anything... i guess the problem with colours is related to how i convert tensor to pil image, but i tried several changes (commented lines) and nothing hepled. also is there a way to avoid converting tensor to pil image before the rotation? can't i just rotate a tensor?
def tensor_to_image(tensor):
tensor = (tensor * 255).byte()
tensor = tensor.squeeze(0)
tensor = tensor.permute(1, 2, 0)
image = Image.fromarray(np.array(tensor).astype(np.uint8))
image = cv2.cvtColor(np.asarray(image), cv2.COLOR_BGR2RGB)
image = Image.fromarray(np.asarray(image))
return image
def rotate_tensor(frame_tensor, landmarks):
roll = calc_face_angle(landmarks)
frame = tf.to_pil_image(frame_tensor.squeeze(0))
#frame = tensor_to_image(frame_tensor)
frame.show()
if not np.isnan(roll):
rotated_frame = frame.rotate(roll, resample=Image.BICUBIC, expand=True)
else:
print("Failed to calculate face angle for rotation")
return frame_tensor
#rotated_tensor = tf.to_tensor(rotated_frame).unsqueeze(0)
transform = transforms.ToTensor() # Используем torchvision для преобразования в тензор
rotated_tensor = transform(rotated_frame).unsqueeze(0)
return rotated_tensor
def check_tensor(self, frame_tensor):
frame_numpy = frame_tensor.squeeze(0).permute(1, 2, 0).byte().numpy()
#frame_numpy = cv2.cvtColor(frame_numpy, cv2.COLOR_RGB2BGR)
cv2.imshow("Frame", frame_numpy)
cv2.waitKey(0)
cv2.destroyAllWindows()
def analyze_video(self, video_path):
video_capture = cv2.VideoCapture(video_path)
for i in range(1):
ret, frame = video_capture.read()
if not ret:
break
# преобразуем фрейм в тензор
frame_tensor = torch.from_numpy(frame).float()
frame_tensor = frame_tensor.permute(2, 0, 1).unsqueeze(0)
#frame_tensor = frame_tensor[:, [2, 1, 0], :, :]
self.check_tensor(frame_tensor)
orig_prediction = self.analyze_frame(frame_tensor)
rotated_tensor = im.rotate_tensor(frame_tensor, orig_prediction.head())
self.check_tensor(rotated_tensor)
frame.show()
is showing the wrong colors is because OpenCV uses the BGR format while PyTorch and PIL use the RGB format. Your frame is created from OpenCV (BGR format) and then you attempt to display it with PIL (RGB format) without any conversion.self.check_tensor(rotated_tensor)
shows a black screen is because the image-to-tensor conversion in rotate_tensor()
normalizes the values to the range [0, 1] and when you cast it to an integer in check_tensor()
it floors all of the values to 0 since they are all decimals between 0 and 1 so you need to multiply the tensor by 255 after rotating.tensor_to_image()
and if you do OpenCV and PIL will usually mod the values leading to unexpected colors.def tensor_to_image(tensor):
tensor = tensor.byte()
tensor = tensor.squeeze(0)
tensor = tensor.permute(1, 2, 0)
image = Image.fromarray(np.array(tensor).astype(np.uint8))
image = cv2.cvtColor(np.asarray(image), cv2.COLOR_BGR2RGB)
image = Image.fromarray(np.asarray(image))
return image
def rotate_tensor(frame_tensor, landmarks):
roll = calc_face_angle(landmarks)
frame = tensor_to_image(frame_tensor)
frame.show()
if not np.isnan(roll):
rotated_frame = frame.rotate(roll, resample=Image.BICUBIC, expand=True)
else:
print("Failed to calculate face angle for rotation")
return frame_tensor
rotated_frame = cv2.cvtColor(np.asarray(rotated_frame), cv2.COLOR_RGB2BGR)
transform = transforms.ToTensor()
rotated_tensor = transform(rotated_frame).unsqueeze(0)
return rotated_tensor * 255