I need to convert a video frame (which is an np array) to a PyTorch tensor, perform a few operations on it, and convert it back, but I'm struggling.
So, I have a frame returned from video_capture.read() and, as I understand it, it's an np array. First I convert it to a tensor and check that it looks correct (sorry, I can't add photos for some reason), then I analyze it (no errors there), then I try to rotate it, and that's where the problem appears.
Can somebody please help me fix this? I'm exhausted, and ChatGPT confuses me even more, I don't understand anything... I guess the problem with the colours is related to how I convert the tensor to a PIL image, but I tried several changes (the commented lines) and nothing helped. Also, is there a way to avoid converting the tensor to a PIL image before the rotation? Can't I just rotate a tensor?
def tensor_to_image(tensor):
    tensor = (tensor * 255).byte()
    tensor = tensor.squeeze(0)
    tensor = tensor.permute(1, 2, 0)
    image = Image.fromarray(np.array(tensor).astype(np.uint8))
    image = cv2.cvtColor(np.asarray(image), cv2.COLOR_BGR2RGB)
    image = Image.fromarray(np.asarray(image))
    return image
def rotate_tensor(frame_tensor, landmarks):
    roll = calc_face_angle(landmarks)
    frame = tf.to_pil_image(frame_tensor.squeeze(0))
    #frame = tensor_to_image(frame_tensor)
    frame.show()
    if not np.isnan(roll):
        rotated_frame = frame.rotate(roll, resample=Image.BICUBIC, expand=True)
    else:
        print("Failed to calculate face angle for rotation")
        return frame_tensor
    #rotated_tensor = tf.to_tensor(rotated_frame).unsqueeze(0)
    transform = transforms.ToTensor()  # use torchvision to convert back to a tensor
    rotated_tensor = transform(rotated_frame).unsqueeze(0)
    return rotated_tensor
def check_tensor(self, frame_tensor):
    frame_numpy = frame_tensor.squeeze(0).permute(1, 2, 0).byte().numpy()
    #frame_numpy = cv2.cvtColor(frame_numpy, cv2.COLOR_RGB2BGR)
    cv2.imshow("Frame", frame_numpy)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def analyze_video(self, video_path):
    video_capture = cv2.VideoCapture(video_path)
    for i in range(1):
        ret, frame = video_capture.read()
        if not ret:
            break
        # convert the frame to a tensor
        frame_tensor = torch.from_numpy(frame).float()
        frame_tensor = frame_tensor.permute(2, 0, 1).unsqueeze(0)
        #frame_tensor = frame_tensor[:, [2, 1, 0], :, :]
        self.check_tensor(frame_tensor)
        orig_prediction = self.analyze_frame(frame_tensor)
        rotated_tensor = im.rotate_tensor(frame_tensor, orig_prediction.head())
        self.check_tensor(rotated_tensor)
This is how you can do it:
def frame_to_tensor(frame):
    # OpenCV frame (BGR) -> RGB
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Convert to a tensor and add a batch dimension
    tensor = torch.from_numpy(frame_rgb).float().permute(2, 0, 1).unsqueeze(0)
    # Normalize to [0, 1]
    tensor = tensor / 255.0
    return tensor
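For reference, this is what the shapes and dtypes should look like at each step (the 480x640 frame size here is just an example):

ret, frame = video_capture.read()
print(frame.shape, frame.dtype)    # e.g. (480, 640, 3) uint8, BGR
tensor = frame_to_tensor(frame)
print(tensor.shape, tensor.dtype)  # torch.Size([1, 3, 480, 640]) torch.float32, RGB in [0, 1]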
def tensor_to_frame(tensor):
    # Remove the batch dimension and move channels last
    frame = tensor.squeeze(0).permute(1, 2, 0)
    # Scale back to [0, 255]; clamp first so out-of-range floats don't wrap around in byte()
    frame = (frame.clamp(0, 1) * 255).byte().numpy()
    # Convert RGB back to BGR for OpenCV
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    return frame
def rotate_tensor(tensor, angle):
    # Convert to PIL (expects an RGB tensor in [0, 1])
    image = tf.to_pil_image(tensor.squeeze(0))
    # Rotate the image
    rotated = image.rotate(angle, resample=Image.BICUBIC, expand=True)
    # Back to a batched tensor
    return tf.to_tensor(rotated).unsqueeze(0)
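And to your last question: yes, you can rotate the tensor directly and skip PIL entirely. torchvision.transforms.functional.rotate accepts tensors (assuming torchvision >= 0.9); note that tensor inputs only support NEAREST and BILINEAR interpolation, BICUBIC is PIL-only. A minimal sketch:

import torchvision.transforms.functional as tf
from torchvision.transforms import InterpolationMode

def rotate_tensor_direct(tensor, angle):
    # Rotates a (N, C, H, W) tensor without any PIL round trip
    return tf.rotate(tensor, angle, interpolation=InterpolationMode.BILINEAR, expand=True)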
# Usage in your analyze_video:
ret, frame = video_capture.read()
frame_tensor = frame_to_tensor(frame)
# Do your analysis...
rotated_tensor = rotate_tensor(frame_tensor, angle)
# Display/save
output_frame = tensor_to_frame(rotated_tensor)
cv2.imshow("Frame", output_frame)
Key point: the colour problem comes from OpenCV and PIL disagreeing on channel order. OpenCV hands you BGR frames, while PIL and torchvision assume RGB, so you need exactly one cvtColor on the way in (BGR -> RGB) and one on the way out (RGB -> BGR). On top of that, tf.to_pil_image expects a float tensor in [0, 1]; your original frame_tensor held floats in [0, 255], so the values wrapped around when PIL rescaled them, which is why normalizing in frame_to_tensor matters.
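If it helps, here's a minimal, self-contained round trip using the helper functions above (the video path and angle are placeholders):

import cv2

video_capture = cv2.VideoCapture("video.mp4")  # placeholder path
ret, frame = video_capture.read()
if ret:
    tensor = frame_to_tensor(frame)        # BGR uint8 -> RGB float in [0, 1]
    rotated = rotate_tensor(tensor, 15.0)  # angle in degrees, just an example
    cv2.imshow("Rotated", tensor_to_frame(rotated))
    cv2.waitKey(0)
    cv2.destroyAllWindows()
video_capture.release()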