I have a video (test.mkv) that I have converted into a 4D NumPy array with shape (frame, height, width, color_channel). I have even managed to convert that array back into the same video (test_2.mkv) without altering anything. However, after reading this new test_2.mkv back into a new NumPy array, the first video's array is different from the second video's array, i.e. their hashes don't match and the numpy.array_equal() function returns False. I have tried using both ffmpeg-python and scikit-video but cannot get the arrays to match.
import ffmpeg
import numpy as np
import hashlib
file_name = 'test.mkv'
# Get video dimensions and framerate
probe = ffmpeg.probe(file_name)
video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
width = int(video_stream['width'])
height = int(video_stream['height'])
frame_rate = video_stream['avg_frame_rate']
# Read video into buffer
out, error = (
ffmpeg
.input(file_name, threads=120)
.output("pipe:", format='rawvideo', pix_fmt='rgb24')
.run(capture_stdout=True)
)
# Convert video buffer to array
video = (
np
.frombuffer(out, np.uint8)
.reshape([-1, height, width, 3])
)
# Convert array to buffer
video_buffer = (
np.ndarray
.flatten(video)
.tobytes()
)
# Write buffer back into a video
process = (
ffmpeg
.input('pipe:', format='rawvideo', s='{}x{}'.format(width, height))
.output("test_2.mkv", r=frame_rate)
.overwrite_output()
.run_async(pipe_stdin=True)
)
process.communicate(input=video_buffer)
# Read the newly written video
out_2, error = (
ffmpeg
.input("test_2.mkv", threads=40)
.output("pipe:", format='rawvideo', pix_fmt='rgb24')
.run(capture_stdout=True)
)
# Convert new video into array
video_2 = (
np
.frombuffer(out_2, np.uint8)
.reshape([-1, height, width, 3])
)
# Video dimensions change
print(f'{video.shape} vs {video_2.shape}') # (844, 1080, 608, 3) vs (2025, 1080, 608, 3)
print(f'{np.array_equal(video, video_2)}') # False
# Hashes don't match
print(hashlib.sha256(bytes(video_2)).digest()) # b'\x88\x00\xc8\x0ed\x84!\x01\x9e\x08 \xd0U\x9a(\x02\x0b-\xeeA\xecU\xf7\xad0xa\x9e\\\xbck\xc3'
print(hashlib.sha256(bytes(video)).digest()) # b'\x9d\xc1\x07xh\x1b\x04I\xed\x906\xe57\xba\xf3\xf1k\x08\xfa\xf1\xfaM\x9a\xcf\xa9\t8\xf0\xc9\t\xa9\xb7'
import skvideo.io as sk
import numpy as np
video_data = sk.vread('test.mkv')
sk.vwrite('test_2_ski.mkv', video_data)
video_data_2 = sk.vread('test_2_ski.mkv')
# Dimensions match but...
print(video_data.shape) # (844, 1080, 608, 3)
print(video_data_2.shape) # (844, 1080, 608, 3)
# ...array elements don't
print(np.array_equal(video_data, video_data_2)) # False
# Hashes don't match either
print(hashlib.sha256(bytes(video_data_2)).digest()) # b'\x8b?]\x8epD:\xd9B\x14\xc7\xba\xect\x15G\xfaRP\xde\xad&EC\x15\xc3\x07\n{a[\x80'
print(hashlib.sha256(bytes(video_data)).digest()) # b'\x9d\xc1\x07xh\x1b\x04I\xed\x906\xe57\xba\xf3\xf1k\x08\xfa\xf1\xfaM\x9a\xcf\xa9\t8\xf0\xc9\t\xa9\xb7'
I don't understand where I'm going wrong, and neither library's documentation shows how to do this particular task. Any help is appreciated. Thank you.
Getting the same hash when writing and reading a video file requires careful attention.
Before comparing the hashes, try looking at the video first.
Executing your code gave me a visibly corrupted first frame of video_2, while the first frame of video (the input) looked correct.
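If you want to reproduce that check yourself, here is a minimal sketch for eyeballing the first frames (it assumes matplotlib is installed and that video and video_2 are the arrays built by your code above; it is only an illustration, not part of the fix):
import matplotlib.pyplot as plt
# Display the first decoded frame of each array side by side for a quick visual check.
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.imshow(video[0])
ax1.set_title('video[0] (input)')
ax2.imshow(video_2[0])
ax2.set_title('video_2[0] (round trip)')
plt.show()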
I suggest the following modifications:

- Write the test_2 video in raw video format.
- Save the test_2 video to an AVI container instead of MKV.
- Replace pixel_format='rgb24' with pixel_format='bgr24', because raw AVI supports bgr24 and not rgb24.
- Write the test_2 video with vcodec='rawvideo' (the rawvideo codec is supported by AVI but not by MKV).

Note:
To get equal hashes, you need a lossless video codec that supports the rgb24 (or bgr24) pixel format.
Most lossless codecs convert the pixel format from RGB to YUV.
The RGB to YUV conversion has rounding errors that prevent equal hashes.
(I suppose there are ways to get around it, but it's a bit complicated.)
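To see why a YUV round trip breaks bit-exactness, here is a toy sketch. The coefficients are one common analog-YUV approximation chosen only for illustration (an assumption; real encoders use BT.601/BT.709 matrices and usually limited range), but the rounding effect is the same:
# Convert one RGB pixel to YUV, quantize to integers (as a codec stores samples),
# and convert back. The reconstructed pixel is typically off by about one count.
r, g, b = 10.0, 200.0, 37.0
y = 0.299 * r + 0.587 * g + 0.114 * b
u = 0.492 * (b - y)
v = 0.877 * (r - y)
y, u, v = round(y), round(u), round(v)    # quantization step
r2 = y + 1.140 * v
g2 = y - 0.395 * u - 0.581 * v
b2 = y + 2.032 * u
print([round(r2), round(g2), round(b2)])  # not exactly [10, 200, 37]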
Here is your complete code with a few modifications:
import ffmpeg
import numpy as np
import hashlib
file_name = 'test.mkv'
# Get video dimensions and framerate
probe = ffmpeg.probe(file_name)
video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
width = int(video_stream['width'])
height = int(video_stream['height'])
frame_rate = video_stream['avg_frame_rate']
# Read video into buffer
out, error = (
ffmpeg
.input(file_name, threads=120)
.output("pipe:", format='rawvideo', pix_fmt='bgr24') # Select bgr24 instead of rgb24 (becasue raw AVI requires bgr24).
.run(capture_stdout=True)
)
# Convert video buffer to array
video = (
np
.frombuffer(out, np.uint8)
.reshape([-1, height, width, 3])
)
# Convert array to buffer
video_buffer = (
np.ndarray
.flatten(video)
.tobytes()
)
# Write buffer back into a video
process = (
ffmpeg
.input('pipe:', format='rawvideo', s='{}x{}'.format(width, height), pixel_format='bgr24', r=frame_rate) # Set input pixel format.
.output("test_2.avi", vcodec='rawvideo') # Select video code "rawvideo"
.overwrite_output()
.run_async(pipe_stdin=True)
)
process.communicate(input=video_buffer)
# Read the newly written video
out_2, error = (
ffmpeg
.input("test_2.avi", threads=40)
.output("pipe:", format='rawvideo', pix_fmt='bgr24')
.run(capture_stdout=True)
)
# Convert new video into array
video_2 = (
np
.frombuffer(out_2, np.uint8)
.reshape([-1, height, width, 3])
)
# Video dimensions now match
print(f'{video.shape} vs {video_2.shape}') # (844, 1080, 608, 3) vs (844, 1080, 608, 3)
print(f'{np.array_equal(video, video_2)}') # True
# Hashes do match
print(hashlib.sha256(bytes(video_2)).digest())
print(hashlib.sha256(bytes(video)).digest())
Result (same hash):
True
b"\xd1yy\x97\x8e\xce\x13\xbcI#\xd2PMP\x80(i+5\xe1\xcd\xab\xf3f\xbe\xcd\xd5'\xbaq\xdd\x9b"
b"\xd1yy\x97\x8e\xce\x13\xbcI#\xd2PMP\x80(i+5\xe1\xcd\xab\xf3f\xbe\xcd\xd5'\xbaq\xdd\x9b"
Using ffv1 encoder:
The same hashes are achieved using the ffv1 encoder for .mkv output: pass vcodec='ffv1' in the arguments of output().
One more thing:
Move the argument r=frame_rate from the output arguments to the input arguments.
It's not intuitive, but when creating a video out of frames, the frame rate should be defined as an argument of the input.
# Write buffer back into a video
process = (
ffmpeg
.input('pipe:', format='rawvideo', s='{}x{}'.format(width, height), pixel_format='rgb24', r=frame_rate) # Set input pixel format.
.output("test_2.mkv", vcodec='ffv1') # Select video code "rawvideo"
.overwrite_output()
.run_async(pipe_stdin=True)
)
Here is the complete code sample:
import ffmpeg
import numpy as np
import hashlib
file_name = 'test.mkv'
# Get video dimensions and framerate
probe = ffmpeg.probe(file_name)
video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
width = int(video_stream['width'])
height = int(video_stream['height'])
frame_rate = video_stream['avg_frame_rate']
# Read video into buffer
out, error = (
ffmpeg
.input(file_name, threads=120)
.output("pipe:", format='rawvideo', pix_fmt='rgb24') # Select rgb24 instead of rgb24 (becasue raw AVI requires rgb24).
.run(capture_stdout=True)
)
# Convert video buffer to array
video = (
np
.frombuffer(out, np.uint8)
.reshape([-1, height, width, 3])
)
# Convert array to buffer
video_buffer = (
np.ndarray
.flatten(video)
.tobytes()
)
# Write buffer back into a video
process = (
ffmpeg
.input('pipe:', format='rawvideo', s='{}x{}'.format(width, height), pixel_format='rgb24', r=frame_rate) # Set input pixel format.
.output("test_2.mkv", vcodec='ffv1') # Select video code "rawvideo"
.overwrite_output()
.run_async(pipe_stdin=True)
)
process.communicate(input=video_buffer)
# Read the newly written video
out_2, error = (
ffmpeg
.input("test_2.mkv", threads=40)
.output("pipe:", format='rawvideo', pix_fmt='rgb24')
.run(capture_stdout=True)
)
# Convert new video into array
video_2 = (
np
.frombuffer(out_2, np.uint8)
.reshape([-1, height, width, 3])
)
# Video dimensions match
print(f'{video.shape} vs {video_2.shape}') # (844, 1080, 608, 3) vs (844, 1080, 608, 3)
print(f'{np.array_equal(video, video_2)}') # True
# Hashes do match
print(hashlib.sha256(bytes(video_2)).digest())
print(hashlib.sha256(bytes(video)).digest())
Result (same hash, using your input file):
True
b'\x9d\xc1\x07xh\x1b\x04I\xed\x906\xe57\xba\xf3\xf1k\x08\xfa\xf1\xfaM\x9a\xcf\xa9\t8\xf0\xc9\t\xa9\xb7'
b'\x9d\xc1\x07xh\x1b\x04I\xed\x906\xe57\xba\xf3\xf1k\x08\xfa\xf1\xfaM\x9a\xcf\xa9\t8\xf0\xc9\t\xa9\xb7'
Using Scikit-Video:
The following code sample uses Scikit-Video.
I couldn't find a way to select the ffv1 codec without using skvideo.io.FFmpegWriter.
The implementation uses a for loop to write the video frame by frame.
import skvideo.io as sk
import numpy as np
import hashlib
video_data = sk.vread('test.mkv')
# Create FFmpeg video writer
writer = sk.FFmpegWriter('test_2_ski.mkv', outputdict={'-vcodec': 'ffv1' })
#sk.vwrite('test_2_ski.mkv', video_data)
# Write frame by frame in a loop
for i in range(video_data.shape[0]):
writer.writeFrame(video_data[i, :, :, :])
writer.close() # Close video writer.
video_data_2 = sk.vread('test_2_ski.mkv')
# Dimensions match
print(video_data.shape) # (844, 1080, 608, 3)
print(video_data_2.shape) # (844, 1080, 608, 3)
# Array elements match
print(np.array_equal(video_data, video_data_2))
# Hashes match
print(hashlib.sha256(bytes(video_data_2)).digest())
print(hashlib.sha256(bytes(video_data)).digest())