Tags: python, ffmpeg, video-processing, scikits, ffmpeg-python

NumPy array of a video changes from the original after writing into the same video


I have a video (test.mkv) that I have converted into a 4D NumPy array - (frame, height, width, color_channel). I have even managed to convert that array back into the same video (test_2.mkv) without altering anything. However, after reading this new, test_2.mkv, back into a new NumPy array, the array of the first video is different from the second video's array i.e. their hashes don't match and the numpy.array_equal() function returns false. I have tried using both python-ffmpeg and scikit-video but cannot get the arrays to match.

Python-ffmpeg attempt:

# Round-trip attempt from the question: decode test.mkv into a NumPy array,
# write that array back out as test_2.mkv, decode test_2.mkv again and compare
# the two arrays (shape, element equality, SHA-256 of the raw bytes).
import ffmpeg
import numpy as np
import hashlib

file_name = 'test.mkv'

# Get video dimensions and framerate
probe = ffmpeg.probe(file_name)
# First entry of probe['streams'] whose codec_type is 'video'; None if there is none.
video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
width = int(video_stream['width'])
height = int(video_stream['height'])
# Kept exactly as reported by the probe and later handed to ffmpeg unchanged.
frame_rate = video_stream['avg_frame_rate']

# Read video into buffer
# The decoded frames are requested as raw rgb24 on stdout and captured in `out`.
out, error = (
    ffmpeg
        .input(file_name, threads=120)
        .output("pipe:", format='rawvideo', pix_fmt='rgb24')
        .run(capture_stdout=True)
)

# Convert video buffer to array
# -1 lets NumPy infer the frame count from the buffer length; the last axis
# holds the 3 bytes of each rgb24 pixel.
video = (
    np
        .frombuffer(out, np.uint8)
        .reshape([-1, height, width, 3])
)

# Convert array to buffer
video_buffer = (
    np.ndarray
        .flatten(video)
        .tobytes()
)

# Write buffer back into a video
# NOTE(review): the raw input is described only by format and frame size -- no
# pixel format and no input frame rate are given -- and the output names no
# video codec (only r=frame_rate), so ffmpeg falls back to its defaults for
# those. The answer further down sets all of them explicitly.
process = (
    ffmpeg
        .input('pipe:', format='rawvideo', s='{}x{}'.format(width, height))
        .output("test_2.mkv", r=frame_rate)
        .overwrite_output()
        .run_async(pipe_stdin=True)
)
# Feed the whole buffer to ffmpeg's stdin and wait for the process to finish.
process.communicate(input=video_buffer)

# Read the newly written video
out_2, error = (
    ffmpeg
        .input("test_2.mkv", threads=40)
        .output("pipe:", format='rawvideo', pix_fmt='rgb24')
        .run(capture_stdout=True)
)

# Convert new video into array
video_2 = (
    np
        .frombuffer(out_2, np.uint8)
        .reshape([-1, height, width, 3])
)

# Video dimensions change
print(f'{video.shape} vs {video_2.shape}') # (844, 1080, 608, 3) vs (2025, 1080, 608, 3)
print(f'{np.array_equal(video, video_2)}') # False

# Hashes don't match
print(hashlib.sha256(bytes(video_2)).digest()) # b'\x88\x00\xc8\x0ed\x84!\x01\x9e\x08 \xd0U\x9a(\x02\x0b-\xeeA\xecU\xf7\xad0xa\x9e\\\xbck\xc3'
print(hashlib.sha256(bytes(video)).digest()) # b'\x9d\xc1\x07xh\x1b\x04I\xed\x906\xe57\xba\xf3\xf1k\x08\xfa\xf1\xfaM\x9a\xcf\xa9\t8\xf0\xc9\t\xa9\xb7'

Scikit-video attempt:

# Round-trip attempt from the question using scikit-video: read test.mkv,
# write the array to test_2_ski.mkv, read it back and compare.
import skvideo.io as sk
import numpy as np

# Decode the source video into an array (its shape is printed below).
video_data = sk.vread('test.mkv')

# Re-encode using vwrite with no further arguments: no codec or pixel format
# is chosen here.
sk.vwrite('test_2_ski.mkv', video_data)

video_data_2 = sk.vread('test_2_ski.mkv')

# Dimensions match but...
print(video_data.shape) # (844, 1080, 608, 3)
print(video_data_2.shape) # (844, 1080, 608, 3)

# ...array elements don't
print(np.array_equal(video_data, video_data_2)) # False

# Hashes don't match either
# NOTE(review): hashlib is not imported in this snippet, and the two lines
# below hash `video_2` / `video` (names from the python-ffmpeg attempt), not
# `video_data_2` / `video_data` -- confirm which arrays were meant.
print(hashlib.sha256(bytes(video_2)).digest()) # b'\x8b?]\x8epD:\xd9B\x14\xc7\xba\xect\x15G\xfaRP\xde\xad&EC\x15\xc3\x07\n{a[\x80'
print(hashlib.sha256(bytes(video)).digest()) # b'\x9d\xc1\x07xh\x1b\x04I\xed\x906\xe57\xba\xf3\xf1k\x08\xfa\xf1\xfaM\x9a\xcf\xa9\t8\xf0\xc9\t\xa9\xb7'

I don't understand where I'm going wrong and both the respective documentations do not highlight how to do this particular task. Any help is appreciated. Thank you.


Solution

  • Getting the same hash after writing and re-reading a video file requires careful attention.

    Before comparing the hash, try to look at the video first.

    Executing your code gave me the following output (first frame of video_2):
    enter image description here

    When the input (first frame of video) is:
    enter image description here

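    I suggest the following modifications (all of them are applied in the code below):

      - Store test_2 as raw video in an AVI container (test_2.avi, vcodec='rawvideo') instead of MKV.
      - Use the bgr24 pixel format instead of rgb24, because raw AVI requires bgr24.
      - Set the pixel format and the frame rate of the piped raw input explicitly when writing the video.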

    Note:
    To get equal hashes, you need a lossless video codec that supports the rgb24 (or bgr24) pixel format.
    Most lossless codecs convert the pixel format from RGB to YUV.
    The RGB-to-YUV conversion has rounding errors that prevent the hashes from matching.
    (I suppose there are ways to get around it, but it's a bit complicated).


    Here is your complete code with a few modifications:

    # Lossless round trip through a raw-video AVI file: decode test.mkv to a
    # bgr24 NumPy array, write the array to test_2.avi without re-compression,
    # decode test_2.avi again and verify that both arrays are identical.
    import ffmpeg
    import numpy as np
    import hashlib

    file_name = 'test.mkv'

    # Get video dimensions and framerate
    probe = ffmpeg.probe(file_name)
    video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
    if video_stream is None:
        # Fail with a clear message instead of a TypeError on the lookups below.
        raise ValueError(f'No video stream found in {file_name}')
    width = int(video_stream['width'])
    height = int(video_stream['height'])
    frame_rate = video_stream['avg_frame_rate']

    # Read video into buffer
    out, error = (
        ffmpeg
            .input(file_name, threads=120)
            .output("pipe:", format='rawvideo', pix_fmt='bgr24')  # Select bgr24 instead of rgb24 (because raw AVI requires bgr24).
            .run(capture_stdout=True)
    )

    # Convert video buffer to array (-1 = frame count inferred from the buffer size)
    video = (
        np
            .frombuffer(out, np.uint8)
            .reshape([-1, height, width, 3])
    )

    # Convert array to buffer.
    # tobytes() already serialises in C order, so no separate flatten() copy is needed.
    video_buffer = video.tobytes()

    # Write buffer back into a video
    process = (
        ffmpeg
            .input('pipe:', format='rawvideo', s='{}x{}'.format(width, height), pixel_format='bgr24', r=frame_rate)  # Set input pixel format.
            .output("test_2.avi", vcodec='rawvideo')  # Select video codec "rawvideo"
            .overwrite_output()
            .run_async(pipe_stdin=True)
    )
    process.communicate(input=video_buffer)
    if process.returncode != 0:
        # Do not go on to compare against a missing or partially written file.
        raise RuntimeError(f'ffmpeg exited with code {process.returncode} while writing test_2.avi')

    # Read the newly written video
    out_2, error = (
        ffmpeg
            .input("test_2.avi", threads=40)
            .output("pipe:", format='rawvideo', pix_fmt='bgr24')
            .run(capture_stdout=True)
    )

    # Convert new video into array
    video_2 = (
        np
            .frombuffer(out_2, np.uint8)
            .reshape([-1, height, width, 3])
    )

    # Video dimensions match
    print(f'{video.shape} vs {video_2.shape}') # (844, 1080, 608, 3) vs (844, 1080, 608, 3)
    print(f'{np.array_equal(video, video_2)}') # True

    # Hashes do match
    print(hashlib.sha256(bytes(video_2)).digest())
    print(hashlib.sha256(bytes(video)).digest())
    

    Result (same hash):

    True

    b"\xd1yy\x97\x8e\xce\x13\xbcI#\xd2PMP\x80(i+5\xe1\xcd\xab\xf3f\xbe\xcd\xd5'\xbaq\xdd\x9b"

    b"\xd1yy\x97\x8e\xce\x13\xbcI#\xd2PMP\x80(i+5\xe1\xcd\xab\xf3f\xbe\xcd\xd5'\xbaq\xdd\x9b"


    Update:

    Using ffv1 encoder:

    The same hashes are achieved using the ffv1 encoder for .mkv

    One more thing:

    Here is the complete code sample:

    # Lossless round trip through an FFV1-encoded MKV file: decode test.mkv to
    # an rgb24 NumPy array, encode the array to test_2.mkv with the ffv1 codec,
    # decode test_2.mkv again and verify that both arrays are identical.
    import ffmpeg
    import numpy as np
    import hashlib

    file_name = 'test.mkv'

    # Get video dimensions and framerate
    probe = ffmpeg.probe(file_name)
    video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
    if video_stream is None:
        # Fail with a clear message instead of a TypeError on the lookups below.
        raise ValueError(f'No video stream found in {file_name}')
    width = int(video_stream['width'])
    height = int(video_stream['height'])
    frame_rate = video_stream['avg_frame_rate']

    # Read video into buffer
    out, error = (
        ffmpeg
            .input(file_name, threads=120)
            .output("pipe:", format='rawvideo', pix_fmt='rgb24')  # Keep rgb24, as in the question (bgr24 was only needed for the raw AVI variant).
            .run(capture_stdout=True)
    )

    # Convert video buffer to array (-1 = frame count inferred from the buffer size)
    video = (
        np
            .frombuffer(out, np.uint8)
            .reshape([-1, height, width, 3])
    )

    # Convert array to buffer.
    # tobytes() already serialises in C order, so no separate flatten() copy is needed.
    video_buffer = video.tobytes()

    # Write buffer back into a video
    process = (
        ffmpeg
            .input('pipe:', format='rawvideo', s='{}x{}'.format(width, height), pixel_format='rgb24', r=frame_rate)  # Set input pixel format.
            .output("test_2.mkv", vcodec='ffv1')  # Select the lossless video codec "ffv1"
            .overwrite_output()
            .run_async(pipe_stdin=True)
    )
    process.communicate(input=video_buffer)
    if process.returncode != 0:
        # Do not go on to compare against a missing or partially written file.
        raise RuntimeError(f'ffmpeg exited with code {process.returncode} while writing test_2.mkv')

    # Read the newly written video
    out_2, error = (
        ffmpeg
            .input("test_2.mkv", threads=40)
            .output("pipe:", format='rawvideo', pix_fmt='rgb24')
            .run(capture_stdout=True)
    )

    # Convert new video into array
    video_2 = (
        np
            .frombuffer(out_2, np.uint8)
            .reshape([-1, height, width, 3])
    )

    # Video dimensions match
    print(f'{video.shape} vs {video_2.shape}') # (844, 1080, 608, 3) vs (844, 1080, 608, 3)
    print(f'{np.array_equal(video, video_2)}') # True

    # Hashes do match
    print(hashlib.sha256(bytes(video_2)).digest())
    print(hashlib.sha256(bytes(video)).digest())
    

    Result (same hash, using your input file):

    True

    b'\x9d\xc1\x07xh\x1b\x04I\xed\x906\xe57\xba\xf3\xf1k\x08\xfa\xf1\xfaM\x9a\xcf\xa9\t8\xf0\xc9\t\xa9\xb7'

    b'\x9d\xc1\x07xh\x1b\x04I\xed\x906\xe57\xba\xf3\xf1k\x08\xfa\xf1\xfaM\x9a\xcf\xa9\t8\xf0\xc9\t\xa9\xb7'


    Update:

    Using Scikit-Video:

    The following code sample uses Scikit-Video.
    I couldn't find a way to select the ffv1 codec without using skvideo.io.FFmpegWriter.
    The implementation uses a for loop to write the video frame by frame.

    # Lossless round trip with scikit-video: read test.mkv into an array, write
    # the array to test_2_ski.mkv with the ffv1 codec, read it back and verify
    # that both arrays are identical.
    import skvideo.io as sk
    import numpy as np
    import hashlib

    video_data = sk.vread('test.mkv')

    # Create FFmpeg video writer with the ffv1 codec selected explicitly.
    # (This replaces the one-line sk.vwrite('test_2_ski.mkv', video_data) call
    # from the question, which does not choose a codec.)
    writer = sk.FFmpegWriter('test_2_ski.mkv', outputdict={'-vcodec': 'ffv1'})

    # Write frame by frame; iterating the array yields one frame per step.
    try:
        for frame in video_data:
            writer.writeFrame(frame)
    finally:
        # Close the writer even if writing a frame fails.
        writer.close()

    video_data_2 = sk.vread('test_2_ski.mkv')

    # Dimensions match
    print(video_data.shape) # (844, 1080, 608, 3)
    print(video_data_2.shape) # (844, 1080, 608, 3)

    # Array elements match
    print(np.array_equal(video_data, video_data_2))

    # Hashes match
    print(hashlib.sha256(bytes(video_data_2)).digest())
    print(hashlib.sha256(bytes(video_data)).digest())