python imagemagick jupyter moviepy

MoviePy Shouldn't Be This Slow


I'm making an illustrated audiobook from a JSON script. Each image is held on screen for at least 1 second, and sometimes for many seconds, but I think MoviePy is still generating every single frame.
There has got to be a faster way to render this - I'd even consider another library or tool. It takes multiple hours to render a 1-hour book every time I make a small revision!

Python version: 3.11.5 | packaged by Anaconda, Inc. | (main, Sep 11 2023, 13:26:23) [MSC v.1916 64 bit (AMD64)]

My code

!pip install python-ffmpeg
# remember to restart the kernel if this wasn't installed before
!pip install moviepy
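pip install ImageMagic  # need to install 'legacy tools' for this to work. NOTE: ImageMagick itself is a standalone program that MoviePy's TextClip calls, not a Python package - install it with the ImageMagick installer and enable the legacy utilities option there.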

# All book assets (narration, artwork, script) live under one content root.
book_root = "d:/bookcontent"

# Folder of narration / music mp3 files referenced by the script.
mp3_f = book_root + "/mp3s/"
# Folder of still images referenced by the script.
image_f = book_root + "/images/"
# JSON script listing the images and the mp3s that play over each of them.
json_file = book_root + "/script.json"
# Destination of the rendered video.
output_file = "d:/desktop/render_debug.mp4"

from moviepy.editor import ImageClip, AudioFileClip, concatenate_audioclips, concatenate_videoclips, TextClip, CompositeVideoClip
import json
import time
import matplotlib.pyplot as plt

def render_movie(mp3_folder, images_folder, json_file, output_file, debug_mode=False, thread_count=14):
    """Render an illustrated-audiobook video from a JSON script.

    The script is a JSON list of objects.  An object with an ``img`` key
    starts a new still image; each following object with an ``mp3`` key adds
    audio that plays while that image is on screen.  Every image is shown
    for the combined duration of its audio.

    Args:
        mp3_folder: Folder containing the mp3 files named in the script.
        images_folder: Folder containing the image files named in the script.
        json_file: Path of the JSON script.
        output_file: Path of the video file to write.
        debug_mode: When true, render at 4 fps instead of 24 and overlay the
            image / mp3 file names of each segment at the bottom of the frame.
        thread_count: Forwarded to ``write_videofile`` as ``threads``.

    Returns:
        Elapsed wall-clock time of the whole render, in seconds.

    Raises:
        ValueError: If an ``mp3`` entry appears before any ``img`` entry, or
            if the script contains no image with audio to render.
    """
    start_time = time.time()  # Start timing
    with open(json_file, 'r', encoding="UTF-8") as file:
        data = json.load(file)

    # Parse the script into (image name, [mp3 names]) pairs first, so that a
    # malformed script is rejected before any media file is opened.
    segments = []
    for item in data:
        if 'img' in item:
            segments.append((item['img'], []))
        elif 'mp3' in item:
            if not segments:
                raise ValueError(
                    f"mp3 entry {item['mp3']!r} appears before any img entry in {json_file}"
                )
            segments[-1][1].append(item['mp3'])

    # An image without audio would have a duration of 0 and contribute no
    # frames, so it is dropped here.
    segments = [segment for segment in segments if segment[1]]
    if not segments:
        raise ValueError(f"{json_file} contains no image with audio to render")

    opened_audio = []  # every AudioFileClip opened, so it can be closed again
    try:
        clips = []
        for img_name, mp3_names in segments:
            tracks = []
            for mp3_name in mp3_names:
                track = AudioFileClip(f"{mp3_folder}/{mp3_name}")
                opened_audio.append(track)
                tracks.append(track)

            # Join the segment's audio once instead of nesting a new
            # concatenation for every additional mp3.
            audio = tracks[0] if len(tracks) == 1 else concatenate_audioclips(tracks)
            audio_duration = sum(track.duration for track in tracks)

            clip = ImageClip(f"{images_folder}/{img_name}").set_audio(audio).set_duration(audio_duration)

            if debug_mode:
                # Label each segment with its own file names (previously every
                # clip received the label of the last segment).
                debug_text = f"Image: {img_name}" + "".join(f" | MP3: {name}" for name in mp3_names)
                txt_clip = TextClip(debug_text, fontsize=20, color='white', bg_color='black').set_position('bottom').set_duration(clip.duration)
                clip = CompositeVideoClip([clip, txt_clip])

            clips.append(clip)

        # Concatenate all clips into one video
        final_clip = concatenate_videoclips(clips, method="compose")

        # Export the video; debug renders use a low frame rate to finish sooner.
        fps_num = 4 if debug_mode else 24
        final_clip.write_videofile(output_file, fps=fps_num, codec="libx264", audio_codec="aac", bitrate="4000k", threads=thread_count)
    finally:
        # Release the audio readers even if rendering failed.
        for track in opened_audio:
            track.close()

    end_time = time.time()  # End timing
    return end_time - start_time  # Return the duration of the render operation

# Run a debug render; `duration` receives the elapsed render time in seconds.
# output_file already ends in ".mp4", so it is passed unchanged: the earlier
# `output_file + str(i) + ".mp4"` relied on an `i` that is not defined in the
# code shown and would have produced a doubled extension.
duration = render_movie(mp3_f, image_f, json_file, output_file, debug_mode=True)


Sample JSON data

[
    {
        "img": "maintitle.png"
    },
    {
        "mp3": "intro_music.mp3"
    },
    {
        "mp3": "10000300_7095387230477472453.mp3"
    },
    {
        "img": "CH1.png"
    },
    {
        "mp3": "10000200_13107803339676511791.mp3"
    }
]

Solution

  • I was able to speed it up a lot. There was a bug in my earlier code where debug mode FPS wasn't being applied. I think trying to 'combine' the audio rather than just re-using images was slowing it down. Also the text overlay was only for debugging, but I think it was slowing it down a lot. I got it down from hours to minutes. If I were going to re-attempt the text overlay, I would modify the image FIRST and then make the ImageClip from that rather than having it rendered over every frame.

    from moviepy.editor import ImageClip, AudioFileClip, concatenate_audioclips, concatenate_videoclips, TextClip, CompositeVideoClip
    import json
    import time
    import matplotlib.pyplot as plt
    
    def render_movie(mp3_folder, images_folder, json_file, output_file, debug_mode=False, thread_count=14):
        """Render an illustrated-audiobook video from a JSON script.

        The script is a JSON list of objects.  An ``img`` entry selects the
        image to show; every following ``mp3`` entry becomes its own clip that
        shows that image for exactly the duration of the mp3.

        Args:
            mp3_folder: Folder containing the mp3 files named in the script.
            images_folder: Folder containing the image files named in the script.
            json_file: Path of the JSON script.
            output_file: Path of the video file to write.
            debug_mode: When true, render at 4 fps instead of 30.
            thread_count: Forwarded to ``write_videofile`` as ``threads``.

        Raises:
            ValueError: If an ``mp3`` entry appears before any ``img`` entry,
                or if the script contains no ``mp3`` entries at all.
        """
        print("Rendering")
        image_clips = []
        opened_audio = []  # every AudioFileClip opened, so it can be closed again

        with open(json_file, 'r', encoding="UTF-8") as file:
            data = json.load(file)

        try:
            image_path = None
            still = None  # ImageClip for image_path, loaded on first use
            for item in data:
                if 'img' in item:
                    image_path = f"{images_folder}/{item['img']}"
                    still = None
                elif 'mp3' in item:
                    if image_path is None:
                        raise ValueError(
                            f"mp3 entry {item['mp3']!r} appears before any img entry in {json_file}"
                        )
                    audio_path = f"{mp3_folder}/{item['mp3']}"
                    audio = AudioFileClip(audio_path)
                    opened_audio.append(audio)
                    if still is None:
                        # Read the image from disk once and reuse it for every
                        # mp3 that plays over it; set_audio/set_duration
                        # return modified copies and leave `still` untouched.
                        still = ImageClip(image_path)
                    image_clips.append(still.set_audio(audio).set_duration(audio.duration))

            if not image_clips:
                raise ValueError(f"{json_file} contains no mp3 entries to render")

            print("Concatenating")
            final_clip = concatenate_videoclips(image_clips, method="compose")
            print("Finalizing")
            final_clip.write_videofile(output_file, fps=(30 if not debug_mode else 4), threads=thread_count, verbose=False)
        finally:
            # Release the audio readers even if rendering failed.
            for audio in opened_audio:
                audio.close()
        print("Done")
    
        
    # Start a debug render using the folders and paths configured above.
    render_movie(
        mp3_f,
        image_f,
        json_file,
        output_file,
        debug_mode=True,
    )