I'm making an illustrated audiobook from a JSON script. The video holds on a single image for several seconds at a time, sometimes much longer, and never less than one second, yet I think moviepy is still generating every single frame individually.
There has to be a faster way to render this; I'd even consider another library or tool (a rough ffmpeg-based sketch of what I'm picturing is after the sample JSON below). Right now it takes multiple hours to render a one-hour book every time I make a small revision!
Python version: 3.11.5 | packaged by Anaconda, Inc. | (main, Sep 11 2023, 13:26:23) [MSC v.1916 64 bit (AMD64)]
My code
!pip install python-ffmpeg
# remember to restart the kernel if this wasn't installed before
!pip install moviepy
# ImageMagick itself is a separate, non-pip install; on Windows, run its installer and tick "install legacy utilities (e.g. convert)" or TextClip won't work
mp3_f = "d:/bookcontent/mp3s/"
image_f = "d:/bookcontent/images/"
json_file = "d:/bookcontent/script.json"
output_file = "d:/desktop/render_debug.mp4"
from moviepy.editor import ImageClip, AudioFileClip, concatenate_audioclips, concatenate_videoclips, TextClip, CompositeVideoClip
import json
import time
def render_movie(mp3_folder, images_folder, json_file, output_file, debug_mode=False, thread_count=14):
    start_time = time.time()  # Start timing

    with open(json_file, 'r', encoding="UTF-8") as file:
        data = json.load(file)

    clips = []
    current_img_clip = None
    audio_duration = 0

    for item in data:
        if 'img' in item:
            if current_img_clip is not None:
                # Finalize the previous clip before starting a new one
                clips.append(current_img_clip.set_duration(audio_duration))
            img_path = f"{images_folder}/{item['img']}"
            current_img_clip = ImageClip(img_path)
            audio_duration = 0  # Reset audio duration for the new img clip
            if debug_mode:
                debug_text = f"Image: {item['img']}"
        elif 'mp3' in item:
            mp3_path = f"{mp3_folder}/{item['mp3']}"
            audio_clip = AudioFileClip(mp3_path)
            audio_duration += audio_clip.duration
            if current_img_clip.audio is None:
                current_img_clip = current_img_clip.set_audio(audio_clip)
            else:
                current_img_clip.audio = concatenate_audioclips([current_img_clip.audio, audio_clip])
            if debug_mode:
                debug_text += f" | MP3: {item['mp3']}"

    # Append the last image clip if it exists, with any pending audio adjustments
    if current_img_clip is not None:
        clips.append(current_img_clip.set_duration(audio_duration))

    # Apply debug mode text to all clips if debug_mode is True
    if debug_mode:
        for i, clip in enumerate(clips):
            txt_clip = TextClip(debug_text, fontsize=20, color='white', bg_color='black').set_position('bottom').set_duration(clip.duration)
            clips[i] = CompositeVideoClip([clip, txt_clip])

    # Concatenate all clips into one video
    final_clip = concatenate_videoclips(clips, method="compose")

    # Export the video
    fps_num = 24
    if debug_mode:
        fps_num = 4
    final_clip.write_videofile(output_file, fps=24, codec="libx264", audio_codec="aac", bitrate="4000k", threads=thread_count)

    end_time = time.time()  # End timing
    return end_time - start_time  # Return the duration of the render operation
duration = render_movie(mp3_f, image_f, json_file, output_file, debug_mode=True)
Sample JSON data
[
    {
        "img": "maintitle.png"
    },
    {
        "mp3": "intro_music.mp3"
    },
    {
        "mp3": "10000300_7095387230477472453.mp3"
    },
    {
        "img": "CH1.png"
    },
    {
        "mp3": "10000200_13107803339676511791.mp3"
    }
]
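To give an idea of the "another tool" route I mentioned: what I'm picturing is shelling out to ffmpeg once per image/mp3 pair and then stitching the segments together with a stream copy, so nothing gets encoded twice. This is an untested sketch, not working code: the render_with_ffmpeg name, the segment_NNNN.mp4 files, and segments.txt are all made up, it assumes ffmpeg is on the PATH, and every image would need the same resolution for the final -c copy join to succeed.

import json
import subprocess

def render_with_ffmpeg(mp3_folder, images_folder, json_file, output_file, fps=24):
    with open(json_file, 'r', encoding="UTF-8") as file:
        data = json.load(file)

    segments = []
    current_image = None
    for item in data:
        if 'img' in item:
            current_image = f"{images_folder}/{item['img']}"
        elif 'mp3' in item:
            segment = f"segment_{len(segments):04d}.mp4"
            # -loop 1 holds the still image; -shortest ends the segment when the audio ends
            subprocess.run([
                "ffmpeg", "-y", "-loop", "1", "-framerate", str(fps),
                "-i", current_image,
                "-i", f"{mp3_folder}/{item['mp3']}",
                "-c:v", "libx264", "-tune", "stillimage", "-pix_fmt", "yuv420p",
                "-c:a", "aac", "-shortest", segment,
            ], check=True)
            segments.append(segment)

    # The concat demuxer reads a text file listing the segments in order
    with open("segments.txt", "w") as f:
        for seg in segments:
            f.write(f"file '{seg}'\n")

    # -c copy joins the segments without re-encoding (all segments must share codec and resolution)
    subprocess.run(["ffmpeg", "-y", "-f", "concat", "-safe", "0",
                    "-i", "segments.txt", "-c", "copy", output_file], check=True)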
I was able to speed it up a lot. There was a bug in my earlier code where the debug-mode FPS was never applied: I computed fps_num but then passed the literal fps=24 to write_videofile. Trying to 'combine' the audio clips with concatenate_audioclips, rather than just re-using the same image for each mp3, was also slowing things down, and the TextClip overlay, which was only there for debugging, cost a lot on its own because it gets composited onto every frame. Those changes took the render from hours down to minutes. If I re-attempt the text overlay, I will modify the image FIRST and build the ImageClip from the modified image, rather than having the text rendered over every frame (a rough sketch of that idea is after the updated code below).
from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips
import json
def render_movie(mp3_folder, images_folder, json_file, output_file, debug_mode=False, thread_count=14):
    print("Rendering")
    image_clips = []
    with open(json_file, 'r', encoding="UTF-8") as file:
        data = json.load(file)

    for item in data:
        if 'img' in item:
            # Just remember the current image; it is re-used for every mp3 that follows
            image_path = f"{images_folder}/{item['img']}"
        elif 'mp3' in item:
            audio_path = f"{mp3_folder}/{item['mp3']}"
            audio = AudioFileClip(audio_path)
            image_clips.append(ImageClip(image_path).set_audio(audio).set_duration(audio.duration))

    print("Concatenating")
    final_clip = concatenate_videoclips(image_clips, method="compose")
    print("Finalizing")
    final_clip.write_videofile(output_file, fps=(30 if not debug_mode else 4), threads=thread_count, verbose=False)
    print("Done")
render_movie(mp3_f, image_f, json_file, output_file, debug_mode=True)
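If I do bring the caption back, here is roughly how I'd bake it into the image up front with Pillow instead of compositing a TextClip over every frame. Untested sketch: it assumes Pillow and numpy are installed, and image_clip_with_caption is just a made-up helper name. ImageClip accepts a numpy array directly, so no temporary file is needed.

import numpy as np
from PIL import Image, ImageDraw, ImageFont
from moviepy.editor import ImageClip

def image_clip_with_caption(image_path, caption, duration):
    # Draw the caption onto the image once, so the renderer never composites per frame
    img = Image.open(image_path).convert("RGB")
    draw = ImageDraw.Draw(img)
    font = ImageFont.load_default()  # swap in ImageFont.truetype(...) for a nicer font
    w, h = img.size
    # black strip along the bottom as a background for the text
    draw.rectangle([(0, h - 40), (w, h)], fill="black")
    draw.text((10, h - 32), caption, fill="white", font=font)
    return ImageClip(np.array(img)).set_duration(duration)

In debug mode I'd then build each clip with image_clip_with_caption(image_path, f"Image: {item['img']}", audio.duration).set_audio(audio) instead of the plain ImageClip(...) line above.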