#!/usr/bin/env python3
"""Merge per-shot videos into a single film, ordered by storyboard.json.

Because adjacent shots share keyframes (shot i's last frame == shot i+1's
first frame), a plain hard concat is already seamless. An optional crossfade
mode is provided for cases where soft dissolves are explicitly wanted.

Requires ffmpeg on PATH (and ffprobe when --crossfade is used).

Examples:
    # Hard concat in storyboard order (default, seamless via shared frames)
    python merge_videos.py --storyboard storyboard.json --dir ./outputs --out final.mp4

    # Crossfade between every shot (0.5s dissolve)
    python merge_videos.py --storyboard storyboard.json --dir ./outputs \
        --out final.mp4 --crossfade 0.5
"""

import argparse
import json
import os
import shutil
import subprocess
import sys
import tempfile


def log(msg):
    """Write a diagnostic message to stderr, keeping stdout for the result line."""
    print(msg, file=sys.stderr)


def ensure_ffmpeg(tools=("ffmpeg",)):
    """Exit with status 1 unless every executable in `tools` is on PATH.

    Args:
        tools: Names of the executables to look up. Defaults to ffmpeg only;
            pass ("ffmpeg", "ffprobe") when clip durations must be probed.
    """
    missing = [tool for tool in tools if shutil.which(tool) is None]
    if missing:
        log(f"ERROR: {', '.join(missing)} not found on PATH. Install it first "
            "(macOS: brew install ffmpeg).")
        sys.exit(1)


def probe_duration(path):
    """Return clip duration in seconds via ffprobe.

    Raises:
        subprocess.CalledProcessError: if ffprobe exits with a non-zero status.
        ValueError: if ffprobe does not print a numeric duration.
    """
    out = subprocess.run(
        ["ffprobe", "-v", "error", "-show_entries", "format=duration",
         "-of", "default=noprint_wrappers=1:nokey=1", path],
        capture_output=True, text=True, check=True,
    )
    raw = out.stdout.strip()
    try:
        return float(raw)
    except ValueError:
        raise ValueError(
            f"ffprobe reported no usable duration for {path!r}: {raw!r}"
        ) from None


def _candidate_names(sid):
    """Return the file names tried for shot id `sid`, in priority order."""
    names = [f"shot_{sid}.mp4", f"shot{sid}.mp4", f"{sid}.mp4"]
    # The zero-padded form only makes sense for numeric ids; applying the
    # `02d` format spec to an arbitrary string would raise ValueError.
    if isinstance(sid, int):
        names.append(f"shot_{sid:02d}.mp4")
    elif isinstance(sid, str) and sid.isdigit():
        names.append(f"shot_{int(sid):02d}.mp4")
    return names


def resolve_shot_files(storyboard, video_dir):
    """Map each shot to its video file in storyboard order.

    Args:
        storyboard: Parsed storyboard; reads storyboard["shots"][i]["id"].
        video_dir: Directory searched for the per-shot video files.

    Returns:
        List of file paths, one per shot, in storyboard order. Exits with
        status 1 if any shot has no matching file.
    """
    files = []
    for shot in storyboard["shots"]:
        sid = shot["id"]
        # Accept common naming patterns produced by the agnes step.
        candidates = _candidate_names(sid)
        found = None
        for name in candidates:
            path = os.path.join(video_dir, name)
            if os.path.exists(path):
                found = path
                break
        if not found:
            log(f"ERROR: no video file for shot id={sid} in {video_dir}. "
                f"Tried: {candidates}")
            sys.exit(1)
        files.append(found)
    return files


def _normalize_filter(width, height, fps):
    """Return the filter chain that letterboxes a clip to width x height at fps."""
    return (
        f"scale={width}:{height}:force_original_aspect_ratio=decrease,"
        f"pad={width}:{height}:(ow-iw)/2:(oh-ih)/2,fps={fps}"
    )


def hard_concat(files, out, width, height, fps):
    """Re-encode every clip to a common format, then concat losslessly.

    Each input is normalized (scale + pad + fps, H.264/yuv420p, audio
    dropped) into a temporary directory; the normalized clips are then joined
    with the concat demuxer using stream copy, so the video is encoded once.
    The temporary directory is removed even if ffmpeg fails.

    Raises:
        subprocess.CalledProcessError: if any ffmpeg invocation fails.
    """
    tmpdir = tempfile.mkdtemp(prefix="poem_merge_")
    try:
        normalized = []
        for i, src in enumerate(files):
            clip = os.path.join(tmpdir, f"n_{i:03d}.mp4")
            subprocess.run(
                ["ffmpeg", "-y", "-i", src,
                 "-vf", _normalize_filter(width, height, fps),
                 "-c:v", "libx264", "-pix_fmt", "yuv420p", "-an", clip],
                check=True,
            )
            normalized.append(clip)

        listfile = os.path.join(tmpdir, "list.txt")
        with open(listfile, "w", encoding="utf-8") as fh:
            for clip in normalized:
                # Inside a single-quoted concat entry a literal quote must be
                # written as '\'' (close quote, escaped quote, reopen quote).
                escaped = clip.replace("'", "'\\''")
                fh.write(f"file '{escaped}'\n")

        # All clips now share codec, size, pixel format and frame rate, so
        # they can be stream-copied instead of being encoded a second time.
        subprocess.run(
            ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", listfile,
             "-c", "copy", out],
            check=True,
        )
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)


def _build_xfade_graph(durations, width, height, fps, fade):
    """Return the -filter_complex string that ends in the [vout] label.

    `durations` holds one entry per input, in input order. Transition i starts
    at the sum of the preceding clip durations minus i * fade, because every
    earlier dissolve overlaps two clips by `fade` seconds.
    """
    norm = _normalize_filter(width, height, fps)
    if len(durations) == 1:
        # Nothing to dissolve into: normalize the only input straight to [vout].
        return f"[0:v]{norm},settb=AVTB[vout]"

    parts = [f"[{i}:v]{norm},settb=AVTB[v{i}]" for i in range(len(durations))]
    prev = "[v0]"
    offset = 0.0
    last = len(durations) - 1
    for i in range(1, len(durations)):
        offset += durations[i - 1] - fade
        out_label = f"[x{i}]" if i < last else "[vout]"
        parts.append(
            f"{prev}[v{i}]xfade=transition=fade:duration={fade}:"
            f"offset={offset:.3f}{out_label}"
        )
        prev = out_label
    return ";".join(parts)


def crossfade_concat(files, out, width, height, fps, fade):
    """Chain clips with an xfade dissolve of `fade` seconds between each.

    Exits with status 1 if `files` is empty or if any clip is not longer than
    `fade` (the dissolve would not fit inside the clip). A single clip is
    simply normalized and written out.

    Raises:
        subprocess.CalledProcessError: if ffprobe or ffmpeg fails.
    """
    if not files:
        log("ERROR: no clips to merge.")
        sys.exit(1)

    durations = [probe_duration(f) for f in files]
    if len(files) > 1:
        for path, duration in zip(files, durations):
            if duration <= fade:
                log(f"ERROR: crossfade of {fade}s does not fit in {path} "
                    f"(duration {duration:.3f}s).")
                sys.exit(1)

    inputs = []
    for f in files:
        inputs += ["-i", f]

    filter_complex = _build_xfade_graph(durations, width, height, fps, fade)
    subprocess.run(
        ["ffmpeg", "-y", *inputs,
         "-filter_complex", filter_complex,
         "-map", "[vout]",
         "-c:v", "libx264", "-pix_fmt", "yuv420p", out],
        check=True,
    )


def main():
    """Parse the command line, resolve shot files and write the merged video."""
    ap = argparse.ArgumentParser(description="Merge per-shot videos by storyboard order.")
    ap.add_argument("--storyboard", required=True, help="Path to storyboard.json")
    ap.add_argument("--dir", required=True, help="Directory holding shot_<id>.mp4 files")
    ap.add_argument("--out", required=True, help="Output merged video path")
    ap.add_argument("--crossfade", type=float, default=0.0,
                    help="Crossfade seconds between shots (0 = hard concat, default)")
    ap.add_argument("--width", type=int, default=1152)
    ap.add_argument("--height", type=int, default=768)
    ap.add_argument("--fps", type=int, default=24)
    args = ap.parse_args()

    use_crossfade = args.crossfade > 0
    # Crossfade mode probes clip durations, so it also needs ffprobe.
    ensure_ffmpeg(("ffmpeg", "ffprobe") if use_crossfade else ("ffmpeg",))

    with open(args.storyboard, encoding="utf-8") as fh:
        storyboard = json.load(fh)

    # Derive resolution from aspect if present (e.g. "1152x768").
    aspect = storyboard.get("aspect")
    if isinstance(aspect, str) and "x" in aspect.lower():
        try:
            w, h = aspect.lower().split("x")
            args.width, args.height = int(w), int(h)
        except ValueError:
            # Best effort: keep the command-line resolution, but say so.
            log(f"WARNING: ignoring unparseable storyboard aspect {aspect!r}; "
                f"using {args.width}x{args.height}.")

    files = resolve_shot_files(storyboard, args.dir)
    if not files:
        log("ERROR: storyboard contains no shots; nothing to merge.")
        sys.exit(1)

    mode = f"crossfade {args.crossfade}s" if use_crossfade else "hard concat"
    log(f"Merging {len(files)} shots -> {args.out} "
        f"({args.width}x{args.height}@{args.fps}fps, {mode})")

    if use_crossfade:
        crossfade_concat(files, args.out, args.width, args.height,
                         args.fps, args.crossfade)
    else:
        hard_concat(files, args.out, args.width, args.height, args.fps)

    print(f"SAVED: {args.out}")


if __name__ == "__main__":
    main()