cut-from-edl.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
#!/usr/bin/env python3
"""EDL JSON -> validated cuts + concat: the deterministic core of edit-as-code.

Reads an edit decision list (schema: assets/edl-schema.json — scenes, clips,
time ranges, written rationale), validates it, and produces the final video via
per-clip cuts + the concat demuxer. DRY-RUN BY DEFAULT: prints every command it
would run and touches nothing until --execute.

Re-encode mode (default) is frame-accurate and normalizes codec/resolution/fps
across clips so the concat is always safe; --copy is faster but requires
keyframe-aligned cut points and identical source parameters.

Usage: cut-from-edl.py [--execute] [--copy] [-o OUT] [--workdir DIR] [--json] <edl.json>
Input: EDL JSON as positional; clip paths resolve relative to the EDL's directory
Output: stdout = planned/executed command list (or --json envelope,
  schema claude-mods.ffmpeg-ops.edl/v1)
Stderr: progress, warnings, errors
Exit: 0 ok, 2 usage, 3 EDL or source file missing, 4 EDL invalid,
  5 ffmpeg missing (--execute only)

Examples:
  cut-from-edl.py edit.json # dry-run: show the plan
  cut-from-edl.py edit.json --execute -o final.mp4
  cut-from-edl.py edit.json --execute --copy # keyframe-aligned EDLs only
  cut-from-edl.py edit.json --json | jq '.data.commands'
"""
  23. import argparse
  24. import json
  25. import shutil
  26. import subprocess
  27. import sys
  28. from pathlib import Path
  29. from typing import NoReturn
  30. SCHEMA = "claude-mods.ffmpeg-ops.edl/v1"
  31. EXIT_OK, EXIT_USAGE, EXIT_NOT_FOUND, EXIT_VALIDATION, EXIT_MISSING_DEP = 0, 2, 3, 4, 5
  32. def err(json_mode: bool, code: str, message: str, exit_code: int) -> NoReturn:
  33. if json_mode:
  34. print(json.dumps({"error": {"code": code, "message": message, "details": {}}}))
  35. print(f"ERROR: {message}", file=sys.stderr)
  36. sys.exit(exit_code)
  37. def validate_edl(edl: dict) -> list:
  38. """Stdlib structural validation mirroring assets/edl-schema.json."""
  39. problems = []
  40. scenes = edl.get("scenes")
  41. if not isinstance(scenes, list) or not scenes:
  42. return ["'scenes' must be a non-empty array"]
  43. for i, scene in enumerate(scenes):
  44. where = f"scenes[{i}]"
  45. if not isinstance(scene, dict):
  46. problems.append(f"{where} must be an object")
  47. continue
  48. clips = scene.get("clips")
  49. if not isinstance(clips, list) or not clips:
  50. problems.append(f"{where}.clips must be a non-empty array")
  51. continue
  52. for j, clip in enumerate(clips):
  53. cw = f"{where}.clips[{j}]"
  54. if not isinstance(clip, dict):
  55. problems.append(f"{cw} must be an object")
  56. continue
  57. if not isinstance(clip.get("file"), str) or not clip.get("file"):
  58. problems.append(f"{cw}.file must be a non-empty string")
  59. start, end = clip.get("start"), clip.get("end")
  60. if not isinstance(start, (int, float)) or start < 0:
  61. problems.append(f"{cw}.start must be a number >= 0")
  62. if not isinstance(end, (int, float)):
  63. problems.append(f"{cw}.end must be a number")
  64. elif isinstance(start, (int, float)) and end <= start:
  65. problems.append(f"{cw}: end ({end}) must be > start ({start})")
  66. return problems
  67. def video_props(ffprobe: str, path: Path) -> dict:
  68. proc = subprocess.run(
  69. [ffprobe, "-v", "error", "-select_streams", "v:0",
  70. "-show_entries", "stream=width,height,r_frame_rate", "-of", "csv=p=0",
  71. str(path)],
  72. capture_output=True, text=True)
  73. parts = proc.stdout.strip().split(",")
  74. if len(parts) == 3:
  75. try:
  76. num, den = parts[2].split("/")
  77. fps = round(int(num) / int(den), 3) if int(den) else 0
  78. return {"width": int(parts[0]), "height": int(parts[1]), "fps": fps}
  79. except (ValueError, ZeroDivisionError):
  80. pass
  81. return {}
  82. def main() -> int:
  83. ap = argparse.ArgumentParser(
  84. description="Cut + concat a final video from an EDL JSON (dry-run by default).",
  85. epilog="Examples:\n"
  86. " cut-from-edl.py edit.json\n"
  87. " cut-from-edl.py edit.json --execute -o final.mp4\n",
  88. formatter_class=argparse.RawDescriptionHelpFormatter)
  89. ap.add_argument("edl", help="EDL JSON file (see assets/edl-schema.json)")
  90. ap.add_argument("--execute", action="store_true",
  91. help="actually run the cuts (default: dry-run print only)")
  92. ap.add_argument("--copy", action="store_true",
  93. help="stream-copy cuts (fast; needs keyframe-aligned points "
  94. "and identical source params)")
  95. ap.add_argument("-o", "--output", default=None,
  96. help="final output path, resolved against the CWD (default: the "
  97. "EDL 'output' field resolved against the EDL file, else final.mp4)")
  98. ap.add_argument("--workdir", default=None,
  99. help="directory for cut segments (default: <edl-dir>/edl-cuts)")
  100. ap.add_argument("--json", action="store_true", help="emit JSON envelope on stdout")
  101. args = ap.parse_args()
  102. edl_path = Path(args.edl)
  103. if not edl_path.is_file():
  104. err(args.json, "NOT_FOUND", f"EDL not found: {edl_path}", EXIT_NOT_FOUND)
  105. try:
  106. edl = json.loads(edl_path.read_text(encoding="utf-8"))
  107. except json.JSONDecodeError as e:
  108. err(args.json, "VALIDATION", f"EDL is not valid JSON: {e}", EXIT_VALIDATION)
  109. problems = validate_edl(edl)
  110. if problems:
  111. err(args.json, "VALIDATION",
  112. "EDL failed validation: " + "; ".join(problems[:5])
  113. + (f" (+{len(problems) - 5} more)" if len(problems) > 5 else ""),
  114. EXIT_VALIDATION)
  115. base = edl_path.resolve().parent
  116. workdir = Path(args.workdir) if args.workdir else base / "edl-cuts"
  117. # CLI -o resolves against the CWD (normal CLI convention); the EDL's own
  118. # 'output' field resolves against the EDL file (schema contract).
  119. if args.output:
  120. output = Path(args.output).resolve()
  121. else:
  122. output = Path(edl.get("output") or "final.mp4")
  123. if not output.is_absolute():
  124. output = base / output
  125. # Resolve and existence-check sources (fatal in execute, warning in dry-run).
  126. clips, missing = [], []
  127. for scene in edl["scenes"]:
  128. for clip in scene["clips"]:
  129. src = Path(clip["file"])
  130. if not src.is_absolute():
  131. src = base / src
  132. if not src.is_file():
  133. missing.append(str(src))
  134. clips.append({"scene": scene.get("scene"), "src": src,
  135. "start": float(clip["start"]), "end": float(clip["end"])})
  136. if missing:
  137. for m in missing:
  138. print(f"warning: source missing: {m}", file=sys.stderr)
  139. if args.execute:
  140. err(args.json, "NOT_FOUND",
  141. f"{len(missing)} source file(s) missing (first: {missing[0]})",
  142. EXIT_NOT_FOUND)
  143. ffmpeg = shutil.which("ffmpeg")
  144. ffprobe = shutil.which("ffprobe")
  145. if args.execute and not ffmpeg:
  146. err(args.json, "MISSING_DEPENDENCY", "ffmpeg not found on PATH",
  147. EXIT_MISSING_DEP)
  148. # Re-encode mode normalizes every segment to the first clip's geometry/fps,
  149. # which is what makes the concat demuxer unconditionally safe.
  150. norm_filter = ""
  151. if not args.copy and ffprobe and not missing:
  152. props = [video_props(ffprobe, c["src"]) for c in clips]
  153. props = [p for p in props if p]
  154. if props:
  155. w, h, fps = props[0]["width"], props[0]["height"], props[0]["fps"] or 30
  156. if any((p["width"], p["height"]) != (w, h) or p["fps"] != props[0]["fps"]
  157. for p in props):
  158. print(f"note: mixed source params — normalizing all segments to "
  159. f"{w}x{h} @ {fps}fps", file=sys.stderr)
  160. norm_filter = (f"scale={w}:{h}:force_original_aspect_ratio=decrease,"
  161. f"pad={w}:{h}:(ow-iw)/2:(oh-ih)/2,fps={fps}")
  162. commands, concat_lines = [], []
  163. for n, clip in enumerate(clips, 1):
  164. seg = workdir / f"seg{n:03d}.mp4"
  165. cmd = ["ffmpeg", "-y", "-ss", f"{clip['start']}", "-to", f"{clip['end']}",
  166. "-i", str(clip["src"])]
  167. if args.copy:
  168. cmd += ["-c", "copy", "-avoid_negative_ts", "make_zero"]
  169. else:
  170. if norm_filter:
  171. cmd += ["-vf", norm_filter]
  172. cmd += ["-c:v", "libx264", "-crf", "18", "-preset", "fast",
  173. "-pix_fmt", "yuv420p", "-c:a", "aac", "-b:a", "192k",
  174. "-ar", "48000"]
  175. cmd.append(str(seg))
  176. commands.append(cmd)
  177. concat_lines.append(f"file '{seg.as_posix()}'")
  178. concat_txt = workdir / "concat.txt"
  179. final_cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(concat_txt),
  180. "-c", "copy", "-movflags", "+faststart", str(output)]
  181. data = {
  182. "edl": str(edl_path), "mode": "copy" if args.copy else "reencode",
  183. "executed": bool(args.execute), "workdir": str(workdir),
  184. "output": str(output), "segments": len(clips),
  185. "missing_sources": missing,
  186. "commands": [" ".join(c) for c in commands] + [" ".join(final_cmd)],
  187. }
  188. if not args.execute:
  189. if args.json:
  190. print(json.dumps({"data": data, "meta": {"schema": SCHEMA}}, indent=2))
  191. else:
  192. print(f"# DRY-RUN — {len(clips)} segment(s) -> {output}")
  193. for c in data["commands"][:-1]:
  194. print(c)
  195. print(f"# concat.txt:\n" + "\n".join(f"# {l}" for l in concat_lines))
  196. print(data["commands"][-1])
  197. print("dry-run only; pass --execute to run", file=sys.stderr)
  198. return EXIT_OK
  199. workdir.mkdir(parents=True, exist_ok=True)
  200. for n, cmd in enumerate(commands, 1):
  201. print(f"cutting segment {n}/{len(commands)}...", file=sys.stderr)
  202. proc = subprocess.run(cmd, capture_output=True, text=True)
  203. if proc.returncode != 0:
  204. err(args.json, "VALIDATION",
  205. f"segment {n} failed: {(proc.stderr.strip().splitlines() or ['?'])[-1]}",
  206. EXIT_VALIDATION)
  207. concat_txt.write_text("\n".join(concat_lines) + "\n", encoding="utf-8")
  208. # Atomic final write: concat to a temp name, then rename over the
  209. # destination. The temp KEEPS the real extension — ffmpeg infers the muxer
  210. # from it, and "final.mp4.tmp" would fail with "Invalid argument".
  211. tmp_out = output.with_name(output.stem + ".tmp" + output.suffix)
  212. final_cmd[-1] = str(tmp_out)
  213. # the destination dir must exist BEFORE ffmpeg opens the temp output -
  214. # otherwise concat dies with a cryptic "Error opening output files"
  215. output.parent.mkdir(parents=True, exist_ok=True)
  216. print("concatenating...", file=sys.stderr)
  217. proc = subprocess.run(final_cmd, capture_output=True, text=True)
  218. if proc.returncode != 0:
  219. err(args.json, "VALIDATION",
  220. f"concat failed: {(proc.stderr.strip().splitlines() or ['?'])[-1]}",
  221. EXIT_VALIDATION)
  222. tmp_out.replace(output)
  223. if args.json:
  224. print(json.dumps({"data": data, "meta": {"schema": SCHEMA}}, indent=2))
  225. else:
  226. print(str(output))
  227. print(f"done: {output} ({len(clips)} segments)", file=sys.stderr)
  228. print("next: re-transcribe the output and verify no words were clipped "
  229. "(see references/edit-as-code.md)", file=sys.stderr)
  230. return EXIT_OK
  231. if __name__ == "__main__":
  232. sys.exit(main())