make-chapters.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. #!/usr/bin/env python3
  2. """Chapter authoring: scene/silence boundaries or explicit JSON -> embedded chapters.
  3. Derives chapter points (scene detection, speech-after-silence starts, or an
  4. explicit chapters JSON), merges points closer than --min-gap, and emits any of:
  5. ffmetadata (the format ffmpeg muxes), YouTube description text, WebVTT chapters,
  6. or JSON. --write muxes the chapters INTO a stream-copy of the media (atomic,
  7. original untouched).
  8. Usage: make-chapters.py (--from-scenes | --from-silence | --chapters FILE)
  9. [--media FILE] [--min-gap S] [--duration S]
  10. [--format ffmetadata|youtube|vtt|json] [--write OUT] [--json]
  11. Input: --media for detection modes and --write; --chapters JSON is
  12. [{"start": 0, "title": "Intro"}, ...] (or {"chapters": [...]})
  13. Output: stdout = the chosen format (default ffmetadata); --json = envelope
  14. (schema claude-mods.ffmpeg-ops.chapters/v1)
  15. Stderr: progress, YouTube-rule warnings, errors
  16. Exit: 0 ok, 2 usage, 3 media/chapters file missing, 4 invalid chapters JSON,
  17. 5 ffmpeg/ffprobe missing when required
  18. Examples:
  19. make-chapters.py --from-scenes --media talk.mp4 --min-gap 30
  20. make-chapters.py --from-silence --media lecture.mp4 --write chaptered.mp4
  21. make-chapters.py --chapters chapters.json --duration 3600 --format youtube
  22. make-chapters.py --from-scenes --media in.mp4 --format json | jq '.data.chapters'
  23. """
  24. import argparse
  25. import json
  26. import shutil
  27. import subprocess
  28. import sys
  29. from pathlib import Path
  30. from typing import NoReturn
  31. SCHEMA = "claude-mods.ffmpeg-ops.chapters/v1"
  32. EXIT_OK, EXIT_USAGE, EXIT_NOT_FOUND, EXIT_VALIDATION, EXIT_MISSING_DEP = 0, 2, 3, 4, 5
  33. def err(json_mode: bool, code: str, message: str, exit_code: int) -> NoReturn:
  34. if json_mode:
  35. print(json.dumps({"error": {"code": code, "message": message, "details": {}}}))
  36. print(f"ERROR: {message}", file=sys.stderr)
  37. sys.exit(exit_code)
  38. def media_duration(path: Path, json_mode: bool) -> float:
  39. ffprobe = shutil.which("ffprobe")
  40. if not ffprobe:
  41. err(json_mode, "MISSING_DEPENDENCY", "ffprobe not found on PATH", EXIT_MISSING_DEP)
  42. proc = subprocess.run(
  43. [ffprobe, "-v", "error", "-show_entries", "format=duration",
  44. "-of", "default=nw=1:nk=1", str(path)],
  45. capture_output=True, text=True)
  46. try:
  47. return float(proc.stdout.strip())
  48. except ValueError:
  49. err(json_mode, "VALIDATION", f"could not read duration of {path.name}",
  50. EXIT_VALIDATION)
  51. def detect_points(mode: str, media: Path, json_mode: bool) -> list:
  52. """Shell out to the sibling detect-segments.py — one detection implementation."""
  53. sibling = Path(__file__).resolve().parent / "detect-segments.py"
  54. flag = "--scenes" if mode == "scenes" else "--silence"
  55. proc = subprocess.run(
  56. [sys.executable, str(sibling), flag, "--json", str(media)],
  57. capture_output=True, text=True)
  58. if proc.returncode != 0:
  59. err(json_mode, "VALIDATION",
  60. f"detect-segments {flag} failed (exit {proc.returncode}): "
  61. f"{(proc.stderr.strip().splitlines() or ['?'])[-1]}", proc.returncode)
  62. data = json.loads(proc.stdout)["data"]
  63. if mode == "scenes":
  64. return [float(c) for c in data.get("cuts", [])]
  65. # silence mode: a chapter candidate is where speech RESUMES
  66. return [float(seg["start"]) for seg in data.get("speech", [])]
  67. def load_chapters_file(path: Path, json_mode: bool) -> list:
  68. if not path.is_file():
  69. err(json_mode, "NOT_FOUND", f"chapters file not found: {path}", EXIT_NOT_FOUND)
  70. try:
  71. raw = json.loads(path.read_text(encoding="utf-8"))
  72. except json.JSONDecodeError as e:
  73. err(json_mode, "VALIDATION", f"chapters file is not valid JSON: {e}",
  74. EXIT_VALIDATION)
  75. items = raw.get("chapters") if isinstance(raw, dict) else raw
  76. if not isinstance(items, list) or not items:
  77. err(json_mode, "VALIDATION",
  78. 'chapters JSON must be a non-empty array of {"start": s, "title": "..."}',
  79. EXIT_VALIDATION)
  80. chapters = []
  81. for i, c in enumerate(items):
  82. if not isinstance(c, dict) or not isinstance(c.get("start"), (int, float)):
  83. err(json_mode, "VALIDATION", f"chapters[{i}] needs a numeric 'start'",
  84. EXIT_VALIDATION)
  85. chapters.append({"start": float(c["start"]),
  86. "title": str(c.get("title") or f"Chapter {i + 1}")})
  87. return sorted(chapters, key=lambda c: c["start"])
  88. def build_chapters(points: list, min_gap: float, duration: float) -> list:
  89. """Merge close points, force a chapter at 0, attach END times."""
  90. merged = [0.0]
  91. for p in sorted(p for p in points if p > 0):
  92. if p - merged[-1] >= min_gap and (duration <= 0 or duration - p >= min_gap):
  93. merged.append(round(p, 3))
  94. return [{"start": s, "title": f"Chapter {i + 1}"} for i, s in enumerate(merged)]
  95. def attach_ends(chapters: list, duration: float) -> list:
  96. out = []
  97. for i, c in enumerate(chapters):
  98. end = chapters[i + 1]["start"] if i + 1 < len(chapters) else duration
  99. out.append({**c, "end": round(max(end, c["start"]), 3)})
  100. return out
  101. def esc_ffmeta(s: str) -> str:
  102. for ch in ("\\", "=", ";", "#"):
  103. s = s.replace(ch, "\\" + ch)
  104. return s.replace("\n", " ")
  105. def fmt_ffmetadata(chapters: list) -> str:
  106. lines = [";FFMETADATA1"]
  107. for c in chapters:
  108. lines += ["[CHAPTER]", "TIMEBASE=1/1000",
  109. f"START={int(c['start'] * 1000)}", f"END={int(c['end'] * 1000)}",
  110. f"title={esc_ffmeta(c['title'])}"]
  111. return "\n".join(lines) + "\n"
  112. def ts_youtube(s: float) -> str:
  113. h, rem = divmod(int(s), 3600)
  114. m, sec = divmod(rem, 60)
  115. return f"{h}:{m:02d}:{sec:02d}" if h else f"{m}:{sec:02d}"
  116. def ts_vtt(s: float) -> str:
  117. h, rem = divmod(int(s), 3600)
  118. m, sec = divmod(rem, 60)
  119. return f"{h:02d}:{m:02d}:{sec:02d}.{int(round((s % 1) * 1000)):03d}"
  120. def fmt_youtube(chapters: list) -> str:
  121. # YouTube parses chapters only if: first at 0:00, >= 3 chapters, each >= 10 s.
  122. if chapters and chapters[0]["start"] != 0:
  123. print("warning: YouTube requires the first chapter at 0:00", file=sys.stderr)
  124. if len(chapters) < 3:
  125. print("warning: YouTube needs >= 3 chapters to render them", file=sys.stderr)
  126. if any(c["end"] - c["start"] < 10 for c in chapters):
  127. print("warning: YouTube ignores chapter lists with any chapter < 10 s",
  128. file=sys.stderr)
  129. return "\n".join(f"{ts_youtube(c['start'])} {c['title']}" for c in chapters) + "\n"
  130. def fmt_vtt(chapters: list) -> str:
  131. blocks = [f"{ts_vtt(c['start'])} --> {ts_vtt(c['end'])}\n{c['title']}"
  132. for c in chapters]
  133. return "WEBVTT\n\n" + "\n\n".join(blocks) + "\n"
  134. def mux_chapters(media: Path, meta: str, out: Path, json_mode: bool) -> None:
  135. ffmpeg = shutil.which("ffmpeg")
  136. if not ffmpeg:
  137. err(json_mode, "MISSING_DEPENDENCY", "ffmpeg not found on PATH (--write)",
  138. EXIT_MISSING_DEP)
  139. meta_file = out.parent / (out.stem + ".ffmeta.tmp")
  140. tmp_out = out.with_name(out.stem + ".tmp" + out.suffix)
  141. out.parent.mkdir(parents=True, exist_ok=True)
  142. meta_file.write_text(meta, encoding="utf-8")
  143. try:
  144. proc = subprocess.run(
  145. [ffmpeg, "-y", "-v", "error", "-i", str(media),
  146. "-f", "ffmetadata", "-i", str(meta_file),
  147. "-map", "0", "-map_metadata", "0", "-map_chapters", "1",
  148. "-c", "copy", str(tmp_out)],
  149. capture_output=True, text=True)
  150. if proc.returncode != 0:
  151. err(json_mode, "VALIDATION",
  152. f"chapter mux failed: {(proc.stderr.strip().splitlines() or ['?'])[-1]}",
  153. EXIT_VALIDATION)
  154. tmp_out.replace(out)
  155. finally:
  156. meta_file.unlink(missing_ok=True)
  157. tmp_out.unlink(missing_ok=True)
  158. def main() -> int:
  159. ap = argparse.ArgumentParser(
  160. description="Derive chapters and emit ffmetadata/YouTube/VTT or mux them in.",
  161. epilog="Examples:\n"
  162. " make-chapters.py --from-scenes --media talk.mp4 --min-gap 30\n"
  163. " make-chapters.py --chapters ch.json --duration 3600 --format youtube\n",
  164. formatter_class=argparse.RawDescriptionHelpFormatter)
  165. src = ap.add_mutually_exclusive_group(required=True)
  166. src.add_argument("--from-scenes", action="store_true",
  167. help="chapter points from video scene changes")
  168. src.add_argument("--from-silence", action="store_true",
  169. help="chapter points where speech resumes after silence")
  170. src.add_argument("--chapters", metavar="FILE",
  171. help='explicit JSON: [{"start": s, "title": "..."}]')
  172. ap.add_argument("--media", metavar="FILE",
  173. help="media file (required for detection modes and --write)")
  174. ap.add_argument("--min-gap", type=float, default=15.0,
  175. help="merge detected points closer than this, seconds (default 15)")
  176. ap.add_argument("--duration", type=float, default=None,
  177. help="total duration override (skips the ffprobe lookup)")
  178. ap.add_argument("--format", default="ffmetadata",
  179. choices=("ffmetadata", "youtube", "vtt", "json"),
  180. help="stdout format (default ffmetadata)")
  181. ap.add_argument("--write", metavar="OUT", default=None,
  182. help="mux chapters into a stream-copy of --media at this path")
  183. ap.add_argument("--json", action="store_true",
  184. help="emit JSON envelope on stdout (same as --format json)")
  185. args = ap.parse_args()
  186. json_mode = args.json or args.format == "json"
  187. detection = args.from_scenes or args.from_silence
  188. if (detection or args.write) and not args.media:
  189. err(json_mode, "USAGE",
  190. "--media is required for --from-scenes/--from-silence/--write", EXIT_USAGE)
  191. media = Path(args.media) if args.media else None
  192. if media and not media.is_file():
  193. err(json_mode, "NOT_FOUND", f"media not found: {media}", EXIT_NOT_FOUND)
  194. if args.duration is not None:
  195. duration = args.duration
  196. elif media:
  197. duration = media_duration(media, json_mode)
  198. else:
  199. err(json_mode, "USAGE", "--duration is required when no --media is given",
  200. EXIT_USAGE)
  201. if args.chapters:
  202. chapters = load_chapters_file(Path(args.chapters), json_mode)
  203. chapters = [{**c} for c in chapters]
  204. else:
  205. mode = "scenes" if args.from_scenes else "silence"
  206. print(f"deriving chapter points from {mode}...", file=sys.stderr)
  207. points = detect_points(mode, media, json_mode) # type: ignore[arg-type]
  208. chapters = build_chapters(points, args.min_gap, duration)
  209. chapters = attach_ends(chapters, duration)
  210. meta = fmt_ffmetadata(chapters)
  211. written = None
  212. if args.write:
  213. mux_chapters(media, meta, Path(args.write), json_mode) # type: ignore[arg-type]
  214. written = str(Path(args.write))
  215. print(f"chapters muxed -> {written}", file=sys.stderr)
  216. if json_mode:
  217. data = {"media": str(media) if media else None, "duration_s": duration,
  218. "count": len(chapters), "chapters": chapters, "written": written}
  219. print(json.dumps({"data": data, "meta": {"schema": SCHEMA}}, indent=2))
  220. elif args.format == "youtube":
  221. sys.stdout.write(fmt_youtube(chapters))
  222. elif args.format == "vtt":
  223. sys.stdout.write(fmt_vtt(chapters))
  224. else:
  225. sys.stdout.write(meta)
  226. return EXIT_OK
  227. if __name__ == "__main__":
  228. sys.exit(main())