check-model-table.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. #!/usr/bin/env python3
  2. """Staleness verifier for the claude-api-ops model + cache-minimum tables.
  3. Guards the two fast-moving fact tables in this skill against silent drift:
  4. - the "Current Models" table in SKILL.md (ids, pricing, context, output)
  5. - the per-model prompt-cache minimum table in references/caching-and-cost.md
  6. Two modes (protocol SKILL-RESOURCE-PROTOCOL.md §7):
  7. --offline (default): parse both tables, assert internal consistency. No network.
  8. Exit 4 (VALIDATION) on a malformed/contradictory row.
  9. --live: curl the Anthropic Models API and compare its model-id set
  10. against the documented ids. Advisory only.
  11. Live-mode scope limit: the Models API returns model IDs but NOT pricing, context,
  12. or output limits. --live therefore verifies model-ID existence/coverage ONLY:
  13. - a documented id absent from the live list -> DRIFT (retired/typo)
  14. - a live id newer than anything documented -> DRIFT (table lacks a new model)
  15. Pricing/context/output drift is out of scope for --live (the API can't confirm it);
  16. --offline guards their well-formedness, and the SKILL.md "Live Documentation" links
  17. remain the human cross-check for pricing.
  18. Usage: check-model-table.py [--offline | --live] [--json] [--skill-dir DIR] [-q]
  19. Input: reads SKILL.md and references/caching-and-cost.md (resolved relative to
  20. this script, or --skill-dir)
  21. Output: stdout = data only (JSON envelope under --json, else a plain summary)
  22. Stderr: headers, progress, notes, errors
  23. Exit: 0 ok/consistent, 2 usage, 3 not-found, 4 validation (malformed/contradictory),
  24. 5 missing-dep (curl, --live only), 7 unavailable (no key / API unreachable),
  25. 10 drift (live id-set disagrees with the table)
  26. Examples:
  27. check-model-table.py --offline
  28. check-model-table.py --offline --json | python -m json.tool
  29. ANTHROPIC_API_KEY=sk-... check-model-table.py --live
  30. check-model-table.py --live # exits 7 (advisory) when the key is unset
  31. """
  32. from __future__ import annotations
  33. import argparse
  34. import json
  35. import os
  36. import re
  37. import subprocess
  38. import sys
  39. from pathlib import Path
  40. from typing import NoReturn
# Windows consoles default to cp1252; force UTF-8 so em-dashes/§ in notes don't
# raise UnicodeEncodeError or print mojibake (matches the repo's standard fix).
for _stream in (sys.stdout, sys.stderr):
    try:
        _stream.reconfigure(encoding="utf-8")  # type: ignore[attr-defined]
    except (AttributeError, ValueError):
        # Best-effort only: a stream object without reconfigure(), or one that
        # rejects the change, is left exactly as it was.
        pass
# Process exit codes (the same table appears on the "Exit:" lines of the
# module docstring).
EXIT_OK = 0            # tables consistent / live id set agrees
EXIT_USAGE = 2         # usage error
EXIT_NOT_FOUND = 3     # skill dir or a required markdown file is missing
EXIT_VALIDATION = 4    # malformed or contradictory table row
EXIT_MISSING_DEP = 5   # curl not installed (--live only)
EXIT_UNAVAILABLE = 7   # no API key / Models API unreachable (advisory)
EXIT_DRIFT = 10        # live id set disagrees with the documented table

# Schema identifier placed in the "meta" object of every JSON envelope.
SCHEMA = "claude-mods.claude-api-ops.model-table/v1"
# Endpoint queried by --live, and the API version header value sent with it.
MODELS_API = "https://api.anthropic.com/v1/models?limit=1000"
ANTHROPIC_VERSION = "2023-06-01"

# A well-formed alias id: claude-<word>-<digit>... and NO date suffix.
# Accepts claude-opus-4-8, claude-fable-5, claude-sonnet-4-6, claude-haiku-4-5.
# NOTE: the optional trailing "-<digits>" group has no length limit, so this
# pattern alone also matches e.g. claude-opus-4-20251114; callers pair it with
# DATE_SUFFIX_RE to reject dated ids.
ID_RE = re.compile(r"^claude-[a-z]+-\d+(?:-\d+)?$")
# A date suffix looks like an 8-digit run (e.g. -20251114).
DATE_SUFFIX_RE = re.compile(r"-\d{8}$")
  63. def note(msg: str, quiet: bool) -> None:
  64. if not quiet:
  65. print(msg, file=sys.stderr)
  66. def fail_validation(message: str, details: dict, json_mode: bool) -> NoReturn:
  67. if json_mode:
  68. print(json.dumps({"error": {"code": "VALIDATION", "message": message,
  69. "details": details}}))
  70. print(f"ERROR: {message}", file=sys.stderr)
  71. for k, v in details.items():
  72. print(f" {k}: {v}", file=sys.stderr)
  73. sys.exit(EXIT_VALIDATION)
  74. # ---------------------------------------------------------------------------
  75. # Parsing
  76. # ---------------------------------------------------------------------------
  77. def split_row(line: str) -> list[str]:
  78. """Split a markdown table row into trimmed cells (drops outer pipes)."""
  79. cells = [c.strip() for c in line.strip().strip("|").split("|")]
  80. return cells
  81. def is_separator(cells: list[str]) -> bool:
  82. return all(re.fullmatch(r":?-{2,}:?", c or "") for c in cells) and bool(cells)
  83. def parse_model_table(text: str) -> tuple[list[dict], list[str]]:
  84. """Parse the SKILL.md 'Current Models' table.
  85. Columns: Model | ID | Context | Max Output | Input $/MTok | Output $/MTok
  86. Returns one dict per data row.
  87. """
  88. lines = text.splitlines()
  89. # Locate the header row that contains the ID column and a price column.
  90. start = None
  91. for i, line in enumerate(lines):
  92. low = line.lower()
  93. if line.lstrip().startswith("|") and "id" in low and "context" in low and "output" in low:
  94. start = i
  95. break
  96. if start is None:
  97. return [], []
  98. header = split_row(lines[start])
  99. rows: list[dict] = []
  100. # Expect a separator row next, then data rows until a non-table line.
  101. j = start + 1
  102. if j < len(lines) and is_separator(split_row(lines[j])):
  103. j += 1
  104. while j < len(lines):
  105. line = lines[j]
  106. if not line.lstrip().startswith("|"):
  107. break
  108. cells = split_row(line)
  109. if is_separator(cells):
  110. j += 1
  111. continue
  112. if len(cells) >= 6:
  113. rows.append({
  114. "name": cells[0],
  115. "id_cell": cells[1],
  116. "context": cells[2],
  117. "max_output": cells[3],
  118. "input_price": cells[4],
  119. "output_price": cells[5],
  120. })
  121. j += 1
  122. return rows, header
  123. def parse_cache_min_table(text: str) -> list[dict]:
  124. """Parse the caching-and-cost.md 'Minimum prefix tokens' table.
  125. Columns: Model | Minimum prefix tokens. The Model cell holds friendly names
  126. (possibly several comma-separated), not ids.
  127. """
  128. lines = text.splitlines()
  129. start = None
  130. for i, line in enumerate(lines):
  131. low = line.lower()
  132. if line.lstrip().startswith("|") and "model" in low and "minimum" in low and "prefix" in low:
  133. start = i
  134. break
  135. if start is None:
  136. return []
  137. rows: list[dict] = []
  138. j = start + 1
  139. if j < len(lines) and is_separator(split_row(lines[j])):
  140. j += 1
  141. while j < len(lines):
  142. line = lines[j]
  143. if not line.lstrip().startswith("|"):
  144. break
  145. cells = split_row(line)
  146. if is_separator(cells):
  147. j += 1
  148. continue
  149. if len(cells) >= 2:
  150. rows.append({"names": cells[0], "min_tokens": cells[1]})
  151. j += 1
  152. return rows
  153. # ---------------------------------------------------------------------------
  154. # Offline validation
  155. # ---------------------------------------------------------------------------
# A price cell: a dollar sign followed by an integer or decimal amount
# (e.g. $3 or $0.25). No thousands separators, no trailing text.
PRICE_RE = re.compile(r"^\$\d+(?:\.\d+)?$")
# A size cell: an integer or decimal immediately followed by an upper-case
# K or M (e.g. 200K, 1M).
SIZE_RE = re.compile(r"^\d+(?:\.\d+)?[KM]$")
  158. def clean_id(id_cell: str) -> str:
  159. """Strip backtick code fences from an ID cell."""
  160. return id_cell.strip().strip("`").strip()
  161. def validate_offline(skill_dir: Path, json_mode: bool, quiet: bool) -> dict:
  162. skill_md = skill_dir / "SKILL.md"
  163. cache_md = skill_dir / "references" / "caching-and-cost.md"
  164. for p in (skill_md, cache_md):
  165. if not p.is_file():
  166. if json_mode:
  167. print(json.dumps({"error": {"code": "NOT_FOUND",
  168. "message": f"missing file: {p}",
  169. "details": {}}}))
  170. print(f"ERROR: required file not found: {p}", file=sys.stderr)
  171. sys.exit(EXIT_NOT_FOUND)
  172. note("=== offline model-table consistency check ===", quiet)
  173. model_rows, _ = parse_model_table(skill_md.read_text(encoding="utf-8"))
  174. if not model_rows:
  175. fail_validation("could not locate a non-empty Current Models table in SKILL.md",
  176. {"file": str(skill_md)}, json_mode)
  177. documented_ids: list[str] = []
  178. models_out: list[dict] = []
  179. for row in model_rows:
  180. mid = clean_id(row["id_cell"])
  181. problems = []
  182. if not ID_RE.match(mid):
  183. problems.append("id does not match claude-[a-z]+-<digits>")
  184. if DATE_SUFFIX_RE.search(mid):
  185. problems.append("id carries a date suffix (should be a bare alias)")
  186. if not PRICE_RE.match(row["input_price"]):
  187. problems.append(f"input price not numeric: {row['input_price']!r}")
  188. if not PRICE_RE.match(row["output_price"]):
  189. problems.append(f"output price not numeric: {row['output_price']!r}")
  190. if not SIZE_RE.match(row["context"]):
  191. problems.append(f"context not a size (e.g. 1M/200K): {row['context']!r}")
  192. if not SIZE_RE.match(row["max_output"]):
  193. problems.append(f"max output not a size: {row['max_output']!r}")
  194. if problems:
  195. fail_validation(f"malformed model row: {row['name']!r}",
  196. {"id": mid, "problems": "; ".join(problems)}, json_mode)
  197. documented_ids.append(mid)
  198. models_out.append({
  199. "name": row["name"], "id": mid, "context": row["context"],
  200. "max_output": row["max_output"],
  201. "input_price": row["input_price"], "output_price": row["output_price"],
  202. })
  203. # No duplicate ids.
  204. dupes = {x for x in documented_ids if documented_ids.count(x) > 1}
  205. if dupes:
  206. fail_validation("duplicate model ids in the table",
  207. {"ids": ", ".join(sorted(dupes))}, json_mode)
  208. # Cache-minimum table.
  209. cache_rows = parse_cache_min_table(cache_md.read_text(encoding="utf-8"))
  210. if not cache_rows:
  211. fail_validation("could not locate the cache-minimum table in caching-and-cost.md",
  212. {"file": str(cache_md)}, json_mode)
  213. for crow in cache_rows:
  214. if not re.fullmatch(r"\d+", crow["min_tokens"]):
  215. fail_validation("cache-minimum value is not an integer",
  216. {"row": crow["names"], "value": crow["min_tokens"]},
  217. json_mode)
  218. # Cross-file consistency: every model NAME (e.g. "Opus 4.8", "Fable 5",
  219. # "Sonnet 4.6", "Haiku 4.5") in the model table must appear in the cache
  220. # table's name set, so the two files agree on the model lineup.
  221. cache_blob = " ".join(c["names"] for c in cache_rows).lower()
  222. missing_in_cache: list[str] = []
  223. for m in models_out:
  224. # Derive the short family+version token, e.g. "Claude Opus 4.8" -> "opus 4.8".
  225. short = re.sub(r"^claude\s+", "", m["name"], flags=re.I).strip().lower()
  226. if short not in cache_blob:
  227. missing_in_cache.append(m["name"])
  228. if missing_in_cache:
  229. fail_validation(
  230. "model(s) in SKILL.md absent from the cache-minimum table — files contradict",
  231. {"missing": ", ".join(missing_in_cache),
  232. "hint": "every documented model needs a prompt-cache minimum row"},
  233. json_mode)
  234. note(f" {len(models_out)} model rows, all well-formed", quiet)
  235. note(f" {len(cache_rows)} cache-minimum rows, all integer", quiet)
  236. note(" cross-file model lineup consistent", quiet)
  237. note("OK: tables internally consistent.", quiet)
  238. return {
  239. "mode": "offline",
  240. "models": models_out,
  241. "documented_ids": documented_ids,
  242. "cache_min_rows": cache_rows,
  243. "consistent": True,
  244. }
  245. # ---------------------------------------------------------------------------
  246. # Live validation
  247. # ---------------------------------------------------------------------------
  248. def fetch_live_ids(quiet: bool) -> list[str] | None:
  249. """Return the live model-id list, or None if unavailable (advisory)."""
  250. key = os.environ.get("ANTHROPIC_API_KEY", "").strip()
  251. if not key:
  252. note("NOTE: ANTHROPIC_API_KEY is unset - skipping live check (advisory).",
  253. quiet)
  254. return None
  255. cmd = [
  256. "curl", "-fsS", "--max-time", "20",
  257. "-H", f"x-api-key: {key}",
  258. "-H", f"anthropic-version: {ANTHROPIC_VERSION}",
  259. MODELS_API,
  260. ]
  261. try:
  262. proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
  263. except (subprocess.TimeoutExpired, OSError) as exc:
  264. note(f"NOTE: Models API call failed ({exc}) — advisory, not a failure.",
  265. quiet)
  266. return None
  267. if proc.returncode != 0:
  268. note(f"NOTE: Models API unreachable (curl exit {proc.returncode}) — advisory.",
  269. quiet)
  270. if proc.stderr.strip():
  271. note(f" {proc.stderr.strip().splitlines()[-1]}", quiet)
  272. return None
  273. try:
  274. payload = json.loads(proc.stdout)
  275. except json.JSONDecodeError:
  276. note("NOTE: Models API returned non-JSON — advisory, not a failure.", quiet)
  277. return None
  278. data = payload.get("data")
  279. if not isinstance(data, list):
  280. note("NOTE: Models API JSON missing 'data' list — advisory.", quiet)
  281. return None
  282. return [m.get("id", "") for m in data if isinstance(m, dict) and m.get("id")]
  283. def validate_live(skill_dir: Path, json_mode: bool, quiet: bool) -> dict:
  284. if not _have("curl"):
  285. if json_mode:
  286. print(json.dumps({"error": {"code": "PRECONDITION",
  287. "message": "curl required for --live",
  288. "details": {}}}))
  289. print("ERROR: curl is required for --live", file=sys.stderr)
  290. sys.exit(EXIT_MISSING_DEP)
  291. # Reuse offline parse for the documented id set (also validates well-formedness).
  292. note("=== live model-id coverage check ===", quiet)
  293. skill_md = skill_dir / "SKILL.md"
  294. if not skill_md.is_file():
  295. print(f"ERROR: required file not found: {skill_md}", file=sys.stderr)
  296. sys.exit(EXIT_NOT_FOUND)
  297. parsed = parse_model_table(skill_md.read_text(encoding="utf-8"))
  298. if not parsed or not parsed[0]:
  299. fail_validation("could not parse the model table for live comparison",
  300. {"file": str(skill_md)}, json_mode)
  301. documented = [clean_id(r["id_cell"]) for r in parsed[0]]
  302. live = fetch_live_ids(quiet)
  303. if live is None:
  304. # Advisory: not a failure. Exit 7.
  305. if json_mode:
  306. print(json.dumps({"data": {"mode": "live", "status": "unavailable",
  307. "documented_ids": documented, "live_ids": None},
  308. "meta": {"schema": SCHEMA, "status": "unavailable"}}))
  309. sys.exit(EXIT_UNAVAILABLE)
  310. live_set = set(live)
  311. doc_set = set(documented)
  312. # A documented id absent from the live list = drift (retired/typo).
  313. missing = sorted(doc_set - live_set)
  314. # A live id NEWER than anything documented = drift (table lacks a new model).
  315. # Restrict "newer" to well-formed alias ids so we ignore date-suffixed and
  316. # snapshot variants the docs intentionally don't list.
  317. live_alias = {m for m in live_set if ID_RE.match(m) and not DATE_SUFFIX_RE.search(m)}
  318. new_models = sorted(live_alias - doc_set)
  319. drift = bool(missing or new_models)
  320. result = {
  321. "mode": "live",
  322. "status": "drift" if drift else "ok",
  323. "documented_ids": documented,
  324. "live_ids": sorted(live_set),
  325. "missing_from_live": missing,
  326. "new_in_live": new_models,
  327. }
  328. if drift:
  329. if missing:
  330. note("DRIFT: documented id(s) absent from live Models API:", quiet)
  331. for m in missing:
  332. note(f" - {m}", quiet)
  333. if new_models:
  334. note("DRIFT: live Models API has alias id(s) the table lacks:", quiet)
  335. for m in new_models:
  336. note(f" + {m}", quiet)
  337. if json_mode:
  338. print(json.dumps({"data": result, "meta": {"schema": SCHEMA,
  339. "status": "drift"}}))
  340. else:
  341. print("DRIFT: model-id table disagrees with the live Models API "
  342. f"(missing={missing}, new={new_models})")
  343. sys.exit(EXIT_DRIFT)
  344. note("OK: every documented id exists live; no newer alias id missing from the table.",
  345. quiet)
  346. return result
  347. def _have(tool: str) -> bool:
  348. from shutil import which
  349. return which(tool) is not None
  350. # ---------------------------------------------------------------------------
  351. # Main
  352. # ---------------------------------------------------------------------------
  353. def main(argv: list[str]) -> int:
  354. parser = argparse.ArgumentParser(
  355. prog="check-model-table.py", add_help=True,
  356. description="Staleness verifier for the claude-api-ops model + cache tables.",
  357. epilog=(
  358. "EXAMPLES:\n"
  359. " check-model-table.py --offline\n"
  360. " check-model-table.py --offline --json | python -m json.tool\n"
  361. " ANTHROPIC_API_KEY=sk-... check-model-table.py --live\n"
  362. " check-model-table.py --live # exits 7 (advisory) when key unset\n"
  363. ),
  364. formatter_class=argparse.RawDescriptionHelpFormatter,
  365. )
  366. mode = parser.add_mutually_exclusive_group()
  367. mode.add_argument("--offline", action="store_true",
  368. help="parse + assert internal consistency, no network (default)")
  369. mode.add_argument("--live", action="store_true",
  370. help="compare documented ids against the live Models API (advisory)")
  371. parser.add_argument("--json", action="store_true",
  372. help="emit the JSON envelope on stdout")
  373. parser.add_argument("--skill-dir", default=None,
  374. help="skill root (default: parent of this script's dir)")
  375. parser.add_argument("-q", "--quiet", action="store_true",
  376. help="suppress stderr progress/notes")
  377. args = parser.parse_args(argv)
  378. if args.skill_dir:
  379. skill_dir = Path(args.skill_dir).resolve()
  380. else:
  381. skill_dir = Path(__file__).resolve().parent.parent
  382. if not skill_dir.is_dir():
  383. print(f"ERROR: skill dir not found: {skill_dir}", file=sys.stderr)
  384. return EXIT_NOT_FOUND
  385. if args.live:
  386. result = validate_live(skill_dir, args.json, args.quiet)
  387. else:
  388. result = validate_offline(skill_dir, args.json, args.quiet)
  389. if args.json:
  390. print(json.dumps({"data": result,
  391. "meta": {"schema": SCHEMA, "status": "ok"}}))
  392. return EXIT_OK
if __name__ == "__main__":
    # Script entry point: pass the CLI arguments (without the program name)
    # to main() and use its return value as the process exit status.
    try:
        sys.exit(main(sys.argv[1:]))
    except KeyboardInterrupt:
        # An interrupt (Ctrl-C) ends the run with EXIT_USAGE (2).
        sys.exit(EXIT_USAGE)