check-model-table.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498
  1. #!/usr/bin/env python3
  2. """Staleness verifier for the claude-api-ops model + cache-minimum tables.
  3. Guards the two fast-moving fact tables in this skill against silent drift:
  4. - the "Current Models" table in SKILL.md (ids, pricing, context, output)
  5. - the per-model prompt-cache minimum table in references/caching-and-cost.md
  6. Two modes (protocol SKILL-RESOURCE-PROTOCOL.md §7):
  7. --offline (default): parse both tables, assert internal consistency. No network.
  8. Exit 4 (VALIDATION) on a malformed/contradictory row.
  9. --live: curl the Anthropic Models API and compare its model-id set
  10. against the documented ids. Advisory only.
  11. Live-mode scope limit: the Models API returns model IDs but NOT pricing, context,
  12. or output limits. --live therefore verifies model-ID existence/coverage ONLY:
  13. - a documented id absent from the live list -> DRIFT (retired/typo)
  14. - a live id newer than anything documented -> DRIFT (table lacks a new model)
  15. Pricing/context/output drift is out of scope for --live (the API can't confirm it);
  16. --offline guards their well-formedness, and the SKILL.md "Live Documentation" links
  17. remain the human cross-check for pricing.
  18. Usage: check-model-table.py [--offline | --live] [--json] [--skill-dir DIR] [-q]
  19. Input: reads SKILL.md and references/caching-and-cost.md (resolved relative to
  20. this script, or --skill-dir)
  21. Output: stdout = data only (JSON envelope under --json, else a plain summary)
  22. Stderr: headers, progress, notes, errors
  23. Exit: 0 ok/consistent, 2 usage, 3 not-found, 4 validation (malformed/contradictory),
  24. 5 missing-dep (curl, --live only), 7 unavailable (no key / API unreachable),
  25. 10 drift (live id-set disagrees with the table)
  26. Examples:
  27. check-model-table.py --offline
  28. check-model-table.py --offline --json | python -m json.tool
  29. ANTHROPIC_API_KEY=sk-... check-model-table.py --live
  30. check-model-table.py --live # exits 7 (advisory) when the key is unset
  31. """
  32. from __future__ import annotations
  33. import argparse
  34. import json
  35. import os
  36. import re
  37. import subprocess
  38. import sys
  39. from pathlib import Path
  40. from typing import NoReturn
  41. # Windows consoles default to cp1252; force UTF-8 so em-dashes/§ in notes don't
  42. # raise UnicodeEncodeError or print mojibake (matches the repo's standard fix).
  43. for _stream in (sys.stdout, sys.stderr):
  44. try:
  45. _stream.reconfigure(encoding="utf-8") # type: ignore[attr-defined]
  46. except (AttributeError, ValueError):
  47. pass
  48. class Term:
  49. """Tiny ANSI helper mirroring skills/_lib/term.sh (term.sh is bash-only; per
  50. TERMINAL-DESIGN.md §9 the Python port is inline with matching keys/glyphs).
  51. Honors FORCE_COLOR / NO_COLOR / TERM_ASCII; color tracks the bound stream's TTY,
  52. and glyphs fall back to ASCII on TERM_ASCII or a non-UTF stream encoding."""
  53. _C = {"green": "\033[32m", "yellow": "\033[33m", "orange": "\033[38;5;208m",
  54. "red": "\033[31m", "cyan": "\033[36m", "dim": "\033[2m", "off": "\033[0m"}
  55. _GLYPH = {"ok": "✓", "bad": "✗", "warn": "▲", "skip": "—", "na": "—", "unknown": "?"}
  56. _ASCII = {"ok": "+", "bad": "x", "warn": "!", "skip": "-", "na": "-", "unknown": "?"}
  57. _MARK_COLOR = {"ok": "green", "bad": "red", "warn": "orange", "skip": "dim",
  58. "na": "dim", "unknown": "yellow"}
  59. def __init__(self, stream=sys.stderr):
  60. enc = (getattr(stream, "encoding", "") or "").lower()
  61. self.ascii = (os.environ.get("TERM_ASCII") == "1"
  62. or os.environ.get("FLEET_ASCII") == "1" or "utf" not in enc)
  63. if os.environ.get("FORCE_COLOR"):
  64. self.color = True
  65. elif (os.environ.get("NO_COLOR") is not None or os.environ.get("TERM") == "dumb"
  66. or not getattr(stream, "isatty", lambda: False)()):
  67. self.color = False
  68. else:
  69. self.color = True
  70. def c(self, name, text):
  71. return f"{self._C.get(name, '')}{text}{self._C['off']}" if self.color else text
  72. def mark(self, state):
  73. return self.c(self._MARK_COLOR.get(state, ""),
  74. (self._ASCII if self.ascii else self._GLYPH).get(state, "."))
  75. def hdr(self, text):
  76. return self.c("cyan", f"=== {text} ===")
# Module-wide terminal helper. It is bound to stderr, so its colour and glyph
# decisions are taken from that stream's TTY status and encoding.
TERM = Term(sys.stderr)

# Process exit codes; the meanings below repeat the "Exit:" line of the module
# docstring.
EXIT_OK = 0  # ok / consistent
EXIT_USAGE = 2  # usage
EXIT_NOT_FOUND = 3  # not-found
EXIT_VALIDATION = 4  # validation (malformed/contradictory)
EXIT_MISSING_DEP = 5  # missing-dep (curl, --live only)
EXIT_UNAVAILABLE = 7  # unavailable (no key / API unreachable)
EXIT_DRIFT = 10  # drift (live id-set disagrees with the table)

# Schema identifier written into the "meta" object of the JSON envelope.
SCHEMA = "claude-mods.claude-api-ops.model-table/v1"
# Endpoint fetched by --live, and the anthropic-version header value sent with it.
MODELS_API = "https://api.anthropic.com/v1/models?limit=1000"
ANTHROPIC_VERSION = "2023-06-01"

# Shape of an alias id: claude-<word>-<digits>, optionally followed by ONE more
# -<digits> group.
# Accepts claude-opus-4-8, claude-fable-5, claude-sonnet-4-6, claude-haiku-4-5.
# NOTE: the optional group has no length limit, so on its own this pattern also
# matches a dated id such as claude-opus-4-20251114. Callers pair it with
# DATE_SUFFIX_RE to reject those.
ID_RE = re.compile(r"^claude-[a-z]+-\d+(?:-\d+)?$")
# A date suffix looks like a trailing 8-digit run (e.g. -20251114).
DATE_SUFFIX_RE = re.compile(r"-\d{8}$")
  93. def note(msg: str, quiet: bool) -> None:
  94. if not quiet:
  95. print(msg, file=sys.stderr)
  96. def fail_validation(message: str, details: dict, json_mode: bool) -> NoReturn:
  97. if json_mode:
  98. print(json.dumps({"error": {"code": "VALIDATION", "message": message,
  99. "details": details}}))
  100. print(f"{TERM.mark('bad')} ERROR: {message}", file=sys.stderr)
  101. for k, v in details.items():
  102. print(f" {k}: {v}", file=sys.stderr)
  103. sys.exit(EXIT_VALIDATION)
  104. # ---------------------------------------------------------------------------
  105. # Parsing
  106. # ---------------------------------------------------------------------------
  107. def split_row(line: str) -> list[str]:
  108. """Split a markdown table row into trimmed cells (drops outer pipes)."""
  109. cells = [c.strip() for c in line.strip().strip("|").split("|")]
  110. return cells
  111. def is_separator(cells: list[str]) -> bool:
  112. return all(re.fullmatch(r":?-{2,}:?", c or "") for c in cells) and bool(cells)
  113. def parse_model_table(text: str) -> tuple[list[dict], list[str]]:
  114. """Parse the SKILL.md 'Current Models' table.
  115. Columns: Model | ID | Context | Max Output | Input $/MTok | Output $/MTok
  116. Returns one dict per data row.
  117. """
  118. lines = text.splitlines()
  119. # Locate the header row that contains the ID column and a price column.
  120. start = None
  121. for i, line in enumerate(lines):
  122. low = line.lower()
  123. if line.lstrip().startswith("|") and "id" in low and "context" in low and "output" in low:
  124. start = i
  125. break
  126. if start is None:
  127. return [], []
  128. header = split_row(lines[start])
  129. rows: list[dict] = []
  130. # Expect a separator row next, then data rows until a non-table line.
  131. j = start + 1
  132. if j < len(lines) and is_separator(split_row(lines[j])):
  133. j += 1
  134. while j < len(lines):
  135. line = lines[j]
  136. if not line.lstrip().startswith("|"):
  137. break
  138. cells = split_row(line)
  139. if is_separator(cells):
  140. j += 1
  141. continue
  142. if len(cells) >= 6:
  143. rows.append({
  144. "name": cells[0],
  145. "id_cell": cells[1],
  146. "context": cells[2],
  147. "max_output": cells[3],
  148. "input_price": cells[4],
  149. "output_price": cells[5],
  150. })
  151. j += 1
  152. return rows, header
  153. def parse_cache_min_table(text: str) -> list[dict]:
  154. """Parse the caching-and-cost.md 'Minimum prefix tokens' table.
  155. Columns: Model | Minimum prefix tokens. The Model cell holds friendly names
  156. (possibly several comma-separated), not ids.
  157. """
  158. lines = text.splitlines()
  159. start = None
  160. for i, line in enumerate(lines):
  161. low = line.lower()
  162. if line.lstrip().startswith("|") and "model" in low and "minimum" in low and "prefix" in low:
  163. start = i
  164. break
  165. if start is None:
  166. return []
  167. rows: list[dict] = []
  168. j = start + 1
  169. if j < len(lines) and is_separator(split_row(lines[j])):
  170. j += 1
  171. while j < len(lines):
  172. line = lines[j]
  173. if not line.lstrip().startswith("|"):
  174. break
  175. cells = split_row(line)
  176. if is_separator(cells):
  177. j += 1
  178. continue
  179. if len(cells) >= 2:
  180. rows.append({"names": cells[0], "min_tokens": cells[1]})
  181. j += 1
  182. return rows
  183. # ---------------------------------------------------------------------------
  184. # Offline validation
  185. # ---------------------------------------------------------------------------
# A price cell: a dollar sign followed by an integer or decimal amount
# (e.g. $3, $0.80). No thousands separators, currency codes or ranges.
PRICE_RE = re.compile(r"^\$\d+(?:\.\d+)?$")
# A size cell: an integer or decimal followed by an upper-case K or M
# (e.g. 200K, 1M).
SIZE_RE = re.compile(r"^\d+(?:\.\d+)?[KM]$")
  188. def clean_id(id_cell: str) -> str:
  189. """Strip backtick code fences from an ID cell."""
  190. return id_cell.strip().strip("`").strip()
  191. def validate_offline(skill_dir: Path, json_mode: bool, quiet: bool) -> dict:
  192. skill_md = skill_dir / "SKILL.md"
  193. cache_md = skill_dir / "references" / "caching-and-cost.md"
  194. for p in (skill_md, cache_md):
  195. if not p.is_file():
  196. if json_mode:
  197. print(json.dumps({"error": {"code": "NOT_FOUND",
  198. "message": f"missing file: {p}",
  199. "details": {}}}))
  200. print(f"ERROR: required file not found: {p}", file=sys.stderr)
  201. sys.exit(EXIT_NOT_FOUND)
  202. note(TERM.hdr("offline model-table consistency check"), quiet)
  203. model_rows, _ = parse_model_table(skill_md.read_text(encoding="utf-8"))
  204. if not model_rows:
  205. fail_validation("could not locate a non-empty Current Models table in SKILL.md",
  206. {"file": str(skill_md)}, json_mode)
  207. documented_ids: list[str] = []
  208. models_out: list[dict] = []
  209. for row in model_rows:
  210. mid = clean_id(row["id_cell"])
  211. problems = []
  212. if not ID_RE.match(mid):
  213. problems.append("id does not match claude-[a-z]+-<digits>")
  214. if DATE_SUFFIX_RE.search(mid):
  215. problems.append("id carries a date suffix (should be a bare alias)")
  216. if not PRICE_RE.match(row["input_price"]):
  217. problems.append(f"input price not numeric: {row['input_price']!r}")
  218. if not PRICE_RE.match(row["output_price"]):
  219. problems.append(f"output price not numeric: {row['output_price']!r}")
  220. if not SIZE_RE.match(row["context"]):
  221. problems.append(f"context not a size (e.g. 1M/200K): {row['context']!r}")
  222. if not SIZE_RE.match(row["max_output"]):
  223. problems.append(f"max output not a size: {row['max_output']!r}")
  224. if problems:
  225. fail_validation(f"malformed model row: {row['name']!r}",
  226. {"id": mid, "problems": "; ".join(problems)}, json_mode)
  227. documented_ids.append(mid)
  228. models_out.append({
  229. "name": row["name"], "id": mid, "context": row["context"],
  230. "max_output": row["max_output"],
  231. "input_price": row["input_price"], "output_price": row["output_price"],
  232. })
  233. # No duplicate ids.
  234. dupes = {x for x in documented_ids if documented_ids.count(x) > 1}
  235. if dupes:
  236. fail_validation("duplicate model ids in the table",
  237. {"ids": ", ".join(sorted(dupes))}, json_mode)
  238. # Cache-minimum table.
  239. cache_rows = parse_cache_min_table(cache_md.read_text(encoding="utf-8"))
  240. if not cache_rows:
  241. fail_validation("could not locate the cache-minimum table in caching-and-cost.md",
  242. {"file": str(cache_md)}, json_mode)
  243. for crow in cache_rows:
  244. if not re.fullmatch(r"\d+", crow["min_tokens"]):
  245. fail_validation("cache-minimum value is not an integer",
  246. {"row": crow["names"], "value": crow["min_tokens"]},
  247. json_mode)
  248. # Cross-file consistency: every model NAME (e.g. "Opus 4.8", "Fable 5",
  249. # "Sonnet 4.6", "Haiku 4.5") in the model table must appear in the cache
  250. # table's name set, so the two files agree on the model lineup.
  251. cache_blob = " ".join(c["names"] for c in cache_rows).lower()
  252. missing_in_cache: list[str] = []
  253. for m in models_out:
  254. # Derive the short family+version token, e.g. "Claude Opus 4.8" -> "opus 4.8".
  255. short = re.sub(r"^claude\s+", "", m["name"], flags=re.I).strip().lower()
  256. if short not in cache_blob:
  257. missing_in_cache.append(m["name"])
  258. if missing_in_cache:
  259. fail_validation(
  260. "model(s) in SKILL.md absent from the cache-minimum table — files contradict",
  261. {"missing": ", ".join(missing_in_cache),
  262. "hint": "every documented model needs a prompt-cache minimum row"},
  263. json_mode)
  264. note(f" {len(models_out)} model rows, all well-formed", quiet)
  265. note(f" {len(cache_rows)} cache-minimum rows, all integer", quiet)
  266. note(" cross-file model lineup consistent", quiet)
  267. note(f"{TERM.mark('ok')} OK: tables internally consistent.", quiet)
  268. return {
  269. "mode": "offline",
  270. "models": models_out,
  271. "documented_ids": documented_ids,
  272. "cache_min_rows": cache_rows,
  273. "consistent": True,
  274. }
  275. # ---------------------------------------------------------------------------
  276. # Live validation
  277. # ---------------------------------------------------------------------------
  278. def fetch_live_ids(quiet: bool) -> list[str] | None:
  279. """Return the live model-id list, or None if unavailable (advisory)."""
  280. key = os.environ.get("ANTHROPIC_API_KEY", "").strip()
  281. if not key:
  282. note("NOTE: ANTHROPIC_API_KEY is unset - skipping live check (advisory).",
  283. quiet)
  284. return None
  285. cmd = [
  286. "curl", "-fsS", "--max-time", "20",
  287. "-H", f"x-api-key: {key}",
  288. "-H", f"anthropic-version: {ANTHROPIC_VERSION}",
  289. MODELS_API,
  290. ]
  291. try:
  292. proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
  293. except (subprocess.TimeoutExpired, OSError) as exc:
  294. note(f"NOTE: Models API call failed ({exc}) — advisory, not a failure.",
  295. quiet)
  296. return None
  297. if proc.returncode != 0:
  298. note(f"NOTE: Models API unreachable (curl exit {proc.returncode}) — advisory.",
  299. quiet)
  300. if proc.stderr.strip():
  301. note(f" {proc.stderr.strip().splitlines()[-1]}", quiet)
  302. return None
  303. try:
  304. payload = json.loads(proc.stdout)
  305. except json.JSONDecodeError:
  306. note("NOTE: Models API returned non-JSON — advisory, not a failure.", quiet)
  307. return None
  308. data = payload.get("data")
  309. if not isinstance(data, list):
  310. note("NOTE: Models API JSON missing 'data' list — advisory.", quiet)
  311. return None
  312. return [m.get("id", "") for m in data if isinstance(m, dict) and m.get("id")]
  313. def validate_live(skill_dir: Path, json_mode: bool, quiet: bool) -> dict:
  314. if not _have("curl"):
  315. if json_mode:
  316. print(json.dumps({"error": {"code": "PRECONDITION",
  317. "message": "curl required for --live",
  318. "details": {}}}))
  319. print("ERROR: curl is required for --live", file=sys.stderr)
  320. sys.exit(EXIT_MISSING_DEP)
  321. # Reuse offline parse for the documented id set (also validates well-formedness).
  322. note(TERM.hdr("live model-id coverage check"), quiet)
  323. skill_md = skill_dir / "SKILL.md"
  324. if not skill_md.is_file():
  325. print(f"ERROR: required file not found: {skill_md}", file=sys.stderr)
  326. sys.exit(EXIT_NOT_FOUND)
  327. parsed = parse_model_table(skill_md.read_text(encoding="utf-8"))
  328. if not parsed or not parsed[0]:
  329. fail_validation("could not parse the model table for live comparison",
  330. {"file": str(skill_md)}, json_mode)
  331. documented = [clean_id(r["id_cell"]) for r in parsed[0]]
  332. live = fetch_live_ids(quiet)
  333. if live is None:
  334. # Advisory: not a failure. Exit 7.
  335. if json_mode:
  336. print(json.dumps({"data": {"mode": "live", "status": "unavailable",
  337. "documented_ids": documented, "live_ids": None},
  338. "meta": {"schema": SCHEMA, "status": "unavailable"}}))
  339. sys.exit(EXIT_UNAVAILABLE)
  340. live_set = set(live)
  341. doc_set = set(documented)
  342. # A documented id absent from the live list = drift (retired/typo).
  343. missing = sorted(doc_set - live_set)
  344. # A live id NEWER than anything documented = drift (table lacks a new model).
  345. # Restrict "newer" to well-formed alias ids so we ignore date-suffixed and
  346. # snapshot variants the docs intentionally don't list.
  347. live_alias = {m for m in live_set if ID_RE.match(m) and not DATE_SUFFIX_RE.search(m)}
  348. new_models = sorted(live_alias - doc_set)
  349. drift = bool(missing or new_models)
  350. result = {
  351. "mode": "live",
  352. "status": "drift" if drift else "ok",
  353. "documented_ids": documented,
  354. "live_ids": sorted(live_set),
  355. "missing_from_live": missing,
  356. "new_in_live": new_models,
  357. }
  358. if drift:
  359. if missing:
  360. note(f"{TERM.mark('bad')} {TERM.c('red', 'DRIFT: documented id(s) absent from live Models API:')}", quiet)
  361. for m in missing:
  362. note(f" {TERM.c('red', '-')} {m}", quiet)
  363. if new_models:
  364. note(f"{TERM.mark('bad')} {TERM.c('red', 'DRIFT: live Models API has alias id(s) the table lacks:')}", quiet)
  365. for m in new_models:
  366. note(f" {TERM.c('green', '+')} {m}", quiet)
  367. if json_mode:
  368. print(json.dumps({"data": result, "meta": {"schema": SCHEMA,
  369. "status": "drift"}}))
  370. else:
  371. print("DRIFT: model-id table disagrees with the live Models API "
  372. f"(missing={missing}, new={new_models})")
  373. sys.exit(EXIT_DRIFT)
  374. note("OK: every documented id exists live; no newer alias id missing from the table.",
  375. quiet)
  376. return result
  377. def _have(tool: str) -> bool:
  378. from shutil import which
  379. return which(tool) is not None
  380. # ---------------------------------------------------------------------------
  381. # Main
  382. # ---------------------------------------------------------------------------
  383. def main(argv: list[str]) -> int:
  384. parser = argparse.ArgumentParser(
  385. prog="check-model-table.py", add_help=True,
  386. description="Staleness verifier for the claude-api-ops model + cache tables.",
  387. epilog=(
  388. "EXAMPLES:\n"
  389. " check-model-table.py --offline\n"
  390. " check-model-table.py --offline --json | python -m json.tool\n"
  391. " ANTHROPIC_API_KEY=sk-... check-model-table.py --live\n"
  392. " check-model-table.py --live # exits 7 (advisory) when key unset\n"
  393. ),
  394. formatter_class=argparse.RawDescriptionHelpFormatter,
  395. )
  396. mode = parser.add_mutually_exclusive_group()
  397. mode.add_argument("--offline", action="store_true",
  398. help="parse + assert internal consistency, no network (default)")
  399. mode.add_argument("--live", action="store_true",
  400. help="compare documented ids against the live Models API (advisory)")
  401. parser.add_argument("--json", action="store_true",
  402. help="emit the JSON envelope on stdout")
  403. parser.add_argument("--skill-dir", default=None,
  404. help="skill root (default: parent of this script's dir)")
  405. parser.add_argument("-q", "--quiet", action="store_true",
  406. help="suppress stderr progress/notes")
  407. args = parser.parse_args(argv)
  408. if args.skill_dir:
  409. skill_dir = Path(args.skill_dir).resolve()
  410. else:
  411. skill_dir = Path(__file__).resolve().parent.parent
  412. if not skill_dir.is_dir():
  413. print(f"ERROR: skill dir not found: {skill_dir}", file=sys.stderr)
  414. return EXIT_NOT_FOUND
  415. if args.live:
  416. result = validate_live(skill_dir, args.json, args.quiet)
  417. else:
  418. result = validate_offline(skill_dir, args.json, args.quiet)
  419. if args.json:
  420. print(json.dumps({"data": result,
  421. "meta": {"schema": SCHEMA, "status": "ok"}}))
  422. return EXIT_OK
  423. if __name__ == "__main__":
  424. try:
  425. sys.exit(main(sys.argv[1:]))
  426. except KeyboardInterrupt:
  427. sys.exit(EXIT_USAGE)