#!/usr/bin/env python3
# Staleness verifier for the fast-moving facts the mapbox-ops skill encodes.
#
# Two modes (SKILL-RESOURCE-PROTOCOL.md §7):
#   --offline (default): NO network. Asserts the skill is internally consistent —
#                        style-catalog.json parses, the v3 Standard config enums
#                        (lightPreset/theme) agree between catalog and references,
#                        the terrain tileset IDs and version gates (weather >= 3.7,
#                        camera roll >= 3.5) are stated consistently, every classic
#                        style url matches its id, every third-party entry is
#                        addressable. Runs in PR CI and MAY block.
#   --live:              network. Resolves the concrete third-party style-JSON URLs
#                        and probes whether Mapbox GL JS has shipped a major beyond
#                        v3 (which would mean the whole skill needs a review pass).
#                        Runs in the scheduled freshness workflow and NEVER blocks a
#                        PR: a transient network failure is UNAVAILABLE (exit 7), only
#                        a confirmed change is DRIFT (exit 10).
#
# Usage:   check-mapbox-facts.py [--offline|--live] [--json] [-q] [--timeout SEC]
# Input:   none (reads the skill's own assets/ + references/ relative to this file)
# Output:  stdout = data only (text findings, or the --json envelope)
# Stderr:  headers, progress, warnings, errors
# Exit:    0 ok, 2 usage, 3 not-found (skill files missing), 4 validation
#          (offline inconsistency), 5 missing-dep, 7 unavailable (live network),
#          10 drift (live: a URL 404'd, or GL JS major bumped past v3)
#
# Examples:
#   check-mapbox-facts.py --offline
#   check-mapbox-facts.py --offline --json | jq '.data[] | select(.status!="ok")'
#   check-mapbox-facts.py --live --timeout 15
  31. """Staleness verifier for mapbox-ops (see header comment)."""
  32. from __future__ import annotations
  33. import argparse
  34. import json
  35. import re
  36. import sys
  37. from pathlib import Path
  38. EXIT_OK = 0
  39. EXIT_USAGE = 2
  40. EXIT_NOT_FOUND = 3
  41. EXIT_VALIDATION = 4
  42. EXIT_MISSING_DEP = 5
  43. EXIT_UNAVAILABLE = 7
  44. EXIT_DRIFT = 10
  45. SCHEMA = "claude-mods.mapbox-ops.facts/v1"
  46. SKILL_ROOT = Path(__file__).resolve().parent.parent
  47. CATALOG = SKILL_ROOT / "assets" / "style-catalog.json"
  48. REFS = SKILL_ROOT / "references"
  49. SKILL_MD = SKILL_ROOT / "SKILL.md"
  50. # Facts the skill commits to. Changing these is a deliberate edit; the verifier
  51. # asserts the skill states them consistently across catalog + references.
  52. EXPECTED_LIGHT_PRESET = {"dawn", "day", "dusk", "night"}
  53. EXPECTED_THEME = {"default", "faded", "monochrome"}
  54. TERRAIN_DEM_ID = "mapbox.mapbox-terrain-dem-v1"
  55. TERRAIN_VECTOR_ID = "mapbox.mapbox-terrain-v2"
  56. GLJS_MAJOR = 3 # the skill is scoped to mapbox-gl-js v3.x
  57. class Finding:
  58. __slots__ = ("check", "status", "detail")
  59. def __init__(self, check: str, status: str, detail: str) -> None:
  60. self.check = check
  61. self.status = status # ok | fail | drift | unavailable
  62. self.detail = detail
  63. def as_dict(self) -> dict:
  64. return {"check": self.check, "status": self.status, "detail": self.detail}
  65. def read_text(path: Path) -> str:
  66. return path.read_text(encoding="utf-8", errors="replace")
  67. # --------------------------------------------------------------------------- #
  68. # Offline checks #
  69. # --------------------------------------------------------------------------- #
  70. def run_offline(findings: list[Finding]) -> None:
  71. # Required files present (else NOT_FOUND, distinct from inconsistency).
  72. missing = [p for p in (CATALOG, SKILL_MD, REFS) if not p.exists()]
  73. if missing:
  74. for p in missing:
  75. findings.append(Finding("files-present", "fail", f"missing: {p}"))
  76. raise _NotFound()
  77. # O1 — catalog parses.
  78. try:
  79. catalog = json.loads(read_text(CATALOG))
  80. findings.append(Finding("catalog-json", "ok", "style-catalog.json parses"))
  81. except json.JSONDecodeError as exc:
  82. findings.append(Finding("catalog-json", "fail", f"invalid JSON: {exc}"))
  83. return # nothing else is checkable
  84. presets = catalog.get("standard_presets", {})
  85. v3_md = read_text(REFS / "v3-standard-style.md") if (REFS / "v3-standard-style.md").exists() else ""
  86. # O2 — lightPreset enum: catalog matches the committed set AND each value is
  87. # documented in v3-standard-style.md.
  88. light = set(presets.get("lightPreset", []))
  89. if light != EXPECTED_LIGHT_PRESET:
  90. findings.append(Finding("lightPreset-enum", "fail",
  91. f"catalog {sorted(light)} != expected {sorted(EXPECTED_LIGHT_PRESET)}"))
  92. else:
  93. undoc = [v for v in EXPECTED_LIGHT_PRESET if v not in v3_md]
  94. if undoc:
  95. findings.append(Finding("lightPreset-enum", "fail",
  96. f"values not documented in v3-standard-style.md: {undoc}"))
  97. else:
  98. findings.append(Finding("lightPreset-enum", "ok", "dawn|day|dusk|night consistent"))
  99. # O3 — theme enum.
  100. theme = set(presets.get("theme", []))
  101. if theme != EXPECTED_THEME:
  102. findings.append(Finding("theme-enum", "fail",
  103. f"catalog {sorted(theme)} != expected {sorted(EXPECTED_THEME)}"))
  104. else:
  105. findings.append(Finding("theme-enum", "ok", "default|faded|monochrome consistent"))
  106. # O4 — terrain tileset IDs present in terrain.md.
  107. terrain_md = read_text(REFS / "terrain.md") if (REFS / "terrain.md").exists() else ""
  108. for tid in (TERRAIN_DEM_ID, TERRAIN_VECTOR_ID):
  109. if tid in terrain_md:
  110. findings.append(Finding(f"terrain-id:{tid}", "ok", "present in terrain.md"))
  111. else:
  112. findings.append(Finding(f"terrain-id:{tid}", "fail", "absent from terrain.md"))
  113. # O5 — weather version gate agrees between catalog effects comment and dataviz ref.
  114. effects_comment = catalog.get("effects", {}).get("_comment", "")
  115. dataviz_md = read_text(REFS / "dataviz-and-3d.md") if (REFS / "dataviz-and-3d.md").exists() else ""
  116. cat_ver = _first_gl_gate(effects_comment)
  117. ref_ver = _first_gl_gate(dataviz_md, near="setRain") or _first_gl_gate(dataviz_md, near="Weather")
  118. if cat_ver and ref_ver and cat_ver == ref_ver == "3.7":
  119. findings.append(Finding("weather-gate", "ok", "GL JS >= 3.7 consistent (catalog + dataviz-and-3d.md)"))
  120. else:
  121. findings.append(Finding("weather-gate", "fail",
  122. f"weather version gate mismatch (catalog={cat_ver!r}, ref={ref_ver!r}, want 3.7)"))
  123. # O6 — camera roll gate >= 3.5 stated in camera-and-animation.md.
  124. camera_md = read_text(REFS / "camera-and-animation.md") if (REFS / "camera-and-animation.md").exists() else ""
  125. roll_ver = _first_gl_gate(camera_md, near="roll")
  126. if roll_ver == "3.5":
  127. findings.append(Finding("camera-roll-gate", "ok", "native roll GL JS >= 3.5 stated"))
  128. else:
  129. findings.append(Finding("camera-roll-gate", "fail",
  130. f"camera roll gate = {roll_ver!r}, want 3.5"))
  131. # O7 — GL JS major scope: SKILL.md says v3.
  132. skill_md = read_text(SKILL_MD)
  133. if re.search(rf"v{GLJS_MAJOR}\.x", skill_md) and re.search(rf"v{GLJS_MAJOR}\b", skill_md):
  134. findings.append(Finding("gljs-major", "ok", f"skill scoped to v{GLJS_MAJOR}.x"))
  135. else:
  136. findings.append(Finding("gljs-major", "fail", f"SKILL.md no longer clearly scopes v{GLJS_MAJOR}.x"))
  137. # O8 — every classic style url tail matches its id.
  138. bad_urls = []
  139. for s in catalog.get("styles", []):
  140. sid, url = s.get("id", ""), s.get("url", "")
  141. if not url.endswith("/" + sid) and not url.endswith(sid):
  142. bad_urls.append(f"{sid} -> {url}")
  143. if bad_urls:
  144. findings.append(Finding("style-url-id", "fail", "url/id mismatch: " + "; ".join(bad_urls)))
  145. else:
  146. findings.append(Finding("style-url-id", "ok", f"{len(catalog.get('styles', []))} style urls match ids"))
  147. # O9 — every third-party entry is addressable (has a url or an explanatory note).
  148. unaddressable = [t.get("id", "?") for t in catalog.get("third_party", [])
  149. if not t.get("url") and not t.get("note")]
  150. if unaddressable:
  151. findings.append(Finding("third-party-addressable", "fail",
  152. "no url and no note: " + ", ".join(unaddressable)))
  153. else:
  154. findings.append(Finding("third-party-addressable", "ok",
  155. f"{len(catalog.get('third_party', []))} third-party entries addressable"))
  156. def _first_gl_gate(text: str, near: str | None = None) -> str | None:
  157. """Return the first 'GL JS >= 3.N' version found, optionally on a line mentioning `near`."""
  158. pat = re.compile(r"(?:GL JS\s*)?[>≥]=?\s*(3\.\d+)")
  159. if near:
  160. for line in text.splitlines():
  161. if near in line:
  162. m = pat.search(line)
  163. if m:
  164. return m.group(1)
  165. return None
  166. m = pat.search(text)
  167. return m.group(1) if m else None
  168. class _NotFound(Exception):
  169. pass
  170. # --------------------------------------------------------------------------- #
  171. # Live checks #
  172. # --------------------------------------------------------------------------- #
  173. def run_live(findings: list[Finding], timeout: float) -> None:
  174. import urllib.error
  175. import urllib.request
  176. try:
  177. catalog = json.loads(read_text(CATALOG))
  178. except (OSError, json.JSONDecodeError) as exc:
  179. findings.append(Finding("catalog-json", "fail", f"cannot read catalog: {exc}"))
  180. raise _NotFound()
  181. def probe(url: str) -> str:
  182. """Return resolved | notfound | unavailable for a URL (HEAD, GET fallback)."""
  183. for method in ("HEAD", "GET"):
  184. req = urllib.request.Request(url, method=method,
  185. headers={"User-Agent": "mapbox-ops-staleness/1"})
  186. try:
  187. with urllib.request.urlopen(req, timeout=timeout) as resp:
  188. return "resolved" if resp.status < 400 else "unavailable"
  189. except urllib.error.HTTPError as e:
  190. if e.code in (404, 410):
  191. return "notfound"
  192. if e.code in (403, 405, 429):
  193. # forbidden/method-not-allowed/rate-limited: exists or can't tell.
  194. if method == "HEAD":
  195. continue # retry with GET
  196. return "unavailable" if e.code == 429 else "resolved"
  197. return "unavailable"
  198. except (urllib.error.URLError, TimeoutError, OSError):
  199. return "unavailable"
  200. return "unavailable"
  201. # L1 — concrete third-party style URLs (skip templated/keyed ones).
  202. for t in catalog.get("third_party", []):
  203. url = t.get("url")
  204. if not url or "<" in url or "key=" in url:
  205. continue
  206. res = probe(url)
  207. status = {"resolved": "ok", "notfound": "drift", "unavailable": "unavailable"}[res]
  208. findings.append(Finding(f"url:{t.get('id', url)}", status, url))
  209. # L2 — has Mapbox GL JS shipped a major beyond v3? A live v4.0.0 on the CDN
  210. # means the skill's scope assumption needs a human review pass (drift, not error).
  211. cdn = "https://api.mapbox.com/mapbox-gl-js/v{}.0.0/mapbox-gl.js"
  212. v3 = probe(cdn.format(GLJS_MAJOR))
  213. if v3 == "unavailable":
  214. findings.append(Finding("gljs-cdn", "unavailable", "Mapbox CDN unreachable"))
  215. else:
  216. nxt = probe(cdn.format(GLJS_MAJOR + 1))
  217. if nxt == "resolved":
  218. findings.append(Finding("gljs-major-bump", "drift",
  219. f"mapbox-gl-js v{GLJS_MAJOR + 1}.0.0 is live — review skill scope"))
  220. elif nxt == "unavailable":
  221. findings.append(Finding("gljs-major-bump", "unavailable",
  222. f"could not probe v{GLJS_MAJOR + 1} (network)"))
  223. else:
  224. findings.append(Finding("gljs-major-bump", "ok",
  225. f"v{GLJS_MAJOR} current; no v{GLJS_MAJOR + 1} GA"))
  226. # --------------------------------------------------------------------------- #
  227. # Main #
  228. # --------------------------------------------------------------------------- #
  229. def main(argv: list[str]) -> int:
  230. ap = argparse.ArgumentParser(add_help=True, description="mapbox-ops staleness verifier")
  231. mode = ap.add_mutually_exclusive_group()
  232. mode.add_argument("--offline", action="store_true", help="structural/internal-consistency only (default)")
  233. mode.add_argument("--live", action="store_true", help="resolve URLs + probe GL JS major (network)")
  234. ap.add_argument("--json", action="store_true", help="emit the JSON envelope on stdout")
  235. ap.add_argument("-q", "--quiet", action="store_true", help="suppress stderr progress")
  236. ap.add_argument("--timeout", type=float, default=10.0, help="per-request timeout for --live (seconds)")
  237. try:
  238. args = ap.parse_args(argv)
  239. except SystemExit as e:
  240. # argparse exits 2 on bad args (matches USAGE); 0 on --help.
  241. return EXIT_USAGE if e.code not in (0, None) else EXIT_OK
  242. live = args.live
  243. mode_name = "live" if live else "offline"
  244. def emit(msg: str) -> None:
  245. if not args.quiet:
  246. print(msg, file=sys.stderr)
  247. findings: list[Finding] = []
  248. emit(f"== check-mapbox-facts ({mode_name}) ==")
  249. try:
  250. if live:
  251. run_live(findings, args.timeout)
  252. else:
  253. run_offline(findings)
  254. except _NotFound:
  255. if args.json:
  256. print(json.dumps({"error": {"code": "NOT_FOUND",
  257. "message": "skill files missing",
  258. "details": [f.as_dict() for f in findings]}}))
  259. for f in findings:
  260. emit(f" [{f.status.upper()}] {f.check}: {f.detail}")
  261. return EXIT_NOT_FOUND
  262. n_fail = sum(1 for f in findings if f.status == "fail")
  263. n_drift = sum(1 for f in findings if f.status == "drift")
  264. n_unavail = sum(1 for f in findings if f.status == "unavailable")
  265. # Output: stdout is data only.
  266. if args.json:
  267. print(json.dumps({
  268. "data": [f.as_dict() for f in findings],
  269. "meta": {"mode": mode_name, "count": len(findings),
  270. "fail": n_fail, "drift": n_drift, "unavailable": n_unavail,
  271. "schema": SCHEMA},
  272. }, indent=2))
  273. else:
  274. for f in findings:
  275. print(f"{f.check}\t{f.status}\t{f.detail}")
  276. # Progress summary to stderr.
  277. for f in findings:
  278. if f.status != "ok":
  279. emit(f" [{f.status.upper()}] {f.check}: {f.detail}")
  280. emit(f"-- {len(findings)} checks: {n_fail} fail, {n_drift} drift, {n_unavail} unavailable")
  281. # Exit precedence: a real inconsistency (offline) or 404 (live) is the loudest
  282. # signal; an unavailable network is advisory and must never mask a clean run as
  283. # failing — but if the ONLY non-ok results are unavailable, exit 7, never 0.
  284. if n_fail:
  285. return EXIT_VALIDATION
  286. if n_drift:
  287. return EXIT_DRIFT
  288. if n_unavail:
  289. return EXIT_UNAVAILABLE
  290. return EXIT_OK
  291. if __name__ == "__main__":
  292. sys.exit(main(sys.argv[1:]))