check-okf.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. #!/usr/bin/env python3
  2. # Validate an Open Knowledge Format (OKF v0.1) bundle for conformance.
  3. #
  4. # Usage: check-okf.py [OPTIONS] [BUNDLE_DIR]
  5. # Input: argv only. BUNDLE_DIR is a directory of markdown files (default ".").
  6. # Output: stdout = data only. Default = TSV of findings (file<TAB>severity<TAB>message);
  7. # --json = envelope {"data":[...],"meta":{"schema":"claude-mods.okf-ops.check-okf/v1",...}}.
  8. # Stderr: headers, progress, the human-readable verdict, errors.
  9. # Exit: 0 conformant, 2 usage, 3 not-found, 4 frontmatter-present-but-unparseable,
  10. # 10 non-conformant (hard conformance failures, or soft warnings under --strict).
  11. #
  12. # OKF rules enforced (hard): every non-reserved .md has parseable YAML frontmatter
  13. # with a non-empty `type`. Reserved files (index.md, log.md) get light structural
  14. # sanity only. Per the permissive-consumption rule, broken links / missing optional
  15. # fields are INFO and never cause a conformance failure (unless --strict).
  16. #
  17. # Examples:
  18. # check-okf.py ./my-bundle
  19. # check-okf.py --json ./my-bundle | jq '.data[] | select(.severity=="error")'
  20. # check-okf.py --strict . # soft warnings also fail (exit 10)
  21. import argparse
  22. import json
  23. import sys
  24. from pathlib import Path
  25. SCHEMA = "claude-mods.okf-ops.check-okf/v1"
  26. SKIP_DIRS = {".git", "node_modules", ".claude", ".venv", "dist", "build"}
  27. RESERVED = {"index.md", "log.md"}
  28. RECOMMENDED = ("title", "description", "resource", "tags", "timestamp")
  29. try:
  30. import yaml # type: ignore
  31. _HAVE_YAML = True
  32. except Exception:
  33. yaml = None
  34. _HAVE_YAML = False
  35. def log(msg=""):
  36. print(msg, file=sys.stderr)
  37. def split_frontmatter(text):
  38. """Return (frontmatter_str_or_None, found_fences_bool).
  39. A document has frontmatter iff it starts (after optional BOM/whitespace-free
  40. leading newlines) with a line that is exactly '---' and has a closing '---'.
  41. """
  42. # Normalise leading BOM
  43. if text.startswith(""):
  44. text = text[1:]
  45. lines = text.splitlines()
  46. # Allow leading blank lines before the opening fence
  47. i = 0
  48. while i < len(lines) and lines[i].strip() == "":
  49. i += 1
  50. if i >= len(lines) or lines[i].strip() != "---":
  51. return None, False
  52. # find closing fence
  53. for j in range(i + 1, len(lines)):
  54. if lines[j].strip() == "---":
  55. return "\n".join(lines[i + 1:j]), True
  56. # opening fence with no close
  57. return None, True
  58. def parse_frontmatter(fm_str):
  59. """Parse frontmatter into a dict. Returns (dict_or_None, used_fallback_bool).
  60. dict is None when the block is genuinely unparseable.
  61. """
  62. _yaml = yaml # local alias narrows cleanly (module global won't)
  63. if _yaml is not None:
  64. try:
  65. data = _yaml.safe_load(fm_str)
  66. if data is None:
  67. return {}, False
  68. if not isinstance(data, dict):
  69. return None, False
  70. return data, False
  71. except Exception:
  72. return None, False
  73. # Fallback: minimal key: value line parser. Good enough to detect `type`.
  74. data = {}
  75. for raw in fm_str.splitlines():
  76. line = raw.rstrip()
  77. if not line.strip() or line.lstrip().startswith("#"):
  78. continue
  79. # only treat top-level (non-indented) key: value lines as keys
  80. if line[0] in (" ", "\t", "-"):
  81. continue
  82. if ":" not in line:
  83. return None, True # not a simple key:value block -> unparseable in fallback
  84. key, _, val = line.partition(":")
  85. key = key.strip()
  86. val = val.strip()
  87. # strip surrounding quotes
  88. if len(val) >= 2 and val[0] == val[-1] and val[0] in ("'", '"'):
  89. val = val[1:-1]
  90. if key:
  91. data[key] = val
  92. return data, True
  93. def main(argv=None):
  94. p = argparse.ArgumentParser(
  95. prog="check-okf.py",
  96. description="Validate an OKF v0.1 bundle for conformance.",
  97. add_help=True,
  98. )
  99. p.add_argument("bundle", nargs="?", default=".", help="bundle directory (default .)")
  100. p.add_argument("--json", action="store_true", help="emit JSON envelope to stdout")
  101. p.add_argument("--strict", action="store_true",
  102. help="soft warnings also count toward non-conformance (exit 10)")
  103. try:
  104. args = p.parse_args(argv)
  105. except SystemExit as e:
  106. # argparse exits 0 on --help, 2 on error — both acceptable per protocol
  107. raise
  108. root = Path(args.bundle)
  109. if not root.exists() or not root.is_dir():
  110. msg = f"bundle path not found or not a directory: {args.bundle}"
  111. log(f"error: {msg}")
  112. if args.json:
  113. print(json.dumps({"error": {"code": "NOT_FOUND", "message": msg, "details": {}}}))
  114. return 3
  115. if not _HAVE_YAML:
  116. log("note: PyYAML not available — using minimal fallback frontmatter parser.")
  117. root = root.resolve()
  118. log(f"OKF conformance check: {root}")
  119. findings = [] # list of {file, severity, message}
  120. unparseable = False # any frontmatter-present-but-unparseable
  121. md_total = 0
  122. concept_total = 0
  123. okf_version = None
  124. for path in sorted(root.rglob("*.md")):
  125. # skip excluded dirs
  126. if any(part in SKIP_DIRS or part == "worktrees" for part in path.parts):
  127. # only skip 'worktrees' when under a .claude dir
  128. if "worktrees" in path.parts:
  129. idx = path.parts.index("worktrees")
  130. if idx > 0 and path.parts[idx - 1] == ".claude":
  131. continue
  132. if any(part in SKIP_DIRS for part in path.parts):
  133. continue
  134. md_total += 1
  135. rel = path.relative_to(root).as_posix()
  136. name = path.name.lower()
  137. try:
  138. text = path.read_text(encoding="utf-8", errors="replace")
  139. except Exception as e:
  140. findings.append({"file": rel, "severity": "error",
  141. "message": f"could not read file: {e}"})
  142. continue
  143. fm_str, found_fences = split_frontmatter(text)
  144. if name in RESERVED:
  145. # Light structural sanity only.
  146. is_root_index = (name == "index.md" and path.parent == root)
  147. if found_fences and fm_str is not None and name == "index.md":
  148. # allowed exception: root index.md may declare okf_version
  149. data, _ = parse_frontmatter(fm_str)
  150. if data and "okf_version" in data:
  151. okf_version = data.get("okf_version")
  152. if not is_root_index and data is not None:
  153. findings.append({"file": rel, "severity": "warning",
  154. "message": "non-root index.md has frontmatter "
  155. "(only root index.md may declare okf_version)"})
  156. if found_fences and fm_str is None:
  157. findings.append({"file": rel, "severity": "warning",
  158. "message": "reserved file opens '---' fence but never closes it"})
  159. # very light content sanity
  160. if name == "log.md":
  161. import re as _re
  162. if not _re.search(r"(?m)^#{1,6}\s*\d{4}-\d{2}-\d{2}", text) and text.strip():
  163. findings.append({"file": rel, "severity": "info",
  164. "message": "log.md has no ISO-8601 (YYYY-MM-DD) date headings"})
  165. continue
  166. # Non-reserved => concept document. Hard requirements apply.
  167. concept_total += 1
  168. if not found_fences or fm_str is None:
  169. if found_fences and fm_str is None:
  170. # fence opened but unparseable / unclosed
  171. findings.append({"file": rel, "severity": "error",
  172. "message": "frontmatter fence present but block is unparseable "
  173. "(no closing '---')"})
  174. unparseable = True
  175. else:
  176. findings.append({"file": rel, "severity": "error",
  177. "message": "missing YAML frontmatter (no leading '---' block)"})
  178. continue
  179. data, _ = parse_frontmatter(fm_str)
  180. if data is None:
  181. findings.append({"file": rel, "severity": "error",
  182. "message": "frontmatter present but not parseable as YAML"})
  183. unparseable = True
  184. continue
  185. type_val = data.get("type")
  186. if type_val is None or (isinstance(type_val, str) and type_val.strip() == ""):
  187. findings.append({"file": rel, "severity": "error",
  188. "message": "frontmatter missing non-empty `type` field"})
  189. continue
  190. # Soft INFO: note missing recommended fields (never a hard failure).
  191. missing = [k for k in RECOMMENDED if k not in data]
  192. if missing:
  193. findings.append({"file": rel, "severity": "info",
  194. "message": "missing recommended fields: " + ", ".join(missing)})
  195. errors = [f for f in findings if f["severity"] == "error"]
  196. warnings = [f for f in findings if f["severity"] == "warning"]
  197. infos = [f for f in findings if f["severity"] == "info"]
  198. # Determine exit code.
  199. if unparseable:
  200. exit_code = 4
  201. elif errors:
  202. exit_code = 10
  203. elif args.strict and warnings:
  204. exit_code = 10
  205. else:
  206. exit_code = 0
  207. conformant = (exit_code == 0)
  208. meta = {
  209. "schema": SCHEMA,
  210. "bundle": str(root),
  211. "okf_version": okf_version,
  212. "md_total": md_total,
  213. "concept_total": concept_total,
  214. "errors": len(errors),
  215. "warnings": len(warnings),
  216. "infos": len(infos),
  217. "conformant": conformant,
  218. "yaml_parser": "PyYAML" if _HAVE_YAML else "fallback",
  219. "strict": args.strict,
  220. }
  221. # Human verdict to stderr.
  222. log("")
  223. log(f" markdown files scanned : {md_total}")
  224. log(f" concept documents : {concept_total}")
  225. log(f" errors : {len(errors)}")
  226. log(f" warnings : {len(warnings)}")
  227. log(f" info : {len(infos)}")
  228. if okf_version:
  229. log(f" declared okf_version : {okf_version}")
  230. if exit_code == 0:
  231. log(" verdict : CONFORMANT")
  232. elif exit_code == 4:
  233. log(" verdict : INVALID (unparseable frontmatter present)")
  234. else:
  235. log(" verdict : NON-CONFORMANT")
  236. # Data product.
  237. if args.json:
  238. print(json.dumps({"data": findings, "meta": meta}, ensure_ascii=False))
  239. else:
  240. # TSV: file<TAB>severity<TAB>message (data only, no header line on stdout)
  241. for f in findings:
  242. print(f"{f['file']}\t{f['severity']}\t{f['message']}")
  243. return exit_code
  244. if __name__ == "__main__":
  245. sys.exit(main())