assess-okf.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. #!/usr/bin/env python3
  2. # Read-only OKF-readiness scanner for a markdown doc-tree. NEVER writes.
  3. #
  4. # Usage: assess-okf.py [OPTIONS] [DOC_TREE]
  5. # Input: argv only. DOC_TREE is a directory of markdown files (default ".").
  6. # Output: stdout = data. Default = human-readable summary; --json = envelope
  7. # {"data":{...},"meta":{"schema":"claude-mods.okf-ops.assess-okf/v1",...}}.
  8. # Stderr: progress + the scan header.
  9. # Exit: 0 on a successful scan (readiness is DATA, not a failure), 2 usage, 3 not-found.
  10. #
  11. # Reports: total .md, how many have frontmatter, how many have non-empty `type`,
  12. # a histogram of frontmatter KEYS, a histogram of `type` VALUES, reserved files
  13. # present, files that would need a `type` to become conformant, OKF-readiness %,
  14. # and which OKF recommended fields already commonly appear.
  15. #
  16. # Examples:
  17. # assess-okf.py /path/to/docs
  18. # assess-okf.py --json /path/to/docs | jq '.data.readiness_pct'
  19. # assess-okf.py --top 10 .
  20. import argparse
  21. import json
  22. import sys
  23. from collections import Counter
  24. from pathlib import Path
  25. SCHEMA = "claude-mods.okf-ops.assess-okf/v1"
  26. SKIP_DIRS = {".git", "node_modules", ".claude", ".venv", "dist", "build"}
  27. RESERVED = {"index.md", "log.md"}
  28. RECOMMENDED = ("title", "description", "resource", "tags", "timestamp")
  29. try:
  30. import yaml # type: ignore
  31. _HAVE_YAML = True
  32. except Exception:
  33. yaml = None
  34. _HAVE_YAML = False
  35. def log(msg=""):
  36. print(msg, file=sys.stderr)
  37. def split_frontmatter(text):
  38. if text.startswith(""):
  39. text = text[1:]
  40. lines = text.splitlines()
  41. i = 0
  42. while i < len(lines) and lines[i].strip() == "":
  43. i += 1
  44. if i >= len(lines) or lines[i].strip() != "---":
  45. return None
  46. for j in range(i + 1, len(lines)):
  47. if lines[j].strip() == "---":
  48. return "\n".join(lines[i + 1:j])
  49. return None
  50. def parse_frontmatter(fm_str):
  51. """Return dict (possibly empty) or None if unparseable."""
  52. _yaml = yaml # local alias narrows cleanly (module global won't)
  53. if _yaml is not None:
  54. try:
  55. data = _yaml.safe_load(fm_str)
  56. if data is None:
  57. return {}
  58. if not isinstance(data, dict):
  59. return None
  60. return data
  61. except Exception:
  62. return None
  63. data = {}
  64. for raw in fm_str.splitlines():
  65. line = raw.rstrip()
  66. if not line.strip() or line.lstrip().startswith("#"):
  67. continue
  68. if line[0] in (" ", "\t", "-"):
  69. continue
  70. if ":" not in line:
  71. return None
  72. key, _, val = line.partition(":")
  73. key = key.strip()
  74. val = val.strip()
  75. if len(val) >= 2 and val[0] == val[-1] and val[0] in ("'", '"'):
  76. val = val[1:-1]
  77. if key:
  78. data[key] = val
  79. return data
  80. def main(argv=None):
  81. p = argparse.ArgumentParser(
  82. prog="assess-okf.py",
  83. description="Read-only OKF-readiness scanner (never writes).",
  84. add_help=True,
  85. )
  86. p.add_argument("tree", nargs="?", default=".", help="doc-tree directory (default .)")
  87. p.add_argument("--json", action="store_true", help="emit JSON envelope to stdout")
  88. p.add_argument("--top", type=int, default=20, metavar="N",
  89. help="cap histogram rows (default 20)")
  90. args = p.parse_args(argv)
  91. if args.top < 1:
  92. log("error: --top must be >= 1")
  93. return 2
  94. root = Path(args.tree)
  95. if not root.exists() or not root.is_dir():
  96. msg = f"doc-tree path not found or not a directory: {args.tree}"
  97. log(f"error: {msg}")
  98. if args.json:
  99. print(json.dumps({"error": {"code": "NOT_FOUND", "message": msg, "details": {}}}))
  100. return 3
  101. if not _HAVE_YAML:
  102. log("note: PyYAML not available — using minimal fallback frontmatter parser.")
  103. root = root.resolve()
  104. log(f"OKF-readiness scan: {root}")
  105. md_total = 0
  106. concept_total = 0
  107. have_frontmatter = 0
  108. have_type = 0
  109. unparseable = 0
  110. reserved_index = 0
  111. reserved_log = 0
  112. need_type = 0 # non-reserved concept docs lacking a non-empty type
  113. key_hist = Counter()
  114. type_hist = Counter()
  115. recommended_present = Counter()
  116. for path in sorted(root.rglob("*.md")):
  117. if any(part in SKIP_DIRS for part in path.parts):
  118. continue
  119. md_total += 1
  120. name = path.name.lower()
  121. try:
  122. text = path.read_text(encoding="utf-8", errors="replace")
  123. except Exception:
  124. text = ""
  125. if name == "index.md":
  126. reserved_index += 1
  127. if name == "log.md":
  128. reserved_log += 1
  129. fm_str = split_frontmatter(text)
  130. has_fm = fm_str is not None
  131. data = parse_frontmatter(fm_str) if has_fm else None
  132. if has_fm and data is not None:
  133. have_frontmatter += 1
  134. for k in data.keys():
  135. key_hist[str(k)] += 1
  136. for rk in RECOMMENDED:
  137. if rk in data:
  138. recommended_present[rk] += 1
  139. tv = data.get("type")
  140. if tv is not None and not (isinstance(tv, str) and tv.strip() == ""):
  141. have_type += 1
  142. type_hist[str(tv).strip()] += 1
  143. elif has_fm and data is None:
  144. unparseable += 1
  145. if name not in RESERVED:
  146. concept_total += 1
  147. tv = data.get("type") if isinstance(data, dict) else None
  148. conformant = (data is not None and tv is not None
  149. and not (isinstance(tv, str) and tv.strip() == ""))
  150. if not conformant:
  151. need_type += 1
  152. conformant_concepts = concept_total - need_type
  153. readiness_pct = round(100.0 * conformant_concepts / concept_total, 1) if concept_total else 0.0
  154. def top(counter):
  155. return [{"key": k, "count": c} for k, c in counter.most_common(args.top)]
  156. data_out = {
  157. "md_total": md_total,
  158. "concept_total": concept_total,
  159. "have_frontmatter": have_frontmatter,
  160. "have_frontmatter_pct": round(100.0 * have_frontmatter / md_total, 1) if md_total else 0.0,
  161. "have_nonempty_type": have_type,
  162. "have_type_pct": round(100.0 * have_type / md_total, 1) if md_total else 0.0,
  163. "unparseable_frontmatter": unparseable,
  164. "reserved_index_md": reserved_index,
  165. "reserved_log_md": reserved_log,
  166. "concepts_needing_type": need_type,
  167. "conformant_concepts": conformant_concepts,
  168. "readiness_pct": readiness_pct,
  169. "key_histogram": top(key_hist),
  170. "type_value_histogram": top(type_hist),
  171. "recommended_fields_present": [
  172. {"field": k, "count": recommended_present.get(k, 0)} for k in RECOMMENDED
  173. ],
  174. }
  175. meta = {
  176. "schema": SCHEMA,
  177. "tree": str(root),
  178. "top": args.top,
  179. "yaml_parser": "PyYAML" if _HAVE_YAML else "fallback",
  180. "distinct_keys": len(key_hist),
  181. "distinct_type_values": len(type_hist),
  182. }
  183. if args.json:
  184. print(json.dumps({"data": data_out, "meta": meta}, ensure_ascii=False))
  185. return 0
  186. # Human-readable summary to stdout.
  187. out = []
  188. out.append("OKF-readiness summary")
  189. out.append("=" * 60)
  190. out.append(f" doc-tree : {root}")
  191. out.append(f" yaml parser : {meta['yaml_parser']}")
  192. out.append("")
  193. out.append(f" markdown files (.md) : {md_total}")
  194. out.append(f" reserved index.md : {reserved_index}")
  195. out.append(f" reserved log.md : {reserved_log}")
  196. out.append(f" concept documents : {concept_total} (non-reserved)")
  197. out.append("")
  198. out.append(f" with parseable frontmatter : {have_frontmatter} "
  199. f"({data_out['have_frontmatter_pct']}% of all .md)")
  200. out.append(f" with non-empty `type` : {have_type} "
  201. f"({data_out['have_type_pct']}% of all .md)")
  202. out.append(f" unparseable frontmatter : {unparseable}")
  203. out.append("")
  204. out.append(f" concepts needing a `type` : {need_type}")
  205. out.append(f" conformant concepts : {conformant_concepts} / {concept_total}")
  206. out.append(f" OKF-READINESS : {readiness_pct}%")
  207. out.append("")
  208. out.append(f" Frontmatter KEYS (top {args.top}, {meta['distinct_keys']} distinct):")
  209. if key_hist:
  210. for k, c in key_hist.most_common(args.top):
  211. out.append(f" {c:6d} {k}")
  212. else:
  213. out.append(" (none)")
  214. out.append("")
  215. out.append(f" `type` VALUES (top {args.top}, {meta['distinct_type_values']} distinct):")
  216. if type_hist:
  217. for k, c in type_hist.most_common(args.top):
  218. out.append(f" {c:6d} {k}")
  219. else:
  220. out.append(" (none — no `type` keys present yet)")
  221. out.append("")
  222. out.append(" OKF recommended fields already present:")
  223. for k in RECOMMENDED:
  224. out.append(f" {recommended_present.get(k, 0):6d} {k}")
  225. print("\n".join(out))
  226. return 0
  227. if __name__ == "__main__":
  228. sys.exit(main())