validate-markdown-links.ts 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. #!/usr/bin/env bun
  2. import { existsSync, readdirSync, readFileSync } from 'node:fs'
  3. import { dirname, join, normalize } from 'node:path'
  4. import { fileURLToPath } from 'node:url'
  /** Absolute path of the repository root: two directory levels above the folder containing this script. */
  const REPO_ROOT = join(dirname(fileURLToPath(import.meta.url)), '..', '..')

  /** Absolute path of the text file that lists the skip rules read by `loadSkipPatterns`. */
  const SKIP_RULES_FILE = join(
    REPO_ROOT,
    'scripts',
    'validation',
    'markdown-link-skip-patterns.txt',
  )

  /** Repo-relative directories that are scanned for markdown files. */
  const TARGET_DIRS: string[] = [
    '.opencode/agent',
    '.opencode/skills',
    '.opencode/command',
    '.opencode/context',
  ]
  /** One broken reference found inside a markdown file. */
  interface MissingRef {
    /** Markdown file that contains the reference (repo prefix stripped). */
    source: string
    /** 1-based line number at which the reference starts. */
    line: number
    /** The target text as captured from the markdown source. */
    target: string
    /** Path the target resolved to (repo prefix stripped). */
    resolved: string
    /** `link` for `[text](target)` syntax, `path` for a bare `.opencode/...md` mention. */
    kind: 'link' | 'path'
  }
  /**
   * Backslash-escapes every regular-expression metacharacter in `value`, so the
   * result can be embedded in a `RegExp` and match the text literally.
   *
   * @param value Text to escape.
   * @returns The escaped text.
   */
  function escapeRegExp(value: string): string {
    const metaCharacters = /[.*+?^${}()|[\]\\]/g
    return value.replace(metaCharacters, (ch) => `\\${ch}`)
  }
  /**
   * Compiles one skip rule into a `RegExp`.
   *
   * A rule written as `/pattern/flags` is treated as a regex literal, but only
   * when the text after the last slash is a plausible flag list (known flag
   * letters, none repeated). Anything else, including a plain path such as
   * `/docs/guide`, is compiled as-is, so a leading slash no longer turns the
   * last path segment into "flags" and throws.
   *
   * The `g` flag is dropped: the compiled patterns are reused for many
   * `.test()` calls, and a global regex carries `lastIndex` between calls,
   * which makes results alternate. Removing `g` does not change whether a
   * string matches. Other flags (including sticky `y`) are passed through
   * unchanged.
   *
   * @param rule Rule text; surrounding whitespace is ignored.
   * @returns The compiled pattern.
   * @throws SyntaxError if the pattern itself is not a valid regular expression.
   */
  function parseRegexRule(rule: string): RegExp {
    const trimmed = rule.trim()
    if (trimmed.startsWith('/') && trimmed.length > 2) {
      const lastSlash = trimmed.lastIndexOf('/')
      if (lastSlash > 0) {
        const pattern = trimmed.slice(1, lastSlash)
        const flags = trimmed.slice(lastSlash + 1)
        const looksLikeFlags =
          /^[dgimsuvy]*$/.test(flags) && new Set(flags).size === flags.length
        if (looksLikeFlags) {
          return new RegExp(pattern, flags.replace('g', ''))
        }
      }
    }
    return new RegExp(trimmed)
  }
  /**
   * Reads the skip-rules file and compiles every rule in it.
   *
   * Blank lines and lines starting with `#` are ignored. A line of the form
   * `literal:foo/bar` matches that text literally; every other line is handed
   * to `parseRegexRule`.
   *
   * @returns Compiled patterns in file order.
   * @throws Error if the skip-rules file does not exist.
   */
  function loadSkipPatterns(): RegExp[] {
    if (!existsSync(SKIP_RULES_FILE)) {
      throw new Error(`Skip rules file not found: ${SKIP_RULES_FILE}`)
    }

    const literalPrefix = 'literal:'
    const rules = readFileSync(SKIP_RULES_FILE, 'utf-8')
      .split('\n')
      .map((rawLine) => rawLine.trim())
      .filter((rule) => rule !== '' && !rule.startsWith('#'))

    const compiled: RegExp[] = []
    for (const rule of rules) {
      if (!rule.startsWith(literalPrefix)) {
        compiled.push(parseRegexRule(rule))
        continue
      }
      // Literal rule: an empty literal is dropped instead of matching everything.
      const text = rule.slice(literalPrefix.length).trim()
      if (text) {
        compiled.push(new RegExp(escapeRegExp(text)))
      }
    }
    return compiled
  }
  /**
   * Tells whether a markdown file is excluded from validation.
   *
   * The first occurrence of `${REPO_ROOT}/` is removed from the path before it
   * is tested against the patterns.
   *
   * @param filePath Path of the candidate file.
   * @param skipPatterns Compiled skip rules.
   * @returns `true` when at least one pattern matches.
   */
  function shouldSkipFile(filePath: string, skipPatterns: RegExp[]): boolean {
    const repoPrefix = `${REPO_ROOT}/`
    const relPath = filePath.replace(repoPrefix, '')
    for (const pattern of skipPatterns) {
      if (pattern.test(relPath)) return true
    }
    return false
  }
  /**
   * Recursively collects the markdown files below `dir`.
   *
   * Entries named `.git`, `node_modules` or `build` are ignored. A file is
   * collected when its name ends in `.md` (case-insensitive) and
   * `shouldSkipFile` does not exclude it.
   *
   * @param dir Directory to scan.
   * @param out Array that receives the full path of each collected file.
   * @param skipPatterns Compiled skip rules.
   */
  function walkMdFiles(dir: string, out: string[], skipPatterns: RegExp[]): void {
    const ignoredNames = ['.git', 'node_modules', 'build']

    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      const { name } = entry
      if (ignoredNames.includes(name)) continue

      const full = join(dir, name)
      if (entry.isDirectory()) {
        walkMdFiles(full, out, skipPatterns)
        continue
      }

      const isMarkdown = entry.isFile() && name.toLowerCase().endsWith('.md')
      if (isMarkdown && !shouldSkipFile(full, skipPatterns)) {
        out.push(full)
      }
    }
  }
  /**
   * Converts a character offset into a 1-based line number.
   *
   * @param content Full text.
   * @param index Offset into `content`.
   * @returns One plus the number of `\n` characters in `content.slice(0, index)`.
   */
  function lineFromIndex(content: string, index: number): number {
    const before = content.slice(0, index)
    let line = 1
    for (let at = before.indexOf('\n'); at !== -1; at = before.indexOf('\n', at + 1)) {
      line += 1
    }
    return line
  }
  /**
   * Tells whether a reference target is exempt from the existence check.
   *
   * Exempt targets are: empty or whitespace-only text, targets starting with
   * `http://`, `https://`, `mailto:` or `#`, and anything matched by a skip
   * pattern.
   *
   * @param rawTarget Target text as captured from the markdown.
   * @param skipPatterns Compiled skip rules, tested against the trimmed target.
   * @returns `true` when the target must not be validated.
   */
  function shouldSkip(rawTarget: string, skipPatterns: RegExp[]): boolean {
    const value = rawTarget.trim()
    if (value === '') return true

    const exemptPrefixes = ['http://', 'https://', 'mailto:', '#']
    if (exemptPrefixes.some((prefix) => value.startsWith(prefix))) return true

    for (const pattern of skipPatterns) {
      if (pattern.test(value)) return true
    }
    return false
  }
  /**
   * Reduces a raw reference target to the bare path it points at.
   *
   * Steps, in order:
   *  1. trim surrounding whitespace;
   *  2. unwrap an angle-bracketed destination, both `<path>` and
   *     `<path> "title"` (the second form used to keep its brackets, so a
   *     second pass was needed to get the real path);
   *  3. drop everything from the first whitespace character on. This removes a
   *     link title, whether it is separated by a space, a tab or a newline;
   *  4. strip one leading `@`;
   *  5. strip the `#fragment` and `?query` parts.
   *
   * Cleaning an already-clean value returns it unchanged.
   *
   * @param rawTarget Target text as captured from the markdown.
   * @returns The cleaned path, or an empty string when nothing is left.
   */
  function cleanupTarget(rawTarget: string): string {
    let target = rawTarget.trim()
    if (!target) return target

    if (target.startsWith('<')) {
      if (target.endsWith('>')) {
        target = target.slice(1, -1)
      } else {
        // `<path> "title"`: the destination ends at the `>` that precedes the title.
        const closing = target.search(/>\s/)
        if (closing > 0) {
          target = target.slice(1, closing)
        }
      }
    }

    const firstWhitespace = target.search(/\s/)
    if (firstWhitespace >= 0) {
      target = target.slice(0, firstWhitespace)
    }

    target = target.replace(/^@/, '')

    // Cut at the fragment first, then at the query, without indexing into a
    // split() result (which is possibly-undefined under noUncheckedIndexedAccess).
    for (const separator of ['#', '?']) {
      const at = target.indexOf(separator)
      if (at >= 0) {
        target = target.slice(0, at)
      }
    }
    return target
  }
  /**
   * Maps a reference target to the filesystem path it should exist at.
   *
   * After `cleanupTarget`, a target starting with `.opencode/` or with `/` is
   * joined onto the repository root (the leading slash is dropped); any other
   * target is joined onto the directory of the file that contains it.
   *
   * @param sourceFile Path of the markdown file containing the reference.
   * @param rawTarget Reference target; it is cleaned before resolution.
   * @returns The joined path.
   */
  function resolveTarget(sourceFile: string, rawTarget: string): string {
    const target = cleanupTarget(rawTarget)

    let base = dirname(sourceFile)
    let remainder = target
    if (target.startsWith('.opencode/')) {
      base = REPO_ROOT
    } else if (target.startsWith('/')) {
      base = REPO_ROOT
      remainder = target.slice(1)
    }
    return join(base, remainder)
  }
  /**
   * Scans one markdown file and returns every internal reference whose target
   * does not exist on disk.
   *
   * Two passes run over the file content:
   *  - `link`: `[text](target)` links. Only targets that contain `.md` or start
   *    with `.opencode/` are checked.
   *  - `path`: bare `.opencode/...md` mentions, optionally prefixed with `@`.
   *
   * The path pattern also matches text inside a markdown link, so a broken
   * `[x](.opencode/foo.md)` used to be reported twice (once per kind). A path
   * hit is now dropped when it lies inside a link that was already reported
   * for the same resolved file.
   *
   * @param file Path of the markdown file to scan.
   * @param skipPatterns Compiled skip rules applied to each raw target.
   * @returns Broken references in discovery order (links first, then paths).
   */
  function collectMissingRefs(file: string, skipPatterns: RegExp[]): MissingRef[] {
    const content = readFileSync(file, 'utf-8')
    const repoPrefix = `${REPO_ROOT}/`
    const rel = file.replace(repoPrefix, '')
    const missing: MissingRef[] = []

    // Span and resolved path of every broken link, used to suppress path duplicates.
    const brokenLinks: Array<{ start: number; end: number; resolved: string }> = []

    // Appends one record; shared by both passes so the record shape stays in one place.
    const report = (
      index: number,
      target: string,
      resolved: string,
      kind: MissingRef['kind'],
    ): void => {
      missing.push({
        source: rel,
        line: lineFromIndex(content, index),
        target,
        resolved: resolved.replace(repoPrefix, ''),
        kind,
      })
    }

    const linkRegex = /\[[^\]]*\]\(([^)]+)\)/g
    for (const match of content.matchAll(linkRegex)) {
      const rawTarget = match[1]
      if (!rawTarget) continue
      if (shouldSkip(rawTarget, skipPatterns)) continue
      const cleaned = cleanupTarget(rawTarget)
      if (!cleaned) continue
      if (!cleaned.includes('.md') && !cleaned.startsWith('.opencode/')) continue

      const resolved = normalize(resolveTarget(file, cleaned))
      if (existsSync(resolved)) continue

      const start = match.index ?? 0
      report(start, rawTarget, resolved, 'link')
      brokenLinks.push({ start, end: start + match[0].length, resolved })
    }

    const pathRegex = /@?(\.opencode\/[A-Za-z0-9_./-]+\.md)/g
    for (const match of content.matchAll(pathRegex)) {
      const rawTarget = match[1]
      if (!rawTarget) continue
      if (shouldSkip(rawTarget, skipPatterns)) continue

      const resolved = normalize(resolveTarget(file, cleanupTarget(rawTarget)))
      if (existsSync(resolved)) continue

      const at = match.index ?? 0
      const alreadyReported = brokenLinks.some(
        (link) => at >= link.start && at < link.end && link.resolved === resolved,
      )
      if (alreadyReported) continue

      report(at, rawTarget, resolved, 'path')
    }

    return missing
  }
  /**
   * Removes repeated records, keeping the first occurrence of each.
   *
   * Two records are the same when their `source`, `line`, `target` and `kind`
   * all agree; `resolved` is not part of the comparison.
   *
   * @param refs Records to filter; the array is not modified.
   * @returns A new array in the original order without repeats.
   */
  function dedupe(refs: MissingRef[]): MissingRef[] {
    const seenKeys = new Set<string>()
    return refs.filter((ref) => {
      const key = [ref.source, ref.line, ref.target, ref.kind].join(':')
      if (seenKeys.has(key)) return false
      seenKeys.add(key)
      return true
    })
  }
  // Entry point: gather the markdown files below every target directory,
  // validate their references, print a report and exit with 0 (clean) or 1 (broken).
  const skipPatterns = loadSkipPatterns()

  const files: string[] = []
  for (const dir of TARGET_DIRS) {
    const absolute = join(REPO_ROOT, dir)
    // A target directory that does not exist is skipped rather than treated as an error.
    if (!existsSync(absolute)) continue
    walkMdFiles(absolute, files, skipPatterns)
  }

  const missing: MissingRef[] = dedupe(
    files.flatMap((file) => collectMissingRefs(file, skipPatterns)),
  )

  if (missing.length > 0) {
    console.log(`ERROR: found ${missing.length} broken internal markdown references:`)
    missing.forEach((ref) => {
      console.log(`- ${ref.source}:${ref.line} [${ref.kind}] ${ref.target} -> ${ref.resolved}`)
    })
    process.exit(1)
  }

  console.log(`OK: validated ${files.length} markdown files, no broken internal references found.`)
  process.exit(0)