pre-commit-unicode-scan.sh 4.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. #!/bin/bash
  2. # hooks/pre-commit-unicode-scan.sh
  3. # Git pre-commit hook — refuse commits that ADD hidden Unicode to instruction files.
  4. #
  5. # This is a GIT hook (not a Claude Code hook). It catches the one case nothing at
  6. # read-time can: a poisoned CLAUDE.md / AGENTS.md / SKILL.md / .cursorrules entering
  7. # the repo via your own commit (PR, template, or pasted-from-untrusted-source content).
  8. #
  9. # Install (per repo):
  10. # ln -sf ../../hooks/pre-commit-unicode-scan.sh .git/hooks/pre-commit
  11. # # or, if combining with other pre-commit logic, call it from your existing hook:
  12. # # bash hooks/pre-commit-unicode-scan.sh || exit 1
  13. #
  14. # Behaviour (silent guardian, severity-graded):
  15. # clean → no output, exit 0 (commit proceeds)
  16. # high/medium finding→ warning to stderr, exit 0 (commit proceeds — legit in
  17. # multilingual files; you decide)
  18. # critical finding → block message to stderr, exit 1 (commit refused — tag-block /
  19. # bidi override are never legitimate; sanitise first)
  20. #
  21. # Override a block once (you've confirmed it's intentional, e.g. a doc demonstrating
  22. # an attack as a literal): PROMPT_INJECTION_ALLOW=1 git commit ...
  23. #
  24. # Exit codes:
  25. # 0 = allow commit (clean, advisory-only finding, or scanner/python unavailable)
  26. # 1 = block commit (critical finding, not overridden)
  27. set -uo pipefail # NOT -e: only an explicit critical finding should block
  28. # ── Locate the scanner (repo + installed layouts share the hooks/ ↔ skills/ sibling) ─
  29. SELF_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd)"
  30. SCANNER=""
  31. for cand in \
  32. "$SELF_DIR/../skills/prompt-injection-defense/scripts/scan-hidden-unicode.py" \
  33. "$HOME/.claude/skills/prompt-injection-defense/scripts/scan-hidden-unicode.py"; do
  34. [ -f "$cand" ] && { SCANNER="$cand"; break; }
  35. done
  36. [ -n "$SCANNER" ] || exit 0 # scanner not installed → don't break commits
  37. PY=""
  38. for c in python3 python py; do
  39. command -v "$c" >/dev/null 2>&1 && "$c" -c "import sys" >/dev/null 2>&1 && { PY="$c"; break; }
  40. done
  41. [ -n "$PY" ] || exit 0
  42. # ── Staged added/modified instruction files ───────────────────────────────────
  43. INSTR_RE='\.(md|mdc)$|(^|/)(CLAUDE|AGENTS|GEMINI|COPILOT|CURSOR|WARP)\.md$|(^|/)\.(cursorrules|windsurfrules|clinerules)$'
  44. mapfile -t FILES < <(git diff --cached --name-only --diff-filter=AM 2>/dev/null | grep -iE "$INSTR_RE" || true)
  45. [ "${#FILES[@]}" -eq 0 ] && exit 0 # no instruction files staged → silent
  46. # Only scan files that exist in the working tree (staged content on disk).
  47. EXIST=()
  48. for f in "${FILES[@]}"; do [ -f "$f" ] && EXIST+=("$f"); done
  49. [ "${#EXIST[@]}" -eq 0 ] && exit 0
  50. # ── Scan with --json to read the worst severity ───────────────────────────────
  51. JSON="$("$PY" "$SCANNER" --json "${EXIST[@]}" 2>/dev/null)"
  52. RC=$?
  53. [ "$RC" -eq 0 ] && exit 0 # clean → silent, commit proceeds
  54. WORST="$(printf '%s' "$JSON" | "$PY" -c 'import sys,json
  55. try: print(json.load(sys.stdin)["meta"]["worst_severity"])
  56. except Exception: print("unknown")' 2>/dev/null)"
  57. # Human-readable finding lines (file:line:col band) for the message.
  58. DETAIL="$("$PY" "$SCANNER" "${EXIST[@]}" 2>/dev/null | head -20)"
  59. if [ "$WORST" = "critical" ]; then
  60. if [ "${PROMPT_INJECTION_ALLOW:-0}" = "1" ]; then
  61. echo "prompt-injection: CRITICAL hidden-Unicode in staged instruction files —" >&2
  62. echo " allowed by PROMPT_INJECTION_ALLOW=1. Make sure this is intentional." >&2
  63. exit 0
  64. fi
  65. {
  66. echo "COMMIT BLOCKED — prompt-injection-defense"
  67. echo "Critical hidden-Unicode (tag-block ASCII smuggling or bidi override) in staged"
  68. echo "instruction files. These render as nothing / reorder text — never legitimate here:"
  69. echo ""
  70. printf '%s\n' "$DETAIL"
  71. echo ""
  72. echo "Fix: python <skills>/prompt-injection-defense/scripts/sanitize-content.py <file> -o <file>"
  73. echo "Then re-stage and commit. Override (only if intentional, e.g. an attack-demo doc):"
  74. echo " PROMPT_INJECTION_ALLOW=1 git commit ..."
  75. } >&2
  76. exit 1
  77. fi
  78. # high / medium → advisory, allow the commit
  79. {
  80. echo "prompt-injection ADVISORY: ${WORST}-severity hidden-Unicode in staged instruction files."
  81. echo "Legitimate in genuinely multilingual text; suspicious otherwise. Commit allowed."
  82. printf '%s\n' "$DETAIL" | head -8
  83. } >&2
  84. exit 0