#!/bin/bash
# hooks/pre-commit-unicode-scan.sh
# Git pre-commit hook — refuse commits that ADD hidden Unicode to instruction files.
#
# This is a GIT hook (not a Claude Code hook). It catches the one case that nothing
# at read-time can: a poisoned CLAUDE.md / AGENTS.md / SKILL.md / .cursorrules entering
# the repo via your own commit (PR, template, or pasted-from-untrusted-source content).
#
# Install (per repo):
#   ln -sf ../../hooks/pre-commit-unicode-scan.sh .git/hooks/pre-commit
#   # or, if combining with other pre-commit logic, call it from your existing hook:
#   #   bash hooks/pre-commit-unicode-scan.sh || exit 1
#
# Behaviour (silent guardian, severity-graded):
#   clean               → no output, exit 0 (commit proceeds)
#   high/medium finding → warning to stderr, exit 0 (commit proceeds — legit in
#                         multilingual files; you decide)
#   critical finding    → block message to stderr, exit 1 (commit refused — tag-block /
#                         bidi override are never legitimate; sanitise first)
#
# Override a block once (you've confirmed it's intentional, e.g. a doc demonstrating
# an attack as a literal): PROMPT_INJECTION_ALLOW=1 git commit ...
#
# Exit codes:
#   0 = allow commit (clean, advisory-only finding, or scanner/python unavailable)
#   1 = block commit (critical finding, not overridden)
set -uo pipefail # NOT -e: only an explicit critical finding should block

# ── Locate the scanner (repo + installed layouts share the hooks/ ↔ skills/ sibling) ─
# Resolve symlinks on the script path before taking its directory. When the hook
# is installed as a symlink (.git/hooks/pre-commit → ../../hooks/<this file>),
# BASH_SOURCE[0] names the link, so its dirname is .git/hooks and the sibling
# lookup would probe .git/skills/… instead of the real skills/ directory.
# Plain `readlink` (no -f) is used one hop at a time for portability.
SELF_PATH="${BASH_SOURCE[0]}"
hops=0
while [ -L "$SELF_PATH" ] && [ "$hops" -lt 32 ]; do # cap guards against link cycles
  link_dir="$(cd -P "$(dirname "$SELF_PATH")" 2>/dev/null && pwd)"
  link_target="$(readlink "$SELF_PATH" 2>/dev/null)"
  [ -n "$link_target" ] || break
  # A relative target is relative to the directory that holds the link.
  case "$link_target" in
    /*) SELF_PATH="$link_target" ;;
    *) SELF_PATH="$link_dir/$link_target" ;;
  esac
  hops=$((hops + 1))
done
# SELF_DIR: directory of the real script file. LINK_DIR: directory of the path
# the hook was invoked through (identical when no symlink is involved); it is
# kept as a fallback candidate so the previous lookup location still works.
SELF_DIR="$(cd -P "$(dirname "$SELF_PATH")" 2>/dev/null && pwd)"
LINK_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd)"

# First existing candidate wins. ${HOME:-} keeps `set -u` from aborting the hook
# (and thereby blocking the commit) when HOME is not set.
SCANNER=""
for cand in \
  "$SELF_DIR/../skills/prompt-injection-defense/scripts/scan-hidden-unicode.py" \
  "$LINK_DIR/../skills/prompt-injection-defense/scripts/scan-hidden-unicode.py" \
  "${HOME:-}/.claude/skills/prompt-injection-defense/scripts/scan-hidden-unicode.py"; do
  if [ -f "$cand" ]; then
    SCANNER="$cand"
    break
  fi
done
[ -n "$SCANNER" ] || exit 0 # scanner not installed → don't break commits
# ── Pick a Python interpreter ─────────────────────────────────────────────────
# Candidates are tried in order; the first one that is found by `command -v`
# AND can actually run a trivial program is kept in PY. A name that resolves
# but cannot execute `import sys` is skipped.
# NOTE(review): any Python version passes this probe — confirm the scanner's
# minimum version.
PY=""
for interp in python3 python py; do
  command -v "$interp" >/dev/null 2>&1 || continue
  "$interp" -c "import sys" >/dev/null 2>&1 || continue
  PY="$interp"
  break
done
# No usable interpreter → allow the commit rather than fail the hook.
if [ -z "$PY" ]; then
  exit 0
fi
# ── Staged added/modified instruction files ───────────────────────────────────
# Case-insensitive ERE matched against each staged path (as git prints it).
INSTR_RE='\.(md|mdc)$|(^|/)(CLAUDE|AGENTS|GEMINI|COPILOT|CURSOR|WARP)\.md$|(^|/)\.(cursorrules|windsurfrules|clinerules)$'

# FILES: staged (added/modified) paths matching INSTR_RE.
# EXIST: the subset present on disk — this is what gets handed to the scanner.
#
# Paths are read NUL-delimited (`git diff -z`) with a plain `read -d ''` loop:
#   * -z makes git print paths verbatim; without it, names containing non-ASCII
#     or special characters are emitted quoted/escaped, fail the -f test and
#     would silently escape the scan;
#   * the loop avoids `mapfile`, which older bash releases do not provide.
# NOTE(review): the -f test and the scan use the working-tree copy, which can
# differ from the staged blob after a partial `git add`.
# NOTE(review): paths are used as printed by git — presumably the hook runs
# from the top of the working tree; confirm for manual invocations.
FILES=()
EXIST=()
shopt -s nocasematch # mirrors the previous case-insensitive grep
while IFS= read -r -d '' f; do
  [[ "$f" =~ $INSTR_RE ]] || continue
  FILES+=("$f")
  [ -f "$f" ] || continue
  # Keep a leading '-' from being parsed as a scanner option.
  case "$f" in
    -*) f="./$f" ;;
  esac
  EXIST+=("$f")
done < <(git diff --cached --name-only -z --diff-filter=AM 2>/dev/null)
shopt -u nocasematch

[ "${#FILES[@]}" -eq 0 ] && exit 0 # no instruction files staged → silent
[ "${#EXIST[@]}" -eq 0 ] && exit 0 # none of them present on disk → nothing to scan
# ── Scan with --json to read the worst severity ───────────────────────────────
# RC is the scanner's own exit status (status of the command substitution).
JSON="$("$PY" "$SCANNER" --json "${EXIST[@]}" 2>/dev/null)"
RC=$?
[ "$RC" -eq 0 ] && exit 0 # clean → silent, commit proceeds

# Pull meta.worst_severity out of the JSON report; anything unparseable → "unknown".
WORST="$(printf '%s' "$JSON" | "$PY" -c 'import sys,json
try: print(json.load(sys.stdin)["meta"]["worst_severity"])
except Exception: print("unknown")' 2>/dev/null)"
# Command substitution strips only trailing LF. Drop any CR as well so that a
# CRLF-emitting interpreter (e.g. a native Windows Python) cannot turn
# "critical" into "critical\r" and slip past the string comparison below.
WORST="${WORST//$'\r'/}"

# Human-readable finding lines (file:line:col band) for the message.
DETAIL="$("$PY" "$SCANNER" "${EXIST[@]}" 2>/dev/null | head -20)"

# Non-zero exit but no readable verdict: the scanner failed (or changed its
# output format). Report that as what it is instead of as an
# "unknown-severity" finding, and fail open like every other scanner problem.
if [ -z "$WORST" ] || [ "$WORST" = "unknown" ]; then
  {
    echo "prompt-injection: scanner exited with status ${RC} but its JSON verdict could not be read."
    echo "Staged instruction files may NOT have been checked. Commit allowed."
    if [ -n "$DETAIL" ]; then
      printf '%s\n' "$DETAIL" | head -8
    fi
  } >&2
  exit 0
fi
# ── Critical verdict: refuse the commit unless explicitly overridden ──────────
# Reads WORST (severity string) and DETAIL (finding lines) set earlier in the
# script. Everything is written to stderr. Any other severity falls through.
if [[ "$WORST" == "critical" ]]; then
  if [[ "${PROMPT_INJECTION_ALLOW:-0}" != "1" ]]; then
    # No override → explain, show the findings, and block (exit 1).
    printf '%s\n' \
      "COMMIT BLOCKED — prompt-injection-defense" \
      "Critical hidden-Unicode (tag-block ASCII smuggling or bidi override) in staged" \
      "instruction files. These render as nothing / reorder text — never legitimate here:" \
      "" \
      "$DETAIL" \
      "" \
      "Fix: python <skills>/prompt-injection-defense/scripts/sanitize-content.py <file> -o <file>" \
      "Then re-stage and commit. Override (only if intentional, e.g. an attack-demo doc):" \
      " PROMPT_INJECTION_ALLOW=1 git commit ..." >&2
    exit 1
  fi
  # PROMPT_INJECTION_ALLOW=1 → note the override and let the commit through.
  printf '%s\n' \
    "prompt-injection: CRITICAL hidden-Unicode in staged instruction files —" \
    " allowed by PROMPT_INJECTION_ALLOW=1. Make sure this is intentional." >&2
  exit 0
fi
# ── Any non-critical verdict (high / medium): advisory only ───────────────────
# Prints a two-line notice naming the severity in WORST, followed by at most
# eight lines of DETAIL, all on stderr; the commit is always allowed.
printf '%s\n' \
  "prompt-injection ADVISORY: ${WORST}-severity hidden-Unicode in staged instruction files." \
  "Legitimate in genuinely multilingual text; suspicious otherwise. Commit allowed." >&2
printf '%s\n' "$DETAIL" | head -8 >&2
exit 0