#!/bin/bash
# evaluate.sh - Stop hook: evaluate session for skill-worthy workflows
#
# Suggests skill creation only when a session shows genuine workflow complexity:
#   - 8+ mutating tool calls (high threshold, reduces noise)
#   - 4+ distinct mutating tool types (diversity = workflow, not repetitive edits)
#   - No non-harness skill was loaded (novel work, not following a recipe).
#     Harness skills (sync, save, introspect, auto-skill, setperms, tool-discovery)
#     are whitelisted — they're meta/bootstrap, not domain-specific, so loading
#     them shouldn't disqualify an otherwise novel session.
#   - Per-session cooldown file prevents re-fire on resume
#
# Output channels (when a suggestion fires):
#   1. systemMessage JSON on stdout - visible to Claude on next turn
#   2. Appended line to ~/.claude/auto-skill/pending.log - visible to user
#      at next /sync (since Claude's systemMessage often dies silently if
#      the user's next prompt doesn't invite it to be mentioned).
#
# Toggle: touch ~/.claude/auto-skill.disable   (global off)
#         touch .claude/auto-skill.disable     (project off)
#         rm either file to re-enable
#
# CRITICAL: This hook must NEVER fail visibly. All errors suppressed.
  24. {
  25. INPUT=$(cat)
  26. SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // empty' 2>/dev/null)
  27. [ -z "$SESSION_ID" ] && exit 0
  28. SHORT_ID="${SESSION_ID:0:8}"
  29. TRACK_FILE="/tmp/claude_autoskill_${SHORT_ID}"
  30. # No tracking file = no tool calls recorded
  31. [ -f "$TRACK_FILE" ] || exit 0
  32. # --- Toggle: global or project disable ---
  33. if [ -f "$HOME/.claude/auto-skill.disable" ] || [ -f ".claude/auto-skill.disable" ]; then
  34. rm -f "$TRACK_FILE"
  35. exit 0
  36. fi
  37. # --- Per-session cooldown: only suggest once per session ---
  38. SUGGESTED_FILE="/tmp/claude_autoskill_suggested_${SHORT_ID}"
  39. if [ -f "$SUGGESTED_FILE" ]; then
  40. rm -f "$TRACK_FILE"
  41. exit 0
  42. fi
  43. # --- Classify tools ---
  44. READ_ONLY_LIST=" Read Glob Grep LS NotebookRead TaskList TaskGet TaskCreate TaskUpdate TaskOutput TaskStop "
  45. # Harness skills: loading these should NOT disqualify a session
  46. HARNESS_SKILLS=" sync save introspect auto-skill setperms tool-discovery "
  47. SKILL_LOADED=false
  48. TOTAL=0
  49. WRITES=0
  50. UNIQUE_TYPES=""
  51. while IFS= read -r tool; do
  52. [ -z "$tool" ] && continue
  53. TOTAL=$((TOTAL + 1))
  54. # Handle Skill tool (tagged as "Skill:<name>" by track-tools.sh, or bare
  55. # "Skill" from pre-whitelist versions)
  56. case "$tool" in
  57. Skill:*)
  58. skill_name="${tool#Skill:}"
  59. # Is it a harness skill? If so, ignore entirely.
  60. case "$HARNESS_SKILLS" in
  61. *" ${skill_name} "*) continue ;;
  62. *) SKILL_LOADED=true; continue ;;
  63. esac
  64. ;;
  65. Skill)
  66. # Legacy format (pre-whitelist): conservatively disqualify
  67. SKILL_LOADED=true
  68. continue
  69. ;;
  70. esac
  71. # Check if read-only (space-padded list for exact word match)
  72. case "$READ_ONLY_LIST" in
  73. *" ${tool} "*) continue ;;
  74. esac
  75. WRITES=$((WRITES + 1))
  76. # Track unique mutating tool types
  77. case " $UNIQUE_TYPES " in
  78. *" ${tool} "*) ;; # already seen
  79. *) UNIQUE_TYPES="${UNIQUE_TYPES} ${tool}" ;;
  80. esac
  81. done < "$TRACK_FILE"
  82. # Count unique types (count words in UNIQUE_TYPES)
  83. UNIQUE_COUNT=0
  84. for _ in $UNIQUE_TYPES; do
  85. UNIQUE_COUNT=$((UNIQUE_COUNT + 1))
  86. done
  87. # Build tool summary before cleanup
  88. TOOL_SUMMARY=$(sort "$TRACK_FILE" | uniq -c | sort -rn | head -6 | awk '{printf "%s(%d) ", $2, $1}')
  89. # Clean up tracking file
  90. rm -f "$TRACK_FILE"
  91. # --- Gate 1: Non-harness skill was loaded = following a recipe, not novel ---
  92. [ "$SKILL_LOADED" = true ] && exit 0
  93. # --- Gate 2: Minimum 8 mutating operations ---
  94. [ "$WRITES" -lt 8 ] && exit 0
  95. # --- Gate 3: Minimum 4 distinct mutating tool types ---
  96. [ "$UNIQUE_COUNT" -lt 4 ] && exit 0
  97. # --- All gates passed: suggest ---
  98. # Mark this session as suggested (prevents repeat on resume)
  99. touch "$SUGGESTED_FILE" 2>/dev/null
  100. # Append to persistent log so the human can see suggestions at next /sync.
  101. # systemMessage goes to Claude; this log goes to the user.
  102. # Format: ISO8601 | session_id | cwd | writes | unique | total | summary
  103. LOG_DIR="$HOME/.claude/auto-skill"
  104. LOG_FILE="$LOG_DIR/pending.log"
  105. mkdir -p "$LOG_DIR" 2>/dev/null
  106. TS=$(date -Iseconds 2>/dev/null || date '+%Y-%m-%dT%H:%M:%S%z')
  107. CWD=$(pwd 2>/dev/null || echo "unknown")
  108. CLEAN_SUMMARY=$(printf '%s' "$TOOL_SUMMARY" | tr '|' '/' | tr -s ' ')
  109. printf '%s|%s|%s|%d|%d|%d|%s\n' \
  110. "$TS" "$SHORT_ID" "$CWD" "$WRITES" "$UNIQUE_COUNT" "$TOTAL" "$CLEAN_SUMMARY" \
  111. >> "$LOG_FILE" 2>/dev/null
  112. MSG="Skill-worthy session: ${WRITES} mutating ops across ${UNIQUE_COUNT} tool types (${TOTAL} total): ${TOOL_SUMMARY}- run /auto-skill to capture this workflow."
  113. ESCAPED=$(printf '%s' "$MSG" | sed 's/"/\\"/g' | tr '\n' ' ')
  114. printf '{"systemMessage":"%s"}\n' "$ESCAPED"
  115. } 2>/dev/null
  116. exit 0