count-agent-tokens.sh 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. #!/usr/bin/env bash
  2. #
  3. # OpenCode Agent Token Counter
  4. # Estimates token count for agent prompts by analyzing all context sources
  5. #
  6. # Usage: ./count-agent-tokens.sh [agent-name] [model-id] [provider-id]
  7. # Example: ./count-agent-tokens.sh build claude-3-5-sonnet-20241022 anthropic
  8. #
  9. set -euo pipefail
  10. # Colors for output
  11. RED='\033[0;31m'
  12. GREEN='\033[0;32m'
  13. YELLOW='\033[1;33m'
  14. BLUE='\033[0;34m'
  15. CYAN='\033[0;36m'
  16. BOLD='\033[1m'
  17. NC='\033[0m' # No Color
  18. # Default values
  19. AGENT="${1:-build}"
  20. MODEL="${2:-claude-3-5-sonnet-20241022}"
  21. PROVIDER="${3:-anthropic}"
  22. WORKSPACE_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
  23. # Token estimation: ~1.3 tokens per word (average for English)
  24. TOKEN_MULTIPLIER=1.3
  25. echo -e "${BOLD}${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  26. echo -e "${BOLD}${CYAN} OpenCode Agent Token Counter${NC}"
  27. echo -e "${BOLD}${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  28. echo -e "${BLUE}Agent:${NC} $AGENT"
  29. echo -e "${BLUE}Model:${NC} $MODEL"
  30. echo -e "${BLUE}Provider:${NC} $PROVIDER"
  31. echo -e "${BLUE}Workspace:${NC} $WORKSPACE_ROOT"
  32. echo ""
  33. TOTAL_TOKENS=0
  34. # Function to count tokens in a file
  35. count_tokens() {
  36. local file="$1"
  37. local label="$2"
  38. if [[ ! -f "$file" ]]; then
  39. return 0
  40. fi
  41. local word_count=$(wc -w < "$file" 2>/dev/null || echo 0)
  42. local token_estimate=$(echo "$word_count * $TOKEN_MULTIPLIER" | bc | cut -d. -f1)
  43. echo -e "${GREEN}✓${NC} ${label}"
  44. echo -e " ${YELLOW}→${NC} $file"
  45. echo -e " ${CYAN}~$token_estimate tokens${NC} ($word_count words)"
  46. echo ""
  47. TOTAL_TOKENS=$((TOTAL_TOKENS + token_estimate))
  48. }
  49. # Function to count tokens from command output
  50. count_tokens_from_output() {
  51. local output="$1"
  52. local label="$2"
  53. if [[ -z "$output" ]]; then
  54. return 0
  55. fi
  56. local word_count=$(echo "$output" | wc -w)
  57. local token_estimate=$(echo "$word_count * $TOKEN_MULTIPLIER" | bc | cut -d. -f1)
  58. echo -e "${GREEN}✓${NC} ${label}"
  59. echo -e " ${CYAN}~$token_estimate tokens${NC} ($word_count words)"
  60. echo ""
  61. TOTAL_TOKENS=$((TOTAL_TOKENS + token_estimate))
  62. }
  63. echo -e "${BOLD}${YELLOW}1. System Prompt Header${NC}"
  64. echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  65. # Header (only for anthropic)
  66. if [[ "$PROVIDER" == *"anthropic"* ]]; then
  67. HEADER_TOKENS=$(echo "You are Claude Code, Anthropic's official CLI for Claude." | wc -w | xargs -I {} echo "{} * $TOKEN_MULTIPLIER" | bc | cut -d. -f1)
  68. TOTAL_TOKENS=$((TOTAL_TOKENS + HEADER_TOKENS))
  69. echo -e "${GREEN}✓${NC} Anthropic Header"
  70. echo -e " ${CYAN}~$HEADER_TOKENS tokens${NC}"
  71. echo ""
  72. else
  73. echo -e "${YELLOW}⊘${NC} No header (non-Anthropic provider)"
  74. echo ""
  75. fi
  76. echo -e "${BOLD}${YELLOW}2. Base Model Prompt${NC}"
  77. echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  78. PROMPT_DIR="$WORKSPACE_ROOT/packages/opencode/src/session/prompt"
  79. # Determine which base prompt to use
  80. if [[ "$MODEL" == *"gpt-5"* ]]; then
  81. PROMPT_FILE="$PROMPT_DIR/codex.txt"
  82. elif [[ "$MODEL" == *"gpt-"* ]] || [[ "$MODEL" == *"o1"* ]] || [[ "$MODEL" == *"o3"* ]]; then
  83. PROMPT_FILE="$PROMPT_DIR/beast.txt"
  84. elif [[ "$MODEL" == *"gemini-"* ]]; then
  85. PROMPT_FILE="$PROMPT_DIR/gemini.txt"
  86. elif [[ "$MODEL" == *"claude"* ]]; then
  87. PROMPT_FILE="$PROMPT_DIR/anthropic.txt"
  88. else
  89. PROMPT_FILE="$PROMPT_DIR/qwen.txt"
  90. fi
  91. count_tokens "$PROMPT_FILE" "Base Model Prompt"
  92. echo -e "${BOLD}${YELLOW}3. Environment Context${NC}"
  93. echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  94. # Environment info (static text)
  95. ENV_TEXT="Here is some useful information about the environment you are running in:
  96. <env>
  97. Working directory: $WORKSPACE_ROOT
  98. Is directory a git repo: yes
  99. Platform: $(uname -s | tr '[:upper:]' '[:lower:]')
  100. Today's date: $(date +"%A %b %d, %Y")
  101. </env>"
  102. count_tokens_from_output "$ENV_TEXT" "Environment Info"
  103. # Project tree (git ls-files limited to 200)
  104. if [[ -d "$WORKSPACE_ROOT/.git" ]]; then
  105. cd "$WORKSPACE_ROOT"
  106. TREE_OUTPUT=$(git ls-files 2>/dev/null | head -n 200 | sed 's/^/ - /' 2>/dev/null || echo " (tree unavailable)")
  107. PROJECT_TREE="<project>
  108. $TREE_OUTPUT
  109. </project>"
  110. count_tokens_from_output "$PROJECT_TREE" "Project Tree (up to 200 files)"
  111. else
  112. echo -e "${YELLOW}⊘${NC} No project tree (not a git repo)"
  113. echo ""
  114. fi
  115. echo -e "${BOLD}${YELLOW}4. Custom Instructions${NC}"
  116. echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  117. CUSTOM_FILES_FOUND=0
  118. # Local rule files (search up from current dir)
  119. LOCAL_RULES=("AGENTS.md" "CLAUDE.md" "CONTEXT.md")
  120. for rule in "${LOCAL_RULES[@]}"; do
  121. # Find the file searching upward from workspace root
  122. FOUND_FILE=$(find "$WORKSPACE_ROOT" -maxdepth 3 -name "$rule" 2>/dev/null | head -n 1)
  123. if [[ -n "$FOUND_FILE" ]]; then
  124. count_tokens "$FOUND_FILE" "Local: $rule"
  125. CUSTOM_FILES_FOUND=1
  126. fi
  127. done
  128. # Global rule files
  129. GLOBAL_RULES=(
  130. "$HOME/.config/opencode/AGENTS.md"
  131. "$HOME/.claude/CLAUDE.md"
  132. )
  133. for rule_file in "${GLOBAL_RULES[@]}"; do
  134. if [[ -f "$rule_file" ]]; then
  135. count_tokens "$rule_file" "Global: $(basename "$rule_file")"
  136. CUSTOM_FILES_FOUND=1
  137. fi
  138. done
  139. # Check config.instructions (only loads files explicitly listed)
  140. CONFIG_FILE="$WORKSPACE_ROOT/opencode.json"
  141. if [[ -f "$CONFIG_FILE" ]] && grep -q '"instructions"' "$CONFIG_FILE" 2>/dev/null; then
  142. echo -e "${BLUE}Checking config.instructions...${NC}"
  143. # Extract instructions array (simplified - won't handle complex JSON)
  144. # This is a best-effort check; actual implementation uses proper JSON parsing
  145. echo -e "${YELLOW}Note: Script cannot parse instructions array. Check opencode.json manually.${NC}"
  146. echo ""
  147. fi
  148. if [[ $CUSTOM_FILES_FOUND -eq 0 ]]; then
  149. echo -e "${YELLOW}⊘${NC} No custom instruction files found"
  150. echo ""
  151. fi
  152. echo -e "${BOLD}${YELLOW}5. Tool Definitions${NC}"
  153. echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  154. TOOL_DIR="$WORKSPACE_ROOT/packages/opencode/src/tool"
  155. if [[ -d "$TOOL_DIR" ]]; then
  156. echo -e "${BLUE}Counting built-in tool descriptions...${NC}"
  157. TOOL_COUNT=0
  158. TOOL_TOKENS=0
  159. for tool_desc in "$TOOL_DIR"/*.txt; do
  160. if [[ -f "$tool_desc" ]] && [[ ! "$tool_desc" =~ (todoread|todowrite) ]]; then
  161. word_count=$(wc -w < "$tool_desc")
  162. token_estimate=$(echo "$word_count * $TOKEN_MULTIPLIER" | bc | cut -d. -f1)
  163. TOOL_TOKENS=$((TOOL_TOKENS + token_estimate))
  164. TOOL_COUNT=$((TOOL_COUNT + 1))
  165. fi
  166. done
  167. # Add ~50 tokens per tool for JSON schema overhead
  168. SCHEMA_OVERHEAD=$((TOOL_COUNT * 50))
  169. TOOL_TOKENS=$((TOOL_TOKENS + SCHEMA_OVERHEAD))
  170. echo -e "${GREEN}✓${NC} $TOOL_COUNT tool definitions found"
  171. echo -e " ${CYAN}~$TOOL_TOKENS tokens${NC} (descriptions + schemas)"
  172. echo ""
  173. TOTAL_TOKENS=$((TOTAL_TOKENS + TOOL_TOKENS))
  174. else
  175. echo -e "${RED}✗${NC} Tool directory not found"
  176. echo ""
  177. fi
  178. echo -e "${BOLD}${YELLOW}6. Agent-Specific Configuration${NC}"
  179. echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  180. # Check opencode.json for agent config
  181. CONFIG_FILE="$WORKSPACE_ROOT/opencode.json"
  182. if [[ -f "$CONFIG_FILE" ]]; then
  183. echo -e "${GREEN}✓${NC} Found opencode.json"
  184. # Try to extract agent-specific prompt (this is a simplified check)
  185. if grep -q "\"$AGENT\"" "$CONFIG_FILE" 2>/dev/null; then
  186. echo -e " ${YELLOW}→${NC} Agent '$AGENT' has custom configuration"
  187. echo -e " ${CYAN}Note: Custom prompts counted separately if present${NC}"
  188. else
  189. echo -e " ${YELLOW}→${NC} Using default configuration for '$AGENT' agent"
  190. fi
  191. echo ""
  192. else
  193. echo -e "${YELLOW}⊘${NC} No opencode.json found (using defaults)"
  194. echo ""
  195. fi
  196. # Check for agent-specific markdown files
  197. AGENT_MD_DIRS=(
  198. "$HOME/.config/opencode/agent"
  199. "$WORKSPACE_ROOT/.opencode/agent"
  200. )
  201. for agent_dir in "${AGENT_MD_DIRS[@]}"; do
  202. if [[ -d "$agent_dir" ]]; then
  203. AGENT_FILE="$agent_dir/${AGENT}.md"
  204. if [[ -f "$AGENT_FILE" ]]; then
  205. count_tokens "$AGENT_FILE" "Agent-specific: ${AGENT}.md"
  206. fi
  207. fi
  208. done
  209. echo -e "${BOLD}${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  210. echo -e "${BOLD}${GREEN}TOTAL ESTIMATED TOKENS: ~$TOTAL_TOKENS${NC}"
  211. echo -e "${BOLD}${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  212. echo ""
  213. echo -e "${YELLOW}Note:${NC} This is an estimate using word count × 1.3"
  214. echo -e " Actual token count may vary by ±10-20% depending on the tokenizer"
  215. echo -e " This count includes system prompts + context, but not your actual message"
  216. echo ""