#!/usr/bin/env bash
#
# count-agent-tokens.sh - OpenCode Agent Token Counter
# Estimates the token count of an agent prompt by analyzing all context sources.
#
# Usage:   ./count-agent-tokens.sh [agent-name] [model-id] [provider-id]
# Example: ./count-agent-tokens.sh build claude-3-5-sonnet-20241022 anthropic
#
  9. set -euo pipefail
  10. # Colors for output
  11. RED='\033[0;31m'
  12. GREEN='\033[0;32m'
  13. YELLOW='\033[1;33m'
  14. BLUE='\033[0;34m'
  15. CYAN='\033[0;36m'
  16. BOLD='\033[1m'
  17. NC='\033[0m' # No Color
  18. # Default values
  19. AGENT="${1:-build}"
  20. MODEL="${2:-claude-3-5-sonnet-20241022}"
  21. PROVIDER="${3:-anthropic}"
  22. WORKSPACE_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
  23. # Token estimation: ~1.3 tokens per word (average for English)
  24. TOKEN_MULTIPLIER=1.3
  25. echo -e "${BOLD}${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  26. echo -e "${BOLD}${CYAN} OpenCode Agent Token Counter${NC}"
  27. echo -e "${BOLD}${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  28. echo -e "${BLUE}Agent:${NC} $AGENT"
  29. echo -e "${BLUE}Model:${NC} $MODEL"
  30. echo -e "${BLUE}Provider:${NC} $PROVIDER"
  31. echo -e "${BLUE}Workspace:${NC} $WORKSPACE_ROOT"
  32. echo ""
  33. TOTAL_TOKENS=0
  34. # Function to count tokens in a file
  35. count_tokens() {
  36. local file="$1"
  37. local label="$2"
  38. if [[ ! -f "$file" ]]; then
  39. return 0
  40. fi
  41. local word_count
  42. word_count=$(wc -w < "$file" 2>/dev/null || echo 0)
  43. local token_estimate
  44. token_estimate=$(echo "$word_count * $TOKEN_MULTIPLIER" | bc | cut -d. -f1)
  45. echo -e "${GREEN}✓${NC} ${label}"
  46. echo -e " ${YELLOW}→${NC} $file"
  47. echo -e " ${CYAN}~$token_estimate tokens${NC} ($word_count words)"
  48. echo ""
  49. TOTAL_TOKENS=$((TOTAL_TOKENS + token_estimate))
  50. }
  51. # Function to count tokens from command output
  52. count_tokens_from_output() {
  53. local output="$1"
  54. local label="$2"
  55. if [[ -z "$output" ]]; then
  56. return 0
  57. fi
  58. local word_count
  59. word_count=$(echo "$output" | wc -w)
  60. local token_estimate
  61. token_estimate=$(echo "$word_count * $TOKEN_MULTIPLIER" | bc | cut -d. -f1)
  62. echo -e "${GREEN}✓${NC} ${label}"
  63. echo -e " ${CYAN}~$token_estimate tokens${NC} ($word_count words)"
  64. echo ""
  65. TOTAL_TOKENS=$((TOTAL_TOKENS + token_estimate))
  66. }
  67. echo -e "${BOLD}${YELLOW}1. System Prompt Header${NC}"
  68. echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  69. # Header (only for anthropic)
  70. if [[ "$PROVIDER" == *"anthropic"* ]]; then
  71. HEADER_TOKENS=$(echo "You are Claude Code, Anthropic's official CLI for Claude." | wc -w | xargs -I {} echo "{} * $TOKEN_MULTIPLIER" | bc | cut -d. -f1)
  72. TOTAL_TOKENS=$((TOTAL_TOKENS + HEADER_TOKENS))
  73. echo -e "${GREEN}✓${NC} Anthropic Header"
  74. echo -e " ${CYAN}~$HEADER_TOKENS tokens${NC}"
  75. echo ""
  76. else
  77. echo -e "${YELLOW}⊘${NC} No header (non-Anthropic provider)"
  78. echo ""
  79. fi
  80. echo -e "${BOLD}${YELLOW}2. Base Model Prompt${NC}"
  81. echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  82. PROMPT_DIR="$WORKSPACE_ROOT/packages/opencode/src/session/prompt"
  83. # Determine which base prompt to use
  84. if [[ "$MODEL" == *"gpt-5"* ]]; then
  85. PROMPT_FILE="$PROMPT_DIR/codex.txt"
  86. elif [[ "$MODEL" == *"gpt-"* ]] || [[ "$MODEL" == *"o1"* ]] || [[ "$MODEL" == *"o3"* ]]; then
  87. PROMPT_FILE="$PROMPT_DIR/beast.txt"
  88. elif [[ "$MODEL" == *"gemini-"* ]]; then
  89. PROMPT_FILE="$PROMPT_DIR/gemini.txt"
  90. elif [[ "$MODEL" == *"claude"* ]]; then
  91. PROMPT_FILE="$PROMPT_DIR/anthropic.txt"
  92. else
  93. PROMPT_FILE="$PROMPT_DIR/qwen.txt"
  94. fi
  95. count_tokens "$PROMPT_FILE" "Base Model Prompt"
  96. echo -e "${BOLD}${YELLOW}3. Environment Context${NC}"
  97. echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  98. # Environment info (static text)
  99. ENV_TEXT="Here is some useful information about the environment you are running in:
  100. <env>
  101. Working directory: $WORKSPACE_ROOT
  102. Is directory a git repo: yes
  103. Platform: $(uname -s | tr '[:upper:]' '[:lower:]')
  104. Today's date: $(date +"%A %b %d, %Y")
  105. </env>"
  106. count_tokens_from_output "$ENV_TEXT" "Environment Info"
  107. # Project tree (git ls-files limited to 200)
  108. if [[ -d "$WORKSPACE_ROOT/.git" ]]; then
  109. cd "$WORKSPACE_ROOT"
  110. TREE_OUTPUT=$(git ls-files 2>/dev/null | head -n 200 | sed 's/^/ - /' 2>/dev/null || echo " (tree unavailable)")
  111. PROJECT_TREE="<project>
  112. $TREE_OUTPUT
  113. </project>"
  114. count_tokens_from_output "$PROJECT_TREE" "Project Tree (up to 200 files)"
  115. else
  116. echo -e "${YELLOW}⊘${NC} No project tree (not a git repo)"
  117. echo ""
  118. fi
  119. echo -e "${BOLD}${YELLOW}4. Custom Instructions${NC}"
  120. echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  121. CUSTOM_FILES_FOUND=0
  122. # Local rule files (search up from current dir)
  123. LOCAL_RULES=("AGENTS.md" "CLAUDE.md" "CONTEXT.md")
  124. for rule in "${LOCAL_RULES[@]}"; do
  125. # Find the file searching upward from workspace root
  126. FOUND_FILE=$(find "$WORKSPACE_ROOT" -maxdepth 3 -name "$rule" 2>/dev/null | head -n 1)
  127. if [[ -n "$FOUND_FILE" ]]; then
  128. count_tokens "$FOUND_FILE" "Local: $rule"
  129. CUSTOM_FILES_FOUND=1
  130. fi
  131. done
  132. # Global rule files
  133. GLOBAL_RULES=(
  134. "$HOME/.config/opencode/AGENTS.md"
  135. "$HOME/.claude/CLAUDE.md"
  136. )
  137. for rule_file in "${GLOBAL_RULES[@]}"; do
  138. if [[ -f "$rule_file" ]]; then
  139. count_tokens "$rule_file" "Global: $(basename "$rule_file")"
  140. CUSTOM_FILES_FOUND=1
  141. fi
  142. done
  143. # Check config.instructions (only loads files explicitly listed)
  144. CONFIG_FILE="$WORKSPACE_ROOT/opencode.json"
  145. if [[ -f "$CONFIG_FILE" ]] && grep -q '"instructions"' "$CONFIG_FILE" 2>/dev/null; then
  146. echo -e "${BLUE}Checking config.instructions...${NC}"
  147. # Extract instructions array (simplified - won't handle complex JSON)
  148. # This is a best-effort check; actual implementation uses proper JSON parsing
  149. echo -e "${YELLOW}Note: Script cannot parse instructions array. Check opencode.json manually.${NC}"
  150. echo ""
  151. fi
  152. if [[ $CUSTOM_FILES_FOUND -eq 0 ]]; then
  153. echo -e "${YELLOW}⊘${NC} No custom instruction files found"
  154. echo ""
  155. fi
  156. echo -e "${BOLD}${YELLOW}5. Tool Definitions${NC}"
  157. echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  158. TOOL_DIR="$WORKSPACE_ROOT/packages/opencode/src/tool"
  159. if [[ -d "$TOOL_DIR" ]]; then
  160. echo -e "${BLUE}Counting built-in tool descriptions...${NC}"
  161. TOOL_COUNT=0
  162. TOOL_TOKENS=0
  163. for tool_desc in "$TOOL_DIR"/*.txt; do
  164. if [[ -f "$tool_desc" ]] && [[ ! "$tool_desc" =~ (todoread|todowrite) ]]; then
  165. word_count=$(wc -w < "$tool_desc")
  166. token_estimate=$(echo "$word_count * $TOKEN_MULTIPLIER" | bc | cut -d. -f1)
  167. TOOL_TOKENS=$((TOOL_TOKENS + token_estimate))
  168. TOOL_COUNT=$((TOOL_COUNT + 1))
  169. fi
  170. done
  171. # Add ~50 tokens per tool for JSON schema overhead
  172. SCHEMA_OVERHEAD=$((TOOL_COUNT * 50))
  173. TOOL_TOKENS=$((TOOL_TOKENS + SCHEMA_OVERHEAD))
  174. echo -e "${GREEN}✓${NC} $TOOL_COUNT tool definitions found"
  175. echo -e " ${CYAN}~$TOOL_TOKENS tokens${NC} (descriptions + schemas)"
  176. echo ""
  177. TOTAL_TOKENS=$((TOTAL_TOKENS + TOOL_TOKENS))
  178. else
  179. echo -e "${RED}✗${NC} Tool directory not found"
  180. echo ""
  181. fi
  182. echo -e "${BOLD}${YELLOW}6. Agent-Specific Configuration${NC}"
  183. echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  184. # Check opencode.json for agent config
  185. CONFIG_FILE="$WORKSPACE_ROOT/opencode.json"
  186. if [[ -f "$CONFIG_FILE" ]]; then
  187. echo -e "${GREEN}✓${NC} Found opencode.json"
  188. # Try to extract agent-specific prompt (this is a simplified check)
  189. if grep -q "\"$AGENT\"" "$CONFIG_FILE" 2>/dev/null; then
  190. echo -e " ${YELLOW}→${NC} Agent '$AGENT' has custom configuration"
  191. echo -e " ${CYAN}Note: Custom prompts counted separately if present${NC}"
  192. else
  193. echo -e " ${YELLOW}→${NC} Using default configuration for '$AGENT' agent"
  194. fi
  195. echo ""
  196. else
  197. echo -e "${YELLOW}⊘${NC} No opencode.json found (using defaults)"
  198. echo ""
  199. fi
  200. # Check for agent-specific markdown files
  201. AGENT_MD_DIRS=(
  202. "$HOME/.config/opencode/agent"
  203. "$WORKSPACE_ROOT/.opencode/agent"
  204. )
  205. for agent_dir in "${AGENT_MD_DIRS[@]}"; do
  206. if [[ -d "$agent_dir" ]]; then
  207. AGENT_FILE="$agent_dir/${AGENT}.md"
  208. if [[ -f "$AGENT_FILE" ]]; then
  209. count_tokens "$AGENT_FILE" "Agent-specific: ${AGENT}.md"
  210. fi
  211. fi
  212. done
  213. echo -e "${BOLD}${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  214. echo -e "${BOLD}${GREEN}TOTAL ESTIMATED TOKENS: ~$TOTAL_TOKENS${NC}"
  215. echo -e "${BOLD}${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  216. echo ""
  217. echo -e "${YELLOW}Note:${NC} This is an estimate using word count × 1.3"
  218. echo -e " Actual token count may vary by ±10-20% depending on the tokenizer"
  219. echo -e " This count includes system prompts + context, but not your actual message"
  220. echo ""