#!/usr/bin/env bash
#
# OpenCode Agent Token Counter
# Estimates token count for agent prompts by analyzing all context sources
#
# Usage: ./count-agent-tokens.sh [agent-name] [model-id] [provider-id]
# Example: ./count-agent-tokens.sh build claude-3-5-sonnet-20241022 anthropic
#
# Requires: bash, awk, wc, find. git is optional (only used to locate the
# workspace root and to list tracked files for the project tree).
#

set -euo pipefail

# Colors for output (interpreted by `echo -e`)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly CYAN='\033[0;36m'
readonly BOLD='\033[1m'
readonly NC='\033[0m' # No Color

# Horizontal rule used by the banner and every section heading
readonly RULE='━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'

# Default values
AGENT="${1:-build}"
MODEL="${2:-claude-3-5-sonnet-20241022}"
PROVIDER="${3:-anthropic}"

# Workspace root: the git toplevel when inside a work tree, otherwise the
# current directory. Asking git (instead of testing for a .git directory)
# also recognises worktrees and submodules, where .git is a plain file.
if WORKSPACE_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)"; then
  IS_GIT_REPO="yes"
else
  WORKSPACE_ROOT="$(pwd)"
  IS_GIT_REPO="no"
fi

# Token estimation: ~1.3 tokens per word (average for English)
readonly TOKEN_MULTIPLIER=1.3

# Running total, incremented by the count_* helpers and the section functions
TOTAL_TOKENS=0

#######################################
# Convert a word count into a token estimate (truncated to an integer).
# Globals:   TOKEN_MULTIPLIER (read)
# Arguments: $1 - word count (non-negative integer)
# Outputs:   the estimate on stdout
#######################################
estimate_tokens() {
  # awk is used instead of bc: it is always available on POSIX systems and
  # "%d" truncates toward zero, matching the previous `bc | cut -d. -f1`.
  awk -v words="$1" -v mult="$TOKEN_MULTIPLIER" \
    'BEGIN { printf "%d\n", words * mult }'
}

#######################################
# Print a numbered section heading followed by a rule.
# Arguments: $1 - heading text
#######################################
print_section() {
  echo -e "${BOLD}${YELLOW}$1${NC}"
  echo -e "${YELLOW}${RULE}${NC}"
}

#######################################
# Count tokens in a file, report them, and add them to the total.
# A path that is not a regular file is silently skipped.
# Globals:   TOTAL_TOKENS (written)
# Arguments: $1 - file path
#            $2 - label shown in the report
#######################################
count_tokens() {
  local file="$1"
  local label="$2"

  if [[ ! -f "$file" ]]; then
    return 0
  fi

  # Declaration is separate from assignment so a failing command is not
  # masked by the exit status of `local`.
  local word_count token_estimate
  # Best effort: an unreadable file counts as 0 words instead of aborting.
  word_count=$({ wc -w < "$file"; } 2>/dev/null || echo 0)
  # BSD/macOS wc pads its output with spaces; strip them.
  word_count=${word_count//[[:space:]]/}
  token_estimate=$(estimate_tokens "$word_count")

  echo -e "${GREEN}✓${NC} ${label}"
  echo -e " ${YELLOW}→${NC} $file"
  echo -e " ${CYAN}~$token_estimate tokens${NC} ($word_count words)"
  echo ""

  TOTAL_TOKENS=$((TOTAL_TOKENS + token_estimate))
}

#######################################
# Count tokens in a string, report them, and add them to the total.
# An empty string is silently skipped.
# Globals:   TOTAL_TOKENS (written)
# Arguments: $1 - text to measure
#            $2 - label shown in the report
#######################################
count_tokens_from_output() {
  local output="$1"
  local label="$2"

  if [[ -z "$output" ]]; then
    return 0
  fi

  local word_count token_estimate
  word_count=$(wc -w <<< "$output")
  word_count=${word_count//[[:space:]]/}
  token_estimate=$(estimate_tokens "$word_count")

  echo -e "${GREEN}✓${NC} ${label}"
  echo -e " ${CYAN}~$token_estimate tokens${NC} ($word_count words)"
  echo ""

  TOTAL_TOKENS=$((TOTAL_TOKENS + token_estimate))
}

# Print the banner with the effective agent/model/provider/workspace.
print_banner() {
  echo -e "${BOLD}${CYAN}${RULE}${NC}"
  echo -e "${BOLD}${CYAN} OpenCode Agent Token Counter${NC}"
  echo -e "${BOLD}${CYAN}${RULE}${NC}"
  echo -e "${BLUE}Agent:${NC} $AGENT"
  echo -e "${BLUE}Model:${NC} $MODEL"
  echo -e "${BLUE}Provider:${NC} $PROVIDER"
  echo -e "${BLUE}Workspace:${NC} $WORKSPACE_ROOT"
  echo ""
}

# Section 1: fixed header line, counted only for Anthropic providers.
section_header() {
  print_section "1. System Prompt Header"

  # Header (only for anthropic)
  if [[ "$PROVIDER" == *"anthropic"* ]]; then
    local header_words header_tokens
    header_words=$(wc -w <<< "You are Claude Code, Anthropic's official CLI for Claude.")
    header_tokens=$(estimate_tokens "${header_words//[[:space:]]/}")
    TOTAL_TOKENS=$((TOTAL_TOKENS + header_tokens))
    echo -e "${GREEN}✓${NC} Anthropic Header"
    echo -e " ${CYAN}~$header_tokens tokens${NC}"
    echo ""
  else
    echo -e "${YELLOW}⊘${NC} No header (non-Anthropic provider)"
    echo ""
  fi
}

# Section 2: base prompt file chosen from the model id.
section_base_prompt() {
  print_section "2. Base Model Prompt"

  local prompt_dir="$WORKSPACE_ROOT/packages/opencode/src/session/prompt"
  local prompt_file

  # Determine which base prompt to use. Order matters: "gpt-5" must be
  # tested before the generic "gpt-" pattern.
  if [[ "$MODEL" == *"gpt-5"* ]]; then
    prompt_file="$prompt_dir/codex.txt"
  elif [[ "$MODEL" == *"gpt-"* || "$MODEL" == *"o1"* || "$MODEL" == *"o3"* ]]; then
    prompt_file="$prompt_dir/beast.txt"
  elif [[ "$MODEL" == *"gemini-"* ]]; then
    prompt_file="$prompt_dir/gemini.txt"
  elif [[ "$MODEL" == *"claude"* ]]; then
    prompt_file="$prompt_dir/anthropic.txt"
  else
    prompt_file="$prompt_dir/qwen.txt"
  fi

  count_tokens "$prompt_file" "Base Model Prompt"
}

# Section 3: environment description and (for git repos) the file tree.
section_environment() {
  print_section "3. Environment Context"

  local platform today env_text
  platform=$(uname -s | tr '[:upper:]' '[:lower:]')
  today=$(date +"%A %b %d, %Y")

  # Environment info (static text plus a few runtime values)
  env_text="Here is some useful information about the environment you are running in:
Working directory: $WORKSPACE_ROOT
Is directory a git repo: $IS_GIT_REPO
Platform: $platform
Today's date: $today
"
  count_tokens_from_output "$env_text" "Environment Info"

  # Project tree (git ls-files limited to 200)
  if [[ "$IS_GIT_REPO" == "yes" ]]; then
    local tree_output project_tree
    # awk consumes the whole listing, so `git ls-files` is never killed by
    # SIGPIPE the way it could be with `| head -n 200` under pipefail.
    # `git -C` avoids changing this script's working directory.
    if ! tree_output=$(git -C "$WORKSPACE_ROOT" ls-files 2>/dev/null \
      | awk 'NR <= 200 { print " - " $0 }'); then
      tree_output=" (tree unavailable)"
    fi
    project_tree="
$tree_output
"
    count_tokens_from_output "$project_tree" "Project Tree (up to 200 files)"
  else
    echo -e "${YELLOW}⊘${NC} No project tree (not a git repo)"
    echo ""
  fi
}

# Section 4: local and global rule files (AGENTS.md, CLAUDE.md, ...).
section_custom_instructions() {
  print_section "4. Custom Instructions"

  local custom_files_found=0
  local rule found_file rule_file

  # Local rule files: first match at most 3 levels below the workspace root.
  local -a local_rules=("AGENTS.md" "CLAUDE.md" "CONTEXT.md")
  for rule in "${local_rules[@]}"; do
    # `-print -quit` stops at the first hit without a pipe, so there is no
    # SIGPIPE to trip pipefail. `|| true` keeps the lookup best-effort when
    # find reports unreadable directories.
    found_file=$(find "$WORKSPACE_ROOT" -maxdepth 3 -name "$rule" \
      -print -quit 2>/dev/null || true)
    if [[ -n "$found_file" ]]; then
      count_tokens "$found_file" "Local: $rule"
      custom_files_found=1
    fi
  done

  # Global rule files
  local -a global_rules=(
    "$HOME/.config/opencode/AGENTS.md"
    "$HOME/.claude/CLAUDE.md"
  )
  for rule_file in "${global_rules[@]}"; do
    if [[ -f "$rule_file" ]]; then
      count_tokens "$rule_file" "Global: $(basename "$rule_file")"
      custom_files_found=1
    fi
  done

  # config.instructions is only detected, not parsed: reading the array
  # reliably would need a real JSON parser, so the user is pointed at the file.
  local config_file="$WORKSPACE_ROOT/opencode.json"
  if [[ -f "$config_file" ]] \
    && grep -qF -- '"instructions"' "$config_file" 2>/dev/null; then
    echo -e "${BLUE}Checking config.instructions...${NC}"
    echo -e "${YELLOW}Note: Script cannot parse instructions array. Check opencode.json manually.${NC}"
    echo ""
  fi

  if [[ $custom_files_found -eq 0 ]]; then
    echo -e "${YELLOW}⊘${NC} No custom instruction files found"
    echo ""
  fi
}

# Section 5: built-in tool descriptions plus a flat per-tool schema overhead.
section_tools() {
  print_section "5. Tool Definitions"

  local tool_dir="$WORKSPACE_ROOT/packages/opencode/src/tool"
  if [[ ! -d "$tool_dir" ]]; then
    echo -e "${RED}✗${NC} Tool directory not found"
    echo ""
    return 0
  fi

  echo -e "${BLUE}Counting built-in tool descriptions...${NC}"

  local tool_count=0
  local tool_tokens=0
  local tool_desc word_count token_estimate

  for tool_desc in "$tool_dir"/*.txt; do
    # An unmatched glob stays literal; the -f test filters that out.
    [[ -f "$tool_desc" ]] || continue
    # Match on the file name only, so a workspace path that happens to
    # contain "todoread"/"todowrite" does not exclude every tool.
    if [[ "${tool_desc##*/}" =~ (todoread|todowrite) ]]; then
      continue
    fi
    word_count=$(wc -w < "$tool_desc")
    token_estimate=$(estimate_tokens "${word_count//[[:space:]]/}")
    tool_tokens=$((tool_tokens + token_estimate))
    tool_count=$((tool_count + 1))
  done

  # Add ~50 tokens per tool for JSON schema overhead
  local schema_overhead=$((tool_count * 50))
  tool_tokens=$((tool_tokens + schema_overhead))

  echo -e "${GREEN}✓${NC} $tool_count tool definitions found"
  echo -e " ${CYAN}~$tool_tokens tokens${NC} (descriptions + schemas)"
  echo ""

  TOTAL_TOKENS=$((TOTAL_TOKENS + tool_tokens))
}

# Section 6: agent entry in opencode.json and agent-specific markdown files.
section_agent_config() {
  print_section "6. Agent-Specific Configuration"

  # Check opencode.json for agent config
  local config_file="$WORKSPACE_ROOT/opencode.json"
  if [[ -f "$config_file" ]]; then
    echo -e "${GREEN}✓${NC} Found opencode.json"
    # Simplified check: looks for the quoted agent name anywhere in the file.
    # -F treats the name literally; -- protects names starting with '-'.
    if grep -qF -- "\"$AGENT\"" "$config_file" 2>/dev/null; then
      echo -e " ${YELLOW}→${NC} Agent '$AGENT' has custom configuration"
      echo -e " ${CYAN}Note: Custom prompts counted separately if present${NC}"
    else
      echo -e " ${YELLOW}→${NC} Using default configuration for '$AGENT' agent"
    fi
    echo ""
  else
    echo -e "${YELLOW}⊘${NC} No opencode.json found (using defaults)"
    echo ""
  fi

  # Check for agent-specific markdown files
  local -a agent_md_dirs=(
    "$HOME/.config/opencode/agent"
    "$WORKSPACE_ROOT/.opencode/agent"
  )
  local agent_dir agent_file
  for agent_dir in "${agent_md_dirs[@]}"; do
    agent_file="$agent_dir/${AGENT}.md"
    if [[ -f "$agent_file" ]]; then
      count_tokens "$agent_file" "Agent-specific: ${AGENT}.md"
    fi
  done
}

# Print the grand total and the accuracy disclaimer.
print_total() {
  echo -e "${BOLD}${CYAN}${RULE}${NC}"
  echo -e "${BOLD}${GREEN}TOTAL ESTIMATED TOKENS: ~$TOTAL_TOKENS${NC}"
  echo -e "${BOLD}${CYAN}${RULE}${NC}"
  echo ""
  echo -e "${YELLOW}Note:${NC} This is an estimate using word count × 1.3"
  echo -e " Actual token count may vary by ±10-20% depending on the tokenizer"
  echo -e " This count includes system prompts + context, but not your actual message"
  echo ""
}

main() {
  print_banner
  section_header
  section_base_prompt
  section_environment
  section_custom_instructions
  section_tools
  section_agent_config
  print_total
}

main "$@"