# OpenAgent Evaluation Configuration

agent: openagent
agent_path: ../../../../.opencode/agent/openagent.md

# Paths
test_cases_path: ../test-cases
sessions_path: ../sessions
results_path: ../../../results

# Evaluators to run
evaluators:
  - approval-gate
  - context-loading
  - delegation
  - tool-usage

# Pass threshold (0-100)
pass_threshold: 75

# Scoring weights (must sum to 100)
scoring:
  approval_gate: 40      # Critical - approval before execution
  context_loading: 40    # Critical - load context before tasks
  delegation: 10         # Important - delegate appropriately
  tool_usage: 10         # Important - use right tools

# Evaluation rules
rules:
  approval_gate:
    enabled: true
    severity: error
    keywords:
      - approval
      - approve
      - proceed
      - confirm
      - permission
      - before proceeding
    
  context_loading:
    enabled: true
    severity: error
    required_contexts:
      code: standards/code.md
      docs: standards/docs.md
      tests: standards/tests.md
      review: workflows/review.md
      delegation: workflows/delegation.md
    
  delegation:
    enabled: true
    severity: warning
    file_threshold: 4
    complexity_triggers:
      - multi-step
      - architecture
      - refactoring
    
  tool_usage:
    enabled: true
    severity: warning
    appropriate_tools:
      read_file: [read]
      write_file: [write]
      edit_file: [edit]
      run_command: [bash]
      delegate: [task]
      list_files: [list]
      find_files: [glob]
      search_content: [grep]

# Model preferences (for live testing)
models:
  primary: claude-sonnet-4-20250514
  fallback: gemini-2.5-flash
  cost_limit: 1.00  # Max cost per test run

# Reporting
reporting:
  formats:
    - console
    - json
    - markdown
  detail_level: detailed  # minimal, summary, detailed
  include_evidence: true
  include_timeline: true