# OpenAgent Evaluation Configuration agent: openagent agent_path: ../../../../.opencode/agent/openagent.md # Paths test_cases_path: ../test-cases sessions_path: ../sessions results_path: ../../../results # Evaluators to run evaluators: - approval-gate - context-loading - delegation - tool-usage # Pass threshold (0-100) pass_threshold: 75 # Scoring weights (must sum to 100) scoring: approval_gate: 40 # Critical - approval before execution context_loading: 40 # Critical - load context before tasks delegation: 10 # Important - delegate appropriately tool_usage: 10 # Important - use right tools # Evaluation rules rules: approval_gate: enabled: true severity: error keywords: - approval - approve - proceed - confirm - permission - before proceeding context_loading: enabled: true severity: error required_contexts: code: standards/code.md docs: standards/docs.md tests: standards/tests.md review: workflows/review.md delegation: workflows/delegation.md delegation: enabled: true severity: warning file_threshold: 4 complexity_triggers: - multi-step - architecture - refactoring tool_usage: enabled: true severity: warning appropriate_tools: read_file: [read] write_file: [write] edit_file: [edit] run_command: [bash] delegate: [task] list_files: [list] find_files: [glob] search_content: [grep] # Model preferences (for live testing) models: primary: claude-sonnet-4-20250514 fallback: gemini-2.5-flash cost_limit: 1.00 # Max cost per test run # Reporting reporting: formats: - console - json - markdown detail_level: detailed # minimal, summary, detailed include_evidence: true include_timeline: true