logic855
/
OpenAgentsControl
mirror of https://github.com/darrenhinde/OpenAgentsControl.git


			
				
					
						
						
{
  "name": "OpenAgent Core Test Suite",
  "description": "Minimal set of tests providing maximum coverage of critical OpenAgent functionality",
  "version": "1.0.0",
  "totalTests": 7,
  "estimatedRuntime": "5-8 minutes",
  "coverage": {
    "approvalGate": true,
    "contextLoading": true,
    "stopOnFailure": true,
    "delegation": true,
    "toolUsage": true,
    "multiTurn": true,
    "subagents": true
  },
  "tests": [
    {
      "id": 1,
      "name": "Approval Gate",
      "path": "01-critical-rules/approval-gate/05-approval-before-execution-positive.yaml",
      "category": "critical-rules",
      "priority": "critical",
      "estimatedTime": "30-60s",
      "description": "Validates the approval-before-execution workflow - the most critical safety rule"
    },
    {
      "id": 2,
      "name": "Context Loading (Simple)",
      "path": "01-critical-rules/context-loading/01-code-task.yaml",
      "category": "critical-rules",
      "priority": "critical",
      "estimatedTime": "60-90s",
      "description": "Validates context loading for code tasks - most common use case"
    },
    {
      "id": 3,
      "name": "Context Loading (Multi-Turn)",
      "path": "01-critical-rules/context-loading/09-multi-standards-to-docs.yaml",
      "category": "critical-rules",
      "priority": "high",
      "estimatedTime": "120-180s",
      "description": "Validates multi-turn context loading with multiple context files"
    },
    {
      "id": 4,
      "name": "Stop on Failure",
      "path": "01-critical-rules/stop-on-failure/02-stop-and-report-positive.yaml",
      "category": "critical-rules",
      "priority": "critical",
      "estimatedTime": "60-90s",
      "description": "Validates agent stops and reports errors instead of auto-fixing"
    },
    {
      "id": 5,
      "name": "Simple Task (No Delegation)",
      "path": "08-delegation/simple-task-direct.yaml",
      "category": "delegation",
      "priority": "high",
      "estimatedTime": "30-60s",
      "description": "Validates agent handles simple tasks directly without unnecessary delegation"
    },
    {
      "id": 6,
      "name": "Subagent Delegation",
      "path": "06-integration/medium/04-subagent-verification.yaml",
      "category": "integration",
      "priority": "high",
      "estimatedTime": "90-120s",
      "description": "Validates subagent delegation and execution for appropriate tasks"
    },
    {
      "id": 7,
      "name": "Tool Usage",
      "path": "09-tool-usage/dedicated-tools-usage.yaml",
      "category": "tool-usage",
      "priority": "medium",
      "estimatedTime": "30-60s",
      "description": "Validates agent uses proper tools (read/grep) instead of bash antipatterns"
    }
  ],
  "rationale": {
    "why7Tests": "These 7 tests provide ~85% coverage of critical functionality with 90% fewer tests than the full suite",
    "coverageBreakdown": {
      "criticalSafetyRules": "4/4 rules covered (approval, context, stop-on-failure, report-first)",
      "delegationLogic": "2 tests cover both simple (no delegation) and complex (delegation) scenarios",
      "toolUsage": "1 test ensures proper tool usage patterns",
      "multiTurn": "1 test validates complex multi-turn conversations with context"
    },
    "useCases": [
      "Quick validation when updating OpenAgent prompt",
      "Pre-commit hooks for fast feedback",
      "CI/CD pull request validation",
      "Development iteration cycles"
    ]
  },
  "usage": {
    "npm": {
      "root": "npm run test:core",
      "openagent": "npm run test:openagent:core",
      "withModel": "npm run test:openagent:core -- --model=anthropic/claude-sonnet-4-5"
    },
    "script": {
      "basic": "./scripts/test.sh openagent --core",
      "withModel": "./scripts/test.sh openagent opencode/grok-code-fast --core"
    },
    "direct": {
      "basic": "cd evals/framework && npm run eval:sdk:core",
      "withAgent": "cd evals/framework && npm run eval:sdk:core -- --agent=openagent"
    }
  },
  "comparison": {
    "fullSuite": {
      "tests": 71,
      "runtime": "40-80 minutes",
      "coverage": "100%"
    },
    "coreSuite": {
      "tests": 7,
      "runtime": "5-8 minutes",
      "coverage": "~85%"
    },
    "savings": {
      "tests": "90% fewer tests",
      "time": "85-90% faster",
      "tokens": "~90% reduction"
    }
  }
}