logic855
/
OpenAgentsControl
mirror of https://github.com/darrenhinde/OpenAgentsControl.git


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
							#!/usr/bin/env node

/**
 * Main CLI entry point for SDK-based test execution
 * 
 * Usage:
 *   npm run eval:sdk
 *   npm run eval:sdk -- --debug
 *   npm run eval:sdk -- --no-evaluators
 *   npm run eval:sdk -- --model=opencode/grok-code-fast
 *   npm run eval:sdk -- --model=anthropic/claude-3-5-sonnet-20241022
 *   npm run eval:sdk -- --pattern="developer/*.yaml" --model=openai/gpt-4-turbo
 * 
 * Options:
 *   --debug              Enable debug logging
 *   --no-evaluators      Skip running evaluators (faster)
 *   --model=PROVIDER/MODEL  Override default model (default: opencode/grok-code-fast)
 *   --pattern=GLOB       Run specific test files (default: star-star/star.yaml)
 *   --timeout=MS         Test timeout in milliseconds (default: 60000)
 */

import { TestRunner } from './test-runner.js';
import { loadTestCase, loadTestCases } from './test-case-loader.js';
import glob from 'glob';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import type { TestResult } from './test-runner.js';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

interface CliArgs {
  debug: boolean;
  noEvaluators: boolean;
  pattern?: string;
  timeout?: number;
  model?: string;
}

function parseArgs(): CliArgs {
  const args = process.argv.slice(2);
  
  return {
    debug: args.includes('--debug'),
    noEvaluators: args.includes('--no-evaluators'),
    pattern: args.find(a => a.startsWith('--pattern='))?.split('=')[1],
    timeout: parseInt(args.find(a => a.startsWith('--timeout='))?.split('=')[1] || '60000'),
    model: args.find(a => a.startsWith('--model='))?.split('=')[1],
  };
}

function printResults(results: TestResult[]): void {
  const passed = results.filter(r => r.passed).length;
  const failed = results.length - passed;
  
  console.log('\n' + '='.repeat(70));
  console.log('TEST RESULTS');
  console.log('='.repeat(70));
  
  results.forEach((result, idx) => {
    const icon = result.passed ? '✅' : '❌';
    console.log(`\n${idx + 1}. ${icon} ${result.testCase.id} - ${result.testCase.name}`);
    console.log(`   Duration: ${result.duration}ms`);
    console.log(`   Events: ${result.events.length}`);
    console.log(`   Approvals: ${result.approvalsGiven}`);
    
    if (result.evaluation) {
      console.log(`   Violations: ${result.evaluation.totalViolations} (${result.evaluation.violationsBySeverity.error} errors, ${result.evaluation.violationsBySeverity.warning} warnings)`);
    }
    
    if (result.errors.length > 0) {
      console.log(`   Errors:`);
      result.errors.forEach(err => console.log(`     - ${err}`));
    }
  });
  
  console.log('\n' + '='.repeat(70));
  console.log(`SUMMARY: ${passed}/${results.length} tests passed (${failed} failed)`);
  console.log('='.repeat(70) + '\n');
  
  // Print failed tests details
  if (failed > 0) {
    console.log('\nFailed Tests:');
    results.filter(r => !r.passed).forEach(result => {
      console.log(`\n  ❌ ${result.testCase.id}`);
      if (result.errors.length > 0) {
        console.log(`     Errors: ${result.errors.join(', ')}`);
      }
      if (result.evaluation && result.evaluation.totalViolations > 0) {
        console.log(`     Violations: ${result.evaluation.totalViolations}`);
        result.evaluation.allViolations.forEach(v => {
          console.log(`       - [${v.severity}] ${v.type}: ${v.message}`);
        });
      }
    });
    console.log();
  }
}

async function main() {
  const args = parseArgs();
  
  console.log('🚀 OpenCode SDK Test Runner\n');
  
  // Find test files
  const testDir = join(__dirname, '../../..', 'agents/openagent/tests');
  const pattern = args.pattern || '**/*.yaml';
  const testFiles = glob.sync(pattern, { cwd: testDir, absolute: true });
  
  if (testFiles.length === 0) {
    console.error(`❌ No test files found matching pattern: ${pattern}`);
    process.exit(1);
  }
  
  console.log(`Found ${testFiles.length} test file(s):\n`);
  testFiles.forEach((f: string, idx: number) => {
    const relativePath = f.replace(testDir + '/', '');
    console.log(`  ${idx + 1}. ${relativePath}`);
  });
  console.log();
  
  // Load test cases
  console.log('Loading test cases...');
  const testCases = await loadTestCases(testFiles);
  console.log(`✅ Loaded ${testCases.length} test case(s)\n`);
  
  // Create test runner
  const runner = new TestRunner({
    debug: args.debug,
    defaultTimeout: args.timeout,
    runEvaluators: !args.noEvaluators,
    defaultModel: args.model, // Will use 'opencode/grok-code-fast' if not specified
  });
  
  if (args.model) {
    console.log(`Using model: ${args.model}`);
  } else {
    console.log('Using default model: opencode/grok-code-fast (free tier)');
  }
  console.log();
  
  try {
    // Start runner
    console.log('Starting test runner...');
    await runner.start();
    console.log('✅ Test runner started\n');
    
    // Run tests
    console.log('Running tests...\n');
    const results = await runner.runTests(testCases);
    
    // Stop runner
    console.log('\nStopping test runner...');
    await runner.stop();
    console.log('✅ Test runner stopped\n');
    
    // Print results
    printResults(results);
    
    // Exit with appropriate code
    const allPassed = results.every(r => r.passed);
    process.exit(allPassed ? 0 : 1);
  } catch (error) {
    console.error('\n❌ Fatal error:', (error as Error).message);
    console.error((error as Error).stack);
    
    try {
      await runner.stop();
    } catch {
      // Ignore cleanup errors
    }
    
    process.exit(1);
  }
}

// Run main
main().catch((error) => {
  console.error('Unhandled error:', error);
  process.exit(1);
});