#!/usr/bin/env node
/**
 * Main CLI entry point for SDK-based test execution.
 *
 * Usage:
 *   npm run eval:sdk
 *   npm run eval:sdk -- --debug
 *   npm run eval:sdk -- --no-evaluators
 *   npm run eval:sdk -- --model=opencode/grok-code-fast
 *   npm run eval:sdk -- --model=anthropic/claude-3-5-sonnet-20241022
 *   npm run eval:sdk -- --pattern="developer/*.yaml" --model=openai/gpt-4-turbo
 *
 * Options:
 *   --debug                 Enable debug logging
 *   --no-evaluators         Skip running evaluators (faster)
 *   --model=PROVIDER/MODEL  Override default model (default: opencode/grok-code-fast)
 *   --pattern=GLOB          Run specific test files (default: star-star/star.yaml,
 *                           i.e. every .yaml file under the tests directory)
 *   --timeout=MS            Test timeout in milliseconds (default: 60000)
 */
import { dirname, join, relative } from 'path';
import { fileURLToPath } from 'url';

import glob from 'glob';

import { loadTestCase, loadTestCases } from './test-case-loader.js';
import { TestRunner } from './test-runner.js';
import type { TestResult } from './test-runner.js';
  26. const __filename = fileURLToPath(import.meta.url);
  27. const __dirname = dirname(__filename);
  28. interface CliArgs {
  29. debug: boolean;
  30. noEvaluators: boolean;
  31. pattern?: string;
  32. timeout?: number;
  33. model?: string;
  34. }
  35. function parseArgs(): CliArgs {
  36. const args = process.argv.slice(2);
  37. return {
  38. debug: args.includes('--debug'),
  39. noEvaluators: args.includes('--no-evaluators'),
  40. pattern: args.find(a => a.startsWith('--pattern='))?.split('=')[1],
  41. timeout: parseInt(args.find(a => a.startsWith('--timeout='))?.split('=')[1] || '60000'),
  42. model: args.find(a => a.startsWith('--model='))?.split('=')[1],
  43. };
  44. }
  45. function printResults(results: TestResult[]): void {
  46. const passed = results.filter(r => r.passed).length;
  47. const failed = results.length - passed;
  48. console.log('\n' + '='.repeat(70));
  49. console.log('TEST RESULTS');
  50. console.log('='.repeat(70));
  51. results.forEach((result, idx) => {
  52. const icon = result.passed ? '✅' : '❌';
  53. console.log(`\n${idx + 1}. ${icon} ${result.testCase.id} - ${result.testCase.name}`);
  54. console.log(` Duration: ${result.duration}ms`);
  55. console.log(` Events: ${result.events.length}`);
  56. console.log(` Approvals: ${result.approvalsGiven}`);
  57. if (result.evaluation) {
  58. console.log(` Violations: ${result.evaluation.totalViolations} (${result.evaluation.violationsBySeverity.error} errors, ${result.evaluation.violationsBySeverity.warning} warnings)`);
  59. }
  60. if (result.errors.length > 0) {
  61. console.log(` Errors:`);
  62. result.errors.forEach(err => console.log(` - ${err}`));
  63. }
  64. });
  65. console.log('\n' + '='.repeat(70));
  66. console.log(`SUMMARY: ${passed}/${results.length} tests passed (${failed} failed)`);
  67. console.log('='.repeat(70) + '\n');
  68. // Print failed tests details
  69. if (failed > 0) {
  70. console.log('\nFailed Tests:');
  71. results.filter(r => !r.passed).forEach(result => {
  72. console.log(`\n ❌ ${result.testCase.id}`);
  73. if (result.errors.length > 0) {
  74. console.log(` Errors: ${result.errors.join(', ')}`);
  75. }
  76. if (result.evaluation && result.evaluation.totalViolations > 0) {
  77. console.log(` Violations: ${result.evaluation.totalViolations}`);
  78. result.evaluation.allViolations.forEach(v => {
  79. console.log(` - [${v.severity}] ${v.type}: ${v.message}`);
  80. });
  81. }
  82. });
  83. console.log();
  84. }
  85. }
  86. async function main() {
  87. const args = parseArgs();
  88. console.log('🚀 OpenCode SDK Test Runner\n');
  89. // Find test files
  90. const testDir = join(__dirname, '../../..', 'agents/openagent/tests');
  91. const pattern = args.pattern || '**/*.yaml';
  92. const testFiles = glob.sync(pattern, { cwd: testDir, absolute: true });
  93. if (testFiles.length === 0) {
  94. console.error(`❌ No test files found matching pattern: ${pattern}`);
  95. process.exit(1);
  96. }
  97. console.log(`Found ${testFiles.length} test file(s):\n`);
  98. testFiles.forEach((f: string, idx: number) => {
  99. const relativePath = f.replace(testDir + '/', '');
  100. console.log(` ${idx + 1}. ${relativePath}`);
  101. });
  102. console.log();
  103. // Load test cases
  104. console.log('Loading test cases...');
  105. const testCases = await loadTestCases(testFiles);
  106. console.log(`✅ Loaded ${testCases.length} test case(s)\n`);
  107. // Create test runner
  108. const runner = new TestRunner({
  109. debug: args.debug,
  110. defaultTimeout: args.timeout,
  111. runEvaluators: !args.noEvaluators,
  112. defaultModel: args.model, // Will use 'opencode/grok-code-fast' if not specified
  113. });
  114. if (args.model) {
  115. console.log(`Using model: ${args.model}`);
  116. } else {
  117. console.log('Using default model: opencode/grok-code-fast (free tier)');
  118. }
  119. console.log();
  120. try {
  121. // Start runner
  122. console.log('Starting test runner...');
  123. await runner.start();
  124. console.log('✅ Test runner started\n');
  125. // Run tests
  126. console.log('Running tests...\n');
  127. const results = await runner.runTests(testCases);
  128. // Stop runner
  129. console.log('\nStopping test runner...');
  130. await runner.stop();
  131. console.log('✅ Test runner stopped\n');
  132. // Print results
  133. printResults(results);
  134. // Exit with appropriate code
  135. const allPassed = results.every(r => r.passed);
  136. process.exit(allPassed ? 0 : 1);
  137. } catch (error) {
  138. console.error('\n❌ Fatal error:', (error as Error).message);
  139. console.error((error as Error).stack);
  140. try {
  141. await runner.stop();
  142. } catch {
  143. // Ignore cleanup errors
  144. }
  145. process.exit(1);
  146. }
  147. }
  148. // Run main
  149. main().catch((error) => {
  150. console.error('Unhandled error:', error);
  151. process.exit(1);
  152. });