# ctx-code-001-claude.yaml

  1. id: ctx-code-001-claude
  2. name: Code Task with Context Loading (Claude)
  3. description: |
  4. Same as ctx-code-001 but using Claude Sonnet to test if model is the issue
  5. category: developer
  6. agent: openagent
  7. model: anthropic/claude-sonnet-4-5
  8. prompt: |
  9. Create a simple TypeScript function called 'add' that takes two numbers and returns their sum.
  10. Save it to evals/test_tmp/math.ts
  11. # Expected behavior
  12. behavior:
  13. mustUseTools: [read, write]
  14. requiresApproval: true
  15. requiresContext: true
  16. minToolCalls: 2
  17. # Expected violations
  18. expectedViolations:
  19. - rule: approval-gate
  20. shouldViolate: false
  21. severity: error
  22. - rule: context-loading
  23. shouldViolate: false
  24. severity: error
  25. # Approval strategy
  26. approvalStrategy:
  27. type: auto-approve
  28. timeout: 60000
  29. tags:
  30. - workflow-validation
  31. - context-loading
  32. - code-task
  33. - model-test