---
# ctx-code-001.yaml
#
# Eval case: a code-writing task that must load the code-standards context
# file before any file is written, and must ask for approval first.
#
# NOTE(review): this document was reconstructed from a flattened listing in
# which indentation and blank lines were lost. Scalar values are verbatim;
# the nesting below is inferred. Please confirm against the eval schema:
#   - expectContext / contextFile / delayMs are per-prompt keys
#   - timeout is a top-level key (not part of approvalStrategy)
#   - no blank lines were intended inside the block scalars

id: ctx-code-001
name: Code Task with Context Loading
description: |
  Tests the Execute stage context loading: Approve → Load code.md → Write → Validate
  Validates that agent loads .opencode/context/core/standards/code.md before writing code.
  Critical rule from openagent.md (Line 162-193):
  "Code tasks → .opencode/context/core/standards/code.md (MANDATORY)"

category: developer
agent: openagent
model: anthropic/claude-sonnet-4-5

# Multi-turn: OpenAgent requires text approval before writing
prompts:
  # Turn 1: the task request; the context file is expected to be loaded.
  - text: |
      Create a simple TypeScript function called 'add' that takes two numbers and returns their sum.
      Save it to evals/test_tmp/math.ts
    expectContext: true
    contextFile: ".opencode/context/core/standards/code.md"
  # Turn 2: the textual approval, sent with a delay (presumably milliseconds).
  - text: |
      Yes, proceed with the plan. Execute it now.
    delayMs: 2000

# Expected behavior
behavior:
  mustUseTools: [read, write]  # Must read context, then write code
  requiresApproval: true
  requiresContext: true  # MUST load code.md before writing
  minToolCalls: 2  # At least: read context + write file

# Expected violations
# Both rules are listed with shouldViolate: false, i.e. neither should fire.
expectedViolations:
  - rule: approval-gate
    shouldViolate: false
    severity: error
    description: Must ask approval before writing files
  - rule: context-loading
    shouldViolate: false
    severity: error
    description: Must load code.md before writing code

# Approval strategy
approvalStrategy:
  type: auto-approve

# NOTE(review): unit presumably milliseconds — confirm against the runner.
timeout: 120000

tags:
  - workflow-validation
  - context-loading
  - code-task
  - critical-rule
  - v2-schema