03-read-before-write.yaml 1.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. id: golden-03-read-before-write
  2. name: "Golden 03: Read Before Write - Inspect Then Modify"
  3. description: |
  4. Tests that the agent reads/inspects before writing.
  5. A quality agent should:
  6. 1. First inspect the target directory/file
  7. 2. Understand what exists
  8. 3. Then propose and execute the write
  9. This tests the execution-balance evaluator which ensures
  10. agents don't blindly write without understanding context.
  11. Validates:
  12. - Agent reads before writing
  13. - execution-balance evaluator works correctly
  14. - Proper approval flow for writes
  15. category: developer
  16. prompts:
  17. - text: |
  18. Look at the evals/test_tmp/ directory, then create a file called evals/test_tmp/golden-test-03.txt with the text "read before write test passed".
  19. - text: |
  20. Yes, proceed with the plan.
  21. delayMs: 2000
  22. approvalStrategy:
  23. type: auto-approve
  24. behavior:
  25. mustUseTools:
  26. - write
  27. mustUseAnyOf:
  28. - [list]
  29. - [read]
  30. - [glob]
  31. minToolCalls: 2
  32. requiresApproval: true
  33. expectedViolations:
  34. - rule: execution-balance
  35. shouldViolate: false
  36. severity: warning
  37. - rule: approval-gate
  38. shouldViolate: false
  39. severity: error
  40. timeout: 90000
  41. tags:
  42. - golden
  43. - execution-balance
  44. - read-before-write
  45. - safe