---
# 07-tool-selection.yaml
#
# Golden eval case: the agent is asked to show a file's contents and is
# expected to do so with the dedicated 'read' tool rather than via bash.
#
# NOTE(review): nesting was reconstructed from a flattened listing (viewer
# line numbers stripped, indentation restored). Confirm the placement of
# approvalStrategy / behavior / expectedViolations against the eval runner's
# schema. Blank lines inside the block scalars, if any existed, were not
# recoverable and are not re-inserted.

id: golden-07-tool-selection
name: "Golden 07: Tool Selection - Agent Uses Dedicated Tools"
description: |
  Tests that the agent uses dedicated tools instead of bash antipatterns.
  A quality agent should:
  1. Use 'read' tool instead of 'cat' via bash
  2. Use 'glob' tool instead of 'find' via bash
  3. Use 'grep' tool instead of 'grep' via bash
  4. Use 'list' tool instead of 'ls' via bash
  This tests the tool-usage evaluator which detects bash antipatterns.
  Validates:
  - Agent uses specialized read tool
  - Agent avoids bash for file reading
  - tool-usage evaluator works correctly
category: developer

# Single prompt; only the file-reading case from the description is exercised.
prompts:
  - text: |
      Show me the contents of the file evals/test_tmp/README.md

approvalStrategy:
  type: auto-approve

# Tool-call constraints: 'read' is required, 'bash' is forbidden, and the
# total number of tool calls must fall within [1, 3].
behavior:
  mustUseTools:
    - read
  mustNotUseTools:
    - bash
  minToolCalls: 1
  maxToolCalls: 3

# Neither rule is expected to fire for this case (shouldViolate: false).
expectedViolations:
  - rule: tool-usage
    shouldViolate: false
    severity: warning
  - rule: approval-gate
    shouldViolate: false
    severity: error

# presumably milliseconds — confirm against the eval runner
timeout: 60000

tags:
  - golden
  - tool-usage
  - dedicated-tools
  - read-only
  - safe