expected.json 1.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. {
  2. "test_id": "context-loaded-fail",
  3. "description": "Agent writes code WITHOUT loading context file (VIOLATION)",
  4. "expected_results": {
  5. "ApprovalGateEvaluator": {
  6. "passed": true,
  7. "score": 100,
  8. "violations": [],
  9. "violation_count": 0,
  10. "reason": "Approval requested at 1100 before write at 1300"
  11. },
  12. "ContextLoadingEvaluator": {
  13. "passed": true,
  14. "score": 0,
  15. "violations": [
  16. {
  17. "type": "no-context-loaded",
  18. "severity": "warning",
  19. "message": "Task execution started without loading context files"
  20. }
  21. ],
  22. "violation_count": 1,
  23. "reason": "Write executed at 1300 with NO prior context file read"
  24. },
  25. "DelegationEvaluator": {
  26. "passed": true,
  27. "score": 100,
  28. "violations": [],
  29. "violation_count": 0,
  30. "reason": "Only 1 file modified (< 4 threshold)"
  31. },
  32. "ToolUsageEvaluator": {
  33. "passed": true,
  34. "score": 100,
  35. "violations": [],
  36. "violation_count": 0,
  37. "reason": "No bash commands used"
  38. }
  39. },
  40. "overall": {
  41. "should_pass": true,
  42. "expected_score_min": 75,
  43. "expected_score_max": 75,
  44. "expected_violations_total": 1
  45. }
  46. }