validate-test-suites.sh 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. #!/bin/bash
  2. # validate-test-suites.sh
  3. # Validates all test suite JSON files against schema and checks paths exist
  4. #
  5. # Usage:
  6. # ./scripts/validation/validate-test-suites.sh [agent]
  7. # ./scripts/validation/validate-test-suites.sh openagent
  8. # ./scripts/validation/validate-test-suites.sh --all
  9. #
  10. # Exit codes:
  11. # 0 - All suites valid
  12. # 1 - Validation errors found
  13. # 2 - Missing dependencies
  14. set -e
  15. # Colors
  16. RED='\033[0;31m'
  17. GREEN='\033[0;32m'
  18. YELLOW='\033[1;33m'
  19. BLUE='\033[0;34m'
  20. NC='\033[0m' # No Color
  21. # Get script directory
  22. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  23. PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
  24. # Check for ajv-cli (JSON schema validator)
  25. # Use npx to run from local node_modules
  26. if ! command -v npx &> /dev/null; then
  27. echo -e "${RED}โŒ Error: npx not found (Node.js required)${NC}"
  28. exit 2
  29. fi
  30. # Check if ajv-cli is installed
  31. if ! (cd "$PROJECT_ROOT/evals/framework" && npx ajv validate -s /dev/null -d /dev/null 2>&1 | grep -q "valid"); then
  32. echo -e "${RED}โŒ Error: ajv-cli not found${NC}"
  33. echo ""
  34. echo "Install with: cd evals/framework && npm install"
  35. echo "Or globally: npm install -g ajv-cli"
  36. exit 2
  37. fi
  38. AJV_CMD="cd $PROJECT_ROOT/evals/framework && npx ajv"
  39. # Parse arguments
  40. AGENT="${1:-openagent}"
  41. VALIDATE_ALL=false
  42. if [[ "$1" == "--all" ]]; then
  43. VALIDATE_ALL=true
  44. fi
  45. # Counters
  46. TOTAL_SUITES=0
  47. VALID_SUITES=0
  48. INVALID_SUITES=0
  49. TOTAL_ERRORS=0
  50. TOTAL_WARNINGS=0
  51. echo -e "${BLUE}๐Ÿ” Validating Test Suites${NC}"
  52. echo ""
  53. # Function to validate a single suite
  54. validate_suite() {
  55. local agent=$1
  56. local suite_file=$2
  57. local suite_name
  58. suite_name=$(basename "$suite_file" .json)
  59. TOTAL_SUITES=$((TOTAL_SUITES + 1))
  60. echo -e "${BLUE}Validating:${NC} $agent/$suite_name"
  61. local schema_file="$PROJECT_ROOT/evals/agents/$agent/config/suite-schema.json"
  62. local tests_dir="$PROJECT_ROOT/evals/agents/$agent/tests"
  63. local suite_valid=true
  64. local suite_errors=0
  65. local suite_warnings=0
  66. # 1. Validate JSON syntax
  67. if ! jq empty "$suite_file" 2>/dev/null; then
  68. echo -e " ${RED}โŒ Invalid JSON syntax${NC}"
  69. suite_valid=false
  70. suite_errors=$((suite_errors + 1))
  71. INVALID_SUITES=$((INVALID_SUITES + 1))
  72. TOTAL_ERRORS=$((TOTAL_ERRORS + 1))
  73. return
  74. fi
  75. # 2. Validate against schema
  76. if [[ -f "$schema_file" ]]; then
  77. # shellcheck disable=SC2294
  78. validation_output=$(eval "$AJV_CMD validate -s \"$schema_file\" -d \"$suite_file\" --strict=false 2>&1")
  79. if ! echo "$validation_output" | grep -q "valid"; then
  80. echo -e " ${RED}โŒ Schema validation failed${NC}"
  81. echo "$validation_output" | grep -v "valid" | sed 's/^/ /'
  82. suite_valid=false
  83. suite_errors=$((suite_errors + 1))
  84. fi
  85. else
  86. echo -e " ${YELLOW}โš ๏ธ Schema not found: $schema_file${NC}"
  87. suite_warnings=$((suite_warnings + 1))
  88. fi
  89. # 3. Validate test paths exist
  90. local missing_tests=()
  91. local test_count=0
  92. while IFS= read -r test_path; do
  93. test_count=$((test_count + 1))
  94. local full_path="$tests_dir/$test_path"
  95. if [[ ! -f "$full_path" ]]; then
  96. missing_tests+=("$test_path")
  97. fi
  98. done < <(jq -r '.tests[].path' "$suite_file")
  99. # 4. Check test count matches
  100. local declared_count
  101. declared_count=$(jq -r '.totalTests' "$suite_file")
  102. if [[ "$test_count" -ne "$declared_count" ]]; then
  103. echo -e " ${YELLOW}โš ๏ธ Test count mismatch: found $test_count, declared $declared_count${NC}"
  104. suite_warnings=$((suite_warnings + 1))
  105. fi
  106. # 5. Report missing tests
  107. if [[ ${#missing_tests[@]} -gt 0 ]]; then
  108. echo -e " ${RED}โŒ Missing test files (${#missing_tests[@]}):${NC}"
  109. for missing in "${missing_tests[@]}"; do
  110. echo -e " - $missing"
  111. # Suggest similar files
  112. local dir
  113. dir=$(dirname "$missing")
  114. local filename
  115. filename=$(basename "$missing")
  116. if [[ -d "$tests_dir/$dir" ]]; then
  117. local similar
  118. # shellcheck disable=SC2001
  119. similar=$(find "$tests_dir/$dir" -name "*.yaml" -type f -exec basename {} \; | grep -i "$(echo "$filename" | cut -d'-' -f1)" | head -3)
  120. if [[ -n "$similar" ]]; then
  121. echo -e " ${YELLOW}Did you mean?${NC}"
  122. echo "$similar" | sed 's/^/ - /'
  123. fi
  124. fi
  125. done
  126. suite_valid=false
  127. suite_errors=$((suite_errors + ${#missing_tests[@]}))
  128. fi
  129. # 6. Summary for this suite
  130. if [[ "$suite_valid" == true ]]; then
  131. echo -e " ${GREEN}โœ… Valid${NC} ($test_count tests)"
  132. VALID_SUITES=$((VALID_SUITES + 1))
  133. else
  134. echo -e " ${RED}โŒ Invalid${NC} ($suite_errors errors, $suite_warnings warnings)"
  135. INVALID_SUITES=$((INVALID_SUITES + 1))
  136. fi
  137. TOTAL_ERRORS=$((TOTAL_ERRORS + suite_errors))
  138. TOTAL_WARNINGS=$((TOTAL_WARNINGS + suite_warnings))
  139. echo ""
  140. }
  141. # Validate suites
  142. if [[ "$VALIDATE_ALL" == true ]]; then
  143. # Validate all agents
  144. for agent_dir in "$PROJECT_ROOT/evals/agents"/*; do
  145. if [[ -d "$agent_dir" ]]; then
  146. agent=$(basename "$agent_dir")
  147. # Check for suites directory
  148. suites_dir="$agent_dir/config/suites"
  149. if [[ -d "$suites_dir" ]]; then
  150. for suite_file in "$suites_dir"/*.json; do
  151. if [[ -f "$suite_file" ]]; then
  152. validate_suite "$agent" "$suite_file"
  153. fi
  154. done
  155. fi
  156. # Check for legacy core-tests.json
  157. legacy_file="$agent_dir/config/core-tests.json"
  158. if [[ -f "$legacy_file" ]]; then
  159. validate_suite "$agent" "$legacy_file"
  160. fi
  161. fi
  162. done
  163. else
  164. # Validate specific agent
  165. agent_dir="$PROJECT_ROOT/evals/agents/$AGENT"
  166. if [[ ! -d "$agent_dir" ]]; then
  167. echo -e "${RED}โŒ Agent not found: $AGENT${NC}"
  168. exit 1
  169. fi
  170. # Check for suites directory
  171. suites_dir="$agent_dir/config/suites"
  172. if [[ -d "$suites_dir" ]]; then
  173. for suite_file in "$suites_dir"/*.json; do
  174. if [[ -f "$suite_file" ]]; then
  175. validate_suite "$AGENT" "$suite_file"
  176. fi
  177. done
  178. fi
  179. # Check for legacy core-tests.json
  180. legacy_file="$agent_dir/config/core-tests.json"
  181. if [[ -f "$legacy_file" ]]; then
  182. validate_suite "$AGENT" "$legacy_file"
  183. fi
  184. if [[ $TOTAL_SUITES -eq 0 ]]; then
  185. echo -e "${YELLOW}โš ๏ธ No test suites found for agent: $AGENT${NC}"
  186. echo ""
  187. echo "Expected locations:"
  188. echo " - $suites_dir/*.json"
  189. echo " - $legacy_file"
  190. exit 1
  191. fi
  192. fi
  193. # Final summary
  194. echo -e "${BLUE}โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•${NC}"
  195. echo -e "${BLUE}Summary${NC}"
  196. echo -e "${BLUE}โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•${NC}"
  197. echo -e "Total suites: $TOTAL_SUITES"
  198. echo -e "${GREEN}Valid suites: $VALID_SUITES${NC}"
  199. if [[ $INVALID_SUITES -gt 0 ]]; then
  200. echo -e "${RED}Invalid suites: $INVALID_SUITES${NC}"
  201. fi
  202. if [[ $TOTAL_ERRORS -gt 0 ]]; then
  203. echo -e "${RED}Total errors: $TOTAL_ERRORS${NC}"
  204. fi
  205. if [[ $TOTAL_WARNINGS -gt 0 ]]; then
  206. echo -e "${YELLOW}Total warnings: $TOTAL_WARNINGS${NC}"
  207. fi
  208. echo ""
  209. # Exit with appropriate code
  210. if [[ $INVALID_SUITES -gt 0 ]] || [[ $TOTAL_ERRORS -gt 0 ]]; then
  211. echo -e "${RED}โŒ Validation failed${NC}"
  212. exit 1
  213. else
  214. echo -e "${GREEN}โœ… All suites valid${NC}"
  215. exit 0
  216. fi