| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073 |
- <!DOCTYPE html>
- <html lang="en">
- <head>
- <meta charset="UTF-8">
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
- <title>OpenCode Agent Test Dashboard</title>
- <style>
- /* CSS Variables for theming */
- :root {
- --bg-primary: #ffffff;
- --bg-secondary: #f8f9fa;
- --bg-card: #ffffff;
- --text-primary: #212529;
- --text-secondary: #6c757d;
- --border-color: #dee2e6;
- --success: #28a745;
- --danger: #dc3545;
- --warning: #ffc107;
- --info: #17a2b8;
- --primary: #007bff;
- --shadow: rgba(0, 0, 0, 0.1);
- }
- [data-theme="dark"] {
- --bg-primary: #1a1a1a;
- --bg-secondary: #2d2d2d;
- --bg-card: #242424;
- --text-primary: #e9ecef;
- --text-secondary: #adb5bd;
- --border-color: #495057;
- --shadow: rgba(0, 0, 0, 0.3);
- }
- * {
- margin: 0;
- padding: 0;
- box-sizing: border-box;
- }
- body {
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
- background: var(--bg-primary);
- color: var(--text-primary);
- line-height: 1.6;
- transition: background-color 0.3s, color 0.3s;
- }
- .container {
- max-width: 1400px;
- margin: 0 auto;
- padding: 20px;
- }
- /* Header */
- header {
- background: var(--bg-card);
- border-bottom: 2px solid var(--border-color);
- padding: 20px 0;
- margin-bottom: 30px;
- box-shadow: 0 2px 4px var(--shadow);
- }
- .header-content {
- display: flex;
- justify-content: space-between;
- align-items: center;
- flex-wrap: wrap;
- gap: 20px;
- }
- h1 {
- font-size: 28px;
- font-weight: 600;
- color: var(--text-primary);
- }
- .header-actions {
- display: flex;
- gap: 10px;
- align-items: center;
- }
- /* Buttons */
- button {
- padding: 8px 16px;
- border: 1px solid var(--border-color);
- background: var(--bg-card);
- color: var(--text-primary);
- border-radius: 6px;
- cursor: pointer;
- font-size: 14px;
- transition: all 0.2s;
- }
- button:hover {
- background: var(--bg-secondary);
- transform: translateY(-1px);
- }
- button.primary {
- background: var(--primary);
- color: white;
- border-color: var(--primary);
- }
- button.primary:hover {
- background: #0056b3;
- }
- /* Filters */
- .filters {
- background: var(--bg-card);
- padding: 20px;
- border-radius: 8px;
- margin-bottom: 30px;
- box-shadow: 0 2px 4px var(--shadow);
- }
- .filter-row {
- display: flex;
- gap: 15px;
- flex-wrap: wrap;
- align-items: center;
- }
- .filter-group {
- display: flex;
- flex-direction: column;
- gap: 5px;
- }
- .filter-group label {
- font-size: 12px;
- font-weight: 600;
- color: var(--text-secondary);
- text-transform: uppercase;
- }
- select, input {
- padding: 8px 12px;
- border: 1px solid var(--border-color);
- background: var(--bg-primary);
- color: var(--text-primary);
- border-radius: 6px;
- font-size: 14px;
- }
- /* Stats Cards */
- .stats-grid {
- display: grid;
- grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
- gap: 20px;
- margin-bottom: 30px;
- }
- .stat-card {
- background: var(--bg-card);
- padding: 20px;
- border-radius: 8px;
- box-shadow: 0 2px 4px var(--shadow);
- border-left: 4px solid var(--primary);
- }
- .stat-card.success {
- border-left-color: var(--success);
- }
- .stat-card.danger {
- border-left-color: var(--danger);
- }
- .stat-card.warning {
- border-left-color: var(--warning);
- }
- .stat-label {
- font-size: 12px;
- font-weight: 600;
- color: var(--text-secondary);
- text-transform: uppercase;
- margin-bottom: 8px;
- }
- .stat-value {
- font-size: 32px;
- font-weight: 700;
- color: var(--text-primary);
- }
- .stat-subtitle {
- font-size: 14px;
- color: var(--text-secondary);
- margin-top: 5px;
- }
- /* Chart Container */
- .chart-container {
- background: var(--bg-card);
- padding: 20px;
- border-radius: 8px;
- margin-bottom: 30px;
- box-shadow: 0 2px 4px var(--shadow);
- }
- .chart-container h2 {
- font-size: 18px;
- margin-bottom: 15px;
- color: var(--text-primary);
- }
- #trendChart {
- max-height: 300px;
- }
- /* Test Results Table */
- .results-container {
- background: var(--bg-card);
- border-radius: 8px;
- box-shadow: 0 2px 4px var(--shadow);
- overflow: hidden;
- }
- .results-header {
- padding: 20px;
- border-bottom: 1px solid var(--border-color);
- display: flex;
- justify-content: space-between;
- align-items: center;
- }
- .results-header h2 {
- font-size: 18px;
- color: var(--text-primary);
- }
- .search-box {
- position: relative;
- }
- .search-box input {
- padding-left: 35px;
- width: 300px;
- }
- .search-box::before {
- content: "🔍";
- position: absolute;
- left: 12px;
- top: 50%;
- transform: translateY(-50%);
- }
- table {
- width: 100%;
- border-collapse: collapse;
- }
- thead {
- background: var(--bg-secondary);
- }
- th {
- padding: 12px 16px;
- text-align: left;
- font-size: 12px;
- font-weight: 600;
- color: var(--text-secondary);
- text-transform: uppercase;
- cursor: pointer;
- user-select: none;
- }
- th:hover {
- background: var(--border-color);
- }
- th.sortable::after {
- content: " ↕";
- opacity: 0.3;
- }
- th.sort-asc::after {
- content: " ↑";
- opacity: 1;
- }
- th.sort-desc::after {
- content: " ↓";
- opacity: 1;
- }
- td {
- padding: 12px 16px;
- border-bottom: 1px solid var(--border-color);
- }
- tr:hover {
- background: var(--bg-secondary);
- }
- .status-badge {
- display: inline-block;
- padding: 4px 8px;
- border-radius: 4px;
- font-size: 12px;
- font-weight: 600;
- }
- .status-badge.passed {
- background: #d4edda;
- color: #155724;
- }
- .status-badge.failed {
- background: #f8d7da;
- color: #721c24;
- }
- [data-theme="dark"] .status-badge.passed {
- background: #1e4620;
- color: #7dce82;
- }
- [data-theme="dark"] .status-badge.failed {
- background: #5a1f1f;
- color: #f5a3a3;
- }
- .category-badge {
- display: inline-block;
- padding: 4px 8px;
- border-radius: 4px;
- font-size: 11px;
- font-weight: 600;
- background: var(--bg-secondary);
- color: var(--text-secondary);
- }
- .variant-badge {
- display: inline-block;
- padding: 4px 8px;
- border-radius: 4px;
- font-size: 11px;
- font-weight: 600;
- background: #e3f2fd;
- color: #1565c0;
- }
- .variant-badge.default {
- background: var(--bg-secondary);
- color: var(--text-secondary);
- }
- [data-theme="dark"] .variant-badge {
- background: #1a237e;
- color: #90caf9;
- }
- [data-theme="dark"] .variant-badge.default {
- background: var(--bg-secondary);
- color: var(--text-secondary);
- }
- .expandable-row {
- cursor: pointer;
- }
- .details-row {
- display: none;
- background: var(--bg-secondary);
- }
- .details-row.show {
- display: table-row;
- }
- .details-content {
- padding: 20px;
- }
- .violation-item {
- padding: 10px;
- margin: 5px 0;
- border-left: 3px solid var(--danger);
- background: var(--bg-card);
- border-radius: 4px;
- }
- .violation-item.warning {
- border-left-color: var(--warning);
- }
- /* Loading State */
- .loading {
- text-align: center;
- padding: 40px;
- color: var(--text-secondary);
- }
- .spinner {
- border: 3px solid var(--border-color);
- border-top: 3px solid var(--primary);
- border-radius: 50%;
- width: 40px;
- height: 40px;
- animation: spin 1s linear infinite;
- margin: 0 auto 20px;
- }
- @keyframes spin {
- 0% { transform: rotate(0deg); }
- 100% { transform: rotate(360deg); }
- }
- /* Empty State */
- .empty-state {
- text-align: center;
- padding: 60px 20px;
- color: var(--text-secondary);
- }
- .empty-state-icon {
- font-size: 64px;
- margin-bottom: 20px;
- opacity: 0.3;
- }
- /* Responsive */
- @media (max-width: 768px) {
- .header-content {
- flex-direction: column;
- align-items: flex-start;
- }
- .filter-row {
- flex-direction: column;
- align-items: stretch;
- }
- .search-box input {
- width: 100%;
- }
- table {
- font-size: 14px;
- }
- th, td {
- padding: 8px;
- }
- }
- /* Theme Toggle */
- .theme-toggle {
- background: none;
- border: none;
- font-size: 24px;
- cursor: pointer;
- padding: 8px;
- }
- </style>
- </head>
- <body>
- <header>
- <div class="container">
- <div class="header-content">
- <h1>🎯 OpenCode Agent Test Dashboard</h1>
- <div class="header-actions">
- <button id="refreshBtn" class="primary">🔄 Refresh</button>
- <button id="exportBtn">📊 Export CSV</button>
- <button class="theme-toggle" id="themeToggle" title="Toggle dark mode">🌙</button>
- </div>
- </div>
- </div>
- </header>
- <div class="container">
- <!-- Filters -->
- <div class="filters">
- <div class="filter-row">
- <div class="filter-group">
- <label>Agent</label>
- <select id="agentFilter">
- <option value="all">All Agents</option>
- </select>
- </div>
- <div class="filter-group">
- <label>Category</label>
- <select id="categoryFilter">
- <option value="all">All Categories</option>
- <option value="developer">Developer</option>
- <option value="business">Business</option>
- <option value="creative">Creative</option>
- <option value="edge-case">Edge Case</option>
- </select>
- </div>
- <div class="filter-group">
- <label>Status</label>
- <select id="statusFilter">
- <option value="all">All Tests</option>
- <option value="passed">Passed Only</option>
- <option value="failed">Failed Only</option>
- </select>
- </div>
- <div class="filter-group">
- <label>Prompt Variant</label>
- <select id="variantFilter">
- <option value="all">All Variants</option>
- </select>
- </div>
- <div class="filter-group">
- <label>Time Range</label>
- <select id="timeFilter">
- <option value="latest">Latest Run</option>
- <option value="today">Today</option>
- <option value="week">Last 7 Days</option>
- <option value="month">Last 30 Days</option>
- </select>
- </div>
- </div>
- </div>
- <!-- Stats Cards -->
- <div class="stats-grid" id="statsGrid">
- <div class="stat-card">
- <div class="stat-label">Total Tests</div>
- <div class="stat-value" id="totalTests">-</div>
- <div class="stat-subtitle">Across all agents</div>
- </div>
- <div class="stat-card success">
- <div class="stat-label">Pass Rate</div>
- <div class="stat-value" id="passRate">-</div>
- <div class="stat-subtitle" id="passedCount">- passed</div>
- </div>
- <div class="stat-card danger">
- <div class="stat-label">Failed Tests</div>
- <div class="stat-value" id="failedTests">-</div>
- <div class="stat-subtitle" id="failedSubtitle">-</div>
- </div>
- <div class="stat-card warning">
- <div class="stat-label">Avg Duration</div>
- <div class="stat-value" id="avgDuration">-</div>
- <div class="stat-subtitle">Per test</div>
- </div>
- </div>
- <!-- Trend Chart -->
- <div class="chart-container">
- <h2>📈 Pass Rate Trend (Last 30 Days)</h2>
- <canvas id="trendChart"></canvas>
- </div>
- <!-- Test Results Table -->
- <div class="results-container">
- <div class="results-header">
- <h2>Test Results</h2>
- <div class="search-box">
- <input type="text" id="searchInput" placeholder="Search tests...">
- </div>
- </div>
- <div id="tableContainer">
- <div class="loading">
- <div class="spinner"></div>
- <p>Loading test results...</p>
- </div>
- </div>
- </div>
- </div>
- <!-- Chart.js from CDN -->
- <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
-
- <script>
// Shared dashboard state, mutated by the functions below.
let allResults = [];      // run documents as returned by fetchResults()
let filteredResults = []; // flattened tests that survive the active filters
let currentSort = { column: null, direction: 'asc' };
let trendChart = null;    // current Chart instance, if one has been drawn

// Boot sequence: theme first, then wiring, then the initial data load.
document.addEventListener('DOMContentLoaded', function onDashboardReady() {
    initializeTheme();
    setupEventListeners();
    loadResults();
});
// Theme Management

/**
 * Applies the theme stored under the 'theme' localStorage key (falling back
 * to 'light') to the root element and syncs the toggle button icon.
 */
function initializeTheme() {
    const storedTheme = localStorage.getItem('theme');
    const activeTheme = storedTheme || 'light';

    document.documentElement.setAttribute('data-theme', activeTheme);
    updateThemeIcon(activeTheme);
}
/**
 * Flips the root element between the 'dark' and 'light' themes, persists the
 * choice in localStorage and refreshes the toggle icon.
 */
function toggleTheme() {
    const root = document.documentElement;
    const isDark = root.getAttribute('data-theme') === 'dark';
    const nextTheme = isDark ? 'light' : 'dark';

    root.setAttribute('data-theme', nextTheme);
    localStorage.setItem('theme', nextTheme);
    updateThemeIcon(nextTheme);
}
/**
 * Shows a sun on the toggle button while the dark theme is active and a
 * moon for any other theme value.
 * @param {string} theme - Currently active theme name.
 */
function updateThemeIcon(theme) {
    const toggleButton = document.getElementById('themeToggle');
    let icon = '🌙';
    if (theme === 'dark') {
        icon = '☀️';
    }
    toggleButton.textContent = icon;
}
// Event Listeners

/**
 * Wires every header button and filter control to its handler.
 * Changing the time range triggers a full reload; the other filters only
 * re-filter the data that is already loaded.
 */
function setupEventListeners() {
    // [element id, DOM event, handler] - registered in this order.
    const bindings = [
        ['themeToggle', 'click', toggleTheme],
        ['refreshBtn', 'click', loadResults],
        ['exportBtn', 'click', exportToCSV],
        ['searchInput', 'input', applyFilters],
        ['agentFilter', 'change', applyFilters],
        ['categoryFilter', 'change', applyFilters],
        ['statusFilter', 'change', applyFilters],
        ['variantFilter', 'change', applyFilters],
        ['timeFilter', 'change', loadResults],
    ];

    for (const [elementId, eventName, handler] of bindings) {
        document.getElementById(elementId).addEventListener(eventName, handler);
    }
}
// Load Results

/**
 * Shows the spinner, fetches the runs for the selected time range, stores
 * them in `allResults` and refreshes filters, table, stats and chart.
 * Any failure along the way is reported through showError().
 * @returns {Promise<void>}
 */
async function loadResults() {
    showLoading();

    try {
        const selectedRange = document.getElementById('timeFilter').value;
        const loaded = await fetchResults(selectedRange);

        allResults = loaded;

        populateAgentFilter(loaded);
        populateVariantFilter(loaded);
        applyFilters();
        updateStats(loaded);
        updateTrendChart(loaded);
    } catch (err) {
        showError('Failed to load results: ' + err.message);
    }
}
// Fetch Results

/**
 * Loads the run documents for the selected time range.
 *
 * 'latest' loads latest.json; any other value loads every file returned by
 * fetchHistoryFiles(). Every response is checked for an HTTP error status
 * before its body is parsed, so a missing history file is reported as such
 * instead of as a JSON syntax error.
 *
 * @param {string} timeFilter - Value of the time range select.
 * @returns {Promise<object[]>} One parsed run document per loaded file.
 * @throws {Error} With setup instructions in the message and the underlying
 *   failure attached as `cause`.
 */
async function fetchResults(timeFilter) {
    // Fetch and parse one JSON file, rejecting on non-2xx responses.
    const fetchJson = async (file) => {
        const response = await fetch(file);
        if (!response.ok) {
            throw new Error(`Cannot load ${file}. See instructions below.`);
        }
        return response.json();
    };

    try {
        if (timeFilter === 'latest') {
            return [await fetchJson('latest.json')];
        }

        const files = await fetchHistoryFiles(timeFilter);
        return await Promise.all(files.map((file) => fetchJson(file)));
    } catch (error) {
        // If fetch fails (CORS/local file), show helpful message
        throw new Error('Cannot load results from local file. Please use one of these methods:\n\n' +
            '1. Serve via HTTP:\n' +
            '   cd evals/results && python3 -m http.server 8000\n' +
            '   Then open: http://localhost:8000\n\n' +
            '2. Use browser flag:\n' +
            '   Chrome: --allow-file-access-from-files\n\n' +
            'Original error: ' + error.message, { cause: error });
    }
}
// Fetch History Files

/**
 * Resolves the list of result files for a time range.
 *
 * Placeholder: the range is currently ignored and only latest.json is
 * returned. A real implementation needs a file-listing endpoint or a
 * generated index.json describing the available history files.
 *
 * @param {string} timeFilter - Requested time range (unused for now).
 * @returns {Promise<string[]>} Relative URLs of the files to load.
 */
async function fetchHistoryFiles(timeFilter) {
    const availableFiles = ['latest.json'];
    return availableFiles;
}
// Populate Agent Filter

/**
 * Rebuilds the agent dropdown from the agents present in the loaded runs.
 *
 * Runs without an agent name are skipped (they used to crash the label
 * formatting). The option that was selected before the rebuild is restored
 * when it still exists, so a refresh no longer silently resets this filter
 * while the category and status filters keep their values.
 *
 * @param {object[]} results - Run documents; each is read via `meta.agent`.
 */
function populateAgentFilter(results) {
    const agents = [...new Set(results.map(r => r.meta.agent).filter(Boolean))];
    const select = document.getElementById('agentFilter');
    const previousSelection = select.value;

    // Keep "All Agents" option
    select.innerHTML = '<option value="all">All Agents</option>';

    agents.forEach(agent => {
        const label = String(agent);
        const option = document.createElement('option');
        option.value = agent;
        option.textContent = label.charAt(0).toUpperCase() + label.slice(1);
        select.appendChild(option);
    });

    // Resetting innerHTML dropped the selection; put it back if still valid.
    if (agents.includes(previousSelection)) {
        select.value = previousSelection;
    }
}
// Populate Variant Filter

/**
 * Rebuilds the prompt-variant dropdown from the loaded runs.
 *
 * A "Default (no variant)" entry (value 'none') is offered when at least
 * one run has no `meta.prompt_variant`. The option selected before the
 * rebuild is restored when it is still offered, so refreshing does not
 * silently reset this filter.
 *
 * @param {object[]} results - Run documents; each is read via
 *   `meta.prompt_variant`.
 */
function populateVariantFilter(results) {
    const variants = [...new Set(results.map(r => r.meta.prompt_variant).filter(Boolean))];
    const select = document.getElementById('variantFilter');
    const previousSelection = select.value;

    // Keep "All Variants" option
    select.innerHTML = '<option value="all">All Variants</option>';

    // Add "No Variant" option if there are results without variant
    const hasNoVariant = results.some(r => !r.meta.prompt_variant);
    if (hasNoVariant) {
        const option = document.createElement('option');
        option.value = 'none';
        option.textContent = 'Default (no variant)';
        select.appendChild(option);
    }

    variants.forEach(variant => {
        const label = String(variant);
        const option = document.createElement('option');
        option.value = variant;
        option.textContent = label.charAt(0).toUpperCase() + label.slice(1);
        select.appendChild(option);
    });

    // Resetting innerHTML dropped the selection; put it back if still valid.
    const stillOffered = previousSelection === 'none'
        ? hasNoVariant
        : variants.includes(previousSelection);
    if (stillOffered) {
        select.value = previousSelection;
    }
}
// Apply Filters

/**
 * Flattens every loaded run into per-test records (each annotated with its
 * run's agent, timestamp, model, prompt variant and model family), keeps
 * the ones matching the search box and dropdown filters, stores them in
 * `filteredResults` and re-renders the table.
 *
 * A run without a `tests` array or a test without a string `id` no longer
 * throws; such a test simply never matches a non-empty search term.
 */
function applyFilters() {
    const controlValue = (id) => document.getElementById(id).value;

    const searchTerm = controlValue('searchInput').toLowerCase();
    const agentFilter = controlValue('agentFilter');
    const categoryFilter = controlValue('categoryFilter');
    const statusFilter = controlValue('statusFilter');
    const variantFilter = controlValue('variantFilter');

    // Flatten all tests from all results
    const allTests = allResults.flatMap(result => {
        const meta = result.meta ?? {};
        return (result.tests ?? []).map(test => ({
            ...test,
            agent: meta.agent,
            timestamp: meta.timestamp,
            model: meta.model,
            prompt_variant: meta.prompt_variant,
            model_family: meta.model_family
        }));
    });

    filteredResults = allTests.filter(test => {
        // Search filter (matches on the test id only)
        if (searchTerm && !String(test.id ?? '').toLowerCase().includes(searchTerm)) {
            return false;
        }
        // Agent filter
        if (agentFilter !== 'all' && test.agent !== agentFilter) {
            return false;
        }
        // Category filter
        if (categoryFilter !== 'all' && test.category !== categoryFilter) {
            return false;
        }
        // Status filter
        if (statusFilter === 'passed' && !test.passed) {
            return false;
        }
        if (statusFilter === 'failed' && test.passed) {
            return false;
        }
        // Variant filter: 'none' selects tests without any variant
        if (variantFilter !== 'all') {
            if (variantFilter === 'none' && test.prompt_variant) {
                return false;
            }
            if (variantFilter !== 'none' && test.prompt_variant !== variantFilter) {
                return false;
            }
        }
        return true;
    });

    renderTable(filteredResults);
}
// Render Table

/**
 * Replaces the table container's content with either an empty-state panel
 * or a results table, then attaches the click handlers for column sorting
 * and row expansion.
 * @param {object[]} tests - Flattened test records to display.
 */
function renderTable(tests) {
    const container = document.getElementById('tableContainer');

    if (tests.length === 0) {
        container.innerHTML = `
<div class="empty-state">
<div class="empty-state-icon">📭</div>
<h3>No results found</h3>
<p>Try adjusting your filters or run some tests</p>
</div>
`;
        return;
    }

    const rowsHtml = tests.map((test, idx) => renderTestRow(test, idx)).join('');
    container.innerHTML = `
<table>
<thead>
<tr>
<th class="sortable" data-column="id">Test ID</th>
<th class="sortable" data-column="agent">Agent</th>
<th class="sortable" data-column="prompt_variant">Variant</th>
<th class="sortable" data-column="category">Category</th>
<th class="sortable" data-column="passed">Status</th>
<th class="sortable" data-column="duration_ms">Duration</th>
<th class="sortable" data-column="violations.total">Violations</th>
</tr>
</thead>
<tbody>
${rowsHtml}
</tbody>
</table>
`;

    // Header cells sort by the column path stored in data-column.
    for (const headerCell of container.querySelectorAll('th.sortable')) {
        headerCell.addEventListener('click', () => sortTable(headerCell.dataset.column));
    }

    // Summary rows expand/collapse their details row.
    for (const summaryRow of container.querySelectorAll('.expandable-row')) {
        summaryRow.addEventListener('click', () => toggleDetails(summaryRow.dataset.index));
    }
}
// Render Test Row

/**
 * Builds the HTML for one test: a clickable summary row followed by its
 * hidden details row (`id="details-<index>"`).
 *
 * Every value taken from the results JSON is HTML-escaped before being
 * interpolated, because the caller assigns the string to innerHTML. A
 * missing `violations` object is treated as zero violations and a missing
 * or non-numeric duration is shown as '-'.
 *
 * @param {object} test - Flattened test record.
 * @param {number} index - Row position; links the summary and details rows.
 * @returns {string} Markup for the two table rows.
 */
function renderTestRow(test, index) {
    const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (value) =>
        String(value ?? '').replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

    const statusClass = test.passed ? 'passed' : 'failed';
    const statusText = test.passed ? '✅ Passed' : '❌ Failed';

    const durationMs = Number(test.duration_ms);
    const duration = Number.isFinite(durationMs) ? (durationMs / 1000).toFixed(2) + 's' : '-';

    const variant = test.prompt_variant || 'default';
    const variantClass = test.prompt_variant ? 'variant-badge' : 'variant-badge default';

    const violationTotal = test.violations?.total ?? 0;
    const errorMarker = test.violations?.errors > 0 ? '⚠️' : '';

    return `
<tr class="expandable-row" data-index="${index}">
<td><strong>${escapeHtml(test.id)}</strong></td>
<td>${escapeHtml(test.agent)}</td>
<td><span class="${variantClass}">${escapeHtml(variant)}</span></td>
<td><span class="category-badge">${escapeHtml(test.category)}</span></td>
<td><span class="status-badge ${statusClass}">${statusText}</span></td>
<td>${duration}</td>
<td>${escapeHtml(violationTotal)} ${errorMarker}</td>
</tr>
<tr class="details-row" id="details-${index}">
<td colspan="7">
${renderTestDetails(test)}
</td>
</tr>
`;
}
// Render Test Details

/**
 * Builds the expandable details panel for one test: model information,
 * approval/event counts and the list of violations.
 *
 * All values from the results JSON are HTML-escaped because the result is
 * assigned to innerHTML. A missing `violations` object is treated as "no
 * violations" and a violation without a severity is labelled UNKNOWN
 * instead of throwing.
 *
 * @param {object} test - Flattened test record.
 * @returns {string} Markup for the details panel.
 */
function renderTestDetails(test) {
    const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (value) =>
        String(value ?? '').replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

    let html = '<div class="details-content">';

    html += `<p><strong>Model:</strong> ${escapeHtml(test.model || 'unknown')}</p>`;
    if (test.prompt_variant) {
        html += `<p><strong>Prompt Variant:</strong> ${escapeHtml(test.prompt_variant)}</p>`;
    }
    if (test.model_family) {
        html += `<p><strong>Model Family:</strong> ${escapeHtml(test.model_family)}</p>`;
    }
    html += `<p><strong>Approvals:</strong> ${escapeHtml(test.approvals)}</p>`;
    html += `<p><strong>Events:</strong> ${escapeHtml(test.events)}</p>`;

    const violations = test.violations ?? {};
    if (violations.total > 0) {
        html += '<h4>Violations:</h4>';
        (violations.details ?? []).forEach(v => {
            // The severity doubles as a CSS modifier class and as the label.
            const severity = v.severity ? String(v.severity) : 'unknown';
            html += `
<div class="violation-item ${escapeHtml(severity)}">
<strong>[${escapeHtml(severity.toUpperCase())}] ${escapeHtml(v.type)}</strong><br>
${escapeHtml(v.message)}
</div>
`;
        });
    } else {
        html += '<p>✅ No violations</p>';
    }

    html += '</div>';
    return html;
}
// Toggle Details

/**
 * Shows or hides the details row that belongs to a summary row by toggling
 * its 'show' class.
 * @param {string|number} index - Row index used in the `details-<index>` id.
 */
function toggleDetails(index) {
    const rowId = 'details-' + index;
    document.getElementById(rowId).classList.toggle('show');
}
// Sort Table

/**
 * Sorts `filteredResults` in place by the given column and re-renders.
 *
 * Clicking the active column flips the direction; a new column starts
 * ascending. Strings compare case-insensitively. Missing values (for
 * example tests without a prompt variant) sort before all present values
 * when ascending; previously they threw on `.toLowerCase()` or produced an
 * inconsistent order.
 *
 * @param {string} column - Dot-separated property path, e.g.
 *   'violations.total'.
 */
function sortTable(column) {
    if (currentSort.column === column) {
        currentSort.direction = currentSort.direction === 'asc' ? 'desc' : 'asc';
    } else {
        currentSort.column = column;
        currentSort.direction = 'asc';
    }

    // Each side is normalised on its own, so mixed string/undefined pairs are safe.
    const sortKey = (item) => {
        const value = getNestedValue(item, column);
        return typeof value === 'string' ? value.toLowerCase() : value;
    };

    const compareAscending = (left, right) => {
        const leftMissing = left == null;
        const rightMissing = right == null;
        if (leftMissing || rightMissing) {
            if (leftMissing && rightMissing) return 0;
            return leftMissing ? -1 : 1;
        }
        if (left < right) return -1;
        if (left > right) return 1;
        return 0;
    };

    const sign = currentSort.direction === 'asc' ? 1 : -1;
    filteredResults.sort((a, b) => sign * compareAscending(sortKey(a), sortKey(b)));

    renderTable(filteredResults);
    // renderTable rebuilds the header, so the indicators are re-applied after it.
    updateSortIndicators();
}
// Get Nested Value

/**
 * Reads a dot-separated property path from an object.
 * @param {object} obj - Object to read from.
 * @param {string} path - Path such as 'violations.total'.
 * @returns {*} The value at the path, or undefined once any step along the
 *   way is null or undefined.
 */
function getNestedValue(obj, path) {
    let current = obj;
    for (const key of path.split('.')) {
        current = current?.[key];
    }
    return current;
}
// Update Sort Indicators

/**
 * Clears the sort arrows from every sortable header cell and marks the one
 * matching `currentSort.column` with `sort-asc` or `sort-desc`.
 */
function updateSortIndicators() {
    const activeClass = `sort-${currentSort.direction}`;

    for (const headerCell of document.querySelectorAll('th.sortable')) {
        headerCell.classList.remove('sort-asc', 'sort-desc');

        const isActiveColumn = headerCell.dataset.column === currentSort.column;
        if (isActiveColumn) {
            headerCell.classList.add(activeClass);
        }
    }
}
// Update Stats

/**
 * Fills the four summary cards (total, pass rate, failures, average
 * duration) from every test in the given runs.
 * @param {object[]} results - Run documents, each with a `tests` array.
 */
function updateStats(results) {
    const tests = results.flatMap(run => run.tests);
    const total = tests.length;

    // Single pass: count passes and accumulate durations.
    let passed = 0;
    let totalDurationMs = 0;
    for (const test of tests) {
        if (test.passed) {
            passed += 1;
        }
        totalDurationMs += test.duration_ms;
    }
    const failed = total - passed;

    // With no tests both figures stay at a plain 0.
    let passRate = 0;
    let avgDuration = 0;
    if (total > 0) {
        passRate = ((passed / total) * 100).toFixed(1);
        avgDuration = (totalDurationMs / total / 1000).toFixed(2);
    }

    const setText = (id, text) => {
        document.getElementById(id).textContent = text;
    };
    setText('totalTests', total);
    setText('passRate', passRate + '%');
    setText('passedCount', `${passed} passed`);
    setText('failedTests', failed);
    setText('failedSubtitle', failed === 0 ? 'All tests passing! 🎉' : `${failed} tests need attention`);
    setText('avgDuration', avgDuration + 's');
}
// Update Trend Chart

/**
 * Redraws the pass-rate line chart with one point per run, ordered by the
 * run timestamp. An existing chart instance is destroyed first.
 * @param {object[]} results - Run documents; reads `meta.timestamp` and
 *   `summary.pass_rate` (multiplied by 100 for display).
 */
function updateTrendChart(results) {
    const canvas = document.getElementById('trendChart');

    // Work on a copy so the caller's array keeps its order.
    const chronological = results.slice().sort((left, right) =>
        new Date(left.meta.timestamp) - new Date(right.meta.timestamp)
    );

    const labels = [];
    const passRates = [];
    for (const run of chronological) {
        const when = new Date(run.meta.timestamp);
        labels.push(when.toLocaleDateString() + ' ' + when.toLocaleTimeString([], {hour: '2-digit', minute:'2-digit'}));
        passRates.push((run.summary.pass_rate * 100).toFixed(1));
    }

    if (trendChart) {
        trendChart.destroy();
    }

    const passRateSeries = {
        label: 'Pass Rate (%)',
        data: passRates,
        borderColor: '#28a745',
        backgroundColor: 'rgba(40, 167, 69, 0.1)',
        tension: 0.4,
        fill: true
    };

    // Fixed 0-100 axis with percent tick labels.
    const percentAxis = {
        beginAtZero: true,
        max: 100,
        ticks: {
            callback: (value) => value + '%'
        }
    };

    trendChart = new Chart(canvas, {
        type: 'line',
        data: {
            labels: labels,
            datasets: [passRateSeries]
        },
        options: {
            responsive: true,
            maintainAspectRatio: true,
            plugins: {
                legend: {
                    display: false
                }
            },
            scales: {
                y: percentAxis
            }
        }
    });
}
// Export to CSV

/**
 * Downloads the currently filtered tests as `test-results-<YYYY-MM-DD>.csv`.
 * When the filtered list is empty, every test of every loaded run is
 * exported instead (those records have no agent and are written as
 * 'unknown').
 *
 * Each cell is wrapped in double quotes with embedded quotes doubled, so
 * ids or categories containing `"` no longer corrupt the file. Missing
 * values are written as empty cells and a missing `violations` object
 * counts as 0.
 */
function exportToCSV() {
    const tests = filteredResults.length > 0 ? filteredResults : allResults.flatMap(r => r.tests);

    // Quote one cell, doubling any embedded double quote.
    const csvCell = (cell) => `"${String(cell ?? '').replace(/"/g, '""')}"`;

    const headers = ['Test ID', 'Agent', 'Category', 'Status', 'Duration (ms)', 'Events', 'Approvals', 'Violations'];
    const rows = tests.map(test => [
        test.id,
        test.agent || 'unknown',
        test.category,
        test.passed ? 'Passed' : 'Failed',
        test.duration_ms,
        test.events,
        test.approvals,
        test.violations?.total ?? 0
    ]);

    const csv = [headers, ...rows]
        .map(row => row.map(csvCell).join(','))
        .join('\n');

    // Trigger the download through a temporary object URL.
    const blob = new Blob([csv], { type: 'text/csv' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = `test-results-${new Date().toISOString().split('T')[0]}.csv`;
    a.click();
    URL.revokeObjectURL(url);
}
// Show Loading

/**
 * Replaces the table container's content with a spinner and a
 * "Loading test results..." message.
 */
function showLoading() {
    const spinnerMarkup = `
<div class="loading">
<div class="spinner"></div>
<p>Loading test results...</p>
</div>
`;
    document.getElementById('tableContainer').innerHTML = spinnerMarkup;
}
// Show Error

/**
 * Replaces the table container's content with an error panel: the standing
 * "serve via HTTP" instructions, the actual error message, and a retry
 * button that calls loadResults().
 *
 * The message used to be formatted and then discarded, so the real cause
 * never reached the page. It is now rendered under "Details", one
 * paragraph per non-blank line, HTML-escaped because it may contain text
 * from thrown errors.
 *
 * @param {string} message - Error text; may span several lines.
 */
function showError(message) {
    const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (value) =>
        String(value ?? '').replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

    // Format multi-line messages
    const formattedMessage = String(message ?? '').split('\n').map(line =>
        line.trim() ? `<p style="margin: 5px 0; text-align: left;">${escapeHtml(line)}</p>` : '<br>'
    ).join('');

    document.getElementById('tableContainer').innerHTML = `
<div class="empty-state">
<div class="empty-state-icon">⚠️</div>
<h3>Cannot Load Results</h3>
<div style="max-width: 600px; margin: 20px auto; background: var(--bg-secondary); padding: 20px; border-radius: 8px; text-align: left;">
<h4 style="margin-top: 0;">Solution: Serve via HTTP</h4>
<pre style="background: var(--bg-card); padding: 10px; border-radius: 4px; overflow-x: auto;">cd evals/results
python3 -m http.server 8000</pre>
<p>Then open: <a href="http://localhost:8000" target="_blank">http://localhost:8000</a></p>
<hr style="margin: 15px 0; border: none; border-top: 1px solid var(--border-color);">
<p style="font-size: 12px; color: var(--text-secondary);">
<strong>Why?</strong> Browsers block loading local JSON files for security.
Serving via HTTP solves this.
</p>
<hr style="margin: 15px 0; border: none; border-top: 1px solid var(--border-color);">
<div style="font-size: 12px; color: var(--text-secondary);">
<strong>Details</strong>
${formattedMessage}
</div>
</div>
<button onclick="loadResults()" class="primary">Try Again</button>
</div>
`;
}
- </script>
- </body>
- </html>
|