4 months ago · 2be92ff8d1
--- a/.github/workflows/test-agents.yml
+++ b/.github/workflows/test-agents.yml
@@ -258,8 +258,8 @@ jobs:
 
				             echo "type=rc" >> $GITHUB_OUTPUT
			
 
				             echo "Detected [rc] tag - bumping rc version"
			
 
				           else
			
 
				-            echo "type=minor" >> $GITHUB_OUTPUT
			
 
				-            echo "No specific type detected - defaulting to minor version bump"
			
 
				+            echo "type=patch" >> $GITHUB_OUTPUT
			
 
				+            echo "No specific type detected - defaulting to patch version bump"
			
 
				           fi
			
 
				       
			
 
				       - name: Bump version
			
--- a/.opencode/command/commit-openagents.md
+++ b/.opencode/command/commit-openagents.md
@@ -139,11 +139,11 @@ This will trigger:
 
				 
			
 
				 ### Version Bumping (Automatic via CI/CD)
			
 
				 Commits trigger automatic version bumps:
			
 
				-- `feat:` → minor bump (v0.1.0 → v0.2.0)
			
 
				-- `fix:` → patch bump (v0.1.0 → v0.1.1)
			
 
				-- `feat!:` or `BREAKING CHANGE:` → major bump (v0.1.0 → v1.0.0)
			
 
				-- `[alpha]` in message → alpha bump (v0.1.0-alpha.1 → v0.1.0-alpha.2)
			
 
				-- Default → minor bump
			
 
				+- `feat:` → minor bump (0.0.1 → 0.1.0)
			
 
				+- `fix:` → patch bump (0.0.1 → 0.0.2)
			
 
				+- `feat!:` or `BREAKING CHANGE:` → major bump (0.1.0 → 1.0.0)
			
 
				+- `[alpha]` in message → alpha bump (0.1.0-alpha.1 → 0.1.0-alpha.2)
			
 
				+- Default → patch bump (0.0.1 → 0.0.2)
			
 
				 
			
 
				 ### Files to Always Check
			
 
				 Before committing, verify these are in sync:
			
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,135 +5,16 @@ All notable changes to this project will be documented in this file.
 
				 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
			
 
				 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
			
 
				 
			
 
				-## [0.1.0-alpha.1] - 2025-11-26
			
 
				-## [0.3.0] - 2025-11-26
			
 
				-
			
 
				-### Changes
			
 
				-- feat(ci): skip redundant tests on PR merge
			
 
				-
			
 
				-Tests only run on:
			
 
				-- Pull requests (CI checks)
			
 
				-- Direct pushes to main
			
 
				-- Manual workflow dispatch
			
 
				-
			
 
				-PR merges skip tests (already passed) but still trigger version bump.
			
 
				-
			
 
				-## [0.2.0] - 2025-11-26
			
 
				-
			
 
				-### Changes
			
 
				-- feat(ci): auto-create GitHub releases on version bump
			
 
				-
			
 
				-Releases now appear in the GitHub sidebar. The release notes are
			
 
				-extracted from CHANGELOG.md for the specific version.
			
 
				-
			
 
				-## [0.1.0] - 2025-11-26
			
 
				-
			
 
				-### Changes
			
 
				-- fix(ci): add contents:write permission for auto version bump
			
 
				-
			
 
				-The GITHUB_TOKEN needs explicit write permission to push commits
			
 
				-and tags back to the repository.
			
 
				-
			
 
				-
			
 
				-### Added
			
 
				-
			
 
				-#### SDK-Based Evaluation Framework
			
 
				-- Complete test execution framework using OpenCode SDK
			
 
				-- Support for openagent and opencoder testing
			
 
				-- Real agent testing with session management
			
 
				-- Smart timeout system with activity monitoring
			
 
				-- Multi-turn conversation support
			
 
				-
			
 
				-#### Modular Architecture
			
 
				-- Refactored test-runner.ts (884 lines → 4 focused modules):
			
 
				-  - `test-runner.ts` (411 lines): Thin orchestrator
			
 
				-  - `test-executor.ts` (392 lines): Core execution logic
			
 
				-  - `result-validator.ts` (253 lines): Validation logic
			
 
				-  - `event-logger.ts` (128 lines): Logging utilities
			
 
				-- Improved Single Responsibility Principle compliance
			
 
				-- Enhanced testability through dependency injection
			
 
				-
			
 
				-#### Test Infrastructure
			
 
				-- 20+ test cases across multiple categories:
			
 
				-  - OpenAgent: Developer (12), Context Loading (5), Business (2), Edge Cases (3)
			
 
				-  - OpenCoder: Developer (4)
			
 
				-- BehaviorEvaluator for validating expected agent actions
			
 
				-- Comprehensive evaluators: approval-gate, context-loading, delegation, tool-usage
			
 
				-
			
 
				-#### Interactive Results Dashboard
			
 
				-- Real-time test results visualization
			
 
				-- Filtering by agent, category, status
			
 
				-- Detailed violation tracking
			
 
				-- CSV export functionality
			
 
				-- Historical results tracking
			
 
				-- One-command deployment (`./serve.sh`)
			
 
				-
			
 
				-#### Documentation
			
 
				-- ARCHITECTURE.md: Comprehensive system review (456 lines)
			
 
				-- GETTING_STARTED.md: Quick start guide (435 lines)
			
 
				-- SDK_EVAL_README.md: Complete SDK guide (298 lines)
			
 
				-- Test design guide and architecture overview
			
 
				-- Documentation cleanup (removed 3 outdated files)
			
 
				-
			
 
				-#### Script Organization
			
 
				-- Organized 12 scripts into logical directories:
			
 
				-  - `scripts/debug/`: Session debugging tools (4 files)
			
 
				-  - `scripts/test/`: Test execution scripts (6 files)
			
 
				-  - `scripts/utils/`: Utility scripts (2 files)
			
 
				-- Comprehensive scripts/README.md with usage examples
			
 
				-
			
 
				-#### Monorepo Structure
			
 
				-- Root package.json with convenient npm scripts
			
 
				-- Easy agent selection (openagent, opencoder)
			
 
				-- Easy model selection (grok, claude, gpt-4)
			
 
				-- Quick dashboard access from root
			
 
				-- No folder navigation required
			
 
				-
			
 
				-#### CI/CD
			
 
				-- GitHub Actions workflow for automated testing
			
 
				-- Pre-merge validation for agent changes
			
 
				-- Fast smoke tests for both agents
			
 
				-- Automated test result reporting
			
 
				-
			
 
				-#### Agent Improvements
			
 
				-- Enhanced openagent with better context loading
			
 
				-- New opencoder agent with test suite
			
 
				-- Improved subagent invocation patterns
			
 
				-- Ultra-compact context index system
			
 
				-
			
 
				-### Changed
			
 
				-- Reorganized evaluation framework structure
			
 
				-- Improved test case schema with behavior expectations
			
 
				-- Enhanced context loading detection
			
 
				-
			
 
				-### Removed
			
 
				-- Outdated documentation files (TESTING_CONFIDENCE.md, TEST_REVIEW.md, SESSION_STORAGE_FIX.md)
			
 
				-- Redundant test files
			
 
				-
			
 
				-### Fixed
			
 
				-- Context loading evaluator detection accuracy
			
 
				-- Multi-turn prompt handling
			
 
				-- Test artifact cleanup
			
 
				+## [Unreleased]
			
 
				 
			
 
				 ---
			
 
				 
			
 
				 ## Version Format
			
 
				 
			
 
				 ```
			
 
				-v0.1.0-alpha.1
			
 
				-│ │ │  │      │
			
 
				-│ │ │  │      └─ Build/Iteration number
			
 
				-│ │ │  └──────── Release stage (alpha, beta, rc)
			
 
				-│ │ └─────────── Patch version
			
 
				-│ └───────────── Minor version
			
 
				-└─────────────── Major version (0 = pre-release)
			
 
				+v0.0.X
			
 
				+ │ │ │
			
 
				+ │ │ └─ Patch version (increments with each release)
			
 
				+ │ └─── Minor version (feature additions)
			
 
				+ └───── Major version (breaking changes)
			
 
				 ```
			
 
				-
			
 
				-### Version Progression
			
 
				-
			
 
				-- **Alpha** (`v0.x.0-alpha.N`): Early development, unstable
			
 
				-- **Beta** (`v0.x.0-beta.N`): Feature complete, testing
			
 
				-- **RC** (`v0.x.0-rc.N`): Release candidate, stable
			
 
				-- **Stable** (`v1.x.x`): Production ready
			
 
				-
			
 
				-[0.1.0-alpha.1]: https://github.com/darrenhinde/OpenAgents/releases/tag/v0.1.0-alpha.1
			
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
 
				-0.3.0
			
 
				+0.0.1
			
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 
				 {
			
 
				   "name": "opencode-agents",
			
 
				-  "version": "0.3.0",
			
 
				+  "version": "0.0.1",
			
 
				   "description": "OpenCode agent evaluation framework and test suites",
			
 
				   "private": true,
			
 
				   "workspaces": [
@@ -1 +1 @@
 				-0.3.0
 				+0.0.1