logic855
/
OpenAgentsControl
mirror of https://github.com/darrenhinde/OpenAgentsControl.git


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086
import type { Plugin } from "@opencode-ai/plugin"
import { tool } from "@opencode-ai/plugin"
import { mkdir, writeFile } from "fs/promises"
import path from "path"

/**
 * Reports whether any text part of a message contains wording that reads
 * like a request for approval.
 *
 * Matching is a case-insensitive substring search, so a trigger such as
 * "approve" also fires on "approved".
 *
 * @param msg - Message-like object; only its `parts` array is inspected.
 * @returns `true` as soon as one text part contains a trigger phrase,
 *          `false` when `parts` is missing or nothing matches.
 */
function checkForApprovalLanguage(msg: any): boolean {
  if (!msg.parts) return false

  const triggerPhrases = [
    "approval",
    "approve",
    "proceed",
    "confirm",
    "permission",
    "before proceeding",
    "should i",
    "may i",
    "can i proceed",
  ]

  for (const segment of msg.parts) {
    // Only non-empty text parts can carry approval wording
    if (segment.type !== "text" || !segment.text) continue

    const haystack = segment.text.toLowerCase()
    for (const phrase of triggerPhrases) {
      if (haystack.includes(phrase)) return true
    }
  }

  return false
}

/**
 * Reports whether a user message contains an approval response.
 *
 * Keywords are matched case-insensitively as whole words or phrases, so
 * "ok" matches "OK, go ahead" but not "look", "token" or "broken", and
 * "yes" does not match "yesterday".
 *
 * Negation is not analysed: "do not proceed" still counts as a match
 * because it contains the word "proceed".
 *
 * @param msg - Message-like object; only its `parts` array is inspected.
 * @returns `true` when any text part contains an approval keyword,
 *          `false` for a missing message, missing/non-array `parts`, or no match.
 */
function checkForUserApproval(msg: any): boolean {
  if (!Array.isArray(msg?.parts)) return false

  const userApprovalKeywords = [
    "proceed",
    "approved",
    "yes",
    "go ahead",
    "ok",
    "okay",
    "sure",
    "do it",
    "continue",
  ]

  // The keywords contain only letters and spaces, so they can be joined
  // into an alternation without escaping. \b keeps matches on word boundaries.
  const approvalPattern = new RegExp(`\\b(?:${userApprovalKeywords.join("|")})\\b`, "i")

  return msg.parts.some(
    (part: any) =>
      part?.type === "text" &&
      typeof part.text === "string" &&
      approvalPattern.test(part.text),
  )
}

/**
 * Agent Validation Plugin
 *
 * Validates that agents follow their defined prompts and execution rules.
 * Tracks tool calls, approval gates, delegation decisions, and critical rule compliance.
 *
 * All tracking state (behavior log, per-session tool tracker, per-session
 * agent name) is held in memory inside this closure and is shared by the
 * hooks and the validation tools returned below.
 *
 * Uses `client` to fetch session messages and `directory` as the base of the
 * default report path.
 *
 * @returns Hook handlers (`event`, `chat.message`, `tool.execute.before`,
 *          `tool.execute.after`) plus the validation tools under `tool`.
 */
export const AgentValidatorPlugin: Plugin = async ({ client, directory }) => {
  // Track agent behavior in real-time
  const behaviorLog: Array<{
    timestamp: number
    sessionID: string
    agent: string
    event: string
    data: any
  }> = []

  // Track tool execution for approval gate validation
  const toolExecutionTracker = new Map<string, {
    approvalRequested: boolean
    toolsExecuted: string[]
    timestamp: number
  }>()

  // Track current agent for each session
  const sessionAgentTracker = new Map<string, string>()

  // Tools treated as execution operations that require approval
  const executionTools = ["bash", "write", "edit", "task"]

  // Render an arbitrary error value as readable text. Interpolating an
  // error object directly would produce "[object Object]".
  const describeError = (err: unknown): string => {
    if (err instanceof Error) return err.message
    if (typeof err === "string") return err
    try {
      return JSON.stringify(err) ?? String(err)
    } catch {
      // Circular structures and similar: fall back to default stringification
      return String(err)
    }
  }

  return {
    // Listen to all events
    async event(input) {
      const { event } = input
      // Silently track events (removed console.log to reduce noise)

      // Track session-level events for validation
      if (event.type === "message.updated") {
        const msg = event.properties.info
        behaviorLog.push({
          timestamp: Date.now(),
          sessionID: msg.sessionID,
          // User messages are logged under their own agent field; everything
          // else is logged under the generic label "assistant"
          agent: msg.role === "user" ? msg.agent : "assistant",
          event: "message_created",
          data: {
            messageID: msg.id,
            role: msg.role,
          },
        })
      }
    },

    // Capture agent information from chat messages
    "chat.message": async (input) => {
      const { sessionID, agent } = input

      // Track which agent is currently active for this session
      if (agent) {
        sessionAgentTracker.set(sessionID, agent)
      }
    },

    // Monitor tool execution
    "tool.execute.before": async (input, output) => {
      // Renamed locally so it does not shadow the imported `tool` helper
      const { tool: toolName, sessionID, callID } = input

      // Get current agent for this session
      const currentAgent = sessionAgentTracker.get(sessionID) || "unknown"

      // Track context file reads
      if (toolName === "read") {
        const filePath = output.args?.filePath || output.args?.target_file
        // Guard the type: a non-string argument must not make this hook throw
        if (typeof filePath === "string" && filePath.includes(".opencode/")) {
          behaviorLog.push({
            timestamp: Date.now(),
            sessionID,
            agent: currentAgent,
            event: "context_file_read",
            data: {
              tool: "read",
              filePath,
              callID,
            },
          })
        }
      }

      // Track execution tools that require approval
      if (executionTools.includes(toolName)) {
        const tracker = toolExecutionTracker.get(sessionID) || {
          approvalRequested: false,
          toolsExecuted: [],
          timestamp: Date.now(),
        }

        // The flag is never reset once set, so the message history only
        // needs to be fetched while no approval has been recorded yet
        if (!tracker.approvalRequested) {
          try {
            const messagesResponse = await client.session.messages({
              path: { id: sessionID },
            })
            const messages = messagesResponse.data || []

            // Look at last few messages for approval pattern:
            // an assistant request immediately followed by a user approval
            const recentMessages = messages.slice(-5)
            for (let i = 0; i < recentMessages.length - 1; i++) {
              const msg = recentMessages[i]
              const nextMsg = recentMessages[i + 1]
              const role = msg.info?.role
              const nextRole = nextMsg.info?.role

              if (role === "assistant" && checkForApprovalLanguage(msg) &&
                  nextRole === "user" && checkForUserApproval(nextMsg)) {
                tracker.approvalRequested = true
                break
              }
            }
          } catch (err) {
            // Best effort: a failed lookup must not block the tool call,
            // so the approval flag simply stays as it was
          }
        }

        tracker.toolsExecuted.push(toolName)
        toolExecutionTracker.set(sessionID, tracker)

        behaviorLog.push({
          timestamp: Date.now(),
          sessionID,
          agent: currentAgent,
          event: "execution_tool_called",
          data: {
            tool: toolName,
            callID,
            args: output.args,
            approvalRequested: tracker.approvalRequested,
          },
        })
      }
    },

    // Track tool execution results
    "tool.execute.after": async (input, output) => {
      const { tool: toolName, sessionID } = input

      const currentAgent = sessionAgentTracker.get(sessionID) || "unknown"

      behaviorLog.push({
        timestamp: Date.now(),
        sessionID,
        agent: currentAgent,
        event: "tool_executed",
        data: {
          tool: toolName,
          title: output.title,
          metadata: output.metadata,
        },
      })
    },

    // Provide validation tools
    tool: {
      // Validate current session
      validate_session: tool({
        description: "Validate that the current agent session is following its defined prompt rules and execution patterns. Returns a detailed validation report.",
        args: {
          include_details: tool.schema.boolean()
            .optional()
            .describe("Include detailed evidence for each validation check"),
        },
        async execute(args, context) {
          const { sessionID } = context

          try {
            // Fetch session messages using SDK
            const messagesResponse = await client.session.messages({
              path: { id: sessionID },
            })

            if (messagesResponse.error) {
              return `Error fetching session: ${describeError(messagesResponse.error)}`
            }

            const messages = messagesResponse.data || []

            // Analyze agent behavior
            const validation = await validateSessionBehavior({
              sessionID,
              messages,
              behaviorLog: behaviorLog.filter(log => log.sessionID === sessionID),
              includeDetails: args.include_details ?? false,
            })

            return formatValidationReport(validation)
          } catch (err) {
            return `Validation error: ${describeError(err)}`
          }
        },
      }),

      // Check approval gate compliance
      check_approval_gates: tool({
        description: "Check if approval gates were properly enforced before execution operations (bash, write, edit, task). Returns compliance status.",
        args: {},
        async execute(args, context) {
          const { sessionID } = context
          const tracker = toolExecutionTracker.get(sessionID)

          if (!tracker) {
            return "No execution operations tracked in this session."
          }

          // Approval is tracked per session: once an approval exchange has
          // been seen, every tracked operation counts as approved
          const { approvalRequested, toolsExecuted } = tracker
          const violations = approvalRequested ? [] : toolsExecuted

          if (violations.length === 0) {
            return `✅ Approval gate compliance: PASSED\n\nAll ${toolsExecuted.length} execution operation(s) were properly approved.`
          }

          return `⚠️ Approval gate compliance: FAILED\n\nExecuted ${violations.length} operation(s) without approval:\n${violations.map(t => `  - ${t}`).join("\n")}\n\nCritical rule violated: approval_gate`
        },
      }),

      // Export validation report
      export_validation_report: tool({
        description: "Export a comprehensive validation report for the current session to a markdown file",
        args: {
          output_path: tool.schema.string()
            .optional()
            .describe("Path to save the report (defaults to .tmp/validation-{sessionID}.md)"),
        },
        async execute(args, context) {
          const { sessionID } = context

          try {
            const messagesResponse = await client.session.messages({
              path: { id: sessionID },
            })

            if (messagesResponse.error) {
              return `Error fetching session: ${describeError(messagesResponse.error)}`
            }

            const messages = messagesResponse.data || []

            const validation = await validateSessionBehavior({
              sessionID,
              messages,
              behaviorLog: behaviorLog.filter(log => log.sessionID === sessionID),
              includeDetails: true,
            })

            const report = generateDetailedReport(validation, messages)
            const outputPath = args.output_path || path.join(directory, `.tmp/validation-${sessionID.slice(0, 8)}.md`)

            // writeFile does not create parent directories, and the default
            // .tmp folder may not exist yet
            await mkdir(path.dirname(outputPath), { recursive: true })
            await writeFile(outputPath, report, "utf-8")

            return `✅ Validation report exported to: ${outputPath}\n\n${formatValidationReport(validation)}`
          } catch (err) {
            return `Export error: ${describeError(err)}`
          }
        },
      }),

      // Analyze delegation decisions
      analyze_delegation: tool({
        description: "Analyze whether delegation decisions followed the 4+ file rule and complexity criteria",
        args: {},
        async execute(args, context) {
          const { sessionID } = context

          const messagesResponse = await client.session.messages({
            path: { id: sessionID },
          })

          if (messagesResponse.error) {
            return `Error: ${describeError(messagesResponse.error)}`
          }

          const messages = messagesResponse.data || []
          const analysis = analyzeDelegationDecisions(messages)

          return formatDelegationAnalysis(analysis)
        },
      }),

      // Analyze context file reads
      analyze_context_reads: tool({
        description: "Show all context files that were read during the session (e.g., .opencode/agent/openagent.md)",
        args: {},
        async execute(args, context) {
          const { sessionID } = context

          // Filter behavior log for context file reads
          const contextReads = behaviorLog.filter(
            log => log.sessionID === sessionID && log.event === "context_file_read"
          )

          if (contextReads.length === 0) {
            return "📚 No context files read in this session yet.\n\nContext files are in `.opencode/` directories (agent definitions, workflows, standards, etc.)"
          }

          const lines: string[] = [
            `## Context Files Read`,
            ``,
            `**Total reads:** ${contextReads.length}`,
            ``,
          ]

          // Group by file path
          const fileReadCounts = new Map<string, number>()
          contextReads.forEach(log => {
            const filePath = log.data.filePath
            fileReadCounts.set(filePath, (fileReadCounts.get(filePath) || 0) + 1)
          })

          // Sort by read count (most read first)
          const sorted = Array.from(fileReadCounts.entries()).sort((a, b) => b[1] - a[1])

          lines.push(`### Files Read:`)
          sorted.forEach(([filePath, count]) => {
            const fileName = filePath.split('/').pop()
            const readText = count === 1 ? "read" : "reads"
            lines.push(`- **${fileName}** (${count} ${readText})`)
            lines.push(`  \`${filePath}\``)
          })

          lines.push(``)
          lines.push(`### Timeline:`)
          contextReads.forEach((log, idx) => {
            const time = new Date(log.timestamp).toLocaleTimeString()
            const fileName = log.data.filePath.split('/').pop()
            lines.push(`${idx + 1}. [${time}] ${fileName}`)
          })

          return lines.join("\n")
        },
      }),

      // Check context loading compliance
      check_context_compliance: tool({
        description: "Check if required context files were read BEFORE executing tasks (e.g., read docs.md before writing documentation)",
        args: {},
        async execute(args, context) {
          const { sessionID } = context

          const messagesResponse = await client.session.messages({
            path: { id: sessionID },
          })

          if (messagesResponse.error) {
            return `Error: ${describeError(messagesResponse.error)}`
          }

          const messages = messagesResponse.data || []
          const sessionBehaviorLog = behaviorLog.filter(log => log.sessionID === sessionID)

          const checks = analyzeContextLoadingCompliance(messages, sessionBehaviorLog)

          if (checks.length === 0) {
            return "📋 No tasks detected that require specific context files.\n\nContext loading rules apply when:\n- Writing documentation → should read standards/docs.md\n- Writing code → should read standards/code.md\n- Reviewing code → should read workflows/review.md\n- Delegating tasks → should read workflows/delegation.md\n- Writing tests → should read standards/tests.md"
          }

          const passed = checks.filter(c => c.passed).length
          const failed = checks.filter(c => !c.passed).length
          const score = Math.round((passed / checks.length) * 100)

          const lines: string[] = [
            `## Context Loading Compliance`,
            ``,
            `**Score:** ${score}%`,
            `- ✅ Compliant: ${passed}`,
            `- ⚠️  Non-compliant: ${failed}`,
            ``,
          ]

          if (failed > 0) {
            lines.push(`### ⚠️  Issues Found:`)
            checks.filter(c => !c.passed).forEach(check => {
              lines.push(`- ${check.details}`)
            })
            lines.push(``)
          }

          if (passed > 0) {
            lines.push(`### ✅ Compliant Actions:`)
            checks.filter(c => c.passed).forEach(check => {
              lines.push(`- ${check.details}`)
            })
            lines.push(``)
          }

          lines.push(`### Context Loading Rules:`)
          lines.push(`According to OpenAgent prompt, the agent should:`)
          lines.push(`1. Detect task type from user request`)
          lines.push(`2. Read required context file FIRST`)
          lines.push(`3. Then execute task following those standards`)
          lines.push(``)
          lines.push(`**Pattern:** "Fetch context BEFORE starting work, not during or after"`)

          return lines.join("\n")
        },
      }),

      // Analyze which agents were used
      analyze_agent_usage: tool({
        description: "Show which agents were active during the session and what tools they used",
        args: {},
        async execute(args, context) {
          const { sessionID } = context

          const sessionBehaviorLog = behaviorLog.filter(log => log.sessionID === sessionID)

          if (sessionBehaviorLog.length === 0) {
            return "📊 No agent activity tracked yet in this session."
          }

          // Group by agent
          const agentStats = new Map<string, {
            toolCalls: Map<string, number>
            events: string[]
            firstSeen: number
            lastSeen: number
          }>()

          sessionBehaviorLog.forEach(log => {
            const agent = log.agent || "unknown"

            let stats = agentStats.get(agent)
            if (!stats) {
              stats = {
                toolCalls: new Map(),
                events: [],
                firstSeen: log.timestamp,
                lastSeen: log.timestamp
              }
              agentStats.set(agent, stats)
            }

            stats.lastSeen = log.timestamp
            stats.events.push(log.event)

            // Track tool usage. A tool in the execution list is logged by
            // both the before and after hooks, so it is counted once per event
            if (log.event === "execution_tool_called" || log.event === "tool_executed") {
              const toolName = log.data.tool
              stats.toolCalls.set(toolName, (stats.toolCalls.get(toolName) || 0) + 1)
            }
          })

          const lines: string[] = [
            `## Agent Usage Report`,
            ``,
            `**Agents detected:** ${agentStats.size}`,
            `**Total events:** ${sessionBehaviorLog.length}`,
            ``,
          ]

          // Sort agents by first seen
          const sortedAgents = Array.from(agentStats.entries()).sort((a, b) => a[1].firstSeen - b[1].firstSeen)

          sortedAgents.forEach(([agent, stats]) => {
            const duration = stats.lastSeen - stats.firstSeen
            const durationStr = duration > 0 ? `${Math.round(duration / 1000)}s` : "instant"

            lines.push(`### ${agent === "unknown" ? "Unknown Agent" : agent}`)
            lines.push(``)
            lines.push(`**Active duration:** ${durationStr}`)
            lines.push(`**Events:** ${stats.events.length}`)

            if (stats.toolCalls.size > 0) {
              lines.push(``)
              lines.push(`**Tools used:**`)
              const sortedTools = Array.from(stats.toolCalls.entries()).sort((a, b) => b[1] - a[1])
              sortedTools.forEach(([toolName, count]) => {
                lines.push(`- ${toolName}: ${count}x`)
              })
            }

            lines.push(``)
          })

          return lines.join("\n")
        },
      }),

      // Debug tool to inspect tracking
      debug_validator: tool({
        description: "Debug tool to inspect what the validator is tracking (behavior log, messages, etc.)",
        args: {},
        async execute(args, context) {
          const { sessionID } = context

          // Get messages from SDK
          const messagesResponse = await client.session.messages({
            path: { id: sessionID },
          })

          const messages = messagesResponse.data || []
          const sessionBehaviorLog = behaviorLog.filter(log => log.sessionID === sessionID)
          const tracker = toolExecutionTracker.get(sessionID)

          const debug = {
            sessionID,
            behaviorLogEntries: sessionBehaviorLog.length,
            behaviorLogSampleFirst: sessionBehaviorLog.slice(0, 3),
            behaviorLogSampleLast: sessionBehaviorLog.slice(-3),
            messagesCount: messages.length,
            messagesSample: messages.slice(0, 2).map(m => ({
              role: m.info?.role,
              partsCount: m.parts?.length,
              partTypes: m.parts?.map((p: any) => p.type),
            })),
            toolTracker: tracker ? {
              approvalRequested: tracker.approvalRequested,
              toolsExecuted: tracker.toolsExecuted,
            } : null,
            allBehaviorLogs: behaviorLog.length,
          }

          return `## Debug Information\n\n\`\`\`json\n${JSON.stringify(debug, null, 2)}\n\`\`\`\n\n**Analysis:**\n- Behavior log entries for this session: ${sessionBehaviorLog.length}\n- Total behavior log entries: ${behaviorLog.length}\n- Messages in session: ${messages.length}\n- Tool execution tracker: ${tracker ? 'Active' : 'None'}`
        },
      }),
    },
  }
}

// Validation logic
/** A single finding produced by one of the session analyzers. */
interface ValidationCheck {
  /** Identifier of the rule the finding belongs to, e.g. "tool_usage" or "stop_on_failure". */
  rule: string
  /** Whether the rule was satisfied. */
  passed: boolean
  /** Severity used when summarising: failing "error" and "warning" checks are counted separately. */
  severity: "info" | "warning" | "error"
  /** Human-readable description of the finding. */
  details: string
  /** Optional supporting data attached by the analyzer; its shape varies per rule. */
  evidence?: any
}

/** Outcome of validating one session: every check plus aggregate counts. */
interface ValidationResult {
  /** Session the checks were computed for. */
  sessionID: string
  /** All checks, in the order the analyzers produced them. */
  checks: ValidationCheck[]
  summary: {
    /** Number of checks with `passed === true`. */
    passed: number
    /** Number of failing checks with severity "error". */
    failed: number
    /** Number of failing checks with severity "warning". */
    warnings: number
    /** Percentage of passing checks, rounded to an integer; 0 when there are no checks. */
    score: number
  }
}

/**
 * Runs every analyzer over a session and rolls the findings into one result.
 *
 * Analyzers run in a fixed order (tool usage, approval gates, context loading,
 * delegation, critical rules, context loading compliance); the returned
 * `checks` array keeps that order.
 *
 * @param input - Session id, its messages and the behavior log entries
 *                belonging to it. `includeDetails` is accepted but not read here.
 * @returns The collected checks and a summary: passing checks, failing checks
 *          by severity ("error" / "warning"), and the pass percentage.
 */
async function validateSessionBehavior(input: {
  sessionID: string
  messages: any[]
  behaviorLog: any[]
  includeDetails: boolean
}): Promise<ValidationResult> {
  const { sessionID, messages, behaviorLog } = input

  // Spread order fixes both the evaluation order and the report order
  const checks: ValidationCheck[] = [
    ...analyzeToolUsage(messages),
    ...analyzeApprovalGates(messages, behaviorLog),
    ...analyzeContextLoading(messages),
    ...analyzeDelegation(messages),
    ...analyzeCriticalRules(messages),
    ...analyzeContextLoadingCompliance(messages, behaviorLog),
  ]

  // Single pass over the checks to build all three counters
  let passed = 0
  let failed = 0
  let warnings = 0
  for (const check of checks) {
    if (check.passed) {
      passed += 1
    } else if (check.severity === "error") {
      failed += 1
    } else if (check.severity === "warning") {
      warnings += 1
    }
  }

  // An empty check list scores 0 rather than dividing by zero
  const score = checks.length === 0 ? 0 : Math.round((passed / checks.length) * 100)

  return {
    sessionID,
    checks,
    summary: { passed, failed, warnings, score },
  }
}

/**
 * Emits one informational check per assistant message that used tools.
 *
 * @param messages - Session messages, either `{ info, parts }` wrappers or
 *                   objects carrying `role` directly.
 * @returns A passing "tool_usage" check for each assistant message with at
 *          least one tool, listing the tool names.
 */
function analyzeToolUsage(messages: any[]): ValidationCheck[] {
  return messages.flatMap((message): ValidationCheck[] => {
    // The role lives on `info` for SDK messages, with a flat `role` as fallback
    const author = message.info?.role || message.role
    if (author !== "assistant") return []

    const toolNames = extractToolsFromMessage(message)
    if (toolNames.length === 0) return []

    return [
      {
        rule: "tool_usage",
        passed: true,
        severity: "info",
        details: `Used ${toolNames.length} tool(s): ${toolNames.join(", ")}`,
      },
    ]
  })
}

/**
 * Checks that each assistant message running execution tools
 * (bash, write, edit, task) was covered by an approval request.
 *
 * A message counts as covered when it contains approval wording itself, or
 * when one of the three preceding messages is an assistant approval request
 * immediately followed by a user approval.
 *
 * @param messages - Session messages in chronological order.
 * @param behaviorLog - Accepted for signature parity; not read by this function.
 * @returns One "approval_gate_enforcement" check per assistant message that
 *          used execution tools.
 */
function analyzeApprovalGates(messages: any[], behaviorLog: any[]): ValidationCheck[] {
  const gatedTools = new Set(["bash", "write", "edit", "task"])
  const roleOf = (entry: any) => entry.info?.role || entry.role
  const results: ValidationCheck[] = []

  for (const [index, message] of messages.entries()) {
    if (roleOf(message) !== "assistant") continue

    const executionOps = extractToolsFromMessage(message).filter(name => gatedTools.has(name))
    if (executionOps.length === 0) continue

    // Look back up to 3 messages for a request/approval pair. The reply slot
    // always exists because the request index is strictly below `index`.
    const windowStart = Math.max(0, index - 3)
    const approvedEarlier = () =>
      messages.slice(windowStart, index).some((request, offset) => {
        if (roleOf(request) !== "assistant" || !checkForApprovalLanguage(request)) {
          return false
        }
        const reply = messages[windowStart + offset + 1]
        return roleOf(reply) === "user" && checkForUserApproval(reply)
      })

    // The message's own wording is checked first; the look-back only runs
    // when that finds nothing
    const hasApprovalRequest = checkForApprovalLanguage(message) || approvedEarlier()

    let severity: ValidationCheck["severity"]
    let details: string
    if (hasApprovalRequest) {
      severity = "info"
      details = `Properly requested approval before ${executionOps.length} execution op(s)`
    } else {
      severity = "warning"
      details = `⚠️ Executed ${executionOps.length} operation(s) without explicit approval request`
    }

    results.push({
      rule: "approval_gate_enforcement",
      passed: hasApprovalRequest,
      severity,
      details,
      evidence: { executionOps, hasApprovalRequest },
    })
  }

  return results
}

/**
 * Emits one informational check per assistant message that read context files.
 *
 * Which reads count as context reads is decided by `extractContextReads`
 * (presumably reads under `.opencode/context/` — confirm against that helper).
 *
 * @param messages - Session messages.
 * @returns A passing "lazy_context_loading" check for each assistant message
 *          with at least one context read, listing what was read.
 */
function analyzeContextLoading(messages: any[]): ValidationCheck[] {
  return messages.flatMap((message): ValidationCheck[] => {
    const author = message.info?.role || message.role
    if (author !== "assistant") return []

    const loaded = extractContextReads(message)
    if (loaded.length === 0) return []

    return [
      {
        rule: "lazy_context_loading",
        passed: true,
        severity: "info",
        details: `Lazy-loaded ${loaded.length} context file(s): ${loaded.join(", ")}`,
      },
    ]
  })
}

/**
 * Judges delegation decisions against the 4-file threshold.
 *
 * For each assistant message, write/edit tool calls are counted. Using the
 * `task` tool counts as delegating. Delegating with fewer than 4 write/edit
 * calls, or not delegating with 4 or more, yields a warning.
 *
 * @param messages - Session messages.
 * @returns "delegation_appropriateness" checks; messages that neither
 *          delegate nor reach the threshold produce no check.
 */
function analyzeDelegation(messages: any[]): ValidationCheck[] {
  const results: ValidationCheck[] = []

  for (const message of messages) {
    if ((message.info?.role || message.role) !== "assistant") continue

    const toolNames = extractToolsFromMessage(message)

    let fileOps = 0
    for (const name of toolNames) {
      if (name === "write" || name === "edit") fileOps += 1
    }

    const delegated = toolNames.includes("task")
    const reachesThreshold = fileOps >= 4

    // Nothing to report when the agent neither delegated nor needed to
    if (!delegated && !reachesThreshold) continue

    if (!delegated) {
      results.push({
        rule: "delegation_appropriateness",
        passed: false,
        severity: "warning",
        details: `Should have delegated (${fileOps} files >= 4 threshold)`,
      })
      continue
    }

    if (reachesThreshold) {
      results.push({
        rule: "delegation_appropriateness",
        passed: true,
        severity: "info",
        details: `Appropriately delegated (${fileOps} files)`,
      })
    } else {
      results.push({
        rule: "delegation_appropriateness",
        passed: false,
        severity: "warning",
        details: `Delegated but only ${fileOps} files (< 4 threshold)`,
      })
    }
  }

  return results
}

/**
 * Detects violations of the stop_on_failure rule.
 *
 * A violation is recorded when an assistant message whose metadata carries
 * an `error` is directly followed by a message that uses write, edit or bash.
 *
 * @param messages - Session messages in chronological order.
 * @returns One failing "stop_on_failure" check (severity "error") per
 *          detected auto-fix attempt; empty when none are found.
 */
function analyzeCriticalRules(messages: any[]): ValidationCheck[] {
  const mutatingTools = ["write", "edit", "bash"]
  const violations: ValidationCheck[] = []

  // Walk adjacent pairs: (erroring message, the message right after it)
  for (let idx = 1; idx < messages.length; idx++) {
    const erroring = messages[idx - 1]
    const followUp = messages[idx]

    const author = erroring.info?.role || erroring.role
    const meta = erroring.info?.metadata || erroring.metadata
    if (author !== "assistant" || !meta?.error) continue

    const followUpTools = extractToolsFromMessage(followUp)
    const attemptedFix = followUpTools.some(name => mutatingTools.includes(name))
    if (!attemptedFix) continue

    violations.push({
      rule: "stop_on_failure",
      passed: false,
      severity: "error",
      details: "⛔ Auto-fix attempted after error - violates stop_on_failure rule",
      evidence: { error: meta.error, autoFixTools: followUpTools },
    })
  }

  return violations
}

/**
 * Checks that the context file required for a kind of task was read before
 * the assistant carried out that task.
 *
 * An assistant message is examined only when it invokes at least one of
 * write/edit/bash/task. Its lower-cased text is matched (by substring) against
 * each rule's keywords; every matching rule produces one check, which passes
 * when the behavior log holds a `context_file_read` event whose path contains
 * the rule's required file and whose timestamp is earlier than the message's.
 *
 * @param messages    Session messages; role is read from `info.role`, falling back to `role`.
 * @param behaviorLog Logged events. Only entries with `event === "context_file_read"`
 *                    and a string `data.filePath` are used; malformed entries (and a
 *                    missing log) are ignored instead of throwing.
 * @returns One `context_loading_compliance` check per (message, matching rule) pair.
 */
function analyzeContextLoadingCompliance(messages: any[], behaviorLog: any[]): ValidationCheck[] {
  const checks: ValidationCheck[] = []

  // Define required context files for different task types
  const contextRules = [
    {
      taskKeywords: ["write doc", "create doc", "documentation", "write readme", "document"],
      requiredFile: "standards/docs.md",
      taskType: "documentation"
    },
    {
      taskKeywords: ["write code", "create function", "implement", "add feature", "build"],
      requiredFile: "standards/code.md",
      taskType: "code writing"
    },
    {
      taskKeywords: ["review code", "check code", "analyze code", "code review"],
      requiredFile: "workflows/review.md",
      taskType: "code review"
    },
    {
      taskKeywords: ["delegate", "create task", "subagent"],
      requiredFile: "workflows/delegation.md",
      taskType: "delegation"
    },
    {
      taskKeywords: ["write test", "create test", "test coverage", "unit test"],
      requiredFile: "standards/tests.md",
      taskType: "testing"
    }
  ]

  // Get all context file reads from behavior log. Entries without a string
  // filePath are dropped here so the `.includes` lookup below cannot throw.
  const contextReads = (behaviorLog ?? [])
    .filter(log => log?.event === "context_file_read" && typeof log.data?.filePath === "string")
    .map(log => ({
      timestamp: log.timestamp,
      filePath: log.data.filePath as string
    }))

  // Analyze each message for task execution
  for (const msg of messages) {
    const role = msg.info?.role || msg.role

    if (role !== "assistant") continue

    const tools = extractToolsFromMessage(msg)
    const executionTools = tools.filter(t => ["write", "edit", "bash", "task"].includes(t))

    if (executionTools.length === 0) continue

    // Get message text to detect task type
    const messageText = extractMessageText(msg).toLowerCase()

    // Check if this message matches any context loading rules
    for (const rule of contextRules) {
      const matchesTask = rule.taskKeywords.some(keyword => messageText.includes(keyword))
      if (!matchesTask) continue

      // Check if required context file was read BEFORE this message.
      // When the message has no timestamp, "now" is used, so any logged read
      // with an earlier timestamp counts as having happened before it.
      const msgTimestamp = msg.info?.timestamp || Date.now()
      const contextReadBefore = contextReads.some(read =>
        read.filePath.includes(rule.requiredFile) && read.timestamp < msgTimestamp
      )

      checks.push({
        rule: "context_loading_compliance",
        passed: contextReadBefore,
        severity: contextReadBefore ? "info" : "warning",
        details: contextReadBefore
          ? `✅ Loaded ${rule.requiredFile} before ${rule.taskType}`
          : `⚠️ Did not load ${rule.requiredFile} before ${rule.taskType} task`,
        evidence: {
          taskType: rule.taskType,
          requiredFile: rule.requiredFile,
          contextReadBefore,
          executionTools
        }
      })
    }
  }

  return checks
}

/**
 * Aggregates delegation statistics over all assistant messages.
 *
 * Every assistant message whose tools include "task" counts as one
 * delegation; the number of "write"/"edit" tool calls in that message is
 * recorded, and the delegation is classed as appropriate when that number
 * is at least 4.
 *
 * @param messages Session messages; role is read from `info.role`, falling back to `role`.
 * @returns Totals plus the per-delegation file counts, in message order.
 */
function analyzeDelegationDecisions(messages: any[]): {
  delegations: number
  appropriate: number
  inappropriate: number
  fileCountStats: number[]
} {
  // One entry per delegating assistant message: its write/edit call count.
  const fileCountStats: number[] = messages
    .filter(message => (message.info?.role || message.role) === "assistant")
    .map(message => extractToolsFromMessage(message))
    .filter(toolNames => toolNames.includes("task"))
    .map(toolNames => toolNames.filter(name => name === "write" || name === "edit").length)

  const appropriate = fileCountStats.filter(count => count >= 4).length

  return {
    delegations: fileCountStats.length,
    appropriate,
    inappropriate: fileCountStats.length - appropriate,
    fileCountStats,
  }
}

// Helper functions
/**
 * Collects the names of all tools invoked in a message, in part order.
 *
 * Two part shapes are recognised:
 *  - SDK format:    `{ type: "tool", tool: <name> }`
 *  - legacy format: `{ type: "tool-invocation", toolInvocation: { toolName: <name> } }`
 *
 * Parts without a usable name are skipped in both formats, so the result
 * never contains `undefined`. A missing message, missing `parts`, or a
 * null/undefined part yields no entries instead of throwing.
 *
 * @param msg Message object, expected to look like `{ info, parts }`.
 * @returns Tool names found in `msg.parts` (empty when there are none).
 */
function extractToolsFromMessage(msg: any): string[] {
  const tools: string[] = []

  // Messages from SDK have structure: { info: Message, parts: Part[] }
  const parts = msg?.parts || []

  for (const part of parts) {
    if (!part) continue

    // Check for tool type (from SDK: part.type === "tool")
    if (part.type === "tool" && part.tool) {
      tools.push(part.tool)
    }
    // Also check for tool-invocation format (legacy); require a name just
    // like the SDK branch does.
    if (part.type === "tool-invocation" && part.toolInvocation?.toolName) {
      tools.push(part.toolInvocation.toolName)
    }
  }

  return tools
}

/**
 * Concatenates the text of every non-empty "text" part of a message.
 *
 * Fragments are separated by a single space and the final string is
 * trimmed. A message without `parts` yields the empty string.
 *
 * @param msg Message object with an optional `parts` list.
 * @returns The combined, trimmed message text.
 */
function extractMessageText(msg: any): string {
  if (!msg.parts) return ""

  const fragments: string[] = []
  for (const piece of msg.parts) {
    // Only textual parts with actual content contribute.
    if (piece.type !== "text" || !piece.text) continue
    fragments.push(piece.text)
  }

  return fragments.join(" ").trim()
}

/**
 * Lists the context files a message read through legacy "tool-invocation" parts.
 *
 * A part qualifies when its invocation's tool name is "read" and its
 * `args.target_file` contains ".opencode/context/". The matching
 * `target_file` values are returned in part order.
 *
 * @param msg Message object with an optional `parts` list.
 * @returns Paths of the context files read (empty when there are none).
 */
function extractContextReads(msg: any): string[] {
  if (!msg.parts) return []

  const isContextRead = (part: any): boolean => {
    if (part.type !== "tool-invocation") return false
    const invocation = part.toolInvocation
    if (invocation?.toolName !== "read") return false
    return Boolean(invocation?.args?.target_file?.includes(".opencode/context/"))
  }

  return msg.parts
    .filter((part: any) => isContextRead(part))
    .map((part: any) => part.toolInvocation.args.target_file)
}

// Formatting functions
/**
 * Renders a validation result as a short Markdown summary.
 *
 * The output starts with the score and the passed/warning/failed counters
 * taken from `validation.summary`, followed by an "Errors" section and a
 * "Warnings" section. Each section lists the failed checks of that severity
 * and is omitted entirely when there are none.
 *
 * @param validation Validation result providing `summary` and `checks`.
 * @returns Markdown text with lines joined by "\n".
 */
function formatValidationReport(validation: ValidationResult): string {
  const { summary, checks } = validation

  // Builds one severity section, or nothing when no failed check has that severity.
  const renderSection = (heading: string, severity: string): string[] => {
    const failing = checks.filter(entry => !entry.passed && entry.severity === severity)
    if (failing.length === 0) return []
    return [heading, ...failing.map(entry => `- **${entry.rule}**: ${entry.details}`), ``]
  }

  return [
    `## Validation Report`,
    ``,
    `**Score:** ${summary.score}%`,
    `- ✅ Passed: ${summary.passed}`,
    `- ⚠️  Warnings: ${summary.warnings}`,
    `- ❌ Failed: ${summary.failed}`,
    ``,
    ...renderSection(`### ❌ Errors`, "error"),
    ...renderSection(`### ⚠️  Warnings`, "warning"),
  ].join("\n")
}

/**
 * Renders delegation statistics as a Markdown section.
 *
 * Always prints the total, appropriate and questionable delegation counts.
 * When at least one file count was recorded, it also prints the average
 * (one decimal), the min-max range and the fixed "4+ files" threshold line.
 *
 * @param analysis Object with `delegations`, `appropriate`, `inappropriate` and `fileCountStats`.
 * @returns Markdown text with lines joined by "\n".
 */
function formatDelegationAnalysis(analysis: any): string {
  const report: string[] = []
  report.push(`## Delegation Analysis`)
  report.push(``)
  report.push(`**Total delegations:** ${analysis.delegations}`)
  report.push(`- ✅ Appropriate: ${analysis.appropriate}`)
  report.push(`- ⚠️  Questionable: ${analysis.inappropriate}`)
  report.push(``)

  const fileCounts = analysis.fileCountStats
  // Nothing recorded: the header block is the whole report.
  if (!(fileCounts.length > 0)) {
    return report.join("\n")
  }

  const total = fileCounts.reduce((sum: number, count: number) => sum + count, 0)
  const mean = total / fileCounts.length
  const smallest = Math.min(...fileCounts)
  const largest = Math.max(...fileCounts)

  report.push(
    `**File count per delegation:**`,
    `- Average: ${mean.toFixed(1)} files`,
    `- Range: ${smallest} - ${largest} files`,
    `- Threshold: 4+ files`,
  )

  return report.join("\n")
}

/**
 * Builds the full Markdown validation report for a session.
 *
 * The report consists of a header (session id, generation time as an ISO
 * string, number of messages), the summary produced by
 * `formatValidationReport`, and a "Detailed Checks" part with one subsection
 * per check. A subsection shows the check's status icon, rule and details,
 * plus its evidence as pretty-printed JSON when evidence is present.
 *
 * @param validation Validation result providing `sessionID` and `checks`.
 * @param messages   Analyzed messages; only their count is used here.
 * @returns Markdown text with lines joined by "\n".
 */
function generateDetailedReport(validation: ValidationResult, messages: any[]): string {
  const output: string[] = [
    `# Agent Validation Report`,
    ``,
    `**Session:** ${validation.sessionID}`,
    `**Generated:** ${new Date().toISOString()}`,
    `**Messages analyzed:** ${messages.length}`,
    ``,
    formatValidationReport(validation),
    ``,
    `## Detailed Checks`,
    ``,
  ]

  for (const entry of validation.checks) {
    // Passed checks get a tick regardless of severity; failures are split by severity.
    let marker: string
    if (entry.passed) {
      marker = "✅"
    } else if (entry.severity === "error") {
      marker = "❌"
    } else {
      marker = "⚠️"
    }

    output.push(`### ${marker} ${entry.rule}`, ``, entry.details, ``)

    if (entry.evidence) {
      output.push(
        `**Evidence:**`,
        "```json",
        JSON.stringify(entry.evidence, null, 2),
        "```",
        ``,
      )
    }
  }

  return output.join("\n")
}

// Expose AgentValidatorPlugin as this module's default export.
export default AgentValidatorPlugin