Browse Source

Improve intra-turn todo hygiene

dhaern 2 days ago
parent
commit
3d71513c11

+ 89 - 5
src/hooks/todo-continuation/index.test.ts

@@ -1,6 +1,10 @@
 import { describe, expect, mock, test } from 'bun:test';
 import { SLIM_INTERNAL_INITIATOR_MARKER } from '../../utils';
 import { createTodoContinuationHook } from './index';
+import {
+  TODO_FINAL_ACTIVE_REMINDER,
+  TODO_HYGIENE_REMINDER,
+} from './todo-hygiene';
 
 describe('createTodoContinuationHook', () => {
   function createMockContext(overrides?: {
@@ -84,6 +88,86 @@ describe('createTodoContinuationHook', () => {
     });
   });
 
+  describe('todo hygiene routing', () => {
+    test('does not inject hygiene reminder for unknown non-orchestrator session', async () => {
+      const ctx = createMockContext({
+        todoResult: {
+          data: [
+            { id: '1', content: 'todo1', status: 'pending', priority: 'high' },
+          ],
+        },
+      });
+      const hook = createTodoContinuationHook(ctx);
+      const system = { system: ['base'] };
+
+      await hook.handleToolExecuteAfter({ tool: 'task', sessionID: 'sub1' });
+      await hook.handleChatSystemTransform({ sessionID: 'sub1' }, system);
+
+      expect(system.system.join('\n')).not.toContain(TODO_HYGIENE_REMINDER);
+    });
+
+    test('injects hygiene reminder only for orchestrator session', async () => {
+      const ctx = createMockContext({
+        todoResult: {
+          data: [
+            { id: '1', content: 'todo1', status: 'pending', priority: 'high' },
+          ],
+        },
+      });
+      const hook = createTodoContinuationHook(ctx);
+      const system = { system: ['base'] };
+
+      hook.handleChatMessage({ sessionID: 'main1', agent: 'orchestrator' });
+      await hook.handleToolExecuteAfter({ tool: 'task', sessionID: 'main1' });
+      await hook.handleChatSystemTransform({ sessionID: 'main1' }, system);
+
+      expect(system.system.join('\n')).toContain(TODO_HYGIENE_REMINDER);
+    });
+
+    test('normal read-only work can arm hygiene reminder after todowrite reset', async () => {
+      const ctx = createMockContext({
+        todoResult: {
+          data: [
+            { id: '1', content: 'todo1', status: 'pending', priority: 'high' },
+          ],
+        },
+      });
+      const hook = createTodoContinuationHook(ctx);
+      const system = { system: ['base'] };
+
+      hook.handleChatMessage({ sessionID: 'main1', agent: 'orchestrator' });
+      await hook.handleToolExecuteAfter({ tool: 'todowrite', sessionID: 'main1' });
+      await hook.handleToolExecuteAfter({ tool: 'read', sessionID: 'main1' });
+      await hook.handleChatSystemTransform({ sessionID: 'main1' }, system);
+
+      expect(system.system.join('\n')).toContain(TODO_HYGIENE_REMINDER);
+    });
+
+    test('final active todo after todowrite uses the stronger finishing reminder', async () => {
+      const ctx = createMockContext({
+        todoResult: {
+          data: [
+            {
+              id: '1',
+              content: 'todo1',
+              status: 'in_progress',
+              priority: 'high',
+            },
+          ],
+        },
+      });
+      const hook = createTodoContinuationHook(ctx);
+      const system = { system: ['base'] };
+
+      hook.handleChatMessage({ sessionID: 'main1', agent: 'orchestrator' });
+      await hook.handleToolExecuteAfter({ tool: 'todowrite', sessionID: 'main1' });
+      await hook.handleChatSystemTransform({ sessionID: 'main1' }, system);
+
+      expect(system.system.join('\n')).toContain(TODO_FINAL_ACTIVE_REMINDER);
+      expect(system.system.join('\n')).not.toContain(TODO_HYGIENE_REMINDER);
+    });
+  });
+
   describe('continuation scheduling', () => {
     test('session idle + enabled + incomplete todos → schedules continuation', async () => {
       const ctx = createMockContext({
@@ -568,7 +652,7 @@ describe('createTodoContinuationHook', () => {
     });
 
     test('cooldownMs from config', async () => {
-      const customCooldownMs = 100;
+      const customCooldownMs = 150;
       const ctx = createMockContext({
         todoResult: {
           data: [
@@ -598,14 +682,14 @@ describe('createTodoContinuationHook', () => {
         },
       });
 
-      // Advance timer by less than custom cooldown
-      await delay(customCooldownMs - 1);
+      // Advance timer by well under the custom cooldown to avoid timer jitter
+      await delay(60);
 
       // Verify prompt not called yet
       expect(hasContinuation(ctx.client.session.prompt)).toBe(false);
 
-      // Advance timer by remaining 1ms
-      await delay(1);
+      // Advance timer past the configured cooldown
+      await delay(100);
 
       // Now prompt should be called
       expect(hasContinuation(ctx.client.session.prompt)).toBe(true);

+ 41 - 0
src/hooks/todo-continuation/index.ts

@@ -1,6 +1,7 @@
 import type { PluginInput } from '@opencode-ai/plugin';
 import { tool } from '@opencode-ai/plugin/tool';
 import { createInternalAgentTextPart, log } from '../../utils';
+import { createTodoHygiene } from './todo-hygiene';
 
 const HOOK_NAME = 'todo-continuation';
 const COMMAND_NAME = 'auto-continue';
@@ -107,6 +108,14 @@ export function createTodoContinuationHook(
   },
 ): {
   tool: Record<string, unknown>;
+  handleToolExecuteAfter: (input: {
+    tool: string;
+    sessionID?: string;
+  }) => Promise<void>;
+  handleChatSystemTransform: (
+    input: { sessionID?: string },
+    output: { system: string[] },
+  ) => Promise<void>;
   handleEvent: (input: {
     event: { type: string; properties?: Record<string, unknown> };
   }) => Promise<void>;
@@ -138,6 +147,28 @@ export function createTodoContinuationHook(
     notificationBusyUntilBySession: new Map<string, number>(),
   };
 
+  const hygiene = createTodoHygiene({
+    getTodoState: async (sessionID) => {
+      const result = await ctx.client.session.todo({
+        path: { id: sessionID },
+      });
+      const todos = result.data as TodoItem[];
+      const openTodos = todos.filter(
+        (todo) => !TERMINAL_TODO_STATUSES.includes(todo.status),
+      );
+      return {
+        hasOpenTodos: openTodos.length > 0,
+        inProgressCount: openTodos.filter(
+          (todo) => todo.status === 'in_progress',
+        ).length,
+        pendingCount: openTodos.filter((todo) => todo.status === 'pending')
+          .length,
+      };
+    },
+    shouldInject: (sessionID) => isOrchestratorSession(sessionID),
+    log: (message, meta) => log(`[${HOOK_NAME}] ${message}`, meta),
+  });
+
   function markNotificationStarted(sessionID: string): void {
     state.notifyingSessionIds.add(sessionID);
   }
@@ -218,6 +249,14 @@ export function createTodoContinuationHook(
     const { event } = input;
     const properties = event.properties ?? {};
 
+    hygiene.handleEvent({
+      type: event.type,
+      properties: {
+        info: properties.info as { id?: string } | undefined,
+        sessionID: properties.sessionID as string | undefined,
+      },
+    });
+
     if (
       event.type === 'session.idle' ||
       (event.type === 'session.status' &&
@@ -619,6 +658,8 @@ export function createTodoContinuationHook(
 
   return {
     tool: { auto_continue: autoContinue },
+    handleToolExecuteAfter: hygiene.handleToolExecuteAfter,
+    handleChatSystemTransform: hygiene.handleChatSystemTransform,
     handleEvent,
     handleChatMessage,
     handleCommandExecuteBefore,

+ 236 - 0
src/hooks/todo-continuation/todo-hygiene.test.ts

@@ -0,0 +1,236 @@
+import { describe, expect, test } from 'bun:test';
+import {
+  TODO_FINAL_ACTIVE_REMINDER,
+  TODO_HYGIENE_REMINDER,
+  createTodoHygiene,
+} from './todo-hygiene';
+
+describe('todo hygiene', () => {
+  test('injects once after a normal tool when todos stay open', async () => {
+    const hook = createTodoHygiene({
+      getTodoState: async () => ({
+        hasOpenTodos: true,
+        inProgressCount: 0,
+        pendingCount: 1,
+      }),
+    });
+    const first = { system: ['base'] };
+    const second = { system: ['base'] };
+
+    await hook.handleToolExecuteAfter({ tool: 'read', sessionID: 's1' });
+    await hook.handleChatSystemTransform({ sessionID: 's1' }, first);
+    await hook.handleToolExecuteAfter({ tool: 'grep', sessionID: 's1' });
+    await hook.handleChatSystemTransform({ sessionID: 's1' }, second);
+
+    expect(first.system.join('\n')).toContain(TODO_HYGIENE_REMINDER);
+    expect(second.system.join('\n')).not.toContain(TODO_HYGIENE_REMINDER);
+  });
+
+  test('multiple tools are deduplicated while reminder is pending', async () => {
+    let count = 0;
+    const hook = createTodoHygiene({
+      getTodoState: async () => {
+        count++;
+        return {
+          hasOpenTodos: true,
+          inProgressCount: 0,
+          pendingCount: 1,
+        };
+      },
+    });
+
+    await hook.handleToolExecuteAfter({ tool: 'task', sessionID: 's1' });
+    await hook.handleToolExecuteAfter({ tool: 'read', sessionID: 's1' });
+    await hook.handleToolExecuteAfter({ tool: 'background_output', sessionID: 's1' });
+
+    expect(count).toBe(1);
+  });
+
+  test('does not re-arm until todowrite resets the cycle', async () => {
+    const hook = createTodoHygiene({
+      getTodoState: async () => ({
+        hasOpenTodos: true,
+        inProgressCount: 0,
+        pendingCount: 1,
+      }),
+    });
+    const first = { system: ['base'] };
+    const second = { system: ['base'] };
+
+    await hook.handleToolExecuteAfter({ tool: 'read', sessionID: 's1' });
+    await hook.handleChatSystemTransform({ sessionID: 's1' }, first);
+    await hook.handleToolExecuteAfter({ tool: 'grep', sessionID: 's1' });
+    await hook.handleChatSystemTransform({ sessionID: 's1' }, second);
+
+    expect(first.system.join('\n')).toContain(TODO_HYGIENE_REMINDER);
+    expect(second.system.join('\n')).not.toContain(TODO_HYGIENE_REMINDER);
+  });
+
+  test('consumes pending reminder when shouldInject rejects the session', async () => {
+    const hook = createTodoHygiene({
+      getTodoState: async () => ({
+        hasOpenTodos: true,
+        inProgressCount: 0,
+        pendingCount: 1,
+      }),
+      shouldInject: () => false,
+    });
+    const first = { system: ['base'] };
+    const second = { system: ['base'] };
+
+    await hook.handleToolExecuteAfter({ tool: 'task', sessionID: 's1' });
+    await hook.handleChatSystemTransform({ sessionID: 's1' }, first);
+    await hook.handleToolExecuteAfter({ tool: 'grep', sessionID: 's1' });
+    await hook.handleChatSystemTransform({ sessionID: 's1' }, second);
+
+    expect(first.system.join('\n')).not.toContain(TODO_HYGIENE_REMINDER);
+    expect(second.system.join('\n')).not.toContain(TODO_HYGIENE_REMINDER);
+  });
+
+  test('todowrite clears a pending reminder', async () => {
+    const hook = createTodoHygiene({
+      getTodoState: async () => ({
+        hasOpenTodos: true,
+        inProgressCount: 0,
+        pendingCount: 1,
+      }),
+    });
+    const system = { system: ['base'] };
+
+    await hook.handleToolExecuteAfter({ tool: 'task', sessionID: 's1' });
+    await hook.handleToolExecuteAfter({ tool: 'todowrite', sessionID: 's1' });
+    await hook.handleChatSystemTransform({ sessionID: 's1' }, system);
+
+    expect(system.system.join('\n')).not.toContain(TODO_HYGIENE_REMINDER);
+  });
+
+  test('todowrite re-enables the cycle when todos remain open', async () => {
+    const hook = createTodoHygiene({
+      getTodoState: async () => ({
+        hasOpenTodos: true,
+        inProgressCount: 0,
+        pendingCount: 1,
+      }),
+    });
+    const first = { system: ['base'] };
+    const second = { system: ['base'] };
+
+    await hook.handleToolExecuteAfter({ tool: 'read', sessionID: 's1' });
+    await hook.handleChatSystemTransform({ sessionID: 's1' }, first);
+    await hook.handleToolExecuteAfter({ tool: 'todowrite', sessionID: 's1' });
+    await hook.handleToolExecuteAfter({ tool: 'grep', sessionID: 's1' });
+    await hook.handleChatSystemTransform({ sessionID: 's1' }, second);
+
+    expect(first.system.join('\n')).toContain(TODO_HYGIENE_REMINDER);
+    expect(second.system.join('\n')).toContain(TODO_HYGIENE_REMINDER);
+  });
+
+  test('cleans pending reminder on session.deleted', async () => {
+    const hook = createTodoHygiene({
+      getTodoState: async () => ({
+        hasOpenTodos: true,
+        inProgressCount: 0,
+        pendingCount: 1,
+      }),
+    });
+    const system = { system: ['base'] };
+
+    await hook.handleToolExecuteAfter({ tool: 'task', sessionID: 's1' });
+    hook.handleEvent({
+      type: 'session.deleted',
+      properties: { info: { id: 's1' } },
+    });
+    await hook.handleChatSystemTransform({ sessionID: 's1' }, system);
+
+    expect(system.system.join('\n')).not.toContain(TODO_HYGIENE_REMINDER);
+  });
+
+  test('handleChatSystemTransform failures are best-effort and do not reject', async () => {
+    let calls = 0;
+    const hook = createTodoHygiene({
+      getTodoState: async () => {
+        calls++;
+        if (calls === 1) {
+          return {
+            hasOpenTodos: true,
+            inProgressCount: 0,
+            pendingCount: 1,
+          };
+        }
+        throw new Error('boom');
+      },
+    });
+    const system = { system: ['base'] };
+
+    await expect(
+      hook.handleToolExecuteAfter({ tool: 'task', sessionID: 's1' }),
+    ).resolves.toBeUndefined();
+    await expect(
+      hook.handleChatSystemTransform({ sessionID: 's1' }, system),
+    ).resolves.toBeUndefined();
+
+    expect(system.system.join('\n')).not.toContain(TODO_HYGIENE_REMINDER);
+    expect(system.system.join('\n')).not.toContain(TODO_FINAL_ACTIVE_REMINDER);
+  });
+
+  test('todowrite state lookup failure clears stale pending state', async () => {
+    let fail = false;
+    const hook = createTodoHygiene({
+      getTodoState: async () => {
+        if (fail) {
+          throw new Error('boom');
+        }
+        return {
+          hasOpenTodos: true,
+          inProgressCount: 0,
+          pendingCount: 1,
+        };
+      },
+    });
+    const system = { system: ['base'] };
+
+    await hook.handleToolExecuteAfter({ tool: 'task', sessionID: 's1' });
+    fail = true;
+    await expect(
+      hook.handleToolExecuteAfter({ tool: 'todowrite', sessionID: 's1' }),
+    ).resolves.toBeUndefined();
+    await hook.handleChatSystemTransform({ sessionID: 's1' }, system);
+
+    expect(system.system.join('\n')).not.toContain(TODO_HYGIENE_REMINDER);
+    expect(system.system.join('\n')).not.toContain(TODO_FINAL_ACTIVE_REMINDER);
+  });
+
+  test('uses the final-active reminder when only one in_progress remains', async () => {
+    const hook = createTodoHygiene({
+      getTodoState: async () => ({
+        hasOpenTodos: true,
+        inProgressCount: 1,
+        pendingCount: 0,
+      }),
+    });
+    const system = { system: ['base'] };
+
+    await hook.handleToolExecuteAfter({ tool: 'task', sessionID: 's1' });
+    await hook.handleChatSystemTransform({ sessionID: 's1' }, system);
+
+    expect(system.system.join('\n')).toContain(TODO_FINAL_ACTIVE_REMINDER);
+    expect(system.system.join('\n')).not.toContain(TODO_HYGIENE_REMINDER);
+  });
+
+  test('todowrite rearms the final-active reminder when only one in_progress remains', async () => {
+    const hook = createTodoHygiene({
+      getTodoState: async () => ({
+        hasOpenTodos: true,
+        inProgressCount: 1,
+        pendingCount: 0,
+      }),
+    });
+    const system = { system: ['base'] };
+
+    await hook.handleToolExecuteAfter({ tool: 'todowrite', sessionID: 's1' });
+    await hook.handleChatSystemTransform({ sessionID: 's1' }, system);
+
+    expect(system.system.join('\n')).toContain(TODO_FINAL_ACTIVE_REMINDER);
+    expect(system.system.join('\n')).not.toContain(TODO_HYGIENE_REMINDER);
+  });
+});

+ 178 - 0
src/hooks/todo-continuation/todo-hygiene.ts

@@ -0,0 +1,178 @@
+export const TODO_HYGIENE_REMINDER =
+  'If the active task changed or finished, update the todo list to match the current work state.';
+export const TODO_FINAL_ACTIVE_REMINDER =
+  'If you are finishing now, do not leave the active todo in_progress. Mark it completed, or move unfinished work back to pending.';
+
+const RESET = new Set(['todowrite']);
+const IGNORE = new Set(['auto_continue']);
+
+interface ToolInput {
+  tool: string;
+  sessionID?: string;
+}
+
+interface SystemInput {
+  sessionID?: string;
+}
+
+interface SystemOutput {
+  system: string[];
+}
+
+interface EventInput {
+  type: string;
+  properties?: {
+    info?: { id?: string };
+    sessionID?: string;
+  };
+}
+
+interface Options {
+  getTodoState: (sessionID: string) => Promise<{
+    hasOpenTodos: boolean;
+    inProgressCount: number;
+    pendingCount: number;
+  }>;
+  shouldInject?: (sessionID: string) => boolean;
+  log?: (message: string, meta?: Record<string, unknown>) => void;
+}
+
+export function createTodoHygiene(options: Options) {
+  const pending = new Set<string>();
+  const done = new Set<string>();
+
+  function clear(sessionID: string): void {
+    pending.delete(sessionID);
+    done.delete(sessionID);
+  }
+
+  function isFinalActive(state: {
+    inProgressCount: number;
+    pendingCount: number;
+  }): boolean {
+    return state.inProgressCount === 1 && state.pendingCount === 0;
+  }
+
+  return {
+    async handleToolExecuteAfter(input: ToolInput): Promise<void> {
+      if (!input.sessionID) {
+        return;
+      }
+
+      const tool = input.tool.toLowerCase();
+      if (IGNORE.has(tool)) {
+        return;
+      }
+
+      try {
+        if (RESET.has(tool)) {
+          const state = await options.getTodoState(input.sessionID);
+          if (!state.hasOpenTodos) {
+            clear(input.sessionID);
+            options.log?.('Cleared todo hygiene cycle', {
+              sessionID: input.sessionID,
+              tool,
+            });
+            return;
+          }
+
+          pending.delete(input.sessionID);
+          done.delete(input.sessionID);
+
+          if (isFinalActive(state)) {
+            pending.add(input.sessionID);
+            options.log?.('Armed final-active todo hygiene reminder', {
+              sessionID: input.sessionID,
+              tool,
+            });
+            return;
+          }
+
+          options.log?.('Reset todo hygiene cycle', {
+            sessionID: input.sessionID,
+            tool,
+          });
+          return;
+        }
+
+        if (pending.has(input.sessionID) || done.has(input.sessionID)) {
+          return;
+        }
+
+        if (!(await options.getTodoState(input.sessionID)).hasOpenTodos) {
+          return;
+        }
+
+        pending.add(input.sessionID);
+        options.log?.('Armed todo hygiene reminder', {
+          sessionID: input.sessionID,
+          tool,
+        });
+      } catch (error) {
+        if (RESET.has(tool)) {
+          clear(input.sessionID);
+        }
+        options.log?.('Skipped todo hygiene reminder: failed to inspect todos', {
+          sessionID: input.sessionID,
+          tool,
+          error: error instanceof Error ? error.message : String(error),
+        });
+      }
+    },
+
+    async handleChatSystemTransform(
+      input: SystemInput,
+      output: SystemOutput,
+    ): Promise<void> {
+      if (!input.sessionID || !pending.has(input.sessionID)) {
+        return;
+      }
+
+      if (options.shouldInject && !options.shouldInject(input.sessionID)) {
+        clear(input.sessionID);
+        return;
+      }
+
+      try {
+        const state = await options.getTodoState(input.sessionID);
+        if (!state.hasOpenTodos) {
+          clear(input.sessionID);
+          return;
+        }
+
+        const finalActive = isFinalActive(state);
+        const reminder = finalActive
+          ? TODO_FINAL_ACTIVE_REMINDER
+          : TODO_HYGIENE_REMINDER;
+
+        pending.delete(input.sessionID);
+        done.add(input.sessionID);
+        output.system.push(reminder);
+        options.log?.('Injected todo hygiene reminder', {
+          sessionID: input.sessionID,
+          reminder: finalActive ? 'final-active' : 'general',
+        });
+      } catch (error) {
+        clear(input.sessionID);
+        options.log?.('Skipped todo hygiene reminder: failed to inspect todos', {
+          sessionID: input.sessionID,
+          error: error instanceof Error ? error.message : String(error),
+        });
+      }
+    },
+
+    handleEvent(event: EventInput): void {
+      if (event.type !== 'session.deleted') {
+        return;
+      }
+
+      const sessionID = event.properties?.sessionID ?? event.properties?.info?.id;
+      if (!sessionID) {
+        return;
+      }
+
+      clear(sessionID);
+    },
+
+  };
+}

+ 20 - 0
src/index.ts

@@ -465,6 +465,17 @@ const OhMyOpenCodeLite: Plugin = async (ctx) => {
           };
         },
       );
+
+      if (input.event.type === 'session.deleted') {
+        const props = input.event.properties as
+          | { info?: { id?: string }; sessionID?: string }
+          | undefined;
+        const sessionID =
+          props?.info?.id ?? props?.sessionID;
+        if (sessionID) {
+          sessionAgentMap.delete(sessionID);
+        }
+      }
     },
 
     // Best-effort rescue only for stale apply_patch input before native execution
@@ -541,6 +552,8 @@ const OhMyOpenCodeLite: Plugin = async (ctx) => {
             (output.system[0] ? `\n\n${output.system[0]}` : '');
         }
       }
+
+      await todoContinuationHook.handleChatSystemTransform(input, output);
       await postFileToolNudgeHook['experimental.chat.system.transform'](
         input,
         output,
@@ -593,6 +606,13 @@ const OhMyOpenCodeLite: Plugin = async (ctx) => {
         },
       );
 
+      await todoContinuationHook.handleToolExecuteAfter(
+        input as {
+          tool: string;
+          sessionID?: string;
+        },
+      );
+
       await postFileToolNudgeHook['tool.execute.after'](
         input as {
           tool: string;