Browse Source

feat: detect and retry empty provider responses (#237)

* feat: detect and retry empty provider responses

* fix: address review feedback — typo, comment clarity, doc default values
ReqX 1 week ago
parent
commit
e92f3d179f

+ 2 - 0
docs/configuration.md

@@ -96,6 +96,7 @@ All config files support **JSONC** (JSON with Comments):
 | `fallback.timeoutMs` | number | `15000` | Time before aborting and trying next model |
 | `fallback.retryDelayMs` | number | `500` | Delay between retry attempts |
 | `fallback.chains.<agent>` | string[] | — | Ordered fallback model IDs for an agent |
+| `fallback.retry_on_empty` | boolean | `true` | Treat silent empty provider responses (0 tokens) as failures and retry. Set `false` to accept empty responses |
 | `council.master.model` | string | — | **Required if using council.** Council master model |
 | `council.master.variant` | string | — | Council master variant |
 | `council.master.prompt` | string | — | Optional synthesis guidance for the master |
@@ -110,3 +111,4 @@ All config files support **JSONC** (JSON with Comments):
 | `council.master_timeout` | number | `300000` | Master synthesis timeout (ms) |
 | `council.councillors_timeout` | number | `180000` | Per-councillor timeout (ms) |
 | `council.master_fallback` | string[] | — | Fallback models for the council master |
+| `council.councillor_retries` | number | `3` | Max retries per councillor and master on empty provider response (0–5) |

+ 17 - 0
docs/council.md

@@ -117,6 +117,7 @@ Configure in `~/.config/opencode/oh-my-opencode-slim.json` (or `.jsonc`):
 | `master_timeout` | number | `300000` | Master synthesis timeout in ms (5 minutes) |
 | `councillors_timeout` | number | `180000` | Per-councillor timeout in ms (3 minutes) |
 | `master_fallback` | string[] | — | Optional fallback models for the master. Tried in order if the primary model fails or times out |
+| `councillor_retries` | number | `3` | Max retries per councillor and master on empty provider response (0–5). Each retry creates a fresh session |
 
 ### Councillor Configuration
 
@@ -432,6 +433,22 @@ Councillors that don't respond in time are marked `timed_out`. The master procee
 | All councillors fail | Returns error immediately — master is never invoked |
 | Master primary model fails | Tries `master_fallback` models in order before degrading |
 | All master models fail | Returns best single councillor response prefixed with `(Degraded — master failed, using <name>'s response)` |
+| Councillor gets empty response | Retries up to `councillor_retries` times with fresh sessions |
+
+### Empty Response Detection
+
+Providers sometimes silently drop requests — returning zero tokens with no error. This is detected automatically:
+
+- **Background tasks** (`@explorer`, `@fixer`, etc.): Empty responses trigger the fallback chain (next model in `fallback.chains`). Controlled by `fallback.retry_on_empty` (default `true`). Set to `false` to accept empty responses without retrying.
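+- **Council councillors and master**: Empty responses trigger up to `councillor_retries` retries (default `3`), each in a fresh session. Only "Empty response from provider" errors are retried — timeouts and other failures return immediately. These retries are also gated by `fallback.retry_on_empty`: when it is `false`, the empty result is accepted without retrying.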
+
+To disable empty-response retry globally:
+
+```jsonc
+{
+  "fallback": { "retry_on_empty": false }
+}
+```
 
 ### Master Fallback Chain
 

+ 222 - 0
src/background/background-manager.test.ts

@@ -486,6 +486,14 @@ describe('BackgroundTaskManager', () => {
     test('falls back to next model when first model prompt fails', async () => {
       let promptCalls = 0;
       const ctx = createMockContext({
+        sessionMessagesResult: {
+          data: [
+            {
+              info: { role: 'assistant' },
+              parts: [{ type: 'text', text: 'Response' }],
+            },
+          ],
+        },
         promptImpl: async (args) => {
           const isTaskPrompt =
             typeof args.path?.id === 'string' &&
@@ -720,6 +728,220 @@ describe('BackgroundTaskManager', () => {
         ),
       ).toBe(true);
     });
+
+    test('retries next fallback model when first model returns empty response', async () => {
+      let messagesCallCount = 0;
+      const ctx = createMockContext({
+        promptImpl: async (args) => {
+          const isTaskPrompt =
+            typeof args.path?.id === 'string' &&
+            args.path.id.startsWith('test-session-');
+          const isParentNotification = !isTaskPrompt;
+          if (isParentNotification) return {};
+          return {};
+        },
+      });
+
+      // Override messages mock to return empty on first call, then real content
+      ctx.client.session.messages = mock(async () => {
+        messagesCallCount++;
+        if (messagesCallCount === 1) {
+          // First model: empty response
+          return {
+            data: [
+              {
+                info: { role: 'assistant' },
+                parts: [{ type: 'text', text: '' }],
+              },
+            ],
+          };
+        }
+        // Second model: real content
+        return {
+          data: [
+            {
+              info: { role: 'assistant' },
+              parts: [{ type: 'text', text: 'Response' }],
+            },
+          ],
+        };
+      });
+
+      const manager = new BackgroundTaskManager(ctx, undefined, {
+        fallback: {
+          enabled: true,
+          timeoutMs: 15000,
+          retryDelayMs: 0,
+          chains: {
+            explorer: ['openai/gpt-5.4', 'opencode/gpt-5-nano'],
+          },
+        },
+      });
+
+      const task = manager.launch({
+        agent: 'explorer',
+        prompt: 'test',
+        description: 'test',
+        parentSessionId: 'parent-123',
+      });
+
+      // Yield to let the fire-and-forget async chain complete
+      await new Promise((r) => setTimeout(r, 10));
+
+      expect(task.status).toBe('running');
+      // Messages should have been called twice (once per fallback attempt)
+      expect(messagesCallCount).toBe(2);
+      // Session abort should have been called between attempts
+      expect(ctx.client.session.abort).toHaveBeenCalled();
+    });
+
+    test('allows empty response when retry_on_empty is false (prompt loop)', async () => {
+      const ctx = createMockContext({
+        promptImpl: async (args) => {
+          const isTaskPrompt =
+            typeof args.path?.id === 'string' &&
+            args.path.id.startsWith('test-session-');
+          const isParentNotification = !isTaskPrompt;
+          if (isParentNotification) return {};
+          return {};
+        },
+        sessionMessagesResult: {
+          data: [
+            {
+              info: { role: 'assistant' },
+              parts: [{ type: 'text', text: '' }], // empty response
+            },
+          ],
+        },
+      });
+
+      const manager = new BackgroundTaskManager(ctx, undefined, {
+        fallback: {
+          enabled: true,
+          timeoutMs: 15000,
+          retryDelayMs: 0,
+          retry_on_empty: false,
+          chains: {
+            explorer: ['openai/gpt-5.4', 'opencode/gpt-5-nano'],
+          },
+        },
+      });
+
+      const task = manager.launch({
+        agent: 'explorer',
+        prompt: 'test',
+        description: 'test',
+        parentSessionId: 'parent-123',
+      });
+
+      // Yield to let the fire-and-forget async chain complete
+      await new Promise((r) => setTimeout(r, 10));
+
+      // Task should be running (not failed) — empty response accepted
+      expect(task.status).toBe('running');
+      // Only one prompt call (no fallback attempt)
+      const promptCalls = ctx.client.session.prompt.mock.calls as Array<
+        [{ body?: { model?: { providerID?: string; modelID?: string } } }]
+      >;
+      const taskPromptCalls = promptCalls.filter(
+        (c) =>
+          c[0].body?.model?.providerID === 'openai' &&
+          c[0].body?.model?.modelID === 'gpt-5.4',
+      );
+      expect(taskPromptCalls.length).toBe(1);
+    });
+
+    test('completes task with empty text when retry_on_empty is false (extractAndCompleteTask)', async () => {
+      const ctx = createMockContext({
+        sessionMessagesResult: {
+          data: [
+            {
+              info: { role: 'assistant' },
+              parts: [{ type: 'text', text: '' }], // empty response
+            },
+          ],
+        },
+      });
+      const manager = new BackgroundTaskManager(ctx, undefined, {
+        fallback: {
+          enabled: false, // fallback disabled, but retry_on_empty still applies
+          timeoutMs: 15000,
+          retryDelayMs: 0,
+          chains: {},
+          retry_on_empty: false,
+        },
+      } as any);
+
+      const task = manager.launch({
+        agent: 'explorer',
+        prompt: 'test',
+        description: 'test',
+        parentSessionId: 'parent-123',
+      });
+
+      // Wait for task to start
+      await Promise.resolve();
+      await Promise.resolve();
+
+      // Simulate session.idle event
+      await manager.handleSessionStatus({
+        type: 'session.status',
+        properties: {
+          sessionID: task.sessionId,
+          status: { type: 'idle' },
+        },
+      });
+
+      // Empty response should be treated as completed, not failed
+      expect(task.status).toBe('completed');
+      expect(task.result).toBe(''); // empty text, not error
+    });
+
+    test('fails task on empty response when retry_on_empty is true (extractAndCompleteTask)', async () => {
+      const ctx = createMockContext({
+        sessionMessagesResult: {
+          data: [
+            {
+              info: { role: 'assistant' },
+              parts: [{ type: 'text', text: '' }], // empty response
+            },
+          ],
+        },
+      });
+      const manager = new BackgroundTaskManager(ctx, undefined, {
+        fallback: {
+          enabled: false, // fallback disabled, but retry_on_empty still applies
+          timeoutMs: 15000,
+          retryDelayMs: 0,
+          chains: {},
+          retry_on_empty: true,
+        },
+      } as any);
+
+      const task = manager.launch({
+        agent: 'explorer',
+        prompt: 'test',
+        description: 'test',
+        parentSessionId: 'parent-123',
+      });
+
+      // Wait for task to start
+      await Promise.resolve();
+      await Promise.resolve();
+
+      // Simulate session.idle event
+      await manager.handleSessionStatus({
+        type: 'session.status',
+        properties: {
+          sessionID: task.sessionId,
+          status: { type: 'idle' },
+        },
+      });
+
+      // Empty response should be treated as failed
+      expect(task.status).toBe('failed');
+      expect(task.error).toBe('Empty response from provider');
+    });
   });
 
   describe('subagent delegation restrictions', () => {

+ 19 - 4
src/background/background-manager.ts

@@ -351,6 +351,8 @@ export class BackgroundTaskManager {
       let succeeded = false;
       const sessionId = session.data.id;
 
+      const retryOnEmpty = this.config?.fallback?.retry_on_empty ?? true;
+
       for (let i = 0; i < attemptModels.length; i++) {
         const model = attemptModels[i];
         const modelLabel = model ?? 'default-model';
@@ -385,6 +387,14 @@ export class BackgroundTaskManager {
             timeoutMs,
           );
 
+          // Detect silent empty responses (e.g. provider rate-limited
+          // without error). When retry_on_empty is enabled (default),
+          // treat as failure so the fallback chain continues.
+          const extraction = await extractSessionResult(this.client, sessionId);
+          if (retryOnEmpty && extraction.empty) {
+            throw new Error('Empty response from provider');
+          }
+
           succeeded = true;
           break;
         } catch (error) {
@@ -503,20 +513,25 @@ export class BackgroundTaskManager {
 
   /**
    * Extract task result and mark complete.
+   * When retry_on_empty is enabled (default), an empty response marks
+   * the task 'failed' with the error "Empty response from provider".
+   * When disabled, empty responses succeed with an empty string result.
    */
   private async extractAndCompleteTask(task: BackgroundTask): Promise<void> {
     if (!task.sessionId) return;
 
+    const retryOnEmpty = this.config?.fallback?.retry_on_empty ?? true;
+
     try {
-      const responseText = await extractSessionResult(
+      const extraction = await extractSessionResult(
         this.client,
         task.sessionId,
       );
 
-      if (responseText) {
-        this.completeTask(task, 'completed', responseText);
+      if (extraction.empty && retryOnEmpty) {
+        this.completeTask(task, 'failed', 'Empty response from provider');
       } else {
-        this.completeTask(task, 'completed', '(No output)');
+        this.completeTask(task, 'completed', extraction.text);
       }
     } catch (error) {
       this.completeTask(

+ 16 - 5
src/config/agent-mcps.test.ts

@@ -7,11 +7,18 @@ describe('parseList', () => {
   });
 
   test('wildcard includes all available', () => {
-    expect(parseList(['*'], ['mcp1', 'mcp2', 'mcp3'])).toEqual(['mcp1', 'mcp2', 'mcp3']);
+    expect(parseList(['*'], ['mcp1', 'mcp2', 'mcp3'])).toEqual([
+      'mcp1',
+      'mcp2',
+      'mcp3',
+    ]);
   });
 
   test('wildcard with exclusions', () => {
-    expect(parseList(['*', '!mcp2'], ['mcp1', 'mcp2', 'mcp3'])).toEqual(['mcp1', 'mcp3']);
+    expect(parseList(['*', '!mcp2'], ['mcp1', 'mcp2', 'mcp3'])).toEqual([
+      'mcp1',
+      'mcp3',
+    ]);
   });
 
   test('exclude wildcard returns empty', () => {
@@ -19,14 +26,18 @@ describe('parseList', () => {
   });
 
   test('specific items only', () => {
-    expect(parseList(['mcp1', 'mcp3'], ['mcp1', 'mcp2', 'mcp3', 'mcp4'])).toEqual(['mcp1', 'mcp3']);
+    expect(
+      parseList(['mcp1', 'mcp3'], ['mcp1', 'mcp2', 'mcp3', 'mcp4']),
+    ).toEqual(['mcp1', 'mcp3']);
   });
 
   test('specific items with exclusions', () => {
-    expect(parseList(['mcp1', 'mcp3', '!mcp3'], ['mcp1', 'mcp2', 'mcp3'])).toEqual(['mcp1']);
+    expect(
+      parseList(['mcp1', 'mcp3', '!mcp3'], ['mcp1', 'mcp2', 'mcp3']),
+    ).toEqual(['mcp1']);
   });
 
   test('exclusions without matching allows', () => {
     expect(parseList(['!mcp2'], ['mcp1', 'mcp2', 'mcp3'])).toEqual([]);
   });
-});
+});

+ 3 - 1
src/config/agent-mcps.ts

@@ -38,7 +38,9 @@ export function parseList(items: string[], allAvailable: string[]): string[] {
     return allAvailable.filter((item) => !deny.includes(item));
   }
 
-  return allow.filter((item) => !deny.includes(item) && allAvailable.includes(item));
+  return allow.filter(
+    (item) => !deny.includes(item) && allAvailable.includes(item),
+  );
 }
 
 /**

+ 10 - 0
src/config/council-schema.ts

@@ -182,6 +182,16 @@ export const CouncilConfigSchema = z.object({
   councillor_execution_mode: CouncillorExecutionModeSchema.describe(
     'Execution mode for councillors. "serial" runs them one at a time (required for single-model systems). "parallel" runs them concurrently (default, faster for multi-model systems).',
   ),
+  councillor_retries: z
+    .number()
+    .int()
+    .min(0)
+    .max(5)
+    .default(3)
+    .describe(
+      'Number of retry attempts for councillors and master that return empty responses ' +
+        '(e.g. due to provider rate limiting). Default: 3 retries.',
+    ),
 });
 
 export type CouncilConfig = z.infer<typeof CouncilConfigSchema>;

+ 7 - 0
src/config/schema.ts

@@ -151,6 +151,13 @@ export const FailoverConfigSchema = z.object({
   timeoutMs: z.number().min(0).default(15000),
   retryDelayMs: z.number().min(0).default(500),
   chains: FallbackChainsSchema.default({}),
+  retry_on_empty: z
+    .boolean()
+    .default(true)
+    .describe(
+      'When true (default), empty provider responses are treated as failures, ' +
+        'triggering fallback/retry. Set to false to treat them as successes.',
+    ),
 });
 
 export type FailoverConfig = z.infer<typeof FailoverConfigSchema>;

+ 234 - 1
src/council/council-manager.test.ts

@@ -82,7 +82,16 @@ describe('CouncilManager', () => {
     });
 
     test('creates manager with plugin config', async () => {
-      const ctx = createMockContext();
+      const ctx = createMockContext({
+        sessionMessagesResult: {
+          data: [
+            {
+              info: { role: 'assistant' },
+              parts: [{ type: 'text', text: 'Councillor response' }],
+            },
+          ],
+        },
+      });
       const config = createTestCouncilConfig();
       const manager = new CouncilManager(ctx, config, undefined);
 
@@ -1285,5 +1294,229 @@ describe('CouncilManager', () => {
       const promptText = masterCall[0]?.body?.parts?.[0]?.text;
       expect(promptText).toContain('Global prompt.');
     });
+
+    test('retries councillor on empty response', async () => {
+      const ctx = createMockContext({
+        promptImpl: async () => ({}),
+        sessionMessagesResult: {
+          data: [
+            {
+              info: { role: 'assistant' },
+              parts: [{ type: 'text', text: 'Master synthesis' }],
+            },
+          ],
+        },
+      });
+
+      // Track messages call count and return empty first, then success
+      let councillorMessagesCallCount = 0;
+      const originalMessages = ctx.client.session.messages;
+      ctx.client.session.messages = mock(async (args) => {
+        // First call (first councillor attempt): empty response
+        // Second call (councillor retry): success
+        // Third call (master): master synthesis
+        councillorMessagesCallCount++;
+        if (councillorMessagesCallCount === 1) {
+          return {
+            data: [
+              {
+                info: { role: 'assistant' },
+                parts: [{ type: 'text', text: '' }],
+              },
+            ],
+          };
+        }
+        if (councillorMessagesCallCount === 2) {
+          return {
+            data: [
+              {
+                info: { role: 'assistant' },
+                parts: [{ type: 'text', text: 'Success' }],
+              },
+            ],
+          };
+        }
+        // Master and any other calls: use original
+        return originalMessages(args);
+      });
+
+      const config: PluginConfig = {
+        council: {
+          master: { model: 'anthropic/claude-opus-4-6' },
+          councillor_retries: 1,
+          presets: {
+            default: {
+              councillors: {
+                alpha: { model: 'openai/gpt-5.4-mini' },
+              },
+            },
+          },
+        },
+      } as any;
+      const manager = new CouncilManager(ctx, config, undefined);
+
+      const result = await manager.runCouncil(
+        'test prompt',
+        undefined,
+        'parent-id',
+      );
+
+      expect(result.success).toBe(true);
+      // First two messages calls are for councillor (empty + success)
+      expect(councillorMessagesCallCount).toBeGreaterThanOrEqual(2);
+      expect(result.councillorResults).toHaveLength(1);
+      expect(result.councillorResults[0].status).toBe('completed');
+      expect(result.councillorResults[0].result).toBe('Success');
+    });
+
+    test('does not retry councillor on non-empty failure (timeout)', async () => {
+      let messagesCallCount = 0;
+      const ctx = createMockContext({
+        promptImpl: async () => {
+          // Simulate timeout error
+          throw new Error('Prompt timed out after 180000ms');
+        },
+        sessionMessagesResult: {
+          data: [
+            {
+              info: { role: 'assistant' },
+              parts: [{ type: 'text', text: 'Response' }],
+            },
+          ],
+        },
+      });
+
+      // Override messages to track calls (won't be reached due to timeout)
+      ctx.client.session.messages = mock(async () => {
+        messagesCallCount++;
+        return {
+          data: [
+            {
+              info: { role: 'assistant' },
+              parts: [{ type: 'text', text: 'Success' }],
+            },
+          ],
+        };
+      });
+
+      const config: PluginConfig = {
+        council: {
+          master: { model: 'anthropic/claude-opus-4-6' },
+          councillor_retries: 2,
+          presets: {
+            default: {
+              councillors: {
+                alpha: { model: 'openai/gpt-5.4-mini' },
+              },
+            },
+          },
+        },
+      } as any;
+      const manager = new CouncilManager(ctx, config, undefined);
+
+      const result = await manager.runCouncil(
+        'test prompt',
+        undefined,
+        'parent-id',
+      );
+
+      expect(result.success).toBe(false);
+      // No retry on timeout — messages should not be called
+      expect(messagesCallCount).toBe(0);
+      expect(result.councillorResults).toHaveLength(1);
+      expect(result.councillorResults[0].status).toBe('timed_out');
+      expect(result.councillorResults[0].error).toContain('timed out');
+    });
+
+    test('exhausts councillor retries and returns failure', async () => {
+      const ctx = createMockContext({
+        promptImpl: async () => ({}),
+        sessionMessagesResult: {
+          data: [
+            {
+              info: { role: 'assistant' },
+              parts: [{ type: 'text', text: '' }],
+            },
+          ],
+        },
+      });
+
+      const config: PluginConfig = {
+        council: {
+          master: { model: 'anthropic/claude-opus-4-6' },
+          councillor_retries: 1,
+          presets: {
+            default: {
+              councillors: {
+                alpha: { model: 'openai/gpt-5.4-mini' },
+              },
+            },
+          },
+        },
+      } as any;
+      const manager = new CouncilManager(ctx, config, undefined);
+
+      const result = await manager.runCouncil(
+        'test prompt',
+        undefined,
+        'parent-id',
+      );
+
+      expect(result.success).toBe(false);
+      expect(result.error).toBe('All councillors failed or timed out');
+      expect(result.councillorResults).toHaveLength(1);
+      expect(result.councillorResults[0].status).toBe('failed');
+      expect(result.councillorResults[0].error).toContain(
+        'Empty response from provider',
+      );
+    });
+
+    test('returns empty councillor result when retry_on_empty is false', async () => {
+      const ctx = createMockContext({
+        promptImpl: async () => ({}),
+      });
+
+      // Always return empty response
+      ctx.client.session.messages = mock(async () => ({
+        data: [
+          {
+            info: { role: 'assistant' },
+            parts: [{ type: 'text', text: '' }],
+          },
+        ],
+      }));
+
+      const config: PluginConfig = {
+        council: {
+          master: { model: 'anthropic/claude-opus-4-6' },
+          councillor_retries: 1,
+          presets: {
+            default: {
+              councillors: {
+                alpha: { model: 'openai/gpt-5.4-mini' },
+              },
+            },
+          },
+        },
+        fallback: {
+          retry_on_empty: false,
+        },
+      } as any;
+      const manager = new CouncilManager(ctx, config, undefined);
+
+      const result = await manager.runCouncil(
+        'test prompt',
+        undefined,
+        'parent-id',
+      );
+
+      // With retry_on_empty: false, empty response is accepted
+      expect(result.councillorResults).toHaveLength(1);
+      expect(result.councillorResults[0].status).toBe('completed');
+      expect(result.councillorResults[0].result).toBe('');
+      // Council succeeds because empty is accepted as valid response
+      expect(result.success).toBe(true);
+      expect(result.result).toBe('');
+    });
   });
 });

+ 160 - 75
src/council/council-manager.ts

@@ -124,6 +124,7 @@ export class CouncilManager {
     const councillorsTimeout = councilConfig.councillors_timeout ?? 180000;
     const masterTimeout = councilConfig.master_timeout ?? 300000;
     const executionMode = councilConfig.councillor_execution_mode ?? 'parallel';
+    const maxRetries = councilConfig.councillor_retries ?? 3;
 
     const councillorCount = Object.keys(preset.councillors).length;
 
@@ -147,6 +148,7 @@ export class CouncilManager {
       parentSessionId,
       councillorsTimeout,
       executionMode,
+      maxRetries,
     );
 
     const completedCount = councillorResults.filter(
@@ -304,11 +306,18 @@ export class CouncilManager {
         options.timeout,
       );
 
-      const result = await extractSessionResult(this.client, sessionId, {
+      const extraction = await extractSessionResult(this.client, sessionId, {
         includeReasoning: options.includeReasoning,
       });
 
-      return result || '(No output)';
+      if (extraction.empty) {
+        const retryOnEmpty = this.config?.fallback?.retry_on_empty ?? true;
+        if (retryOnEmpty) {
+          throw new Error('Empty response from provider');
+        }
+      }
+
+      return extraction.text;
     } finally {
       if (sessionId) {
         this.client.session.abort({ path: { id: sessionId } }).catch(() => {});
@@ -329,6 +338,7 @@ export class CouncilManager {
     parentSessionId: string,
     timeout: number,
     executionMode: 'parallel' | 'serial' = 'parallel',
+    maxRetries: number = 1,
   ): Promise<CouncilResult['councillorResults']> {
     const entries = Object.entries(councillors);
     const results: Array<{
@@ -342,41 +352,16 @@ export class CouncilManager {
     if (executionMode === 'serial') {
       // Serial execution: run each councillor one at a time
       for (const [name, config] of entries) {
-        const modelLabel = shortModelLabel(config.model);
-        log(
-          `[council-manager] Running councillor "${name}" (${modelLabel}) serially`,
-        );
-
-        try {
-          const result = await this.runAgentSession({
+        results.push(
+          await this.runCouncillorWithRetry(
+            name,
+            config,
+            prompt,
             parentSessionId,
-            title: `Council ${name} (${modelLabel})`,
-            agent: 'councillor',
-            model: config.model,
-            promptText: formatCouncillorPrompt(prompt, config.prompt),
-            variant: config.variant,
             timeout,
-            includeReasoning: false,
-          });
-
-          results.push({
-            name,
-            model: config.model,
-            status: 'completed' as const,
-            result,
-          });
-        } catch (error) {
-          const msg = error instanceof Error ? error.message : String(error);
-
-          results.push({
-            name,
-            model: config.model,
-            status: msg.includes('timed out')
-              ? ('timed_out' as const)
-              : ('failed' as const),
-            error: `Councillor "${name}": ${msg}`,
-          });
-        }
+            maxRetries,
+          ),
+        );
       }
     } else {
       // Parallel execution (default): run all councillors concurrently
@@ -389,38 +374,14 @@ export class CouncilManager {
             );
           }
 
-          const modelLabel = shortModelLabel(config.model);
-
-          try {
-            const result = await this.runAgentSession({
-              parentSessionId,
-              title: `Council ${name} (${modelLabel})`,
-              agent: 'councillor',
-              model: config.model,
-              promptText: formatCouncillorPrompt(prompt, config.prompt),
-              variant: config.variant,
-              timeout,
-              includeReasoning: false,
-            });
-
-            return {
-              name,
-              model: config.model,
-              status: 'completed' as const,
-              result,
-            };
-          } catch (error) {
-            const msg = error instanceof Error ? error.message : String(error);
-
-            return {
-              name,
-              model: config.model,
-              status: msg.includes('timed out')
-                ? ('timed_out' as const)
-                : ('failed' as const),
-              error: `Councillor "${name}": ${msg}`,
-            };
-          }
+          return this.runCouncillorWithRetry(
+            name,
+            config,
+            prompt,
+            parentSessionId,
+            timeout,
+            maxRetries,
+          );
         })(),
       );
 
@@ -455,10 +416,135 @@ export class CouncilManager {
     return results;
   }
 
+  /**
+   * Run a single councillor with retry logic for empty responses.
+   * Only retries on "Empty response from provider" errors — timeouts
+   * and other failures are returned immediately.
+   */
+  private async runCouncillorWithRetry(
+    name: string,
+    config: CouncillorConfig,
+    prompt: string,
+    parentSessionId: string,
+    timeout: number,
+    maxRetries: number,
+  ): Promise<{
+    name: string;
+    model: string;
+    status: 'completed' | 'failed' | 'timed_out';
+    result?: string;
+    error?: string;
+  }> {
+    const modelLabel = shortModelLabel(config.model);
+    const totalAttempts = 1 + maxRetries;
+
+    for (let attempt = 1; attempt <= totalAttempts; attempt++) {
+      if (attempt > 1) {
+        log(
+          `[council-manager] Retrying councillor "${name}" (${modelLabel}), attempt ${attempt}/${totalAttempts}`,
+        );
+      }
+
+      try {
+        const result = await this.runAgentSession({
+          parentSessionId,
+          title: `Council ${name} (${modelLabel})`,
+          agent: 'councillor',
+          model: config.model,
+          promptText: formatCouncillorPrompt(prompt, config.prompt),
+          variant: config.variant,
+          timeout,
+          includeReasoning: false,
+        });
+
+        return {
+          name,
+          model: config.model,
+          status: 'completed' as const,
+          result,
+        };
+      } catch (error) {
+        const msg = error instanceof Error ? error.message : String(error);
+
+        // Only retry on empty responses (provider silently rate-limited)
+        const isEmptyResponse = msg.includes('Empty response from provider');
+        const canRetry = attempt < totalAttempts && isEmptyResponse;
+
+        if (!canRetry) {
+          return {
+            name,
+            model: config.model,
+            status: msg.includes('timed out')
+              ? ('timed_out' as const)
+              : ('failed' as const),
+            error: `Councillor "${name}": ${msg}`,
+          };
+        }
+      }
+    }
+
+    // Unreachable, but satisfies TypeScript
+    return {
+      name,
+      model: config.model,
+      status: 'failed' as const,
+      error: `Councillor "${name}": max retries exhausted`,
+    };
+  }
+
   // -------------------------------------------------------------------------
   // Phase 2: Master Synthesis
   // -------------------------------------------------------------------------
 
+  /**
+   * Run a single master model with retry logic for empty responses.
+   * Only retries on "Empty response from provider" — timeouts and
+   * other failures throw immediately so runMaster can try the next
+   * fallback model.
+   */
+  private async runMasterModelWithRetry(
+    parentSessionId: string,
+    model: string,
+    modelLabel: string,
+    promptText: string,
+    variant: string | undefined,
+    timeout: number,
+    maxRetries: number,
+  ): Promise<string> {
+    const totalAttempts = 1 + maxRetries;
+
+    for (let attempt = 1; attempt <= totalAttempts; attempt++) {
+      if (attempt > 1) {
+        log(
+          `[council-manager] Retrying master (${modelLabel}), attempt ${attempt}/${totalAttempts}`,
+        );
+      }
+
+      try {
+        return await this.runAgentSession({
+          parentSessionId,
+          title: `Council Master (${modelLabel})`,
+          agent: 'council-master',
+          model,
+          promptText,
+          variant,
+          timeout,
+        });
+      } catch (error) {
+        const msg = error instanceof Error ? error.message : String(error);
+        const isEmptyResponse = msg.includes('Empty response from provider');
+        const canRetry = attempt < totalAttempts && isEmptyResponse;
+
+        if (!canRetry) {
+          throw error;
+        }
+      }
+    }
+
+    // Unreachable, but satisfies TypeScript
+    throw new Error(`Master model ${modelLabel}: max retries exhausted`);
+  }
+
   private async runMaster(
     prompt: string,
     councillorResults: CouncilResult['councillorResults'],
@@ -486,8 +572,7 @@ export class CouncilManager {
       effectivePrompt,
     );
 
-    // Try each model in order — fresh session per attempt prevents
-    // transcript contamination and respects session lifecycle.
+    const maxRetries = councilConfig.councillor_retries ?? 3;
     const errors: string[] = [];
 
     for (let i = 0; i < attemptModels.length; i++) {
@@ -501,15 +586,15 @@ export class CouncilManager {
           );
         }
 
-        const result = await this.runAgentSession({
+        const result = await this.runMasterModelWithRetry(
           parentSessionId,
-          title: `Council Master (${currentLabel})`,
-          agent: 'council-master',
           model,
-          promptText: synthesisPrompt,
-          variant: effectiveVariant,
+          currentLabel,
+          synthesisPrompt,
+          effectiveVariant,
           timeout,
-        });
+          maxRetries,
+        );
 
         return { success: true, result };
       } catch (error) {

+ 14 - 3
src/utils/session.ts

@@ -83,19 +83,29 @@ export async function promptWithTimeout(
 }
 
 /**
+ * Result of extracting session content.
+ * `empty` is true when the assistant produced zero text content —
+ * the provider returned an empty response (e.g. rate-limited silently).
+ */
+export interface SessionExtractionResult {
+  text: string;
+  empty: boolean;
+}
+
+/**
  * Extract the result text from a session.
  * Collects all assistant messages and concatenates their text parts.
  * @param client - OpenCode client instance
  * @param sessionId - Session ID to extract from
  * @param options - Optional: `includeReasoning` (default true) controls whether
  *                  reasoning/chain-of-thought parts are included.
- * @returns Concatenated text from all assistant messages
+ * @returns Object with extracted text and an `empty` flag for zero-content detection
  */
 export async function extractSessionResult(
   client: OpencodeClient,
   sessionId: string,
   options?: { includeReasoning?: boolean },
-): Promise<string> {
+): Promise<SessionExtractionResult> {
   const includeReasoning = options?.includeReasoning ?? true;
 
   const messagesResult = await client.session.messages({
@@ -121,5 +131,6 @@ export async function extractSessionResult(
     }
   }
 
-  return extractedContent.filter((t) => t.length > 0).join('\n\n');
+  const text = extractedContent.filter((t) => t.length > 0).join('\n\n');
+  return { text, empty: text.length === 0 };
 }