// background-manager.test.ts — bun:test suite for BackgroundTaskManager
  1. import { describe, expect, mock, test } from 'bun:test';
  2. import { SLIM_INTERNAL_INITIATOR_MARKER } from '../utils';
  3. import { BackgroundTaskManager } from './background-manager';
  /**
   * Builds a stand-in for the plugin context passed to BackgroundTaskManager.
   *
   * Every `client.session` method is a bun `mock`, so tests can inspect the
   * recorded calls. Canned responses can be swapped through `overrides`:
   * - `sessionCreateResult`: returned by `session.create` instead of a fresh
   *   `test-session-<n>` id, where n counts `create` calls on this context.
   * - `sessionStatusResult`: returned by `session.status` (default `{ data: {} }`).
   * - `sessionMessagesResult`: returned by `session.messages` (default `{ data: [] }`).
   * - `promptImpl`: called by `session.prompt` with the call arguments; when
   *   omitted, `session.prompt` resolves to `{}`.
   *
   * The returned object is cast to `any`.
   */
  function createMockContext(overrides?: {
    sessionCreateResult?: { data?: { id?: string } };
    sessionStatusResult?: { data?: Record<string, { type: string }> };
    sessionMessagesResult?: {
      data?: Array<{
        info?: { role: string };
        parts?: Array<{ type: string; text?: string }>;
      }>;
    };
    promptImpl?: (args: any) => Promise<unknown>;
  }) {
    // Number of session.create calls so far; feeds the generated session ids.
    let sessionsCreated = 0;

    const create = mock(async () => {
      sessionsCreated += 1;
      const canned = overrides?.sessionCreateResult;
      return canned ?? { data: { id: `test-session-${sessionsCreated}` } };
    });

    const status = mock(async () => {
      const canned = overrides?.sessionStatusResult;
      return canned ?? { data: {} };
    });

    const messages = mock(async () => {
      const canned = overrides?.sessionMessagesResult;
      return canned ?? { data: [] };
    });

    const prompt = mock(async (args: any) => {
      // Without a custom implementation every prompt succeeds with an empty payload.
      if (!overrides?.promptImpl) {
        return {};
      }
      return await overrides.promptImpl(args);
    });

    const abort = mock(async () => ({}));

    const context = {
      client: {
        session: { create, status, messages, prompt, abort },
      },
      directory: '/test/directory',
    };
    return context as any;
  }
  46. describe('BackgroundTaskManager', () => {
  // Construction smoke tests: the manager can be created with no extra
  // config, with a tmux config, and with a background config.
  describe('constructor', () => {
    test('creates manager with defaults', () => {
      const manager = new BackgroundTaskManager(createMockContext());

      expect(manager).toBeDefined();
    });

    test('creates manager with tmux config', () => {
      const manager = new BackgroundTaskManager(createMockContext(), {
        enabled: true,
        layout: 'main-vertical',
        main_pane_size: 60,
      });

      expect(manager).toBeDefined();
    });

    test('creates manager with background config', () => {
      const manager = new BackgroundTaskManager(
        createMockContext(),
        undefined,
        { background: { maxConcurrentStarts: 5 } },
      );

      expect(manager).toBeDefined();
    });
  });
  // launch() hands back the task object synchronously; these tests check the
  // initial snapshot and the state reached after two microtask turns.
  describe('launch (fire-and-forget)', () => {
    // Statuses a task may report right after launch():
    // pending (in queue) or starting (already started).
    const initialStatuses = ['pending', 'starting'];

    /** Launches the generic explorer task shared by the async tests below. */
    const launchExplorer = (manager: BackgroundTaskManager) =>
      manager.launch({
        agent: 'explorer',
        prompt: 'test',
        description: 'test',
        parentSessionId: 'parent-123',
      });

    test('returns task immediately with pending or starting status', async () => {
      const manager = new BackgroundTaskManager(createMockContext());

      const task = manager.launch({
        agent: 'explorer',
        prompt: 'Find all test files',
        description: 'Test file search',
        parentSessionId: 'parent-123',
      });

      expect(task.id).toMatch(/^bg_/);
      expect(initialStatuses).toContain(task.status);
      expect(task.sessionId).toBeUndefined();
      expect(task.agent).toBe('explorer');
      expect(task.description).toBe('Test file search');
      expect(task.startedAt).toBeDefined();
    });

    test('sessionId is set asynchronously when task starts', async () => {
      const manager = new BackgroundTaskManager(createMockContext());
      const task = launchExplorer(manager);

      // Synchronously after launch no session exists yet.
      expect(task.sessionId).toBeUndefined();

      // Two microtask turns let the background start run.
      await Promise.resolve();
      await Promise.resolve();

      expect(task.sessionId).toBeDefined();
      expect(task.status).toBe('running');
    });

    test('task fails when session creation fails', async () => {
      // session.create answers without an id.
      const manager = new BackgroundTaskManager(
        createMockContext({ sessionCreateResult: { data: {} } }),
      );
      const task = launchExplorer(manager);

      await Promise.resolve();
      await Promise.resolve();

      expect(task.status).toBe('failed');
      expect(task.error).toBe('Failed to create background session');
    });

    test('multiple launches return immediately', async () => {
      const manager = new BackgroundTaskManager(createMockContext());
      const requests = [
        { agent: 'explorer', label: 'test1' },
        { agent: 'oracle', label: 'test2' },
        { agent: 'fixer', label: 'test3' },
      ];

      // Launch all three first, then inspect their initial statuses.
      const tasks = requests.map(({ agent, label }) =>
        manager.launch({
          agent,
          prompt: label,
          description: label,
          parentSessionId: 'parent-123',
        }),
      );

      for (const task of tasks) {
        expect(initialStatuses).toContain(task.status);
      }
    });
  });
  // handleSessionStatus() should complete a task only when an idle status
  // arrives for that task's own session.
  describe('handleSessionStatus', () => {
    /** Launches the generic explorer task used by every test in this group. */
    const launchExplorer = (manager: BackgroundTaskManager) =>
      manager.launch({
        agent: 'explorer',
        prompt: 'test',
        description: 'test',
        parentSessionId: 'parent-123',
      });

    /** Feeds the manager an idle session.status event for `sessionID`. */
    const notifyIdle = (
      manager: BackgroundTaskManager,
      sessionID: string | undefined,
    ) =>
      manager.handleSessionStatus({
        type: 'session.status',
        properties: {
          sessionID,
          status: { type: 'idle' },
        },
      });

    test('completes task when session becomes idle', async () => {
      const manager = new BackgroundTaskManager(
        createMockContext({
          sessionMessagesResult: {
            data: [
              {
                info: { role: 'assistant' },
                parts: [{ type: 'text', text: 'Result text' }],
              },
            ],
          },
        }),
      );
      const task = launchExplorer(manager);

      // Let the task start so it owns a session id.
      await Promise.resolve();
      await Promise.resolve();

      await notifyIdle(manager, task.sessionId);

      expect(task.status).toBe('completed');
      expect(task.result).toBe('Result text');
    });

    test('ignores non-idle status', async () => {
      const manager = new BackgroundTaskManager(createMockContext());
      const task = launchExplorer(manager);

      await Promise.resolve();
      await Promise.resolve();

      // A busy status for the task's own session must not complete it.
      await manager.handleSessionStatus({
        type: 'session.status',
        properties: {
          sessionID: task.sessionId,
          status: { type: 'busy' },
        },
      });

      expect(task.status).toBe('running');
    });

    test('ignores non-matching session ID', async () => {
      const manager = new BackgroundTaskManager(createMockContext());
      const task = launchExplorer(manager);

      await Promise.resolve();
      await Promise.resolve();

      // An idle status for some other session leaves the task untouched.
      await notifyIdle(manager, 'other-session-id');

      expect(task.status).toBe('running');
    });
  });
  // getResult() is a synchronous lookup by task id.
  describe('getResult', () => {
    test('returns null for unknown task', () => {
      const manager = new BackgroundTaskManager(createMockContext());

      expect(manager.getResult('unknown-task-id')).toBeNull();
    });

    test('returns task immediately (no blocking)', () => {
      const manager = new BackgroundTaskManager(createMockContext());
      const launched = manager.launch({
        agent: 'explorer',
        prompt: 'test',
        description: 'test',
        parentSessionId: 'parent-123',
      });

      // Looked up right after launch, without awaiting anything.
      const found = manager.getResult(launched.id);

      expect(found).toBeDefined();
      expect(found?.id).toBe(launched.id);
    });
  });
  // waitForCompletion(): covers waiting on a task that is still running,
  // asking about a task that has already finished, and an unknown task id.
  describe('waitForCompletion', () => {
    /** Context whose session.messages reports a single assistant reply "Done". */
    const createDoneContext = () =>
      createMockContext({
        sessionMessagesResult: {
          data: [
            {
              info: { role: 'assistant' },
              parts: [{ type: 'text', text: 'Done' }],
            },
          ],
        },
      });

    /** Launches the generic explorer task used by the tests in this group. */
    const launchExplorer = (manager: BackgroundTaskManager) =>
      manager.launch({
        agent: 'explorer',
        prompt: 'test',
        description: 'test',
        parentSessionId: 'parent-123',
      });

    /** Feeds the manager an idle session.status event for `sessionID`. */
    const notifyIdle = (
      manager: BackgroundTaskManager,
      sessionID: string | undefined,
    ) =>
      manager.handleSessionStatus({
        type: 'session.status',
        properties: {
          sessionID,
          status: { type: 'idle' },
        },
      });

    test('waits for task to complete', async () => {
      const manager = new BackgroundTaskManager(createDoneContext());
      const task = launchExplorer(manager);

      // Wait for task to start
      await Promise.resolve();
      await Promise.resolve();

      // Begin waiting while the task is still running, so the waiting path
      // is exercised rather than the already-completed shortcut covered by
      // the next test.
      let settled = false;
      const markSettled = () => {
        settled = true;
      };
      const pending = manager.waitForCompletion(task.id, 5000);
      void pending.then(markSettled, markSettled);

      await Promise.resolve();
      expect(settled).toBe(false);

      // Completing the task via session.status must release the waiter.
      await notifyIdle(manager, task.sessionId);

      const result = await pending;
      expect(result?.status).toBe('completed');
      expect(result?.result).toBe('Done');
    });

    test('returns immediately if already completed', async () => {
      const manager = new BackgroundTaskManager(createDoneContext());
      const task = launchExplorer(manager);

      // Wait for task to start
      await Promise.resolve();
      await Promise.resolve();

      // Complete the task before anyone waits on it.
      await notifyIdle(manager, task.sessionId);

      const result = await manager.waitForCompletion(task.id, 5000);
      expect(result?.status).toBe('completed');
    });

    test('returns null for unknown task', async () => {
      const manager = new BackgroundTaskManager(createMockContext());

      const result = await manager.waitForCompletion('unknown-task-id', 5000);

      expect(result).toBeNull();
    });
  });
  // cancel() returns how many tasks it cancelled: one task when given an id,
  // every pending/running task when called without arguments.
  describe('cancel', () => {
    /** Launches the generic explorer task used by the single-task tests. */
    const launchExplorer = (manager: BackgroundTaskManager) =>
      manager.launch({
        agent: 'explorer',
        prompt: 'test',
        description: 'test',
        parentSessionId: 'parent-123',
      });

    test('cancels pending task before it starts', () => {
      const manager = new BackgroundTaskManager(createMockContext());
      const task = launchExplorer(manager);

      // No await in between: the task has not had a chance to start.
      const cancelled = manager.cancel(task.id);

      expect(cancelled).toBe(1);
      expect(manager.getResult(task.id)?.status).toBe('cancelled');
    });

    test('cancels running task', async () => {
      const manager = new BackgroundTaskManager(createMockContext());
      const task = launchExplorer(manager);

      // Let the task start first.
      await Promise.resolve();
      await Promise.resolve();

      const cancelled = manager.cancel(task.id);

      expect(cancelled).toBe(1);
      expect(manager.getResult(task.id)?.status).toBe('cancelled');
    });

    test('returns 0 when cancelling unknown task', () => {
      const manager = new BackgroundTaskManager(createMockContext());

      expect(manager.cancel('unknown-task-id')).toBe(0);
    });

    test('cancels all pending/running tasks when no ID provided', () => {
      const manager = new BackgroundTaskManager(createMockContext());
      const requests = [
        { agent: 'explorer', label: 'test1' },
        { agent: 'oracle', label: 'test2' },
      ];
      for (const { agent, label } of requests) {
        manager.launch({
          agent,
          prompt: label,
          description: label,
          parentSessionId: 'parent-123',
        });
      }

      expect(manager.cancel()).toBe(2);
    });

    test('does not cancel already completed tasks', async () => {
      const manager = new BackgroundTaskManager(
        createMockContext({
          sessionMessagesResult: {
            data: [
              {
                info: { role: 'assistant' },
                parts: [{ type: 'text', text: 'Done' }],
              },
            ],
          },
        }),
      );
      const task = launchExplorer(manager);

      // Let the task start, then complete it through an idle status event.
      await Promise.resolve();
      await Promise.resolve();
      await manager.handleSessionStatus({
        type: 'session.status',
        properties: {
          sessionID: task.sessionId,
          status: { type: 'idle' },
        },
      });

      // A finished task is no longer cancellable.
      expect(manager.cancel(task.id)).toBe(0);
    });
  });
  // Task-level behaviour: model fallback, result extraction, completion
  // timestamps and the notification sent to the parent session.
  describe('BackgroundTask logic', () => {
    /**
     * True when a prompt call targets a session created by the mock context
     * (ids starting with `test-session-`); the tests below treat every other
     * prompt as a parent-session notification.
     */
    const isTaskPrompt = (args: any): boolean =>
      typeof args.path?.id === 'string' &&
      args.path.id.startsWith('test-session-');

    /** Manager with a two-model explorer fallback chain and no retry delay. */
    const createFallbackManager = (ctx: any) =>
      new BackgroundTaskManager(ctx, undefined, {
        fallback: {
          enabled: true,
          timeoutMs: 15000,
          retryDelayMs: 0,
          chains: {
            explorer: ['openai/gpt-5.4', 'opencode/gpt-5-nano'],
          },
        },
      });

    /** Feeds the manager an idle session.status event for `sessionID`. */
    const notifyIdle = (
      manager: BackgroundTaskManager,
      sessionID: string | undefined,
    ) =>
      manager.handleSessionStatus({
        type: 'session.status',
        properties: {
          sessionID,
          status: { type: 'idle' },
        },
      });

    test('falls back to next model when first model prompt fails', async () => {
      let taskPromptCount = 0;
      const ctx = createMockContext({
        promptImpl: async (args) => {
          // Parent notifications always succeed and are not counted.
          if (!isTaskPrompt(args)) return {};

          taskPromptCount += 1;
          const model = args.body?.model;
          const isPrimaryModel =
            model?.providerID === 'openai' && model?.modelID === 'gpt-5.4';
          if (isPrimaryModel) {
            throw new Error('primary failed');
          }
          return {};
        },
      });
      const manager = createFallbackManager(ctx);

      const task = manager.launch({
        agent: 'explorer',
        prompt: 'test',
        description: 'test',
        parentSessionId: 'parent-123',
      });

      // Give the fire-and-forget chain a macrotask to finish; retryDelayMs: 0
      // removes the pause between attempts.
      await new Promise((resolve) => setTimeout(resolve, 10));

      expect(task.status).toBe('running');
      expect(taskPromptCount).toBe(2);
      // session.abort is expected between the two attempts.
      expect(ctx.client.session.abort).toHaveBeenCalled();
    });

    test('fails task when all fallback models fail', async () => {
      const ctx = createMockContext({
        promptImpl: async (args) => {
          if (!isTaskPrompt(args)) return {};
          throw new Error('all models failing');
        },
      });
      const manager = createFallbackManager(ctx);

      const task = manager.launch({
        agent: 'explorer',
        prompt: 'test',
        description: 'test',
        parentSessionId: 'parent-123',
      });

      // Give the fire-and-forget chain a macrotask to finish; retryDelayMs: 0
      // removes the pause between attempts.
      await new Promise((resolve) => setTimeout(resolve, 10));

      expect(task.status).toBe('failed');
      expect(task.error).toContain('All fallback models failed');
      // Two aborts expected: one between attempts, one from completeTask.
      expect(ctx.client.session.abort).toHaveBeenCalledTimes(2);
    });

    test('extracts content from multiple types and messages', async () => {
      const firstMessage = {
        info: { role: 'assistant' },
        parts: [
          { type: 'reasoning', text: 'I am thinking...' },
          { type: 'text', text: 'First part.' },
        ],
      };
      const secondMessage = {
        info: { role: 'assistant' },
        parts: [
          { type: 'text', text: 'Second part.' },
          { type: 'text', text: '' }, // empty part: must not show up in the result
        ],
      };
      const manager = new BackgroundTaskManager(
        createMockContext({
          sessionMessagesResult: { data: [firstMessage, secondMessage] },
        }),
      );

      const task = manager.launch({
        agent: 'test',
        prompt: 'test',
        description: 'test',
        parentSessionId: 'p1',
      });

      // Let the task start, then complete it.
      await Promise.resolve();
      await Promise.resolve();
      await notifyIdle(manager, task.sessionId);

      expect(task.status).toBe('completed');
      expect(task.result).toContain('I am thinking...');
      expect(task.result).toContain('First part.');
      expect(task.result).toContain('Second part.');
      // Parts are joined with a blank line between them.
      expect(task.result).toBe(
        'I am thinking...\n\nFirst part.\n\nSecond part.',
      );
    });

    test('task has completedAt timestamp on completion or cancellation', async () => {
      const manager = new BackgroundTaskManager(
        createMockContext({
          sessionMessagesResult: {
            data: [
              {
                info: { role: 'assistant' },
                parts: [{ type: 'text', text: 'done' }],
              },
            ],
          },
        }),
      );

      // Completion path.
      const completedTask = manager.launch({
        agent: 'test',
        prompt: 't1',
        description: 'd1',
        parentSessionId: 'p1',
      });
      await Promise.resolve();
      await Promise.resolve();
      await notifyIdle(manager, completedTask.sessionId);

      expect(completedTask.completedAt).toBeInstanceOf(Date);
      expect(completedTask.status).toBe('completed');

      // Cancellation path.
      const cancelledTask = manager.launch({
        agent: 'test',
        prompt: 't2',
        description: 'd2',
        parentSessionId: 'p2',
      });
      manager.cancel(cancelledTask.id);

      expect(cancelledTask.completedAt).toBeInstanceOf(Date);
      expect(cancelledTask.status).toBe('cancelled');
    });

    test('always sends notification to parent session on completion', async () => {
      const ctx = createMockContext({
        sessionMessagesResult: {
          data: [
            {
              info: { role: 'assistant' },
              parts: [{ type: 'text', text: 'done' }],
            },
          ],
        },
      });
      const manager = new BackgroundTaskManager(ctx, undefined, {
        background: { maxConcurrentStarts: 10 },
      });

      const task = manager.launch({
        agent: 'test',
        prompt: 't',
        description: 'd',
        parentSessionId: 'parent-session',
      });
      await Promise.resolve();
      await Promise.resolve();
      await notifyIdle(manager, task.sessionId);

      // The most recent session.prompt call is expected to be the
      // notification, tagged with the internal-initiator marker.
      expect(ctx.client.session.prompt).toHaveBeenCalled();
      const recordedPrompts = ctx.client.session.prompt.mock.calls as Array<
        [{ body?: { parts?: Array<{ text?: string }> } }]
      >;
      const [notification] = recordedPrompts[recordedPrompts.length - 1];
      const firstPartText = notification.body?.parts?.[0]?.text;
      expect(firstPartText?.includes(SLIM_INTERNAL_INITIATOR_MARKER)).toBe(
        true,
      );
    });
  });
  622. describe('subagent delegation restrictions', () => {
  test('spawned explorer gets tools disabled (leaf node)', async () => {
    const ctx = createMockContext();
    const manager = new BackgroundTaskManager(ctx);

    // Start an orchestrator task under a root session and let it obtain a
    // session id of its own.
    const parentTask = manager.launch({
      agent: 'orchestrator',
      prompt: 'test',
      description: 'test',
      parentSessionId: 'root-session',
    });
    await Promise.resolve();
    await Promise.resolve();

    const { sessionId: parentSessionId } = parentTask;
    if (!parentSessionId) {
      throw new Error('Expected sessionId to be defined');
    }

    // Spawn an explorer from the orchestrator's session.
    manager.launch({
      agent: 'explorer',
      prompt: 'test',
      description: 'test',
      parentSessionId,
    });
    await Promise.resolve();
    await Promise.resolve();

    // Explorer is a leaf node: its prompt must switch both delegation tools off.
    const recordedPrompts = ctx.client.session.prompt.mock.calls as Array<
      [{ body: { tools?: Record<string, boolean> } }]
    >;
    const [latestRequest] = recordedPrompts[recordedPrompts.length - 1];
    expect(latestRequest.body.tools).toEqual({
      background_task: false,
      task: false,
    });
  });
  test('spawned designer gets tools disabled (leaf node)', async () => {
    const ctx = createMockContext();
    const manager = new BackgroundTaskManager(ctx);

    // Start an orchestrator task and wait for its session id.
    const parentTask = manager.launch({
      agent: 'orchestrator',
      prompt: 'test',
      description: 'test',
      parentSessionId: 'root-session',
    });
    await Promise.resolve();
    await Promise.resolve();

    const { sessionId: parentSessionId } = parentTask;
    if (!parentSessionId) {
      throw new Error('Expected sessionId to be defined');
    }

    // Spawn a designer from the orchestrator's session.
    manager.launch({
      agent: 'designer',
      prompt: 'test',
      description: 'test',
      parentSessionId,
    });
    await Promise.resolve();
    await Promise.resolve();

    // Designer is a leaf node: its prompt must switch both delegation tools off.
    const recordedPrompts = ctx.client.session.prompt.mock.calls as Array<
      [{ body: { tools?: Record<string, boolean> } }]
    >;
    const [latestRequest] = recordedPrompts[recordedPrompts.length - 1];
    expect(latestRequest.body.tools).toEqual({
      background_task: false,
      task: false,
    });
  });
  test('spawned explorer from designer gets tools disabled (leaf node)', async () => {
    const ctx = createMockContext();
    const manager = new BackgroundTaskManager(ctx);

    // Start a designer task and wait for its session id.
    const parentTask = manager.launch({
      agent: 'designer',
      prompt: 'test',
      description: 'test',
      parentSessionId: 'root-session',
    });
    await Promise.resolve();
    await Promise.resolve();

    const { sessionId: parentSessionId } = parentTask;
    if (!parentSessionId) {
      throw new Error('Expected sessionId to be defined');
    }

    // Spawn an explorer from the designer's session.
    manager.launch({
      agent: 'explorer',
      prompt: 'test',
      description: 'test',
      parentSessionId,
    });
    await Promise.resolve();
    await Promise.resolve();

    // Explorer is a leaf node: its prompt must switch both delegation tools off.
    const recordedPrompts = ctx.client.session.prompt.mock.calls as Array<
      [{ body: { tools?: Record<string, boolean> } }]
    >;
    const [latestRequest] = recordedPrompts[recordedPrompts.length - 1];
    expect(latestRequest.body.tools).toEqual({
      background_task: false,
      task: false,
    });
  });
  test('librarian cannot delegate to any subagents', async () => {
    const ctx = createMockContext();
    const manager = new BackgroundTaskManager(ctx);

    // Start a librarian task and wait for its session id.
    const parentTask = manager.launch({
      agent: 'librarian',
      prompt: 'test',
      description: 'test',
      parentSessionId: 'root-session',
    });
    await Promise.resolve();
    await Promise.resolve();

    const { sessionId: parentSessionId } = parentTask;
    if (!parentSessionId) {
      throw new Error('Expected sessionId to be defined');
    }

    // Spawn a subagent from the librarian's session.
    manager.launch({
      agent: 'explorer',
      prompt: 'test',
      description: 'test',
      parentSessionId,
    });
    await Promise.resolve();
    await Promise.resolve();

    // The latest prompt must have both delegation tools switched off.
    const recordedPrompts = ctx.client.session.prompt.mock.calls as Array<
      [{ body: { tools?: Record<string, boolean> } }]
    >;
    const [latestRequest] = recordedPrompts[recordedPrompts.length - 1];
    expect(latestRequest.body.tools).toEqual({
      background_task: false,
      task: false,
    });
  });
  test('oracle cannot delegate to any subagents', async () => {
    const ctx = createMockContext();
    const manager = new BackgroundTaskManager(ctx);

    // Start an oracle task and wait for its session id.
    const parentTask = manager.launch({
      agent: 'oracle',
      prompt: 'test',
      description: 'test',
      parentSessionId: 'root-session',
    });
    await Promise.resolve();
    await Promise.resolve();

    const { sessionId: parentSessionId } = parentTask;
    if (!parentSessionId) {
      throw new Error('Expected sessionId to be defined');
    }

    // Spawn a subagent from the oracle's session.
    manager.launch({
      agent: 'explorer',
      prompt: 'test',
      description: 'test',
      parentSessionId,
    });
    await Promise.resolve();
    await Promise.resolve();

    // The latest prompt must have both delegation tools switched off.
    const recordedPrompts = ctx.client.session.prompt.mock.calls as Array<
      [{ body: { tools?: Record<string, boolean> } }]
    >;
    const [latestRequest] = recordedPrompts[recordedPrompts.length - 1];
    expect(latestRequest.body.tools).toEqual({
      background_task: false,
      task: false,
    });
  });
  test('spawned explorer from unknown parent gets tools disabled (leaf node)', async () => {
    const ctx = createMockContext();
    const manager = new BackgroundTaskManager(ctx);

    // The parent session id below was never produced by this manager.
    manager.launch({
      agent: 'explorer',
      prompt: 'test',
      description: 'test',
      parentSessionId: 'unknown-session-id',
    });
    await Promise.resolve();
    await Promise.resolve();

    // Explorer is a leaf agent, so both delegation tools are off whatever
    // the parent is.
    const recordedPrompts = ctx.client.session.prompt.mock.calls as Array<
      [{ body: { tools?: Record<string, boolean> } }]
    >;
    const [latestRequest] = recordedPrompts[recordedPrompts.length - 1];
    expect(latestRequest.body.tools).toEqual({
      background_task: false,
      task: false,
    });
  });
  // Launches an orchestrator task and checks that isAgentAllowed reports true
  // for each of the five subagent names listed below.
  test('isAgentAllowed returns true for valid delegations', async () => {
    const mockCtx = createMockContext();
    const taskManager = new BackgroundTaskManager(mockCtx);

    const orchestrator = taskManager.launch({
      agent: 'orchestrator',
      prompt: 'test',
      description: 'test',
      parentSessionId: 'root-session',
    });
    await Promise.resolve();
    await Promise.resolve();

    const orchestratorSessionId = orchestrator.sessionId;
    if (!orchestratorSessionId) {
      throw new Error('Expected sessionId to be defined');
    }

    // Checked in the same order as the individual assertions they replace.
    const delegates = [
      'explorer',
      'fixer',
      'designer',
      'librarian',
      'oracle',
    ] as const;
    for (const delegate of delegates) {
      expect(taskManager.isAgentAllowed(orchestratorSessionId, delegate)).toBe(
        true,
      );
    }
  });
  // Launches a fixer task and checks that isAgentAllowed reports false when
  // the fixer's session asks for explorer, oracle or designer.
  test('isAgentAllowed returns false for invalid delegations', async () => {
    const mockCtx = createMockContext();
    const taskManager = new BackgroundTaskManager(mockCtx);

    const fixer = taskManager.launch({
      agent: 'fixer',
      prompt: 'test',
      description: 'test',
      parentSessionId: 'root-session',
    });
    await Promise.resolve();
    await Promise.resolve();

    const fixerSessionId = fixer.sessionId;
    if (!fixerSessionId) throw new Error('Expected sessionId to be defined');

    // None of these targets may be spawned from the fixer session.
    for (const target of ['explorer', 'oracle', 'designer'] as const) {
      expect(taskManager.isAgentAllowed(fixerSessionId, target)).toBe(false);
    }
  });
  // Launches an explorer and then a librarian, and for each checks that
  // isAgentAllowed reports false for one sample delegation target.
  test('isAgentAllowed returns false for leaf agents', async () => {
    const mockCtx = createMockContext();
    const taskManager = new BackgroundTaskManager(mockCtx);

    // Each row: the leaf agent to launch and the target it must not reach.
    const scenarios = [
      { leaf: 'explorer', target: 'fixer' },
      { leaf: 'librarian', target: 'explorer' },
    ] as const;

    for (const { leaf, target } of scenarios) {
      const leafTask = taskManager.launch({
        agent: leaf,
        prompt: 'test',
        description: 'test',
        parentSessionId: 'root-session',
      });
      await Promise.resolve();
      await Promise.resolve();

      const leafSessionId = leafTask.sessionId;
      if (!leafSessionId) {
        throw new Error('Expected sessionId to be defined');
      }
      expect(taskManager.isAgentAllowed(leafSessionId, target)).toBe(false);
    }
  });
  // With no task launched, asks isAgentAllowed about a session id the manager
  // has not issued and expects true for each of the five subagent names.
  test('isAgentAllowed treats unknown session as root orchestrator', () => {
    const mockCtx = createMockContext();
    const taskManager = new BackgroundTaskManager(mockCtx);

    // Per the test title, an untracked session is treated like the root
    // orchestrator, so every target below should be permitted.
    const targets = [
      'explorer',
      'fixer',
      'designer',
      'librarian',
      'oracle',
    ] as const;
    for (const target of targets) {
      expect(taskManager.isAgentAllowed('unknown-session', target)).toBe(true);
    }
  });
  // Launches a task whose agent name is 'custom-agent' and checks that its
  // session may delegate to explorer only.
  test('unknown agent type defaults to explorer-only delegation', async () => {
    const mockCtx = createMockContext();
    const taskManager = new BackgroundTaskManager(mockCtx);

    // 'custom-agent' stands in for an agent type without its own rule entry.
    const custom = taskManager.launch({
      agent: 'custom-agent',
      prompt: 'test',
      description: 'test',
      parentSessionId: 'root-session',
    });
    await Promise.resolve();
    await Promise.resolve();

    const customSessionId = custom.sessionId;
    if (!customSessionId) throw new Error('Expected sessionId to be defined');

    // The allowed list is exactly ['explorer'] ...
    const allowed = taskManager.getAllowedSubagents(customSessionId);
    expect(allowed).toEqual(['explorer']);

    // ... and isAgentAllowed agrees with that list.
    expect(taskManager.isAgentAllowed(customSessionId, 'explorer')).toBe(true);
    for (const denied of ['fixer', 'oracle'] as const) {
      expect(taskManager.isAgentAllowed(customSessionId, denied)).toBe(false);
    }
  });
  // Launches a 'custom-agent' task, then an explorer parented to it, and checks
  // that the most recent prompt request has both delegation tools turned off.
  test('spawned explorer from custom agent gets tools disabled (leaf node)', async () => {
    type PromptCall = [{ body: { tools?: Record<string, boolean> } }];

    const mockCtx = createMockContext();
    const taskManager = new BackgroundTaskManager(mockCtx);

    // The custom agent goes first so the explorer has a tracked parent.
    const customParent = taskManager.launch({
      agent: 'custom-agent',
      prompt: 'test',
      description: 'test',
      parentSessionId: 'root-session',
    });
    await Promise.resolve();
    await Promise.resolve();

    const parentSessionId = customParent.sessionId;
    if (!parentSessionId) throw new Error('Expected sessionId to be defined');

    // Explorer launched with the custom agent's session as parent.
    taskManager.launch({
      agent: 'explorer',
      prompt: 'test',
      description: 'test',
      parentSessionId,
    });
    await Promise.resolve();
    await Promise.resolve();

    // The explorer's tools are expected to be off whatever the parent is.
    const recordedCalls = mockCtx.client.session.prompt.mock
      .calls as PromptCall[];
    const mostRecent = recordedCalls[recordedCalls.length - 1];
    const disabledTools = { background_task: false, task: false };
    expect(mostRecent[0].body.tools).toEqual(disabledTools);
  });
  // Walks a three-level launch chain (orchestrator, then designer, then
  // explorer) and checks the tools sent with the designer and explorer prompts
  // as well as what each of those sessions is allowed to spawn.
  test('full chain: orchestrator → designer → explorer', async () => {
    type PromptCall = [{ body: { tools?: Record<string, boolean> } }];

    const mockCtx = createMockContext();
    const taskManager = new BackgroundTaskManager(mockCtx);
    const disabledTools = { background_task: false, task: false };

    // Level 1: orchestrator under the root session.
    const orchestrator = taskManager.launch({
      agent: 'orchestrator',
      prompt: 'coordinate work',
      description: 'orchestrator',
      parentSessionId: 'root-session',
    });
    await Promise.resolve();
    await Promise.resolve();
    const orchestratorSessionId = orchestrator.sessionId;
    if (!orchestratorSessionId) {
      throw new Error('Expected sessionId to be defined');
    }

    // Level 2: designer under the orchestrator.
    const designer = taskManager.launch({
      agent: 'designer',
      prompt: 'design UI',
      description: 'designer',
      parentSessionId: orchestratorSessionId,
    });
    await Promise.resolve();
    await Promise.resolve();
    const designerSessionId = designer.sessionId;
    if (!designerSessionId) {
      throw new Error('Expected sessionId to be defined');
    }

    // The second recorded prompt (index 1) is checked as the designer's
    // prompt; its delegation tools must be off.
    const recordedCalls = mockCtx.client.session.prompt.mock
      .calls as PromptCall[];
    const designerCall = recordedCalls[1];
    expect(designerCall[0].body.tools).toEqual(disabledTools);

    // The designer session must not be able to spawn any of these agents.
    for (const target of ['explorer', 'fixer', 'oracle'] as const) {
      expect(taskManager.isAgentAllowed(designerSessionId, target)).toBe(false);
    }

    // Level 3: explorer under the designer.
    const explorer = taskManager.launch({
      agent: 'explorer',
      prompt: 'find patterns',
      description: 'explorer',
      parentSessionId: designerSessionId,
    });
    await Promise.resolve();
    await Promise.resolve();
    const explorerSessionId = explorer.sessionId;
    if (!explorerSessionId) {
      throw new Error('Expected sessionId to be defined');
    }

    // The third recorded prompt (index 2) is read through the array reference
    // captured above; its tools must be off as well.
    const explorerCall = recordedCalls[2];
    expect(explorerCall[0].body.tools).toEqual(disabledTools);

    // The explorer session has nothing it may delegate to.
    expect(taskManager.getAllowedSubagents(explorerSessionId)).toEqual([]);
  });
  // Launches an orchestrator, then a fixer under it, and checks that the
  // fixer session is refused every delegation target and has an empty
  // allowed-subagent list.
  test('chain enforcement: fixer cannot spawn unauthorized agents mid-chain', async () => {
    const mockCtx = createMockContext();
    const taskManager = new BackgroundTaskManager(mockCtx);

    // Orchestrator first, so the fixer sits in the middle of a chain.
    const orchestrator = taskManager.launch({
      agent: 'orchestrator',
      prompt: 'test',
      description: 'test',
      parentSessionId: 'root-session',
    });
    await Promise.resolve();
    await Promise.resolve();
    const orchestratorSessionId = orchestrator.sessionId;
    if (!orchestratorSessionId) {
      throw new Error('Expected sessionId to be defined');
    }

    const fixer = taskManager.launch({
      agent: 'fixer',
      prompt: 'test',
      description: 'test',
      parentSessionId: orchestratorSessionId,
    });
    await Promise.resolve();
    await Promise.resolve();
    const fixerSessionId = fixer.sessionId;
    if (!fixerSessionId) throw new Error('Expected sessionId to be defined');

    // Every target is refused, including explorer and fixer itself.
    const refused = [
      'oracle',
      'designer',
      'librarian',
      'fixer',
      'explorer',
    ] as const;
    for (const target of refused) {
      expect(taskManager.isAgentAllowed(fixerSessionId, target)).toBe(false);
    }
    expect(taskManager.getAllowedSubagents(fixerSessionId)).toEqual([]);
  });
  // Launches a designer and an explorer child, drives the designer to
  // 'completed' through an idle session.status event, and checks that the
  // explorer session's permissions are still reported as empty afterwards.
  test('chain: completed parent does not affect child permissions', async () => {
    type PromptCall = [{ body: { tools?: Record<string, boolean> } }];

    // The mock returns a single assistant text message for session messages.
    const mockCtx = createMockContext({
      sessionMessagesResult: {
        data: [
          {
            info: { role: 'assistant' },
            parts: [{ type: 'text', text: 'done' }],
          },
        ],
      },
    });
    const taskManager = new BackgroundTaskManager(mockCtx);

    // Parent: designer under the root session.
    const designer = taskManager.launch({
      agent: 'designer',
      prompt: 'test',
      description: 'test',
      parentSessionId: 'root-session',
    });
    await Promise.resolve();
    await Promise.resolve();
    const designerSessionId = designer.sessionId;
    if (!designerSessionId) {
      throw new Error('Expected sessionId to be defined');
    }

    // Child: explorer launched while the designer has not yet completed.
    const explorer = taskManager.launch({
      agent: 'explorer',
      prompt: 'test',
      description: 'test',
      parentSessionId: designerSessionId,
    });
    await Promise.resolve();
    await Promise.resolve();
    const explorerSessionId = explorer.sessionId;
    if (!explorerSessionId) {
      throw new Error('Expected sessionId to be defined');
    }

    // The second recorded prompt (index 1) is checked as the explorer's
    // prompt; its delegation tools must be off.
    const recordedCalls = mockCtx.client.session.prompt.mock
      .calls as PromptCall[];
    const explorerCall = recordedCalls[1];
    expect(explorerCall[0].body.tools).toEqual({
      background_task: false,
      task: false,
    });

    // Report the designer session as idle; the designer task is then
    // expected to be marked completed.
    await taskManager.handleSessionStatus({
      type: 'session.status',
      properties: {
        sessionID: designerSessionId,
        status: { type: 'idle' },
      },
    });
    expect(designer.status).toBe('completed');

    // The explorer's permissions are queried after the parent has finished
    // and must be unchanged.
    expect(taskManager.isAgentAllowed(explorerSessionId, 'fixer')).toBe(false);
    expect(taskManager.getAllowedSubagents(explorerSessionId)).toEqual([]);
  });
  // Checks the list returned by getAllowedSubagents for an orchestrator
  // session, for three agent types that must get an empty list (fixer,
  // designer, explorer), and for a session id the manager never issued.
  test('getAllowedSubagents returns correct lists', async () => {
    const ctx = createMockContext();
    const manager = new BackgroundTaskManager(ctx);

    // The full delegation list: six subagent names, in the order the manager
    // is expected to return them.
    const allSubagents = [
      'explorer',
      'librarian',
      'oracle',
      'designer',
      'fixer',
      'council',
    ];

    // Orchestrator -> all six subagent names
    const orchestratorTask = manager.launch({
      agent: 'orchestrator',
      prompt: 'test',
      description: 'test',
      parentSessionId: 'root-session',
    });
    await Promise.resolve();
    await Promise.resolve();
    const orchestratorSessionId = orchestratorTask.sessionId;
    if (!orchestratorSessionId)
      throw new Error('Expected sessionId to be defined');
    expect(manager.getAllowedSubagents(orchestratorSessionId)).toEqual(
      allSubagents,
    );

    // Fixer, designer and explorer -> empty list (none of them may delegate).
    // They are launched one after another in this order.
    for (const leafAgent of ['fixer', 'designer', 'explorer'] as const) {
      const leafTask = manager.launch({
        agent: leafAgent,
        prompt: 'test',
        description: 'test',
        parentSessionId: 'root-session',
      });
      await Promise.resolve();
      await Promise.resolve();
      const leafSessionId = leafTask.sessionId;
      if (!leafSessionId) throw new Error('Expected sessionId to be defined');
      expect(manager.getAllowedSubagents(leafSessionId)).toEqual([]);
    }

    // Unknown session -> same list as the orchestrator (all six subagents)
    expect(manager.getAllowedSubagents('unknown-session')).toEqual(
      allSubagents,
    );
  });
  1180. });
  1181. });