dynamic-model-selection.test.ts 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. /// <reference types="bun-types" />
  2. import { describe, expect, test } from 'bun:test';
  3. import {
  4. buildDynamicModelPlan,
  5. rankModelsV1WithBreakdown,
  6. } from './dynamic-model-selection';
  7. import type { DiscoveredModel, InstallConfig } from './types';
  8. function m(
  9. input: Partial<DiscoveredModel> & { model: string },
  10. ): DiscoveredModel {
  11. const [providerID] = input.model.split('/');
  12. return {
  13. providerID: providerID ?? 'openai',
  14. model: input.model,
  15. name: input.name ?? input.model,
  16. status: input.status ?? 'active',
  17. contextLimit: input.contextLimit ?? 200000,
  18. outputLimit: input.outputLimit ?? 32000,
  19. reasoning: input.reasoning ?? true,
  20. toolcall: input.toolcall ?? true,
  21. attachment: input.attachment ?? false,
  22. dailyRequestLimit: input.dailyRequestLimit,
  23. costInput: input.costInput,
  24. costOutput: input.costOutput,
  25. };
  26. }
  27. function baseInstallConfig(): InstallConfig {
  28. return {
  29. hasKimi: false,
  30. hasOpenAI: true,
  31. hasAnthropic: false,
  32. hasCopilot: true,
  33. hasZaiPlan: true,
  34. hasAntigravity: false,
  35. hasChutes: true,
  36. hasOpencodeZen: true,
  37. useOpenCodeFreeModels: true,
  38. selectedOpenCodePrimaryModel: 'opencode/glm-4.7-free',
  39. selectedOpenCodeSecondaryModel: 'opencode/gpt-5-nano',
  40. selectedChutesPrimaryModel: 'chutes/kimi-k2.5',
  41. selectedChutesSecondaryModel: 'chutes/minimax-m2.1',
  42. hasTmux: false,
  43. installSkills: false,
  44. installCustomSkills: false,
  45. setupMode: 'quick',
  46. };
  47. }
  48. describe('dynamic-model-selection', () => {
  49. test('builds assignments and chains for all six agents', () => {
  50. const plan = buildDynamicModelPlan(
  51. [
  52. m({ model: 'openai/gpt-5.3-codex', reasoning: true, toolcall: true }),
  53. m({
  54. model: 'openai/gpt-5.1-codex-mini',
  55. reasoning: true,
  56. toolcall: true,
  57. }),
  58. m({
  59. model: 'github-copilot/grok-code-fast-1',
  60. reasoning: true,
  61. toolcall: true,
  62. }),
  63. m({
  64. model: 'zai-coding-plan/glm-4.7',
  65. reasoning: true,
  66. toolcall: true,
  67. }),
  68. m({ model: 'chutes/kimi-k2.5', reasoning: true, toolcall: true }),
  69. m({ model: 'chutes/minimax-m2.1', reasoning: true, toolcall: true }),
  70. ],
  71. baseInstallConfig(),
  72. );
  73. expect(plan).not.toBeNull();
  74. const agents = plan?.agents ?? {};
  75. const chains = plan?.chains ?? {};
  76. expect(Object.keys(agents).sort()).toEqual([
  77. 'designer',
  78. 'explorer',
  79. 'fixer',
  80. 'librarian',
  81. 'oracle',
  82. 'orchestrator',
  83. ]);
  84. expect(agents.oracle?.model.startsWith('opencode/')).toBe(false);
  85. expect(agents.orchestrator?.model.startsWith('opencode/')).toBe(false);
  86. expect(chains.oracle.some((m: string) => m.startsWith('openai/'))).toBe(
  87. true,
  88. );
  89. expect(chains.orchestrator).toContain('chutes/kimi-k2.5');
  90. expect(chains.explorer).toContain('opencode/gpt-5-nano');
  91. expect(chains.fixer[chains.fixer.length - 1]).toBe('opencode/gpt-5-nano');
  92. expect(plan?.provenance?.oracle?.winnerLayer).toBe(
  93. 'dynamic-recommendation',
  94. );
  95. expect(plan?.scoring?.engineVersionApplied).toBe('v1');
  96. });
  97. test('supports v2-shadow mode without changing applied engine', () => {
  98. const plan = buildDynamicModelPlan(
  99. [
  100. m({ model: 'openai/gpt-5.3-codex', reasoning: true, toolcall: true }),
  101. m({ model: 'chutes/kimi-k2.5', reasoning: true, toolcall: true }),
  102. m({ model: 'opencode/gpt-5-nano', reasoning: true, toolcall: true }),
  103. ],
  104. baseInstallConfig(),
  105. undefined,
  106. { scoringEngineVersion: 'v2-shadow' },
  107. );
  108. expect(plan).not.toBeNull();
  109. expect(plan?.scoring?.engineVersionApplied).toBe('v1');
  110. expect(plan?.scoring?.shadowCompared).toBe(true);
  111. expect(plan?.scoring?.diffs?.oracle).toBeDefined();
  112. });
  113. test('balances provider usage when subscription mode is enabled', () => {
  114. const plan = buildDynamicModelPlan(
  115. [
  116. m({ model: 'openai/gpt-5.3-codex', reasoning: true, toolcall: true }),
  117. m({
  118. model: 'openai/gpt-5.1-codex-mini',
  119. reasoning: true,
  120. toolcall: true,
  121. }),
  122. m({
  123. model: 'zai-coding-plan/glm-4.7',
  124. reasoning: true,
  125. toolcall: true,
  126. }),
  127. m({
  128. model: 'zai-coding-plan/glm-4.7-flash',
  129. reasoning: true,
  130. toolcall: true,
  131. }),
  132. m({
  133. model: 'chutes/moonshotai/Kimi-K2.5-TEE',
  134. reasoning: true,
  135. toolcall: true,
  136. }),
  137. m({
  138. model: 'chutes/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8-TEE',
  139. reasoning: true,
  140. toolcall: true,
  141. }),
  142. ],
  143. {
  144. ...baseInstallConfig(),
  145. hasCopilot: false,
  146. balanceProviderUsage: true,
  147. },
  148. undefined,
  149. { scoringEngineVersion: 'v2' },
  150. );
  151. expect(plan).not.toBeNull();
  152. const usage = Object.values(plan?.agents ?? {}).reduce(
  153. (acc, assignment) => {
  154. const provider = assignment.model.split('/')[0] ?? 'unknown';
  155. acc[provider] = (acc[provider] ?? 0) + 1;
  156. return acc;
  157. },
  158. {} as Record<string, number>,
  159. );
  160. expect(usage.openai).toBe(2);
  161. expect(usage['zai-coding-plan']).toBe(2);
  162. expect(usage.chutes).toBe(2);
  163. });
  164. test('matches external signals for multi-segment chutes ids in v1', () => {
  165. const ranked = rankModelsV1WithBreakdown(
  166. [m({ model: 'chutes/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8-TEE' })],
  167. 'fixer',
  168. {
  169. 'qwen/qwen3-coder-480b-a35b-instruct': {
  170. source: 'artificial-analysis',
  171. qualityScore: 95,
  172. codingScore: 92,
  173. },
  174. },
  175. );
  176. expect(ranked[0]?.externalSignalBoost).toBeGreaterThan(0);
  177. });
  178. test('prefers chutes kimi/minimax over qwen3 in v1 role scoring', () => {
  179. const catalog = [
  180. m({
  181. model: 'chutes/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8-TEE',
  182. reasoning: true,
  183. toolcall: true,
  184. }),
  185. m({
  186. model: 'chutes/moonshotai/Kimi-K2.5-TEE',
  187. reasoning: true,
  188. toolcall: true,
  189. }),
  190. m({
  191. model: 'chutes/minimax-m2.1',
  192. reasoning: true,
  193. toolcall: true,
  194. }),
  195. ];
  196. const fixer = rankModelsV1WithBreakdown(catalog, 'fixer');
  197. const explorer = rankModelsV1WithBreakdown(catalog, 'explorer');
  198. expect(fixer[0]?.model).not.toContain('Qwen3-Coder-480B');
  199. expect(explorer[0]?.model).toContain('minimax-m2.1');
  200. });
  201. test('does not apply a positive Gemini bonus in v1 scoring', () => {
  202. const catalog = [
  203. m({
  204. model: 'google/antigravity-gemini-3-pro',
  205. reasoning: true,
  206. toolcall: true,
  207. }),
  208. m({ model: 'openai/gpt-5.3-codex', reasoning: true, toolcall: true }),
  209. ];
  210. const oracle = rankModelsV1WithBreakdown(catalog, 'oracle');
  211. const orchestrator = rankModelsV1WithBreakdown(catalog, 'orchestrator');
  212. const designer = rankModelsV1WithBreakdown(catalog, 'designer');
  213. const librarian = rankModelsV1WithBreakdown(catalog, 'librarian');
  214. expect(oracle[0]?.model).toBe('openai/gpt-5.3-codex');
  215. expect(orchestrator[0]?.model).toBe('openai/gpt-5.3-codex');
  216. expect(designer[0]?.model).toBe('openai/gpt-5.3-codex');
  217. expect(librarian[0]?.model).toBe('openai/gpt-5.3-codex');
  218. });
  219. });