/// <reference types="bun-types" />

import { describe, expect, test } from 'bun:test';
import type { DiscoveredModel, ExternalSignalMap } from '../types';
import { rankModelsV2, scoreCandidateV2 } from './engine';

/**
 * Builds a `DiscoveredModel` fixture for the tests below.
 *
 * Only `model` is required. `providerID` is taken from the text before the
 * first `/` in the model id (falling back to `'openai'`), and every other
 * omitted field receives the default spelled out in the returned literal.
 * `dailyRequestLimit`, `costInput` and `costOutput` are passed through as-is
 * and stay `undefined` when not supplied.
 *
 * @param input - Field overrides; must include the full `provider/model` id.
 * @returns A fully populated `DiscoveredModel`.
 */
function model(
  input: Partial<DiscoveredModel> & { model: string },
): DiscoveredModel {
  const [providerID] = input.model.split('/');

  return {
    providerID: providerID ?? 'openai',
    model: input.model,
    name: input.name ?? input.model,
    status: input.status ?? 'active',
    contextLimit: input.contextLimit ?? 200000,
    outputLimit: input.outputLimit ?? 32000,
    reasoning: input.reasoning ?? true,
    toolcall: input.toolcall ?? true,
    attachment: input.attachment ?? false,
    dailyRequestLimit: input.dailyRequestLimit,
    costInput: input.costInput,
    costOutput: input.costOutput,
  };
}

describe('scoring-v2', () => {
  // Scoring the same candidate twice must yield the same total, and the
  // breakdown must expose the per-feature and weighted contributions.
  test('returns explain breakdown with deterministic total', () => {
    const candidate = model({ model: 'openai/gpt-5.3-codex' });
    const signalMap: ExternalSignalMap = {
      'openai/gpt-5.3-codex': {
        source: 'artificial-analysis',
        qualityScore: 70,
        codingScore: 75,
        latencySeconds: 1.2,
        inputPricePer1M: 1,
        outputPricePer1M: 3,
      },
    };

    const first = scoreCandidateV2(candidate, 'oracle', signalMap);
    const second = scoreCandidateV2(candidate, 'oracle', signalMap);

    expect(first.totalScore).toBe(second.totalScore);
    expect(first.scoreBreakdown.features.quality).toBe(0.7);
    expect(first.scoreBreakdown.weighted.coding).toBeGreaterThan(0);
  });

  // The two fixtures are built with identical capability fields and differ
  // only in their ids; the expected order is openai first even though it is
  // listed second in the input.
  test('uses stable tie-break when scores are equal', () => {
    const ranked = rankModelsV2(
      [
        model({ model: 'zai-coding-plan/glm-4.7', reasoning: false }),
        model({ model: 'openai/gpt-5.3-codex', reasoning: false }),
      ],
      'explorer',
    );

    expect(ranked[0]?.model.providerID).toBe('openai');
    expect(ranked[1]?.model.providerID).toBe('zai-coding-plan');
  });

  // The candidate id has extra path segments, mixed case and an `-FP8-TEE`
  // suffix, while the signal key is the lowercase `qwen/...-instruct` form;
  // the signal values must still be picked up.
  test('matches external signals for multi-segment chutes ids', () => {
    const candidate = model({
      model: 'chutes/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8-TEE',
    });
    const signalMap: ExternalSignalMap = {
      'qwen/qwen3-coder-480b-a35b-instruct': {
        source: 'artificial-analysis',
        qualityScore: 95,
        codingScore: 92,
      },
    };

    const scored = scoreCandidateV2(candidate, 'fixer', signalMap);

    expect(scored.scoreBreakdown.features.quality).toBe(0.95);
    expect(scored.scoreBreakdown.features.coding).toBe(0.92);
  });

  // Boundary check for the 'designer' role: an output limit of 63999 is
  // expected to be penalised, while 64000 is expected to be neutral.
  test('applies designer output threshold rule', () => {
    const belowThreshold = model({
      model: 'chutes/moonshotai/Kimi-K2.5-TEE',
      outputLimit: 63999,
    });
    const aboveThreshold = model({
      model: 'zai-coding-plan/glm-4.7',
      outputLimit: 64000,
    });

    const low = scoreCandidateV2(belowThreshold, 'designer');
    const high = scoreCandidateV2(aboveThreshold, 'designer');

    expect(low.scoreBreakdown.features.output).toBe(-1);
    expect(low.scoreBreakdown.weighted.output).toBe(-10);
    expect(high.scoreBreakdown.features.output).toBe(0);
    expect(high.scoreBreakdown.weighted.output).toBe(0);
  });

  // Both fixtures share every capability field; only the model id differs.
  // K2.5 is listed second in the input and is expected to rank first.
  test('prefers kimi k2.5 over kimi k2 when otherwise equal', () => {
    const ranked = rankModelsV2(
      [
        model({
          model: 'chutes/moonshotai/Kimi-K2-TEE',
          contextLimit: 262144,
          outputLimit: 65535,
          reasoning: true,
          toolcall: true,
          attachment: false,
        }),
        model({
          model: 'chutes/moonshotai/Kimi-K2.5-TEE',
          contextLimit: 262144,
          outputLimit: 65535,
          reasoning: true,
          toolcall: true,
          attachment: false,
        }),
      ],
      'designer',
    );

    expect(ranked[0]?.model.model).toBe('chutes/moonshotai/Kimi-K2.5-TEE');
    expect(ranked[1]?.model.model).toBe('chutes/moonshotai/Kimi-K2-TEE');
  });

  // The Qwen3 fixture has the largest output limit of the three, yet it must
  // not come out on top for the 'fixer' role.
  test('downranks chutes qwen3 against kimi/minimax priors', () => {
    const ranked = rankModelsV2(
      [
        model({
          model: 'chutes/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8-TEE',
          contextLimit: 262144,
          outputLimit: 262144,
          reasoning: true,
          toolcall: true,
        }),
        model({
          model: 'chutes/moonshotai/Kimi-K2.5-TEE',
          contextLimit: 262144,
          outputLimit: 65535,
          reasoning: true,
          toolcall: true,
        }),
        model({
          model: 'chutes/minimax-m2.1',
          contextLimit: 500000,
          outputLimit: 64000,
          reasoning: true,
          toolcall: true,
        }),
      ],
      'fixer',
    );

    expect(ranked[0]?.model.model).not.toContain('Qwen3-Coder-480B');
  });
});