From 26df5af20571dce79a25fb9872e40d53342e9062 Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Sat, 4 Jul 2026 06:59:16 +0800 Subject: [PATCH] feat(collectivex): finalize isolated v1 results explorer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Read verified content-addressed datasets from the local CollectiveX store. Render publisher-owned comparisons and recommendations, isolate the page from the benchmark database, and fail closed on mutable, malformed, or wrong-owner evidence. 中文:完成隔离式 CollectiveX v1 结果浏览器。仅从本地 CollectiveX 存储读取已验证的内容寻址数据集,展示发布器生成的对比与推荐,与基准测试数据库完全隔离,并严格拒绝可变、格式异常或所有者不匹配的证据。 --- .env.example | 3 + packages/app/cypress/component/tab-nav.cy.tsx | 6 + packages/app/cypress/e2e/collectivex.cy.ts | 384 ++++++++ packages/app/package.json | 4 +- .../src/app/(dashboard)/collectivex/page.tsx | 10 + .../collectivex-data/[...path]/route.test.ts | 182 ++++ .../app/collectivex-data/[...path]/route.ts | 132 +++ packages/app/src/app/sitemap.ts | 1 + .../collectivex/CollectiveXChart.tsx | 277 ++++++ .../collectivex/CollectiveXDisplay.tsx | 878 ++++++++++++++++++ .../collectivex/CollectiveXTables.tsx | 439 +++++++++ .../src/components/collectivex/axis.test.ts | 22 + .../app/src/components/collectivex/axis.ts | 51 + .../src/components/collectivex/data.test.ts | 92 ++ .../app/src/components/collectivex/data.ts | 150 +++ .../src/components/collectivex/reader.test.ts | 249 +++++ .../app/src/components/collectivex/reader.ts | 172 ++++ .../components/collectivex/test-fixture.ts | 747 +++++++++++++++ .../app/src/components/collectivex/types.ts | 340 +++++++ .../app/src/components/dashboard-shell.tsx | 11 + packages/app/src/components/header/header.tsx | 1 + packages/app/src/components/tab-nav.tsx | 1 + packages/app/src/hooks/api/use-collectivex.ts | 13 + packages/app/src/lib/api.test.ts | 41 + packages/app/src/lib/api.ts | 11 + .../app/src/lib/d3-chart/D3Chart/types.ts | 2 + 
.../d3-chart/D3Chart/useD3ChartRenderer.ts | 40 +- .../app/src/lib/d3-chart/chart-update.test.ts | 55 ++ packages/app/src/lib/d3-chart/chart-update.ts | 45 +- packages/app/src/lib/tab-meta.ts | 6 + pnpm-lock.yaml | 54 +- 31 files changed, 4391 insertions(+), 28 deletions(-) create mode 100644 packages/app/cypress/e2e/collectivex.cy.ts create mode 100644 packages/app/src/app/(dashboard)/collectivex/page.tsx create mode 100644 packages/app/src/app/collectivex-data/[...path]/route.test.ts create mode 100644 packages/app/src/app/collectivex-data/[...path]/route.ts create mode 100644 packages/app/src/components/collectivex/CollectiveXChart.tsx create mode 100644 packages/app/src/components/collectivex/CollectiveXDisplay.tsx create mode 100644 packages/app/src/components/collectivex/CollectiveXTables.tsx create mode 100644 packages/app/src/components/collectivex/axis.test.ts create mode 100644 packages/app/src/components/collectivex/axis.ts create mode 100644 packages/app/src/components/collectivex/data.test.ts create mode 100644 packages/app/src/components/collectivex/data.ts create mode 100644 packages/app/src/components/collectivex/reader.test.ts create mode 100644 packages/app/src/components/collectivex/reader.ts create mode 100644 packages/app/src/components/collectivex/test-fixture.ts create mode 100644 packages/app/src/components/collectivex/types.ts create mode 100644 packages/app/src/hooks/api/use-collectivex.ts diff --git a/.env.example b/.env.example index 645e01ac..2ae5880a 100644 --- a/.env.example +++ b/.env.example @@ -26,6 +26,9 @@ # Create at: https://github.com/settings/personal-access-tokens # GITHUB_TOKEN= +# CollectiveX public artifact store (serves only the store's public/ tree) +# COLLECTIVEX_STORE_ROOT=/absolute/path/to/collectivex-store + # ╔══════════════════════════════════════════════════════════════════════════╗ # ║ Production deployment (Vercel) ║ # ║ ║ diff --git a/packages/app/cypress/component/tab-nav.cy.tsx 
b/packages/app/cypress/component/tab-nav.cy.tsx index 2c24d256..31229ac7 100644 --- a/packages/app/cypress/component/tab-nav.cy.tsx +++ b/packages/app/cypress/component/tab-nav.cy.tsx @@ -70,6 +70,11 @@ describe('TabNav — unofficialrun URL preservation (issue #319)', () => { 'href', '/submissions?unofficialruns=12345', ); + cy.get('[data-testid="tab-trigger-collectivex"]').should( + 'have.attr', + 'href', + '/collectivex?unofficialruns=12345', + ); cy.get('[data-testid="tab-trigger-historical"]').should( 'have.attr', 'href', @@ -109,6 +114,7 @@ describe('TabNav — Hidden popover for gated tabs', () => { mountTabNav({}); cy.get('[data-testid="tab-trigger-inference"]').should('exist'); cy.get('[data-testid="tab-trigger-gpu-specs"]').should('exist'); + cy.get('[data-testid="tab-trigger-collectivex"]').should('exist'); cy.get('[data-testid="tab-trigger-submissions"]').should('exist'); cy.get('[data-testid="tab-trigger-hidden"]').should('not.exist'); cy.get('[data-testid="tab-trigger-feedback"]').should('not.exist'); diff --git a/packages/app/cypress/e2e/collectivex.cy.ts b/packages/app/cypress/e2e/collectivex.cy.ts new file mode 100644 index 00000000..5b35906f --- /dev/null +++ b/packages/app/cypress/e2e/collectivex.cy.ts @@ -0,0 +1,384 @@ +import { + makeCollectiveXDataset, + makeCollectiveXDatasetWithPrefillCohort, + makeCollectiveXDatasetWithDiagnosticCohort, + makeCollectiveXDiagnosticDataset, +} from '@/components/collectivex/test-fixture'; +import type { CollectiveXDataset } from '@/components/collectivex/types'; + +type Channel = 'dev-latest' | 'latest-attempt'; +const channelUrl = (channel: Channel) => `/collectivex-data/channels/${channel}.json`; + +async function sha256(value: string): Promise { + const digest = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(value)); + return [...new Uint8Array(digest)].map((byte) => byte.toString(16).padStart(2, '0')).join(''); +} + +function installPublication( + dataset: CollectiveXDataset | Record = 
makeCollectiveXDataset(), + options: { channel?: Channel; digest?: string; delay?: number } = {}, +) { + const channel = options.channel ?? 'dev-latest'; + const body = JSON.stringify(dataset); + const generatedAt = + typeof dataset.generated_at === 'string' ? dataset.generated_at : '2026-07-04T01:00:00Z'; + return cy.wrap(sha256(body), { log: false }).then((actualDigest) => { + const digest = options.digest ?? actualDigest; + cy.intercept('GET', channelUrl(channel), { + body: { + format: 'collectivex.channel.v1', + channel, + generated_at: generatedAt, + dataset: { + path: `datasets/${digest}/dataset.json`, + sha256: digest, + bytes: new TextEncoder().encode(body).length, + }, + }, + }).as(`collectivexChannel-${channel}`); + cy.intercept('GET', `/collectivex-data/datasets/${digest}/dataset.json`, { + body, + delay: options.delay, + headers: { 'content-type': 'application/json' }, + }).as(`collectivexDataset-${channel}`); + }); +} + +function openCollectiveX() { + cy.visit('/collectivex'); + cy.wait('@collectivexChannel-dev-latest'); + cy.get('[data-testid="collectivex-display"]', { timeout: 10_000 }).should('be.visible'); +} + +describe('CollectiveX native publication', () => { + beforeEach(() => { + installPublication(); + installPublication(makeCollectiveXDiagnosticDataset(), { channel: 'latest-attempt' }); + openCollectiveX(); + }); + + it('defaults to a publisher-controlled, decision-grade cohort', () => { + cy.get('[data-testid="collectivex-display"]') + .should('contain.text', 'Promoted v1') + .and('contain.text', '8/8') + .and('contain.text', '24') + .and('contain.text', 'H100 EP8 library comparison'); + cy.get('[data-testid="collectivex-scope-toggle"]') + .contains('button', 'Controlled') + .should('have.attr', 'aria-selected', 'true'); + cy.get('[data-testid="collectivex-main-chart"]') + .should('contain.text', 'Round trip (measured) · decode · p99') + .and('contain.text', 'H100 EP8 · deepep') + .and('contain.text', 'H100 EP8 · mori'); + 
cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2); + cy.get('[data-testid="collectivex-diagnostic-warning"]').should('not.exist'); + cy.get('[data-testid="collectivex-source-link"]').should( + 'have.attr', + 'href', + `https://github.com/SemiAnalysisAI/InferenceX/tree/${'a'.repeat(40)}/experimental/CollectiveX`, + ); + }); + + it('disables source navigation when publication revisions differ', () => { + const inconsistent = makeCollectiveXDataset(); + inconsistent.series[1].build.source_sha = 'b'.repeat(40); + installPublication(inconsistent); + cy.reload(); + cy.wait('@collectivexChannel-dev-latest'); + + cy.get('[data-testid="collectivex-source-link"]') + .should('have.attr', 'aria-disabled', 'true') + .and('not.have.attr', 'href'); + }); + + it('switches to the controlled cohort for the selected phase', () => { + installPublication(makeCollectiveXDatasetWithPrefillCohort()); + cy.reload(); + cy.wait('@collectivexChannel-dev-latest'); + + cy.get('[data-testid="collectivex-phase-toggle"]').contains('button', 'Prefill').click(); + cy.get('[data-testid="collectivex-cohort-select"]').should( + 'contain.text', + 'H100 EP8 prefill library comparison', + ); + cy.get('[data-testid="collectivex-main-chart"]') + .should('contain.text', 'Round trip (measured) · prefill · p99') + .and('contain.text', 'H100 EP8 · deepep') + .and('contain.text', 'H100 EP8 · mori'); + cy.contains('[role="tab"]', 'Decisions').click(); + cy.get('[data-testid="collectivex-rankings"]') + .should('contain.text', 'T=512') + .and('contain.text', 'prefill'); + cy.get('[data-testid="collectivex-recommendations"]').should( + 'contain.text', + 'Best p99 latency at T=512', + ); + + cy.get('[data-testid="collectivex-phase-toggle"]').contains('button', 'Decode').click(); + cy.get('[data-testid="collectivex-cohort-select"]').should( + 'contain.text', + 'H100 EP8 library comparison', + ); + cy.get('[data-testid="collectivex-rankings"]').should('contain.text', 'T=128'); + 
}); + + it('clears rendered lines when every series is disabled', () => { + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2); + cy.get('[data-testid="chart-legend"] input[type="checkbox"]:checked') + .first() + .uncheck({ force: true }); + cy.get('[data-testid="chart-legend"] input[type="checkbox"]:checked') + .first() + .uncheck({ force: true }); + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('not.exist'); + }); + + it('restores internal tabs with browser history', () => { + cy.contains('[role="tab"]', 'Decisions').click(); + cy.location('hash').should('eq', '#tab-decisions'); + cy.contains('[role="tab"]', 'Evidence').click(); + cy.location('hash').should('eq', '#tab-evidence'); + cy.go('back'); + cy.location('hash').should('eq', '#tab-decisions'); + cy.get('[data-testid="collectivex-rankings"]').should('be.visible'); + }); + + it('does not query database availability for the isolated page', () => { + let availabilityRequests = 0; + cy.intercept('GET', '/api/v1/availability', (request) => { + availabilityRequests += 1; + request.reply([]); + }); + cy.reload(); + cy.wait('@collectivexChannel-dev-latest'); + cy.get('[data-testid="collectivex-display"]').should('be.visible'); + cy.then(() => expect(availabilityRequests).to.eq(0)); + }); + + it('keeps the evidence workflow usable on a mobile viewport', () => { + cy.viewport(390, 844); + cy.get('[data-testid="collectivex-channel-toggle"]').should('be.visible'); + cy.get('[data-testid="collectivex-cohort-select"]').should('be.visible'); + cy.get('[data-testid="collectivex-tabs"]').should('be.visible'); + cy.document() + .its('documentElement') + .should((element) => { + expect(element.scrollWidth).to.be.at.most(element.clientWidth); + }); + }); + + it('requires an explicit switch to render diagnostics', () => { + installPublication(makeCollectiveXDatasetWithDiagnosticCohort()); + cy.reload(); + cy.wait('@collectivexChannel-dev-latest'); + 
cy.get('[data-testid="collectivex-scope-toggle"]').contains('button', 'Diagnostics').click(); + + cy.get('[data-testid="collectivex-diagnostic-warning"]') + .should('be.visible') + .and('contain.text', 'excluded from rankings'); + cy.get('[data-testid="collectivex-main-chart"]') + .should('contain.text', 'H100 EP8 · deepep') + .and('contain.text', 'H100 EP8 · mori'); + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2); + cy.get('[data-testid="collectivex-sku-select"]').should('exist'); + }); + + it('shows why a controlled cohort was excluded', () => { + installPublication(makeCollectiveXDatasetWithDiagnosticCohort()); + cy.reload(); + cy.wait('@collectivexChannel-dev-latest'); + cy.get('[data-testid="collectivex-scope-toggle"]').contains('button', 'Diagnostics').click(); + cy.get('[data-testid="collectivex-cohort-select"]').click(); + cy.contains('[role="option"]', 'H100 EP8 library comparison').click(); + + cy.get('[data-testid="collectivex-diagnostic-cohort-reasons"]') + .should('contain.text', 'unstable-ordering') + .and('contain.text', 'p50 1.050x') + .and('contain.text', 'p99 1.100x'); + cy.get('[data-testid="collectivex-main-chart"]') + .should('contain.text', 'H100 EP8 · deepep') + .and('contain.text', 'H100 EP8 · mori'); + }); + + it('resolves the latest-attempt channel without carrying published data forward', () => { + cy.get('[data-testid="collectivex-channel-toggle"]') + .contains('button', 'Latest attempt') + .click(); + cy.wait('@collectivexChannel-latest-attempt'); + + cy.get('[data-testid="collectivex-display"]') + .should('contain.text', 'diagnostic') + .and('contain.text', '1/1') + .and('contain.text', '2') + .and('contain.text', 'H100 EP8 · nccl-ep') + .and('not.contain.text', 'H100 EP8 · deepep'); + }); + + it('resets diagnostic cohorts and filters when the publication changes', () => { + installPublication(makeCollectiveXDatasetWithDiagnosticCohort()); + const latest = makeCollectiveXDiagnosticDataset(); + 
latest.series[0].label = 'MI300X EP8 · nccl-ep'; + latest.series[0].system = { + ...latest.series[0].system, + sku: 'mi300x', + label: 'AMD Instinct MI300X', + vendor: 'amd', + topology_class: 'single-node-xgmi', + transport: 'xgmi', + }; + latest.coverage[0].sku = 'mi300x'; + installPublication(latest, { channel: 'latest-attempt' }); + cy.reload(); + cy.wait('@collectivexChannel-dev-latest'); + + cy.get('[data-testid="collectivex-scope-toggle"]').contains('button', 'Diagnostics').click(); + cy.get('[data-testid="collectivex-cohort-select"]').click(); + cy.contains('[role="option"]', 'H100 EP8 library comparison').click(); + cy.get('[data-testid="collectivex-sku-select"]').click(); + cy.contains('[role="option"]', 'H100').click(); + cy.get('[data-testid="collectivex-channel-toggle"]') + .contains('button', 'Latest attempt') + .click(); + cy.wait('@collectivexChannel-latest-attempt'); + + cy.get('[data-testid="collectivex-cohort-select"]').should( + 'contain.text', + 'All diagnostic evidence', + ); + cy.get('[data-testid="collectivex-sku-select"]').should('contain.text', 'All'); + cy.get('[data-testid="collectivex-main-chart"]') + .should('contain.text', 'MI300X EP8 · nccl-ep') + .and('not.contain.text', 'H100 EP8 · deepep'); + }); + + it('never promotes latest-attempt candidates in the browser', () => { + const unpromoted = makeCollectiveXDataset(); + unpromoted.promotion.status = 'diagnostic'; + installPublication(unpromoted, { channel: 'latest-attempt' }); + cy.get('[data-testid="collectivex-channel-toggle"]') + .contains('button', 'Latest attempt') + .click(); + cy.wait('@collectivexChannel-latest-attempt'); + + cy.get('[data-testid="collectivex-scope-toggle"]') + .find('button') + .should('have.length', 1) + .and('contain.text', 'Diagnostics'); + cy.get('[data-testid="collectivex-main-chart"]') + .should('contain.text', 'H100 EP8 · deepep') + .and('contain.text', 'H100 EP8 · nccl-ep'); + + cy.contains('[role="tab"]', 'Decisions').click(); + 
cy.get('[data-testid="collectivex-unpromoted-decisions"]').should( + 'contain.text', + 'does not drive rankings or recommendations', + ); + cy.get('[data-testid="collectivex-rankings"]').should('not.exist'); + }); + + it('can inspect the latest attempt before the first promotion exists', () => { + cy.intercept('GET', channelUrl('dev-latest'), { statusCode: 404 }).as('missingPromotion'); + cy.reload(); + cy.wait('@missingPromotion'); + cy.get('[data-testid="collectivex-error"]').should('be.visible'); + + cy.get('[data-testid="collectivex-error-channel-toggle"]') + .contains('button', 'Latest attempt') + .click(); + cy.wait('@collectivexChannel-latest-attempt'); + cy.get('[data-testid="collectivex-display"]') + .should('be.visible') + .and('contain.text', 'diagnostic'); + }); + + it('renders only publisher-declared rankings and recommendations', () => { + cy.contains('[role="tab"]', 'Decisions').click(); + + cy.get('[data-testid="collectivex-rankings"]') + .should('contain.text', '3 allocations') + .and('contain.text', 'deepep') + .and('contain.text', 'mori') + .and('not.contain.text', 'nccl-ep'); + cy.get('[data-testid="collectivex-recommendations"]') + .should('contain.text', 'Best p99 latency at T=128') + .and('contain.text', '100 us') + .and('contain.text', 'deepep') + .and('contain.text', 'Official'); + + cy.get('[data-testid="collectivex-cohort-select"]').click(); + cy.contains('[role="option"]', 'H100 EP8 routing comparison').click(); + cy.get('[data-testid="collectivex-recommendations"]').should('not.exist'); + cy.get('[data-testid="collectivex-rankings"]').should('contain.text', 'Experimental'); + cy.get('[data-testid="collectivex-sensitivity"]') + .should('contain.text', 'Routing sensitivity: p99 latency T=128') + .and('contain.text', '30.0%') + .and('contain.text', 'Experimental'); + }); + + it('shows terminal coverage and every retained retry', () => { + cy.contains('[role="tab"]', 'Evidence').click(); + + 
cy.get('[data-testid="collectivex-coverage-table"]') + .should('contain.text', 'deepep decode') + .and('contain.text', 'nccl-ep decode') + .and('contain.text', 'MI355X / DeepEP / unsupported') + .and('contain.text', 'runnable') + .and('contain.text', 'unsupported') + .and('contain.text', 'capability') + .and('contain.text', 'success'); + + cy.get('[data-testid="collectivex-channel-toggle"]') + .contains('button', 'Latest attempt') + .click(); + cy.wait('@collectivexChannel-latest-attempt'); + cy.contains('[role="tab"]', 'Evidence').click(); + cy.get('[data-testid="collectivex-attempts-table"]') + .should('contain.text', 'timeout') + .and('contain.text', 'execution-timeout') + .and('contain.text', 'failed') + .and('contain.text', 'retained') + .and('contain.text', 'selected') + .and('contain.text', 'Failure mode'); + cy.get('[data-testid="collectivex-provenance"]') + .should('contain.text', 'latest-attempt') + .and('contain.text', 'Dataset SHA-256'); + }); + + it('keeps nullable isolated components unavailable', () => { + cy.get('[data-testid="collectivex-operation-select"]').click(); + cy.contains('[role="option"]', 'Dispatch').click(); + + cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 1); + cy.get('[data-testid="collectivex-main-chart"]').should( + 'contain.text', + 'Unavailable components remain null', + ); + }); + + it('renders loading while resolving immutable bytes', () => { + const delayed = makeCollectiveXDataset(); + delayed.generated_at = '2026-07-04T02:00:00Z'; + installPublication(delayed, { delay: 750 }); + cy.reload(); + cy.get('[data-testid="collectivex-loading"]').should('be.visible'); + cy.get('[data-testid="collectivex-display"]').should('be.visible'); + }); + + it('fails closed on digest or schema mismatch', () => { + installPublication(makeCollectiveXDataset(), { digest: 'f'.repeat(64) }); + cy.reload(); + cy.wait('@collectivexChannel-dev-latest'); + cy.get('[data-testid="collectivex-error"]') + 
.should('be.visible') + .and('contain.text', 'SHA-256 does not match'); + + const malformed = makeCollectiveXDataset() as unknown as Record; + malformed.browser_ranking = true; + installPublication(malformed); + cy.reload(); + cy.wait('@collectivexChannel-dev-latest'); + cy.get('[data-testid="collectivex-error"]') + .should('be.visible') + .and('contain.text', 'unknown field browser_ranking'); + }); +}); diff --git a/packages/app/package.json b/packages/app/package.json index d5159920..08a0164c 100644 --- a/packages/app/package.json +++ b/packages/app/package.json @@ -32,6 +32,7 @@ "@chenglou/pretext": "^0.0.8", "@jpinsonneau/html-to-image": "^1.11.13", "@noble/ciphers": "^2.2.0", + "@noble/hashes": "^2.2.0", "@posthog/nextjs-config": "^1.9.68", "@radix-ui/react-accordion": "^1.2.14", "@radix-ui/react-dialog": "^1.1.17", @@ -69,7 +70,8 @@ "remark-gfm": "^4.0.1", "shiki": "^4.3.0", "tailwind-merge": "^3.6.0", - "three": "^0.185.0" + "three": "^0.185.0", + "zod": "^4.4.3" }, "devDependencies": { "@bahmutov/cypress-esbuild-preprocessor": "^2.2.8", diff --git a/packages/app/src/app/(dashboard)/collectivex/page.tsx b/packages/app/src/app/(dashboard)/collectivex/page.tsx new file mode 100644 index 00000000..d3380bd9 --- /dev/null +++ b/packages/app/src/app/(dashboard)/collectivex/page.tsx @@ -0,0 +1,10 @@ +import type { Metadata } from 'next'; + +import CollectiveXDisplay from '@/components/collectivex/CollectiveXDisplay'; +import { tabMetadata } from '@/lib/tab-meta'; + +export const metadata: Metadata = tabMetadata('collectivex'); + +export default function CollectiveXPage() { + return ; +} diff --git a/packages/app/src/app/collectivex-data/[...path]/route.test.ts b/packages/app/src/app/collectivex-data/[...path]/route.test.ts new file mode 100644 index 00000000..48a842c6 --- /dev/null +++ b/packages/app/src/app/collectivex-data/[...path]/route.test.ts @@ -0,0 +1,182 @@ +import { createHash } from 'node:crypto'; +import { + chmod, + mkdtemp, + mkdir, + readdir, + 
realpath, + rm, + symlink, + truncate, + writeFile, +} from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import path from 'node:path'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +import { GET } from './route'; + +let root: string | undefined; + +afterEach(async () => { + vi.restoreAllMocks(); + delete process.env.COLLECTIVEX_STORE_ROOT; + if (root) { + const datasets = path.join(root, 'public', 'datasets'); + for (const entry of await readdir(datasets).catch(() => [])) { + await chmod(path.join(datasets, entry), 0o755).catch(() => undefined); + } + await rm(root, { recursive: true, force: true }); + } + root = undefined; +}); + +async function store() { + root = await realpath(await mkdtemp(path.join(tmpdir(), 'collectivex-route-'))); + process.env.COLLECTIVEX_STORE_ROOT = root; + await mkdir(path.join(root, 'public', 'channels'), { recursive: true }); + await mkdir(path.join(root, 'public', 'datasets'), { recursive: true }); + return root; +} + +async function freezeDataset(dataset: string) { + await chmod(path.join(dataset, 'dataset.json'), 0o444); + await chmod(path.join(dataset, 'COMPLETE'), 0o444); + await chmod(dataset, 0o555); +} + +function request(...segments: string[]) { + return GET(new Request('http://localhost/collectivex-data/test'), { + params: Promise.resolve({ path: segments }), + }); +} + +describe('CollectiveX filesystem publication route', () => { + it('serves only channel pointers without caching', async () => { + const directory = await store(); + const body = '{"format":"collectivex.channel.v1"}\n'; + await writeFile(path.join(directory, 'public', 'channels', 'dev-latest.json'), body); + + const response = await request('channels', 'dev-latest.json'); + + expect(response.status).toBe(200); + expect(response.headers.get('cache-control')).toBe('no-cache'); + expect(await response.text()).toBe(body); + }); + + it('requires a matching COMPLETE marker for immutable datasets', async () => { + const directory = await 
store(); + const body = '{}\n'; + const digest = createHash('sha256').update(body).digest('hex'); + const dataset = path.join(directory, 'public', 'datasets', digest); + await mkdir(dataset); + await writeFile(path.join(dataset, 'dataset.json'), body); + + const incomplete = await request('datasets', digest, 'dataset.json'); + expect(incomplete.status).toBe(404); + await writeFile(path.join(dataset, 'COMPLETE'), `${digest}\n`); + await freezeDataset(dataset); + const response = await request('datasets', digest, 'dataset.json'); + expect(response.status).toBe(200); + expect(response.headers.get('cache-control')).toContain('immutable'); + + await chmod(dataset, 0o755); + await chmod(path.join(dataset, 'dataset.json'), 0o644); + await writeFile(path.join(dataset, 'dataset.json'), '{"changed":true}\n'); + await freezeDataset(dataset); + const corrupted = await request('datasets', digest, 'dataset.json'); + expect(corrupted.status).toBe(404); + }); + + it('rejects writable, oversized, and symlinked publication files', async () => { + const directory = await store(); + const latest = path.join(directory, 'public', 'channels', 'latest-attempt.json'); + await writeFile(latest, '{}\n'); + await chmod(latest, 0o666); + const writable = await request('channels', 'latest-attempt.json'); + expect(writable.status).toBe(404); + + const promoted = path.join(directory, 'public', 'channels', 'dev-latest.json'); + await writeFile(promoted, 'x'.repeat(64 * 1024 + 1)); + const oversized = await request('channels', 'dev-latest.json'); + expect(oversized.status).toBe(404); + + await rm(promoted); + await symlink(latest, promoted); + const linked = await request('channels', 'dev-latest.json'); + expect(linked.status).toBe(404); + }); + + it('rejects writable publication directories', async () => { + const directory = await store(); + const channels = path.join(directory, 'public', 'channels'); + await writeFile(path.join(channels, 'dev-latest.json'), '{}\n'); + await chmod(channels, 
0o777); + const unsafeChannel = await request('channels', 'dev-latest.json'); + expect(unsafeChannel.status).toBe(404); + + await chmod(channels, 0o755); + const digest = 'c'.repeat(64); + const dataset = path.join(directory, 'public', 'datasets', digest); + await mkdir(dataset); + await writeFile(path.join(dataset, 'dataset.json'), '{}\n'); + await writeFile(path.join(dataset, 'COMPLETE'), `${digest}\n`); + await chmod(dataset, 0o777); + const unsafeDataset = await request('datasets', digest, 'dataset.json'); + expect(unsafeDataset.status).toBe(404); + }); + + it('rejects a store owned by a different process user', async () => { + const directory = await store(); + await writeFile(path.join(directory, 'public', 'channels', 'dev-latest.json'), '{}\n'); + const getuid = process.getuid; + if (!getuid) throw new Error('CollectiveX filesystem tests require a POSIX process UID'); + const ownerUid = getuid(); + vi.spyOn(process, 'getuid').mockReturnValue(ownerUid + 1); + + const response = await request('channels', 'dev-latest.json'); + + expect(response.status).toBe(503); + }); + + it('rejects owner-writable immutable dataset objects', async () => { + const directory = await store(); + const digest = 'd'.repeat(64); + const dataset = path.join(directory, 'public', 'datasets', digest); + await mkdir(dataset); + await writeFile(path.join(dataset, 'dataset.json'), '{}\n'); + await writeFile(path.join(dataset, 'COMPLETE'), `${digest}\n`); + await chmod(dataset, 0o755); + + const mutableDirectory = await request('datasets', digest, 'dataset.json'); + expect(mutableDirectory.status).toBe(404); + + await chmod(dataset, 0o555); + const mutableFiles = await request('datasets', digest, 'dataset.json'); + expect(mutableFiles.status).toBe(404); + }); + + it('rejects datasets larger than 32 MiB before reading them', async () => { + const directory = await store(); + const digest = 'b'.repeat(64); + const dataset = path.join(directory, 'public', 'datasets', digest); + await 
mkdir(dataset); + await writeFile(path.join(dataset, 'dataset.json'), '{}\n'); + await truncate(path.join(dataset, 'dataset.json'), 32 * 1024 * 1024 + 1); + await writeFile(path.join(dataset, 'COMPLETE'), `${digest}\n`); + await freezeDataset(dataset); + + const response = await request('datasets', digest, 'dataset.json'); + expect(response.status).toBe(404); + }); + + it('rejects unlisted paths and missing configuration', async () => { + const unconfigured = await request('channels', 'dev-latest.json'); + expect(unconfigured.status).toBe(503); + await store(); + const privatePath = await request('..', 'private', 'bundle.json'); + const directory = await request('datasets'); + expect(privatePath.status).toBe(404); + expect(directory.status).toBe(404); + }); +}); diff --git a/packages/app/src/app/collectivex-data/[...path]/route.ts b/packages/app/src/app/collectivex-data/[...path]/route.ts new file mode 100644 index 00000000..bfd47286 --- /dev/null +++ b/packages/app/src/app/collectivex-data/[...path]/route.ts @@ -0,0 +1,132 @@ +import { createHash } from 'node:crypto'; +import { constants } from 'node:fs'; +import { lstat, open, realpath } from 'node:fs/promises'; +import path from 'node:path'; + +export const dynamic = 'force-dynamic'; +export const runtime = 'nodejs'; + +const CHANNEL = /^channels\/(?dev-latest|latest-attempt)\.json$/; +const DATASET = /^datasets\/(?[a-f0-9]{64})\/dataset\.json$/; +const MAX_CHANNEL_BYTES = 64 * 1024; +const MAX_DATASET_BYTES = 32 * 1024 * 1024; + +function unavailable(status: number) { + return new Response(null, { status, headers: { 'Cache-Control': 'no-store' } }); +} + +async function safeDirectory( + directoryPath: string, + ownerUid: number, + mode?: number, +): Promise { + try { + const stats = await lstat(directoryPath); + return ( + !stats.isSymbolicLink() && + stats.isDirectory() && + stats.uid === ownerUid && + (stats.mode & 0o022) === 0 && + (mode === undefined || (stats.mode & 0o777) === mode) && + (await 
realpath(directoryPath)) === directoryPath + ); + } catch { + return false; + } +} + +async function regularFile( + filePath: string, + maxBytes: number, + mode: number, + ownerUid: number, +): Promise | null> { + let handle; + try { + handle = await open(filePath, constants.O_RDONLY | (constants.O_NOFOLLOW ?? 0)); + const stats = await handle.stat(); + if ( + !stats.isFile() || + stats.uid !== ownerUid || + stats.size === 0 || + stats.size > maxBytes || + (stats.mode & 0o777) !== mode + ) { + return null; + } + const body = Uint8Array.from(await handle.readFile()); + return body.byteLength === stats.size && body.byteLength <= maxBytes ? body : null; + } catch { + return null; + } finally { + await handle?.close(); + } +} + +export async function GET(_request: Request, context: { params: Promise<{ path: string[] }> }) { + const parameters = await context.params; + const relative = parameters.path.join('/'); + const channel = CHANNEL.exec(relative); + const dataset = DATASET.exec(relative); + if (!channel && !dataset) return unavailable(404); + + const configuredRoot = process.env.COLLECTIVEX_STORE_ROOT; + if (!configuredRoot || !path.isAbsolute(configuredRoot)) return unavailable(503); + const ownerUid = process.getuid?.(); + if (ownerUid === undefined) return unavailable(503); + const storeRoot = path.resolve(configuredRoot); + if (!(await safeDirectory(storeRoot, ownerUid))) return unavailable(503); + + const publicRoot = path.join(storeRoot, 'public'); + const filePath = path.join(publicRoot, ...relative.split('/')); + const directories: [string, number | undefined][] = dataset + ? 
[ + [publicRoot, 0o755], + [path.join(publicRoot, 'datasets'), 0o755], + [path.dirname(filePath), 0o555], + ] + : [ + [publicRoot, 0o755], + [path.join(publicRoot, 'channels'), 0o755], + ]; + for (const [directory, mode] of directories) { + if (!(await safeDirectory(directory, ownerUid, mode))) return unavailable(404); + } + try { + const resolved = await realpath(filePath); + if (!resolved.startsWith(`${publicRoot}${path.sep}`) || resolved !== filePath) { + return unavailable(404); + } + } catch { + return unavailable(404); + } + + if (dataset) { + const complete = await regularFile( + path.join(path.dirname(filePath), 'COMPLETE'), + 128, + 0o444, + ownerUid, + ); + if (complete === null || new TextDecoder().decode(complete).trim() !== dataset.groups?.digest) { + return unavailable(404); + } + } + const body = await regularFile( + filePath, + channel ? MAX_CHANNEL_BYTES : MAX_DATASET_BYTES, + channel ? 0o644 : 0o444, + ownerUid, + ); + if (body === null) return unavailable(404); + if (dataset && createHash('sha256').update(body).digest('hex') !== dataset.groups?.digest) { + return unavailable(404); + } + return new Response(body, { + headers: { + 'Cache-Control': channel ? 
'no-cache' : 'public, max-age=31536000, immutable', + 'Content-Type': 'application/json; charset=utf-8', + 'X-Content-Type-Options': 'nosniff', + }, + }); +} diff --git a/packages/app/src/app/sitemap.ts b/packages/app/src/app/sitemap.ts index fbe5d987..c2c431f1 100644 --- a/packages/app/src/app/sitemap.ts +++ b/packages/app/src/app/sitemap.ts @@ -13,6 +13,7 @@ const TABS = [ 'reliability', 'gpu-specs', 'gpu-metrics', + 'collectivex', ] as const; type SitemapEntry = MetadataRoute.Sitemap[number]; diff --git a/packages/app/src/components/collectivex/CollectiveXChart.tsx b/packages/app/src/components/collectivex/CollectiveXChart.tsx new file mode 100644 index 00000000..4c0e27c4 --- /dev/null +++ b/packages/app/src/components/collectivex/CollectiveXChart.tsx @@ -0,0 +1,277 @@ +'use client'; + +import * as d3 from 'd3'; +import { useMemo } from 'react'; + +import { D3Chart } from '@/lib/d3-chart/D3Chart'; + +import { sparseLogTicks } from './axis'; +import { chartPoints, collectiveXColorKey } from './data'; +import type { + CollectiveXChartPoint, + CollectiveXOperation, + CollectiveXPercentile, + CollectiveXSeries, + CollectiveXScale, + CollectiveXXAxis, + CollectiveXYAxis, +} from './types'; + +interface CollectiveXChartProps { + chartId: string; + series: CollectiveXSeries[]; + colors: Record<string, string>; + operation: CollectiveXOperation; + percentile: CollectiveXPercentile; + xAxis: CollectiveXXAxis; + yAxis: CollectiveXYAxis; + xScaleType: CollectiveXScale; + yScaleType: CollectiveXScale; + caption?: React.ReactNode; + legendElement?: React.ReactNode; + testId?: string; +} + +const OPERATION_LABELS: Record<CollectiveXOperation, string> = { + dispatch: 'Dispatch', + combine: 'Combine', + roundtrip: 'Round trip (measured)', + 'isolated-sum': 'Isolated sum (Σp, not measured)', +}; + +const X_AXIS_LABELS: Record<CollectiveXXAxis, string> = { + 'tokens-per-rank': 'Source tokens / rank', + 'global-tokens': 'Global source tokens', +}; + +const Y_AXIS_LABELS: Record<CollectiveXYAxis, string> = { + latency: 'Latency (µs)', + 'tokens-per-second': 'Token rate at
selected latency percentile (tokens/s)', + 'payload-rate': 'Payload rate at selected latency percentile (GB/s)', +}; + +function paddedDomain(values: number[], scaleType: CollectiveXScale): [number, number] { + if (values.length === 0) return scaleType === 'log' ? [1, 10] : [0, 1]; + const min = d3.min(values) ?? 0; + const max = d3.max(values) ?? 1; + if (min === max) { + if (scaleType === 'log') return [Math.max(min / 2, Number.MIN_VALUE), max * 2]; + const padding = Math.max(Math.abs(min) * 0.1, 1); + return [min - padding, max + padding]; + } + if (scaleType === 'log') return [min / 1.08, max * 1.08]; + const padding = (max - min) * 0.06; + return [Math.max(0, min - padding), max + padding]; +} + +function formatCompact(value: number): string { + if (value >= 1e9) return `${(value / 1e9).toFixed(value < 1e10 ? 1 : 0)}G`; + if (value >= 1e6) return `${(value / 1e6).toFixed(value < 1e7 ? 1 : 0)}M`; + if (value >= 1e3) return `${(value / 1e3).toFixed(value < 1e4 ? 1 : 0)}k`; + if (value >= 10) return value.toFixed(0); + if (value >= 1) return value.toFixed(value < 3 ? 1 : 0); + return value.toFixed(2); +} + +function formatTokenCount(value: number): string { + return Number.isInteger(value) ? value.toLocaleString('en-US') : formatCompact(value); +} + +function formatMetric(value: number, yAxis: CollectiveXYAxis): string { + if (yAxis === 'latency') return `${value.toFixed(value >= 100 ? 0 : 1)} µs`; + if (yAxis === 'tokens-per-second') return `${formatCompact(value)} tok/s`; + return `${value.toFixed(value >= 100 ? 
0 : 2)} GB/s`; +} + +function formatPercentiles( + value: CollectiveXSeries['points'][number]['components']['dispatch'], +): string { + if (value === null) return 'unavailable'; + return `${value.latency_us.p50.toFixed(1)} / ${value.latency_us.p90.toFixed(1)} / ${value.latency_us.p95.toFixed(1)} / ${value.latency_us.p99.toFixed(1)} µs`; +} + +function escapeHtml(value: string): string { + return value + .replaceAll('&', '&amp;') + .replaceAll('<', '&lt;') + .replaceAll('>', '&gt;') + .replaceAll('"', '&quot;') + .replaceAll("'", '&#39;'); +} + +export function CollectiveXChart({ + chartId, + series, + colors, + operation, + percentile, + xAxis, + yAxis, + xScaleType, + yScaleType, + caption, + legendElement, + testId, +}: CollectiveXChartProps) { + const points = useMemo( + () => chartPoints(series, operation, percentile, xAxis, yAxis), + [series, operation, percentile, xAxis, yAxis], + ); + const seriesById = useMemo(() => new Map(series.map((item) => [item.series_id, item])), [series]); + const lines = useMemo(() => { + const result: Record<string, { x: number; y: number }[]> = {}; + for (const point of points) { + (result[point.seriesId] ??= []).push({ x: point.x, y: point.y }); + } + for (const line of Object.values(result)) { + line.sort((a, b) => a.x - b.x); + } + return result; + }, [points]); + + const xDomain = useMemo( + () => + paddedDomain( + points.map((point) => point.x), + xScaleType, + ), + [points, xScaleType], + ); + const yDomain = useMemo( + () => + paddedDomain( + points.map((point) => point.y), + yScaleType, + ), + [points, yScaleType], + ); + const xTickValues = useMemo( + () => [...new Set(points.map((point) => point.x))].toSorted((a, b) => a - b), + [points], + ); + + const noDataOverlay = + points.length === 0 ? ( +
+

+ {series.length > 0 + ? `${OPERATION_LABELS[operation]} is unavailable for the selected series.` + : 'No matching CollectiveX series.'} +

+
+ ) : undefined; + + return ( + + chartId={chartId} + data={points} + height={560} + margin={{ top: 24, right: 20, bottom: 62, left: 78 }} + watermark="logo" + testId={testId} + grabCursor + instructions="Shift+Scroll to zoom · Drag to pan · Double-click to reset · Click a point to pin tooltip" + xScale={ + xScaleType === 'log' + ? { type: 'log', domain: xDomain, nice: false } + : { type: 'linear', domain: xDomain, nice: true } + } + yScale={{ type: yScaleType, domain: yDomain, nice: yScaleType === 'linear' }} + xAxis={{ + label: `${X_AXIS_LABELS[xAxis]}${xScaleType === 'log' ? ' (log)' : ''}`, + tickCount: 8, + tickValues: xTickValues, + tickFormat: (value) => formatTokenCount(Number(value)), + }} + yAxis={{ + label: Y_AXIS_LABELS[yAxis], + tickCount: 5, + tickValues: + yScaleType === 'log' + ? (scale) => sparseLogTicks(scale.domain().map(Number), 5) + : undefined, + tickFormat: (value) => formatCompact(Number(value)), + }} + layers={[ + { + type: 'line', + key: 'collectivex-lines', + lines, + config: { + getColor: (key) => { + const item = seriesById.get(key); + return colors[item ? collectiveXColorKey(item) : ''] ?? '#888'; + }, + strokeWidth: 2.25, + curve: d3.curveLinear, + }, + }, + { + type: 'point', + key: 'collectivex-points', + data: points, + config: { + getCx: () => 0, + getCy: () => 0, + getX: (point) => point.x, + getY: (point) => point.y, + getColor: (point) => colors[point.colorKey] ?? '#888', + getRadius: () => 3.5, + stroke: 'var(--background)', + strokeWidth: 1, + keyFn: (point) => `${point.seriesId}-${point.x}`, + maxPoints: Infinity, + }, + }, + ]} + zoom={{ + enabled: true, + axes: 'both', + scaleExtent: [1, 20], + resetEventName: `collectivex_zoom_reset_${chartId}`, + }} + tooltip={{ + rulerType: 'crosshair', + attachToLayer: 1, + content: (point, isPinned) => { + const color = colors[point.colorKey] ?? 
'#888'; + const measurement = point.point; + const measuredRoundtrip = measurement.components.roundtrip; + const eplb = point.series.eplb; + const eplbDetails = eplb.enabled + ? `${escapeHtml(eplb.planner ?? 'enabled')} · ${eplb.physical_experts}/${eplb.logical_experts} physical/logical · ${eplb.redundant_experts} redundant · ${eplb.replicated_experts} replicated (max ${eplb.max_replicas ?? 'n/a'}x) · reference T=${eplb.reference_tokens_per_rank ?? 'n/a'} · imbalance ${eplb.imbalance_before?.toFixed(3) ?? 'n/a'} -> ${eplb.imbalance_after?.toFixed(3) ?? 'n/a'}` + : `off · ${eplb.logical_experts} logical experts`; + return `
+ ${isPinned ? '
Click elsewhere to dismiss
' : ''} +
${escapeHtml(point.seriesLabel)}
+
${escapeHtml(OPERATION_LABELS[operation])} ${yAxis === 'latency' ? percentile : `at ${percentile} latency`}: ${formatMetric(point.y, yAxis)} · ${escapeHtml(point.series.status)}
+
${measurement.tokens_per_rank} tokens/rank · ${measurement.global_tokens} global tokens
+
Dispatch p50/p90/p95/p99: ${formatPercentiles(measurement.components.dispatch)}
+
Combine p50/p90/p95/p99: ${formatPercentiles(measurement.components.combine)}
+
Round trip p50/p90/p95/p99: ${formatPercentiles(measuredRoundtrip)}${measuredRoundtrip ? ' (measured)' : ''}
+
Fan-out: ${measurement.routing.fanout_mean.toFixed(2)} · routed copies: ${measurement.routing.routed_copies} · recv max: ${measurement.routing.recv_tokens_max}
+
Expert CV: ${measurement.routing.expert_load_cv.toFixed(3)} · rank CV: ${measurement.routing.payload_rank_cv.toFixed(3)} · hotspot: ${measurement.routing.hotspot_ratio.toFixed(2)}x · empty experts/ranks: ${measurement.routing.empty_expert_count}/${measurement.routing.empty_rank_count}
+
Correctness: ${measurement.correct ? 'pass' : 'fail'} · EPLB: ${eplbDetails}
+ ${eplb.mapping_sha256 ? `
EPLB mapping SHA-256: ${escapeHtml(eplb.mapping_sha256)}
` : ''} +
${escapeHtml(point.series.measurement.contract)} · ${escapeHtml(point.series.suite)} · ${escapeHtml(point.series.system.topology_class)}
+
${escapeHtml(point.series.workload.dispatch_dtype)} · ${escapeHtml(point.series.workload.routing)}${point.series.workload.eplb ? '+eplb' : ''}
+
workload=${escapeHtml(point.series.workload.workload_id.slice(0, 24))} · allocations=${point.series.allocation_ids.length}
+
`; + }, + getRulerX: (point, scale) => + (scale as d3.ScaleLinear | d3.ScaleLogarithmic)(point.x), + getRulerY: (point, scale) => scale(point.y), + onHoverStart: (selection) => { + selection.attr('r', 6); + }, + onHoverEnd: (selection) => { + selection.attr('r', 3.5); + }, + }} + transitionDuration={200} + legendElement={legendElement} + noDataOverlay={noDataOverlay} + caption={caption} + /> + ); +} diff --git a/packages/app/src/components/collectivex/CollectiveXDisplay.tsx b/packages/app/src/components/collectivex/CollectiveXDisplay.tsx new file mode 100644 index 00000000..fd6a560e --- /dev/null +++ b/packages/app/src/components/collectivex/CollectiveXDisplay.tsx @@ -0,0 +1,878 @@ +'use client'; + +import { ExternalLink, Loader2, RefreshCw } from 'lucide-react'; +import { useCallback, useEffect, useMemo, useState } from 'react'; + +import { Button } from '@/components/ui/button'; +import { Card } from '@/components/ui/card'; +import ChartLegend from '@/components/ui/chart-legend'; +import { Label } from '@/components/ui/label'; +import { SegmentedToggle, type SegmentedToggleOption } from '@/components/ui/segmented-toggle'; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select'; +import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs'; +import { useCollectiveX } from '@/hooks/api/use-collectivex'; +import { useThemeColors } from '@/hooks/useThemeColors'; +import { track } from '@/lib/analytics'; + +import { CollectiveXChart } from './CollectiveXChart'; +import { + CollectiveXAttemptTable, + CollectiveXCoverageTable, + CollectiveXDecisionTables, +} from './CollectiveXTables'; +import { collectiveXColorKey, collectiveXSeriesLabel, comparisonDifferences } from './data'; +import type { + CollectiveXCohort, + CollectiveXOperation, + CollectiveXPercentile, + CollectiveXPhase, + CollectiveXScale, + CollectiveXSeries, + CollectiveXXAxis, + CollectiveXYAxis, +} from './types'; + +type 
EvidenceScope = 'controlled' | 'diagnostic'; +type PublicationChannel = 'dev-latest' | 'latest-attempt'; +type CollectiveXTab = 'results' | 'decisions' | 'evidence'; +interface SelectOption { + value: T; + label: string; +} + +const OPERATION_OPTIONS: SelectOption[] = [ + { value: 'dispatch', label: 'Dispatch' }, + { value: 'combine', label: 'Combine' }, + { value: 'roundtrip', label: 'Round trip' }, + { value: 'isolated-sum', label: 'Isolated sum' }, +]; +const PHASE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'decode', label: 'Decode' }, + { value: 'prefill', label: 'Prefill' }, +]; +const PERCENTILE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'p50', label: 'p50' }, + { value: 'p90', label: 'p90' }, + { value: 'p95', label: 'p95' }, + { value: 'p99', label: 'p99' }, +]; +const SCALE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'log', label: 'Log' }, + { value: 'linear', label: 'Linear' }, +]; +const X_AXIS_OPTIONS: SelectOption[] = [ + { value: 'tokens-per-rank', label: 'Source tokens / rank' }, + { value: 'global-tokens', label: 'Global source tokens' }, +]; +const SCOPE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'controlled', label: 'Controlled' }, + { value: 'diagnostic', label: 'Diagnostics' }, +]; +const DIAGNOSTIC_SCOPE_OPTIONS: SegmentedToggleOption[] = [SCOPE_OPTIONS[1]]; +const CHANNEL_OPTIONS: SegmentedToggleOption[] = [ + { value: 'dev-latest', label: 'Published' }, + { value: 'latest-attempt', label: 'Latest attempt' }, +]; +const TABS: { value: CollectiveXTab; label: string }[] = [ + { value: 'results', label: 'EP results' }, + { value: 'decisions', label: 'Decisions' }, + { value: 'evidence', label: 'Evidence' }, +]; +const OPERATION_LABELS: Record = { + dispatch: 'Dispatch', + combine: 'Combine', + roundtrip: 'Round trip (measured)', + 'isolated-sum': 'Isolated sum (derived)', +}; +const Y_AXIS_LABELS: Record = { + latency: 'Latency', + 'tokens-per-second': 'Token rate at selected latency percentile', + 'payload-rate': 'Payload rate 
at selected latency percentile', +}; +const PROMOTION_CLASSES = { + promoted: 'border-emerald-600/40 bg-emerald-500/10 text-emerald-700 dark:text-emerald-300', + diagnostic: 'border-amber-600/40 bg-amber-500/10 text-amber-700 dark:text-amber-300', + quarantined: 'border-red-600/40 bg-red-500/10 text-red-700 dark:text-red-300', +}; +const COHORT_KIND_ORDER: Record = { + library: 0, + chip: 1, + system: 2, + routing: 3, +}; + +function formatDate(value: string): string { + return new Intl.DateTimeFormat('en', { + dateStyle: 'medium', + timeStyle: 'short', + timeZone: 'UTC', + }).format(new Date(value)); +} + +function ControlGroup({ label, children }: { label: string; children: React.ReactNode }) { + return ( +
+ + {children} +
+ ); +} + +function selectOptions(values: string[], uppercase = false): SelectOption[] { + return values.map((value) => ({ + value, + label: value === 'all' ? 'All' : uppercase ? value.toUpperCase() : value, + })); +} + +function cohortSeries(cohort: CollectiveXCohort | null, series: CollectiveXSeries[]) { + if (cohort === null) return []; + const ids = new Set(cohort.series_ids); + return series.filter((item) => ids.has(item.series_id)); +} + +function publicationSourceSha(series: CollectiveXSeries[]): string | null { + const sourceSha = series[0]?.build.source_sha; + return sourceSha && series.every((item) => item.build.source_sha === sourceSha) + ? sourceSha + : null; +} + +export default function CollectiveXDisplay() { + const [publication, setPublication] = useState('dev-latest'); + const { data, error, isLoading, isFetching, refetch } = useCollectiveX(publication); + const [tab, setTab] = useState('results'); + const [scope, setScope] = useState('controlled'); + const [controlledCohortId, setControlledCohortId] = useState(''); + const [diagnosticCohortId, setDiagnosticCohortId] = useState('all'); + const [operation, setOperation] = useState('roundtrip'); + const [phase, setPhase] = useState('decode'); + const [percentile, setPercentile] = useState('p99'); + const [xAxis, setXAxis] = useState('tokens-per-rank'); + const [yAxis, setYAxis] = useState('latency'); + const [xScale, setXScale] = useState('log'); + const [yScale, setYScale] = useState('log'); + const [sku, setSku] = useState('all'); + const [backend, setBackend] = useState('all'); + const [routing, setRouting] = useState('all'); + const [activeSeriesIds, setActiveSeriesIds] = useState>(new Set()); + const [legendExpanded, setLegendExpanded] = useState(true); + const [highContrast, setHighContrast] = useState(false); + + const dataset = data?.dataset; + const sourceSha = useMemo(() => publicationSourceSha(dataset?.series ?? 
[]), [dataset?.series]); + const eligibleCohorts = useMemo( + () => + dataset?.cohorts + .filter((item) => item.eligibility.decision_grade) + .toSorted( + (left, right) => + COHORT_KIND_ORDER[left.kind] - COHORT_KIND_ORDER[right.kind] || + left.label.localeCompare(right.label), + ) ?? [], + [dataset?.cohorts], + ); + const diagnosticCohorts = useMemo( + () => + dataset?.cohorts + .filter((item) => !item.eligibility.decision_grade) + .toSorted((left, right) => left.label.localeCompare(right.label)) ?? [], + [dataset?.cohorts], + ); + const seriesById = useMemo( + () => new Map(dataset?.series.map((item) => [item.series_id, item])), + [dataset?.series], + ); + const phaseControlledCohorts = useMemo( + () => + eligibleCohorts.filter((cohort) => + cohort.series_ids.every((seriesId) => seriesById.get(seriesId)?.phase === phase), + ), + [eligibleCohorts, phase, seriesById], + ); + const selectedControlledCohort = useMemo( + () => + phaseControlledCohorts.find((item) => item.cohort_id === controlledCohortId) ?? + phaseControlledCohorts[0] ?? + null, + [controlledCohortId, phaseControlledCohorts], + ); + const selectedDiagnosticCohort = useMemo( + () => diagnosticCohorts.find((item) => item.cohort_id === diagnosticCohortId) ?? 
null, + [diagnosticCohortId, diagnosticCohorts], + ); + useEffect(() => { + if (selectedControlledCohort && selectedControlledCohort.cohort_id !== controlledCohortId) { + setControlledCohortId(selectedControlledCohort.cohort_id); + } + }, [controlledCohortId, selectedControlledCohort]); + + useEffect(() => { + const readHash = () => { + const value = window.location.hash.replace(/^#(?:tab-)?/, ''); + if (TABS.some((item) => item.value === value)) setTab(value as CollectiveXTab); + }; + readHash(); + window.addEventListener('hashchange', readHash); + window.addEventListener('popstate', readHash); + return () => { + window.removeEventListener('hashchange', readHash); + window.removeEventListener('popstate', readHash); + }; + }, []); + + const diagnosticSeries = useMemo(() => { + if (!dataset) return []; + const diagnosticMembers = new Set(diagnosticCohorts.flatMap((cohort) => cohort.series_ids)); + return dataset.series.filter( + (item) => + publication === 'latest-attempt' || + item.status === 'diagnostic' || + diagnosticMembers.has(item.series_id), + ); + }, [dataset, diagnosticCohorts, publication]); + const skuOptions = useMemo( + () => ['all', ...new Set(diagnosticSeries.map((item) => item.system.sku))], + [diagnosticSeries], + ); + const backendOptions = useMemo( + () => ['all', ...new Set(diagnosticSeries.map((item) => item.backend.label))], + [diagnosticSeries], + ); + const routingOptions = useMemo( + () => [ + 'all', + ...new Set( + diagnosticSeries.map( + (item) => `${item.workload.routing}${item.workload.eplb ? 
'+eplb' : ''}`, + ), + ), + ], + [diagnosticSeries], + ); + useEffect(() => { + if ( + diagnosticCohortId !== 'all' && + !diagnosticCohorts.some((cohort) => cohort.cohort_id === diagnosticCohortId) + ) { + setDiagnosticCohortId('all'); + } + if (!skuOptions.includes(sku)) setSku('all'); + if (!backendOptions.includes(backend)) setBackend('all'); + if (!routingOptions.includes(routing)) setRouting('all'); + }, [ + backend, + backendOptions, + diagnosticCohortId, + diagnosticCohorts, + routing, + routingOptions, + sku, + skuOptions, + ]); + const scopedSeries = useMemo(() => { + if (!dataset) return []; + if (scope === 'controlled') return cohortSeries(selectedControlledCohort, dataset.series); + const candidates = selectedDiagnosticCohort + ? cohortSeries(selectedDiagnosticCohort, dataset.series) + : diagnosticSeries; + return candidates.filter( + (item) => + (sku === 'all' || item.system.sku === sku) && + (backend === 'all' || item.backend.label === backend) && + (routing === 'all' || + `${item.workload.routing}${item.workload.eplb ? 
'+eplb' : ''}` === routing), + ); + }, [ + backend, + dataset, + diagnosticSeries, + routing, + scope, + selectedControlledCohort, + selectedDiagnosticCohort, + sku, + ]); + const phaseSeries = useMemo( + () => scopedSeries.filter((item) => item.phase === phase), + [phase, scopedSeries], + ); + + useEffect(() => { + if ( + scope === 'diagnostic' && + scopedSeries.length > 0 && + !scopedSeries.some((item) => item.phase === phase) + ) { + setPhase(scopedSeries[0].phase); + } + }, [phase, scope, scopedSeries]); + useEffect(() => { + setActiveSeriesIds(new Set(scopedSeries.map((item) => item.series_id))); + }, [scopedSeries]); + + const activeSeries = useMemo( + () => phaseSeries.filter((item) => activeSeriesIds.has(item.series_id)), + [activeSeriesIds, phaseSeries], + ); + const colorKeys = useMemo( + () => [...new Set(scopedSeries.map(collectiveXColorKey))], + [scopedSeries], + ); + const { resolveColor, getCssColor } = useThemeColors({ + highContrast, + activeKeys: colorKeys, + hcKeys: colorKeys, + hcVendorKeyFor: (key) => key.split('_')[0], + }); + const colors = useMemo( + () => Object.fromEntries(colorKeys.map((key) => [key, getCssColor(resolveColor(key, key))])), + [colorKeys, getCssColor, resolveColor], + ); + const legendItems = useMemo( + () => + phaseSeries.map((item) => ({ + name: item.series_id, + label: collectiveXSeriesLabel(item), + color: colors[collectiveXColorKey(item)] ?? 'var(--muted-foreground)', + isActive: activeSeriesIds.has(item.series_id), + title: `${item.status} · ${item.system.topology_class} · ${item.workload.workload_id}`, + onClick: () => { + setActiveSeriesIds((previous) => { + const next = new Set(previous); + if (next.has(item.series_id)) next.delete(item.series_id); + else next.add(item.series_id); + return next; + }); + track('collectivex_series_toggled', { series: item.series_id }); + }, + })), + [activeSeriesIds, colors, phaseSeries], + ); + const warnings = useMemo( + () => (scope === 'diagnostic' ? 
comparisonDifferences(activeSeries) : []), + [activeSeries, scope], + ); + const missingComponents = activeSeries.some((item) => + item.points.some((point) => + operation === 'isolated-sum' + ? point.components.isolated_sum === null + : point.components[operation] === null, + ), + ); + + const handleRefresh = useCallback(() => { + track('collectivex_data_refreshed'); + void refetch(); + }, [refetch]); + const handlePublication = useCallback((value: PublicationChannel) => { + setPublication(value); + setScope(value === 'dev-latest' ? 'controlled' : 'diagnostic'); + setDiagnosticCohortId('all'); + setSku('all'); + setBackend('all'); + setRouting('all'); + track('collectivex_publication_changed', { publication: value }); + }, []); + const handleTab = useCallback((value: string) => { + const next = value as CollectiveXTab; + setTab(next); + window.location.hash = `tab-${next}`; + track('collectivex_tab_changed', { tab: next }); + }, []); + + if (isLoading) { + return ( + + +

Resolving CollectiveX publication...

+
+ ); + } + if (error || !data || !dataset) { + return ( + +

CollectiveX publication unavailable

+

+ {error instanceof Error ? error.message : 'The publication failed validation.'} +

+
+ + +
+
+ ); + } + + return ( +
+ +
+
+
+

CollectiveX

+ + {dataset.promotion.status === 'promoted' ? 'Promoted v1' : dataset.promotion.status} + +
+

+ Expert-parallel latency and payload rate across collective libraries and systems. +

+ {dataset.promotion.reason && ( +

+ Publication reason: {dataset.promotion.reason} +

+ )} +
+ +
+
+ item.status === 'decision-grade').length} + label="Decision series" + /> + + + + + +
+
+ + +
+ + + + + { + setScope(value); + track('collectivex_evidence_scope_changed', { scope: value }); + }} + ariaLabel="CollectiveX evidence scope" + testId="collectivex-scope-toggle" + /> + +
+ { + if (scope === 'controlled') setControlledCohortId(value); + else setDiagnosticCohortId(value); + }} + options={ + scope === 'controlled' + ? phaseControlledCohorts.map((item) => ({ + value: item.cohort_id, + label: item.label, + })) + : [ + { value: 'all', label: 'All diagnostic evidence' }, + ...diagnosticCohorts.map((item) => ({ + value: item.cohort_id, + label: item.label, + })), + ] + } + placeholder={scope === 'controlled' ? 'No eligible cohort' : 'All diagnostics'} + /> +
+ { + setOperation(next); + if (next !== 'roundtrip' && yAxis === 'tokens-per-second') setYAxis('latency'); + if (next === 'isolated-sum' && yAxis === 'payload-rate') setYAxis('latency'); + }} + /> + + + + + + + {scope === 'diagnostic' && ( + <> + + + + + )} + + + + + + + + +
+
+ + + + {TABS.map((item) => ( + + {item.label} + + ))} + + + {phaseSeries.length === 0 && ( + +

+ {scope === 'controlled' + ? 'No decision-grade series in this cohort and phase.' + : 'No diagnostic series match these filters.'} +

+
+ )} + + +

+ {OPERATION_LABELS[operation]} · {phase} ·{' '} + {yAxis === 'latency' ? percentile : `at ${percentile} latency`} +

+

+ {scope === 'controlled' + ? selectedControlledCohort?.label + : (selectedDiagnosticCohort?.label ?? 'Diagnostic evidence')}{' '} + · {Y_AXIS_LABELS[yAxis]} +

+ + } + legendElement={ + + setActiveSeriesIds( + (previous) => new Set([...previous].filter((item) => item !== id)), + ) + } + isLegendExpanded={legendExpanded} + onExpandedChange={setLegendExpanded} + switches={[ + { + id: 'collectivex-high-contrast', + label: 'High Contrast', + checked: highContrast, + onCheckedChange: setHighContrast, + }, + ]} + actions={ + activeSeries.length < phaseSeries.length + ? [ + { + id: 'collectivex-reset-filter', + label: 'Reset filter', + onClick: () => + setActiveSeriesIds( + new Set(phaseSeries.map((item) => item.series_id)), + ), + }, + ] + : [] + } + /> + } + /> + {scope === 'diagnostic' && ( +

+ Diagnostic evidence is excluded from rankings, recommendations, and regression + claims. +

+ )} + {scope === 'diagnostic' && selectedDiagnosticCohort && ( +

+ Excluded: {selectedDiagnosticCohort.eligibility.reasons.join(', ')}. Repeat spread: + {' p50 '} + {selectedDiagnosticCohort.eligibility.p50_max_min_ratio?.toFixed(3) ?? 'n/a'}x, + {' p99 '} + {selectedDiagnosticCohort.eligibility.p99_max_min_ratio?.toFixed(3) ?? 'n/a'}x. +

+ )} + {warnings.length > 0 && ( +

+ Selected factors differ: {warnings.join(', ')}. +

+ )} + {missingComponents && ( +

+ Unavailable components remain null and are omitted. +

+ )} + {operation === 'isolated-sum' && ( +

+ Isolated sum is derived and never drives throughput or recommendations. +

+ )} + {yAxis === 'payload-rate' && ( +

+ Payload rate is derived at the selected latency percentile and is not physical link + bandwidth. +

+ )} +
+
+ + {publication === 'dev-latest' ? ( + + ) : ( + +

Unpromoted evidence

+

+ Latest-attempt evidence does not drive rankings or recommendations. +

+
+ )} +
+ + + + +

Publication provenance

+
+ + + + +
+
+
+
+
+ ); +} + +function Stat({ + value, + label, + compact = false, +}: { + value: React.ReactNode; + label: string; + compact?: boolean; +}) { + return ( +
+

{value}

+

{label}

+
+ ); +} + +function SelectControl({ + label, + testId, + value, + options, + onChange, + placeholder, +}: { + label: string; + testId: string; + value: T; + options: SelectOption[]; + onChange: (value: T) => void; + placeholder?: string; +}) { + return ( + + + + ); +} + +function Provenance({ + label, + value, + mono = false, +}: { + label: string; + value: string; + mono?: boolean; +}) { + return ( +
+
{label}
+
{value}
+
+ ); +} diff --git a/packages/app/src/components/collectivex/CollectiveXTables.tsx b/packages/app/src/components/collectivex/CollectiveXTables.tsx new file mode 100644 index 00000000..3d88273c --- /dev/null +++ b/packages/app/src/components/collectivex/CollectiveXTables.tsx @@ -0,0 +1,439 @@ +'use client'; + +import { useMemo } from 'react'; + +import { Badge } from '@/components/ui/badge'; +import { Card } from '@/components/ui/card'; +import { type DataTableColumn, DataTable } from '@/components/ui/data-table'; + +import type { + CollectiveXAttempt, + CollectiveXCohort, + CollectiveXCoverage, + CollectiveXDataset, + CollectiveXOutcome, + CollectiveXPublicationTier, + CollectiveXRanking, + CollectiveXRecommendation, + CollectiveXSensitivity, + CollectiveXSeries, +} from './types'; + +const OUTCOME_CLASSES = { + success: 'border-emerald-600/40 bg-emerald-500/15 text-emerald-700 dark:text-emerald-300', + unsupported: 'border-zinc-500/40 bg-zinc-500/15 text-zinc-700 dark:text-zinc-300', + failed: 'border-red-700/50 bg-red-700/15 text-red-800 dark:text-red-300', + invalid: 'border-red-600/40 bg-red-500/15 text-red-700 dark:text-red-300', + diagnostic: 'border-amber-600/40 bg-amber-500/15 text-amber-700 dark:text-amber-300', +} satisfies Record; + +const PUBLICATION_TIER_CLASSES = { + official: 'border-emerald-600/40 bg-emerald-500/10 text-emerald-700 dark:text-emerald-300', + 'comparable-experimental': + 'border-amber-600/40 bg-amber-500/10 text-amber-700 dark:text-amber-300', +} satisfies Record; + +function OutcomeBadge({ outcome }: { outcome: CollectiveXOutcome }) { + return ( + + {outcome} + + ); +} + +function PublicationTierBadge({ tier }: { tier: CollectiveXPublicationTier }) { + return ( + + {tier === 'official' ? 'Official' : 'Experimental'} + + ); +} + +function shortId(value: string | null): string { + if (value === null) return '-'; + const suffix = value.lastIndexOf('-'); + return suffix === -1 ? 
value : value.slice(suffix + 1, suffix + 9); +} + +function metricLabel(ranking: CollectiveXRanking): string { + const { metric } = ranking; + const measure = + metric.measure === 'latency_us' + ? `${metric.statistic} latency` + : `payload rate at ${metric.statistic} latency`; + return `${metric.phase} T=${metric.tokens_per_rank} ${metric.operation} ${measure}`; +} + +export function CollectiveXCoverageTable({ coverage }: { coverage: CollectiveXCoverage[] }) { + const columns = useMemo[]>( + () => [ + { + header: 'Case', + cell: (row) => row.label, + sortValue: (row) => row.label, + className: 'font-medium whitespace-nowrap', + }, + { + header: 'SKU', + cell: (row) => row.sku.toUpperCase(), + sortValue: (row) => row.sku, + }, + { + header: 'Backend', + cell: (row) => row.backend, + sortValue: (row) => row.backend, + }, + { + header: 'Phase', + cell: (row) => row.phase, + sortValue: (row) => row.phase, + }, + { + header: 'Disposition', + cell: (row) => row.disposition, + sortValue: (row) => row.disposition, + }, + { + header: 'Outcome', + cell: (row) => , + sortValue: (row) => row.outcome, + }, + { + header: 'Attempts', + align: 'right', + cell: (row) => row.attempt_ids.length, + sortValue: (row) => row.attempt_ids.length, + className: 'tabular-nums', + }, + { + header: 'Selected', + cell: (row) => ( + + {shortId(row.selected_attempt_id)} + + ), + sortValue: (row) => row.selected_attempt_id ?? '', + }, + { + header: 'Failure mode', + cell: (row) => row.failure_mode ?? '-', + sortValue: (row) => row.failure_mode ?? '', + }, + { + header: 'Reason', + cell: (row) => row.reason ?? '-', + sortValue: (row) => row.reason ?? '', + }, + ], + [], + ); + + return ( + +

Terminal coverage

+ +
+ ); +} + +export function CollectiveXAttemptTable({ attempts }: { attempts: CollectiveXAttempt[] }) { + const columns = useMemo[]>( + () => [ + { + header: 'Case', + cell: (row) => {shortId(row.case_id)}, + sortValue: (row) => row.case_id, + }, + { + header: 'Allocation', + cell: (row) => {shortId(row.allocation_id)}, + sortValue: (row) => row.allocation_id, + }, + { + header: 'Run', + cell: (row) => `${row.run_id}.${row.run_attempt}`, + sortValue: (row) => + `${row.run_id.padStart(20, '0')}.${String(row.run_attempt).padStart(10, '0')}`, + className: 'font-mono text-xs', + }, + { + header: 'Try', + align: 'right', + cell: (row) => row.attempt_index, + sortValue: (row) => row.attempt_index, + className: 'tabular-nums', + }, + { + header: 'Outcome', + cell: (row) => , + sortValue: (row) => row.outcome, + }, + { + header: 'Role', + cell: (row) => (row.selected ? 'selected' : 'retained'), + sortValue: (row) => (row.selected ? 'selected' : 'retained'), + }, + { + header: 'Evidence', + align: 'right', + cell: (row) => row.evidence.length, + sortValue: (row) => row.evidence.length, + className: 'tabular-nums', + }, + { + header: 'Failure mode', + cell: (row) => row.failure_mode ?? '-', + sortValue: (row) => row.failure_mode ?? '', + }, + { + header: 'Reason', + cell: (row) => row.reason ?? '-', + sortValue: (row) => row.reason ?? '', + }, + ], + [], + ); + + return ( + +

Retained attempts

+ +
+ ); +} + +interface RankingRow { + ranking: CollectiveXRanking; + rank: number; + series: CollectiveXSeries; + value: number; + unit: string; +} + +export function CollectiveXDecisionTables({ + dataset, + cohort, +}: { + dataset: CollectiveXDataset; + cohort: CollectiveXCohort | null; +}) { + const seriesById = useMemo( + () => new Map(dataset.series.map((item) => [item.series_id, item])), + [dataset.series], + ); + const rankings = cohort + ? dataset.rankings.filter((item) => item.cohort_id === cohort.cohort_id) + : []; + const recommendations = cohort + ? dataset.recommendations.filter((item) => item.cohort_id === cohort.cohort_id) + : []; + const sensitivities = cohort + ? dataset.sensitivities.filter((item) => item.cohort_id === cohort.cohort_id) + : []; + const rankingRows = rankings.flatMap((ranking) => + ranking.entries.flatMap((entry) => { + const series = seriesById.get(entry.series_id); + return series + ? [{ ranking, rank: entry.rank, series, value: entry.value, unit: entry.unit }] + : []; + }), + ); + const rankingColumns = useMemo[]>( + () => [ + { + header: 'Comparison', + cell: (row) => row.ranking.label, + sortValue: (row) => row.ranking.label, + }, + { + header: 'Metric', + cell: (row) => metricLabel(row.ranking), + sortValue: (row) => metricLabel(row.ranking), + }, + { + header: 'Rank', + align: 'right', + cell: (row) => row.rank, + sortValue: (row) => row.rank, + className: 'tabular-nums', + }, + { + header: 'Tier', + cell: (row) => , + sortValue: (row) => row.ranking.publication_tier, + }, + { + header: 'Configuration', + cell: (row) => row.series.label, + sortValue: (row) => row.series.label, + className: 'font-medium whitespace-nowrap', + }, + { + header: 'Value', + align: 'right', + cell: (row) => + `${row.value.toLocaleString('en-US', { maximumFractionDigits: 2 })} ${row.unit}`, + sortValue: (row) => row.value, + className: 'tabular-nums whitespace-nowrap', + }, + ], + [], + ); + + if (!cohort) { + return ( + +

No controlled cohort is selected.

+
+ ); + } + + return ( + <> + +
+
+

Rankings

+

{cohort.label}

+
+
+ + {cohort.eligibility.allocation_ids.length} allocations +
+
+ +
+ {recommendations.length > 0 && ( + + )} + {sensitivities.length > 0 && ( + + )} + + ); +} + +function RecommendationTable({ + recommendations, + seriesById, +}: { + recommendations: CollectiveXRecommendation[]; + seriesById: Map; +}) { + const columns = useMemo[]>( + () => [ + { + header: 'Objective', + cell: (row) => row.label, + sortValue: (row) => row.label, + }, + { + header: 'Recommended configuration', + cell: (row) => seriesById.get(row.series_id)?.label ?? '-', + sortValue: (row) => seriesById.get(row.series_id)?.label ?? '', + className: 'font-medium whitespace-nowrap', + }, + { + header: 'Tier', + cell: (row) => , + sortValue: (row) => row.publication_tier, + }, + { + header: 'Value', + align: 'right', + cell: (row) => + `${row.value.toLocaleString('en-US', { maximumFractionDigits: 2 })} ${row.unit}`, + sortValue: (row) => row.value, + className: 'tabular-nums whitespace-nowrap', + }, + { + header: 'Basis', + cell: (row) => row.rationale, + sortValue: (row) => row.rationale, + }, + ], + [seriesById], + ); + return ( + +

Best conforming configurations

+ +
+ ); +} + +function SensitivityTable({ + sensitivities, + seriesById, +}: { + sensitivities: CollectiveXSensitivity[]; + seriesById: Map; +}) { + const columns = useMemo[]>( + () => [ + { + header: 'Contrast', + cell: (row) => row.label, + sortValue: (row) => row.label, + }, + { + header: 'Baseline', + cell: (row) => seriesById.get(row.baseline_series_id)?.label ?? '-', + sortValue: (row) => seriesById.get(row.baseline_series_id)?.label ?? '', + }, + { + header: 'Candidate', + cell: (row) => seriesById.get(row.candidate_series_id)?.label ?? '-', + sortValue: (row) => seriesById.get(row.candidate_series_id)?.label ?? '', + }, + { + header: 'Tier', + cell: (row) => , + sortValue: (row) => row.publication_tier, + }, + { + header: 'Change', + align: 'right', + cell: (row) => `${(row.signed_change_ratio * 100).toFixed(1)}%`, + sortValue: (row) => row.signed_change_ratio, + className: 'tabular-nums', + }, + ], + [seriesById], + ); + return ( + +

Routing sensitivity

+ +
+ ); +} diff --git a/packages/app/src/components/collectivex/axis.test.ts b/packages/app/src/components/collectivex/axis.test.ts new file mode 100644 index 00000000..a5b17156 --- /dev/null +++ b/packages/app/src/components/collectivex/axis.test.ts @@ -0,0 +1,22 @@ +import { describe, expect, it } from 'vitest'; + +import { sparseLogTicks } from './axis'; + +describe('sparseLogTicks', () => { + it('uses sparse 1-2-5 ticks for a typical latency domain', () => { + expect(sparseLogTicks([48, 225], 5)).toEqual([50, 100, 200]); + }); + + it('caps wide domains without restoring dense minor ticks', () => { + expect(sparseLogTicks([0.1, 1000], 5)).toEqual([0.1, 1, 10, 100, 1000]); + }); + + it('falls back to a small geometric set for a narrow domain', () => { + expect(sparseLogTicks([52, 65], 4)).toEqual([52, 58, 65]); + }); + + it('handles reversed and invalid domains', () => { + expect(sparseLogTicks([225, 48], 5)).toEqual([50, 100, 200]); + expect(sparseLogTicks([0, Number.NaN], 5)).toEqual([]); + }); +}); diff --git a/packages/app/src/components/collectivex/axis.ts b/packages/app/src/components/collectivex/axis.ts new file mode 100644 index 00000000..d3d21250 --- /dev/null +++ b/packages/app/src/components/collectivex/axis.ts @@ -0,0 +1,51 @@ +const LOG_MANTISSAS = [1, 2, 5] as const; + +function evenlySpaced(values: T[], count: number): T[] { + if (values.length <= count) return values; + if (count <= 1) return [values[Math.floor(values.length / 2)]]; + + const selected: T[] = []; + for (let index = 0; index < count; index += 1) { + selected.push(values[Math.round((index * (values.length - 1)) / (count - 1))]); + } + return selected; +} + +function fallbackLogTicks(min: number, max: number, maxTicks: number): number[] { + const count = Math.min(maxTicks, 3); + const logMin = Math.log(min); + const logSpan = Math.log(max) - logMin; + const ticks = Array.from({ length: count }, (_, index) => { + const value = Math.exp(logMin + (logSpan * index) / Math.max(1, count - 1)); 
+ return Number(value.toPrecision(2)); + }); + return [...new Set(ticks)].filter((value) => value >= min && value <= max); +} + +/** + * Generate sparse 1-2-5 log ticks instead of D3's dense minor-tick sequence. + * The callback is evaluated against the current visible domain, including zoom. + */ +export function sparseLogTicks(domain: number[], maxTicks: number): number[] { + const numericDomain = domain.filter((value) => Number.isFinite(value) && value > 0); + if (numericDomain.length < 2 || maxTicks <= 0) return []; + + const min = Math.min(...numericDomain); + const max = Math.max(...numericDomain); + if (min === max) return [min]; + + const ticks: number[] = []; + const firstExponent = Math.floor(Math.log10(min)); + const lastExponent = Math.ceil(Math.log10(max)); + + for (let exponent = firstExponent; exponent <= lastExponent; exponent += 1) { + const magnitude = 10 ** exponent; + for (const mantissa of LOG_MANTISSAS) { + const value = mantissa * magnitude; + if (value >= min && value <= max) ticks.push(value); + } + } + + const candidates = ticks.length >= 2 ? 
ticks : fallbackLogTicks(min, max, maxTicks); + return evenlySpaced(candidates, maxTicks); +} diff --git a/packages/app/src/components/collectivex/data.test.ts b/packages/app/src/components/collectivex/data.test.ts new file mode 100644 index 00000000..5b347405 --- /dev/null +++ b/packages/app/src/components/collectivex/data.test.ts @@ -0,0 +1,92 @@ +import { describe, expect, it } from 'vitest'; + +import { + chartPoints, + collectiveXColorKey, + collectiveXSeriesLabel, + comparisonDifferences, + metricValue, +} from './data'; +import { makeCollectiveXDataset } from './test-fixture'; + +describe('CollectiveX EP projections', () => { + it('uses measured roundtrip without synthesizing nullable components', () => { + const dataset = makeCollectiveXDataset(); + const pairedOnly = dataset.series[1].points[0]; + + expect(metricValue(pairedOnly, 'dispatch', 'p99', 'latency')).toBeNull(); + expect(metricValue(pairedOnly, 'combine', 'p99', 'payload-rate')).toBeNull(); + expect(metricValue(pairedOnly, 'roundtrip', 'p99', 'latency')).toBe(120); + expect(metricValue(pairedOnly, 'roundtrip', 'p99', 'tokens-per-second')).toBeCloseTo( + 8_533_333.33, + ); + }); + + it('uses publisher supplied logical rates', () => { + const point = makeCollectiveXDataset().series[0].points[0]; + point.components.roundtrip!.logical_payload_rate_gbps_at_latency_percentile!.p99 = 123.45; + + expect(metricValue(point, 'roundtrip', 'p99', 'payload-rate')).toBe(123.45); + expect(metricValue(point, 'roundtrip', 'p95', 'payload-rate')).toBeGreaterThan(0); + }); + + it('omits unavailable series from a component projection', () => { + const series = makeCollectiveXDataset().series; + + expect(chartPoints(series, 'dispatch', 'p99', 'tokens-per-rank', 'latency')).toHaveLength(1); + expect(chartPoints(series, 'roundtrip', 'p99', 'tokens-per-rank', 'latency')).toHaveLength(7); + }); + + it('reports mismatched diagnostic factors without deciding comparability', () => { + const series = 
makeCollectiveXDataset().series; + series[1].workload.routing = 'zipf'; + series[1].system.topology_class = 'other-topology'; + + expect(comparisonDifferences(series)).toEqual(expect.arrayContaining(['routing', 'topology'])); + }); + + it('reports implementation, transport, and resource differences', () => { + const base = makeCollectiveXDataset().series[0]; + const different = structuredClone(base); + different.backend.version = '2.0.0'; + different.build.image_digest = `sha256:${'f'.repeat(64)}`; + different.system.transport = 'pcie'; + different.resource.configured_units = 12; + + expect(comparisonDifferences([base, different])).toEqual( + expect.arrayContaining([ + 'backend implementation', + 'implementation build', + 'transport', + 'resource profile', + ]), + ); + expect(collectiveXColorKey(base)).not.toBe(collectiveXColorKey(different)); + expect(collectiveXSeriesLabel(base)).toContain( + '1.0.0 · backend-default · build dddddddd · series 00000001', + ); + }); + + it('gives routing variants distinct visual identities', () => { + const [uniform, zipf] = makeCollectiveXDataset().series; + zipf.workload.routing = 'zipf'; + + expect(collectiveXColorKey(uniform)).not.toBe(collectiveXColorKey(zipf)); + zipf.workload.eplb = true; + expect(collectiveXColorKey(zipf)).toContain('zipf-eplb'); + }); + + it('keeps public config, routing-control, and runtime builds visually distinct', () => { + const base = makeCollectiveXDataset().series[0]; + const publicConfig = structuredClone(base); + const routingControl = structuredClone(base); + const runtime = structuredClone(base); + publicConfig.build.public_config_sha256 = '0'.repeat(64); + routingControl.build.routing_control_sha256 = '9'.repeat(64); + runtime.build.runtime_fingerprint_sha256 = '6'.repeat(64); + + expect(collectiveXColorKey(base)).not.toBe(collectiveXColorKey(publicConfig)); + expect(collectiveXColorKey(base)).not.toBe(collectiveXColorKey(routingControl)); + 
expect(collectiveXColorKey(base)).not.toBe(collectiveXColorKey(runtime)); + }); +}); diff --git a/packages/app/src/components/collectivex/data.ts b/packages/app/src/components/collectivex/data.ts new file mode 100644 index 00000000..512efc70 --- /dev/null +++ b/packages/app/src/components/collectivex/data.ts @@ -0,0 +1,150 @@ +import type { + CollectiveXChartPoint, + CollectiveXComponent, + CollectiveXOperation, + CollectiveXPercentile, + CollectiveXPoint, + CollectiveXSeries, + CollectiveXXAxis, + CollectiveXYAxis, +} from './types'; + +export function collectiveXSeriesLabel(series: CollectiveXSeries): string { + const version = series.backend.version ?? 'unversioned'; + const build = series.build.implementation_contract_sha256.slice(0, 8); + const identity = series.series_id.slice(-8); + const tier = series.publication_tier === 'official' ? 'official' : 'experimental'; + return `${series.label} · ${version} · ${series.resource.profile} · build ${build} · series ${identity} · ${tier}`; +} + +export function collectiveXColorKey(series: CollectiveXSeries): string { + const routing = `${series.workload.routing}${series.workload.eplb ? '-eplb' : ''}`; + const eplb = series.eplb.enabled + ? `${series.eplb.planner ?? 'enabled'}-${series.eplb.mapping_sha256 ?? 'unmapped'}-${series.eplb.physical_experts}` + : 'eplb-off'; + const units = `${series.resource.comm_units_kind ?? 'units'}-${series.resource.configured_units ?? 'default'}`; + return [ + series.system.sku, + `ep${series.system.ep_size}`, + series.system.topology_class, + series.system.transport, + series.backend.id, + series.backend.generation ?? 'default', + series.backend.version ?? 
'unversioned', + series.publication_tier, + series.build.implementation_contract_sha256, + series.build.public_config_sha256, + series.build.routing_control_sha256, + series.build.runtime_fingerprint_sha256, + series.build.image_digest, + series.build.source_sha, + series.build.squash_sha256, + routing, + eplb, + series.resource.profile, + units, + ].join('_'); +} + +function operationComponent( + point: CollectiveXPoint, + operation: CollectiveXOperation, +): CollectiveXComponent | null { + return point.components[operation === 'isolated-sum' ? 'isolated_sum' : operation]; +} + +export function metricValue( + point: CollectiveXPoint, + operation: CollectiveXOperation, + percentile: CollectiveXPercentile, + yAxis: CollectiveXYAxis, +): number | null { + const component = operationComponent(point, operation); + if (component === null) return null; + const latencyUs = component.latency_us[percentile]; + if (yAxis === 'latency') return latencyUs; + if (yAxis === 'tokens-per-second') { + return operation === 'roundtrip' + ? point.roundtrip_token_rate_at_latency_percentile[percentile] + : null; + } + return component.logical_payload_rate_gbps_at_latency_percentile?.[percentile] ?? null; +} + +export function chartPoints( + series: CollectiveXSeries[], + operation: CollectiveXOperation, + percentile: CollectiveXPercentile, + xAxis: CollectiveXXAxis, + yAxis: CollectiveXYAxis, +): CollectiveXChartPoint[] { + return series.flatMap((item) => + item.points.flatMap((point) => { + const x = xAxis === 'tokens-per-rank' ? 
point.tokens_per_rank : point.global_tokens; + const y = metricValue(point, operation, percentile, yAxis); + if (!Number.isFinite(x) || x <= 0 || y === null || y <= 0 || !Number.isFinite(y)) return []; + return [ + { + seriesId: item.series_id, + seriesLabel: collectiveXSeriesLabel(item), + colorKey: collectiveXColorKey(item), + x, + y, + operation, + percentile, + point, + series: item, + }, + ]; + }), + ); +} + +export function comparisonDifferences(series: CollectiveXSeries[]): string[] { + if (series.length === 0) return []; + const warnings: string[] = []; + const different = (getValue: (item: CollectiveXSeries) => unknown) => + new Set(series.map(getValue)).size > 1; + const checks: [string, (item: CollectiveXSeries) => unknown][] = [ + ['model', (item) => item.model], + ['suite', (item) => item.suite], + ['publication tier', (item) => item.publication_tier], + ['phase', (item) => item.phase], + ['backend implementation', (item) => JSON.stringify(item.backend)], + ['implementation build', (item) => JSON.stringify(item.build)], + ['system identity', (item) => `${item.system.sku}/${item.system.vendor}/${item.system.label}`], + ['topology', (item) => item.system.topology_class], + ['transport', (item) => item.system.transport], + ['world size', (item) => item.system.world_size], + ['EP degree', (item) => item.system.ep_size], + ['placement', (item) => item.system.placement], + ['workload', (item) => item.workload.workload_id], + [ + 'model shape', + (item) => + `${item.workload.hidden}/${item.workload.top_k}/${item.workload.experts}/${item.workload.activation_profile}`, + ], + ['routing', (item) => `${item.workload.routing}/${item.workload.eplb}`], + ['EPLB plan', (item) => JSON.stringify(item.eplb)], + ['dtypes', (item) => `${item.workload.dispatch_dtype}/${item.workload.combine_dtype}`], + ['resource profile', (item) => JSON.stringify(item.resource)], + ['measurement', (item) => JSON.stringify(item.measurement)], + ['token ladder', (item) => 
item.points.map((point) => point.tokens_per_rank).join(',')], + [ + 'component availability', + (item) => + item.points + .map((point) => + ['dispatch', 'combine', 'roundtrip', 'isolated_sum'] + .map((name) => point.components[name as keyof typeof point.components] !== null) + .join('/'), + ) + .join(','), + ], + ['correctness', (item) => item.points.map((point) => point.correct).join(',')], + ]; + for (const [label, getValue] of checks) { + if (different(getValue)) warnings.push(label); + } + return warnings; +} diff --git a/packages/app/src/components/collectivex/reader.test.ts b/packages/app/src/components/collectivex/reader.test.ts new file mode 100644 index 00000000..7c212aee --- /dev/null +++ b/packages/app/src/components/collectivex/reader.test.ts @@ -0,0 +1,249 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +import { + collectiveXChannelUrl, + fetchCollectiveXPublication, + parseCollectiveXChannel, + parseCollectiveXDataset, + sha256Hex, +} from './reader'; +import { makeCollectiveXDataset, makeCollectiveXDiagnosticDataset } from './test-fixture'; + +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +beforeEach(() => mockFetch.mockReset()); + +describe('CollectiveX publication reader', () => { + it('hashes bytes without requiring secure-context Web Crypto', async () => { + await expect(sha256Hex(new TextEncoder().encode('abc'))).resolves.toBe( + 'ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad', + ); + }); + + it('accepts the strict public shape without recomputing publisher policy', () => { + const dataset = makeCollectiveXDataset(); + dataset.rankings[0].entries[0].value += 1; + const nonSuccessAttempt = dataset.attempts.find( + (attempt) => attempt.outcome === 'unsupported', + )!; + nonSuccessAttempt.failure_mode = 'future-runtime-mode'; + nonSuccessAttempt.reason = 'future-runtime-reason'; + + const result = parseCollectiveXDataset(dataset); + + 
expect(result.series[0].publication_tier).toBe('official'); + expect(result.cohorts.find((item) => item.kind === 'routing')?.publication_tier).toBe( + 'comparable-experimental', + ); + expect(result.series[1].points[0].components.dispatch).toBeNull(); + }); + + it('rejects unknown, missing, and stale structural fields', () => { + const unknown = makeCollectiveXDataset() as unknown as Record; + unknown.browser_decision = true; + expect(() => parseCollectiveXDataset(unknown)).toThrow('unknown field browser_decision'); + + const missingTier = makeCollectiveXDataset(); + delete (missingTier.series[0] as Partial<(typeof missingTier.series)[number]>).publication_tier; + expect(() => parseCollectiveXDataset(missingTier)).toThrow('publication_tier'); + + const staleMetric = makeCollectiveXDataset(); + const component = staleMetric.series[0].points[0].components.roundtrip! as unknown as Record< + string, + unknown + >; + component.logical_gbps = component.logical_payload_rate_gbps_at_latency_percentile; + delete component.logical_payload_rate_gbps_at_latency_percentile; + expect(() => parseCollectiveXDataset(staleMetric)).toThrow('logical_payload_rate'); + }); + + it('matches backend eligibility and evidence uniqueness constraints', () => { + const missingReason = makeCollectiveXDataset(); + missingReason.series[0].eligibility.decision_grade = false; + expect(() => parseCollectiveXDataset(missingReason)).toThrow('diagnostic eligibility'); + + const repeatedEvidenceId = makeCollectiveXDataset(); + const attemptWithEvidence = repeatedEvidenceId.attempts.find( + (attempt) => attempt.evidence.length > 0, + )!; + const evidence = attemptWithEvidence.evidence[0]; + attemptWithEvidence.evidence.push({ + evidence_id: evidence.evidence_id, + point_id: repeatedEvidenceId.series[1].points[0].point_id, + }); + expect( + parseCollectiveXDataset(repeatedEvidenceId).attempts.find( + (attempt) => attempt.attempt_id === attemptWithEvidence.attempt_id, + )?.evidence, + ).toHaveLength(2); + + 
const duplicateEvidence = makeCollectiveXDataset(); + const duplicateAttempt = duplicateEvidence.attempts.find( + (attempt) => attempt.evidence.length > 0, + )!; + duplicateAttempt.evidence.push({ ...duplicateAttempt.evidence[0] }); + expect(() => parseCollectiveXDataset(duplicateEvidence)).toThrow('duplicate evidence items'); + }); + + it('accepts only digest-addressed public channel paths', () => { + const digest = 'a'.repeat(64); + expect( + parseCollectiveXChannel({ + format: 'collectivex.channel.v1', + channel: 'dev-latest', + generated_at: '2026-07-04T00:00:00Z', + dataset: { + path: `datasets/${digest}/dataset.json`, + sha256: digest, + bytes: 10, + }, + }).dataset.sha256, + ).toBe(digest); + + expect(() => + parseCollectiveXChannel({ + format: 'collectivex.channel.v1', + channel: 'dev-latest', + generated_at: '2026-07-04T00:00:00Z', + dataset: { path: '../private/dataset.json', sha256: digest, bytes: 10 }, + }), + ).toThrow('dataset.path'); + + expect(() => + parseCollectiveXChannel({ + format: 'collectivex.channel.v1', + channel: 'dev-latest', + generated_at: '2026-07-04T00:00:00Z', + dataset: { + path: `datasets/${'b'.repeat(64)}/dataset.json`, + sha256: digest, + bytes: 10, + }, + }), + ).toThrow('digest-addressed'); + + expect(() => + parseCollectiveXChannel({ + format: 'collectivex.channel.v1', + channel: 'dev-latest', + generated_at: '2026-07-04T00:00:00Z', + dataset: { + path: `datasets/${digest}/dataset.json`, + sha256: digest, + bytes: 32 * 1024 * 1024 + 1, + }, + }), + ).toThrow('33554432'); + }); + + it('verifies exact bytes and SHA-256 before parsing', async () => { + const bytes = new TextEncoder().encode(JSON.stringify(makeCollectiveXDataset())); + const digest = await sha256Hex(bytes); + mockPublication(bytes, digest); + + const result = await fetchCollectiveXPublication(); + + expect(result.digest).toBe(digest); + expect(mockFetch).toHaveBeenNthCalledWith( + 1, + collectiveXChannelUrl('dev-latest'), + expect.objectContaining({ cache: 
'no-store', credentials: 'same-origin' }), + ); + expect(mockFetch).toHaveBeenNthCalledWith( + 2, + `/collectivex-data/datasets/${digest}/dataset.json`, + expect.objectContaining({ cache: 'force-cache', credentials: 'same-origin' }), + ); + }); + + it('fails closed on byte, digest, and channel-name mismatch', async () => { + const bytes = new TextEncoder().encode(JSON.stringify(makeCollectiveXDataset())); + const digest = await sha256Hex(bytes); + mockPublication(bytes, digest, { byteLength: bytes.length + 1 }); + await expect(fetchCollectiveXPublication()).rejects.toThrow('byte count'); + + mockFetch.mockReset(); + mockPublication(bytes, 'f'.repeat(64)); + await expect(fetchCollectiveXPublication()).rejects.toThrow('SHA-256'); + + mockFetch.mockReset(); + mockPublication(bytes, digest, { pointerChannel: 'latest-attempt' }); + await expect(fetchCollectiveXPublication('dev-latest')).rejects.toThrow( + 'channel name does not match', + ); + + mockFetch.mockReset(); + mockPublication(bytes, digest, { pointerTimestamp: '2099-01-01T00:00:00Z' }); + await expect(fetchCollectiveXPublication()).rejects.toThrow('timestamp does not match'); + }); + + it('requires a promoted dataset only on dev-latest', async () => { + const bytes = new TextEncoder().encode(JSON.stringify(makeCollectiveXDiagnosticDataset())); + const digest = await sha256Hex(bytes); + mockPublication(bytes, digest); + await expect(fetchCollectiveXPublication('dev-latest')).rejects.toThrow( + 'does not reference a promoted dataset', + ); + + mockFetch.mockReset(); + mockPublication(bytes, digest, { pointerChannel: 'latest-attempt' }); + await expect(fetchCollectiveXPublication('latest-attempt')).resolves.toMatchObject({ + dataset: { promotion: { status: 'diagnostic' } }, + }); + }); + + it('rejects duplicate JSON keys before schema validation', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + text: () => + Promise.resolve( + `{"format":"collectivex.channel.v1","format":"collectivex.channel.v1",` 
+ + `"channel":"dev-latest","generated_at":"2026-07-04T01:00:00Z",` + + `"dataset":{"path":"datasets/${'a'.repeat(64)}/dataset.json",` + + `"sha256":"${'a'.repeat(64)}","bytes":1}}`, + ), + }); + await expect(fetchCollectiveXPublication()).rejects.toThrow('duplicate key format'); + + mockFetch.mockReset(); + const text = JSON.stringify(makeCollectiveXDataset()).replace( + '"schema_version":1', + '"schema_version":1,"schema_version":1', + ); + const bytes = new TextEncoder().encode(text); + const digest = await sha256Hex(bytes); + mockPublication(bytes, digest); + await expect(fetchCollectiveXPublication()).rejects.toThrow('duplicate key schema_version'); + }); +}); + +function mockPublication( + bytes: Uint8Array, + digest: string, + options: { + byteLength?: number; + pointerChannel?: 'dev-latest' | 'latest-attempt'; + pointerTimestamp?: string; + } = {}, +) { + const channel = options.pointerChannel ?? 'dev-latest'; + mockFetch + .mockResolvedValueOnce({ + ok: true, + text: () => + Promise.resolve( + JSON.stringify({ + format: 'collectivex.channel.v1', + channel, + generated_at: options.pointerTimestamp ?? '2026-07-04T01:00:00Z', + dataset: { + path: `datasets/${digest}/dataset.json`, + sha256: digest, + bytes: options.byteLength ?? 
/** Channel identifiers accepted by the reader, taken from the channel pointer type. */
export type CollectiveXChannelName = CollectiveXChannel['channel'];

/** Path prefix of every URL this module requests. */
const COLLECTIVEX_PUBLIC_ROOT = '/collectivex-data/';

/** Builds the URL of the pointer document for `channel`. */
export const collectiveXChannelUrl = (channel: CollectiveXChannelName): string =>
  `${COLLECTIVEX_PUBLIC_ROOT}channels/${channel}.json`;

/** Error thrown whenever this module rejects a channel pointer or dataset. */
class CollectiveXDataError extends Error {
  constructor(message: string) {
    super(`CollectiveX publication rejected: ${message}`);
    this.name = 'CollectiveXDataError';
  }
}

/**
 * Converts a schema validation failure into a `CollectiveXDataError`.
 *
 * Only the first issue is reported. Its location is rendered as `$` for the document root or
 * `$.a.b` for a nested path; unknown-key issues name the first offending key.
 */
function schemaError(error: ZodError): CollectiveXDataError {
  const issue = error.issues[0];
  const path = issue?.path.length ? `$.${issue.path.join('.')}` : '$';
  if (issue?.code === 'unrecognized_keys') {
    return new CollectiveXDataError(`${path} contains unknown field ${issue.keys[0]}.`);
  }
  return new CollectiveXDataError(`${path} ${issue?.message ?? 'is malformed'}.`);
}

/** True for the four characters JSON allows between tokens. */
const isJsonWhitespace = (char: string | undefined): boolean =>
  char === ' ' || char === '\t' || char === '\n' || char === '\r';

/**
 * Returns the index just past the closing quote of the string literal that opens at `start`,
 * or -1 when the literal never closes.
 */
function jsonStringEnd(text: string, start: number): number {
  let index = start + 1;
  while (index < text.length) {
    const char = text[index];
    if (char === '"') return index + 1;
    // A backslash always protects the following character. The hex digits of `\uXXXX` can be
    // neither a quote nor a backslash, so stepping over two code units is sufficient.
    index += char === '\\' ? 2 : 1;
  }
  return -1;
}

/**
 * Parses `text` as JSON and additionally rejects objects that repeat a member name.
 *
 * `JSON.parse` silently keeps the last of several duplicate keys, so the text is scanned a
 * second time. The scan is iterative (explicit scope stack) so that deeply nested input is
 * rejected or accepted with a `CollectiveXDataError` rather than exhausting the call stack.
 * It relies on `JSON.parse` having already validated the grammar: outside string literals a
 * colon can only follow a member name.
 *
 * @param text - Raw document text.
 * @param name - Label used in error messages (for example `channel` or `dataset`).
 * @returns The value produced by `JSON.parse`.
 * @throws CollectiveXDataError if the text is not valid JSON or any object repeats a key
 *   (keys are compared after escape decoding, so `"a"` and `"\u0061"` collide).
 */
function strictJson(text: string, name: string): unknown {
  let value: unknown;
  try {
    value = JSON.parse(text);
  } catch {
    throw new CollectiveXDataError(`${name} is not valid JSON.`);
  }

  // One entry per open container: a key set for objects, null for arrays.
  const scopes: Array<Set<string> | null> = [];
  let offset = 0;
  while (offset < text.length) {
    const char = text[offset];
    if (char === '"') {
      const end = jsonStringEnd(text, offset);
      if (end < 0) throw new CollectiveXDataError(`${name} contains an unterminated string.`);
      let next = end;
      while (isJsonWhitespace(text[next])) next += 1;
      if (text[next] === ':') {
        // Member name: decode it (values are skipped undecoded) and check the enclosing object.
        const keys = scopes[scopes.length - 1];
        const key = JSON.parse(text.slice(offset, end)) as string;
        if (keys?.has(key)) {
          throw new CollectiveXDataError(`${name} contains duplicate key ${key}.`);
        }
        keys?.add(key);
        offset = next + 1;
      } else {
        offset = end;
      }
      continue;
    }
    if (char === '{') scopes.push(new Set<string>());
    else if (char === '[') scopes.push(null);
    else if (char === '}' || char === ']') scopes.pop();
    offset += 1;
  }
  return value;
}

/**
 * Validates a channel pointer against the channel schema.
 *
 * @throws CollectiveXDataError if the schema rejects `value` or if `dataset.path` is not exactly
 *   `datasets/<dataset.sha256>/dataset.json`.
 */
export function parseCollectiveXChannel(value: unknown): CollectiveXChannel {
  const parsed = collectiveXChannelSchema.safeParse(value);
  if (!parsed.success) throw schemaError(parsed.error);
  const { dataset } = parsed.data;
  if (dataset.path !== `datasets/${dataset.sha256}/dataset.json`) {
    throw new CollectiveXDataError('$.dataset.path must be the digest-addressed dataset path.');
  }
  return parsed.data;
}

/**
 * Validates a dataset document against the dataset schema.
 *
 * @throws CollectiveXDataError describing the first schema issue.
 */
export function parseCollectiveXDataset(value: unknown): CollectiveXDataset {
  const parsed = collectiveXDatasetSchema.safeParse(value);
  if (!parsed.success) throw schemaError(parsed.error);
  return parsed.data;
}

/**
 * Issues a `fetch` and returns the response only when its status is OK.
 *
 * @param name - Label used in the error message.
 * @throws CollectiveXDataError for a non-OK status. Errors raised by `fetch` itself (network
 *   failure, abort) propagate unchanged.
 */
async function responseOrThrow(
  url: string,
  options: RequestInit,
  name: string,
): Promise<Response> {
  const response = await fetch(url, options);
  if (!response.ok) throw new CollectiveXDataError(`${name} request failed (${response.status}).`);
  return response;
}

/**
 * Returns the lowercase hex SHA-256 of `bytes`.
 *
 * Uses WebCrypto when `crypto.subtle` is present and otherwise falls back to the bundled
 * `@noble/hashes` implementation.
 */
export async function sha256Hex(bytes: Uint8Array): Promise<string> {
  if (globalThis.crypto?.subtle) {
    const digest = await globalThis.crypto.subtle.digest('SHA-256', bytes);
    return bytesToHex(new Uint8Array(digest));
  }
  return bytesToHex(sha256(bytes));
}

/**
 * Resolves a channel to its dataset, verifying every link before returning anything.
 *
 * Steps, each of which rejects with a `CollectiveXDataError` on failure:
 * 1. Fetch the channel pointer uncached and parse it strictly (no duplicate keys, schema).
 * 2. Require the pointer's `channel` to equal the requested channel name.
 * 3. Fetch the digest-addressed dataset and require its byte length and SHA-256 to equal the
 *    pointer's `bytes` and `sha256`. The bytes are checked before any parsing happens.
 * 4. Decode as strict UTF-8, parse strictly, and validate against the dataset schema.
 * 5. Require `generated_at` to match the pointer, and on `dev-latest` require
 *    `promotion.status === 'promoted'`.
 *
 * @param channelName - Channel to resolve; defaults to `dev-latest`.
 * @param signal - Optional abort signal forwarded to both requests.
 * @returns The validated pointer, the validated dataset and the verified digest.
 */
export async function fetchCollectiveXPublication(
  channelName: CollectiveXChannelName = 'dev-latest',
  signal?: AbortSignal,
): Promise<CollectiveXResolvedDataset> {
  const channelResponse = await responseOrThrow(
    collectiveXChannelUrl(channelName),
    { cache: 'no-store', credentials: 'same-origin', signal },
    'channel',
  );
  const channel = parseCollectiveXChannel(strictJson(await channelResponse.text(), 'channel'));
  if (channel.channel !== channelName) {
    throw new CollectiveXDataError('channel name does not match its path.');
  }

  // The dataset URL embeds its digest and the digest is re-verified below, so a cached copy is
  // acceptable here.
  const datasetResponse = await responseOrThrow(
    `${COLLECTIVEX_PUBLIC_ROOT}${channel.dataset.path}`,
    { cache: 'force-cache', credentials: 'same-origin', signal },
    'dataset',
  );
  const bytes = new Uint8Array(await datasetResponse.arrayBuffer());
  if (bytes.byteLength !== channel.dataset.bytes) {
    throw new CollectiveXDataError('dataset byte count does not match the channel pointer.');
  }
  const digest = await sha256Hex(bytes);
  if (digest !== channel.dataset.sha256) {
    throw new CollectiveXDataError('dataset SHA-256 does not match the channel pointer.');
  }

  let text: string;
  try {
    // `fatal: true` makes malformed UTF-8 throw instead of yielding replacement characters.
    text = new TextDecoder('utf-8', { fatal: true }).decode(bytes);
  } catch {
    throw new CollectiveXDataError('dataset is not valid UTF-8 JSON.');
  }
  const dataset = parseCollectiveXDataset(strictJson(text, 'dataset'));
  if (dataset.generated_at !== channel.generated_at) {
    throw new CollectiveXDataError('dataset timestamp does not match the channel pointer.');
  }
  if (channelName === 'dev-latest' && dataset.promotion.status !== 'promoted') {
    throw new CollectiveXDataError('dev-latest does not reference a promoted dataset.');
  }
  return { channel, dataset, digest };
}
/**
 * Builds a measured component record for the fixture.
 *
 * Latency percentiles are `base`, `base + 10`, `base + 15` and `base + 20`; the payload is fixed
 * at 1 MiB and each rate is that byte count divided by `latency * 1000`.
 */
function component(base: number) {
  const payloadBytes = 1_048_576;
  const rateAt = (latencyUs: number) => payloadBytes / (latencyUs * 1000);
  const p50 = base;
  const p90 = base + 10;
  const p95 = base + 15;
  const p99 = base + 20;
  return {
    origin: 'measured' as const,
    latency_us: { p50, p90, p95, p99 },
    logical_bytes: payloadBytes,
    logical_payload_rate_gbps_at_latency_percentile: {
      p50: rateAt(p50),
      p90: rateAt(p90),
      p95: rateAt(p95),
      p99: rateAt(p99),
    },
    sample_count: 512,
  };
}

/**
 * Reads the value a ranking metric refers to from the roundtrip component of the series'
 * first point: the latency percentile for `latency_us`, otherwise the payload rate at that
 * percentile.
 */
function decisionMetricValue(
  item: CollectiveXSeries,
  metric: CollectiveXDataset['rankings'][number]['metric'],
) {
  // Fixture series always carry a measured roundtrip component, hence the assertions.
  const { roundtrip } = item.points[0].components;
  if (metric.measure === 'latency_us') {
    return roundtrip!.latency_us[metric.statistic];
  }
  return roundtrip!.logical_payload_rate_gbps_at_latency_percentile![metric.statistic];
}

/** Human-readable name of a metric, used when composing fixture labels. */
function metricLabel(metric: CollectiveXMetric): string {
  if (metric.measure === 'latency_us') {
    return `${metric.statistic} latency`;
  }
  return `payload rate at ${metric.statistic} latency`;
}
'reference' : 'library', + generation: 'v1', + version: '1.0.0', + }, + build: { + implementation_contract_sha256: backend === 'deepep' ? 'd'.repeat(64) : 'e'.repeat(64), + public_config_sha256: backend === 'deepep' ? '4'.repeat(64) : '5'.repeat(64), + routing_control_sha256: backend === 'deepep' ? 'a'.repeat(64) : 'b'.repeat(64), + runtime_fingerprint_sha256: backend === 'deepep' ? '7'.repeat(64) : '8'.repeat(64), + image_digest: `sha256:${backend === 'deepep' ? '1'.repeat(64) : '2'.repeat(64)}`, + source_sha: 'a'.repeat(40), + squash_sha256: backend === 'deepep' ? '3'.repeat(64) : '4'.repeat(64), + }, + system: { + sku: 'h100', + label: 'NVIDIA H100 SXM', + vendor: 'nvidia', + topology_class: 'single-node-nvlink', + transport: 'nvlink', + world_size: 8, + ep_size: 8, + placement: 'packed', + }, + workload: { + workload_id: fixtureId('work', 1), + hidden: 7168, + top_k: 8, + experts: 256, + routing: 'uniform', + eplb: false, + dispatch_dtype: 'bf16', + combine_dtype: 'bf16', + activation_profile: 'canonical-counter-source-v3', + }, + eplb: { + enabled: false, + planner: null, + mapping_sha256: null, + logical_experts: 256, + physical_experts: 256, + redundant_experts: 0, + reference_tokens_per_rank: null, + replicated_experts: 0, + max_replicas: null, + imbalance_before: null, + imbalance_after: null, + }, + resource: { + mode: 'tuned', + profile: 'backend-default', + comm_units_kind: 'sm', + configured_units: 20, + }, + measurement: { + contract: 'layout-and-dispatch-v1', + sampling_contract: 'fixed-512-v1', + iters: 8, + trials: 64, + warmups: 32, + samples_per_component: 512, + headline_component: 'roundtrip', + headline_percentile: 'p99', + }, + points: [ + { + point_id: pointIds[index - 1], + tokens_per_rank: 128, + global_tokens: globalTokens, + correct: true, + routing: { + fanout_mean: 5.25, + recv_tokens_max: 740, + expert_load_cv: 0.12, + payload_rank_cv: 0.08, + hotspot_ratio: 1.4, + empty_expert_count: 0, + empty_rank_count: 0, + routed_copies: 5376, + 
}, + components: { + dispatch: index === 1 ? component(30) : null, + combine: index === 1 ? component(40) : null, + roundtrip, + isolated_sum: + index === 1 + ? { + origin: 'derived', + latency_us: { p50: 70, p90: 90, p95: 100, p99: 110 }, + logical_bytes: null, + logical_payload_rate_gbps_at_latency_percentile: null, + sample_count: null, + } + : null, + }, + roundtrip_token_rate_at_latency_percentile: { + p50: globalTokens / (roundtrip.latency_us.p50 * 1e-6), + p90: globalTokens / (roundtrip.latency_us.p90 * 1e-6), + p95: globalTokens / (roundtrip.latency_us.p95 * 1e-6), + p99: globalTokens / (roundtrip.latency_us.p99 * 1e-6), + }, + evidence_ids: evidenceIds, + }, + ], + eligibility: makeEligibility(), + }; +} + +export function makeCollectiveXDataset(): CollectiveXDataset { + const routingVariant = makeSeries(4, 'deepep', 110); + routingVariant.label = 'H100 EP8 · deepep · BF16 · zipf'; + routingVariant.suite = 'ep-routing-v1'; + routingVariant.publication_tier = 'comparable-experimental'; + routingVariant.workload.routing = 'zipf'; + routingVariant.points[0].routing = { + ...routingVariant.points[0].routing, + expert_load_cv: 0.72, + payload_rank_cv: 0.41, + hotspot_ratio: 4.8, + empty_expert_count: 37, + }; + const routingEplbVariant = makeSeries(7, 'deepep', 90); + routingEplbVariant.label = 'H100 EP8 · deepep · BF16 · zipf+eplb'; + routingEplbVariant.suite = 'ep-routing-v1'; + routingEplbVariant.publication_tier = 'comparable-experimental'; + routingEplbVariant.workload.routing = 'zipf'; + routingEplbVariant.workload.eplb = true; + routingEplbVariant.build.implementation_contract_sha256 = 'f'.repeat(64); + routingEplbVariant.eplb = { + enabled: true, + planner: 'greedy-rank-major-v1', + mapping_sha256: 'f'.repeat(64), + logical_experts: 256, + physical_experts: 288, + redundant_experts: 32, + reference_tokens_per_rank: 2048, + replicated_experts: 24, + max_replicas: 3, + imbalance_before: 4.8, + imbalance_after: 1.2, + }; + 
routingEplbVariant.points[0].routing = { + ...routingEplbVariant.points[0].routing, + expert_load_cv: 0.18, + payload_rank_cv: 0.13, + hotspot_ratio: 1.7, + empty_expert_count: 3, + }; + const chipVariant = makeSeries(5, 'deepep', 70); + const systemVariant = makeSeries(6, 'nccl-ep', 130); + for (const item of [chipVariant, systemVariant]) { + item.label = `B200 EP8 · ${item.backend.id} · BF16 · uniform`; + item.system = { + ...item.system, + sku: 'b200', + label: 'NVIDIA B200 SXM', + }; + } + const series = [ + makeSeries(1, 'deepep', 80), + makeSeries(2, 'mori', 100), + makeSeries(3, 'nccl-ep', 150), + routingVariant, + chipVariant, + systemVariant, + routingEplbVariant, + ]; + const metrics = ( + ['latency_us', 'logical_payload_rate_gbps_at_latency_percentile'] as const + ).flatMap((measure) => + (['p50', 'p99'] as const).map((statistic) => ({ + operation: 'roundtrip' as const, + statistic, + measure, + objective: measure === 'latency_us' ? ('min' as const) : ('max' as const), + tokens_per_rank: 128, + phase: 'decode' as const, + })), + ); + const attempts: CollectiveXAttempt[] = series.flatMap((item, seriesIndex) => + allocations.map((allocationId, allocationIndex) => ({ + attempt_id: attemptId(seriesIndex + 1, allocationIndex + 1, 1), + evidence: [ + { + evidence_id: item.points[0].evidence_ids[allocationIndex], + point_id: item.points[0].point_id, + }, + ], + case_id: item.case_ids[0], + allocation_id: allocationId, + run_id: String(1000 + allocationIndex), + run_attempt: 1, + attempt_index: 1, + outcome: 'success' as const, + failure_mode: null, + reason: null, + series_id: item.series_id, + selected: true, + completed_at: '2026-07-04T00:01:00Z', + })), + ); + const unsupportedCaseId = fixtureId('case', 8); + const unsupportedAttempts: CollectiveXAttempt[] = allocations.map( + (allocationId, allocationIndex) => ({ + attempt_id: attemptId(8, allocationIndex + 1, 1), + evidence: [], + case_id: unsupportedCaseId, + allocation_id: allocationId, + run_id: 
String(1000 + allocationIndex), + run_attempt: 1, + attempt_index: 1, + outcome: 'unsupported', + failure_mode: 'capability', + reason: 'backend-platform-unsupported', + series_id: null, + selected: true, + completed_at: '2026-07-04T00:01:00Z', + }), + ); + attempts.push(...unsupportedAttempts); + const cohortId = decisionIds.libraryCohort; + const routingCohortId = decisionIds.routingCohort; + const cohortMembers = [ + series.slice(0, 2), + [series[0], routingVariant, routingEplbVariant], + [series[0], chipVariant], + [series[2], systemVariant], + ]; + const cohortIds = [ + cohortId, + routingCohortId, + decisionIds.chipCohort, + decisionIds.systemCohort, + ] as const; + const cohortLabels = ['Library', 'Routing', 'Chip', 'System']; + const rankings: CollectiveXDataset['rankings'] = cohortMembers + .flatMap((members, cohortIndex) => + metrics.map((metric, metricIndex) => ({ + ranking_id: decisionIds.rankings[cohortIndex * metrics.length + metricIndex], + cohort_id: cohortIds[cohortIndex], + label: `${cohortLabels[cohortIndex]} ${metricLabel(metric)} T=128`, + publication_tier: + cohortIndex === 1 ? ('comparable-experimental' as const) : ('official' as const), + metric, + entries: members + .toSorted((left, right) => { + const delta = decisionMetricValue(left, metric) - decisionMetricValue(right, metric); + return metric.objective === 'min' ? delta : -delta; + }) + .map((item, index) => ({ + rank: index + 1, + series_id: item.series_id, + point_id: item.points[0].point_id, + value: decisionMetricValue(item, metric), + unit: metric.measure === 'latency_us' ? 
('us' as const) : ('GB/s' as const), + })), + eligibility: makeEligibility(), + })), + ) + .toSorted((left, right) => left.ranking_id.localeCompare(right.ranking_id)); + const recommendations: CollectiveXDataset['recommendations'] = rankings + .filter( + ( + ranking, + ): ranking is CollectiveXDataset['rankings'][number] & { + publication_tier: 'official'; + } => ranking.publication_tier === 'official', + ) + .map((ranking) => { + const idIndex = cohortIds.indexOf(ranking.cohort_id as (typeof cohortIds)[number]); + const metricIndex = metrics.findIndex( + (metric) => + metric.measure === ranking.metric.measure && + metric.statistic === ranking.metric.statistic, + ); + const top = ranking.entries[0]; + const objective: CollectiveXDataset['recommendations'][number]['objective'] = + ranking.metric.measure === 'latency_us' + ? `min-${ranking.metric.statistic}-latency` + : `max-payload-rate-at-${ranking.metric.statistic}-latency`; + return { + recommendation_id: decisionIds.recommendations[idIndex * metrics.length + metricIndex], + cohort_id: ranking.cohort_id, + label: `Best ${metricLabel(ranking.metric)} at T=128`, + objective, + publication_tier: ranking.publication_tier, + series_id: top.series_id, + point_id: top.point_id, + value: top.value, + unit: top.unit, + rationale: 'Top stable measured roundtrip result in a controlled cohort', + eligibility: makeEligibility(), + }; + }) + .toSorted((left, right) => left.recommendation_id.localeCompare(right.recommendation_id)); + const sensitivities: CollectiveXDataset['sensitivities'] = [routingVariant, routingEplbVariant] + .flatMap((candidate, candidateIndex) => + metrics.map((metric, metricIndex) => ({ + sensitivity_id: decisionIds.sensitivities[candidateIndex * metrics.length + metricIndex], + cohort_id: routingCohortId, + label: `Routing sensitivity: ${metricLabel(metric)} T=128`, + publication_tier: 'comparable-experimental' as const, + baseline_series_id: series[0].series_id, + candidate_series_id: 
candidate.series_id, + metric, + signed_change_ratio: + (decisionMetricValue(candidate, metric) - decisionMetricValue(series[0], metric)) / + decisionMetricValue(series[0], metric), + eligibility: makeEligibility(), + })), + ) + .toSorted((left, right) => left.sensitivity_id.localeCompare(right.sensitivity_id)); + const coverage: CollectiveXDataset['coverage'] = series.map((item) => { + const retained = attempts.filter((attempt) => attempt.case_id === item.case_ids[0]); + const selected = retained.at(-1); + return { + case_id: item.case_ids[0], + label: `${item.backend.label} decode`, + required: true, + disposition: 'runnable', + sku: item.system.sku, + backend: item.backend.id, + phase: item.phase, + selected_attempt_id: selected?.attempt_id ?? null, + outcome: selected?.outcome ?? 'invalid', + failure_mode: selected?.failure_mode ?? null, + reason: selected ? selected.reason : 'missing-selected-attempt', + attempt_ids: retained.map((attempt) => attempt.attempt_id), + }; + }); + coverage.push({ + case_id: unsupportedCaseId, + label: 'MI355X / DeepEP / unsupported', + required: true, + disposition: 'unsupported', + sku: 'mi355x', + backend: 'deepep', + phase: 'decode', + selected_attempt_id: unsupportedAttempts.at(-1)!.attempt_id, + outcome: 'unsupported', + failure_mode: 'capability', + reason: 'backend-platform-unsupported', + attempt_ids: unsupportedAttempts.map((attempt) => attempt.attempt_id), + }); + const orderedAttempts = attempts.toSorted((left, right) => + left.attempt_id.localeCompare(right.attempt_id), + ); + return { + format: 'collectivex.public.v1', + schema_version: 1, + generated_at: '2026-07-04T01:00:00Z', + source_bundle_ids: ['a'.repeat(64), 'b'.repeat(64), 'c'.repeat(64)], + promotion: { + status: 'promoted', + matrix_id: '5'.repeat(64), + allocation_ids: [...allocations], + required_allocations: 3, + requested_cases: 8, + terminal_cases: 8, + policy: 'collectivex-decision-grade-v1', + reason: null, + }, + coverage, + attempts: 
orderedAttempts, + series, + cohorts: [ + { + cohort_id: cohortId, + kind: 'library' as const, + label: 'H100 EP8 library comparison', + description: 'Matched H100 EP8 uniform-routing library contrast', + publication_tier: 'official' as const, + series_ids: series.slice(0, 2).map((item) => item.series_id), + controlled_factors: [ + 'system', + 'workload', + 'phase', + 'measurement', + 'resource.mode', + 'source', + ], + varying_factors: ['backend', 'resource'], + eligibility: makeEligibility(), + }, + { + cohort_id: routingCohortId, + kind: 'routing' as const, + label: 'H100 EP8 routing comparison', + description: 'Matched H100 EP8 routing contrast', + publication_tier: 'comparable-experimental' as const, + series_ids: [series[0].series_id, routingVariant.series_id, routingEplbVariant.series_id], + controlled_factors: [ + 'backend', + 'implementation-static-build', + 'system', + 'model-shape', + 'phase', + 'measurement', + 'resource', + ], + varying_factors: ['workload.routing', 'workload.eplb', 'implementation-config'], + eligibility: makeEligibility(), + }, + { + cohort_id: decisionIds.chipCohort, + kind: 'chip' as const, + label: 'NVIDIA chip comparison', + description: 'Matched H100 and B200 DeepEP contrast', + publication_tier: 'official' as const, + series_ids: [series[0].series_id, chipVariant.series_id], + controlled_factors: [ + 'backend', + 'source', + 'workload', + 'phase', + 'measurement', + 'resource.mode', + ], + varying_factors: ['system', 'resource'], + eligibility: makeEligibility(), + }, + { + cohort_id: decisionIds.systemCohort, + kind: 'system' as const, + label: 'NVIDIA reference system comparison', + description: 'Matched H100 and B200 NCCL reference contrast', + publication_tier: 'official' as const, + series_ids: [series[2].series_id, systemVariant.series_id], + controlled_factors: ['workload', 'phase', 'measurement', 'source'], + varying_factors: ['system', 'backend', 'resource'], + eligibility: makeEligibility(), + }, + ].toSorted((left, 
right) => left.cohort_id.localeCompare(right.cohort_id)), + rankings, + recommendations, + sensitivities, + }; +} + +export function makeCollectiveXDatasetWithPrefillCohort(): CollectiveXDataset { + const dataset = makeCollectiveXDataset(); + const decode = dataset.cohorts.find((item) => item.cohort_id === decisionIds.libraryCohort)!; + const byId = new Map(dataset.series.map((item) => [item.series_id, item])); + const prefill = decode.series_ids.map((seriesId, index) => { + const item = structuredClone(byId.get(seriesId)!); + item.series_id = fixtureId('series', 20 + index); + item.case_ids = [fixtureId('case', 20 + index)]; + item.label = item.label.replace('uniform', 'uniform prefill'); + item.phase = 'prefill'; + item.points[0].point_id = fixtureId('point', 20 + index); + item.points[0].tokens_per_rank = 512; + item.points[0].global_tokens = 4096; + item.points[0].evidence_ids = allocations.map((_, allocationIndex) => + fixtureId('evidence', 200 + index * 10 + allocationIndex), + ); + return item; + }); + dataset.series.push(...prefill); + const prefillCohort = { + ...structuredClone(decode), + cohort_id: fixtureId('cohort', 20), + label: 'H100 EP8 prefill library comparison', + description: 'Matched H100 EP8 prefill library contrast', + series_ids: prefill.map((item) => item.series_id), + }; + dataset.cohorts.push(prefillCohort); + for (const [index, item] of prefill.entries()) { + const attemptIds = allocations.map((allocationId, allocationIndex) => { + const attempt_id = attemptId(20 + index, allocationIndex + 1, 1); + dataset.attempts.push({ + attempt_id, + evidence: [ + { + evidence_id: item.points[0].evidence_ids[allocationIndex], + point_id: item.points[0].point_id, + }, + ], + case_id: item.case_ids[0], + allocation_id: allocationId, + run_id: String(1000 + allocationIndex), + run_attempt: 1, + attempt_index: 1, + outcome: 'success', + failure_mode: null, + reason: null, + series_id: item.series_id, + selected: true, + completed_at: 
'2026-07-04T00:01:00Z', + }); + return attempt_id; + }); + dataset.coverage.push({ + case_id: item.case_ids[0], + label: item.label, + required: true, + sku: item.system.sku, + backend: item.backend.id, + phase: 'prefill', + disposition: 'runnable', + selected_attempt_id: attemptIds.at(-1)!, + outcome: 'success', + failure_mode: null, + reason: null, + attempt_ids: attemptIds, + }); + } + const decodeRankings = dataset.rankings.filter((item) => item.cohort_id === decode.cohort_id); + const prefillRankings = decodeRankings.map((ranking, index) => { + const metric = { ...ranking.metric, tokens_per_rank: 512, phase: 'prefill' as const }; + return { + ...structuredClone(ranking), + ranking_id: fixtureId('ranking', 20 + index), + cohort_id: prefillCohort.cohort_id, + label: ranking.label.replace('T=128', 'T=512').replace('Library', 'Prefill library'), + metric, + entries: prefill + .toSorted((left, right) => { + const delta = decisionMetricValue(left, metric) - decisionMetricValue(right, metric); + return metric.objective === 'min' ? delta : -delta; + }) + .map((item, entryIndex) => ({ + rank: entryIndex + 1, + series_id: item.series_id, + point_id: item.points[0].point_id, + value: decisionMetricValue(item, metric), + unit: metric.measure === 'latency_us' ? ('us' as const) : ('GB/s' as const), + })), + }; + }); + dataset.rankings.push(...prefillRankings); + dataset.recommendations.push( + ...prefillRankings.map((ranking, index) => { + const top = ranking.entries[0]; + return { + recommendation_id: fixtureId('recommendation', 20 + index), + cohort_id: prefillCohort.cohort_id, + label: `Best ${metricLabel(ranking.metric)} at T=512`, + objective: + ranking.metric.measure === 'latency_us' + ? 
(`min-${ranking.metric.statistic}-latency` as const) + : (`max-payload-rate-at-${ranking.metric.statistic}-latency` as const), + publication_tier: 'official' as const, + series_id: top.series_id, + point_id: top.point_id, + value: top.value, + unit: top.unit, + rationale: 'Top stable measured roundtrip result in a controlled cohort', + eligibility: makeEligibility(), + }; + }), + ); + dataset.promotion.requested_cases += prefill.length; + dataset.promotion.terminal_cases += prefill.length; + return dataset; +} + +export function makeCollectiveXDiagnosticDataset(): CollectiveXDataset { + const dataset = makeCollectiveXDataset(); + const series = dataset.series.find((item) => item.backend.role === 'reference')!; + series.status = 'diagnostic'; + const allocationId = series.allocation_ids[0]; + const evidenceId = series.points[0].evidence_ids[0]; + series.allocation_ids = [allocationId]; + series.points[0].evidence_ids = [evidenceId]; + series.eligibility = { + decision_grade: false, + allocation_ids: [allocationId], + complete: false, + correct: true, + measured_roundtrip_p99: true, + stable_p50: false, + stable_p99: false, + stable_ordering: false, + p50_max_min_ratio: null, + p99_max_min_ratio: null, + reasons: ['awaiting-repeat-allocations'], + }; + const attempt = dataset.attempts.find( + (item) => item.series_id === series.series_id && item.allocation_id === allocationId, + )!; + attempt.attempt_id = attemptId(3, 1, 2); + attempt.attempt_index = 2; + const failedAttempt: CollectiveXAttempt = { + attempt_id: attemptId(3, 1, 1), + evidence: [], + case_id: attempt.case_id, + allocation_id: attempt.allocation_id, + run_id: attempt.run_id, + run_attempt: attempt.run_attempt, + attempt_index: 1, + outcome: 'failed', + failure_mode: 'timeout', + reason: 'execution-timeout', + series_id: null, + selected: false, + completed_at: '2026-07-04T00:00:30Z', + }; + const coverage = dataset.coverage.find((item) => item.case_id === series.case_ids[0])!; + coverage.attempt_ids = 
[failedAttempt.attempt_id, attempt.attempt_id].toSorted(); + coverage.selected_attempt_id = attempt.attempt_id; + dataset.promotion = { + ...dataset.promotion, + status: 'diagnostic', + allocation_ids: [allocationId], + requested_cases: 1, + terminal_cases: 1, + }; + dataset.source_bundle_ids = [dataset.source_bundle_ids[0]]; + dataset.coverage = [coverage]; + dataset.attempts = [failedAttempt, attempt].toSorted((left, right) => + left.attempt_id.localeCompare(right.attempt_id), + ); + dataset.series = [series]; + dataset.cohorts = []; + dataset.rankings = []; + dataset.recommendations = []; + dataset.sensitivities = []; + return dataset; +} + +export function makeCollectiveXDatasetWithDiagnosticCohort(): CollectiveXDataset { + const dataset = makeCollectiveXDataset(); + const eligible = dataset.cohorts.find((item) => item.kind === 'library')!; + const cohort = { + ...eligible, + cohort_id: decisionIds.diagnosticLibraryCohort, + series_ids: eligible.series_ids.toReversed(), + eligibility: { + ...eligible.eligibility, + decision_grade: false, + stable_ordering: false, + reasons: ['unstable-ordering'], + }, + }; + dataset.cohorts.push(cohort); + dataset.cohorts.sort((left, right) => left.cohort_id.localeCompare(right.cohort_id)); + return dataset; +} diff --git a/packages/app/src/components/collectivex/types.ts b/packages/app/src/components/collectivex/types.ts new file mode 100644 index 00000000..c339ecf7 --- /dev/null +++ b/packages/app/src/components/collectivex/types.ts @@ -0,0 +1,340 @@ +import { z } from 'zod'; + +export type CollectiveXPhase = 'decode' | 'prefill'; +export type CollectiveXOperation = 'dispatch' | 'combine' | 'roundtrip' | 'isolated-sum'; +export type CollectiveXPercentile = 'p50' | 'p90' | 'p95' | 'p99'; +export type CollectiveXXAxis = 'tokens-per-rank' | 'global-tokens'; +export type CollectiveXYAxis = 'latency' | 'tokens-per-second' | 'payload-rate'; +export type CollectiveXScale = 'log' | 'linear'; + +const hex64 = 
z.string().regex(/^[a-f0-9]{64}$/); +const sourceHash = z.string().regex(/^[a-f0-9]{40,64}$/); +const typedId = (kind: string) => z.string().regex(new RegExp(`^cx${kind}-v1-[a-f0-9]{64}$`)); +const safeId = z + .string() + .max(128) + .regex(/^[a-z0-9][a-z0-9_.-]*$/); +const label = z.string().min(1).max(160); +const reason = z + .string() + .max(96) + .regex(/^[a-z0-9][a-z0-9.-]*$/) + .nullable(); +const timestamp = z.iso.datetime({ offset: true }); +const positiveInteger = z.number().int().safe().positive(); +const nonnegativeInteger = z.number().int().safe().nonnegative(); +const publicationTier = z.enum(['official', 'comparable-experimental']); +const unique = (schema: z.ZodType) => + z.array(schema).refine((items) => new Set(items).size === items.length, 'duplicate values'); + +export const collectiveXChannelSchema = z.strictObject({ + format: z.literal('collectivex.channel.v1'), + channel: z.enum(['latest-attempt', 'dev-latest']), + generated_at: timestamp, + dataset: z.strictObject({ + path: z.string().regex(/^datasets\/[a-f0-9]{64}\/dataset\.json$/), + sha256: hex64, + bytes: positiveInteger.max(32 * 1024 * 1024), + }), +}); + +const percentilesSchema = z.strictObject({ + p50: z.number().finite().positive(), + p90: z.number().finite().positive(), + p95: z.number().finite().positive(), + p99: z.number().finite().positive(), +}); +const componentSchema = z.strictObject({ + origin: z.enum(['measured', 'derived']), + latency_us: percentilesSchema, + logical_bytes: positiveInteger.nullable(), + logical_payload_rate_gbps_at_latency_percentile: percentilesSchema.nullable(), + sample_count: positiveInteger.nullable(), +}); +const routingEvidenceSchema = z.strictObject({ + fanout_mean: z.number().finite().nonnegative(), + recv_tokens_max: nonnegativeInteger, + expert_load_cv: z.number().finite().nonnegative(), + payload_rank_cv: z.number().finite().nonnegative(), + hotspot_ratio: z.number().finite().nonnegative(), + empty_expert_count: nonnegativeInteger, + 
empty_rank_count: nonnegativeInteger, + routed_copies: positiveInteger, +}); +const eligibilitySchema = z + .strictObject({ + decision_grade: z.boolean(), + allocation_ids: unique(typedId('allocation')), + complete: z.boolean(), + correct: z.boolean(), + measured_roundtrip_p99: z.boolean(), + stable_p50: z.boolean(), + stable_p99: z.boolean(), + stable_ordering: z.boolean(), + p50_max_min_ratio: z.number().finite().min(1).nullable(), + p99_max_min_ratio: z.number().finite().min(1).nullable(), + reasons: unique(reason.unwrap()), + }) + .refine((value) => value.decision_grade === (value.reasons.length === 0), { + path: ['reasons'], + message: + 'decision-grade eligibility must have no reasons; diagnostic eligibility must have reasons', + }); +const pointSchema = z.strictObject({ + point_id: typedId('point'), + tokens_per_rank: positiveInteger, + global_tokens: positiveInteger, + correct: z.boolean(), + routing: routingEvidenceSchema, + components: z.strictObject({ + dispatch: componentSchema.nullable(), + combine: componentSchema.nullable(), + roundtrip: componentSchema.nullable(), + isolated_sum: componentSchema.nullable(), + }), + roundtrip_token_rate_at_latency_percentile: percentilesSchema, + evidence_ids: unique(typedId('evidence')), +}); +const seriesSchema = z.strictObject({ + series_id: typedId('series'), + label, + status: z.enum(['decision-grade', 'diagnostic']), + case_ids: unique(typedId('case')).min(1), + allocation_ids: unique(typedId('allocation')).min(1), + model: safeId, + suite: safeId, + publication_tier: publicationTier, + phase: z.enum(['decode', 'prefill']), + backend: z.strictObject({ + id: safeId, + label, + role: z.enum(['library', 'reference']), + generation: label.nullable(), + version: label.nullable(), + }), + build: z.strictObject({ + implementation_contract_sha256: hex64, + public_config_sha256: hex64, + routing_control_sha256: hex64, + runtime_fingerprint_sha256: hex64, + image_digest: z.string().regex(/^sha256:[a-f0-9]{64}$/), + 
source_sha: sourceHash, + squash_sha256: hex64, + }), + system: z.strictObject({ + sku: safeId, + label, + vendor: z.enum(['nvidia', 'amd']), + topology_class: safeId, + transport: safeId, + world_size: positiveInteger, + ep_size: positiveInteger, + placement: z.literal('packed'), + }), + workload: z.strictObject({ + workload_id: typedId('work'), + hidden: positiveInteger, + top_k: positiveInteger, + experts: positiveInteger, + routing: z.enum(['uniform', 'zipf']), + eplb: z.boolean(), + dispatch_dtype: z.literal('bf16'), + combine_dtype: z.literal('bf16'), + activation_profile: z.literal('canonical-counter-source-v3'), + }), + eplb: z.strictObject({ + enabled: z.boolean(), + planner: label.nullable(), + mapping_sha256: hex64.nullable(), + logical_experts: positiveInteger, + physical_experts: positiveInteger, + redundant_experts: nonnegativeInteger, + reference_tokens_per_rank: positiveInteger.nullable(), + replicated_experts: nonnegativeInteger, + max_replicas: nonnegativeInteger.nullable(), + imbalance_before: z.number().finite().nonnegative().nullable(), + imbalance_after: z.number().finite().nonnegative().nullable(), + }), + resource: z.strictObject({ + mode: z.literal('tuned'), + profile: safeId, + comm_units_kind: label.nullable(), + configured_units: positiveInteger.nullable(), + }), + measurement: z.strictObject({ + contract: z.literal('layout-and-dispatch-v1'), + sampling_contract: z.literal('fixed-512-v1'), + iters: z.literal(8), + trials: z.literal(64), + warmups: z.literal(32), + samples_per_component: z.literal(512), + headline_component: z.literal('roundtrip'), + headline_percentile: z.literal('p99'), + }), + points: z.array(pointSchema).min(1), + eligibility: eligibilitySchema, +}); +const outcome = z.enum(['success', 'unsupported', 'failed', 'invalid', 'diagnostic']); +const coverageSchema = z.strictObject({ + case_id: typedId('case'), + label, + required: z.boolean(), + disposition: z.enum(['runnable', 'unsupported']), + sku: safeId, + backend: 
safeId, + phase: z.enum(['decode', 'prefill']), + selected_attempt_id: typedId('attempt').nullable(), + outcome, + failure_mode: reason, + reason, + attempt_ids: unique(typedId('attempt')), +}); +const attemptSchema = z.strictObject({ + attempt_id: typedId('attempt'), + evidence: z + .array( + z.strictObject({ + evidence_id: typedId('evidence'), + point_id: typedId('point'), + }), + ) + .refine( + (items) => + new Set(items.map((item) => `${item.evidence_id}\0${item.point_id}`)).size === items.length, + 'duplicate evidence items', + ), + case_id: typedId('case'), + allocation_id: typedId('allocation'), + run_id: z.string().regex(/^[1-9][0-9]*$/), + run_attempt: positiveInteger, + attempt_index: positiveInteger, + outcome, + failure_mode: reason, + reason, + series_id: typedId('series').nullable(), + selected: z.boolean(), + completed_at: timestamp.nullable(), +}); +const metricSchema = z.strictObject({ + operation: z.literal('roundtrip'), + statistic: z.enum(['p50', 'p99']), + measure: z.enum(['latency_us', 'logical_payload_rate_gbps_at_latency_percentile']), + objective: z.enum(['min', 'max']), + tokens_per_rank: positiveInteger, + phase: z.enum(['decode', 'prefill']), +}); +const cohortSchema = z.strictObject({ + cohort_id: typedId('cohort'), + kind: z.enum(['library', 'chip', 'system', 'routing']), + label, + description: label, + publication_tier: publicationTier, + series_ids: unique(typedId('series')).min(2), + controlled_factors: unique(safeId).min(1), + varying_factors: unique(safeId).min(1), + eligibility: eligibilitySchema, +}); +const rankingSchema = z.strictObject({ + ranking_id: typedId('ranking'), + cohort_id: typedId('cohort'), + label, + publication_tier: publicationTier, + metric: metricSchema, + entries: z + .array( + z.strictObject({ + rank: positiveInteger, + series_id: typedId('series'), + point_id: typedId('point'), + value: z.number().finite().positive(), + unit: z.enum(['us', 'GB/s']), + }), + ) + .min(2), + eligibility: eligibilitySchema, 
+}); +const recommendationSchema = z.strictObject({ + recommendation_id: typedId('recommendation'), + cohort_id: typedId('cohort'), + label, + objective: z.enum([ + 'min-p50-latency', + 'min-p99-latency', + 'max-payload-rate-at-p50-latency', + 'max-payload-rate-at-p99-latency', + ]), + publication_tier: z.literal('official'), + series_id: typedId('series'), + point_id: typedId('point'), + value: z.number().finite().positive(), + unit: z.enum(['us', 'GB/s']), + rationale: label, + eligibility: eligibilitySchema, +}); +const sensitivitySchema = z.strictObject({ + sensitivity_id: typedId('sensitivity'), + cohort_id: typedId('cohort'), + label, + publication_tier: publicationTier, + baseline_series_id: typedId('series'), + candidate_series_id: typedId('series'), + metric: metricSchema, + signed_change_ratio: z.number().finite(), + eligibility: eligibilitySchema, +}); + +export const collectiveXDatasetSchema = z.strictObject({ + format: z.literal('collectivex.public.v1'), + schema_version: z.literal(1), + generated_at: timestamp, + source_bundle_ids: unique(hex64), + promotion: z.strictObject({ + status: z.enum(['promoted', 'diagnostic', 'quarantined']), + reason, + matrix_id: hex64.nullable(), + allocation_ids: unique(typedId('allocation')), + required_allocations: z.literal(3), + requested_cases: nonnegativeInteger, + terminal_cases: nonnegativeInteger, + policy: z.literal('collectivex-decision-grade-v1'), + }), + coverage: z.array(coverageSchema), + attempts: z.array(attemptSchema), + series: z.array(seriesSchema), + cohorts: z.array(cohortSchema), + rankings: z.array(rankingSchema), + recommendations: z.array(recommendationSchema), + sensitivities: z.array(sensitivitySchema), +}); + +export type CollectiveXChannel = z.infer; +export type CollectiveXDataset = z.infer; +export type CollectiveXComponent = z.infer; +export type CollectiveXPoint = z.infer; +export type CollectiveXSeries = z.infer; +export type CollectiveXCoverage = z.infer; +export type 
CollectiveXAttempt = z.infer; +export type CollectiveXEligibility = z.infer; +export type CollectiveXMetric = z.infer; +export type CollectiveXCohort = z.infer; +export type CollectiveXRanking = z.infer; +export type CollectiveXRecommendation = z.infer; +export type CollectiveXSensitivity = z.infer; +export type CollectiveXOutcome = z.infer; +export type CollectiveXPublicationTier = z.infer; +export interface CollectiveXResolvedDataset { + channel: CollectiveXChannel; + dataset: CollectiveXDataset; + digest: string; +} +export interface CollectiveXChartPoint { + seriesId: string; + seriesLabel: string; + colorKey: string; + x: number; + y: number; + operation: CollectiveXOperation; + percentile: CollectiveXPercentile; + point: CollectiveXPoint; + series: CollectiveXSeries; +} diff --git a/packages/app/src/components/dashboard-shell.tsx b/packages/app/src/components/dashboard-shell.tsx index 17eb1386..ea341f79 100644 --- a/packages/app/src/components/dashboard-shell.tsx +++ b/packages/app/src/components/dashboard-shell.tsx @@ -4,8 +4,19 @@ import { GlobalFilterProvider } from '@/components/GlobalFilterContext'; import { NudgeEngine } from '@/components/nudge-engine'; import { TabNav } from '@/components/tab-nav'; import { UnofficialRunProvider } from '@/components/unofficial-run-provider'; +import { usePathname } from 'next/navigation'; export function DashboardShell({ children }: { children: React.ReactNode }) { + const pathname = usePathname(); + const content = ( +
+
+ + {children} +
+
+ ); + if (pathname === '/collectivex') return content; return ( <> diff --git a/packages/app/src/components/header/header.tsx b/packages/app/src/components/header/header.tsx index 0fe42e86..9868fdb5 100644 --- a/packages/app/src/components/header/header.tsx +++ b/packages/app/src/components/header/header.tsx @@ -24,6 +24,7 @@ const DASHBOARD_TABS = [ '/reliability', '/gpu-specs', '/gpu-metrics', + '/collectivex', '/submissions', '/current-inferencex-image', ]; diff --git a/packages/app/src/components/tab-nav.tsx b/packages/app/src/components/tab-nav.tsx index d15ec06d..a1f0adb4 100644 --- a/packages/app/src/components/tab-nav.tsx +++ b/packages/app/src/components/tab-nav.tsx @@ -31,6 +31,7 @@ const VISIBLE_TABS = [ { href: '/historical', label: 'Historical Trends', testId: 'tab-trigger-historical' }, { href: '/calculator', label: 'TCO Calculator', testId: 'tab-trigger-calculator' }, { href: '/gpu-specs', label: 'GPU Specs', testId: 'tab-trigger-gpu-specs' }, + { href: '/collectivex', label: 'CollectiveX', testId: 'tab-trigger-collectivex' }, { href: '/submissions', label: 'Submissions', testId: 'tab-trigger-submissions' }, ] as const; diff --git a/packages/app/src/hooks/api/use-collectivex.ts b/packages/app/src/hooks/api/use-collectivex.ts new file mode 100644 index 00000000..fa0213a3 --- /dev/null +++ b/packages/app/src/hooks/api/use-collectivex.ts @@ -0,0 +1,13 @@ +import { useQuery } from '@tanstack/react-query'; + +import { fetchCollectiveX } from '@/lib/api'; +import type { CollectiveXChannelName } from '@/components/collectivex/reader'; + +export function useCollectiveX(channel: CollectiveXChannelName = 'dev-latest') { + return useQuery({ + queryKey: ['collectivex', channel, 1], + queryFn: ({ signal }) => fetchCollectiveX(channel, signal), + staleTime: 0, + refetchOnMount: 'always', + }); +} diff --git a/packages/app/src/lib/api.test.ts b/packages/app/src/lib/api.test.ts index a1f29006..a39d2481 100644 --- a/packages/app/src/lib/api.test.ts +++ 
b/packages/app/src/lib/api.test.ts @@ -4,9 +4,12 @@ import { fetchBenchmarks, fetchWorkflowInfo, fetchAvailability, + fetchCollectiveX, fetchReliability, fetchEvaluations, } from './api'; +import { makeCollectiveXDataset } from '@/components/collectivex/test-fixture'; +import { collectiveXChannelUrl, sha256Hex } from '@/components/collectivex/reader'; const mockFetch = vi.fn(); vi.stubGlobal('fetch', mockFetch); @@ -126,3 +129,41 @@ describe('fetchEvaluations', () => { expect(result[0].task).toBe('gsm8k'); }); }); + +describe('fetchCollectiveX', () => { + it('resolves the no-cache channel to a digest-addressed dataset', async () => { + const bytes = new TextEncoder().encode(JSON.stringify(makeCollectiveXDataset())); + const digest = await sha256Hex(bytes); + mockFetch + .mockResolvedValueOnce({ + ok: true, + text: () => + Promise.resolve( + JSON.stringify({ + format: 'collectivex.channel.v1', + channel: 'dev-latest', + generated_at: '2026-07-04T01:00:00Z', + dataset: { + path: `datasets/${digest}/dataset.json`, + sha256: digest, + bytes: bytes.length, + }, + }), + ), + }) + .mockResolvedValueOnce({ ok: true, arrayBuffer: () => Promise.resolve(bytes.buffer) }); + + const result = await fetchCollectiveX(); + + expect(mockFetch).toHaveBeenCalledWith( + collectiveXChannelUrl('dev-latest'), + expect.objectContaining({ cache: 'no-store', credentials: 'same-origin' }), + ); + expect(mockFetch).toHaveBeenLastCalledWith( + `/collectivex-data/datasets/${digest}/dataset.json`, + expect.objectContaining({ cache: 'force-cache', credentials: 'same-origin' }), + ); + expect(result.dataset.format).toBe('collectivex.public.v1'); + expect(result.digest).toBe(digest); + }); +}); diff --git a/packages/app/src/lib/api.ts b/packages/app/src/lib/api.ts index a9d66715..8a34e234 100644 --- a/packages/app/src/lib/api.ts +++ b/packages/app/src/lib/api.ts @@ -3,6 +3,10 @@ * Each function is a thin fetch wrapper returning typed data. 
*/ +import { + fetchCollectiveXPublication, + type CollectiveXChannelName, +} from '@/components/collectivex/reader'; import type { WorkerPower } from '@/components/inference/types'; import type { SubmissionsResponse } from './submissions-types'; @@ -300,6 +304,13 @@ export function fetchSubmissions(signal?: AbortSignal) { return fetchJson('/api/v1/submissions', signal); } +export function fetchCollectiveX( + channel: CollectiveXChannelName = 'dev-latest', + signal?: AbortSignal, +) { + return fetchCollectiveXPublication(channel, signal); +} + export interface FeedbackListRow { id: string; created_at: string; diff --git a/packages/app/src/lib/d3-chart/D3Chart/types.ts b/packages/app/src/lib/d3-chart/D3Chart/types.ts index 3062784e..7d62eda0 100644 --- a/packages/app/src/lib/d3-chart/D3Chart/types.ts +++ b/packages/app/src/lib/d3-chart/D3Chart/types.ts @@ -126,6 +126,8 @@ export interface AxisConfig { label?: string; tickFormat?: (d: d3.AxisDomain) => string; tickCount?: number; + /** Explicit ticks or a domain-aware generator, useful for geometric and sparse log axes. */ + tickValues?: (number | Date)[] | ((scale: AnyScale) => (number | Date)[]); /** Post-render callback for custom axis label formatting (e.g., multi-line tspan). 
*/ customize?: (axisGroup: d3.Selection) => void; } diff --git a/packages/app/src/lib/d3-chart/D3Chart/useD3ChartRenderer.ts b/packages/app/src/lib/d3-chart/D3Chart/useD3ChartRenderer.ts index 8953d156..7d9ba3ec 100644 --- a/packages/app/src/lib/d3-chart/D3Chart/useD3ChartRenderer.ts +++ b/packages/app/src/lib/d3-chart/D3Chart/useD3ChartRenderer.ts @@ -8,7 +8,7 @@ import type { ChartLayout, ContinuousScale } from '../types'; import { buildScale, isBandScale, type BuiltScale } from './scale-builders'; import { renderLayer, updateLayerOnZoom } from './layer-renderer'; -import type { D3ChartProps, RenderContext, ZoomContext } from './types'; +import type { AxisConfig, D3ChartProps, RenderContext, ZoomContext } from './types'; interface RendererDeps { svgRef: React.RefObject; @@ -51,6 +51,14 @@ interface RendererDeps { ) => void; } +function resolveTickValues( + tickValues: AxisConfig['tickValues'], + scale: AnyScale, +): (number | Date)[] | undefined { + if (!tickValues) return undefined; + return typeof tickValues === 'function' ? tickValues(scale) : tickValues; +} + /** * Core render effect for D3Chart. Builds scales, renders structure/axes/grid/layers, * wires up tooltip and zoom handlers. @@ -97,7 +105,14 @@ export function useD3ChartRenderer(props: D3ChartProps, deps: RendererDeps // preventing a frame where dots and lines are out of sync during y-axis metric changes. 
useLayoutEffect(() => { if (!svgRef.current || !tooltipRef.current || dimensions.width === 0) return; - if (data.length === 0 && layers.every((l) => l.type !== 'custom')) return; + if (data.length === 0 && layers.every((layer) => layer.type !== 'custom')) { + d3.select(svgRef.current).selectAll('*').remove(); + scalesRef.current = null; + layoutRef.current = null; + dismissTooltip(true); + prevDataRef.current = data; + return; + } // Animate when data or scale domains changed (but not on resize/theme changes) const dataChanged = data !== prevDataRef.current; @@ -162,12 +177,24 @@ export function useD3ChartRenderer(props: D3ChartProps, deps: RendererDeps // ── Grid + Axes (skip when no scale configs) ── if (hasScales) { - renderGrid(layout, xScale as AnyScale, yScale as any, yAxisConfig?.tickCount ?? 5); + const xTickValues = resolveTickValues(xAxisConfig?.tickValues, xScale as AnyScale); + const yTickValues = resolveTickValues(yAxisConfig?.tickValues, yScale as AnyScale); + renderGrid( + layout, + xScale as AnyScale, + yScale as any, + yAxisConfig?.tickCount ?? 
5, + 0, + xTickValues, + yTickValues, + ); renderAxes(layout, xScale as AnyScale, yScale as any, { xTickFormat: xAxisConfig?.tickFormat, yTickFormat: yAxisConfig?.tickFormat, xTickCount: xAxisConfig?.tickCount, yTickCount: yAxisConfig?.tickCount, + xTickValues, + yTickValues, }); // Custom axis formatting callbacks @@ -408,11 +435,15 @@ export function useD3ChartRenderer(props: D3ChartProps, deps: RendererDeps } // Update axes + grid + const xTickValues = resolveTickValues(xAxisConfig?.tickValues, newXScale as AnyScale); + const yTickValues = resolveTickValues(yAxisConfig?.tickValues, newYScale as AnyScale); renderAxes(layout, newXScale as AnyScale, newYScale as any, { xTickFormat: xAxisConfig?.tickFormat, yTickFormat: yAxisConfig?.tickFormat, xTickCount: xAxisConfig?.tickCount, yTickCount: yAxisConfig?.tickCount, + xTickValues, + yTickValues, }); if (xAxisConfig?.customize) { xAxisConfig.customize(layout.xAxisGroup); @@ -425,6 +456,9 @@ export function useD3ChartRenderer(props: D3ChartProps, deps: RendererDeps newXScale as AnyScale, newYScale as any, yAxisConfig?.tickCount ?? 
5, + 0, + xTickValues, + yTickValues, ); // Update layers diff --git a/packages/app/src/lib/d3-chart/chart-update.test.ts b/packages/app/src/lib/d3-chart/chart-update.test.ts index a4a52b85..a8b3d012 100644 --- a/packages/app/src/lib/d3-chart/chart-update.test.ts +++ b/packages/app/src/lib/d3-chart/chart-update.test.ts @@ -51,6 +51,23 @@ describe('renderAxes', () => { expect(tickCount).toBeLessThanOrEqual(8); }); + it('renders only explicit x tick values within the visible domain', () => { + const layout = makeLayout(); + const xScale = d3.scaleLinear().domain([2, 10]).range([0, layout.width]); + const yScale = d3.scaleLinear().domain([0, 50]).range([layout.height, 0]); + + renderAxes(layout, xScale, yScale, { + xTickValues: [1, 2, 4, 16], + xTickFormat: String, + }); + + const labels: string[] = []; + layout.xAxisGroup.selectAll('.tick text').each(function () { + labels.push(d3.select(this).text()); + }); + expect(labels).toEqual(['2', '4']); + }); + it('respects yTickCount', () => { const layout = makeLayout(); const xScale = d3.scaleLinear().domain([0, 100]).range([0, layout.width]); @@ -130,6 +147,26 @@ describe('renderAxes', () => { }); }); + describe('with log scales', () => { + it('uses measured geometric sweep values instead of generated log subdivisions', () => { + const layout = makeLayout(); + const xScale = d3.scaleLog().base(2).domain([0.9, 70]).range([0, layout.width]); + const yScale = d3.scaleLinear().domain([0, 50]).range([layout.height, 0]); + const measuredValues = [1, 2, 4, 8, 16, 32, 64]; + + renderAxes(layout, xScale, yScale, { + xTickValues: measuredValues, + xTickFormat: String, + }); + + const labels: string[] = []; + layout.xAxisGroup.selectAll('.tick text').each(function () { + labels.push(d3.select(this).text()); + }); + expect(labels).toEqual(measuredValues.map(String)); + }); + }); + describe('with band scales', () => { it('renders band scale on x-axis', () => { const layout = makeLayout(); @@ -281,6 +318,24 @@ describe('renderGrid', 
() => { expect(vLines).toBeGreaterThan(0); }); + it('uses explicit x tick values for vertical grid lines', () => { + const layout = makeLayout(); + const xScale = d3.scaleLog().base(2).domain([1, 64]).range([0, layout.width]); + const yScale = d3.scaleLinear().domain([0, 50]).range([layout.height, 0]); + const measuredValues = [1, 4, 16, 64]; + + renderGrid(layout, xScale, yScale, 5, 0, measuredValues); + + const positions: number[] = []; + layout.gridGroup + .select('.grid-v') + .selectAll('line') + .each(function () { + positions.push(Number(d3.select(this).attr('x1'))); + }); + expect(positions).toEqual(measuredValues.map((value) => xScale(value))); + }); + it('creates horizontal grid lines matching y-scale ticks', () => { const layout = makeLayout(); const xScale = d3.scaleLinear().domain([0, 100]).range([0, layout.width]); diff --git a/packages/app/src/lib/d3-chart/chart-update.ts b/packages/app/src/lib/d3-chart/chart-update.ts index 45c458c7..fb79ac27 100644 --- a/packages/app/src/lib/d3-chart/chart-update.ts +++ b/packages/app/src/lib/d3-chart/chart-update.ts @@ -10,6 +10,8 @@ export interface AxisUpdateConfig { yTickFormat?: (d: d3.AxisDomain) => string; xTickCount?: number; yTickCount?: number; + xTickValues?: (number | Date)[]; + yTickValues?: (number | Date)[]; /** Override tick size for Y axis (default: 6, use 0 for band scales). */ yTickSize?: number; /** When set, axes animate to new positions over this duration (ms). */ @@ -23,8 +25,16 @@ export function renderAxes( yScale: ContinuousScale | d3.ScaleBand, config: AxisUpdateConfig, ): void { - const { xTickFormat, yTickFormat, xTickCount, yTickCount, yTickSize, transitionDuration } = - config; + const { + xTickFormat, + yTickFormat, + xTickCount, + yTickCount, + xTickValues, + yTickValues, + yTickSize, + transitionDuration, + } = config; const dur = transitionDuration ?? 
0; // X axis @@ -36,6 +46,9 @@ export function renderAxes( } else { const gen = d3.axisBottom(xScale as ContinuousScale).tickSize(6); if (xTickCount) gen.ticks(xTickCount); + if (xTickValues) { + gen.tickValues(visibleTickValues(xScale, xTickValues) as Iterable); + } if (xTickFormat) gen.tickFormat(xTickFormat as any); xAxisGen = gen as unknown as d3.Axis; } @@ -54,6 +67,9 @@ export function renderAxes( } else { const yAxisGen = d3.axisLeft(yScale as ContinuousScale).tickSize(yTickSize ?? 6); if (yTickCount) yAxisGen.ticks(yTickCount); + if (yTickValues) { + yAxisGen.tickValues(visibleTickValues(yScale, yTickValues) as Iterable); + } if (yTickFormat) yAxisGen.tickFormat(yTickFormat as any); const yTarget = dur > 0 ? layout.yAxisGroup.transition().duration(dur) : layout.yAxisGroup; (yTarget as any).call(yAxisGen as any); @@ -67,6 +83,8 @@ export function renderGrid( yScale: ContinuousScale | d3.ScaleBand, yTickCount?: number, transitionDuration = 0, + xTickValues?: (number | Date)[], + yTickValues?: (number | Date)[], ): void { const { width, height, gridGroup } = layout; const dur = transitionDuration; @@ -87,7 +105,9 @@ export function renderGrid( .attr('y2', height); } else { const tickScale = xScale as { ticks: (count?: number) => number[]; (v: number): number }; - const xTicks = tickScale.ticks(); + const xTicks = xTickValues + ? (visibleTickValues(xScale, xTickValues) as number[]) + : tickScale.ticks(); const vJoin = vGroup .selectAll('line') .data(xTicks) @@ -126,7 +146,9 @@ export function renderGrid( .attr('y2', (d) => (bandScale(d) || 0) + bandScale.bandwidth() / 2) .style('stroke-width', 0.5); } else { - const yTicks = yScale.ticks(yTickCount ?? 5); + const yTicks = yTickValues + ? (visibleTickValues(yScale, yTickValues) as number[]) + : yScale.ticks(yTickCount ?? 
5); const hJoin = hGroup .selectAll('line') .data(yTicks) @@ -149,3 +171,18 @@ export function renderGrid( .attr('y2', (d: number) => yScale(d)); } } + +function visibleTickValues( + scale: ContinuousScale | d3.ScaleTime, + values: (number | Date)[], +): (number | Date)[] { + const domain = scale.domain(); + const start = Number(domain[0]); + const end = Number(domain.at(-1)); + const min = Math.min(start, end); + const max = Math.max(start, end); + return values.filter((value) => { + const numeric = Number(value); + return Number.isFinite(numeric) && numeric >= min && numeric <= max; + }); +} diff --git a/packages/app/src/lib/tab-meta.ts b/packages/app/src/lib/tab-meta.ts index b312a6e7..5d641b23 100644 --- a/packages/app/src/lib/tab-meta.ts +++ b/packages/app/src/lib/tab-meta.ts @@ -16,6 +16,7 @@ export const VALID_TABS = [ 'calculator', 'reliability', 'gpu-specs', + 'collectivex', 'ai-chart', 'gpu-metrics', 'submissions', @@ -56,6 +57,11 @@ export const TAB_META: Record = description: 'Detailed GPU specifications for AI inference. Compare NVIDIA, AMD, and Intel GPUs — memory bandwidth, FLOPS, interconnects, and topology.', }, + collectivex: { + title: 'CollectiveX Communication Benchmarks', + description: + 'Experimental cross-vendor expert-parallel communication benchmarks. 
Compare MoE dispatch and combine latency across NVIDIA and AMD GPU platforms.', + }, 'ai-chart': { title: 'AI-Powered Chart Generation', description: diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 58cdbba9..45760282 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -51,6 +51,9 @@ importers: '@noble/ciphers': specifier: ^2.2.0 version: 2.2.0 + '@noble/hashes': + specifier: ^2.2.0 + version: 2.2.0 '@posthog/nextjs-config': specifier: ^1.9.68 version: 1.9.68(next@16.2.9(@babel/core@8.0.1)(@opentelemetry/api@1.9.1)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(webpack@5.106.2(esbuild@0.28.1)(postcss@8.5.15)) @@ -165,6 +168,9 @@ importers: three: specifier: ^0.185.0 version: 0.185.0 + zod: + specifier: ^4.4.3 + version: 4.4.3 devDependencies: '@bahmutov/cypress-esbuild-preprocessor': specifier: ^2.2.8 @@ -216,7 +222,7 @@ importers: version: 6.2.5 jsdom: specifier: ^29.1.1 - version: 29.1.1 + version: 29.1.1(@noble/hashes@2.2.0) tailwindcss: specifier: ^4.3.2 version: 4.3.2 @@ -231,7 +237,7 @@ importers: version: 6.0.3 vitest: specifier: ^4.1.9 - version: 4.1.9(@opentelemetry/api@1.9.1)(@types/node@26.0.1)(@vitest/coverage-v8@4.1.9)(jsdom@29.1.1)(vite@8.1.0(@types/node@26.0.1)(esbuild@0.28.1)(jiti@2.7.0)(terser@5.47.1)(tsx@4.22.4)(yaml@2.9.0)) + version: 4.1.9(@opentelemetry/api@1.9.1)(@types/node@26.0.1)(@vitest/coverage-v8@4.1.9)(jsdom@29.1.1(@noble/hashes@2.2.0))(vite@8.1.0(@types/node@26.0.1)(esbuild@0.28.1)(jiti@2.7.0)(terser@5.47.1)(tsx@4.22.4)(yaml@2.9.0)) packages/constants: devDependencies: @@ -240,7 +246,7 @@ importers: version: 4.1.9(vitest@4.1.9) vitest: specifier: ^4.1.9 - version: 4.1.9(@opentelemetry/api@1.9.1)(@types/node@26.0.1)(@vitest/coverage-v8@4.1.9)(jsdom@29.1.1)(vite@8.1.0(@types/node@26.0.1)(esbuild@0.28.1)(jiti@2.7.0)(terser@5.47.1)(tsx@4.22.4)(yaml@2.9.0)) + version: 
4.1.9(@opentelemetry/api@1.9.1)(@types/node@26.0.1)(@vitest/coverage-v8@4.1.9)(jsdom@29.1.1(@noble/hashes@2.2.0))(vite@8.1.0(@types/node@26.0.1)(esbuild@0.28.1)(jiti@2.7.0)(terser@5.47.1)(tsx@4.22.4)(yaml@2.9.0)) packages/db: dependencies: @@ -289,7 +295,7 @@ importers: version: 6.0.3 vitest: specifier: ^4.1.9 - version: 4.1.9(@opentelemetry/api@1.9.1)(@types/node@26.0.1)(@vitest/coverage-v8@4.1.9)(jsdom@29.1.1)(vite@8.1.0(@types/node@26.0.1)(esbuild@0.28.1)(jiti@2.7.0)(terser@5.47.1)(tsx@4.22.4)(yaml@2.9.0)) + version: 4.1.9(@opentelemetry/api@1.9.1)(@types/node@26.0.1)(@vitest/coverage-v8@4.1.9)(jsdom@29.1.1(@noble/hashes@2.2.0))(vite@8.1.0(@types/node@26.0.1)(esbuild@0.28.1)(jiti@2.7.0)(terser@5.47.1)(tsx@4.22.4)(yaml@2.9.0)) packages/mcp: dependencies: @@ -323,7 +329,7 @@ importers: version: 6.0.3 vitest: specifier: ^4.1.9 - version: 4.1.9(@opentelemetry/api@1.9.1)(@types/node@26.0.1)(@vitest/coverage-v8@4.1.9)(jsdom@29.1.1)(vite@8.1.0(@types/node@26.0.1)(esbuild@0.28.1)(jiti@2.7.0)(terser@5.47.1)(tsx@4.22.4)(yaml@2.9.0)) + version: 4.1.9(@opentelemetry/api@1.9.1)(@types/node@26.0.1)(@vitest/coverage-v8@4.1.9)(jsdom@29.1.1(@noble/hashes@2.2.0))(vite@8.1.0(@types/node@26.0.1)(esbuild@0.28.1)(jiti@2.7.0)(terser@5.47.1)(tsx@4.22.4)(yaml@2.9.0)) packages: @@ -985,6 +991,10 @@ packages: resolution: {integrity: sha512-Z6pjIZ/8IJcCGzb2S/0Px5J81yij85xASuk1teLNeg75bfT07MV3a/O2Mtn1I2se43k3lkVEcFaR10N4cgQcZA==} engines: {node: '>= 20.19.0'} + '@noble/hashes@2.2.0': + resolution: {integrity: sha512-IYqDGiTXab6FniAgnSdZwgWbomxpy9FtYvLKs7wCUs2a8RkITG+DFGO1DM9cr+E3/RgADRpFjrKVaJ1z6sjtEg==} + engines: {node: '>= 20.19.0'} + '@nodelib/fs.scandir@2.1.5': resolution: {integrity: sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==} engines: {node: '>= 8'} @@ -5773,7 +5783,9 @@ snapshots: '@esbuild/win32-x64@0.28.1': optional: true - '@exodus/bytes@1.15.0': {} + '@exodus/bytes@1.15.0(@noble/hashes@2.2.0)': + optionalDependencies: + 
'@noble/hashes': 2.2.0 '@floating-ui/core@1.7.5': dependencies: @@ -6014,6 +6026,8 @@ snapshots: '@noble/ciphers@2.2.0': {} + '@noble/hashes@2.2.0': {} + '@nodelib/fs.scandir@2.1.5': dependencies: '@nodelib/fs.stat': 2.0.5 @@ -7085,7 +7099,7 @@ snapshots: obug: 2.1.1 std-env: 4.1.0 tinyrainbow: 3.1.0 - vitest: 4.1.9(@opentelemetry/api@1.9.1)(@types/node@26.0.1)(@vitest/coverage-v8@4.1.9)(jsdom@29.1.1)(vite@8.1.0(@types/node@26.0.1)(esbuild@0.28.1)(jiti@2.7.0)(terser@5.47.1)(tsx@4.22.4)(yaml@2.9.0)) + vitest: 4.1.9(@opentelemetry/api@1.9.1)(@types/node@26.0.1)(@vitest/coverage-v8@4.1.9)(jsdom@29.1.1(@noble/hashes@2.2.0))(vite@8.1.0(@types/node@26.0.1)(esbuild@0.28.1)(jiti@2.7.0)(terser@5.47.1)(tsx@4.22.4)(yaml@2.9.0)) '@vitest/expect@4.1.9': dependencies: @@ -7785,10 +7799,10 @@ snapshots: dependencies: assert-plus: 1.0.0 - data-urls@7.0.0: + data-urls@7.0.0(@noble/hashes@2.2.0): dependencies: whatwg-mimetype: 5.0.0 - whatwg-url: 16.0.1 + whatwg-url: 16.0.1(@noble/hashes@2.2.0) transitivePeerDependencies: - '@noble/hashes' @@ -8541,9 +8555,9 @@ snapshots: hono@4.12.25: {} - html-encoding-sniffer@6.0.0: + html-encoding-sniffer@6.0.0(@noble/hashes@2.2.0): dependencies: - '@exodus/bytes': 1.15.0 + '@exodus/bytes': 1.15.0(@noble/hashes@2.2.0) transitivePeerDependencies: - '@noble/hashes' @@ -8787,17 +8801,17 @@ snapshots: jsbn@0.1.1: {} - jsdom@29.1.1: + jsdom@29.1.1(@noble/hashes@2.2.0): dependencies: '@asamuzakjp/css-color': 5.1.11 '@asamuzakjp/dom-selector': 7.1.1 '@bramus/specificity': 2.4.2 '@csstools/css-syntax-patches-for-csstree': 1.1.4(css-tree@3.2.1) - '@exodus/bytes': 1.15.0 + '@exodus/bytes': 1.15.0(@noble/hashes@2.2.0) css-tree: 3.2.1 - data-urls: 7.0.0 + data-urls: 7.0.0(@noble/hashes@2.2.0) decimal.js: 10.6.0 - html-encoding-sniffer: 6.0.0 + html-encoding-sniffer: 6.0.0(@noble/hashes@2.2.0) is-potential-custom-element-name: 1.0.1 lru-cache: 11.3.6 parse5: 8.0.1 @@ -8808,7 +8822,7 @@ snapshots: w3c-xmlserializer: 5.0.0 webidl-conversions: 8.0.1 
whatwg-mimetype: 5.0.0 - whatwg-url: 16.0.1 + whatwg-url: 16.0.1(@noble/hashes@2.2.0) xml-name-validator: 5.0.0 transitivePeerDependencies: - '@noble/hashes' @@ -10613,7 +10627,7 @@ snapshots: tsx: 4.22.4 yaml: 2.9.0 - vitest@4.1.9(@opentelemetry/api@1.9.1)(@types/node@26.0.1)(@vitest/coverage-v8@4.1.9)(jsdom@29.1.1)(vite@8.1.0(@types/node@26.0.1)(esbuild@0.28.1)(jiti@2.7.0)(terser@5.47.1)(tsx@4.22.4)(yaml@2.9.0)): + vitest@4.1.9(@opentelemetry/api@1.9.1)(@types/node@26.0.1)(@vitest/coverage-v8@4.1.9)(jsdom@29.1.1(@noble/hashes@2.2.0))(vite@8.1.0(@types/node@26.0.1)(esbuild@0.28.1)(jiti@2.7.0)(terser@5.47.1)(tsx@4.22.4)(yaml@2.9.0)): dependencies: '@vitest/expect': 4.1.9 '@vitest/mocker': 4.1.9(vite@8.1.0(@types/node@26.0.1)(esbuild@0.28.1)(jiti@2.7.0)(terser@5.47.1)(tsx@4.22.4)(yaml@2.9.0)) @@ -10639,7 +10653,7 @@ snapshots: '@opentelemetry/api': 1.9.1 '@types/node': 26.0.1 '@vitest/coverage-v8': 4.1.9(vitest@4.1.9) - jsdom: 29.1.1 + jsdom: 29.1.1(@noble/hashes@2.2.0) transitivePeerDependencies: - msw @@ -10705,9 +10719,9 @@ snapshots: whatwg-mimetype@5.0.0: {} - whatwg-url@16.0.1: + whatwg-url@16.0.1(@noble/hashes@2.2.0): dependencies: - '@exodus/bytes': 1.15.0 + '@exodus/bytes': 1.15.0(@noble/hashes@2.2.0) tr46: 6.0.0 webidl-conversions: 8.0.1 transitivePeerDependencies: