From 107f0663398a63ae96c3b6e17940c75e77d7f4a1 Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Sat, 4 Jul 2026 01:38:54 -0400 Subject: [PATCH 1/3] feat(quotes): order /quotes carousel-first, drive carousel order from QUOTES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reorder the QUOTES array so carousel orgs come first (in the existing CAROUSEL_ORGS order); non-carousel quotes follow in their previous relative order. Pure reorder — no quote content changed. The carousel now derives its order directly from the QUOTES array: remove the 'order' override from QuoteCarousel and intro-section, so the /quotes page order is the single source of truth. CAROUSEL_ORGS only controls membership now. Co-Authored-By: Claude Fable 5 --- packages/app/src/components/intro-section.tsx | 2 +- .../app/src/components/quote-carousel.tsx | 20 +- .../app/src/components/quotes/quotes-data.ts | 310 +++++++++--------- 3 files changed, 163 insertions(+), 169 deletions(-) diff --git a/packages/app/src/components/intro-section.tsx b/packages/app/src/components/intro-section.tsx index 73e509a8..ec5a324a 100644 --- a/packages/app/src/components/intro-section.tsx +++ b/packages/app/src/components/intro-section.tsx @@ -5,10 +5,10 @@ import { MinecraftSplash } from '@/components/minecraft/minecraft-splash'; import { QuoteCarousel } from '@/components/quote-carousel'; import { QUOTES, CAROUSEL_ORGS, CAROUSEL_LABELS } from '@/components/quotes/quotes-data'; +// Carousel order follows QUOTES order — carousel orgs are listed first there. 
const carouselQuotes = QUOTES.filter((q) => (CAROUSEL_ORGS as readonly string[]).includes(q.org)); const CAROUSEL_OVERRIDES = { - order: [...CAROUSEL_ORGS] as string[], labels: CAROUSEL_LABELS, }; diff --git a/packages/app/src/components/quote-carousel.tsx b/packages/app/src/components/quote-carousel.tsx index 6a9b2d1f..8fca5798 100644 --- a/packages/app/src/components/quote-carousel.tsx +++ b/packages/app/src/components/quote-carousel.tsx @@ -19,8 +19,6 @@ export interface CarouselQuote { export interface QuoteCarouselProps { quotes: CarouselQuote[]; overrides?: { - /** Companies pinned to the front in this order; rest are shuffled after */ - order?: string[]; /** Override display names in the org strip */ labels?: Record<string, string>; }; @@ -35,26 +33,18 @@ interface CompanyEntry { quote: CarouselQuote; } -function buildCompanyQuotes(quotes: CarouselQuote[], order?: string[]): CompanyEntry[] { +// One entry per org, first quote wins; entries keep the order of the quotes array. +function buildCompanyQuotes(quotes: CarouselQuote[]): CompanyEntry[] { const byCompany = new Map<string, CarouselQuote[]>(); for (const q of quotes) { const list = byCompany.get(q.org); if (list) list.push(q); else byCompany.set(q.org, [q]); } - const entries = [...byCompany.entries()].map(([org, pool]) => ({ + return [...byCompany.entries()].map(([org, pool]) => ({ org, quote: pool[0], })); - if (order?.length) { - const orderSet = new Set(order); - const pinned = order - .map((c) => entries.find((e) => e.org === c)) - .filter(Boolean) as CompanyEntry[]; - const rest = entries.filter((e) => !orderSet.has(e.org)); - return [...pinned, ...rest]; - } - return entries; } // Warm a logo into the browser cache so it paints instantly when its quote @@ -106,10 +96,10 @@ export function QuoteCarousel({ moreHref, intervalMs = 8_000, }: QuoteCarouselProps) { - const { order, labels = {} } = overrides; + const { labels = {} } = overrides; // Keep the first render deterministic so SSR reserves the carousel's full height before hydration. 
- const entries = useMemo(() => buildCompanyQuotes(quotes, order), [quotes, order]); + const entries = useMemo(() => buildCompanyQuotes(quotes), [quotes]); const [activeIndex, setActiveIndex] = useState(0); const [fading, setFading] = useState(false); const timerRef = useRef | null>(null); diff --git a/packages/app/src/components/quotes/quotes-data.ts b/packages/app/src/components/quotes/quotes-data.ts index fea3d371..71158c07 100644 --- a/packages/app/src/components/quotes/quotes-data.ts +++ b/packages/app/src/components/quotes/quotes-data.ts @@ -9,52 +9,36 @@ export interface Quote { export const QUOTES: Quote[] = [ { - text: "As we build systems at unprecedented scale, it's critical for the ML community to have open, transparent benchmarks that reflect how inference really performs across hardware and software. InferenceMAX\u2122's head-to-head benchmarks cut through the noise and provide a living picture of token throughput, performance per dollar, and tokens per Megawatt. This kind of open source effort strengthens the entire ecosystem and helps everyone, from researchers to operators of frontier datacenters, make smarter decisions.", - name: 'Peter Hoeschele', - title: 'VP of Infrastructure and Industrial Compute, OpenAI Stargate', - org: 'OpenAI', - logo: 'openai.svg', - link: 'https://www.linkedin.com/in/peter-hoeschele/', - }, - { - text: 'Open collaboration is driving the next era of AI innovation. The open-source InferenceMAX benchmark gives the community transparent, nightly results that inspire trust and accelerate progress. It highlights the competitive TCO performance of our AMD Instinct MI300, MI325X, and MI355X GPUs across diverse workloads, underscoring the strength of our platform and our commitment to giving developers real-time visibility into our software progress.', - name: 'Dr. 
Lisa Su', - title: 'Chair and CEO, AMD', - org: 'AMD', - logo: 'amd.svg', - link: 'https://www.linkedin.com/in/lisasu-amd/', - }, - { - text: "Inference demand is growing exponentially, driven by long-context reasoning. NVIDIA Grace Blackwell NVL72 was invented for this new era of thinking AI. NVIDIA is meeting that demand through constant hardware and software innovation to enable what's next in AI. By benchmarking frequently, InferenceMAX\u2122 gives the industry a transparent view of LLM inference performance on real-world workloads. The results are clear: Grace Blackwell NVL72 with TRT-LLM and Dynamo delivers unmatched performance per dollar and per megawatt\u2014powering the most productive and cost-effective AI factories in the world.", - name: 'Jensen Huang', - title: 'Founder & CEO, NVIDIA', - org: 'NVIDIA', - logo: 'nvidia.svg', - link: 'https://www.linkedin.com/in/jenhsunhuang/', + text: "Vendor-neutral, continuously updated benchmarking is essential as models and inference stacks co-evolve. MiniMax M3 was built with both frontier capability and real-world deployment efficiency in mind, and the day-one vLLM support from the community reflects the collaborative spirit we're proud to be part of. InferenceX provides the kind of transparent, reproducible data the ecosystem needs.", + name: 'Ryan Lee', + title: 'Head of DevRel, MiniMax', + org: 'MiniMax', + logo: 'minimax.svg', + link: 'https://x.com/RyanLeeMiniMax', }, { - text: "Speed is the moat. InferenceMAX\u2122's nightly benchmarks match the speed of improvement of the AMD software stack. It's fantastic to see AMD's MI300, MI325, and MI355 GPUs performing so well across diverse workloads and interactivity levels.", - name: 'Anush Elangovan', - title: 'VP GPU Software, AMD', - org: 'AMD', - logo: 'amd.svg', - link: 'https://www.linkedin.com/in/anushelangovan/', + text: 'At Moonshot AI, we are dedicated to supporting the open-source ecosystem by advancing frontier open models. 
As the Kimi K2 series evolves, we are glad to see its performance tracked in InferenceX™’s open and reproducible benchmarks. InferenceX™ helps the community better understand industry-level performance and encourages the ecosystem to keep improving and optimizing.', + name: 'Moonshot AI', + title: '', + org: 'Moonshot AI', + logo: 'moonshot-ai.svg', + link: 'https://www.moonshot.ai/', }, { - text: 'InferenceMAX\u2122 highlights workloads that the ML community cares about. At NVIDIA, we welcome these comparisons because they underscore the advantage of our full-stack approach\u2014from GPUs hardware to NVLink networking to NVL72 Rack Scale to Dynamo disaggregated serving that consistently delivers industry-leading inference performance and ROI at scale.', - name: 'Ian Buck', - title: 'VP & GM, Hyperscale, NVIDIA & Inventor of CUDA', - org: 'NVIDIA', - logo: 'nvidia.svg', - link: 'https://www.linkedin.com/in/ian-buck-19201315/', + text: "Qwen has always been about putting capable models into the hands of as many developers as possible, and real-world inference efficiency is what makes that scale. InferenceX™ brings rigorous, vendor-neutral measurement to exactly the questions that matter: how models like Qwen3.5 actually perform across accelerators. Independent, reproducible benchmarks on real hardware give the community the clarity it needs to deploy with confidence, and we're glad to see that level of transparency driving the inference ecosystem forward.", + name: 'Alibaba Qwen', + title: '', + org: 'Alibaba Qwen', + logo: 'qwen.webp', + link: 'https://qwen.ai', }, { - text: "InferenceMAX\u2122's nightly results highlight the rapid pace of progress in the AMD software stack. It's exciting to witness the birth of an open project that provides a tied feedback loop between what the software team works on here at AMD and how it affects specific ML use cases across our MI300, MI325, and MI355 GPUs. 
I'm looking forward to see what's next for InferenceMAX and to showcase what the AMD platform can do. AMD GPUs will continue to get faster every week.", - name: 'Quentin Colombet', - title: 'Senior Director, AMD, Ex-Brium CEO', - org: 'AMD', - logo: 'amd.svg', - link: 'https://www.linkedin.com/in/quentincolombet/', + text: "As we build systems at unprecedented scale, it's critical for the ML community to have open, transparent benchmarks that reflect how inference really performs across hardware and software. InferenceMAX\u2122's head-to-head benchmarks cut through the noise and provide a living picture of token throughput, performance per dollar, and tokens per Megawatt. This kind of open source effort strengthens the entire ecosystem and helps everyone, from researchers to operators of frontier datacenters, make smarter decisions.", + name: 'Peter Hoeschele', + title: 'VP of Infrastructure and Industrial Compute, OpenAI Stargate', + org: 'OpenAI', + logo: 'openai.svg', + link: 'https://www.linkedin.com/in/peter-hoeschele/', }, { text: "Our mission at Azure is to give customers the most performant, efficient, and cost-effective cloud for AI. SemiAnalysis InferenceMAX\u2122 supports that mission by providing transparent, reproducible benchmarks that track inference performance across GPUs and software stacks under realistic workloads. This continuous data on throughput, efficiency, and cost per watt strengthens our ability to tune Azure's inference platform for scale, helping customers build with confidence on Microsoft Cloud.", @@ -72,6 +56,22 @@ export const QUOTES: Quote[] = [ logo: 'microsoft.svg', link: 'https://www.linkedin.com/in/saurabhdighe/', }, + { + text: 'PyTorch was built on the belief that open tools accelerate the entire AI ecosystem. InferenceX\u2122 embodies that same philosophy\u2014open, reproducible, and vendor-neutral benchmarks that give the community real data on real hardware. 
As inference workloads scale to serve billions of users, having a continuously updated, transparent performance baseline across accelerators is essential for practitioners and platform teams making critical infrastructure decisions.', + name: 'Joseph Spisak', + title: 'Product Director, Meta Super Intelligence Lab', + org: 'Meta Superintelligence Labs', + logo: 'meta.svg', + link: 'https://www.linkedin.com/in/jspisak', + }, + { + text: 'Oracle Cloud Infrastructure is built to give frontier labs & enterprises flexibility and choice, with many GPU SKUs available for AI at scale. InferenceMAX strengthens that mission by delivering open source, reproducible benchmarks that reflect real-world performance, efficiency, and cost on the latest hardware and software. With this transparency, customers can confidently select the platforms that best align with their AI strategies.', + name: 'Jay Jackson', + title: 'Vice President, Oracle Cloud Infrastructure', + org: 'Oracle', + logo: 'oracle.svg', + link: 'https://www.linkedin.com/in/jayejackson/', + }, { text: 'The gap between theoretical peak and real-world inference throughput is often determined by systems software: inference engine, distributed strategies, and low-level kernels. InferenceMAX\u2122 is valuable because it benchmarks the latest software showing how optimizations like FP4, MTP, speculative decode, and wide-EP actually play out across various hardware. 
Open, reproducible results like these help the whole community move faster.', name: 'Tri Dao', @@ -88,14 +88,6 @@ export const QUOTES: Quote[] = [ logo: 'vllm.svg', link: 'https://www.linkedin.com/in/simon-mo-834217162/', }, - { - text: 'The benchmark is good sir', - name: 'Michael Goin', - title: 'vLLM Core Maintainer & Senior Principal Engineer at Red Hat', - org: 'Red Hat', - logo: 'redhat.svg', - link: 'https://www.linkedin.com/in/michael-goin/', - }, { text: 'InferenceMAX\u2122 benchmark is pogchamp & W in chat', name: 'Kaichao You', @@ -120,14 +112,6 @@ export const QUOTES: Quote[] = [ logo: 'pytorch.svg', link: 'https://www.linkedin.com/in/mdwdata/', }, - { - text: 'Oracle Cloud Infrastructure is built to give frontier labs & enterprises flexibility and choice, with many GPU SKUs available for AI at scale. InferenceMAX strengthens that mission by delivering open source, reproducible benchmarks that reflect real-world performance, efficiency, and cost on the latest hardware and software. With this transparency, customers can confidently select the platforms that best align with their AI strategies.', - name: 'Jay Jackson', - title: 'Vice President, Oracle Cloud Infrastructure', - org: 'Oracle', - logo: 'oracle.svg', - link: 'https://www.linkedin.com/in/jayejackson/', - }, { text: 'InferenceMAX\u2122 raises the bar by delivering open, transparent benchmarks that track how inference really performs across the latest GPUs and software stacks. For customers, having reproducible data that measures real world tokens per dollar & tokens per watt, turns abstract marketing numbers into actionable insight. 
At CoreWeave, we support this effort because it brings clarity to a fast-moving space and helps the entire ecosystem build with confidence.', name: 'Peter Salanki', @@ -144,6 +128,118 @@ export const QUOTES: Quote[] = [ logo: 'nebius.svg', link: 'https://www.linkedin.com/in/roman-chernin-1b4b8758/', }, + { + text: "At TensorWave, we're building a next-generation cloud on AMD GPUs because we believe innovation thrives when customers have strong alternatives. InferenceMAX\u2122 reinforces that vision by providing open source, reproducible benchmarks that track throughput, efficiency, and cost across the latest hardware and software. By cutting through synthetic numbers and highlighting real-world inference performance, it helps customers see the full potential of AMD platforms for AI at scale.", + name: 'Darrick Horton', + title: 'CEO, TensorWave', + org: 'TensorWave', + logo: 'tensorwave.svg', + link: 'https://www.linkedin.com/in/darrick-horton/', + }, + { + text: "SGLang is the inference engine behind many production inference factories such as xAI's Grok, earning its recognition as THE Inference King. At scale, we see firsthand how much performance varies across hardware, models, and configurations. InferenceX\u2122 benchmarks SGLang across every major GPU platform nightly, capturing that variance in a way no other benchmark does, continuously, & reproducibly.", + name: 'Mingyi Lu', + title: 'SGLang Product Lead', + org: 'SGLang', + logo: 'sglang.webp', + link: 'https://www.linkedin.com/in/mingyi-lu/', + }, + { + text: "InferenceX\u2122 ensembles precisely that \u2014 open, reproducible benchmarks that are continuously updated as xPU accelerators (GPUs/TPUs/LPUs), memory, storage, and software stacks evolve. I'm excited to see the InferenceX benchmarking roadmap include agentic coding workloads that stress CPU KV Cache offloading & soon NVMe KV Cache offloading from xPUs. 
As WEKA helps scale the Memory Wall by building the KV Cache infrastructure that feeds these xPUs, having this level of visibility into inference performance helps the entire ecosystem make smarter decisions about where to invest.", + name: 'Val Bercovici', + title: 'Chief AI Officer, WEKA', + org: 'WEKA', + logo: 'weka.svg', + link: 'https://www.linkedin.com/in/valentinbercovici/', + }, + { + text: 'For researchers working on inference optimizations, understanding how new techniques interact across the software and hardware stack is critical yet incredibly hard to measure. InferenceX\u2122 provides much-needed insights into how inference performance evolves across major hardware platforms, moving the field forward with open, reproducible data that makes the gaps and progress visible.', + name: 'Simon Guo', + title: 'PhD Student, Stanford CS', + org: 'Stanford', + logo: 'stanford.svg', + link: 'https://simonguo.tech/', + }, + { + text: 'Hugging Face exists to make AI open and accessible to everyone. InferenceX\u2122 extends that mission to ai chip performance, pulling models directly from the Hub and benchmarking them across every major accelerator, continuously and transparently. When the community can see exactly how frontier open models perform on real hardware in real time, it raises the bar for the entire ecosystem.', + name: 'Clement Delangue', + title: 'CEO, Hugging Face', + org: 'Hugging Face', + logo: 'huggingface.svg', + link: 'https://www.linkedin.com/in/cdelangue/', + }, + { + text: 'Lambda exists to make GPU compute simple and accessible for AI teams, from individual researchers to the largest labs. InferenceX\u2122 aligns with that mission by giving the community open, reproducible benchmarks that measure what actually matters: real-world throughput, cost efficiency, and performance per watt across the latest hardware and software stacks. 
Teams can make informed compute choices grounded in transparent, continuously updated data.', + name: 'Stephen Balaban', + title: 'Co-founder and CEO, Lambda', + org: 'Lambda', + logo: 'lambda.svg', + link: 'https://www.linkedin.com/in/sbalaban/', + }, + { + text: 'When we introduced DistServe, the thesis was simple: split prefill and decode and optimize each on its own terms. Eighteen months later, disaggregation is the default architecture across the industry. InferenceX\u2122 is the benchmark that comparing disaggregated and aggregated serving across the whole pareto curve. InferenceX shows exactly when and where P/D separation pays off in TTFT, TPOT, throughput, and cost.', + name: 'Hao Zhang', + title: 'Assistant Professor, UC San Diego & Co-Creator of DistServe, vLLM, and FastVideo', + org: 'UC San Diego', + logo: 'uc-san-diego.svg', + link: 'https://haozhang.ai/', + }, + { + text: 'The benchmark is good sir', + name: 'Michael Goin', + title: 'vLLM Core Maintainer & Senior Principal Engineer at Red Hat', + org: 'Red Hat', + logo: 'redhat.svg', + link: 'https://www.linkedin.com/in/michael-goin/', + }, + { + text: 'Now commonly hearing "We want the Semianalysis for X". Testament to what @dylan522p has built.', + name: 'Sriram Krishnan', + title: 'White House Senior AI Advisor', + org: 'White House', + logo: 'white-house.svg', + link: 'https://x.com/sriramk/status/2048824255702262135', + }, + { + text: 'Open collaboration is driving the next era of AI innovation. The open-source InferenceMAX benchmark gives the community transparent, nightly results that inspire trust and accelerate progress. It highlights the competitive TCO performance of our AMD Instinct MI300, MI325X, and MI355X GPUs across diverse workloads, underscoring the strength of our platform and our commitment to giving developers real-time visibility into our software progress.', + name: 'Dr. 
Lisa Su', + title: 'Chair and CEO, AMD', + org: 'AMD', + logo: 'amd.svg', + link: 'https://www.linkedin.com/in/lisasu-amd/', + }, + { + text: "Inference demand is growing exponentially, driven by long-context reasoning. NVIDIA Grace Blackwell NVL72 was invented for this new era of thinking AI. NVIDIA is meeting that demand through constant hardware and software innovation to enable what's next in AI. By benchmarking frequently, InferenceMAX\u2122 gives the industry a transparent view of LLM inference performance on real-world workloads. The results are clear: Grace Blackwell NVL72 with TRT-LLM and Dynamo delivers unmatched performance per dollar and per megawatt\u2014powering the most productive and cost-effective AI factories in the world.", + name: 'Jensen Huang', + title: 'Founder & CEO, NVIDIA', + org: 'NVIDIA', + logo: 'nvidia.svg', + link: 'https://www.linkedin.com/in/jenhsunhuang/', + }, + { + text: "Speed is the moat. InferenceMAX\u2122's nightly benchmarks match the speed of improvement of the AMD software stack. It's fantastic to see AMD's MI300, MI325, and MI355 GPUs performing so well across diverse workloads and interactivity levels.", + name: 'Anush Elangovan', + title: 'VP GPU Software, AMD', + org: 'AMD', + logo: 'amd.svg', + link: 'https://www.linkedin.com/in/anushelangovan/', + }, + { + text: 'InferenceMAX\u2122 highlights workloads that the ML community cares about. At NVIDIA, we welcome these comparisons because they underscore the advantage of our full-stack approach\u2014from GPUs hardware to NVLink networking to NVL72 Rack Scale to Dynamo disaggregated serving that consistently delivers industry-leading inference performance and ROI at scale.', + name: 'Ian Buck', + title: 'VP & GM, Hyperscale, NVIDIA & Inventor of CUDA', + org: 'NVIDIA', + logo: 'nvidia.svg', + link: 'https://www.linkedin.com/in/ian-buck-19201315/', + }, + { + text: "InferenceMAX\u2122's nightly results highlight the rapid pace of progress in the AMD software stack. 
It's exciting to witness the birth of an open project that provides a tied feedback loop between what the software team works on here at AMD and how it affects specific ML use cases across our MI300, MI325, and MI355 GPUs. I'm looking forward to see what's next for InferenceMAX and to showcase what the AMD platform can do. AMD GPUs will continue to get faster every week.", + name: 'Quentin Colombet', + title: 'Senior Director, AMD, Ex-Brium CEO', + org: 'AMD', + logo: 'amd.svg', + link: 'https://www.linkedin.com/in/quentincolombet/', + }, { text: "At Crusoe, we believe being a great partner means empowering our customers with choice and clarity. That's why we're proud to support InferenceMAX\u2122, which provides the entire AI community with open-source, reproducible benchmarks for the latest hardware. By delivering transparent, real-world data on throughput, efficiency, and cost, InferenceMAX\u2122 cuts through the hype and helps our customers confidently select the very best platform for their unique workloads.", name: 'Chase Lochmiller', @@ -160,14 +256,6 @@ export const QUOTES: Quote[] = [ logo: 'supermicro.svg', link: 'https://en.wikipedia.org/wiki/Charles_Liang', }, - { - text: "At TensorWave, we're building a next-generation cloud on AMD GPUs because we believe innovation thrives when customers have strong alternatives. InferenceMAX\u2122 reinforces that vision by providing open source, reproducible benchmarks that track throughput, efficiency, and cost across the latest hardware and software. 
By cutting through synthetic numbers and highlighting real-world inference performance, it helps customers see the full potential of AMD platforms for AI at scale.", - name: 'Darrick Horton', - title: 'CEO, TensorWave', - org: 'TensorWave', - logo: 'tensorwave.svg', - link: 'https://www.linkedin.com/in/darrick-horton/', - }, { text: 'Vultr is committed to providing an open ecosystem that gives developers freedom in how they build and scale AI \u2014 whether on NVIDIA or AMD GPUs. With InferenceMAX\u2122, customers gain open, reproducible benchmarks that deliver clear insights into throughput, efficiency, and cost across cutting-edge hardware and software. By showcasing real-world performance, we empower teams to confidently choose the right platform for their AI workloads.', name: 'Nathan Goulding', @@ -207,14 +295,6 @@ export const QUOTES: Quote[] = [ org: 'Stas Bekman', link: 'https://github.com/stas00/ml-engineering', }, - { - text: "SGLang is the inference engine behind many production inference factories such as xAI's Grok, earning its recognition as THE Inference King. At scale, we see firsthand how much performance varies across hardware, models, and configurations. InferenceX\u2122 benchmarks SGLang across every major GPU platform nightly, capturing that variance in a way no other benchmark does, continuously, & reproducibly.", - name: 'Mingyi Lu', - title: 'SGLang Product Lead', - org: 'SGLang', - logo: 'sglang.webp', - link: 'https://www.linkedin.com/in/mingyi-lu/', - }, { text: 'We use InferenceX benchmarks ourselves as one of the key datapoints to help us make infrastructure decisions at Adaptive ML. Inference performance is critical for large-scale RL workloads, where fast generation directly impacts time to market & revenue for our customers. InferenceX\u2122 benchmarks the full stack continuously \u2014 engine, model, software, and hardware across rack-scale systems like GB300 NVL72. 
This is the kind of open, transparent, reproducible signal the ecosystem has been missing.', name: 'Julien Launay', @@ -239,14 +319,6 @@ export const QUOTES: Quote[] = [ logo: 'verda.svg', link: 'https://www.linkedin.com/in/ruben-bryon/', }, - { - text: "InferenceX\u2122 ensembles precisely that \u2014 open, reproducible benchmarks that are continuously updated as xPU accelerators (GPUs/TPUs/LPUs), memory, storage, and software stacks evolve. I'm excited to see the InferenceX benchmarking roadmap include agentic coding workloads that stress CPU KV Cache offloading & soon NVMe KV Cache offloading from xPUs. As WEKA helps scale the Memory Wall by building the KV Cache infrastructure that feeds these xPUs, having this level of visibility into inference performance helps the entire ecosystem make smarter decisions about where to invest.", - name: 'Val Bercovici', - title: 'Chief AI Officer, WEKA', - org: 'WEKA', - logo: 'weka.svg', - link: 'https://www.linkedin.com/in/valentinbercovici/', - }, { text: 'Voltage Park is built to give AI teams fast, affordable access to GPU compute at scale. InferenceX\u2122 supports that goal by providing open, reproducible benchmarks that show how inference actually performs across the latest hardware and software stacks. With transparent, continuously updated data on throughput, efficiency, and cost, teams can make confident compute decisions instead of guessing. We\u2019re happy to back an effort that brings this level of clarity to the ecosystem.', name: 'Saurabh Giri', @@ -263,14 +335,6 @@ export const QUOTES: Quote[] = [ logo: 'periodic-labs.png', link: 'https://www.linkedin.com/in/xanderdunn/', }, - { - text: 'For researchers working on inference optimizations, understanding how new techniques interact across the software and hardware stack is critical yet incredibly hard to measure. 
InferenceX\u2122 provides much-needed insights into how inference performance evolves across major hardware platforms, moving the field forward with open, reproducible data that makes the gaps and progress visible.', - name: 'Simon Guo', - title: 'PhD Student, Stanford CS', - org: 'Stanford', - logo: 'stanford.svg', - link: 'https://simonguo.tech/', - }, { text: 'As AI infrastructure scales globally, no single vendor or region can define the benchmarks that matter for everyone. InferenceX is an important step toward a shared, transparent view of inference performance and TCO, enabling more rational investments for sovereign AI Cloud operators, as well as healthier competition, and ultimately more accessible AI capacity worldwide.', name: 'Talal M. Al Kaissi', @@ -278,22 +342,6 @@ export const QUOTES: Quote[] = [ org: 'Core42', logo: 'core42.webp', }, - { - text: 'PyTorch was built on the belief that open tools accelerate the entire AI ecosystem. InferenceX\u2122 embodies that same philosophy\u2014open, reproducible, and vendor-neutral benchmarks that give the community real data on real hardware. As inference workloads scale to serve billions of users, having a continuously updated, transparent performance baseline across accelerators is essential for practitioners and platform teams making critical infrastructure decisions.', - name: 'Joseph Spisak', - title: 'Product Director, Meta Super Intelligence Lab', - org: 'Meta Superintelligence Labs', - logo: 'meta.svg', - link: 'https://www.linkedin.com/in/jspisak', - }, - { - text: 'Hugging Face exists to make AI open and accessible to everyone. InferenceX\u2122 extends that mission to ai chip performance, pulling models directly from the Hub and benchmarking them across every major accelerator, continuously and transparently. 
When the community can see exactly how frontier open models perform on real hardware in real time, it raises the bar for the entire ecosystem.', - name: 'Clement Delangue', - title: 'CEO, Hugging Face', - org: 'Hugging Face', - logo: 'huggingface.svg', - link: 'https://www.linkedin.com/in/cdelangue/', - }, { text: 'It is important to have an open and continuously updated platform for benchmarking inference engines across real workloads and diverse hardware. InferenceX provides this kind of transparent and practical evaluation, helping the community better understand real system bottlenecks and tradeoffs. Benchmarks like this are essential for building more efficient and scalable AI systems. Moreover, as LLM agents become increasingly capable at improving systems, such a platform can provide the reliable feedback needed to close the automatic optimization loop, further driving progress in this field.', name: 'Cao Shiyi', @@ -301,22 +349,6 @@ export const QUOTES: Quote[] = [ org: 'UC Berkeley', logo: 'sky-berkeley.webp', }, - { - text: 'Lambda exists to make GPU compute simple and accessible for AI teams, from individual researchers to the largest labs. InferenceX\u2122 aligns with that mission by giving the community open, reproducible benchmarks that measure what actually matters: real-world throughput, cost efficiency, and performance per watt across the latest hardware and software stacks. Teams can make informed compute choices grounded in transparent, continuously updated data.', - name: 'Stephen Balaban', - title: 'Co-founder and CEO, Lambda', - org: 'Lambda', - logo: 'lambda.svg', - link: 'https://www.linkedin.com/in/sbalaban/', - }, - { - text: 'When we introduced DistServe, the thesis was simple: split prefill and decode and optimize each on its own terms. Eighteen months later, disaggregation is the default architecture across the industry. InferenceX\u2122 is the benchmark that comparing disaggregated and aggregated serving across the whole pareto curve. 
InferenceX shows exactly when and where P/D separation pays off in TTFT, TPOT, throughput, and cost.', - name: 'Hao Zhang', - title: 'Assistant Professor, UC San Diego & Co-Creator of DistServe, vLLM, and FastVideo', - org: 'UC San Diego', - logo: 'uc-san-diego.svg', - link: 'https://haozhang.ai/', - }, { text: 'At GMI Cloud, we believe inference has become the center of AI value creation. SemiAnalysis has done something the industry has long needed with InferenceX—they’ve turned inference from a black box into a continuously measured, real-world system. By benchmarking not just hardware, but the full stack—models, runtimes, and distributed systems—InferenceX reflects how AI actually runs in production, not how it’s marketed.', name: 'Alex Yeh', @@ -325,14 +357,6 @@ export const QUOTES: Quote[] = [ logo: 'gmi-cloud.svg', link: 'https://www.linkedin.com/in/gmi-yeh', }, - { - text: 'Now commonly hearing "We want the Semianalysis for X". Testament to what @dylan522p has built.', - name: 'Sriram Krishnan', - title: 'White House Senior AI Advisor', - org: 'White House', - logo: 'white-house.svg', - link: 'https://x.com/sriramk/status/2048824255702262135', - }, { text: 'At EmbeddedLLM, our team works deep in the production inference stack, including major maintainer and contributor work in vLLM, so we see every day how much real-world AI performance depends on the full system: model, runtime, kernels, scheduling, and hardware. InferenceX\u2122 matters because it benchmarks that full system continuously and openly. 
It turns inference from a marketing conversation into an engineering discipline, giving AI labs, neoclouds, and enterprises the data they need to make decisions on throughput, cost, and efficiency at production scale.', name: 'Pin Siang Tan', @@ -341,33 +365,13 @@ export const QUOTES: Quote[] = [ logo: 'embeddedllm.webp', link: 'https://www.linkedin.com/in/tanpinsiang', }, - { - text: "Vendor-neutral, continuously updated benchmarking is essential as models and inference stacks co-evolve. MiniMax M3 was built with both frontier capability and real-world deployment efficiency in mind, and the day-one vLLM support from the community reflects the collaborative spirit we're proud to be part of. InferenceX provides the kind of transparent, reproducible data the ecosystem needs.", - name: 'Ryan Lee', - title: 'Head of DevRel, MiniMax', - org: 'MiniMax', - logo: 'minimax.svg', - link: 'https://x.com/RyanLeeMiniMax', - }, - { - text: 'At Moonshot AI, we are dedicated to supporting the open-source ecosystem by advancing frontier open models. As the Kimi K2 series evolves, we are glad to see its performance tracked in InferenceX™’s open and reproducible benchmarks. InferenceX™ helps the community better understand industry-level performance and encourages the ecosystem to keep improving and optimizing.', - name: 'Moonshot AI', - title: '', - org: 'Moonshot AI', - logo: 'moonshot-ai.svg', - link: 'https://www.moonshot.ai/', - }, - { - text: "Qwen has always been about putting capable models into the hands of as many developers as possible, and real-world inference efficiency is what makes that scale. InferenceX™ brings rigorous, vendor-neutral measurement to exactly the questions that matter: how models like Qwen3.5 actually perform across accelerators. 
Independent, reproducible benchmarks on real hardware give the community the clarity it needs to deploy with confidence, and we're glad to see that level of transparency driving the inference ecosystem forward.", - name: 'Alibaba Qwen', - title: '', - org: 'Alibaba Qwen', - logo: 'qwen.webp', - link: 'https://qwen.ai', - }, ]; -/** Orgs featured in the landing page carousel. */ +/** + * Orgs featured in the landing page carousel. Display order comes from the + * QUOTES array above (carousel orgs are listed first there); this list only + * controls membership. + */ export const CAROUSEL_ORGS = [ 'MiniMax', 'Moonshot AI', From adfb19e50fb43fbe86615eab384f5c45cf1d70fd Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Sat, 4 Jul 2026 01:42:05 -0400 Subject: [PATCH 2/3] feat(quotes): list model-lab supporters first in /quotes intro + metadata Update the supporters sentence to lead with MiniMax, Moonshot Kimi, and Alibaba Qwen, matching the new carousel-first ordering. Keep the page metadata and OG description in sync with the visible intro text. 
Co-Authored-By: Claude Fable 5 --- packages/app/src/app/quotes/page.tsx | 4 ++-- packages/app/src/components/quotes/quotes-content.tsx | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/app/src/app/quotes/page.tsx b/packages/app/src/app/quotes/page.tsx index 8f9bc256..0cba135a 100644 --- a/packages/app/src/app/quotes/page.tsx +++ b/packages/app/src/app/quotes/page.tsx @@ -6,12 +6,12 @@ import { SITE_URL } from '@semianalysisai/inferencex-constants'; export const metadata: Metadata = { title: 'Supporters', description: - 'InferenceX initiative is supported by major buyers of compute and prominent members of the ML community including those from OpenAI, Microsoft, PyTorch Foundation, and more.', + 'InferenceX initiative is supported by major buyers of compute and prominent members of the ML community including those from MiniMax, Moonshot Kimi, Alibaba Qwen, OpenAI, Microsoft, vLLM, PyTorch Foundation, Oracle and more.', alternates: { canonical: `${SITE_URL}/quotes` }, openGraph: { title: 'Supporters | InferenceX by SemiAnalysis', description: - 'Supported by OpenAI, Microsoft, PyTorch Foundation, and prominent members of the ML community.', + 'Supported by MiniMax, Moonshot Kimi, Alibaba Qwen, OpenAI, Microsoft, vLLM, PyTorch Foundation, Oracle, and prominent members of the ML community.', url: `${SITE_URL}/quotes`, }, }; diff --git a/packages/app/src/components/quotes/quotes-content.tsx b/packages/app/src/components/quotes/quotes-content.tsx index 1b130ae2..d3b2a5d7 100644 --- a/packages/app/src/components/quotes/quotes-content.tsx +++ b/packages/app/src/components/quotes/quotes-content.tsx @@ -100,8 +100,8 @@ export function QuotesContent() {

InferenceX™ initiative is supported by many major buyers of compute and - prominent members of the ML community including those from OpenAI, Microsoft, vLLM, - PyTorch Foundation, Oracle and more. + prominent members of the ML community including those from MiniMax, Moonshot Kimi, + Alibaba Qwen, OpenAI, Microsoft, vLLM, PyTorch Foundation, Oracle and more.

{orgLogos.map(({ org, logo }) => ( From 95658f232e3371f953537081b58d036f9cb4918b Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Sat, 4 Jul 2026 01:45:46 -0400 Subject: [PATCH 3/3] chore: fix README_zh.md formatting from master MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run oxfmt over README_zh.md (added on master in #521) so the oxc CI check passes again; the file landed unformatted and broke every PR. 中文:对 master 分支(#521)新增的 README_zh.md 运行 oxfmt 修复格式, 使 oxc CI 检查恢复通过;该文件合入时未经格式化,导致所有 PR 检查失败。 Co-Authored-By: Claude Fable 5 --- README_zh.md | 64 ++++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/README_zh.md b/README_zh.md index ab0e6d0d..c9886756 100644 --- a/README_zh.md +++ b/README_zh.md @@ -93,44 +93,44 @@ pnpm dev 以下是开发过程中的常用脚本。数据库与缓存管理的运维脚本单独列在下方。 部分脚本可能需要额外的配置或环境变量。 -| 脚本 | 说明 | -| --------------------------- | ------------------------------------------ | -| `pnpm dev` | 启动开发服务器(Turbopack) | -| `pnpm build` | 生产构建 | -| `pnpm start` | 启动生产服务器 | -| `pnpm preview` | 本地构建并启动生产服务器 | -| `pnpm typecheck` | TypeScript 类型检查(所有包) | -| `pnpm lint` | 使用 oxlint 进行 lint | -| `pnpm lint:fix` | 自动修复 lint 问题 | -| `pnpm fmt` | 使用 oxfmt 检查格式 | -| `pnpm fmt:fix` | 自动修复格式 | -| `pnpm security` | 安全审计(pnpm audit + audit-ci) | -| `pnpm test` | 运行所有测试(单元 + E2E) | -| `pnpm test:unit` | Vitest 单元测试 | -| `pnpm test:unit:coverage` | Vitest 单元测试(含覆盖率) | -| `pnpm test:e2e` | Cypress E2E 测试 | -| `pnpm test:e2e:component` | 仅 Cypress 组件测试 | -| `pnpm test:e2e:integration` | 仅 Cypress 集成测试 | -| `pnpm clean` | 清除构建产物 | -| `pnpm clean:all` | 清除构建产物 + node_modules | +| 脚本 | 说明 | +| --------------------------- | --------------------------------- | +| `pnpm dev` | 启动开发服务器(Turbopack) | +| `pnpm build` | 生产构建 | +| `pnpm start` | 启动生产服务器 | +| `pnpm preview` | 本地构建并启动生产服务器 | +| `pnpm typecheck` | TypeScript 类型检查(所有包) | +| `pnpm lint` 
| 使用 oxlint 进行 lint | +| `pnpm lint:fix` | 自动修复 lint 问题 | +| `pnpm fmt` | 使用 oxfmt 检查格式 | +| `pnpm fmt:fix` | 自动修复格式 | +| `pnpm security` | 安全审计(pnpm audit + audit-ci) | +| `pnpm test` | 运行所有测试(单元 + E2E) | +| `pnpm test:unit` | Vitest 单元测试 | +| `pnpm test:unit:coverage` | Vitest 单元测试(含覆盖率) | +| `pnpm test:e2e` | Cypress E2E 测试 | +| `pnpm test:e2e:component` | 仅 Cypress 组件测试 | +| `pnpm test:e2e:integration` | 仅 Cypress 集成测试 | +| `pnpm clean` | 清除构建产物 | +| `pnpm clean:all` | 清除构建产物 + node_modules | ### 运维脚本 以下脚本用于数据库与缓存的管理维护,常规开发中一般不需要。 不过在改动数据库或 API 路由后,将 `pnpm admin:cache:invalidate` 指向本地开发服务器进行测试会很有用。 -| 脚本 | 说明 | -| ----------------------------------- | -------------------------------------- | -| `pnpm admin:db:migrate` | 运行数据库迁移 | -| `pnpm admin:db:ingest:run` | 从 GitHub 运行摄取基准测试数据 | -| `pnpm admin:db:ingest:ci` | 摄取基准测试数据(CI 模式) | -| `pnpm admin:db:ingest:gcs` | 从 GCS 摄取基准测试数据 | -| `pnpm admin:db:ingest:supplemental` | 摄取补充数据 | -| `pnpm admin:db:apply-overrides` | 应用数据覆盖 | -| `pnpm admin:db:reset` | 重置数据库 | -| `pnpm admin:db:verify` | 校验数据库完整性 | -| `pnpm admin:cache:invalidate` | 失效 API 缓存 | -| `pnpm admin:cache:warmup` | 预热 API 缓存 | +| 脚本 | 说明 | +| ----------------------------------- | ------------------------------ | +| `pnpm admin:db:migrate` | 运行数据库迁移 | +| `pnpm admin:db:ingest:run` | 从 GitHub 运行摄取基准测试数据 | +| `pnpm admin:db:ingest:ci` | 摄取基准测试数据(CI 模式) | +| `pnpm admin:db:ingest:gcs` | 从 GCS 摄取基准测试数据 | +| `pnpm admin:db:ingest:supplemental` | 摄取补充数据 | +| `pnpm admin:db:apply-overrides` | 应用数据覆盖 | +| `pnpm admin:db:reset` | 重置数据库 | +| `pnpm admin:db:verify` | 校验数据库完整性 | +| `pnpm admin:cache:invalidate` | 失效 API 缓存 | +| `pnpm admin:cache:warmup` | 预热 API 缓存 | ## 部署