SemiAnalysisAI · richardhuo-nv · Jun 30, 2026 · Jun 30, 2026 · Jun 30, 2026 · Jun 30, 2026
@@ -12362,14 +12362,14 @@ minimaxm3-fp8-gb300-dynamo-vllm:
       osl: 1024
       search-space:
-      # 1p1d DEP2+DEP8, 3n: conc 256
+      # 1p1d DEP2+DEP8, 3n: conc 256, 512
-      - conc-list: [256]
+      - conc-list: [256, 512]
         prefill:
           num-worker: 1
           tp: 2
           ep: 2
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/vllm/minimax-m3-gb300-fp8/8k1k/1p1d-dep2-dep8-8k1k.yaml"
+          - "CONFIG_FILE=recipes/vllm/minimax-m3-gb300-fp8/8k1k/1p1d-dep2-dep8-8k1k-healthcheck.yaml"
         decode:
           num-worker: 1
           tp: 8

diff --git a/...3-gb300-fp8/8k1k/1p1d-dep2-dep8-8k1k.yaml → ...8k1k/1p1d-dep2-dep8-8k1k-healthcheck.yaml b/...3-gb300-fp8/8k1k/1p1d-dep2-dep8-8k1k.yaml → ...8k1k/1p1d-dep2-dep8-8k1k-healthcheck.yaml
@@ -15,7 +15,7 @@ dynamo:
   version: 1.3.0.dev20260614
 
 health_check:
-  max_attempts: 720
+  max_attempts: 840
   interval_seconds: 10
 
 sbatch_directives:
@@ -103,5 +103,5 @@ benchmark:
   type: "sa-bench"
   isl: 8192
   osl: 1024
-  concurrencies: "256"
+  concurrencies: "256x512"
   req_rate: "inf"
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -4343,3 +4343,9 @@
     - "Use nvidia/MiniMax-M3-NVFP4 from /scratch/models/MiniMax-M3-NVFP4 with vllm/vllm-openai:vllm-minimax-m3-perf-x86_64-13.0.1-8b00f41, which includes vllm-project/vllm PR #46380; no runtime patch needed"
     - "Reuse the existing MXFP8 B300 topology and concurrency matrix across 15 srt-slurm recipes, while dropping the FP8-only Marlin override from TP4 decode"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1931
+
+- config-keys:
+    - minimaxm3-fp8-gb300-dynamo-vllm
+  description:
+    - "Rename 1p1d-dep2-dep8-8k1k.yaml to 1p1d-dep2-dep8-8k1k-healthcheck.yaml and update the nvidia-master.yaml CONFIG_FILE pointer accordingly; increase health_check max_attempts from 720 to 840 (7200s → 8400s at the 10s polling interval) to accommodate longer GB300 model load times; extend the 1p1d DEP2+DEP8 8k1k sweep from conc 256 to conc 256 and 512."
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1961