Commit 9f96df3 (parent: 2a0c852)

remove latent moe fp8

Signed-off-by: Jennifer Chen <jennifchen@nvidia.com>

1 file changed: 0 additions & 23 deletions

modelopt_recipes/models/Nemotron-3-Super-120B-A12B/super-nvfp4.yaml
@@ -91,29 +91,6 @@ quantize:
         num_bits: e4m3
         axis:
 
-    # latent MOE down/up projections) -> FP8 per-tensor.
-    # NOTE: only 3 layers quantized latent MOE to FP8, layers 1, 3, 5
-    - quantizer_name: '*mixer.fc1_latent_proj*weight_quantizer'
-      enable: true
-      cfg:
-        num_bits: e4m3
-        axis:
-    - quantizer_name: '*mixer.fc1_latent_proj*input_quantizer'
-      enable: true
-      cfg:
-        num_bits: e4m3
-        axis:
-    - quantizer_name: '*mixer.fc2_latent_proj*weight_quantizer'
-      enable: true
-      cfg:
-        num_bits: e4m3
-        axis:
-    - quantizer_name: '*mixer.fc2_latent_proj*input_quantizer'
-      enable: true
-      cfg:
-        num_bits: e4m3
-        axis:
-
     # KV cache -> FP8.
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: true
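The quantizer_name entries deleted by this commit are wildcard patterns matched against fully qualified quantizer names in the model. A minimal sketch of that kind of matching, using Python's fnmatch; the helper function and the example module names (e.g. "model.layers.3...") are illustrative assumptions, not ModelOpt's actual API:

```python
from fnmatch import fnmatchcase

# Patterns removed by this commit (latent MoE projections -> FP8 per-tensor).
removed_patterns = [
    "*mixer.fc1_latent_proj*weight_quantizer",
    "*mixer.fc1_latent_proj*input_quantizer",
    "*mixer.fc2_latent_proj*weight_quantizer",
    "*mixer.fc2_latent_proj*input_quantizer",
]

def matches_any(name: str, patterns: list[str]) -> bool:
    """True if a quantizer name matches any wildcard pattern (case-sensitive)."""
    return any(fnmatchcase(name, p) for p in patterns)

# Hypothetical quantizer name; the "model.layers.3" prefix is made up.
name = "model.layers.3.mixer.fc1_latent_proj.weight_quantizer"
print(matches_any(name, removed_patterns))          # True: covered before this commit
print(matches_any(name, ["*[kv]_bmm_quantizer"]))   # False: KV-cache pattern kept by the diff
```

Note that the retained KV-cache pattern '*[kv]_bmm_quantizer' uses a character class, so it matches names ending in either k_bmm_quantizer or v_bmm_quantizer.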
