Skip to content

Commit 0ef1b98

Browse files
committed
Extract cast_mxfp4_to_nvfp4 quant_cfg mutation into helper
Move the inline weight-quantizer block_sizes='static' rewrite out of quantize_main() into a public force_weight_quantizers_static() helper in cast_mxfp4_to_nvfp4.py, keeping the cast-specific config logic colocated with the rest of the cast flow. Addresses review feedback on PR #1372. Signed-off-by: Chenjie Luo <chenjiel@nvidia.com>
1 parent 024c428 commit 0ef1b98

2 files changed

Lines changed: 19 additions & 12 deletions

File tree

examples/llm_ptq/cast_mxfp4_to_nvfp4.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,23 @@ def build_amax_map(checkpoint_dir: str | Path) -> dict[str, dict]:
291291
return amax_map
292292

293293

294+
def force_weight_quantizers_static(quant_cfg: list) -> None:
    """Rewrite every weight-quantizer entry so its ``block_sizes`` carries ``type='static'``.

    The MXFP4 -> NVFP4 cast needs the per-block weight ``_amax`` to be recorded
    by max-cal (so it can be paired with the pinned global_amax later). Setting
    ``block_sizes['type'] = 'static'`` makes ``is_static_block_quant`` True so
    ``promote_nvfp4_static_quantizers`` picks the entry up automatically at the
    end of max_calibrate.

    Mutates ``quant_cfg`` in place (entries are replaced with patched copies,
    never edited through shared references) and returns ``None``.
    """
    for idx in range(len(quant_cfg)):
        entry = quant_cfg[idx]
        # Only weight-quantizer entries are affected.
        if "weight_quantizer" not in entry.get("quantizer_name", ""):
            continue
        cfg = entry.get("cfg") or {}
        block_sizes = cfg.get("block_sizes")
        # Entries without a block_sizes dict (e.g. disabled quantizers) are left untouched.
        if not isinstance(block_sizes, dict):
            continue
        patched_block_sizes = dict(block_sizes, type="static")
        quant_cfg[idx] = dict(entry, cfg=dict(cfg, block_sizes=patched_block_sizes))
309+
310+
294311
def apply_to_model(
295312
model: "torch.nn.Module",
296313
source_checkpoint_path: str | Path,

examples/llm_ptq/hf_ptq.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import torch
2626
from accelerate.hooks import remove_hook_from_module
2727
from cast_mxfp4_to_nvfp4 import apply_to_model as apply_cast_mxfp4_to_nvfp4
28+
from cast_mxfp4_to_nvfp4 import force_weight_quantizers_static
2829
from example_utils import (
2930
build_quant_cfg,
3031
copy_custom_model_files,
@@ -1088,20 +1089,9 @@ def quantize_main(
10881089
f"Auto-resolved layerwise_checkpoint_dir: {quant_cfg['algorithm']['layerwise_checkpoint_dir']}"
10891090
)
10901091

1091-
# MXFP4 -> NVFP4 cast needs the per-block weight ``_amax`` to be recorded
1092-
# by max-cal (so it can be paired with the pinned global_amax later).
1093-
# Force every weight-quantizer entry to ``block_sizes['type'] = 'static'``
1094-
# so ``is_static_block_quant`` is True and ``promote_nvfp4_static_quantizers``
1095-
# picks them up automatically at the end of max_calibrate.
10961092
if args.cast_mxfp4_to_nvfp4:
10971093
quant_cfg = copy.deepcopy(quant_cfg)
1098-
for entry in quant_cfg.get("quant_cfg", []):
1099-
qname = entry.get("quantizer_name", "")
1100-
cfg = entry.get("cfg") or {}
1101-
bs = cfg.get("block_sizes")
1102-
if "weight_quantizer" in qname and isinstance(bs, dict):
1103-
bs = {**bs, "type": "static"}
1104-
entry["cfg"] = {**cfg, "block_sizes": bs}
1094+
force_weight_quantizers_static(quant_cfg["quant_cfg"])
11051095

11061096
if args.qformat in QUANT_CFG_CHOICES:
11071097
mono_quantize(

0 commit comments

Comments
 (0)