
Commit 78f4c42

chore(ptq): drop --exclude_modules CLI flag (recipes own exclusions)
The `--exclude_modules` flag was added in this PR as an escape hatch for overriding the auto-applied lm_head/embedding inclusion on Nemotron-H. Now that meenchen's recipe-system review is addressed and the Nemotron-H extensions live in `modelopt_recipes/models/Nemotron-H/nvfp4_w4a16.yaml`, the flag has no remaining purpose: users who want different exclusions write a different recipe.

Removes:

* the `--exclude_modules` argparse entry in `hf_ptq.py`
* the `args.exclude_modules` apply-loop in `quantize_main()`
* the `EXCLUDE_MODULES` env-var passthrough and `EXCLUDE_MODULES_ARGS` bash array in `examples/llm_ptq/scripts/huggingface_example.sh`

Verified end-to-end on `nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16` with `--recipe models/Nemotron-H/nvfp4_w4a16` (transformers 4.56.2, GPU 5, calib_size=16). Coverage is unchanged: 94 weight quantizers enabled, 21 disabled (the Mamba `*mixer.conv1d*` layers); `lm_head.weight_quantizer` and `backbone.embeddings.weight_quantizer` carry the NVFP4 W4A16 config; the exported safetensors total 2.1 GiB; `hf_quant_config.json` reports `quant_algo=NVFP4_W4A16`, `group_size=16`, and `exclude_modules=[21 conv1d layers]`. The recipe still dictates the exclusion set, so behavior is unchanged for the supported codepath.

Signed-off-by: ajrasane <131806219+ajrasane@users.noreply.github.com>
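For reproducibility, the reported `hf_quant_config.json` fields can be spot-checked with a short script. This is a minimal sketch: the field names (`quant_algo`, `group_size`, `exclude_modules`) come from the commit message above, while the JSON nesting and the `exported_model` path are assumptions.

```python
# Sketch of the post-export check described above. Field names come from the
# commit message; the JSON nesting and export directory are assumptions.
import json
from pathlib import Path

export_dir = Path("exported_model")  # hypothetical export location
cfg = json.loads((export_dir / "hf_quant_config.json").read_text())

quant = cfg.get("quantization", cfg)  # tolerate flat or nested layouts
assert quant.get("quant_algo") == "NVFP4_W4A16"
assert quant.get("group_size") == 16
print(f"excluded modules: {len(quant.get('exclude_modules', []))}")  # expect 21
```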
1 parent: e63965e

2 files changed: 0 additions & 36 deletions


examples/llm_ptq/hf_ptq.py

Lines changed: 0 additions & 23 deletions
```diff
@@ -1128,18 +1128,6 @@ def quantize_main(
         quant_cfg["quant_cfg"].append({"quantizer_name": pattern, "enable": False})
         print(f"Excluding MTP layer from quantization: {pattern}")
 
-    # Apply user-requested per-module exclusions (--exclude_modules).
-    if args.exclude_modules:
-        quant_cfg = copy.deepcopy(quant_cfg)
-        for mod in args.exclude_modules:
-            quant_cfg["quant_cfg"].append(
-                {"quantizer_name": f"*{mod}*.weight_quantizer", "enable": False}
-            )
-            quant_cfg["quant_cfg"].append(
-                {"quantizer_name": f"*{mod}*.input_quantizer", "enable": False}
-            )
-            print(f"Excluding module from quantization: {mod}")
-
     # Use constant amax for KV quantizers when a cast format is selected.
     if args.kv_cache_qformat in _KV_CAST_FORMATS:
         quant_cfg = copy.deepcopy(quant_cfg)
@@ -1338,17 +1326,6 @@ def parse_args() -> argparse.Namespace:
         default=False,
         action="store_true",
     )
-    parser.add_argument(
-        "--exclude_modules",
-        nargs="+",
-        default=[],
-        metavar="MODULE",
-        help=(
-            "Module name patterns to exclude from quantization "
-            "(e.g. lm_head backbone.layers.0.mixer). "
-            "Appends a disable rule for each pattern's weight and input quantizers."
-        ),
-    )
     parser.add_argument(
         "--low_memory_mode",
         help=(
```
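For reference, the deleted apply-loop reads as a small standalone helper. The sketch below mirrors the removed lines: the config shape (a dict whose `quant_cfg` key holds a list of enable/disable rules) is taken from the diff above, and `demo_cfg` is a hypothetical starting config.

```python
# Standalone restatement of the removed --exclude_modules loop, for reference.
# The rule shape ({"quantizer_name": ..., "enable": False}) follows the diff.
import copy


def exclude_modules(quant_cfg: dict, patterns: list[str]) -> dict:
    """Return a copy of quant_cfg with weight/input quantizers disabled per pattern."""
    quant_cfg = copy.deepcopy(quant_cfg)
    for mod in patterns:
        quant_cfg["quant_cfg"].append(
            {"quantizer_name": f"*{mod}*.weight_quantizer", "enable": False}
        )
        quant_cfg["quant_cfg"].append(
            {"quantizer_name": f"*{mod}*.input_quantizer", "enable": False}
        )
    return quant_cfg


demo_cfg = {"quant_cfg": []}  # hypothetical minimal config
print(exclude_modules(demo_cfg, ["lm_head", "backbone.layers.0.mixer"]))
```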

examples/llm_ptq/scripts/huggingface_example.sh

Lines changed: 0 additions & 13 deletions
```diff
@@ -127,18 +127,6 @@ if $TRUST_REMOTE_CODE; then
     PTQ_ARGS+=" --trust_remote_code "
 fi
 
-# --exclude_modules is kept out of the PTQ_ARGS string and passed via a bash array so
-# wildcard patterns like '*embed_tokens*' reach hf_ptq.py verbatim. Word-splitting into
-# per-pattern elements happens with glob expansion disabled (set -f) so the shell does
-# not expand '*' against the filesystem.
-EXCLUDE_MODULES_ARGS=()
-if [ -n "${EXCLUDE_MODULES:-}" ]; then
-    set -f
-    # shellcheck disable=SC2206 # intentional word-splitting without glob expansion
-    EXCLUDE_MODULES_ARGS=(--exclude_modules $EXCLUDE_MODULES)
-    set +f
-fi
-
 if $USE_SEQ_DEVICE_MAP; then
     PTQ_ARGS+=" --use_seq_device_map "
 fi
@@ -195,7 +183,6 @@ if [[ $TASKS =~ "quant" ]] || [[ ! -d "$SAVE_PATH" ]] || [[ ! $(ls -A $SAVE_PATH
     --inference_tensor_parallel=$TP \
     --inference_pipeline_parallel=$PP \
     $PTQ_ARGS \
-    "${EXCLUDE_MODULES_ARGS[@]}" \
     $AWQ_ARGS
 else
     echo "Quantized model config $MODEL_CONFIG exists, skipping the quantization stage"
```
