
Commit ae965a9

Authored by ChenhanYu and claude
add: HF PTQ support and modelopt_recipes mount in launcher (#1089)
- Add common/hf_ptq/hf_ptq.sh script for running hf_ptq.py directly
- Add Qwen3-8B hf_ptq_local.yaml example config
- Mount modelopt_recipes alongside modelopt in both Slurm and Docker executors so modelopt.recipe imports work with the overlay
- Update default container to tensorrt-llm/release:1.3.0rc2

## Summary by CodeRabbit

* **New Features**
  * Added a Hugging Face PTQ workflow to run post-training quantization for models.
  * Added a local single-GPU pipeline for Qwen3-8B with fp8 quantization and export support.
  * Added a small command-line launcher to invoke the PTQ workflow with configurable model, quantization, calibration, and export options.
* **Chores**
  * Updated the default runtime container image to a newer release.
  * Included model-optimization recipe files in container mounts.

Signed-off-by: Chenhan Yu <chenhany@nvidia.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 174f3a2 commit ae965a9

4 files changed: 82 additions & 1 deletion

File tree

- common/hf_ptq/hf_ptq.sh
- examples/Qwen/Qwen3-8B/hf_ptq_local.yaml
- tools/launcher/core.py
- tools/launcher/slurm_config.py

common/hf_ptq/hf_ptq.sh

Lines changed: 39 additions & 0 deletions
```bash
#!/bin/bash

# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
source "${SCRIPT_DIR}/../service_utils.sh"

trap 'error_handler $0 $LINENO' ERR # ERROR HANDLER
trap 'exit_handler' EXIT
###################################################################################################

HF_PTQ_DIR=modules/Model-Optimizer/examples/llm_ptq

HF_MODEL=${HF_MODEL:-"Qwen/Qwen3-8B"}
QFORMAT=${QFORMAT:-"fp8"}
CALIB_SIZE=${CALIB_SIZE:-"512"}
EXPORT_PATH=${EXPORT_PATH:-"/scratchspace/exported_model"}

PYTHONPATH="${HF_PTQ_DIR}:${PYTHONPATH}" python ${HF_PTQ_DIR}/hf_ptq.py \
    --pyt_ckpt_path ${HF_MODEL} \
    --qformat ${QFORMAT} \
    --calib_size ${CALIB_SIZE} \
    --export_path ${EXPORT_PATH} \
    "$@"

report_result "PASS: hf_ptq ${HF_MODEL} ${QFORMAT}"
```
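Every setting here is an environment variable with a `${VAR:-default}` fallback, and trailing arguments pass through to hf_ptq.py via `"$@"`. A minimal sketch of a direct invocation, assuming it is run from the launcher repo root with `modules/Model-Optimizer` checked out (`--dataset cnn_dailymail` is the flag the example config below forwards):

```bash
# Override two defaults and forward a dataset flag to hf_ptq.py via "$@".
# Assumes the Model-Optimizer checkout exists under ./modules.
QFORMAT=fp8 CALIB_SIZE=256 \
  bash common/hf_ptq/hf_ptq.sh --dataset cnn_dailymail
```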

tools/launcher/core.py

Lines changed: 15 additions & 0 deletions
```diff
@@ -235,9 +235,18 @@ def build_slurm_executor(
         f"{job_dir}/{experiment_title}/{experiment_id}"
         f"/{task_name}/code/modules/Model-Optimizer/modelopt"
     )
+    modelopt_recipes_dst = os.path.join(
+        os.path.dirname(os.path.normpath(slurm_config.modelopt_install_path)),
+        "modelopt_recipes",
+    )
+    modelopt_recipes_src = (
+        f"{job_dir}/{experiment_title}/{experiment_id}"
+        f"/{task_name}/code/modules/Model-Optimizer/modelopt_recipes"
+    )
     container_mounts += [
         f"{scratch_src}:{scratch_dst}",
         f"{modelopt_src}:{modelopt_dst}",
+        f"{modelopt_recipes_src}:{modelopt_recipes_dst}",
         f"{job_dir}/{experiment_title}:/{experiment_title}",
     ]

@@ -291,11 +300,17 @@ def build_docker_executor(
     modelopt_dst = slurm_config.modelopt_install_path
     if modelopt_src_path is None:
         modelopt_src_path = os.path.join(os.getcwd(), "modules/Model-Optimizer/modelopt")
+    modelopt_recipes_dst = os.path.join(
+        os.path.dirname(os.path.normpath(slurm_config.modelopt_install_path)),
+        "modelopt_recipes",
+    )
+    modelopt_recipes_src_path = os.path.join(os.path.dirname(modelopt_src_path), "modelopt_recipes")
     exp_title_src = os.path.join(job_dir, experiment_title)
     os.makedirs(exp_title_src, exist_ok=True)
     container_mounts += [
         f"{scratch_src}:{scratch_dst}",
         f"{modelopt_src_path}:{modelopt_dst}",
+        f"{modelopt_recipes_src_path}:{modelopt_recipes_dst}",
         f"{exp_title_src}:/{experiment_title}",
     ]
```
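Both executors derive the mount destination from `slurm_config.modelopt_install_path` rather than hardcoding it: `os.path.normpath` drops any trailing slash, `os.path.dirname` walks up to the enclosing dist-packages directory, and `modelopt_recipes` is joined back on. A shell sketch of the same derivation, using the default install path from slurm_config.py:

```bash
# Mirror of the os.path logic above: strip a trailing slash, take the
# parent directory, and append modelopt_recipes.
modelopt_install_path="/usr/local/lib/python3.12/dist-packages/modelopt"
modelopt_recipes_dst="$(dirname "${modelopt_install_path%/}")/modelopt_recipes"
echo "${modelopt_recipes_dst}"
# -> /usr/local/lib/python3.12/dist-packages/modelopt_recipes
```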

examples/Qwen/Qwen3-8B/hf_ptq_local.yaml

Lines changed: 27 additions & 0 deletions

```yaml
# Local single-GPU HF PTQ for Qwen3-8B using hf_ptq.py from Model-Optimizer.
#
# Runs hf_ptq.py directly (Hugging Face path, no Megatron-LM conversion).
#
# Usage:
#   uv run launch.py --yaml examples/Qwen/Qwen3-8B/hf_ptq_local.yaml hf_local=/mnt/hf-local --yes

job_name: Qwen3-8B_fp8_hf_ptq_local
pipeline:
  skip: false
  allow_to_fail: false
  note:

task_0:
  script: common/hf_ptq/hf_ptq.sh
  args:
    - --dataset cnn_dailymail
  environment:
    - HF_MODEL: /hf-local/Qwen/Qwen3-8B
    - QFORMAT: fp8
    - CALIB_SIZE: "512"
    - EXPORT_PATH: /scratchspace/exported_model
  slurm_config:
    _factory_: "slurm_factory"
    nodes: 1
    ntasks_per_node: 1
    gpus_per_node: 1
```
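The `HF_MODEL` path in this config only resolves because the launcher mounts a host directory at `/hf-local` inside the container (see the `SLURM_HF_LOCAL` default in slurm_config.py below). A sketch of the host-side check before launching, assuming the local snapshot layout mirrors the Hub model id:

```bash
# Host side: the directory passed as hf_local (or SLURM_HF_LOCAL) is
# mounted at /hf-local in the container, so this path must hold the
# Qwen3-8B checkpoint for HF_MODEL=/hf-local/Qwen/Qwen3-8B to resolve.
export SLURM_HF_LOCAL=/mnt/hf-local
ls "${SLURM_HF_LOCAL}/Qwen/Qwen3-8B"
```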

tools/launcher/slurm_config.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -53,7 +53,7 @@ def slurm_factory(
     nodes: int = 1,
     ntasks_per_node: int = 1,
     gpus_per_node: int = 1,
-    container: str = "nvcr.io/nvidia/tensorrt-llm/release:1.2.0",
+    container: str = "nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc8",
     modelopt_install_path: str = "/usr/local/lib/python3.12/dist-packages/modelopt",
     container_mounts: list[str] = [
         "{}:/hf-local".format(os.environ.get("SLURM_HF_LOCAL", "/hf-local")),
```
