
Commit 839fa3d

ChenhanYu and claude authored
add: ModelOpt Launcher for Slurm job submission (#1031)
```shell
# Install
cd Model-Optimizer/launcher
curl -LsSf https://astral.sh/uv/install.sh | sh
git submodule update --init --recursive

# Run locally with Docker (single GPU)
uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml hf_local=/mnt/hf-local --yes

# Run on a Slurm cluster (no need to export the following SLURM_XXX envs if used in sandbox)
export SLURM_HOST=login-node.example.com
export SLURM_ACCOUNT=my_account
export SLURM_HF_LOCAL=/shared/hf-local
export SLURM_JOB_DIR=/shared/experiments
uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes

# Preview config without running
uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --dryrun --yes -v

# Override parameters
uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml \
    pipeline.task_0.slurm_config.nodes=2 --yes

# Dump resolved config (a single YAML for reproducibility; useful for QA, engineering, and agents to triage)
uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --to-yaml resolved.yaml

# Run tests
uv pip install -e . pytest
uv run pytest -v
```

## Summary

Add a `launcher/` module for submitting quantization, training, and evaluation jobs to Slurm clusters, or running them locally with Docker, via `nemo-run`. `nemo-run` is used across the `NVIDIA-NeMo/*` projects; it supports a modern YAML factory format (a superset of `OmegaConf` and `Hydra`) and multiple executor backends (here, mainly Docker and Slurm).

A sample YAML config `launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml`:

```yaml
job_name: Qwen3-8B_NVFP4_DEFAULT_CFG
pipeline:
  # hf_local: path prefix for model weights and datasets.
  #
  # This should be a self-managed directory that mirrors the HuggingFace Hub
  # hierarchy (e.g., /hf-local/Qwen/Qwen3-8B/, /hf-local/cais/mmlu/). Using
  # a dedicated folder is preferred over the HuggingFace cache (~/.cache/huggingface)
  # to avoid cache corruption issues with concurrent jobs.
  #
  # Override on CLI:
  #   pipeline.global_vars.hf_local=/mnt/my-models/  # use a different path
  #   pipeline.global_vars.hf_local=""               # download from HuggingFace Hub
  global_vars:
    hf_local: /hf-local/
  task_0:
    script: common/megatron-lm/quantize/quantize.sh
    args:
      - --calib-dataset-path-or-name <<global_vars.hf_local>>abisee/cnn_dailymail
      - --calib-size 32
    environment:
      - MLM_MODEL_CFG: Qwen/Qwen3-8B
      - QUANT_CFG: NVFP4_DEFAULT_CFG
      - HF_MODEL_CKPT: <<global_vars.hf_local>>Qwen/Qwen3-8B
      - MMLU_DATASET: <<global_vars.hf_local>>cais/mmlu
      - TP: 4
    slurm_config:
      _factory_: "slurm_factory"
      nodes: 1
      ntasks_per_node: 4
      gpus_per_node: 4
```

### Key features

- **`launch.py`** — public entrypoint accepting the `--yaml` config format
- **`core.py`** — shared logic (dataclasses, executor builders, run loop) also used by nmm-sandbox's `slurm.py`
- **Factory system** — env-var-driven `slurm_factory` with a `register_factory()` registry
- **`<<global_vars.X>>`** interpolation for sharing values across pipeline tasks
- **`hf_local`** global var for a configurable model/dataset storage path
- **Version reporting** — git commit/branch printed at job start for reproducibility
- **`--to-yaml`** — dump the resolved config for bug reports and reproducibility
- **Model-Optimizer symlink** — `modules/Model-Optimizer -> ../..` (auto-created; avoids a recursive submodule)

### Files

| Path | Description |
|------|-------------|
| `launcher/launch.py` | Public entrypoint |
| `launcher/core.py` | Shared dataclasses, executors, run loop |
| `launcher/slurm_config.py` | SlurmConfig + env-var factory |
| `launcher/common/` | Shell scripts (quantize, query, eagle3, specdec_bench) |
| `launcher/Qwen/Qwen3-8B/` | Example configs (PTQ, EAGLE3 pipeline) |
| `launcher/tests/` | 64 unit tests |
| `launcher/README.md` | User guide |
| `launcher/ADVANCED.md` | Architecture, mount mechanism, Claude Code workflows |
| `launcher/CLAUDE.md` | Claude Code project instructions |
| `.github/workflows/unit_tests.yml` | CI job for launcher tests |

### Verified

- The same YAML produces identical MMLU results via both `slurm.py` and `launch.py`:
  - Local Docker (TP=1): 0.719 (128/178)
  - OCI-HSG Slurm (TP=4): 0.730 (130/178)

## Test plan

- [x] 64 unit tests (core, factory, YAML, Docker executor, Slurm executor, Docker launch)
- [x] CI workflow added to `.github/workflows/unit_tests.yml`
- [x] Local Docker end-to-end with `python:3.12-slim`
- [x] Qwen3-8B PTQ on OCI-HSG via both launchers
- [ ] Reviewer runs: `cd launcher && uv pip install -e . pytest && uv run pytest -v`

### Before your PR is "*Ready for review*"

Make sure you read and follow the [Contributor guidelines](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md) and your commits are signed (`git commit -s -S`). Make sure you read and follow the [Security Best Practices](https://github.com/NVIDIA/Model-Optimizer/blob/main/SECURITY.md#security-coding-practices-for-contributors) (e.g., avoiding hardcoded `trust_remote_code=True`, `torch.load(..., weights_only=False)`, `pickle`, etc.).

- Is this change backward compatible?: ✅ / ❌ / N/A <!--- If ❌, explain why. -->
- If you copied code from any other sources or added a new PIP dependency, did you follow guidance in `CONTRIBUTING.md`: ✅ / ❌ / N/A <!--- Mandatory -->
- Did you write any new necessary tests?: ✅ / ❌ / N/A <!--- Mandatory for new features or examples. -->
- Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?: ✅ / ❌ / N/A <!--- Only for new features, API changes, critical bug fixes or backward incompatible changes. -->

### Additional Information

<!-- E.g. related issue. -->

## Summary by CodeRabbit

## Release Notes

* **New Features**
  * Introduced ModelOpt Launcher for submitting quantization, training, and evaluation jobs to Slurm clusters or running locally via Docker.
  * Added YAML-based job configuration with multi-task pipeline support and global variable interpolation.
  * Included example workflows for Qwen3-8B quantization and EAGLE3 speculative decoding.
  * Provided configurable Slurm and execution environment defaults.
* **Documentation**
  * Added a comprehensive README with quick start, environment setup, and configuration guidance.
  * Added an advanced guide detailing the launcher architecture and integration patterns.

---

Signed-off-by: Chenhan Yu <chenhany@nvidia.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
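To illustrate the `<<global_vars.X>>` interpolation feature described above, here is a minimal sketch of how such markers could be resolved across a task tree. The function names (`interpolate`, `resolve_task`) and the regex-based approach are assumptions for illustration, not the launcher's actual implementation:

```python
import re

# Matches markers of the form <<global_vars.NAME>> (illustrative pattern).
PATTERN = re.compile(r"<<global_vars\.([A-Za-z0-9_]+)>>")

def interpolate(value, global_vars):
    """Replace every <<global_vars.X>> marker in a string with its value."""
    return PATTERN.sub(lambda m: str(global_vars[m.group(1)]), value)

def resolve_task(node, global_vars):
    """Walk a task config and interpolate all string leaves."""
    if isinstance(node, str):
        return interpolate(node, global_vars)
    if isinstance(node, list):
        return [resolve_task(v, global_vars) for v in node]
    if isinstance(node, dict):
        return {k: resolve_task(v, global_vars) for k, v in node.items()}
    return node

global_vars = {"hf_local": "/hf-local/"}
task = {"args": ["--calib-dataset-path-or-name <<global_vars.hf_local>>abisee/cnn_dailymail"]}
print(resolve_task(task, global_vars))
# → {'args': ['--calib-dataset-path-or-name /hf-local/abisee/cnn_dailymail']}
```

Because the prefix is substituted textually, setting `hf_local=""` on the CLI naturally falls back to bare HuggingFace Hub repo IDs.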
1 parent 52cfa4e commit 839fa3d

38 files changed

Lines changed: 4108 additions & 2 deletions

.github/workflows/unit_tests.yml

Lines changed: 21 additions & 2 deletions
```diff
@@ -12,6 +12,7 @@ on:
       - "tests/unit/**"
       - "pyproject.toml"
       - "tox.ini"
+      - "tools/launcher/**"
   schedule:
     - cron: "0 0 * * *" # Nightly
   workflow_dispatch: # On-demand
@@ -98,6 +99,23 @@ jobs:
       - uses: ./.github/actions/ubuntu-setup
       - name: Run unit tests
         run: pip install tox && tox -e py312-torch210-tf_${{ matrix.tf }}-unit
+  launcher:
+    if: github.event_name == 'pull_request'
+    needs: [linux]
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          submodules: recursive
+      - name: Run launcher tests
+        working-directory: tools/launcher
+        run: |
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          export PATH="$HOME/.local/bin:$PATH"
+          uv venv .venv
+          uv pip install -e . pytest
+          uv run python3 -m pytest -v
   partial-install:
     if: github.event_name == 'pull_request'
     needs: [linux]
@@ -114,7 +132,7 @@ jobs:
   unit-pr-required-check:
     # Run even if some jobs are skipped
     if: ${{ github.event_name == 'pull_request' && always() }}
-    needs: [linux, windows, multi-py, multi-torch, multi-transformers, partial-install]
+    needs: [linux, windows, multi-py, multi-torch, multi-transformers, partial-install, launcher]
     runs-on: ubuntu-latest
     steps:
       - name: Required unit tests did not succeed
@@ -124,5 +142,6 @@ jobs:
             needs.multi-py.result != 'success' ||
             needs.multi-torch.result != 'success' ||
             needs.multi-transformers.result != 'success' ||
-            needs.partial-install.result != 'success' }}
+            needs.partial-install.result != 'success' ||
+            needs.launcher.result != 'success' }}
         run: exit 1
```

.gitmodules

Lines changed: 3 additions & 0 deletions
```diff
@@ -0,0 +1,3 @@
+[submodule "tools/launcher/modules/Megatron-LM"]
+    path = tools/launcher/modules/Megatron-LM
+    url = https://github.com/NVIDIA/Megatron-LM.git
```

tools/launcher/.gitignore

Lines changed: 22 additions & 0 deletions
```text
# Virtual environment
.venv/

# nemo-run state
.slurm_jobs
.docker_jobs.json
.local_jobs.json

# Experiment artifacts (generated at runtime)
experiments/
local_experiments/

# uv lock (generated, not portable)
uv.lock

# Python cache
__pycache__/

# Editor swap files
*.swp
*.swo
*~
```

tools/launcher/CLAUDE.md

Lines changed: 113 additions & 0 deletions
````markdown
# CLAUDE.md — ModelOpt Launcher

## Overview

The launcher submits ModelOpt quantization, training, and evaluation jobs to Slurm clusters or runs them locally with Docker.

## Key Files

| File | Role |
|------|------|
| `launch.py` | Public entrypoint — accepts `--yaml` or `pipeline=@` |
| `core.py` | Shared dataclasses, executor builders, run loop, version reporting |
| `slurm_config.py` | `SlurmConfig` dataclass and env-var-driven `slurm_factory` |
| `common/` | Shell scripts and `query.py` packaged to the cluster |
| `modules/Megatron-LM/` | Git submodule |
| `modules/Model-Optimizer` | Symlink to `../..` (auto-created by `launch.py` if missing) |

## Common Commands

```shell
# Run locally with Docker
uv run launch.py --yaml examples/Qwen/Qwen3-8B/megatron_lm_ptq.yaml hf_local=/mnt/hf-local --yes

# Run on Slurm (set env vars first)
uv run launch.py --yaml examples/Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes

# Dry run — preview resolved config
uv run launch.py --yaml examples/Qwen/Qwen3-8B/megatron_lm_ptq.yaml --dryrun --yes -v

# Dump resolved config
uv run launch.py --yaml examples/Qwen/Qwen3-8B/megatron_lm_ptq.yaml --to-yaml resolved.yaml

# Run unit tests
uv pip install pytest
uv run python3 -m pytest tests/ -v
```

## YAML Config Format

The `--yaml` format maps top-level keys to `launch()` function arguments:

```yaml
job_name: Qwen3-8B_NVFP4_DEFAULT_CFG
pipeline:
  global_vars:
    hf_local: /hf-local/
  task_0:
    script: common/megatron_lm/quantize/quantize.sh
    args:
      - --calib-dataset-path-or-name <<global_vars.hf_local>>abisee/cnn_dailymail
    environment:
      - MLM_MODEL_CFG: Qwen/Qwen3-8B
      - HF_MODEL_CKPT: <<global_vars.hf_local>>Qwen/Qwen3-8B
      - TP: 4
    slurm_config:
      _factory_: "slurm_factory"
      nodes: 1
      ntasks_per_node: 4
      gpus_per_node: 4
```

Key conventions:

- Scripts go in `common/` (not `services/`)
- `<<global_vars.X>>` interpolation for shared values across tasks
- `_factory_: "slurm_factory"` — resolved via `register_factory()` in `core.py`
- Environment is a list of single-key dicts: `- KEY: value`
- CLI overrides: `pipeline.task_0.slurm_config.nodes=2`

## Architecture

```text
launch.py → imports core.py + slurm_config.py

core.run_jobs()

build_docker_executor() or build_slurm_executor()

nemo_run.Experiment → Docker or Slurm
```

- `set_slurm_config_type(SlurmConfig)` — patches the `SandboxTask` annotation at import time
- `register_factory("slurm_factory", slurm_factory)` — enables YAML `_factory_` resolution
- `report_versions(base_dir)` — prints git commit/branch for the launcher + submodules
- `get_default_env(title)` — returns `(slurm_env, local_env)` dicts

## Adding a New Model Config

1. Create `examples/<Org>/<Model>/megatron_lm_ptq.yaml` following the format above
2. Set `MLM_MODEL_CFG` to the HuggingFace repo ID
3. Set `QUANT_CFG` (e.g., `NVFP4_DEFAULT_CFG`, `INT8_DEFAULT_CFG`)
4. Set GPU/node counts based on model size
5. Test: `uv run launch.py --yaml <path> --dryrun --yes -v`

## Testing

65 unit tests in `tests/`. They run standalone, without installing `modelopt`, from the launcher directory:

```shell
uv run python3 -m pytest tests/ -v
```

Tests cover: core dataclasses, factory registry, global_vars interpolation, YAML formats, Docker/Slurm executor construction (mocked), environment merging, metadata writing, and end-to-end Docker launch via subprocess.

## Further Reading

- [docs/configuration.md](docs/configuration.md) — YAML formats, overrides, hf_local
- [docs/architecture.md](docs/architecture.md) — Shared core, factory system, typed tasks, mount mechanism
- [docs/testing.md](docs/testing.md) — Running tests locally and in CI
- [docs/claude_code.md](docs/claude_code.md) — Claude Code workflows
- [docs/contributing.md](docs/contributing.md) — Adding models, typed tasks, bug reporting
````
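The `_factory_` convention described in CLAUDE.md can be sketched in a few lines. The `register_factory` name mirrors the doc; the registry layout and function bodies below are assumptions for illustration:

```python
# Registry mapping YAML `_factory_` names to builder callables.
_FACTORIES = {}

def register_factory(name, fn):
    """Make `fn` available to YAML nodes as `_factory_: "<name>"`."""
    _FACTORIES[name] = fn

def resolve(node):
    """If a dict carries `_factory_`, build the object via the registered callable."""
    if isinstance(node, dict) and "_factory_" in node:
        kwargs = {k: v for k, v in node.items() if k != "_factory_"}
        return _FACTORIES[node["_factory_"]](**kwargs)
    return node

def slurm_factory(nodes=1, ntasks_per_node=1, gpus_per_node=1):
    # A real factory would also fold in SLURM_* env-var defaults and
    # return a SlurmConfig; a plain dict stands in here.
    return {"nodes": nodes, "ntasks_per_node": ntasks_per_node, "gpus_per_node": gpus_per_node}

register_factory("slurm_factory", slurm_factory)
cfg = resolve({"_factory_": "slurm_factory", "nodes": 1, "ntasks_per_node": 4, "gpus_per_node": 4})
print(cfg)  # → {'nodes': 1, 'ntasks_per_node': 4, 'gpus_per_node': 4}
```

This keeps the YAML declarative: the config names a builder, and CLI overrides such as `pipeline.task_0.slurm_config.nodes=2` simply change the kwargs passed to it.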

tools/launcher/README.md

Lines changed: 67 additions & 0 deletions
````markdown
# ModelOpt Launcher

Submit ModelOpt quantization, training, and evaluation jobs to Slurm clusters or run them locally with Docker.

## Quick Start

```bash
# Install
curl -LsSf https://astral.sh/uv/install.sh | sh
git submodule update --init --recursive

# Run locally with 1 GPU
cd Model-Optimizer/tools/launcher
uv run launch.py --yaml examples/Qwen/Qwen3-8B/megatron_lm_ptq_local.yaml hf_local=/mnt/hf-local --yes

# Run on a Slurm cluster (4 GPUs)
export SLURM_HOST=login-node.example.com
export SLURM_ACCOUNT=my_account
export SLURM_HF_LOCAL=/mnt/hf-local
export SLURM_JOB_DIR=/shared/experiments
uv run launch.py --yaml examples/Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes
```

> **Local vs cluster:** `megatron_lm_ptq.yaml` defaults to TP=4 on 4 GPUs.
> Use `megatron_lm_ptq_local.yaml` for single-GPU local Docker runs.

## Directory Structure

```text
tools/launcher/
├── launch.py                # Main entrypoint
├── core.py                  # Core logic (dataclasses, executors, run loop)
├── slurm_config.py          # SlurmConfig dataclass and factory
├── common/                  # Scripts and typed tasks
│   ├── megatron_lm/quantize/
│   │   ├── quantize.sh      # PTQ quantization + MMLU evaluation
│   │   └── task.py          # MegatronLMQuantizeTask (typed config)
│   ├── tensorrt_llm/query.sh  # TRT-LLM server + query
│   ├── vllm/query.sh        # vLLM server + query
│   ├── eagle3/              # EAGLE3 speculative decoding scripts
│   └── specdec_bench/       # Speculative decoding benchmark
├── examples/                # Example configs
│   └── Qwen/Qwen3-8B/
│       ├── megatron_lm_ptq.yaml        # PTQ (4 GPUs, Slurm)
│       ├── megatron_lm_ptq_local.yaml  # PTQ (1 GPU, local Docker)
│       └── hf_offline_eagle3.yaml      # EAGLE3 offline pipeline
├── tests/                   # 64 unit tests
├── modules/                 # Dependencies
│   ├── Megatron-LM/         # Git submodule
│   └── Model-Optimizer -> ../..  # Symlink (auto-created)
└── docs/                    # Documentation
    ├── configuration.md     # YAML formats, overrides, hf_local
    ├── architecture.md      # Design, factory system, typed tasks
    ├── testing.md           # Running tests, CI
    ├── claude_code.md       # Claude Code workflows
    └── contributing.md      # Adding models, bug reporting
```

## Documentation

| Guide | Description |
|-------|-------------|
| [Configuration](docs/configuration.md) | YAML formats, CLI overrides, flags, `hf_local` |
| [Architecture](docs/architecture.md) | Shared core, factory system, typed tasks, mount mechanism |
| [Testing](docs/testing.md) | Running tests locally and in CI |
| [Claude Code](docs/claude_code.md) | Submit, monitor, diagnose workflows |
| [Contributing](docs/contributing.md) | Adding models, typed tasks, bug reporting |
````
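The Quick Start above exports `SLURM_*` environment variables that the env-var-driven factory consumes. A hedged sketch of that pattern follows; the `SlurmConfig` fields and the `slurm_config_from_env` helper are illustrative assumptions, not the launcher's actual API:

```python
import os
from dataclasses import dataclass

@dataclass
class SlurmConfig:
    # Field names are assumptions mirroring the exported env vars.
    host: str
    account: str
    hf_local: str
    job_dir: str
    nodes: int = 1

def slurm_config_from_env(**overrides):
    """Build a config from SLURM_* env vars, then apply explicit overrides."""
    cfg = SlurmConfig(
        host=os.environ.get("SLURM_HOST", ""),
        account=os.environ.get("SLURM_ACCOUNT", ""),
        hf_local=os.environ.get("SLURM_HF_LOCAL", "/hf-local/"),
        job_dir=os.environ.get("SLURM_JOB_DIR", "experiments"),
    )
    for key, value in overrides.items():
        setattr(cfg, key, value)
    return cfg

os.environ["SLURM_HOST"] = "login-node.example.com"
print(slurm_config_from_env(nodes=2).host)  # → login-node.example.com
```

Layering env-var defaults under YAML/CLI overrides lets one config file run unchanged across clusters that differ only in login node, account, and shared paths.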

tools/launcher/__init__.py

Lines changed: 16 additions & 0 deletions
```python
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""ModelOpt Launcher — submit quantization, training, and evaluation jobs to Slurm clusters."""
```
Lines changed: 42 additions & 0 deletions
```bash
#!/bin/bash

# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

source ${SCRIPT_DIR}/../service_utils.sh

###################################################################################################

if [ -z ${SLURM_ARRAY_TASK_ID} ]; then
    TASK_ID=0
else
    echo "SLURM_ARRAY_TASK_ID ${SLURM_ARRAY_TASK_ID}"
    TASK_ID=${SLURM_ARRAY_TASK_ID}
fi

if [ -z ${SLURM_ARRAY_TASK_COUNT} ]; then
    TASK_COUNT=1
else
    echo "SLURM_ARRAY_TASK_COUNT ${SLURM_ARRAY_TASK_COUNT}"
    TASK_COUNT=${SLURM_ARRAY_TASK_COUNT}
fi

trtllm-llmapi-launch python3 modules/Model-Optimizer/examples/speculative_decoding/collect_hidden_states/compute_hidden_states_trtllm.py \
    --model ${HF_MODEL_CKPT} \
    --dp-rank ${TASK_ID} \
    --dp-world-size ${TASK_COUNT} \
    ${@}
```
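The script above maps each Slurm array task to a `--dp-rank` out of `--dp-world-size`, i.e. data-parallel sharding of the workload. A hedged sketch of that idea (the `shard` helper and the interleaved split are illustrative; the real consumer of `--dp-rank` may partition differently):

```python
import os

def shard(items, dp_rank, dp_world_size):
    """Return the interleaved slice of `items` owned by this rank."""
    return items[dp_rank::dp_world_size]

# Mirror the shell defaults: rank 0 of 1 when not running as a job array.
task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
task_count = int(os.environ.get("SLURM_ARRAY_TASK_COUNT", 1))

samples = list(range(10))
print(shard(samples, task_id, task_count))
```

With a 4-task array, rank 1 would process items `[1, 5, 9]` of a 10-item dataset, so the tasks cover the data with no overlap.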
Lines changed: 40 additions & 0 deletions
```bash
#!/bin/bash

# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
source ${SCRIPT_DIR}/../service_utils.sh

pip install -r modules/Model-Optimizer/examples/speculative_decoding/requirements.txt
pip install huggingface-hub>=1.2.1
export PATH=$PATH:/workspace/.local/bin

###################################################################################################

trap 'error_handler $0 $LINENO' ERR # ERROR HANDLER

bash modules/Model-Optimizer/examples/speculative_decoding/launch_train.sh \
    --model ${HF_MODEL_CKPT} \
    ${@}

python modules/Model-Optimizer/examples/speculative_decoding/scripts/export_hf_checkpoint.py \
    --model_path /scratchspace/eagle3 \
    --export_path /scratchspace/export

###################################################################################################

# This function handles the exit status (fails the CI).
#exit_handler $0
```
