From 6f3d1d376dae2333ce0666a0196ced08da4c1df9 Mon Sep 17 00:00:00 2001
From: EazyReal <8047065+EazyReal@users.noreply.github.com>
Date: Sat, 20 Jun 2026 17:46:19 +0000
Subject: [PATCH] fix(train): support eval-only mode (--num-rollout 0)

---
 .github/workflows/pr-test.yml               |   2 +-
 .github/workflows/pr-test.yml.j2            |   1 +
 slime/backends/megatron_utils/model.py      |   9 +-
 tests/test_eval_only_optimizer_scheduler.py | 115 ++++++++++++++++++++
 4 files changed, 125 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_eval_only_optimizer_scheduler.py

diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
index 2d6e8ce5b2..10fbc23124 100644
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -372,7 +372,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        info: [{"num_gpus": 0, "test_file": "test_megatron_argument_validation.py"}, {"num_gpus": 0, "test_file": "utils/test_megatron_server_arguments.py"}, {"num_gpus": 0, "test_file": "test_dp_schedule.py"}, {"num_gpus": 0, "test_file": "test_cp_utils.py"}, {"num_gpus": 0, "test_file": "test_metric_report.py"}, {"num_gpus": 0, "test_file": "test_metric_report_dist.py"}, {"num_gpus": 0, "test_file": "test_loss_cp_invariance.py"}, {"num_gpus": 0, "test_file": "test_logprob_response_spans.py"}, {"num_gpus": 0, "test_file": "test_value_temperature.py"}, {"num_gpus": 0, "test_file": "test_cispo_loss.py"}, {"num_gpus": 0, "test_file": "test_rm_f1.py"}, {"num_gpus": 0, "test_file": "test_rm_gpqa.py"}, {"num_gpus": 0, "test_file": "test_rm_math.py"}, {"num_gpus": 0, "test_file": "test_rm_math_dapo.py"}, {"num_gpus": 0, "test_file": "test_rm_deepscaler.py"}, {"num_gpus": 0, "test_file": "test_sample.py"}, {"num_gpus": 0, "test_file": "test_rollout_validation.py"}, {"num_gpus": 0, "test_file": "test_placement_group.py"}, {"num_gpus": 0, "test_file": "test_external_sglang_engines.py"}, {"num_gpus": 0, "test_file": "utils/test_hf_checkpoint_saver.py"}, {"num_gpus": 0, "test_file": "plugin_contracts/test_plugin_rollout_contracts.py"}, {"num_gpus": 0, "test_file": "plugin_contracts/test_plugin_runtime_hook_contracts.py"}, {"num_gpus": 0, "test_file": "plugin_contracts/test_plugin_path_loading_contracts.py"}, {"num_gpus": 0, "test_file": "plugin_contracts/test_plugin_generate_contracts.py"}]
+        info: [{"num_gpus": 0, "test_file": "test_megatron_argument_validation.py"}, {"num_gpus": 0, "test_file": "utils/test_megatron_server_arguments.py"}, {"num_gpus": 0, "test_file": "test_dp_schedule.py"}, {"num_gpus": 0, "test_file": "test_cp_utils.py"}, {"num_gpus": 0, "test_file": "test_metric_report.py"}, {"num_gpus": 0, "test_file": "test_metric_report_dist.py"}, {"num_gpus": 0, "test_file": "test_loss_cp_invariance.py"}, {"num_gpus": 0, "test_file": "test_logprob_response_spans.py"}, {"num_gpus": 0, "test_file": "test_value_temperature.py"}, {"num_gpus": 0, "test_file": "test_cispo_loss.py"}, {"num_gpus": 0, "test_file": "test_rm_f1.py"}, {"num_gpus": 0, "test_file": "test_rm_gpqa.py"}, {"num_gpus": 0, "test_file": "test_rm_math.py"}, {"num_gpus": 0, "test_file": "test_rm_math_dapo.py"}, {"num_gpus": 0, "test_file": "test_rm_deepscaler.py"}, {"num_gpus": 0, "test_file": "test_sample.py"}, {"num_gpus": 0, "test_file": "test_rollout_validation.py"}, {"num_gpus": 0, "test_file": "test_placement_group.py"}, {"num_gpus": 0, "test_file": "test_external_sglang_engines.py"}, {"num_gpus": 0, "test_file": "utils/test_hf_checkpoint_saver.py"}, {"num_gpus": 0, "test_file": "test_eval_only_optimizer_scheduler.py"}, {"num_gpus": 0, "test_file": "plugin_contracts/test_plugin_rollout_contracts.py"}, {"num_gpus": 0, "test_file": "plugin_contracts/test_plugin_runtime_hook_contracts.py"}, {"num_gpus": 0, "test_file": "plugin_contracts/test_plugin_path_loading_contracts.py"}, {"num_gpus": 0, "test_file": "plugin_contracts/test_plugin_generate_contracts.py"}]
     defaults:
       run:
         working-directory: ${{ github.workspace }}
diff --git a/.github/workflows/pr-test.yml.j2 b/.github/workflows/pr-test.yml.j2
index 30cf386421..574ce12cc9 100644
--- a/.github/workflows/pr-test.yml.j2
+++ b/.github/workflows/pr-test.yml.j2
@@ -82,6 +82,7 @@
         {'test_file': 'test_placement_group.py', 'num_gpus': 0},
         {'test_file': 'test_external_sglang_engines.py', 'num_gpus': 0},
         {'test_file': 'utils/test_hf_checkpoint_saver.py', 'num_gpus': 0},
+        {'test_file': 'test_eval_only_optimizer_scheduler.py', 'num_gpus': 0},
         {'test_file': 'plugin_contracts/test_plugin_rollout_contracts.py', 'num_gpus': 0},
         {'test_file': 'plugin_contracts/test_plugin_runtime_hook_contracts.py', 'num_gpus': 0},
         {'test_file': 'plugin_contracts/test_plugin_path_loading_contracts.py', 'num_gpus': 0},
diff --git a/slime/backends/megatron_utils/model.py b/slime/backends/megatron_utils/model.py
index 1ad6cd7957..7a55c8a575 100644
--- a/slime/backends/megatron_utils/model.py
+++ b/slime/backends/megatron_utils/model.py
@@ -201,7 +201,14 @@ def get_optimizer_param_scheduler(args: Namespace, optimizer: MegatronOptimizer)
     # resume), so the worst case is the cosine/linear schedule reaches its
     # plateau slightly early or late. Pass ``--lr-decay-iters`` explicitly if you
     # need exact decay control.
-    args.train_iters = args.num_rollout * args.rollout_batch_size * args.n_samples_per_prompt // args.global_batch_size
+    estimated_train_iters = (
+        args.num_rollout * args.rollout_batch_size * args.n_samples_per_prompt // args.global_batch_size
+    )
+    # ``num_rollout == 0`` is eval-only (see ``train.py``): no training runs, but
+    # the scheduler is still built and Megatron asserts ``lr_decay_steps > 0``.
+    # Use the smallest valid schedule size for zero-estimated runs; the training
+    # loop itself remains controlled by ``args.num_rollout``.
+    args.train_iters = max(1, estimated_train_iters)
     if args.lr_decay_iters is None:
         args.lr_decay_iters = args.train_iters
     lr_decay_steps = args.lr_decay_iters * args.global_batch_size
diff --git a/tests/test_eval_only_optimizer_scheduler.py b/tests/test_eval_only_optimizer_scheduler.py
new file mode 100644
index 0000000000..90591eabed
--- /dev/null
+++ b/tests/test_eval_only_optimizer_scheduler.py
@@ -0,0 +1,115 @@
+"""CPU regression test for eval-only mode (``--num-rollout 0``).
+
+With ``num_rollout == 0`` the estimated ``train_iters`` is 0; without the ``max(1, ...)`` clamp,
+``lr_decay_steps`` is then 0 and Megatron's ``OptimizerParamScheduler`` aborts on
+``assert lr_decay_steps > 0``. Megatron is stubbed because it isn't installed on the CPU CI runner.
+"""
+
+import importlib
+import sys
+import types
+from types import SimpleNamespace
+
+import pytest
+
+NUM_GPUS = 0
+
+
+class _RecordingScheduler:
+    """Stub ``OptimizerParamScheduler`` that only mirrors Megatron's ``lr_decay_steps > 0`` assertion."""
+
+    def __init__(self, optimizer, **kwargs):
+        assert kwargs["lr_decay_steps"] > 0  # KeyError if lr_decay_steps is not passed
+
+
+def _register(monkeypatch, name, **attrs):
+    """Install a fresh module named ``name`` carrying ``attrs`` into ``sys.modules`` via ``monkeypatch``."""
+    stub = types.ModuleType(name)
+    vars(stub).update(attrs)
+    monkeypatch.setitem(sys.modules, name, stub)
+
+
+def _load_model_module(monkeypatch):
+    """Freshly import slime.backends.megatron_utils.model with Megatron stubbed out."""
+    s = object  # placeholder for symbols that are imported but unused here
+
+    _register(monkeypatch, "megatron")
+    _register(monkeypatch, "megatron.core.mpu")
+    _register(monkeypatch, "megatron.core", mpu=sys.modules["megatron.core.mpu"])
+    _register(monkeypatch, "megatron.core.distributed", DistributedDataParallel=s, finalize_model_grads=s)
+    _register(monkeypatch, "megatron.core.enums", ModelType=s)
+    _register(monkeypatch, "megatron.core.models.gpt", GPTModel=s)
+    _register(monkeypatch, "megatron.core.models", gpt=sys.modules["megatron.core.models.gpt"])
+    _register(monkeypatch, "megatron.core.optimizer", OptimizerConfig=s, get_megatron_optimizer=s)
+    _register(monkeypatch, "megatron.core.optimizer.optimizer", MegatronOptimizer=s)
+    _register(monkeypatch, "megatron.core.optimizer_param_scheduler", OptimizerParamScheduler=_RecordingScheduler)
+    _register(monkeypatch, "megatron.core.pipeline_parallel", get_forward_backward_func=s)
+    _register(monkeypatch, "megatron.core.pipeline_parallel.utils", unwrap_model=s)
+    _register(monkeypatch, "megatron.core.utils", get_model_config=s, unwrap_model=s)
+    _register(monkeypatch, "megatron.training")
+    _register(monkeypatch, "megatron.training.global_vars", get_args=s)
+    _register(monkeypatch, "megatron.training.training", get_model=s)
+
+    _register(monkeypatch, "slime.backends.megatron_utils.checkpoint", load_checkpoint=s, save_checkpoint=s)
+    _register(monkeypatch, "slime.backends.megatron_utils.cp_utils", reduce_train_step_metrics=s)
+    _register(monkeypatch, "slime.backends.megatron_utils.data", DataIterator=s, get_batch=s)
+    _register(
+        monkeypatch,
+        "slime.backends.megatron_utils.loss",
+        ROLLOUT_TOP_P_TOKEN_KEYS=(),
+        get_rollout_top_p_logprob_kwargs=s,
+        loss_function=s,
+    )
+    _register(monkeypatch, "slime.backends.megatron_utils.model_provider", get_model_provider_func=s)
+    # logging_utils pulls wandb/tensorboard and memory_utils pulls psutil; the CPU CI runner has none of them.
+    _register(monkeypatch, "slime.utils.logging_utils")
+    _register(monkeypatch, "slime.utils.memory_utils", clear_memory=s)
+
+    monkeypatch.setitem(sys.modules, "slime.backends.megatron_utils.model", None)  # teardown restores this entry
+    del sys.modules["slime.backends.megatron_utils.model"]  # drop the placeholder so the import runs afresh
+    return importlib.import_module("slime.backends.megatron_utils.model")
+
+
+def _make_args(**overrides):
+    """Build scheduler args from test defaults; ``overrides`` replace or add fields."""
+    fields = {
+        "num_rollout": 4,
+        "rollout_batch_size": 8,
+        "n_samples_per_prompt": 8,
+        "global_batch_size": 16,
+        "lr_decay_iters": None,
+        "lr_wsd_decay_iters": None,
+        "lr_warmup_fraction": None,
+        "lr_warmup_iters": 0,
+        "lr_warmup_init": 0.0,
+        "lr": 1e-6,
+        "min_lr": 0.0,
+        "lr_decay_style": "constant",
+        "start_weight_decay": 0.0,
+        "end_weight_decay": 0.0,
+        "weight_decay_incr_style": "constant",
+        "use_checkpoint_opt_param_scheduler": False,
+        "override_opt_param_scheduler": False,
+        "lr_wsd_decay_style": "exponential",
+    }
+    return SimpleNamespace(**{**fields, **overrides})
+
+
+@pytest.mark.unit
+def test_eval_only_num_rollout_zero_does_not_crash(monkeypatch):
+    """``num_rollout=0`` must still build the scheduler (no assert) and clamp ``train_iters`` to 1."""
+    eval_only_args = _make_args(num_rollout=0)
+    _load_model_module(monkeypatch).get_optimizer_param_scheduler(eval_only_args, optimizer=object())
+    assert eval_only_args.train_iters == 1
+
+
+@pytest.mark.unit
+def test_clamp_is_a_noop_for_normal_training(monkeypatch):
+    """A non-zero estimate passes through the clamp unchanged: 4 * 8 * 8 // 16 == 16."""
+    train_args = _make_args(num_rollout=4, rollout_batch_size=8, n_samples_per_prompt=8, global_batch_size=16)
+    _load_model_module(monkeypatch).get_optimizer_param_scheduler(train_args, optimizer=object())
+    assert train_args.train_iters == 16
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__]))  # exit status is pytest's return code