Skip to content

Commit 4d522da

Browse files
Merge branch 'main' into chenjiel/add_qwen_moe_test
2 parents 65a8889 + 4e33368 commit 4d522da

7 files changed

Lines changed: 35 additions & 8 deletions

File tree

.github/codecov.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@ coverage:
99
project:
1010
default:
1111
target: auto
12-
threshold: 2% # Allow atmost 2% coverage drop from main branch.
12+
threshold: 1% # Allow at most 1% coverage drop from main branch.
1313
patch: false

CHANGELOG.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ Changelog
3232
- [Experimental] Add support for transformers>=5.0, including generic PTQ and unified HF checkpoint export for fused MoE expert modules (Mixtral, Qwen2-MoE, Qwen3-MoE, Qwen3.5-MoE, DeepSeek-V3, Jamba, OLMoE, etc.).
3333
- Improve ``megatron_preprocess_data``: add ``--reasoning_content`` support for Nemotron v3 datasets, eliminate intermediate JSONL for HuggingFace datasets, return output file prefixes from the Python API, add gzip input support (``.jsonl.gz``), add ``--strip_newlines`` flag for plain-text pretraining data, add ``--hf_streaming`` for very large datasets (only consumed rows downloaded), and auto-shuffle when ``--hf_max_samples_per_split`` is set to avoid biased sampling.
3434

35-
0.43 (2026-04-09)
35+
0.43 (2026-04-16)
3636
^^^^^^^^^^^^^^^^^
3737

3838
**Bug Fixes**
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
datasets>=2.14.5
2-
torch==2.9.0
3-
transformers==4.57.3
2+
torch
3+
transformers<5.0.0

modelopt/torch/utils/serialization.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,11 @@ def safe_save(obj: Any, f: str | os.PathLike | BinaryIO, **kwargs) -> None:
5454

5555

5656
def safe_load(f: str | os.PathLike | BinaryIO | bytes, **kwargs) -> Any:
57-
"""Load a checkpoint securely using weights_only=True by default."""
58-
kwargs.setdefault("weights_only", True)
57+
"""Load a checkpoint securely using ``weights_only=True`` by default.
5958
59+
NOTE: We don't set a default ``weights_only`` (interpreted as True for torch>=2.6) so you can override it with
60+
``export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1`` if you see ``pickle.UnpicklingError`` and trust the checkpoint.
61+
"""
6062
if isinstance(f, (bytes, bytearray)):
6163
f = BytesIO(f)
6264

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ dependencies = [
4343
# modelopt.torch
4444
"PyYAML>=6.0",
4545
"omegaconf>=2.3.0",
46-
"pulp",
46+
"pulp<4.0", # breaking changes in upcoming 4.0 release
4747
"pydantic>=2.0",
4848
"regex",
4949
"rich",

tests/unit/torch/utils/test_serialization.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
"""Tests for Modelopt's serialization utilities."""
1717

1818
from io import BytesIO
19+
from pickle import UnpicklingError
1920

21+
import pytest
2022
import torch
2123

2224
from modelopt.torch.opt.config import ModeloptBaseConfig
@@ -70,3 +72,25 @@ def test_safe_load_with_path(tmp_path):
7072
loaded_state = safe_load(file_path)
7173

7274
assert loaded_state["data"] == 42
75+
76+
77+
class _UnsafeObj:
78+
"""Not registered in torch safe globals — unpickling fails with weights_only=True."""
79+
80+
def __init__(self, v):
81+
self.v = v
82+
83+
84+
def test_safe_load_env_var_bypasses_weights_only(tmp_path, monkeypatch):
85+
"""Verify TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 allows safe_load to load objects unsafe for weights_only."""
86+
file_path = tmp_path / "unsafe.pt"
87+
torch.save({"obj": _UnsafeObj(42)}, file_path)
88+
89+
# Always fails when weights_only is not set (default=True)
90+
with pytest.raises(UnpicklingError):
91+
safe_load(file_path)
92+
93+
# With the env var, safe_load (no explicit weights_only) defers to torch's default=False
94+
monkeypatch.setenv("TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD", "1")
95+
loaded = safe_load(file_path)
96+
assert loaded["obj"].v == 42

tox.ini

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ commands =
8282
[testenv:cuda13-gpu-megatron]
8383
commands_pre =
8484
# Install deps here so that it gets installed even in --current-env
85-
pip install -U megatron-core
85+
# Temporarily disable latest mcore until we fix its nvidia-resiliency-ext dependency
86+
pip install 'megatron-core<0.17.0'
8687
pip install --no-build-isolation git+https://github.com/state-spaces/mamba.git
8788
pip install --no-build-isolation git+https://github.com/Dao-AILab/causal-conv1d.git
8889
pip install -e .[hf,dev-test]

0 commit comments

Comments
 (0)