diff --git a/.github/workflows/call_precommit.yml b/.github/workflows/call_precommit.yml index a350240f94f..c39f5ae5e0c 100644 --- a/.github/workflows/call_precommit.yml +++ b/.github/workflows/call_precommit.yml @@ -32,14 +32,12 @@ jobs: - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{ inputs.python_version }} - - name: Install uv - uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0 - name: Override constraints if: ${{ inputs.override_requirements != '' }} run: python .github/scripts/override_constraints.py "${{ inputs.override_requirements }}" shell: bash - name: Install NNCF and test requirements - run: uv pip install --system . -r tests/common/requirements.txt + run: pip install . -r tests/common/requirements.txt - name: Print installed modules run: pip list - name: Run common precommit test scope @@ -60,14 +58,12 @@ jobs: - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{ inputs.python_version }} - - name: Install uv - uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0 - name: Override constraints if: ${{ inputs.override_requirements != '' }} run: python .github/scripts/override_constraints.py "${{ inputs.override_requirements }}" shell: bash - name: Install NNCF and test requirements - run: uv pip install --system . -r tests/onnx/requirements.txt + run: pip install . -r tests/onnx/requirements.txt - name: Print installed modules run: pip list - name: Run ONNX precommit test scope @@ -88,14 +84,12 @@ jobs: - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{ inputs.python_version }} - - name: Install uv - uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0 - name: Override constraints if: ${{ inputs.override_requirements != '' }} run: python .github/scripts/override_constraints.py "${{ inputs.override_requirements }}" shell: bash - name: Install NNCF and test requirements - run: uv pip install --system . -r tests/openvino/requirements.txt + run: pip install . -r tests/openvino/requirements.txt - name: Print installed modules run: pip list - name: Run OV precommit test scope @@ -116,10 +110,8 @@ jobs: - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{ inputs.python_version }} - - name: Install uv - uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0 - name: Install test requirements - run: uv pip install --system . -r tests/tools/requirements.txt + run: pip install . -r tests/tools/requirements.txt - name: Print installed modules run: pip list - name: Run tools precommit test scope @@ -138,14 +130,12 @@ jobs: - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{ inputs.python_version }} - - name: Install uv - uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0 - name: Override constraints if: ${{ inputs.override_requirements != '' }} run: python .github/scripts/override_constraints.py "${{ inputs.override_requirements }}" shell: bash - name: Install NNCF and test requirements - run: uv pip install --system . -r tests/torch/requirements.txt + run: pip install . -r tests/torch/requirements.txt - name: Print installed modules run: pip list - name: Run torch precommit test scope @@ -182,14 +172,12 @@ jobs: - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{ inputs.python_version }} - - name: Install uv - uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0 - name: Override constraints if: ${{ inputs.override_requirements != '' }} run: python .github/scripts/override_constraints.py "${{ inputs.override_requirements }}" shell: bash - name: Install NNCF and test requirements - run: uv pip install --system . -r tests/torch/requirements.txt + run: pip install . -r tests/torch/requirements.txt - name: Print installed modules run: pip list - name: Check CUDA diff --git a/.github/workflows/conformance_weight_compression.yml b/.github/workflows/conformance_weight_compression.yml index 04da234cbde..b6f0902aa6b 100644 --- a/.github/workflows/conformance_weight_compression.yml +++ b/.github/workflows/conformance_weight_compression.yml @@ -2,6 +2,7 @@ name: Weight compression permissions: read-all on: + pull_request: workflow_call: workflow_dispatch: inputs: @@ -37,12 +38,10 @@ jobs: - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: 3.10.14 - - name: Install uv - uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0 - name: cpuinfo run: cat /proc/cpuinfo - name: Install NNCF and test requirements - run: uv pip install --system . -r tests/post_training/requirements.txt + run: pip install . -r tests/post_training/requirements.txt - name: Print installed modules run: pip list - name: Run examples test scope diff --git a/constraints.txt b/constraints.txt index a7d848f2bca..f7738f6ca07 100644 --- a/constraints.txt +++ b/constraints.txt @@ -1,5 +1,6 @@ # Openvino -openvino==2025.4.1 +openvino==2026.0.0rc1 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre # Pytorch torch==2.9.0 diff --git a/docs/Installation.md b/docs/Installation.md index 5b0d736a25f..63b41d59109 100644 --- a/docs/Installation.md +++ b/docs/Installation.md @@ -48,7 +48,7 @@ as well as the supported versions of Python: | NNCF | OpenVINO | PyTorch | ONNX | TensorFlow | Python | |-----------|------------|----------|----------|------------|--------| -| `develop` | `2025.4.1` | `2.9.0` | `1.17.0` | | `3.10` | +| `3.0.0` | `2026.0.0` | `2.9.0` | `1.17.0` | | `3.10` | | `2.19.0` | `2025.4.0` | `2.8.0` | `1.17.0` | `2.15.1` | `3.10` | | `2.18.0` | `2025.3.0` | `2.8.0` | `1.17.0` | `2.15.1` | `3.10` | | `2.17.0` | `2025.2.0` | `2.7.1` | `1.17.0` | `2.15.1` | `3.10` | diff --git a/examples/llm_compression/onnx/tiny_llama/requirements.txt b/examples/llm_compression/onnx/tiny_llama/requirements.txt index 6b7090a9267..2f44aabdda0 100644 --- a/examples/llm_compression/onnx/tiny_llama/requirements.txt +++ b/examples/llm_compression/onnx/tiny_llama/requirements.txt @@ -1,5 +1,6 @@ transformers==4.53.0 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre optimum-intel[openvino]==1.27.0 optimum-onnx==0.1.0 optimum==2.1.0 diff --git a/examples/llm_compression/onnx/tiny_llama_scale_estimation/requirements.txt b/examples/llm_compression/onnx/tiny_llama_scale_estimation/requirements.txt index eeff609219c..e9b03064290 100644 --- a/examples/llm_compression/onnx/tiny_llama_scale_estimation/requirements.txt +++ b/examples/llm_compression/onnx/tiny_llama_scale_estimation/requirements.txt @@ -1,6 +1,7 @@ torch==2.9.0 transformers==4.53.0 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre optimum-intel[openvino]==1.27.0 optimum-onnx==0.1.0 optimum==2.1.0 diff --git a/examples/llm_compression/openvino/smollm2_360m_adaptive_codebook/main.py b/examples/llm_compression/openvino/smollm2_360m_adaptive_codebook/main.py index 847927ac3c5..8975609e00d 100644 --- a/examples/llm_compression/openvino/smollm2_360m_adaptive_codebook/main.py +++ b/examples/llm_compression/openvino/smollm2_360m_adaptive_codebook/main.py @@ -202,7 +202,7 @@ def load_model_and_tokenizer(model_id: str, export=True) -> tuple[OVModelForCaus def codebook_example( - model_id: str, compressed_model_id: str, adaptive_codebook: bool = False, num_elements: int = 10 + model_id: str, compressed_model_id: str, adaptive_codebook: bool = False, num_elements: int = 12 ) -> list[str]: """ Example of using the adaptive codebook compression. diff --git a/examples/llm_compression/openvino/smollm2_360m_adaptive_codebook/requirements.txt b/examples/llm_compression/openvino/smollm2_360m_adaptive_codebook/requirements.txt index 825f3d195eb..723f5162c12 100644 --- a/examples/llm_compression/openvino/smollm2_360m_adaptive_codebook/requirements.txt +++ b/examples/llm_compression/openvino/smollm2_360m_adaptive_codebook/requirements.txt @@ -1,5 +1,6 @@ datasets==4.5.0 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre optimum-intel[openvino]==1.27.0 optimum-onnx==0.1.0 optimum==2.1.0 diff --git a/examples/llm_compression/openvino/smollm2_360m_codebook/requirements.txt b/examples/llm_compression/openvino/smollm2_360m_codebook/requirements.txt index f6ff871034a..fd4eb5bc236 100644 --- a/examples/llm_compression/openvino/smollm2_360m_codebook/requirements.txt +++ b/examples/llm_compression/openvino/smollm2_360m_codebook/requirements.txt @@ -1,4 +1,5 @@ -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre optimum-intel[openvino]==1.27.0 optimum-onnx==0.1.0 optimum==2.1.0 diff --git a/examples/llm_compression/openvino/smollm2_360m_fp8/requirements.txt b/examples/llm_compression/openvino/smollm2_360m_fp8/requirements.txt index 3bea8a3a4c7..f94715610d2 100644 --- a/examples/llm_compression/openvino/smollm2_360m_fp8/requirements.txt +++ b/examples/llm_compression/openvino/smollm2_360m_fp8/requirements.txt @@ -1,5 +1,6 @@ datasets==4.5.0 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre optimum-intel[openvino]==1.27.0 optimum-onnx==0.1.0 optimum==2.1.0 diff --git a/examples/llm_compression/openvino/tiny_llama/requirements.txt b/examples/llm_compression/openvino/tiny_llama/requirements.txt index f603d710762..58dd2d77cda 100644 --- a/examples/llm_compression/openvino/tiny_llama/requirements.txt +++ b/examples/llm_compression/openvino/tiny_llama/requirements.txt @@ -1,6 +1,7 @@ datasets==4.5.0 onnx==1.17.0 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre optimum-intel[openvino]==1.27.0 optimum-onnx==0.1.0 optimum==2.1.0 diff --git a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/requirements.txt b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/requirements.txt index 2bd63973082..eb9a34f3005 100644 --- a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/requirements.txt +++ b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/requirements.txt @@ -1,6 +1,8 @@ -whowhatbench @ git+https://github.com/openvinotoolkit/openvino.genai@2025.4.1.0#subdirectory=tools/who_what_benchmark +whowhatbench @ git+https://github.com/openvinotoolkit/openvino.genai@releases/2026/0#subdirectory=tools/who_what_benchmark +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre numpy==1.26.4 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre optimum-intel==1.27.0 optimum-onnx==0.1.0 optimum==2.1.0 diff --git a/examples/llm_compression/openvino/tiny_llama_synthetic_data/requirements.txt b/examples/llm_compression/openvino/tiny_llama_synthetic_data/requirements.txt index 3485d9e79ed..bd704db5c32 100644 --- a/examples/llm_compression/openvino/tiny_llama_synthetic_data/requirements.txt +++ b/examples/llm_compression/openvino/tiny_llama_synthetic_data/requirements.txt @@ -1,7 +1,8 @@ torch==2.9.0 datasets==4.5.0 numpy>=1.23.5,<2 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre optimum-intel==1.27.0 optimum-onnx==0.1.0 optimum==2.1.0 diff --git a/examples/llm_compression/torch/distillation_qat_with_lora/requirements.txt b/examples/llm_compression/torch/distillation_qat_with_lora/requirements.txt index ff91ffdc75c..6f5873e588c 100644 --- a/examples/llm_compression/torch/distillation_qat_with_lora/requirements.txt +++ b/examples/llm_compression/torch/distillation_qat_with_lora/requirements.txt @@ -1,7 +1,8 @@ tensorboard==2.13.0 torch==2.9.0 numpy>=1.23.5,<2 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre optimum-intel==1.27.0 optimum-onnx==0.1.0 optimum==2.1.0 diff --git a/examples/llm_compression/torch/downstream_qat_with_nls/requirements.txt b/examples/llm_compression/torch/downstream_qat_with_nls/requirements.txt index ff91ffdc75c..6f5873e588c 100644 --- a/examples/llm_compression/torch/downstream_qat_with_nls/requirements.txt +++ b/examples/llm_compression/torch/downstream_qat_with_nls/requirements.txt @@ -1,7 +1,8 @@ tensorboard==2.13.0 torch==2.9.0 numpy>=1.23.5,<2 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre optimum-intel==1.27.0 optimum-onnx==0.1.0 optimum==2.1.0 diff --git a/examples/llm_compression/torch_fx/tiny_llama/requirements.txt b/examples/llm_compression/torch_fx/tiny_llama/requirements.txt index e5c758303d1..4952122b106 100644 --- a/examples/llm_compression/torch_fx/tiny_llama/requirements.txt +++ b/examples/llm_compression/torch_fx/tiny_llama/requirements.txt @@ -1,6 +1,7 @@ transformers==4.53.0 datasets==4.5.0 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre optimum==2.1.0 torch==2.9.0 torchvision==0.24.0 diff --git a/examples/post_training_quantization/onnx/mobilenet_v2/requirements.txt b/examples/post_training_quantization/onnx/mobilenet_v2/requirements.txt index 1ade7164d43..8b066ebe77c 100644 --- a/examples/post_training_quantization/onnx/mobilenet_v2/requirements.txt +++ b/examples/post_training_quantization/onnx/mobilenet_v2/requirements.txt @@ -5,5 +5,6 @@ fastprogress==1.0.5 fastcore==1.11.5 onnx==1.17.0 onnxruntime==1.21.1 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre numpy<2 diff --git a/examples/post_training_quantization/onnx/yolov8_quantize_with_accuracy_control/requirements.txt b/examples/post_training_quantization/onnx/yolov8_quantize_with_accuracy_control/requirements.txt index e5874c716e2..e0a5d4a607b 100644 --- a/examples/post_training_quantization/onnx/yolov8_quantize_with_accuracy_control/requirements.txt +++ b/examples/post_training_quantization/onnx/yolov8_quantize_with_accuracy_control/requirements.txt @@ -1,5 +1,6 @@ ultralytics==8.3.221 onnx==1.17.0 onnxruntime==1.21.1 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre torch==2.9.0 diff --git a/examples/post_training_quantization/openvino/anomaly_stfpm_quantize_with_accuracy_control/requirements.txt b/examples/post_training_quantization/openvino/anomaly_stfpm_quantize_with_accuracy_control/requirements.txt index 4b29f268266..7714a98d9f7 100644 --- a/examples/post_training_quantization/openvino/anomaly_stfpm_quantize_with_accuracy_control/requirements.txt +++ b/examples/post_training_quantization/openvino/anomaly_stfpm_quantize_with_accuracy_control/requirements.txt @@ -1,3 +1,4 @@ anomalib==0.6.0 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre numpy<2 diff --git a/examples/post_training_quantization/openvino/mobilenet_v2/requirements.txt b/examples/post_training_quantization/openvino/mobilenet_v2/requirements.txt index 09f35f1583b..6602500f67e 100644 --- a/examples/post_training_quantization/openvino/mobilenet_v2/requirements.txt +++ b/examples/post_training_quantization/openvino/mobilenet_v2/requirements.txt @@ -4,4 +4,5 @@ scikit-learn fastdownload==0.0.7 fastprogress==1.0.5 fastcore==1.11.5 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre diff --git a/examples/post_training_quantization/openvino/yolo26/requirements.txt b/examples/post_training_quantization/openvino/yolo26/requirements.txt index 8a224287028..cea8fa0b198 100644 --- a/examples/post_training_quantization/openvino/yolo26/requirements.txt +++ b/examples/post_training_quantization/openvino/yolo26/requirements.txt @@ -1,4 +1,5 @@ ultralytics==8.4.7 onnx==1.17.0 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre torch==2.9.0 diff --git a/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/requirements.txt b/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/requirements.txt index fc0b5cc3f4f..77ef3227963 100644 --- a/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/requirements.txt +++ b/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/requirements.txt @@ -1,4 +1,5 @@ ultralytics==8.3.221 onnx==1.17.0 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre torch==2.9.0 diff --git a/examples/post_training_quantization/torch/mobilenet_v2/requirements.txt b/examples/post_training_quantization/torch/mobilenet_v2/requirements.txt index 60d552c1150..0f1ac6a4e8e 100644 --- a/examples/post_training_quantization/torch/mobilenet_v2/requirements.txt +++ b/examples/post_training_quantization/torch/mobilenet_v2/requirements.txt @@ -1,7 +1,8 @@ fastdownload==0.0.7 fastprogress==1.0.5 fastcore==1.11.5 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre scikit-learn torch==2.9.0 torchvision==0.24.0 diff --git a/examples/post_training_quantization/torch/ssd300_vgg16/requirements.txt b/examples/post_training_quantization/torch/ssd300_vgg16/requirements.txt index a2bbaecc5ee..3f2b3bfdbc5 100644 --- a/examples/post_training_quantization/torch/ssd300_vgg16/requirements.txt +++ b/examples/post_training_quantization/torch/ssd300_vgg16/requirements.txt @@ -2,7 +2,8 @@ fastdownload==0.0.7 fastprogress==1.0.5 fastcore==1.11.5 onnx==1.17.0 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre pycocotools==2.0.7 torch==2.9.0 torchmetrics==1.0.1 diff --git a/examples/post_training_quantization/torch_fx/resnet18/requirements.txt b/examples/post_training_quantization/torch_fx/resnet18/requirements.txt index aa68860cc0c..69e312ce826 100644 --- a/examples/post_training_quantization/torch_fx/resnet18/requirements.txt +++ b/examples/post_training_quantization/torch_fx/resnet18/requirements.txt @@ -1,6 +1,7 @@ fastdownload==0.0.7 fastprogress==1.0.5 fastcore==1.11.5 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre torch==2.9.0 torchvision==0.24.0 diff --git a/examples/pruning/torch/resnet18/requirements.txt b/examples/pruning/torch/resnet18/requirements.txt index aa68860cc0c..69e312ce826 100644 --- a/examples/pruning/torch/resnet18/requirements.txt +++ b/examples/pruning/torch/resnet18/requirements.txt @@ -1,6 +1,7 @@ fastdownload==0.0.7 fastprogress==1.0.5 fastcore==1.11.5 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre torch==2.9.0 torchvision==0.24.0 diff --git a/examples/quantization_aware_training/torch/anomalib/requirements.txt b/examples/quantization_aware_training/torch/anomalib/requirements.txt index f257a20f24a..b95c23638f2 100644 --- a/examples/quantization_aware_training/torch/anomalib/requirements.txt +++ b/examples/quantization_aware_training/torch/anomalib/requirements.txt @@ -1,6 +1,7 @@ anomalib==2.2.0 torch==2.9.0 -openvino==2025.3.0 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre requests==2.32.5 matplotlib==3.10.7 numpy==2.2.6 diff --git a/examples/quantization_aware_training/torch/resnet18/requirements.txt b/examples/quantization_aware_training/torch/resnet18/requirements.txt index aa68860cc0c..69e312ce826 100644 --- a/examples/quantization_aware_training/torch/resnet18/requirements.txt +++ b/examples/quantization_aware_training/torch/resnet18/requirements.txt @@ -1,6 +1,7 @@ fastdownload==0.0.7 fastprogress==1.0.5 fastcore==1.11.5 -openvino==2025.4.1 +openvino==2026.0.0rc2 +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre torch==2.9.0 torchvision==0.24.0 diff --git a/src/nncf/experimental/quantization/algorithms/weight_compression/codebook_estimation.py b/src/nncf/experimental/quantization/algorithms/weight_compression/codebook_estimation.py index f0139615a76..d96c88f5c3f 100644 --- a/src/nncf/experimental/quantization/algorithms/weight_compression/codebook_estimation.py +++ b/src/nncf/experimental/quantization/algorithms/weight_compression/codebook_estimation.py @@ -170,6 +170,7 @@ def apply( codebook = self.calculate_codebook(stats, weight, wp.reduction_axes, config, wp) res[weight_name] = CompressedWeight(None, None, None, codebook) + config.codebook_values = codebook return res diff --git a/src/nncf/onnx/graph/model_transformer.py b/src/nncf/onnx/graph/model_transformer.py index db8f2ee33dc..cbe723e7277 100644 --- a/src/nncf/onnx/graph/model_transformer.py +++ b/src/nncf/onnx/graph/model_transformer.py @@ -55,7 +55,7 @@ def __init__(self, model: onnx.ModelProto, inplace: bool = False): # and is larger than 2GB, this method silently returns an empty model. inferred_model = model if inplace else onnx.shape_inference.infer_shapes(model) super().__init__(inferred_model) - self.onnx_model_extractor = onnx.utils.Extractor(inferred_model) + self.onnx_model_extractor = None self._inplace = inplace @staticmethod @@ -426,6 +426,9 @@ def _apply_model_extraction_transformation(self, transformation: ONNXModelExtrac if not output_tensor_names: output_tensor_names = [n.name for n in self._model.graph.output] + if self.onnx_model_extractor is None: + self.onnx_model_extractor = onnx.utils.Extractor(self._model) + extracted_model = self.onnx_model_extractor.extract_model(input_tensor_names, output_tensor_names) if self._model.metadata_props: values = {p.key: p.value for p in self._model.metadata_props} diff --git a/src/nncf/onnx/graph/passes.py b/src/nncf/onnx/graph/passes.py index 6cf1d479ab5..fb863fe92a2 100644 --- a/src/nncf/onnx/graph/passes.py +++ b/src/nncf/onnx/graph/passes.py @@ -95,6 +95,7 @@ def compress_quantize_weights_transformation(model: onnx.ModelProto): """ initializer = {x.name: x for x in model.graph.initializer} nodes_to_remove = [] + removed_initializers = [] version = max(model.opset_import[0].version, 19) QuantizeLinear = load_op("", "QuantizeLinear", version) @@ -129,11 +130,19 @@ def compress_quantize_weights_transformation(model: onnx.ModelProto): block_size = get_node_attr_value(node, "block_size") y = QuantizeLinear.eval(x, y_scale, y_zero_point, axis=axis, block_size=block_size) - # Update an existing initializer. The new name is the name of the `QuantizeLinear` output. + # Create a new initializer with the `QuantizeLinear` output name tensor_proto = onnx.numpy_helper.from_array(y, name=node.output[0]) - initializer[x_name].CopyFrom(tensor_proto) + # Remove the old initializer + model.graph.initializer.remove(initializer[x_name]) + removed_initializers.append(x_name) + # Add the new initializer + model.graph.initializer.append(tensor_proto) # `QuantizeLinear` and `DequantizeLinear` nodes share initializers on ports 1 and 2, # so these initializers should not be removed. for x in nodes_to_remove: model.graph.node.remove(x) + + for inp in list(model.graph.input): + if inp.name in removed_initializers: + model.graph.input.remove(inp) diff --git a/src/nncf/quantization/algorithms/weight_compression/onnx_backend.py b/src/nncf/quantization/algorithms/weight_compression/onnx_backend.py index 7be77949569..05910441fc5 100644 --- a/src/nncf/quantization/algorithms/weight_compression/onnx_backend.py +++ b/src/nncf/quantization/algorithms/weight_compression/onnx_backend.py @@ -494,11 +494,13 @@ def _replace_matmul_with_matmulnbits( # Insert the MatMulNBits node before the consumer nodes insert_index = len(model.graph.node) + old_output = original_matmul.output[0] + new_output = matmul_n_bits.output[0] for node in model.graph.node: for j, input_name in enumerate(node.input): - if input_name == original_matmul.name: + if input_name == old_output: insert_index = min(insert_index, get_node_index(model, node.name)) - node.input[j] = matmul_n_bits + node.input[j] = new_output # Insert the MatMulNBits node before the first consumer node model.graph.node.insert(insert_index, matmul_n_bits) diff --git a/tests/cross_fw/examples/example_scope.json b/tests/cross_fw/examples/example_scope.json index 8fbe4b95f9d..fff3fbaaf6f 100644 --- a/tests/cross_fw/examples/example_scope.json +++ b/tests/cross_fw/examples/example_scope.json @@ -311,9 +311,9 @@ "accuracy_metrics": { "answers": [ "Paris.", - "Mont Blanc in the Alps.", - "Toronto in Ontario.", - "Osaka in Japan." + "Mount Blanc.", + "Toronto.", + "Fukuoka." ] } }, diff --git a/tests/onnx/quantization/test_ptq_regression.py b/tests/onnx/quantization/test_ptq_regression.py index f05d499a233..3f440c4b3df 100644 --- a/tests/onnx/quantization/test_ptq_regression.py +++ b/tests/onnx/quantization/test_ptq_regression.py @@ -8,6 +8,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from dataclasses import dataclass from pathlib import Path import numpy as np @@ -25,21 +26,40 @@ import nncf + +@dataclass +class TestModel: + model_url: str + model_name: str + int8_ref_top1: float + num_inputs_initial_model: int + num_inputs_quantized_model: int + + def __str__(self) -> str: + return self.model_name + + MODELS = [ - ( + TestModel( "https://github.com/onnx/models/raw/5faef4c33eba0395177850e1e31c4a6a9e634c82/vision/classification/mobilenet/model/mobilenetv2-12.onnx", "mobilenetv2-12", 0.7864968152866242, + 1, + 1, ), - ( + TestModel( "https://github.com/onnx/models/raw/5faef4c33eba0395177850e1e31c4a6a9e634c82/vision/classification/resnet/model/resnet50-v1-7.onnx", "resnet50-v1-7", 0.8114649681528663, + 300, + 246, ), - ( + TestModel( "https://github.com/onnx/models/raw/5faef4c33eba0395177850e1e31c4a6a9e634c82/vision/classification/efficientnet-lite4/model/efficientnet-lite4-11.onnx", "efficientnet-lite4-11", 0.8035668789808917, + 1, + 1, ), ] @@ -111,9 +131,9 @@ def res_callback(infer_request: ov.InferRequest, userdata) -> None: return accuracy_score(predictions, references) -@pytest.mark.parametrize("model_url, model_name, int8_ref_top1", MODELS, ids=[model_name[1] for model_name in MODELS]) -def test_compression(tmp_path, model_dir, data_dir, model_url, model_name, int8_ref_top1): - original_model_path = download_model(model_url, model_dir) +@pytest.mark.parametrize("test_model", MODELS, ids=str) +def test_compression(tmp_path, model_dir, data_dir, test_model): + original_model_path = download_model(test_model.model_url, model_dir) dataset_path = download_dataset(data_dir) normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) @@ -126,7 +146,9 @@ def test_compression(tmp_path, model_dir, data_dir, model_url, model_name, int8_ transforms.ToTensor(), normalize, transforms.Lambda( - lambda images: torch.moveaxis(images, 0, 2) if model_name == "efficientnet-lite4-11" else images + lambda images: torch.moveaxis(images, 0, 2) + if test_model.model_name == "efficientnet-lite4-11" + else images ), ] ), @@ -142,10 +164,15 @@ def transform_fn(data_item): images, _ = data_item return {input_name: images.numpy()} + assert len([inp.name for inp in converted_model.graph.input]) == test_model.num_inputs_initial_model + calibration_dataset = nncf.Dataset(val_loader, transform_fn) quantized_model = nncf.quantize(converted_model, calibration_dataset) + + assert len([inp.name for inp in quantized_model.graph.input]) == test_model.num_inputs_quantized_model + int8_model_path = tmp_path / "quantized_model.onnx" onnx.save_model(quantized_model, str(int8_model_path)) int8_top1 = validate(int8_model_path, val_loader) print(f"INT8 metrics = {int8_top1}") - assert abs(int8_top1 - int8_ref_top1) < 3e-3 # 0.03 deviations + assert abs(int8_top1 - test_model.int8_ref_top1) < 3e-3 # 0.03 deviations diff --git a/tests/openvino/native/quantization/test_gptq.py b/tests/openvino/native/quantization/test_gptq.py index 9834f0b7e38..351e78725c7 100644 --- a/tests/openvino/native/quantization/test_gptq.py +++ b/tests/openvino/native/quantization/test_gptq.py @@ -337,7 +337,7 @@ def forward(self, x): def _create_ov_model(weights: np.ndarray, input_shape: tuple, is_3d_weights: bool = False): - import openvino.runtime.opset13 as opset + import openvino.opset13 as opset param = opset.parameter(input_shape, dtype=np.float32, name="input") const = opset.constant(weights, dtype=np.float32, name="self.weight") diff --git a/tests/post_training/data/wc_reference_data.yaml b/tests/post_training/data/wc_reference_data.yaml index cd3ccbda6c7..60e10e7ad33 100644 --- a/tests/post_training/data/wc_reference_data.yaml +++ b/tests/post_training/data/wc_reference_data.yaml @@ -4,11 +4,11 @@ tinyllama_data_free_backend_OV: num_int8: 84 tinyllama_data_free_backend_ONNX: metric_value: 0.73779 - num_int4: 264 + num_int4: 228 num_int8: 84 tinyllama_data_free_opset19_backend_ONNX: metric_value: 0.73779 - num_int4: 264 + num_int4: 228 num_int8: 84 tinyllama_data_aware_backend_OV: metric_value: 0.85767 @@ -36,7 +36,7 @@ tinyllama_data_aware_awq_scale_estimation_stateful_backend_OV: num_int8: 124 tinyllama_data_aware_awq_scale_estimation_backend_ONNX: metric_value: 0.85502 - num_int4: 230 + num_int4: 188 num_int8: 124 tinyllama_int8_data_free_backend_TORCH: metric_value: 0.95624 @@ -132,9 +132,9 @@ tinyllama_data_free_awq_backend_FX_TORCH: num_int8: 124 tinyllama_data_free_awq_backend_ONNX: metric_value: 0.8597 - num_int4: 230 + num_int4: 188 num_int8: 124 tinyllama_data_aware_backend_ONNX: metric_value: 0.85807 - num_int4: 230 + num_int4: 188 num_int8: 124 diff --git a/tests/post_training/requirements.txt b/tests/post_training/requirements.txt index 07822dec02c..3ba283575c8 100644 --- a/tests/post_training/requirements.txt +++ b/tests/post_training/requirements.txt @@ -24,4 +24,5 @@ tensorflow-io==0.32.0 timm==0.9.2 accelerate==1.9.0 transformers==4.53.0 -whowhatbench @ git+https://github.com/openvinotoolkit/openvino.genai@2025.4.1.0#subdirectory=tools/who_what_benchmark +whowhatbench @ git+https://github.com/openvinotoolkit/openvino.genai@releases/2026/0#subdirectory=tools/who_what_benchmark +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --pre diff --git a/tools/activation_profiler/profiler.py b/tools/activation_profiler/profiler.py index 58199034b36..9b9345047a4 100644 --- a/tools/activation_profiler/profiler.py +++ b/tools/activation_profiler/profiler.py @@ -22,7 +22,7 @@ from typing import Any, Optional, Union import numpy as np -import openvino.runtime as ov +import openvino as ov import pandas as pd from nncf.common.tensor_statistics.builders import get_raw_stat_collector @@ -56,7 +56,7 @@ class NNCFProfiler: Example ------- ```python - import openvino.runtime as ov + import openvino as ov from nncf import Dataset model = ov.Core().read_model("model.xml")