diff --git a/src/nncf/openvino/engine.py b/src/nncf/openvino/engine.py index 06c73db92b9..af64bd7be6e 100644 --- a/src/nncf/openvino/engine.py +++ b/src/nncf/openvino/engine.py @@ -10,6 +10,10 @@ # limitations under the License. +import contextvars +from collections.abc import Generator +from contextlib import contextmanager + import numpy as np import openvino as ov from openvino import Type @@ -19,6 +23,17 @@ from nncf.definitions import NNCF_DATASET_RESET_STATE_KEY from nncf.openvino.graph.model_utils import model_has_state +_calibration_device: contextvars.ContextVar[str | None] = contextvars.ContextVar("_calibration_device", default=None) + + +@contextmanager +def calibration_device_context(device: str | None) -> Generator[None, None, None]: + token = _calibration_device.set(device) + try: + yield + finally: + _calibration_device.reset(token) + class OVCompiledModelEngine(Engine): """ @@ -79,12 +94,13 @@ def __init__(self, model: ov.Model, use_fp32_precision: bool = True): :param use_fp32_precision: A flag that determines whether to force the engine to use FP32 precision during inference. """ + device_name = _calibration_device.get() or "CPU" config = None if use_fp32_precision: config = {inference_precision: Type.f32} ie = ov.Core() stateful = model_has_state(model) - compiled_model = ie.compile_model(model, device_name="CPU", config=config) + compiled_model = ie.compile_model(model, device_name=device_name, config=config) self.engine = OVCompiledModelEngine(compiled_model, stateful) def infer( diff --git a/src/nncf/openvino/quantization/quantize_model.py b/src/nncf/openvino/quantization/quantize_model.py index 19031fb1674..8b7bd4d7bdf 100644 --- a/src/nncf/openvino/quantization/quantize_model.py +++ b/src/nncf/openvino/quantization/quantize_model.py @@ -21,6 +21,7 @@ from nncf.common.logging import nncf_logger from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset +from nncf.openvino.engine import calibration_device_context from nncf.openvino.graph.metatypes.groups import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS from nncf.openvino.graph.metatypes.openvino_metatypes import OVIfMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import get_node_metatype @@ -119,9 +120,11 @@ def _extract_all_subgraphs(model: ov.Model, current_id: str) -> None: f"The model consists of {if_ops_number} If node(-s) with then and else bodies. \ Main model and all If bodies will be quantized recursively." ) - quantized_model, _ = apply_algorithm_if_bodies( - quantization_algorithm, model, graphs, main_model_graph_id, calibration_dataset, subset_size, 1 - ) + calibration_device = advanced_parameters.calibration_device if advanced_parameters else None + with calibration_device_context(calibration_device): + quantized_model, _ = apply_algorithm_if_bodies( + quantization_algorithm, model, graphs, main_model_graph_id, calibration_dataset, subset_size, 1 + ) if is_weight_compression_needed(advanced_parameters): compress_quantize_weights_transformation(quantized_model) @@ -168,7 +171,9 @@ def native_quantize_impl( ) graph = GraphConverter.create_nncf_graph(model) warning_model_no_batchwise_support(graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS) - quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset) + calibration_device = advanced_parameters.calibration_device if advanced_parameters else None + with calibration_device_context(calibration_device): + quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset) if is_weight_compression_needed(advanced_parameters): compress_quantize_weights_transformation(quantized_model) @@ -296,15 +301,19 @@ def quantize_with_accuracy_control_impl( advanced_accuracy_restorer_parameters.num_ranking_workers, advanced_accuracy_restorer_parameters.restore_mode, ) - quantized_model = accuracy_restorer.apply( - model, - initial_metric_results, - quantized_model, - quantized_metric_results, - validation_dataset, - validation_dataset_size, - evaluator, + calibration_device = ( + advanced_quantization_parameters.calibration_device if advanced_quantization_parameters else None ) + with calibration_device_context(calibration_device): + quantized_model = accuracy_restorer.apply( + model, + initial_metric_results, + quantized_model, + quantized_metric_results, + validation_dataset, + validation_dataset_size, + evaluator, + ) if compress_weights: compress_quantize_weights_transformation(quantized_model) @@ -402,12 +411,15 @@ def compress_weights_impl( advanced_parameters, ) + calibration_device = advanced_parameters.calibration_device if advanced_parameters else None + statistics_points = None if advanced_parameters and advanced_parameters.statistics_path: # If there is no such directory, then caches statistics statistics_path = Path(advanced_parameters.statistics_path) if not statistics_path.exists(): - cache_weight_compression_statistics(model, graph, dataset, subset_size, statistics_path) + with calibration_device_context(calibration_device): + cache_weight_compression_statistics(model, graph, dataset, subset_size, statistics_path) statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) compression_algorithm.set_backend_entity(model) _, matmul_input_to_output_nodes_map = compression_algorithm.get_compression_nodes_info(graph) @@ -421,4 +433,5 @@ def compress_weights_impl( statistics_aggregator.load_statistics_from_dir(statistics_path) statistics_points = statistics_aggregator.statistic_points - return compression_algorithm.apply(model, graph, statistics_points, dataset) + with calibration_device_context(calibration_device): + return compression_algorithm.apply(model, graph, statistics_points, dataset) diff --git a/src/nncf/quantization/advanced_parameters.py b/src/nncf/quantization/advanced_parameters.py index df39661f164..9fd9a776590 100644 --- a/src/nncf/quantization/advanced_parameters.py +++ b/src/nncf/quantization/advanced_parameters.py @@ -252,6 +252,10 @@ class AdvancedQuantizationParameters: :type smooth_quant_alpha: float :param backend_params: Backend-specific parameters. :type backend_params: dict[str, Any] + :param calibration_device: OpenVINO device name to use for calibration inference + (e.g. "CPU", "GPU", "GPU.0", "AUTO:GPU,CPU"). If None, defaults to "CPU". + Only applicable to the OpenVINO backend. + :type calibration_device: Optional[str] """ # General parameters @@ -282,6 +286,9 @@ class AdvancedQuantizationParameters: # Backend specific parameters backend_params: dict[str, Any] = field(default_factory=dict) + # Calibration device + calibration_device: str | None = None + @api() @dataclass @@ -427,6 +434,10 @@ class AdvancedCompressionParameters: :type lora_correction_params: AdvancedLoraCorrectionParameters :param backend_params: Backend-specific parameters. :type backend_params: dict[str, Any] + :param calibration_device: OpenVINO device name to use for calibration inference + (e.g. "CPU", "GPU", "GPU.0", "AUTO:GPU,CPU"). If None, defaults to "CPU". + Only applicable to the OpenVINO backend. + :type calibration_device: Optional[str] :param codebook: The codebook (LUT) for the weight compression. Applicable for vector quantization. Must be a numpy array or ov Tensor. :type codebook: TTensor @@ -445,6 +456,7 @@ class AdvancedCompressionParameters: gptq_params: AdvancedGPTQParameters = field(default_factory=AdvancedGPTQParameters) lora_correction_params: AdvancedLoraCorrectionParameters = field(default_factory=AdvancedLoraCorrectionParameters) backend_params: dict[str, Any] = field(default_factory=dict) + calibration_device: str | None = None codebook: TTensor | None = None adaptive_codebook_params: AdvancedAdaptiveCodebookParameters = field( default_factory=AdvancedAdaptiveCodebookParameters diff --git a/src/nncf/quantization/quantize_model.py b/src/nncf/quantization/quantize_model.py index 6ce83d496ef..54dd503de84 100644 --- a/src/nncf/quantization/quantize_model.py +++ b/src/nncf/quantization/quantize_model.py @@ -201,6 +201,10 @@ def quantize( if backend == BackendType.ONNX: from nncf.onnx.quantization.quantize_model import quantize_impl + if advanced_parameters and advanced_parameters.calibration_device: + msg = "ONNX backend does not support the `calibration_device` option." + raise nncf.ParameterNotSupportedError(msg) + return quantize_impl( # type: ignore[no-any-return] model=model, calibration_dataset=calibration_dataset, @@ -217,6 +221,10 @@ def quantize( if backend == BackendType.TORCH: from nncf.torch.function_hook.quantization.quantize_model import quantize_impl + if advanced_parameters and advanced_parameters.calibration_device: + msg = "Torch backend does not support the `calibration_device` option." + raise nncf.ParameterNotSupportedError(msg) + return quantize_impl( # type: ignore[no-any-return] model=model, calibration_dataset=calibration_dataset, @@ -233,6 +241,10 @@ def quantize( if backend == BackendType.TORCH_FX: from nncf.experimental.torch.fx.quantization.quantize_model import quantize_impl + if advanced_parameters and advanced_parameters.calibration_device: + msg = "TorchFX backend does not support the `calibration_device` option." + raise nncf.ParameterNotSupportedError(msg) + return quantize_impl( # type: ignore[no-any-return] model=model, calibration_dataset=calibration_dataset, @@ -372,6 +384,10 @@ def quantize_with_accuracy_control( if backend == BackendType.ONNX: from nncf.onnx.quantization.quantize_model import quantize_with_accuracy_control_impl + if advanced_quantization_parameters and advanced_quantization_parameters.calibration_device: + msg = "ONNX backend does not support the `calibration_device` option." + raise nncf.ParameterNotSupportedError(msg) + return quantize_with_accuracy_control_impl( # type: ignore[no-any-return] model, calibration_dataset, @@ -528,6 +544,10 @@ def compress_weights( msg = "Torch backend does not support statistics caching." raise nncf.ParameterNotSupportedError(msg) + if advanced_parameters and advanced_parameters.calibration_device: + msg = "Torch backend does not support the `calibration_device` option." + raise nncf.ParameterNotSupportedError(msg) + if compression_format == CompressionFormat.FQ and group_size != -1: msg = "Torch backend does not support FQ compression format for group-wise quantization." raise nncf.ParameterNotSupportedError(msg) @@ -578,6 +598,10 @@ def compress_weights( msg = "TorchFX does not supports statistics caching." raise nncf.ParameterNotSupportedError(msg) + if advanced_parameters and advanced_parameters.calibration_device: + msg = "TorchFX backend does not support the `calibration_device` option." + raise nncf.ParameterNotSupportedError(msg) + if compression_format in [CompressionFormat.FQ, CompressionFormat.FQ_LORA, CompressionFormat.FQ_LORA_NLS]: msg = "Torch FX backend does not support FQ, FQ_LORA and FQ_LORA_NLS compression formats." raise nncf.ParameterNotSupportedError(msg) @@ -649,6 +673,10 @@ def compress_weights( if advanced_parameters and advanced_parameters.statistics_path: msg = "ONNX does not supports statistics caching." raise nncf.ParameterNotSupportedError(msg) + + if advanced_parameters and advanced_parameters.calibration_device: + msg = "ONNX backend does not support the `calibration_device` option." + raise nncf.ParameterNotSupportedError(msg) compression_weights_impl = onnx_compress_weights_impl if compression_weights_impl is None: msg = f"Unsupported type of backend: {backend}" diff --git a/tests/cross_fw/test_templates/template_test_quantize_api.py b/tests/cross_fw/test_templates/template_test_quantize_api.py new file mode 100644 index 00000000000..aeeab17483d --- /dev/null +++ b/tests/cross_fw/test_templates/template_test_quantize_api.py @@ -0,0 +1,37 @@ +# Copyright (c) 2026 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from abc import ABC +from abc import abstractmethod +from typing import TypeVar + +import pytest + +import nncf +from nncf.data.dataset import Dataset +from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters + +TModel = TypeVar("TModel") + + +class TemplateTestQuantizeApi(ABC): + @staticmethod + @abstractmethod + def get_simple_model() -> TModel: + """Returns a minimal model for the backend.""" + + def test_quantize_calibration_device(self): + model = self.get_simple_model() + with pytest.raises(nncf.ParameterNotSupportedError): + nncf.quantize( + model, + Dataset([0]), + advanced_parameters=AdvancedQuantizationParameters(calibration_device="SOME_DEVICE"), + ) diff --git a/tests/cross_fw/test_templates/template_test_weights_compression.py b/tests/cross_fw/test_templates/template_test_weights_compression.py index 99272e8aa8e..b36a6285325 100644 --- a/tests/cross_fw/test_templates/template_test_weights_compression.py +++ b/tests/cross_fw/test_templates/template_test_weights_compression.py @@ -983,3 +983,17 @@ def test_compression_skipped_with_transposed_activations(self, transpose_a_suppo all_layers=True, **kwargs, ) + + def test_compress_weights_calibration_device(self): + model = self.get_awq_model(non_mergable_pattern=False, is_3d_weights=False) + dataset = Dataset([self.to_tensor(np.ones([2, 8, 8]))]) + with pytest.raises(nncf.ParameterNotSupportedError): + compress_weights( + model, + mode=CompressWeightsMode.INT4_SYM, + ratio=1.0, + group_size=2, + dataset=dataset, + awq=True, + advanced_parameters=CompressionParams(calibration_device="SOME_DEVICE"), + ) diff --git a/tests/onnx/quantization/test_quantize_api.py b/tests/onnx/quantization/test_quantize_api.py new file mode 100644 index 00000000000..508fa4331b0 --- /dev/null +++ b/tests/onnx/quantization/test_quantize_api.py @@ -0,0 +1,38 @@ +# Copyright (c) 2026 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest + +import nncf +from nncf import Dataset +from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters +from tests.cross_fw.test_templates.template_test_quantize_api import TemplateTestQuantizeApi +from tests.onnx.models import LinearModel + +INPUT_SHAPE = [1, 3, 32, 32] + + +class TestONNXQuantizeApi(TemplateTestQuantizeApi): + @staticmethod + def get_simple_model(): + return LinearModel().onnx_model + + def test_quantize_with_accuracy_control_calibration_device(self): + model = self.get_simple_model() + dataset = Dataset([np.ones(INPUT_SHAPE, dtype=np.float32)]) + with pytest.raises(nncf.ParameterNotSupportedError): + nncf.quantize_with_accuracy_control( + model, + dataset, + dataset, + lambda model, dataset: (1.0, None), + advanced_quantization_parameters=AdvancedQuantizationParameters(calibration_device="SOME_DEVICE"), + ) diff --git a/tests/openvino/native/quantization/test_quantize_api.py b/tests/openvino/native/quantization/test_quantize_api.py index b6b3bbf7b54..552a14f3c6c 100644 --- a/tests/openvino/native/quantization/test_quantize_api.py +++ b/tests/openvino/native/quantization/test_quantize_api.py @@ -8,30 +8,67 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np +import openvino as ov import pytest -from openvino import Model -from openvino import Shape -from openvino import Type -from openvino import op -from openvino import opset13 as opset import nncf from nncf import Dataset -from tests.cross_fw.shared.datasets import MockDataset +from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters +from tests.cross_fw.test_templates.template_test_quantize_api import TemplateTestQuantizeApi +from tests.openvino.native.models import LinearModel -INPUT_SHAPE = [2, 1, 1, 1] +LINEAR_MODEL_INPUT_SHAPE = [1, 3, 4, 2] -def get_mock_model() -> Model: - param_node = op.Parameter(Type.f32, Shape(INPUT_SHAPE)) - softmax_axis = 1 - softmax_node = opset.softmax(param_node, softmax_axis) - return Model(softmax_node, [param_node], "mock") +class TestOVQuantizeApi(TemplateTestQuantizeApi): + @staticmethod + def get_simple_model() -> ov.Model: + return LinearModel().ov_model + def test_quantize_calibration_device(self, monkeypatch): + model = self.get_simple_model() + dataset = Dataset([np.ones(LINEAR_MODEL_INPUT_SHAPE, dtype=np.float32)]) + captured_devices = [] -def test_non_positive_subset_size(): - model_to_test = get_mock_model() + original_compile = ov.Core.compile_model - with pytest.raises(nncf.ValidationError) as e: - nncf.quantize(model_to_test, Dataset(MockDataset(INPUT_SHAPE)), subset_size=0) - assert "Subset size must be positive." in e.info + def mock_compile(self, model, device_name="CPU", config=None): + captured_devices.append(device_name) + return original_compile(self, model, device_name="CPU", config=config) + + monkeypatch.setattr(ov.Core, "compile_model", mock_compile) + nncf.quantize( + model, + dataset, + advanced_parameters=AdvancedQuantizationParameters(calibration_device="SOME_DEVICE"), + ) + assert all(d == "SOME_DEVICE" for d in captured_devices) + + def test_quantize_with_accuracy_control_calibration_device(self, monkeypatch): + model = self.get_simple_model() + dataset = Dataset([np.ones(LINEAR_MODEL_INPUT_SHAPE, dtype=np.float32)]) + captured_devices = [] + + original_compile = ov.Core.compile_model + + def mock_compile(self, model, device_name="CPU", config=None): + captured_devices.append(device_name) + return original_compile(self, model, device_name="CPU", config=config) + + monkeypatch.setattr(ov.Core, "compile_model", mock_compile) + nncf.quantize_with_accuracy_control( + model, + dataset, + dataset, + lambda model, dataset: (1.0, None), + advanced_quantization_parameters=AdvancedQuantizationParameters(calibration_device="SOME_DEVICE"), + ) + assert "SOME_DEVICE" in captured_devices + + def test_non_positive_subset_size(self): + model_to_test = self.get_simple_model() + + with pytest.raises(nncf.ValidationError) as e: + nncf.quantize(model_to_test, Dataset([np.ones(LINEAR_MODEL_INPUT_SHAPE, dtype=np.float32)]), subset_size=0) + assert "Subset size must be positive." in e.info diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py index 7f5322b2ac5..46f08356e38 100644 --- a/tests/openvino/native/quantization/test_weights_compression.py +++ b/tests/openvino/native/quantization/test_weights_compression.py @@ -2767,3 +2767,27 @@ def test_phi_rope_model(self): group_size=-1, ) assert self.get_num_int8_nodes(compressed_model) == 0 + + def test_compress_weights_calibration_device(self, monkeypatch): + model = AWQMatmulModel().ov_model + dataset = Dataset([np.ones([2, 8, 8])]) + captured_devices = [] + + original_compile = ov.Core.compile_model + + def mock_compile(self, model, device_name="CPU", config=None): + captured_devices.append(device_name) + return original_compile(self, model, device_name="CPU", config=config) + + monkeypatch.setattr(ov.Core, "compile_model", mock_compile) + monkeypatch.setenv("NNCF_DISABLE_OPTIMIZED_COMPRESSION", "1") + compress_weights( + model, + mode=CompressWeightsMode.INT4_SYM, + ratio=1.0, + group_size=2, + dataset=dataset, + awq=True, + advanced_parameters=AdvancedCompressionParameters(calibration_device="SOME_DEVICE"), + ) + assert all(d == "SOME_DEVICE" for d in captured_devices) diff --git a/tests/openvino/native/test_engine.py b/tests/openvino/native/test_engine.py index 59adca19323..c4b9deb3b8f 100644 --- a/tests/openvino/native/test_engine.py +++ b/tests/openvino/native/test_engine.py @@ -11,10 +11,14 @@ from functools import wraps import numpy as np +import openvino as ov import pytest +from openvino import Type +from openvino.properties.hint import inference_precision from nncf.definitions import NNCF_DATASET_RESET_STATE_KEY from nncf.openvino.engine import OVNativeEngine +from nncf.openvino.engine import calibration_device_context from tests.openvino.native.models import ConvModel from tests.openvino.native.models import LinearModel from tests.openvino.native.models import QuantizedModel @@ -123,3 +127,35 @@ def _reset_state(): "infer", "infer", ] + + +def test_calibration_device(monkeypatch): + model = LinearModel().ov_model + captured_device = None + captured_config = None + + original_compile = ov.Core.compile_model + + def mock_compile(self, model, device_name="CPU", config=None): + nonlocal captured_device + nonlocal captured_config + captured_device = device_name + captured_config = config + return original_compile(self, model, device_name="CPU", config=config) + + monkeypatch.setattr(ov.Core, "compile_model", mock_compile) + # Check default CPU + OVNativeEngine(model) + assert captured_device == "CPU" + assert captured_config == {inference_precision: Type.f32} + + # Check with the context + with calibration_device_context("SOME_DEVICE"): + OVNativeEngine(model) + assert captured_device == "SOME_DEVICE" + assert captured_config is None + + # Check the context exit resets the device back + OVNativeEngine(model) + assert captured_device == "CPU" + assert captured_config == {inference_precision: Type.f32} diff --git a/tests/post_training/conftest.py b/tests/post_training/conftest.py index 4aa41a94df3..865b61ed662 100644 --- a/tests/post_training/conftest.py +++ b/tests/post_training/conftest.py @@ -39,6 +39,12 @@ def pytest_addoption(parser): help="Report memory using MemoryMonitor from tools/memory_monitor.py. " "Warning: currently, reported memory values are not always reproducible.", ) + parser.addoption( + "--ov-calibration-device", + action="store", + default=None, + help="OpenVINO device to use for calibration during weight compression (e.g. CPU, GPU).", + ) @pytest.fixture(scope="session", name="data_dir") @@ -94,6 +100,11 @@ def fixture_memory_monitor(pytestconfig): return pytestconfig.getoption("memory_monitor") +@pytest.fixture(scope="session", name="ov_calibration_device") +def fixture_ov_calibration_device(pytestconfig): + return pytestconfig.getoption("ov_calibration_device") + + @pytest.fixture(scope="session", name="forked") def fixture_forked(pytestconfig): return pytestconfig.getoption("forked") diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py index a14b9b50ee2..2fd7ed139d3 100644 --- a/tests/post_training/test_quantize_conformance.py +++ b/tests/post_training/test_quantize_conformance.py @@ -198,6 +198,7 @@ def run_pipeline( output_dir: Path, data_dir: Path | None, no_eval: bool, + ov_calibration_device: str | None, batch_size: int | None, run_fp32_backend: bool, run_torch_cuda_backend: bool, @@ -216,6 +217,10 @@ def run_pipeline( maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend, batch_size) pipeline_cls = test_model_param["pipeline_cls"] pipeline_kwargs = create_pipeline_kwargs(test_model_param, subset_size, test_case_name, reference_data) + if ov_calibration_device: + compression_params = pipeline_kwargs["compression_params"] + advanced_params = compression_params.setdefault("advanced_parameters", nncf.AdvancedCompressionParameters()) + advanced_params.calibration_device = ov_calibration_device pipeline_kwargs.update( { "output_dir": output_dir, @@ -285,6 +290,7 @@ def test_ptq_quantization( output_dir, data_dir, no_eval, + None, batch_size, run_fp32_backend, run_torch_cuda_backend, @@ -311,6 +317,7 @@ def test_weight_compression( capsys: pytest.CaptureFixture, extra_columns: bool, memory_monitor: bool, + ov_calibration_device: str | None, use_avx2: None, ): run_pipeline( @@ -321,6 +328,7 @@ def test_weight_compression( output_dir, None, # data_dir is not used in WC no_eval, + ov_calibration_device, batch_size, run_fp32_backend, run_torch_cuda_backend, diff --git a/tests/torch/function_hook/quantization/test_quantize_api.py b/tests/torch/function_hook/quantization/test_quantize_api.py new file mode 100644 index 00000000000..743bd6ecafb --- /dev/null +++ b/tests/torch/function_hook/quantization/test_quantize_api.py @@ -0,0 +1,18 @@ +# Copyright (c) 2026 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from tests.cross_fw.test_templates.helpers import ConvTestModel +from tests.cross_fw.test_templates.template_test_quantize_api import TemplateTestQuantizeApi + + +class TestPTQuantizeApi(TemplateTestQuantizeApi): + @staticmethod + def get_simple_model(): + return ConvTestModel() diff --git a/tests/torch/fx/test_quantize_api.py b/tests/torch/fx/test_quantize_api.py new file mode 100644 index 00000000000..46e123e0887 --- /dev/null +++ b/tests/torch/fx/test_quantize_api.py @@ -0,0 +1,22 @@ +# Copyright (c) 2026 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from tests.cross_fw.test_templates.helpers import ConvTestModel +from tests.cross_fw.test_templates.template_test_quantize_api import TemplateTestQuantizeApi + + +class TestFXQuantizeApi(TemplateTestQuantizeApi): + @staticmethod + def get_simple_model(): + model = ConvTestModel().eval() + example_input = torch.ones(ConvTestModel.INPUT_SIZE) + return torch.export.export(model, args=(example_input,)).module()