From a0a8bcc925df8eccba999ce3bcf46e2baacd6cef Mon Sep 17 00:00:00 2001
From: Naraen Rammoorthi
Date: Fri, 27 Feb 2026 19:55:15 +0530
Subject: [PATCH 1/2] Fix weight layout detection for MatMul with transpose in OpenVINO backend

---
Reviewer notes (this area is commentary, not part of the commit message):
* get_linear_weights_layout_from_node now reads the constant's "transpose"
  flag with .get(..., False) instead of indexing, so a missing key no longer
  raises KeyError.
* The flag is flipped when layer_attributes.input_attributes[port_id]
  reports "transpose". PATCH 2/2 removes this branch again.

 src/nncf/openvino/graph/layout.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/nncf/openvino/graph/layout.py b/src/nncf/openvino/graph/layout.py
index a43517ceec9..4618cbc3c68 100644
--- a/src/nncf/openvino/graph/layout.py
+++ b/src/nncf/openvino/graph/layout.py
@@ -69,9 +69,19 @@ def get_linear_weights_layout_from_node(node: NNCFNode) -> tuple[OVLayoutElem]:
     layer_attributes = node.layer_attributes
     port_id = _get_constant_port_id_from_layer_attributes(layer_attributes)
     constant_layer_attrs = layer_attributes.constant_attributes[port_id]
+
+    transpose = constant_layer_attrs.get("transpose", False)
+    input_shape = constant_layer_attrs["shape"]
+
+    # Detect transpose via layer_attributes metadata if present
+    if hasattr(layer_attributes, "input_attributes"):
+        input_attrs = layer_attributes.input_attributes.get(port_id, {})
+        if input_attrs.get("transpose", False):
+            transpose = not transpose
+
     return get_linear_input_layout(
-        input_shape=constant_layer_attrs["shape"],
-        transpose=constant_layer_attrs["transpose"],
+        input_shape=input_shape,
+        transpose=transpose,
         port_id=port_id,
     )
 
From b99df5d64e997cb67730e96b753989ef394bc686 Mon Sep 17 00:00:00 2001
From: Naraen Rammoorthi
Date: Sun, 15 Mar 2026 12:02:32 +0530
Subject: [PATCH 2/2] Support transposed activations for data-aware weight compression methods

---
Reviewer notes (this area is commentary, not part of the commit message):
* layout.py: removes the input_attributes transpose toggle added in
  PATCH 1/2; the .get("transpose", False) lookup stays.
* scale_estimation.py: instead of raising UnsupportedModelError for
  transposed activations, apply() looks up the activation channel axis and
  forwards it through calculate_quantization_params (new trailing parameter
  act_ch_axis, default -1) to process_stats.
* Test: renamed to test_compression_works_with_transposed_activations, uses
  input shape (2, 24, 16), skips the GPTQ and LoRA correction cases, and
  expects compress_weights to complete for the remaining cases.
* Changes made during review:
  - The algorithm.py hunk that deleted the LoRA correction
    UnsupportedModelError guard is dropped. The test in this patch still
    treats LoRA correction with transposed activations as out of scope, so
    the explicit error must stay until that work lands.
  - PEP 8 spacing fixed in "act_ch_axis: int = -1" and in the process_stats
    call.
  - The test file keeps its trailing newline.
  - Indentation and blank context lines were reconstructed; commit and blob
    ids are carried over from the original series. Regenerate with
    git format-patch after applying.

 src/nncf/openvino/graph/layout.py             |  6 ----
 .../weight_compression/scale_estimation.py    | 12 ++++---
 .../template_test_weights_compression.py      | 31 ++++++++++---------
 3 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/src/nncf/openvino/graph/layout.py b/src/nncf/openvino/graph/layout.py
index 4618cbc3c68..d518a374d12 100644
--- a/src/nncf/openvino/graph/layout.py
+++ b/src/nncf/openvino/graph/layout.py
@@ -73,12 +73,6 @@ def get_linear_weights_layout_from_node(node: NNCFNode) -> tuple[OVLayoutElem]:
     transpose = constant_layer_attrs.get("transpose", False)
     input_shape = constant_layer_attrs["shape"]
 
-    # Detect transpose via layer_attributes metadata if present
-    if hasattr(layer_attributes, "input_attributes"):
-        input_attrs = layer_attributes.input_attributes.get(port_id, {})
-        if input_attrs.get("transpose", False):
-            transpose = not transpose
-
     return get_linear_input_layout(
         input_shape=input_shape,
         transpose=transpose,
diff --git a/src/nncf/quantization/algorithms/weight_compression/scale_estimation.py b/src/nncf/quantization/algorithms/weight_compression/scale_estimation.py
index d3834a1b452..c06ecb059fa 100644
--- a/src/nncf/quantization/algorithms/weight_compression/scale_estimation.py
+++ b/src/nncf/quantization/algorithms/weight_compression/scale_estimation.py
@@ -139,9 +139,11 @@ def apply(
                 continue
             _, weight_port_id = weight_data[0]
 
-            if self._backend_entity.matmul_has_transposed_activations(wp.node_with_weight, graph):
-                msg = "Transposed activations are not supported yet for the Scale Estimation algorithm"
-                raise nncf.UnsupportedModelError(msg)
+            activation_port = self._backend_entity.get_activation_port_id(wp.node_with_weight, graph)
+            activation_edge = graph.get_input_edge_by_port_id(wp.node_with_weight, activation_port)
+            act_ch_axis = self._backend_entity.get_activation_channel_axis(
+                wp.node_with_weight, activation_edge.input_port_id, activation_edge.tensor_shape
+            )
 
             weight = self._backend_entity.get_weight(wp.node_with_weight, weight_port_id, model, graph)
 
@@ -154,6 +156,7 @@ def apply(
                 self._initial_steps,
                 self._scale_steps,
                 self._weight_penalty,
+                act_ch_axis,
             )
             res[weight_name] = CompressedWeight(None, scale, zero_point, None)
 
@@ -169,6 +172,7 @@ def calculate_quantization_params(
         initial_steps: int = 5,
         scale_steps: int = 10,
         weight_penalty: float = -1.0,
+        act_ch_axis: int = -1,
     ) -> Tensor:
         """
         Calculates the quantization parameters for a given set of weights and activations.
@@ -195,7 +199,7 @@ def calculate_quantization_params(
         """
         reduction_axis = reduction_axes[0]
 
-        s, X = process_stats(statistics, subset_size)
+        s, X = process_stats(statistics, subset_size, act_ch_axis)
         X = X.astype(TensorDataType.float32)
         weight = weight.astype(TensorDataType.float32)
 
diff --git a/tests/cross_fw/test_templates/template_test_weights_compression.py b/tests/cross_fw/test_templates/template_test_weights_compression.py
index aa45ec51d4b..73dfe31a696 100644
--- a/tests/cross_fw/test_templates/template_test_weights_compression.py
+++ b/tests/cross_fw/test_templates/template_test_weights_compression.py
@@ -809,30 +809,31 @@ def get_transposable_awq_model(
             ),
         ],
     )
-    def test_compression_skipped_with_transposed_activations(self, transpose_a_supported, kwargs):
+    def test_compression_works_with_transposed_activations(self, transpose_a_supported, kwargs):
         if not transpose_a_supported:
             pytest.skip("transpose_a is not supported for the current backend")
         if kwargs.get("scale_estimation", False) and "scale_estimation" in self.get_not_supported_algorithms():
             pytest.skip("Scale estimation is not supported")
         if kwargs.get("gptq", False) and "gptq" in self.get_not_supported_algorithms():
             pytest.skip("GPTQ is not supported")
+        if kwargs.get("gptq", False):
+            pytest.skip("GPTQ with transposed activations requires hessian axis refactoring - out of scope")
         if kwargs.get("lora_correction", False) and "lora_correction" in self.get_not_supported_algorithms():
             pytest.skip("lora_correction is not supported")
-
-        INPUT_SHAPE = (2, 4)
+        if kwargs.get("lora_correction", False):
+            pytest.skip("LoRA correction with transposed activations requires adapter shape refactoring - out of scope")
+        INPUT_SHAPE = (2, 24, 16)
         model = self.get_transposable_awq_model(transpose_a=True, transpose_b=True, input_shape=INPUT_SHAPE)
         input = 0.01 * np.arange(0, np.multiply.reduce(INPUT_SHAPE), dtype=np.float32).reshape(INPUT_SHAPE) + 0.02
         input = self.to_tensor(input)
         dataset = Dataset([input] * 2, self.get_transform_func())
-
-        with pytest.raises(nncf.UnsupportedModelError):
-            compress_weights(
-                model,
-                mode=CompressWeightsMode.INT4_SYM,
-                ratio=1.0,
-                group_size=1,
-                subset_size=2,
-                dataset=dataset,
-                all_layers=True,
-                **kwargs,
-            )
+        compress_weights(
+            model,
+            mode=CompressWeightsMode.INT4_SYM,
+            ratio=1.0,
+            group_size=1,
+            subset_size=2,
+            dataset=dataset,
+            all_layers=True,
+            **kwargs,
+        )