-
Notifications
You must be signed in to change notification settings - Fork 295
Enable transpose_a support for LoRA Correction #3864
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 1 commit
1c86647
86ee4d8
c6632b4
ef4f72f
a463e02
a947c72
3803aa6
6036e90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,6 +28,7 @@ | |
| from nncf import SensitivityMetric | ||
| from nncf.common.factory import build_graph | ||
| from nncf.common.tensor_statistics.collectors import AggregatorBase | ||
| from nncf.common.tensor_statistics.statistics import WCTensorStatistic | ||
| from nncf.common.utils.debug import nncf_debug | ||
| from nncf.common.utils.helpers import set_env_variable | ||
| from nncf.data.dataset import Dataset | ||
|
|
@@ -42,6 +43,7 @@ | |
| from nncf.quantization.advanced_parameters import AdvancedGPTQParameters as GPTQParams | ||
| from nncf.quantization.advanced_parameters import AdvancedLoraCorrectionParameters as LoraParams | ||
| from nncf.quantization.advanced_parameters import GroupSizeFallbackMode | ||
| from nncf.quantization.algorithms.weight_compression.activation_stats import process_stats | ||
| from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig | ||
| from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters | ||
| from nncf.quantization.algorithms.weight_compression.mixed_precision import MIXED_PRECISION_CRITERIA | ||
|
|
@@ -2574,3 +2576,79 @@ def test_awq_scale_ref() -> list[dict[str, Tensor]]: | |
@pytest.fixture
def transpose_a_supported(self) -> bool:
    """Fixture that always evaluates to ``True``."""
    return True
|
|
||
|
|
||
def test_process_stats_with_transpose_a_changes_layout():
    """
    Compare ``process_stats`` results for ``transpose_a=False`` and ``transpose_a=True``.

    One statistic, built from random activations of shape (10, 3, 8), is processed twice
    with the same ``subset_size`` and ``act_ch_axis``; only ``transpose_a`` differs.
    Only shapes of the returned tensors are checked, never their values.
    """
    activations = np.random.randn(10, 3, 8)
    stats = WCTensorStatistic(Tensor(activations), shape_values=activations.shape)

    # Arguments shared by both calls, so the flag under test is the only difference.
    shared_kwargs = {"subset_size": 10, "act_ch_axis": -1}
    s_plain, x_plain = process_stats(stats, transpose_a=False, **shared_kwargs)
    s_flipped, x_flipped = process_stats(stats, transpose_a=True, **shared_kwargs)

    # Rank must stay the same
    plain_rank = len(s_plain.shape)
    flipped_rank = len(s_flipped.shape)
    assert plain_rank == flipped_rank

    # Leading dimension of `s` must be 3 for both layouts
    # (the original test labels this the seq_len / reduction dimension).
    assert s_plain.shape[0] == s_flipped.shape[0] == 3

    # Layout must change
    assert x_plain.shape != x_flipped.shape

    # Element count preserved
    plain_numel = np.prod(x_plain.shape)
    flipped_numel = np.prod(x_flipped.shape)
    assert plain_numel == flipped_numel
|
|
||
|
|
||
| @pytest.mark.parametrize( | ||
| "transpose_a,transpose_b", | ||
| [ | ||
| (False, False), | ||
| (False, True), | ||
| ], | ||
| ) | ||
| def test_lora_transpose_a_fix(transpose_a, transpose_b): | ||
| """ | ||
| Test LoRA correction insertion only with transpose_a=False | ||
| because transposed activations are not yet supported by LoRA. | ||
| """ | ||
| # Setup LoRA parameters | ||
| params = LoraParams(adapter_rank=4, use_int8_adapters=False) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we extend the existing tests instead? https://github.com/openvinotoolkit/nncf/blob/develop/tests/openvino/native/quantization/test_weights_compression.py#L1613-L1617
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, that makes sense. I can update the existing tests to cover the act_ch_axis/transpose handling instead of adding separate ones, so that the verification of LoRA Correction with transposed inputs is integrated into the current test suite.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please don't forget to update the tests. |
||
| advanced_parameters = CompressionParams(lora_correction_params=params) | ||
|
|
||
| # Initialize model with given transpose configuration | ||
| model = LMLinearModel(transpose_b=transpose_b, transpose_a=transpose_a) | ||
| ov_model = model.ov_model | ||
|
|
||
| # Use dummy dataset with same shape as model input | ||
| dataset = Dataset(np.ones(inp.shape) for inp in ov_model.inputs) | ||
|
|
||
| # Compress weights with LoRA correction enabled | ||
| compressed_model = compress_weights( | ||
| ov_model, | ||
| mode=CompressWeightsMode.INT4_SYM, | ||
| ratio=1.0, | ||
| group_size=8, | ||
| dataset=dataset, | ||
| all_layers=True, | ||
| lora_correction=True, | ||
| advanced_parameters=advanced_parameters, | ||
| ) | ||
|
|
||
| # Simple assertion: compressed model is returned | ||
| assert compressed_model is not None | ||
Uh oh!
There was an error while loading. Please reload this page.