Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 6 additions & 10 deletions tests/post_training/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,13 @@ pip install -r requirements.txt

## Data preparation

## Imagenet
The tests use datasets from Hugging Face, which requires the `HF_TOKEN` environment variable to be set.
To use imagenet-1k, you must first accept the license at https://huggingface.co/datasets/mlx-vision/imagenet-1k.

<data>/imagenet/val - name of path
Since Torchvision `ImageFolder` class is used to work with data the ImageNet validation dataset should be structured accordingly. Below is an example of the `val` folder:

```text
n01440764
n01695060
n01843383
...
```
> [!IMPORTANT]
> A modified version of the imagenet-1k loader is used to download only the validation subset.
> To avoid any conflict with a cached copy of the full dataset, set a separate cache directory for this test.
> https://huggingface.co/docs/datasets/en/cache#cache-directory
Comment on lines +26 to +32

## Usage

Expand Down
64 changes: 59 additions & 5 deletions tests/post_training/pipelines/image_classification_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@
import numpy as np
import openvino as ov
import torch
from datasets import Split
from datasets import load_dataset_builder
from sklearn.metrics import accuracy_score
from torch.ao.quantization.quantize_pt2e import convert_pt2e
from torch.ao.quantization.quantize_pt2e import prepare_pt2e
from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer
from torchvision import datasets
from torchvision import transforms

import nncf
from nncf import AdvancedQuantizationParameters
Expand All @@ -36,17 +38,51 @@
from nncf.experimental.torch.fx import quantize_pt2e
from tests.post_training.pipelines.base import DEFAULT_VAL_THREADS
from tests.post_training.pipelines.base import FX_BACKENDS
from tests.post_training.pipelines.base import PT_BACKENDS
from tests.post_training.pipelines.base import BackendType
from tests.post_training.pipelines.base import PTQTestPipeline


def hf_imagenet_1k_val(model_transform):
    """
    Build the validation split of the ImageNet-1k dataset from Hugging Face.

    Only the validation subset is downloaded and prepared:
    load_dataset("imagenet-1k") would load the full dataset, which is not needed.

    :param model_transform: Callable applied to every image (after conversion to RGB)
        when items are read from the dataset.
    :return: The validation dataset with an on-access transform set on the "image" column.
    """
    builder_instance = load_dataset_builder("ILSVRC/imagenet-1k", revision="49e2ee26f3810fb5a7536bbf732a7b07389a47b5")

    # Keep only the "validation" entries in both the split info and the data files,
    # so the builder is asked to prepare nothing but the validation subset.
    builder_instance.info.splits = {"validation": builder_instance.info.splits["validation"]}
    builder_instance.config.data_files = {"validation": builder_instance.config.data_files["validation"]}

    builder_instance.download_and_prepare()
    dataset = builder_instance.as_dataset(split=Split.VALIDATION)

    def to_rgb(image):
        """Return the image in RGB mode, converting grayscale, CMYK, RGBA, etc. if needed."""
        # Checking the mode (not just the band count) also covers non-RGB images that
        # have 3+ bands, which would otherwise reach model_transform with a channel
        # layout other than RGB.
        if image.mode != "RGB":
            return image.convert("RGB")
        return image

    def transform_fn(examples):
        # Called by the dataset on access with a batch dict; only "image" is rewritten.
        examples["image"] = [model_transform(to_rgb(img)) for img in examples["image"]]
        return examples

    dataset.set_transform(transform_fn)
    return dataset


class ImageClassificationBase(PTQTestPipeline):
"""Base pipeline for Image Classification models"""

def prepare_calibration_dataset(self):
    """
    Create the calibration dataset and store it in `self.calibration_dataset`.

    Reads `self.transform` and `self.batch_size`; the data loader is wrapped into
    `nncf.Dataset` together with the backend-specific transform function.
    """
    # The data comes from hf_imagenet_1k_val only. No dataset is built from
    # self.data_dir here, so the method does not depend on a local data directory.
    dataset = hf_imagenet_1k_val(self.transform)
    loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, num_workers=2, shuffle=False)

    self.calibration_dataset = nncf.Dataset(loader, self.get_transform_calibration_fn())

def _validate_ov(
Expand Down Expand Up @@ -78,7 +114,9 @@ def process_result(request, userdata):

infer_queue.set_callback(process_result)

for i, (images, target) in enumerate(val_loader):
for i, data in enumerate(val_loader):
images = data["image"]
target = data["label"]
# W/A for memory leaks when using torch DataLoader and OpenVINO
image_copies = copy.deepcopy(images.numpy())
infer_queue.start_async(image_copies, userdata=i)
Expand Down Expand Up @@ -110,7 +148,7 @@ def _validate_torch_compile(
return predictions, references

def _validate(self) -> None:
val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform)
val_dataset = hf_imagenet_1k_val(self.transform)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=2, shuffle=False)

dataset_size = len(val_loader)
Expand Down Expand Up @@ -219,3 +257,19 @@ def _build_quantizer(self) -> TorchAOQuantizer:
quantizer_kwargs["quantizer_propagation_rule"] = advanced_parameters.quantizer_propagation_rule

return OpenVINOQuantizer(**quantizer_kwargs)

def get_transform_calibration_fn(self):
    """
    Return the function that turns a data loader item into model input for calibration.

    Items are dicts with an "image" entry. For FX/PT backends the image batch is moved
    to the target torch device; for all other backends it is converted to a float32
    numpy array keyed by `self.input_name`.
    """
    if self.backend not in FX_BACKENDS + PT_BACKENDS:

        def transform_fn(data_item):
            return {self.input_name: np.array(data_item["image"], dtype=np.float32)}

        return transform_fn

    # Only the CUDA flavours of the torch backends run on GPU.
    on_cuda = self.backend in [BackendType.CUDA_TORCH, BackendType.CUDA_FX_TORCH]
    device = torch.device("cuda" if on_cuda else "cpu")

    def transform_fn(data_item):
        return data_item["image"].to(device)

    return transform_fn
17 changes: 0 additions & 17 deletions tests/post_training/pipelines/image_classification_timm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import onnx
import openvino as ov
import timm
Expand Down Expand Up @@ -95,19 +94,3 @@ def prepare_preprocessor(self) -> None:
mean=config["mean"],
std=config["std"],
)

def get_transform_calibration_fn(self):
    """
    Return the function that turns a data loader item into model input for calibration.

    Items are two-element sequences whose first element is the image batch. For PT
    backends the batch is moved to the target torch device; otherwise it is converted
    to a float32 numpy array keyed by `self.input_name`.
    """
    if self.backend not in PT_BACKENDS:

        def transform_fn(data_item):
            batch, _ = data_item
            return {self.input_name: np.array(batch, dtype=np.float32)}

        return transform_fn

    device_name = "cuda" if self.backend == BackendType.CUDA_TORCH else "cpu"
    device = torch.device(device_name)

    def transform_fn(data_item):
        batch, _ = data_item
        return batch.to(device)

    return transform_fn
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from dataclasses import dataclass
from typing import Any, Callable

import numpy as np
import onnx
import openvino as ov
import torch
Expand Down Expand Up @@ -143,21 +142,3 @@ def _dump_model_fp32(self) -> None:

def prepare_preprocessor(self) -> None:
    """Set `self.transform` to the transforms provided by the model weights."""
    weights = self.model_params.weights
    self.transform = weights.transforms()

def get_transform_calibration_fn(self):
    """
    Return the function that turns a data loader item into model input for calibration.

    Items are two-element sequences whose first element is the image batch. For FX/PT
    backends the batch is moved to the target torch device; otherwise it is converted
    to a float32 numpy array keyed by `self.input_name`.
    """
    torch_backends = FX_BACKENDS + PT_BACKENDS
    if self.backend in torch_backends:
        cuda_backends = [BackendType.CUDA_TORCH, BackendType.CUDA_FX_TORCH]
        device = torch.device("cuda" if self.backend in cuda_backends else "cpu")

        def transform_fn(data_item):
            batch, _ = data_item
            return batch.to(device)

        return transform_fn

    def transform_fn(data_item):
        batch, _ = data_item
        return {self.input_name: np.array(batch, dtype=np.float32)}

    return transform_fn
4 changes: 2 additions & 2 deletions tests/post_training/test_quantize_conformance.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ def run_pipeline(
}
)
pipeline: BaseTestPipeline = pipeline_cls(**pipeline_kwargs)

try:
pipeline.run()
except Exception as e:
Expand Down Expand Up @@ -265,7 +266,6 @@ def run_pipeline(
def test_ptq_quantization(
ptq_reference_data: dict,
test_case_name: str,
data_dir: Path,
output_dir: Path,
result_data: dict[str, RunInfo],
no_eval: bool,
Expand All @@ -284,7 +284,7 @@ def test_ptq_quantization(
PTQ_TEST_CASES,
result_data,
output_dir,
data_dir,
None, # data_dir is not used in PTQ, used HF datasets
Comment thread
AlexanderDokuchaev marked this conversation as resolved.
no_eval,
batch_size,
run_fp32_backend,
Expand Down