diff --git a/examples/README.md b/examples/README.md
index 26e4a5792d7..2b567ef4ef6 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -39,6 +39,19 @@ Intel® Neural Compressor validated examples with multiple compression technique
     <td>Quantization (MXFP8/FP8)</td>
     <td><a href="./pytorch/diffusion_model/diffusers/flux">link</a></td>
 </tr>
+<tr>
+    <td>Wan2.2-I2V-A14B-Diffusers</td>
+    <td>Image to Video</td>
+    <td>Quantization (MXFP8/FP8)</td>
+    <td><a href="./pytorch/diffusion_model/diffusers/wan">link</a></td>
+</tr>
+<tr>
+    <td>Wan2.2-T2V-A14B-Diffusers</td>
+    <td>Text to Video</td>
+    <td>Quantization (MXFP8/FP8)</td>
+    <td><a href="./pytorch/diffusion_model/diffusers/wan">link</a></td>
+</tr>
+
 <tr>
     <td>Llama-4-Scout-17B-16E-Instruct</td>
     <td>Multimodal Modeling</td>
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/README.md b/examples/pytorch/diffusion_model/diffusers/wan/README.md
new file mode 100644
index 00000000000..85883127570
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/README.md
@@ -0,0 +1,174 @@
+# Step-by-Step
+
+This example provides a unified entry point for quantizing and evaluating Wan models, supporting both text-to-video (t2v) and image-to-video (i2v) tasks.
+
+# Prerequisite
+
+## 1. Environment
+
+```shell
+# Use latest dev branch if needed before release
+# INC_PT_ONLY=1 pip install git+https://github.com/intel/neural-compressor.git@master
+# pip install git+https://github.com/intel/auto-round.git@main
+
+# install all runtime dependencies (including evaluation package VBench)
+pip install -r requirements.txt
+pip install VBench --no-deps
+```
+
+## 2. Prepare Model
+
+Use a local Wan diffusers model path, for example:
+
+- Wan2.2-T2V-A14B-Diffusers
+- Wan2.2-I2V-A14B-Diffusers
+
+Download example (from Hugging Face):
+
+```bash
+# optional: update CLI to latest version
+pip install -U "huggingface_hub[cli]"
+
+# t2v model
+hf download Wan-AI/Wan2.2-T2V-A14B-Diffusers \
+  --local-dir /path/to/Wan2.2-T2V-A14B-Diffusers
+
+# i2v model
+hf download Wan-AI/Wan2.2-I2V-A14B-Diffusers \
+  --local-dir /path/to/Wan2.2-I2V-A14B-Diffusers
+```
+
+## 3. Prepare Dataset
+Clone VBench into this example directory (the default location `run_benchmark.sh` looks in) to get the evaluation prompts, then download the i2v image data:
+
+```bash
+# required for dataset preparation
+git clone https://github.com/Vchitect/VBench.git
+cd VBench
+bash vbench2_beta_i2v/download_data.sh
+```
+
+- t2v: pass the prompt folder with `--prompt_folder`; each `--dimension` value selects the prompt file `${prompt_folder}/${dimension}.txt`
+- t2v/i2v: pass comma-separated values in `--dimension` to run multiple dimensions in one command (e.g., `subject_consistency,overall_consistency`)
+- t2v: `--dimension` is required; validated dimensions include `subject_consistency` and `overall_consistency` (other values only trigger a warning)
+- i2v: pass `--image_folder`, `--info_json`, and `--dimension` (validated dimensions include `i2v_subject`, `i2v_background`, `subject_consistency`, `background_consistency`, and `motion_smoothness`)
+
+# Run
+
+## Quantization
+
+### t2v
+
+```bash
+# topology supports wan_mxfp8 or wan_fp8
+bash run_quant.sh \
+  --topology=wan_mxfp8 \
+  --input_model=/path/to/Wan2.2-T2V-A14B-Diffusers \
+  --task=t2v \
+  --output_model=wan_mxfp8_model_t2v
+```
+
+### i2v
+
+```bash
+# topology supports wan_mxfp8 or wan_fp8
+bash run_quant.sh \
+  --topology=wan_mxfp8 \
+  --input_model=/path/to/Wan2.2-I2V-A14B-Diffusers \
+  --task=i2v \
+  --output_model=wan_mxfp8_model_i2v
+```
+
+## Inference + Evaluation
+
+When `--accuracy` is enabled, `run_benchmark.sh` runs VBench evaluation scripts from a local VBench repo.
+
+- The default VBench path is the `VBench` directory next to `run_benchmark.sh`; if that directory does not exist, the script clones VBench into it.
+- If your VBench repo is elsewhere, pass `--vbench_dir=/path/to/VBench`.
+
+### t2v bf16
+
+```bash
+bash run_benchmark.sh \
+  --topology=wan_bf16 \
+  --input_model=/path/to/Wan2.2-T2V-A14B-Diffusers \
+  --task=t2v \
+  --dimension=subject_consistency,overall_consistency \
+  --prompt_folder=/path/to/VBench/prompts/prompts_per_dimension/ \
+  --output_video_path=wan_t2v_bf16_video \
+  --accuracy
+```
+
+### t2v mxfp8/fp8
+
+```bash
+# topology supports wan_mxfp8 or wan_fp8
+bash run_benchmark.sh \
+  --topology=wan_mxfp8 \
+  --input_model=wan_mxfp8_model_t2v \
+  --task=t2v \
+  --dimension=subject_consistency,overall_consistency \
+  --prompt_folder=./VBench/prompts/prompts_per_dimension/ \
+  --output_video_path=wan_t2v_mxfp8_video \
+  --accuracy
+```
+
+### i2v bf16
+
+```bash
+bash run_benchmark.sh \
+  --topology=wan_bf16 \
+  --input_model=/path/to/Wan2.2-I2V-A14B-Diffusers \
+  --task=i2v \
+  --dimension=i2v_background,i2v_subject \
+  --image_folder=/path/to/VBench/vbench2_beta_i2v/data/crop/16-9 \
+  --info_json=/path/to/VBench/vbench2_beta_i2v/vbench2_i2v_full_info.json \
+  --output_video_path=wan_i2v_bf16_video \
+  --accuracy
+```
+
+### i2v mxfp8/fp8
+
+```bash
+# topology supports wan_mxfp8 or wan_fp8
+bash run_benchmark.sh \
+  --topology=wan_mxfp8 \
+  --input_model=wan_mxfp8_model_i2v \
+  --task=i2v \
+  --dimension=i2v_background,i2v_subject \
+  --image_folder=./VBench/vbench2_beta_i2v/data/crop/16-9 \
+  --info_json=./VBench/vbench2_beta_i2v/vbench2_i2v_full_info.json \
+  --output_video_path=wan_i2v_mxfp8_video \
+  --accuracy
+```
+
+Note: To shard the inputs across multiple GPUs (one process per GPU), pass `--gpu_ids` (for example `--gpu_ids=0,1,2,3`) or set `CUDA_VISIBLE_DEVICES` before running `run_benchmark.sh`.
+
+### Standalone Accuracy Evaluation (Optional)
+
+If you already use `--accuracy` in `run_benchmark.sh`, you can skip this section.
+Use this section when you want to evaluate existing videos without re-running generation.
+
+```bash
+# t2v accuracy on generated videos
+cd /path/to/VBench
+python evaluate.py \
+  --dimension subject_consistency motion_smoothness aesthetic_quality overall_consistency imaging_quality \
+  --videos_path /path/to/wan_t2v_bf16_video \
+  --mode vbench_standard
+
+# i2v accuracy on generated videos
+cd /path/to/VBench
+python evaluate_i2v.py \
+  --dimension i2v_background i2v_subject subject_consistency background_consistency motion_smoothness \
+  --videos_path /path/to/wan_i2v_bf16_video \
+  --ratio 16-9 \
+  --mode vbench_standard
+```
+
+# Notes
+
+- Quantized weights are saved under:
+  - `<output_model>/transformer`
+  - `<output_model>/transformer_2`
+
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/main.py b/examples/pytorch/diffusion_model/diffusers/wan/main.py
new file mode 100644
index 00000000000..c36ebef7177
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/main.py
@@ -0,0 +1,310 @@
+import argparse
+import json
+import os
+import random
+
+import numpy as np
+import torch
+from diffusers import AutoencoderKLWan, WanImageToVideoPipeline, WanPipeline, WanTransformer3DModel
+from diffusers.utils import export_to_video, load_image
+from functools import partial
+from neural_compressor.torch.quantization import AutoRoundConfig, convert, prepare
+
+from auto_round.data_type.fp8 import quant_fp8_sym
+from auto_round.data_type.mxfp import quant_mx_rceil
+
+
+def parse_args():
+    """Parse the command-line options of the Wan quantization/evaluation example.
+
+    Returns:
+        argparse.Namespace: Parsed options covering the model path, task, scheme,
+        stage switches, dataset locations and generation settings.
+    """
+    parser = argparse.ArgumentParser(
+        description="Wan quantization and evaluation example.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    add = parser.add_argument
+
+    # Model, task and stage selection.
+    add("--model", "--model_name", "--model_name_or_path", type=str, required=True, help="Wan model path")
+    add("--task", choices=["t2v", "i2v"], default="t2v", help="Wan task type")
+    add("--scheme", type=str, choices=["BF16", "FP8", "MXFP8"], default="BF16", help="Quantization scheme")
+    add("--quantize", action="store_true")
+    add("--inference", action="store_true")
+    add(
+        "--output_dir",
+        "--quantized_model_path",
+        type=str,
+        default="./tmp_autoround",
+        help="Directory to save quantized transformer weights",
+    )
+
+    # Dataset inputs.
+    add("--prompt_folder", default=None, type=str, help="T2V prompt folder path")
+    add("--image_folder", default=None, type=str, help="I2V image folder path")
+    add("--info_json", default=None, type=str, help="I2V info json file path")
+    dimension_help = (
+        "VBench dimension used by t2v/i2v evaluation or input filtering "
+        "(validated examples: t2v=subject_consistency,overall_consistency; "
+        "i2v=i2v_subject,i2v_background)"
+    )
+    add("--dimension", default=None, type=str, help=dimension_help)
+
+    # Generation settings.
+    add("--output_video_path", type=str, default="./tmp_video", help="Directory to save generated videos")
+    add("--limit", type=int, default=-1, help="Limit the number of prompts for evaluation")
+    add("--seed", type=int, default=42, help="Random seed")
+    add("--height", type=int, default=720)
+    add("--width", type=int, default=1280)
+    add("--num_frames", type=int, default=81)
+    add("--num_inference_steps", type=int, default=40)
+    add("--guidance_scale", type=float, default=4.0, help="Guidance scale for t2v/i2v")
+    add("--guidance_scale_2", type=float, default=3.0, help="Second guidance scale for t2v only")
+    add("--fps", type=int, default=16)
+    add("--ratio", type=str, default="16-9", help="Aspect ratio used by i2v VBench dataset")
+    add("--image_max_area", type=int, default=480 * 832, help="Maximum i2v image area")
+
+    # MXFP8 activation quantize-dequantize (QDQ) tuning knobs.
+    add("--mxfp8_chunk_rows", type=int, default=2048, help="Row chunk size for MXFP8 activation QDQ")
+    add("--disable_mxfp8_inplace_qdq", action="store_true", help="Disable in-place MXFP8 activation QDQ")
+
+    return parser.parse_args()
+
+
+def setup_seed(seed):
+    """Seed the Python, NumPy and PyTorch random number generators.
+
+    Args:
+        seed: Seed value applied to every generator.
+    """
+    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
+        seed_fn(seed)
+
+    # The CUDA generators are only seeded when CUDA is available.
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+
+
+def get_scheme_config(scheme):
+    """Return the quantization settings for ``scheme``.
+
+    Args:
+        scheme: Scheme name; "FP8" and "MXFP8" have settings.
+
+    Returns:
+        A new dict of settings for "FP8"/"MXFP8", or ``None`` for any other value.
+    """
+    config = None
+    if scheme == "MXFP8":
+        config = {"bits": 8, "data_type": "mx_fp", "group_size": 32}
+    elif scheme == "FP8":
+        config = {"bits": 8, "data_type": "fp", "group_size": 0, "sym": True}
+    return config
+
+
+def build_pipeline(args):
+    """Load the diffusers pipeline for ``args.task`` and enable model CPU offload.
+
+    Args:
+        args: Parsed options; ``task`` selects the pipeline class and ``model`` is
+            the path handed to ``from_pretrained``.
+
+    Returns:
+        The loaded pipeline (``WanPipeline`` for t2v, ``WanImageToVideoPipeline``
+        for i2v).
+
+    Raises:
+        ValueError: If ``args.task`` is neither "t2v" nor "i2v".
+    """
+    task = args.task
+    if task not in ("t2v", "i2v"):
+        raise ValueError(f"Unsupported task: {args.task}. Supported tasks are: i2v, t2v")
+
+    if task == "i2v":
+        pipeline = WanImageToVideoPipeline.from_pretrained(args.model, torch_dtype=torch.bfloat16)
+    else:
+        # For t2v the VAE is loaded separately in float32; the rest of the pipeline uses bfloat16.
+        fp32_vae = AutoencoderKLWan.from_pretrained(args.model, subfolder="vae", torch_dtype=torch.float32)
+        pipeline = WanPipeline.from_pretrained(args.model, vae=fp32_vae, torch_dtype=torch.bfloat16)
+
+    pipeline.enable_model_cpu_offload()
+    return pipeline
+
+
+def quantize_pipeline(pipe, args):
+    """Quantize the pipeline with AutoRound using the scheme in ``args.scheme``.
+
+    Args:
+        pipe: Pipeline object handed to ``prepare`` and ``convert``.
+        args: Parsed options; ``scheme`` selects the quantization settings and
+            ``output_dir`` is forwarded to ``AutoRoundConfig``.
+
+    Raises:
+        ValueError: If ``args.scheme`` has no quantization settings (e.g. BF16).
+    """
+    scheme_cfg = get_scheme_config(args.scheme)
+    if scheme_cfg is None:
+        raise ValueError("BF16 does not need quantization. Use --scheme FP8 or --scheme MXFP8 with --quantize.")
+
+    qconfig = AutoRoundConfig(
+        iters=0,
+        export_format="fake",
+        output_dir=args.output_dir,
+        disable_opt_rtn=True,
+        scheme=scheme_cfg,
+    )
+    pipe = prepare(pipe, qconfig)
+    convert(pipe, qconfig)
+
+
+# Backward-compatible alias: the function was originally published under this misspelled name.
+quantize_pipleine = quantize_pipeline
+
+
+def apply_activation_qdq(pipe, scheme, runtime_args):
+    """Patch transformer-block Linear layers to quantize-dequantize (QDQ) their input.
+
+    Every module whose class is named ``Linear`` and whose qualified name contains
+    ``"blocks"`` inside ``pipe.transformer`` / ``pipe.transformer_2`` gets its
+    ``forward`` replaced by a wrapper. The wrapper applies QDQ to the first
+    positional input and then calls the original forward, which is kept on the
+    module as ``orig_forward``.
+
+    Args:
+        pipe: Pipeline exposing ``transformer`` and, optionally, ``transformer_2``.
+        scheme: "BF16" (nothing is patched), "FP8", or any other value, which is
+            handled as MXFP8.
+        runtime_args: Object optionally providing ``mxfp8_chunk_rows`` and
+            ``disable_mxfp8_inplace_qdq`` (fallbacks: 2048 and False).
+    """
+    if scheme == "BF16":
+        return
+
+    if scheme == "FP8":
+        def act_qdq_forward(module, x, *f_args, **f_kwargs):
+            qdq_x, _, _ = quant_fp8_sym(x, group_size=0)
+            return module.orig_forward(qdq_x, *f_args, **f_kwargs)
+    else:
+        # These settings do not change between calls, so read them once here
+        # instead of on every forward pass.
+        chunk_rows = max(1, int(getattr(runtime_args, "mxfp8_chunk_rows", 2048)))
+        use_inplace = not getattr(runtime_args, "disable_mxfp8_inplace_qdq", False)
+
+        def act_qdq_forward(module, x, *f_args, **f_kwargs):
+            if use_inplace and x.is_cuda:
+                # Chunked in-place QDQ reduces peak activation memory on large tensors.
+                x_2d = x.reshape(-1, x.shape[-1])
+                total_rows = x_2d.shape[0]
+                for start in range(0, total_rows, chunk_rows):
+                    end = min(start + chunk_rows, total_rows)
+                    qdq_chunk = quant_mx_rceil(
+                        x_2d[start:end],
+                        bits=8,
+                        group_size=32,
+                        data_type="mx_fp_rceil",
+                    )[0]
+                    x_2d[start:end].copy_(qdq_chunk)
+                    del qdq_chunk
+                # reshape() may return a copy (non-contiguous input). The QDQ
+                # result lives in x_2d, so derive the output from x_2d rather
+                # than returning x, which could still hold unquantized values.
+                qdq_x = x_2d.reshape(x.shape)
+            else:
+                qdq_x = quant_mx_rceil(
+                    x,
+                    bits=8,
+                    group_size=32,
+                    data_type="mx_fp_rceil",
+                )[0]
+
+            return module.orig_forward(qdq_x, *f_args, **f_kwargs)
+
+    for module_name in ["transformer", "transformer_2"]:
+        module = getattr(pipe, module_name, None)
+        if module is None:
+            # Nothing to patch when the pipeline has no such transformer.
+            continue
+        for n, m in module.named_modules():
+            if m.__class__.__name__ != "Linear" or "blocks" not in n:
+                continue
+            if getattr(m, "_act_qdq_patched", False):
+                # Patching twice would store the wrapper as orig_forward and
+                # make the wrapper call itself.
+                continue
+            m.orig_forward = m.forward
+            m.forward = partial(act_qdq_forward, m)
+            m._act_qdq_patched = True
+
+
+def build_t2v_inputs(args):
+    """Load the t2v prompts of one dimension from ``<prompt_folder>/<dimension>.txt``.
+
+    Blank lines are ignored and surrounding whitespace is stripped. A warning is
+    printed when the dimension is not one of the validated examples.
+
+    Args:
+        args: Parsed options; reads ``prompt_folder``, ``dimension`` and ``limit``
+            (a non-negative limit keeps only the first ``limit`` prompts).
+
+    Returns:
+        A list of ``{"prompt": <text>}`` dicts in file order.
+
+    Raises:
+        ValueError: If ``prompt_folder`` or ``dimension`` is not set.
+        FileNotFoundError: If the prompt folder or the prompt file does not exist.
+    """
+    folder = args.prompt_folder
+    dimension = args.dimension
+
+    if not folder:
+        raise ValueError("--prompt_folder is required for t2v inference/eval")
+    if not dimension:
+        raise ValueError("--dimension is required for t2v inference/eval")
+    if not os.path.isdir(folder):
+        raise FileNotFoundError(f"Prompt folder not found: {folder}")
+
+    prompt_file = os.path.join(folder, f"{dimension}.txt")
+    if not os.path.exists(prompt_file):
+        raise FileNotFoundError(f"Prompt file not found for dimension '{dimension}': {prompt_file}")
+
+    prompts = []
+    with open(prompt_file, "r", encoding="utf-8") as f:
+        for raw_line in f:
+            text = raw_line.strip()
+            if text:
+                prompts.append(text)
+
+    if dimension not in {"subject_consistency", "overall_consistency"}:
+        print(
+            "[WARN] t2v --dimension is not in validated examples "
+            "(subject_consistency, overall_consistency). Continue anyway."
+        )
+
+    # A negative limit means "use every prompt".
+    selected = prompts if args.limit < 0 else prompts[: args.limit]
+    return [{"prompt": text} for text in selected]
+
+
+def build_i2v_inputs(args):
+    """Build the i2v generation inputs of one dimension from the info json.
+
+    An entry is selected when ``args.dimension`` is one of its dimensions. The
+    entry's ``dimension`` field may be a list or a single string; a string is
+    compared for equality (not as a substring) and a missing field never matches.
+    Entries whose image file does not exist are skipped and counted in a warning.
+
+    Args:
+        args: Parsed options; reads ``image_folder``, ``info_json``, ``dimension``
+            and ``limit`` (a non-negative limit keeps only the first ``limit``
+            entries).
+
+    Returns:
+        A list of ``{"prompt": ..., "image_path": ...}`` dicts in info-json order.
+
+    Raises:
+        ValueError: If ``image_folder``, ``info_json`` or ``dimension`` is not set.
+        FileNotFoundError: If the image folder or the info json does not exist.
+    """
+    image_folder = args.image_folder
+    info_json = args.info_json
+
+    if not image_folder:
+        raise ValueError("--image_folder is required for i2v inference/eval")
+    if not info_json:
+        raise ValueError("--info_json is required for i2v inference/eval")
+    if not args.dimension:
+        raise ValueError(
+            "--dimension is required for i2v inference/eval "
+            "(validated examples: i2v_subject, i2v_background)"
+        )
+    if not os.path.isdir(image_folder):
+        raise FileNotFoundError(f"Image folder not found: {image_folder}")
+    if not os.path.exists(info_json):
+        raise FileNotFoundError(f"Info json not found: {info_json}")
+
+    with open(info_json, "r", encoding="utf-8") as f:
+        info_list = json.load(f)
+
+    results = []
+    missing_images = 0
+    for info in info_list:
+        # Normalize to a list so a plain string is matched exactly instead of
+        # by substring, and a missing field simply does not match.
+        dims = info.get("dimension", [])
+        if isinstance(dims, str):
+            dims = [dims]
+        if args.dimension not in dims:
+            continue
+
+        image_path = os.path.join(image_folder, info["image_name"])
+        if not os.path.exists(image_path):
+            missing_images += 1
+            continue
+        results.append(
+            {
+                "prompt": info["prompt_en"],
+                "image_path": image_path,
+            }
+        )
+
+    if missing_images:
+        print(f"[WARN] Skipped {missing_images} i2v entries whose image was not found under {image_folder}")
+
+    if args.limit >= 0:
+        results = results[: args.limit]
+
+    return results
+
+
+def safe_output_path(base_dir, prompt):
+    """Return the video path for ``prompt``: ``<base_dir>/<prompt>-0.mp4``."""
+    file_name = "{}-0.mp4".format(prompt)
+    return os.path.join(base_dir, file_name)
+
+
+@torch.no_grad()
+def run_inference(args, pipe):
+    """Generate one video per input and save it under ``args.output_video_path``.
+
+    Inputs come from ``build_t2v_inputs`` or ``build_i2v_inputs`` depending on
+    ``args.task``. An input whose output file already exists is skipped. A single
+    generator, seeded once with ``args.seed``, is shared by all pipeline calls.
+
+    Args:
+        args: Parsed options (task, seed, sizes, guidance scales, fps, paths).
+        pipe: Callable pipeline whose result exposes ``frames``.
+    """
+    setup_seed(args.seed)
+    os.makedirs(args.output_video_path, exist_ok=True)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    gen = torch.Generator(device=device).manual_seed(args.seed)
+
+    is_t2v = args.task == "t2v"
+    inputs = build_t2v_inputs(args) if is_t2v else build_i2v_inputs(args)
+
+    for item in inputs:
+        prompt = item["prompt"]
+        save_path = safe_output_path(args.output_video_path, prompt)
+        if os.path.exists(save_path):
+            continue
+
+        # Keyword arguments common to both tasks.
+        call_kwargs = {
+            "prompt": prompt,
+            "num_frames": args.num_frames,
+            "guidance_scale": args.guidance_scale,
+            "num_inference_steps": args.num_inference_steps,
+            "generator": gen,
+        }
+
+        if is_t2v:
+            call_kwargs.update(
+                height=args.height,
+                width=args.width,
+                guidance_scale_2=args.guidance_scale_2,
+            )
+        else:
+            image = load_image(item["image_path"])
+            aspect_ratio = image.height / image.width
+            # Output size: about image_max_area pixels at the image's aspect
+            # ratio, rounded down to a multiple of mod_value.
+            mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
+            height = round(np.sqrt(args.image_max_area * aspect_ratio)) // mod_value * mod_value
+            width = round(np.sqrt(args.image_max_area / aspect_ratio)) // mod_value * mod_value
+            call_kwargs.update(image=image.resize((width, height)), height=height, width=width)
+
+        frames = pipe(**call_kwargs).frames[0]
+        export_to_video(frames, save_path, fps=args.fps)
+        print(f"Saved: {save_path}")
+
+
+def main():
+    """Run the requested stages: optional quantization, then optional inference.
+
+    Raises:
+        ValueError: If neither ``--quantize`` nor ``--inference`` is given.
+    """
+    args = parse_args()
+
+    if not (args.quantize or args.inference):
+        raise ValueError("Please enable at least one stage: --quantize or --inference")
+
+    # At least one stage is enabled past the guard above, so the pipeline is
+    # always needed.
+    pipe = build_pipeline(args)
+
+    if args.quantize:
+        quantize_pipleine(pipe, args)
+
+    if args.inference:
+        if args.scheme in ["FP8", "MXFP8"]:
+            apply_activation_qdq(pipe, args.scheme, args)
+        run_inference(args, pipe)
+
+
+# Run the example only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
+
+
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/requirements.txt b/examples/pytorch/diffusion_model/diffusers/wan/requirements.txt
new file mode 100644
index 00000000000..dd0a3842c04
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/requirements.txt
@@ -0,0 +1,43 @@
+# Core runtime
+torch
+torchvision
+diffusers
+transformers
+accelerate
+huggingface_hub
+safetensors
+
+# Quantization stack
+neural-compressor-pt
+auto-round
+
+# Utilities used by example scripts
+einops
+
+# Evaluation package (dataset/eval helpers)
+Pillow
+numpy<2.0.0
+matplotlib
+timm>=0.9,<=1.0.12
+wheel
+cython
+tensorboard
+scipy
+opencv-python
+scikit-learn
+scikit-image
+openai-clip
+decord
+requests
+pyyaml
+pyiqa
+lvis
+fairscale>=0.4.4
+fvcore
+easydict
+urllib3
+boto3
+omegaconf
+pycocoevalcap
+imageio-ffmpeg
+gdown==4.7.3
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/run_benchmark.sh b/examples/pytorch/diffusion_model/diffusers/wan/run_benchmark.sh
new file mode 100644
index 00000000000..1ea381f547a
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/run_benchmark.sh
@@ -0,0 +1,353 @@
+#!/bin/bash
+# Trace every command as it is executed.
+set -x
+
+# Absolute path of the directory that contains this script.
+SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+# Entry point: parse the command-line options, then run the benchmark.
+main() {
+  init_params "$@"
+  run_benchmark
+}
+
+# Clone VBench into ${vbench_dir} when that directory does not exist yet.
+# Exits with status 1 if the clone fails.
+ensure_vbench_repo() {
+  if [ -d "${vbench_dir}" ]; then
+    return
+  fi
+
+  echo "VBench directory not found. Start cloning https://github.com/Vchitect/VBench.git ..."
+  if ! git clone https://github.com/Vchitect/VBench.git "${vbench_dir}"; then
+    echo "Error: failed to clone VBench."
+    exit 1
+  fi
+}
+
+# Check that the inputs required by ${task} were given and that every given
+# path exists. Prints an error and exits with status 1 on the first problem.
+prepare_vbench_inputs() {
+  case "${task}" in
+    t2v)
+      if [ -z "${prompt_folder}" ]; then
+        echo "Error: --prompt_folder is required for task=t2v"
+        exit 1
+      fi
+      if [ -z "${dimension}" ]; then
+        echo "Error: --dimension is required for task=t2v"
+        exit 1
+      fi
+    ;;
+    i2v)
+      if [ -z "${image_folder}" ]; then
+        echo "Error: --image_folder is required for task=i2v"
+        exit 1
+      fi
+      if [ -z "${info_json}" ]; then
+        echo "Error: --info_json is required for task=i2v"
+        exit 1
+      fi
+      if [ -z "${dimension}" ]; then
+        echo "Error: --dimension is required for task=i2v"
+        exit 1
+      fi
+    ;;
+  esac
+
+  # Paths are only checked when they were provided.
+  if [[ -n "${prompt_folder}" && ! -d "${prompt_folder}" ]]; then
+    echo "Error: prompt_folder not found: ${prompt_folder}"
+    exit 1
+  fi
+  if [[ -n "${image_folder}" && ! -d "${image_folder}" ]]; then
+    echo "Error: image_folder not found: ${image_folder}"
+    exit 1
+  fi
+  if [[ -n "${info_json}" && ! -f "${info_json}" ]]; then
+    echo "Error: info_json not found: ${info_json}"
+    exit 1
+  fi
+}
+
+# Parse the command-line options into global variables.
+#
+# Value options accept both "--name=value" and "--name value"; the value is
+# stored in the global variable called "name". --accuracy and
+# --disable_mxfp8_inplace_qdq are flags that set their variable to "true".
+# An unknown option, or a value option given last without a value, prints an
+# error and exits with status 1.
+function init_params {
+  local arg key value
+  while [[ $# -gt 0 ]]; do
+    arg="$1"
+    # Option name without any "=value" suffix.
+    key="${arg%%=*}"
+    case "${key}" in
+      --accuracy|--disable_mxfp8_inplace_qdq)
+        # Flags take no value, so "--accuracy=..." is not a known parameter.
+        if [[ "${arg}" != "${key}" ]]; then
+          echo "Error: No such parameter: ${arg}"
+          exit 1
+        fi
+        printf -v "${key#--}" '%s' true
+        shift
+      ;;
+      --topology|--input_model|--task|--output_video_path|--prompt_folder|--image_folder|--info_json|--dimension|--gpu_ids|--limit|--mxfp8_chunk_rows|--vbench_dir)
+        if [[ "${arg}" == *=* ]]; then
+          value="${arg#*=}"
+          shift
+        elif [[ $# -ge 2 ]]; then
+          value="$2"
+          shift 2
+        else
+          # "shift 2" with a single argument left shifts nothing, which
+          # would keep this loop running forever.
+          echo "Error: missing value for parameter: ${arg}"
+          exit 1
+        fi
+        # Each option is stored in the variable named after it.
+        printf -v "${key#--}" '%s' "${value}"
+      ;;
+      *)
+        echo "Error: No such parameter: ${arg}"
+        exit 1
+      ;;
+    esac
+  done
+}
+
+# Generate videos with main.py (a single process, or one process per GPU when
+# GPU ids are given) and, when --accuracy is set, evaluate them with VBench.
+# Reads the globals filled by init_params; optional ones get defaults below.
+function run_benchmark {
+  task=${task:="t2v"}
+  limit=${limit:=-1}
+  output_video_path=${output_video_path:="./tmp_video"}
+  accuracy=${accuracy:=false}
+  disable_mxfp8_inplace_qdq=${disable_mxfp8_inplace_qdq:=false}
+  vbench_dir=${vbench_dir:="${SCRIPT_DIR}/VBench"}
+
+  # Make the output path absolute: it is used again after pushd into VBench.
+  if [[ ! "${output_video_path}" = /* ]]; then
+    output_video_path=$(realpath -s "$(pwd)/${output_video_path}")
+  fi
+
+  if [ "${topology}" = "wan_bf16" ]; then
+    scheme="BF16"
+  elif [ "${topology}" = "wan_fp8" ]; then
+    scheme="FP8"
+  elif [ "${topology}" = "wan_mxfp8" ]; then
+    scheme="MXFP8"
+  else
+    echo "Error: unsupported topology ${topology}, use wan_bf16/wan_fp8/wan_mxfp8"
+    exit 1
+  fi
+
+  ensure_vbench_repo
+
+  prepare_vbench_inputs
+
+  # --dimension accepts a comma-separated list; split it into an array.
+  normalized_dimensions="${dimension//,/ }"
+  read -r -a dimension_list <<< "${normalized_dimensions}"
+
+  # --gpu_ids takes precedence over CUDA_VISIBLE_DEVICES.
+  if [ -n "${gpu_ids}" ]; then
+    gpu_list="${gpu_ids}"
+  else
+    gpu_list="${CUDA_VISIBLE_DEVICES:-}"
+  fi
+
+  if [ -n "${gpu_list}" ]; then
+    normalized_gpu_ids="${gpu_list//,/ }"
+    read -r -a gpu_array <<< "${normalized_gpu_ids}"
+    visible_gpus=${#gpu_array[@]}
+    echo "visible_gpus: ${visible_gpus}"
+  else
+    gpu_array=()
+  fi
+
+  mkdir -p "${output_video_path}"
+  shard_tmp_root="${output_video_path}/.prompt_shards"
+
+  # Print the shell-quoted main.py command line.
+  #   $1: dimension, $2: prompt folder override, $3: info json override
+  # Empty overrides fall back to the global prompt_folder / info_json.
+  function build_benchmark_cmd {
+    local cur_prompt_folder="$2"
+    local cur_info_json="$3"
+    # main.py is addressed through SCRIPT_DIR so the script also works when
+    # it is started from another directory.
+    local cmd=(
+      python3 "${SCRIPT_DIR}/main.py"
+      --model "${input_model}"
+      --task "${task}"
+      --scheme "${scheme}"
+      --output_video_path "${output_video_path}"
+      --limit "${limit}"
+      --inference
+    )
+
+    if [ -n "${cur_prompt_folder}" ]; then
+      cmd+=(--prompt_folder "${cur_prompt_folder}")
+    elif [ -n "${prompt_folder}" ]; then
+      cmd+=(--prompt_folder "${prompt_folder}")
+    fi
+    if [ -n "${image_folder}" ]; then
+      cmd+=(--image_folder "${image_folder}")
+    fi
+    if [ -n "${cur_info_json}" ]; then
+      cmd+=(--info_json "${cur_info_json}")
+    elif [ -n "${info_json}" ]; then
+      cmd+=(--info_json "${info_json}")
+    fi
+    if [ -n "$1" ]; then
+      cmd+=(--dimension "$1")
+    fi
+    if [ -n "${mxfp8_chunk_rows}" ]; then
+      cmd+=(--mxfp8_chunk_rows "${mxfp8_chunk_rows}")
+    fi
+    if [ "${disable_mxfp8_inplace_qdq}" = "true" ]; then
+      cmd+=(--disable_mxfp8_inplace_qdq)
+    fi
+
+    printf '%q ' "${cmd[@]}"
+  }
+
+  if [ ${#gpu_array[@]} -eq 0 ]; then
+    # No GPU list: run the dimensions one after another in this shell.
+    if [ ${#dimension_list[@]} -eq 0 ]; then
+      eval "$(build_benchmark_cmd "" "" "")"
+    else
+      for cur_dimension in "${dimension_list[@]}"; do
+        eval "$(build_benchmark_cmd "${cur_dimension}" "" "")"
+      done
+    fi
+  else
+    if [ ${#dimension_list[@]} -eq 0 ]; then
+      echo "Error: multi-GPU sharding requires --dimension"
+      exit 1
+    fi
+
+    # One shard, and one background process, per listed GPU.
+    num_shards=${#gpu_array[@]}
+    for cur_dimension in "${dimension_list[@]}"; do
+      dim_shard_root="${shard_tmp_root}/${cur_dimension}"
+      rm -rf "${dim_shard_root}"
+      if [ "${task}" = "t2v" ]; then
+        prompt_file="${prompt_folder}/${cur_dimension}.txt"
+        python3 "${SCRIPT_DIR}/split_t2v_prompts.py" \
+          --prompt_file "${prompt_file}" \
+          --num_shards "${num_shards}" \
+          --output_root "${dim_shard_root}"
+      else
+        python3 "${SCRIPT_DIR}/split_i2v_info.py" \
+          --info_json "${info_json}" \
+          --dimension "${cur_dimension}" \
+          --num_shards "${num_shards}" \
+          --output_root "${dim_shard_root}"
+      fi
+
+      program_pid=()
+      for shard_id in "${!gpu_array[@]}"; do
+        gpu_id="${gpu_array[$shard_id]}"
+        log_suffix="${cur_dimension}"
+        if [ -z "${log_suffix}" ]; then
+          log_suffix="all"
+        fi
+        log_file="${output_video_path}/${log_suffix}.gpu${gpu_id}.log"
+        shard_prompt_folder=""
+        shard_info_json=""
+
+        if [ "${task}" = "t2v" ]; then
+          shard_prompt_folder="${dim_shard_root}/shard_${shard_id}"
+        else
+          shard_info_json="${dim_shard_root}/shard_${shard_id}/info.json"
+        fi
+
+        cmd="$(build_benchmark_cmd "${cur_dimension}" "${shard_prompt_folder}" "${shard_info_json}")"
+        CUDA_VISIBLE_DEVICES="${gpu_id}" bash -lc "${cmd}" > "${log_file}" 2>&1 &
+        program_pid+=("$!")
+        echo "Start (PID: ${program_pid[-1]}, GPU: ${gpu_id}, dimension: ${cur_dimension})"
+      done
+
+      # Wait for every shard before reporting a failure, so no shard process
+      # is left running after this script exits.
+      shard_failed=false
+      for pid in "${program_pid[@]}"; do
+        if ! wait "${pid}"; then
+          shard_failed=true
+        fi
+      done
+      if [ "${shard_failed}" = "true" ]; then
+        echo "Error: at least one shard failed for dimension ${cur_dimension}; see the logs in ${output_video_path}"
+        exit 1
+      fi
+    done
+  fi
+
+  if [ "${accuracy}" = "true" ]; then
+    # The dimensions are kept in an array and expanded one argument each, the
+    # same way the README's standalone evaluation commands pass them.
+    if [ "${task}" = "t2v" ]; then
+      echo "Start VBench evaluation for t2v..."
+      local eval_dimensions=(subject_consistency motion_smoothness aesthetic_quality imaging_quality overall_consistency)
+      pushd "${vbench_dir}"
+      python evaluate.py \
+        --dimension "${eval_dimensions[@]}" \
+        --videos_path "${output_video_path}" \
+        --mode=vbench_standard
+      popd
+    elif [ "${task}" = "i2v" ]; then
+      echo "Start VBench evaluation for i2v..."
+      local eval_dimensions=(i2v_background i2v_subject subject_consistency background_consistency motion_smoothness)
+      pushd "${vbench_dir}"
+      python evaluate_i2v.py \
+        --dimension "${eval_dimensions[@]}" \
+        --videos_path "${output_video_path}" \
+        --ratio "16-9" \
+        --mode=vbench_standard
+      popd
+    else
+      echo "--accuracy does not support task=${task}. Supported tasks: t2v, i2v."
+      exit 1
+    fi
+  else
+    echo "Video generation finished. Use --accuracy to run VBench evaluation for t2v/i2v."
+  fi
+}
+
+# Run the script with the arguments given on the command line.
+main "$@"
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/run_quant.sh b/examples/pytorch/diffusion_model/diffusers/wan/run_quant.sh
new file mode 100644
index 00000000000..ae1ff41e1bb
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/run_quant.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Trace every command as it is executed.
+set -x
+
+# Entry point: parse the command-line options, then run quantization.
+main() {
+  init_params "$@"
+  run_tuning
+}
+
+# Parse "--name=value" options into the globals read by run_tuning
+# (topology, input_model, task, tuned_checkpoint). An unknown option prints an
+# error and exits with status 1.
+function init_params {
+  local var
+  for var in "$@"
+  do
+    # "${var#*=}" keeps everything after the first "=", so values that contain
+    # "=" or spaces are preserved unchanged.
+    case "${var}" in
+      --topology=*)
+        topology="${var#*=}"
+      ;;
+      --input_model=*)
+        input_model="${var#*=}"
+      ;;
+      --task=*)
+        task="${var#*=}"
+      ;;
+      --output_model=*)
+        tuned_checkpoint="${var#*=}"
+      ;;
+      *)
+        echo "Error: No such parameter: ${var}"
+        exit 1
+      ;;
+    esac
+  done
+}
+
+# Quantize the model with main.py using the scheme selected by ${topology}.
+# Defaults: output directory ./tmp_autoround, task t2v.
+function run_tuning {
+  # main.py is addressed through the script's own directory so the script also
+  # works when it is started from another directory.
+  local script_dir
+  script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+  local extra_cmd=()
+
+  tuned_checkpoint=${tuned_checkpoint:="./tmp_autoround"}
+  task=${task:="t2v"}
+
+  if [ "${topology}" = "wan_fp8" ]; then
+    extra_cmd=(--scheme FP8)
+  elif [ "${topology}" = "wan_mxfp8" ]; then
+    extra_cmd=(--scheme MXFP8)
+  else
+    echo "Error: unsupported topology ${topology}, use wan_fp8 or wan_mxfp8"
+    exit 1
+  fi
+
+  if [ -z "${input_model}" ]; then
+    echo "Error: --input_model is required"
+    exit 1
+  fi
+
+  # Quoted expansions keep paths that contain spaces as single arguments.
+  python3 "${script_dir}/main.py" \
+    --model "${input_model}" \
+    --task "${task}" \
+    --output_dir "${tuned_checkpoint}" \
+    --quantize \
+    "${extra_cmd[@]}"
+}
+
+# Run the script with the arguments given on the command line.
+main "$@"
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/setup.sh b/examples/pytorch/diffusion_model/diffusers/wan/setup.sh
new file mode 100644
index 00000000000..c9f9700dbbc
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/setup.sh
@@ -0,0 +1,2 @@
+# Install the example's requirements, then VBench without its own dependencies.
+pip install --no-cache-dir -r requirements.txt
+pip install VBench --no-deps
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/split_i2v_info.py b/examples/pytorch/diffusion_model/diffusers/wan/split_i2v_info.py
new file mode 100644
index 00000000000..326666467ea
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/split_i2v_info.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+import argparse
+import json
+import os
+
+
+def parse_args():
+    """Parse the command-line options of the i2v info splitter.
+
+    Returns:
+        argparse.Namespace: ``info_json``, ``dimension``, ``num_shards`` and
+        ``output_root``; all four options are required.
+    """
+    parser = argparse.ArgumentParser(description="Split i2v info_json into per-shard files for one dimension.")
+    required_options = (
+        ("--info_json", str, "Path to full i2v info json"),
+        ("--dimension", str, "Target dimension"),
+        ("--num_shards", int, "Total shard count"),
+        ("--output_root", str, "Root dir to write shard json files"),
+    )
+    for flag, value_type, help_text in required_options:
+        parser.add_argument(flag, required=True, type=value_type, help=help_text)
+    return parser.parse_args()
+
+
+def has_dimension(info, target_dimension):
+    """Return whether ``target_dimension`` is one of the entry's dimensions.
+
+    The entry's ``"dimension"`` value may be a single string, which is compared
+    for equality, or a collection, which is tested for membership. A missing
+    key never matches.
+    """
+    dims = info.get("dimension", [])
+    if isinstance(dims, str):
+        return target_dimension == dims
+    return target_dimension in dims
+
+
+def main():
+    """Split the info-json entries of one dimension into per-shard json files.
+
+    Matching entries are dealt round-robin and each shard is written to
+    ``<output_root>/shard_<i>/info.json``; shards may be empty.
+
+    Raises:
+        ValueError: If ``--num_shards`` is smaller than 1.
+        FileNotFoundError: If the info json does not exist.
+    """
+    args = parse_args()
+    num_shards = args.num_shards
+
+    if num_shards < 1:
+        raise ValueError("--num_shards must be >= 1")
+    if not os.path.isfile(args.info_json):
+        raise FileNotFoundError(f"Info json not found: {args.info_json}")
+
+    with open(args.info_json, "r", encoding="utf-8") as f:
+        entries = json.load(f)
+
+    filtered = list(filter(lambda entry: has_dimension(entry, args.dimension), entries))
+
+    os.makedirs(args.output_root, exist_ok=True)
+    for shard_id in range(num_shards):
+        # Round-robin: shard i receives entries i, i + num_shards, i + 2 * num_shards, ...
+        shard_items = filtered[shard_id::num_shards]
+        shard_dir = os.path.join(args.output_root, f"shard_{shard_id}")
+        os.makedirs(shard_dir, exist_ok=True)
+        with open(os.path.join(shard_dir, "info.json"), "w", encoding="utf-8") as f:
+            json.dump(shard_items, f, ensure_ascii=False, indent=2)
+
+    print(
+        f"Split {len(filtered)} i2v entries for dimension '{args.dimension}' "
+        f"into {args.num_shards} shards under {args.output_root}"
+    )
+
+
+# Run the splitter only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/split_t2v_prompts.py b/examples/pytorch/diffusion_model/diffusers/wan/split_t2v_prompts.py
new file mode 100644
index 00000000000..dffe19eb9e0
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/split_t2v_prompts.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+import argparse
+import os
+
+
+def parse_args():
+    """Parse the command-line options of the t2v prompt splitter.
+
+    Returns:
+        argparse.Namespace: ``prompt_file``, ``num_shards`` and ``output_root``;
+        all three options are required.
+    """
+    parser = argparse.ArgumentParser(description="Split t2v prompt file into per-shard prompt folders.")
+    required_options = (
+        ("--prompt_file", str, "Path to <dimension>.txt"),
+        ("--num_shards", int, "Total shard count"),
+        ("--output_root", str, "Root directory to write shard folders"),
+    )
+    for flag, value_type, help_text in required_options:
+        parser.add_argument(flag, required=True, type=value_type, help=help_text)
+    return parser.parse_args()
+
+
+def main():
+    """Split a ``<dimension>.txt`` prompt file into per-shard prompt folders.
+
+    Non-blank, stripped prompts are dealt round-robin and each shard is written
+    to ``<output_root>/shard_<i>/<dimension>.txt``, where ``<dimension>`` is the
+    prompt file's base name without extension; shards may be empty.
+
+    Raises:
+        ValueError: If ``--num_shards`` is smaller than 1.
+        FileNotFoundError: If the prompt file does not exist.
+    """
+    args = parse_args()
+    num_shards = args.num_shards
+
+    if num_shards < 1:
+        raise ValueError("--num_shards must be >= 1")
+    if not os.path.isfile(args.prompt_file):
+        raise FileNotFoundError(f"Prompt file not found: {args.prompt_file}")
+
+    file_name = os.path.basename(args.prompt_file)
+    dimension = os.path.splitext(file_name)[0]
+
+    prompts = []
+    with open(args.prompt_file, "r", encoding="utf-8") as f:
+        for raw_line in f:
+            text = raw_line.strip()
+            if text:
+                prompts.append(text)
+
+    os.makedirs(args.output_root, exist_ok=True)
+
+    for shard_id in range(num_shards):
+        # Round-robin: shard i receives prompts i, i + num_shards, i + 2 * num_shards, ...
+        shard_prompts = prompts[shard_id::num_shards]
+        shard_dir = os.path.join(args.output_root, f"shard_{shard_id}")
+        os.makedirs(shard_dir, exist_ok=True)
+        with open(os.path.join(shard_dir, f"{dimension}.txt"), "w", encoding="utf-8") as f:
+            f.writelines(text + "\n" for text in shard_prompts)
+
+    print(
+        f"Split {len(prompts)} prompts from {args.prompt_file} into {args.num_shards} shards under {args.output_root}"
+    )
+
+
+# Run the splitter only when this file is executed as a script.
+if __name__ == "__main__":
+    main()