diff --git a/examples/README.md b/examples/README.md
index 26e4a5792d7..2b567ef4ef6 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -39,6 +39,19 @@ Intel® Neural Compressor validated examples with multiple compression technique
Quantization (MXFP8/FP8) |
link |
+
+ | Wan2.2-I2V-A14B-Diffusers |
+ Image to Video |
+ Quantization (MXFP8/FP8) |
+ link |
+
+
+ | Wan2.2-T2V-A14B-Diffusers |
+ Text to Video |
+ Quantization (MXFP8/FP8) |
+ link |
+
+
| Llama-4-Scout-17B-16E-Instruct |
Multimodal Modeling |
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/README.md b/examples/pytorch/diffusion_model/diffusers/wan/README.md
new file mode 100644
index 00000000000..85883127570
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/README.md
@@ -0,0 +1,174 @@
+# Step-by-Step
+
+This example provides a unified entry point for quantizing and evaluating Wan models, supporting both text-to-video (t2v) and image-to-video (i2v) tasks.
+
+# Prerequisite
+
+## 1. Environment
+
+```shell
+# Use latest dev branch if needed before release
+# INC_PT_ONLY=1 pip install git+https://github.com/intel/neural-compressor.git@master
+# pip install git+https://github.com/intel/auto-round.git@main
+
+# install all runtime dependencies (including evaluation package VBench)
+pip install -r requirements.txt
+pip install VBench --no-deps
+```
+
+## 2. Prepare Model
+
+Use a local Wan diffusers model path, for example:
+
+- Wan2.2-T2V-A14B-Diffusers
+- Wan2.2-I2V-A14B-Diffusers
+
+Download example (from Hugging Face):
+
+```bash
+# optional: update CLI to latest version
+pip install -U "huggingface_hub[cli]"
+
+# t2v model
+hf download Wan-AI/Wan2.2-T2V-A14B-Diffusers \
+ --local-dir /path/to/Wan2.2-T2V-A14B-Diffusers
+
+# i2v model
+hf download Wan-AI/Wan2.2-I2V-A14B-Diffusers \
+ --local-dir /path/to/Wan2.2-I2V-A14B-Diffusers
+```
+
+## 3. Prepare Dataset
+Clone VBench to prepare the required dataset, then download i2v data:
+
+```bash
+# required for dataset preparation
+git clone https://github.com/Vchitect/VBench.git
+cd VBench
+bash vbench2_beta_i2v/download_data.sh
+```
+
+- t2v: pass prompt folder with --prompt_folder, and set --dimension to select `${prompt_folder}/${dimension}.txt`
+- t2v/i2v: pass comma-separated values in `--dimension` to run multiple dimensions in one command (e.g., `subject_consistency,overall_consistency`)
+- t2v: --dimension is required (validated dimensions are `subject_consistency` and `overall_consistency`; any other value only prints a warning)
+- i2v: pass --image_folder, --info_json, and --dimension (validated dimensions include `i2v_subject`, `i2v_background`, `subject_consistency`, `background_consistency`, and `motion_smoothness`)
+
+# Run
+
+## Quantization
+
+### t2v
+
+```bash
+# topology supports wan_mxfp8 or wan_fp8
+bash run_quant.sh \
+ --topology=wan_mxfp8 \
+ --input_model=/path/to/Wan2.2-T2V-A14B-Diffusers \
+ --task=t2v \
+ --output_model=wan_mxfp8_model_t2v
+```
+
+### i2v
+
+```bash
+# topology supports wan_mxfp8 or wan_fp8
+bash run_quant.sh \
+ --topology=wan_mxfp8 \
+ --input_model=/path/to/Wan2.2-I2V-A14B-Diffusers \
+ --task=i2v \
+ --output_model=wan_mxfp8_model_i2v
+```
+
+## Inference + Evaluation
+
+When `--accuracy` is enabled, `run_benchmark.sh` runs VBench evaluation scripts from a local VBench repo.
+
+- Default VBench path is `$(dirname run_benchmark.sh)/VBench`.
+- If your VBench repo is elsewhere, pass `--vbench_dir=/path/to/VBench`.
+
+### t2v bf16
+
+```bash
+bash run_benchmark.sh \
+ --topology=wan_bf16 \
+ --input_model=/path/to/Wan2.2-T2V-A14B-Diffusers \
+ --task=t2v \
+ --dimension=subject_consistency,overall_consistency \
+ --prompt_folder=/path/to/VBench/prompts/prompts_per_dimension/ \
+ --output_video_path=wan_t2v_bf16_video \
+ --accuracy
+```
+
+### t2v mxfp8/fp8
+
+```bash
+# topology supports wan_mxfp8 or wan_fp8
+bash run_benchmark.sh \
+ --topology=wan_mxfp8 \
+ --input_model=wan_mxfp8_model_t2v \
+ --task=t2v \
+ --dimension=subject_consistency,overall_consistency \
+ --prompt_folder=./VBench/prompts/prompts_per_dimension/ \
+ --output_video_path=wan_t2v_mxfp8_video \
+ --accuracy
+```
+
+### i2v bf16
+
+```bash
+bash run_benchmark.sh \
+ --topology=wan_bf16 \
+ --input_model=/path/to/Wan2.2-I2V-A14B-Diffusers \
+ --task=i2v \
+ --dimension=i2v_background,i2v_subject \
+ --image_folder=/path/to/VBench/vbench2_beta_i2v/data/crop/16-9 \
+ --info_json=/path/to/VBench/vbench2_beta_i2v/vbench2_i2v_full_info.json \
+ --output_video_path=wan_i2v_bf16_video \
+ --accuracy
+```
+
+### i2v mxfp8/fp8
+
+```bash
+# topology supports wan_mxfp8 or wan_fp8
+bash run_benchmark.sh \
+ --topology=wan_mxfp8 \
+ --input_model=wan_mxfp8_model_i2v \
+ --task=i2v \
+ --dimension=i2v_background,i2v_subject \
+ --image_folder=./VBench/vbench2_beta_i2v/data/crop/16-9 \
+ --info_json=./VBench/vbench2_beta_i2v/vbench2_i2v_full_info.json \
+ --output_video_path=wan_i2v_mxfp8_video \
+ --accuracy
+```
+
+Note: For sharding and multi-GPU execution, set `--gpu_ids` (for example `--gpu_ids=0,1,2,3`) or set `CUDA_VISIBLE_DEVICES` before running `run_benchmark.sh`.
+
+### Standalone Accuracy Evaluation (Optional)
+
+If you already use `--accuracy` in `run_benchmark.sh`, you can skip this section.
+Use this section when you want to evaluate existing videos without re-running generation.
+
+```bash
+# t2v accuracy on generated videos
+cd /path/to/VBench
+python evaluate.py \
+ --dimension subject_consistency motion_smoothness aesthetic_quality overall_consistency imaging_quality \
+ --videos_path /path/to/wan_t2v_bf16_video \
+ --mode vbench_standard
+
+# i2v accuracy on generated videos
+cd /path/to/VBench
+python evaluate_i2v.py \
+ --dimension i2v_background i2v_subject subject_consistency background_consistency motion_smoothness \
+ --videos_path /path/to/wan_i2v_bf16_video \
+ --ratio 16-9 \
+ --mode vbench_standard
+```
+
+# Notes
+
+- Quantized weights are saved under the directory passed as `--output_model`:
+  - `<output_model>/transformer`
+  - `<output_model>/transformer_2`
+
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/main.py b/examples/pytorch/diffusion_model/diffusers/wan/main.py
new file mode 100644
index 00000000000..c36ebef7177
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/main.py
@@ -0,0 +1,310 @@
+import argparse
+import json
+import os
+import random
+
+import numpy as np
+import torch
+from diffusers import AutoencoderKLWan, WanImageToVideoPipeline, WanPipeline, WanTransformer3DModel
+from diffusers.utils import export_to_video, load_image
+from functools import partial
+from neural_compressor.torch.quantization import AutoRoundConfig, convert, prepare
+
+from auto_round.data_type.fp8 import quant_fp8_sym
+from auto_round.data_type.mxfp import quant_mx_rceil
+
+
+def parse_args():
+    """Define the command-line interface and parse ``sys.argv``.
+
+    Returns:
+        argparse.Namespace: options controlling model loading, the
+        quantization scheme, input selection and video generation.
+    """
+    cli = argparse.ArgumentParser(
+        description="Wan quantization and evaluation example.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    add = cli.add_argument
+
+    # Model, task and stage selection.
+    add("--model", "--model_name", "--model_name_or_path", required=True, type=str, help="Wan model path")
+    add("--task", default="t2v", choices=["t2v", "i2v"], help="Wan task type")
+    add("--scheme", default="BF16", choices=["BF16", "FP8", "MXFP8"], type=str, help="Quantization scheme")
+    add("--quantize", action="store_true")
+    add("--inference", action="store_true")
+    add(
+        "--output_dir",
+        "--quantized_model_path",
+        default="./tmp_autoround",
+        type=str,
+        help="Directory to save quantized transformer weights",
+    )
+
+    # Evaluation inputs.
+    add("--prompt_folder", type=str, default=None, help="T2V prompt folder path")
+    add("--image_folder", type=str, default=None, help="I2V image folder path")
+    add("--info_json", type=str, default=None, help="I2V info json file path")
+    dimension_help = (
+        "VBench dimension used by t2v/i2v evaluation or input filtering "
+        "(validated examples: t2v=subject_consistency,overall_consistency; "
+        "i2v=i2v_subject,i2v_background)"
+    )
+    add("--dimension", type=str, default=None, help=dimension_help)
+
+    # Generation settings.
+    add("--output_video_path", default="./tmp_video", type=str, help="Directory to save generated videos")
+    add("--limit", default=-1, type=int, help="Limit the number of prompts for evaluation")
+    add("--seed", default=42, type=int, help="Random seed")
+    add("--height", default=720, type=int)
+    add("--width", default=1280, type=int)
+    add("--num_frames", default=81, type=int)
+    add("--num_inference_steps", default=40, type=int)
+    add("--guidance_scale", default=4.0, type=float, help="Guidance scale for t2v/i2v")
+    add("--guidance_scale_2", default=3.0, type=float, help="Second guidance scale for t2v only")
+    add("--fps", default=16, type=int)
+    add("--ratio", default="16-9", type=str, help="Aspect ratio used by i2v VBench dataset")
+    add("--image_max_area", default=480 * 832, type=int, help="Maximum i2v image area")
+
+    # MXFP8 activation QDQ tuning knobs.
+    add("--mxfp8_chunk_rows", default=2048, type=int, help="Row chunk size for MXFP8 activation QDQ")
+    add("--disable_mxfp8_inplace_qdq", action="store_true", help="Disable in-place MXFP8 activation QDQ")
+
+    return cli.parse_args()
+
+
+def setup_seed(seed):
+    """Seed the Python, NumPy and PyTorch random number generators.
+
+    When CUDA is available, every CUDA device is seeded as well.
+    """
+    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
+        seed_fn(seed)
+    if not torch.cuda.is_available():
+        return
+    torch.cuda.manual_seed_all(seed)
+
+
+def get_scheme_config(scheme):
+    """Return the quantization settings dict for ``scheme``.
+
+    ``"FP8"`` and ``"MXFP8"`` map to their settings; any other value
+    (including ``"BF16"``) yields ``None``. A new dict is built on each call.
+    """
+    settings_by_scheme = {
+        "FP8": {"bits": 8, "data_type": "fp", "group_size": 0, "sym": True},
+        "MXFP8": {"bits": 8, "data_type": "mx_fp", "group_size": 32},
+    }
+    return settings_by_scheme.get(scheme)
+
+
+def build_pipeline(args):
+    """Load the diffusers pipeline that matches ``args.task``.
+
+    For ``t2v`` the VAE is loaded separately in float32 and handed to
+    ``WanPipeline``; for ``i2v`` ``WanImageToVideoPipeline`` is loaded
+    directly. Both are created with bfloat16 weights and have model CPU
+    offload enabled before being returned.
+
+    Raises:
+        ValueError: if ``args.task`` is neither ``t2v`` nor ``i2v``.
+    """
+    task = args.task
+    if task not in ("t2v", "i2v"):
+        raise ValueError(f"Unsupported task: {args.task}. Supported tasks are: i2v, t2v")
+
+    if task == "t2v":
+        vae = AutoencoderKLWan.from_pretrained(args.model, subfolder="vae", torch_dtype=torch.float32)
+        pipeline = WanPipeline.from_pretrained(args.model, vae=vae, torch_dtype=torch.bfloat16)
+    else:
+        pipeline = WanImageToVideoPipeline.from_pretrained(args.model, torch_dtype=torch.bfloat16)
+
+    pipeline.enable_model_cpu_offload()
+    return pipeline
+
+
+def quantize_pipleine(pipe, args):
+    """Quantize ``pipe`` with AutoRound using the scheme named by ``args.scheme``.
+
+    The config uses ``iters=0`` with ``disable_opt_rtn=True`` and the "fake"
+    export format, writing to ``args.output_dir``. Nothing is returned.
+
+    Raises:
+        ValueError: if the scheme has no quantization settings (BF16).
+    """
+    scheme_settings = get_scheme_config(args.scheme)
+    if scheme_settings is None:
+        raise ValueError("BF16 does not need quantization. Use --scheme FP8 or --scheme MXFP8 with --quantize.")
+
+    autoround_options = {
+        "iters": 0,
+        "export_format": "fake",
+        "output_dir": args.output_dir,
+        "disable_opt_rtn": True,
+        "scheme": scheme_settings,
+    }
+    qconfig = AutoRoundConfig(**autoround_options)
+    prepared = prepare(pipe, qconfig)
+    convert(prepared, qconfig)
+
+
+def apply_activation_qdq(pipe, scheme, runtime_args):
+    """Patch Linear layers in the Wan transformers to quantize-dequantize (QDQ) their input.
+
+    Every module whose class is named ``Linear`` and whose qualified name
+    contains ``"blocks"`` inside ``pipe.transformer`` / ``pipe.transformer_2``
+    gets its original ``forward`` stored as ``orig_forward`` and replaced by a
+    wrapper that applies activation QDQ first.
+
+    Args:
+        pipe: pipeline object exposing ``transformer`` and, optionally,
+            ``transformer_2``. A missing or ``None`` transformer is skipped.
+        scheme: ``"BF16"`` (no-op), ``"FP8"``, or anything else, which is
+            treated as MXFP8.
+        runtime_args: object optionally providing ``mxfp8_chunk_rows`` and
+            ``disable_mxfp8_inplace_qdq``; used by the MXFP8 path only.
+
+    Note:
+        The MXFP8 in-place path overwrites the caller's activation tensor when
+        that tensor can be viewed as 2-D without a copy.
+    """
+    if scheme == "BF16":
+        return
+
+    if scheme == "FP8":
+
+        def act_qdq_forward(module, x, *f_args, **f_kwargs):
+            qdq_x, _, _ = quant_fp8_sym(x, group_size=0)
+            return module.orig_forward(qdq_x, *f_args, **f_kwargs)
+
+    else:
+        # These options do not change during a run, so resolve them once
+        # instead of on every Linear forward call.
+        chunk_rows = max(1, int(getattr(runtime_args, "mxfp8_chunk_rows", 2048)))
+        use_inplace = not getattr(runtime_args, "disable_mxfp8_inplace_qdq", False)
+
+        def act_qdq_forward(module, x, *f_args, **f_kwargs):
+            if use_inplace and x.is_cuda:
+                # Chunked in-place QDQ reduces peak activation memory on large tensors.
+                x_2d = x.reshape(-1, x.shape[-1])
+                for start in range(0, x_2d.shape[0], chunk_rows):
+                    rows = x_2d[start : start + chunk_rows]
+                    rows.copy_(
+                        quant_mx_rceil(
+                            rows,
+                            bits=8,
+                            group_size=32,
+                            data_type="mx_fp_rceil",
+                        )[0]
+                    )
+                # reshape() returns a copy for non-contiguous inputs, in which
+                # case the quantized values live in x_2d rather than x. Deriving
+                # the result from x_2d is correct for both the view and the copy.
+                qdq_x = x_2d.reshape(x.shape)
+            else:
+                qdq_x = quant_mx_rceil(
+                    x,
+                    bits=8,
+                    group_size=32,
+                    data_type="mx_fp_rceil",
+                )[0]
+
+            return module.orig_forward(qdq_x, *f_args, **f_kwargs)
+
+    for module_name in ("transformer", "transformer_2"):
+        module = getattr(pipe, module_name, None)
+        if module is None:
+            # Tolerate pipelines that ship a single transformer.
+            continue
+        for n, m in module.named_modules():
+            if m.__class__.__name__ == "Linear" and "blocks" in n:
+                m.orig_forward = m.forward
+                m.forward = partial(act_qdq_forward, m)
+
+
+def build_t2v_inputs(args):
+    """Read the t2v prompts for ``args.dimension`` from ``args.prompt_folder``.
+
+    The prompt file is ``<prompt_folder>/<dimension>.txt``; blank lines are
+    dropped and each remaining line is stripped. A warning is printed for
+    dimensions outside the validated set, and ``args.limit`` (when >= 0)
+    truncates the list.
+
+    Returns:
+        list[dict]: one ``{"prompt": text}`` entry per prompt.
+
+    Raises:
+        ValueError: if the prompt folder or the dimension is not given.
+        FileNotFoundError: if the folder or the prompt file does not exist.
+    """
+    folder = args.prompt_folder
+    dim = args.dimension
+
+    if not folder:
+        raise ValueError("--prompt_folder is required for t2v inference/eval")
+    if not dim:
+        raise ValueError("--dimension is required for t2v inference/eval")
+    if not os.path.isdir(folder):
+        raise FileNotFoundError(f"Prompt folder not found: {folder}")
+
+    prompt_file = os.path.join(folder, f"{dim}.txt")
+    if not os.path.exists(prompt_file):
+        raise FileNotFoundError(f"Prompt file not found for dimension '{dim}': {prompt_file}")
+
+    prompts = []
+    with open(prompt_file, "r", encoding="utf-8") as handle:
+        for raw_line in handle:
+            text = raw_line.strip()
+            if text:
+                prompts.append(text)
+
+    validated = {"subject_consistency", "overall_consistency"}
+    if dim not in validated:
+        print(
+            "[WARN] t2v --dimension is not in validated examples "
+            "(subject_consistency, overall_consistency). Continue anyway."
+        )
+
+    if args.limit >= 0:
+        prompts = prompts[: args.limit]
+
+    return [{"prompt": text} for text in prompts]
+
+
+def build_i2v_inputs(args):
+    """Collect the i2v prompt/image pairs for ``args.dimension``.
+
+    Entries of ``args.info_json`` are kept when their ``dimension`` field
+    contains the requested dimension and their image exists under
+    ``args.image_folder``. ``args.limit`` (when >= 0) truncates the result.
+
+    Returns:
+        list[dict]: entries with ``"prompt"`` and ``"image_path"`` keys.
+
+    Raises:
+        ValueError: if image folder, info json or dimension is not given.
+        FileNotFoundError: if the image folder or the info json is missing.
+    """
+    image_folder = args.image_folder
+    info_json = args.info_json
+
+    if not image_folder:
+        raise ValueError("--image_folder is required for i2v inference/eval")
+    if not info_json:
+        raise ValueError("--info_json is required for i2v inference/eval")
+    if not args.dimension:
+        raise ValueError(
+            "--dimension is required for i2v inference/eval "
+            "(validated examples: i2v_subject, i2v_background)"
+        )
+    if not os.path.isdir(image_folder):
+        raise FileNotFoundError(f"Image folder not found: {image_folder}")
+    if not os.path.exists(info_json):
+        raise FileNotFoundError(f"Info json not found: {info_json}")
+
+    with open(info_json, "r", encoding="utf-8") as f:
+        info_list = json.load(f)
+
+    results = []
+    for info in info_list:
+        # Match whole dimension names only. A bare string is wrapped in a list
+        # so that `in` does not degrade to substring matching, and entries
+        # without the field are skipped instead of raising KeyError.
+        dims = info.get("dimension", [])
+        if isinstance(dims, str):
+            dims = [dims]
+        if args.dimension not in dims:
+            continue
+
+        image_path = os.path.join(image_folder, info["image_name"])
+        if not os.path.exists(image_path):
+            continue
+        results.append({"prompt": info["prompt_en"], "image_path": image_path})
+
+    if args.limit >= 0:
+        results = results[: args.limit]
+
+    return results
+
+
+def safe_output_path(base_dir, prompt):
+    """Return ``<base_dir>/<prompt>-0.mp4``.
+
+    The prompt text is used verbatim as the file stem; no sanitization is done.
+    """
+    video_name = prompt + "-0.mp4"
+    return os.path.join(base_dir, video_name)
+
+
+@torch.no_grad()
+def run_inference(args, pipe):
+    """Generate one video per input prompt and write it to ``args.output_video_path``.
+
+    Inputs come from ``build_t2v_inputs`` or ``build_i2v_inputs`` depending on
+    ``args.task``. A prompt whose output file already exists is skipped. One
+    seeded generator is shared by all pipeline calls. For i2v the image is
+    resized to fit ``args.image_max_area`` while keeping its aspect ratio, with
+    both sides rounded down to a multiple of the model's spatial granularity.
+    """
+    setup_seed(args.seed)
+    os.makedirs(args.output_video_path, exist_ok=True)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    gen = torch.Generator(device=device).manual_seed(args.seed)
+
+    is_t2v = args.task == "t2v"
+    inputs = build_t2v_inputs(args) if is_t2v else build_i2v_inputs(args)
+
+    for item in inputs:
+        prompt = item["prompt"]
+        save_path = safe_output_path(args.output_video_path, prompt)
+        if os.path.exists(save_path):
+            continue
+
+        # Arguments shared by both tasks.
+        call_kwargs = {
+            "prompt": prompt,
+            "num_frames": args.num_frames,
+            "guidance_scale": args.guidance_scale,
+            "num_inference_steps": args.num_inference_steps,
+            "generator": gen,
+        }
+
+        if is_t2v:
+            call_kwargs["height"] = args.height
+            call_kwargs["width"] = args.width
+            call_kwargs["guidance_scale_2"] = args.guidance_scale_2
+        else:
+            image = load_image(item["image_path"])
+            aspect_ratio = image.height / image.width
+            mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
+            height = round(np.sqrt(args.image_max_area * aspect_ratio)) // mod_value * mod_value
+            width = round(np.sqrt(args.image_max_area / aspect_ratio)) // mod_value * mod_value
+            call_kwargs["image"] = image.resize((width, height))
+            call_kwargs["height"] = height
+            call_kwargs["width"] = width
+
+        frames = pipe(**call_kwargs).frames[0]
+        export_to_video(frames, save_path, fps=args.fps)
+        print(f"Saved: {save_path}")
+
+
+def main():
+    """Run the requested stages: quantization, inference, or both.
+
+    Raises:
+        ValueError: if neither ``--quantize`` nor ``--inference`` is given, or
+            if ``--quantize`` is combined with a scheme that has no
+            quantization settings (BF16).
+    """
+    args = parse_args()
+
+    if not (args.quantize or args.inference):
+        raise ValueError("Please enable at least one stage: --quantize or --inference")
+
+    # Fail before the (expensive) model load when quantization cannot run.
+    if args.quantize and get_scheme_config(args.scheme) is None:
+        raise ValueError("BF16 does not need quantization. Use --scheme FP8 or --scheme MXFP8 with --quantize.")
+
+    # At least one stage is enabled here, so the pipeline is always needed.
+    pipe = build_pipeline(args)
+
+    if args.quantize:
+        quantize_pipleine(pipe, args)
+
+    if args.inference:
+        if args.scheme in ["FP8", "MXFP8"]:
+            apply_activation_qdq(pipe, args.scheme, args)
+        run_inference(args, pipe)
+
+
+if __name__ == "__main__":
+    main()
+
+
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/requirements.txt b/examples/pytorch/diffusion_model/diffusers/wan/requirements.txt
new file mode 100644
index 00000000000..dd0a3842c04
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/requirements.txt
@@ -0,0 +1,43 @@
+# Core runtime
+torch
+torchvision
+diffusers
+transformers
+accelerate
+huggingface_hub
+safetensors
+
+# Quantization stack
+neural-compressor-pt
+auto-round
+
+# Utilities used by example scripts
+einops
+
+# Evaluation package (dataset/eval helpers)
+Pillow
+numpy<2.0.0
+matplotlib
+timm>=0.9,<=1.0.12
+wheel
+cython
+tensorboard
+scipy
+opencv-python
+scikit-learn
+scikit-image
+openai-clip
+decord
+requests
+pyyaml
+pyiqa
+lvis
+fairscale>=0.4.4
+fvcore
+easydict
+urllib3
+boto3
+omegaconf
+pycocoevalcap
+imageio-ffmpeg
+gdown==4.7.3
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/run_benchmark.sh b/examples/pytorch/diffusion_model/diffusers/wan/run_benchmark.sh
new file mode 100644
index 00000000000..1ea381f547a
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/run_benchmark.sh
@@ -0,0 +1,353 @@
+#!/bin/bash
+set -x
+
+SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+# Entry point: parse the command-line options into shell variables, then run the benchmark.
+function main {
+ init_params "$@"
+ run_benchmark
+}
+
+# Make sure ${vbench_dir} exists; clone VBench from GitHub into it when it is missing.
+# Exits with status 1 if the clone fails.
+function ensure_vbench_repo {
+    [ -d "${vbench_dir}" ] && return
+
+    echo "VBench directory not found. Start cloning https://github.com/Vchitect/VBench.git ..."
+    if ! git clone https://github.com/Vchitect/VBench.git "${vbench_dir}"; then
+        echo "Error: failed to clone VBench."
+        exit 1
+    fi
+}
+
+# Validate the task-specific inputs: required options first, then existence of
+# every path that was supplied. Exits with status 1 on the first problem found.
+function prepare_vbench_inputs {
+    case "${task}" in
+        t2v)
+            if [[ -z "${prompt_folder}" ]]; then
+                echo "Error: --prompt_folder is required for task=t2v"
+                exit 1
+            fi
+            if [[ -z "${dimension}" ]]; then
+                echo "Error: --dimension is required for task=t2v"
+                exit 1
+            fi
+            ;;
+        i2v)
+            if [[ -z "${image_folder}" ]]; then
+                echo "Error: --image_folder is required for task=i2v"
+                exit 1
+            fi
+            if [[ -z "${info_json}" ]]; then
+                echo "Error: --info_json is required for task=i2v"
+                exit 1
+            fi
+            if [[ -z "${dimension}" ]]; then
+                echo "Error: --dimension is required for task=i2v"
+                exit 1
+            fi
+            ;;
+    esac
+
+    if [[ -n "${prompt_folder}" && ! -d "${prompt_folder}" ]]; then
+        echo "Error: prompt_folder not found: ${prompt_folder}"
+        exit 1
+    fi
+    if [[ -n "${image_folder}" && ! -d "${image_folder}" ]]; then
+        echo "Error: image_folder not found: ${image_folder}"
+        exit 1
+    fi
+    if [[ -n "${info_json}" && ! -f "${info_json}" ]]; then
+        echo "Error: info_json not found: ${info_json}"
+        exit 1
+    fi
+}
+
+# Parse command-line options into global shell variables.
+#
+# Value options accept both "--name=value" and "--name value" and are stored in
+# the variable named after the option (e.g. --input_model -> ${input_model}).
+# --accuracy and --disable_mxfp8_inplace_qdq are flags that set their variable
+# to "true". Unknown options, and value options given without a value, print an
+# error and exit with status 1.
+function init_params {
+    local raw name value
+    while [[ $# -gt 0 ]]; do
+        raw="$1"
+
+        # Flags take no value.
+        case "${raw}" in
+            --disable_mxfp8_inplace_qdq)
+                disable_mxfp8_inplace_qdq=true
+                shift
+                continue
+                ;;
+            --accuracy)
+                accuracy=true
+                shift
+                continue
+                ;;
+        esac
+
+        name="${raw%%=*}"
+        case "${name}" in
+            --topology | --input_model | --task | --output_video_path | --prompt_folder | \
+                --image_folder | --info_json | --dimension | --gpu_ids | --limit | \
+                --mxfp8_chunk_rows | --vbench_dir) ;;
+            *)
+                echo "Error: No such parameter: ${raw}"
+                exit 1
+                ;;
+        esac
+
+        if [[ "${raw}" == *=* ]]; then
+            value="${raw#*=}"
+            shift
+        elif [[ $# -ge 2 ]]; then
+            value="$2"
+            shift 2
+        else
+            # "shift 2" with a single remaining argument fails without
+            # shifting, which would otherwise keep this loop running forever.
+            echo "Error: missing value for parameter: ${raw}"
+            exit 1
+        fi
+
+        # Assign to the global variable named after the option (without "--").
+        printf -v "${name#--}" '%s' "${value}"
+    done
+}
+
+# Generate videos with main.py and, when --accuracy is set, score them with VBench.
+#
+# Without a GPU list, main.py runs once per requested dimension in this process.
+# With --gpu_ids (or an inherited CUDA_VISIBLE_DEVICES), the inputs of each
+# dimension are split into one shard per GPU and the shards run in parallel,
+# each logging to ${output_video_path}/<dimension>.gpu<id>.log.
+function run_benchmark {
+    # Defaults for options that were not given on the command line.
+    task=${task:="t2v"}
+    limit=${limit:=-1}
+    output_video_path=${output_video_path:="./tmp_video"}
+    accuracy=${accuracy:=false}
+    disable_mxfp8_inplace_qdq=${disable_mxfp8_inplace_qdq:=false}
+    vbench_dir=${vbench_dir:="${SCRIPT_DIR}/VBench"}
+
+    # Evaluation changes directory (pushd), so the video path must be absolute.
+    if [[ ! "${output_video_path}" = /* ]]; then
+        output_video_path=$(realpath -s "$(pwd)/${output_video_path}")
+    fi
+
+    if [ "${topology}" = "wan_bf16" ]; then
+        scheme="BF16"
+    elif [ "${topology}" = "wan_fp8" ]; then
+        scheme="FP8"
+    elif [ "${topology}" = "wan_mxfp8" ]; then
+        scheme="MXFP8"
+    else
+        echo "Error: unsupported topology ${topology}, use wan_bf16/wan_fp8/wan_mxfp8"
+        exit 1
+    fi
+
+    ensure_vbench_repo
+
+    prepare_vbench_inputs
+
+    # --dimension accepts a comma-separated list; each entry is handled separately.
+    normalized_dimensions="${dimension//,/ }"
+    read -r -a dimension_list <<< "${normalized_dimensions}"
+
+    # An explicit --gpu_ids takes precedence over CUDA_VISIBLE_DEVICES.
+    if [ -n "${gpu_ids}" ]; then
+        gpu_list="${gpu_ids}"
+    else
+        gpu_list="${CUDA_VISIBLE_DEVICES:-}"
+    fi
+
+    if [ -n "${gpu_list}" ]; then
+        normalized_gpu_ids="${gpu_list//,/ }"
+        read -r -a gpu_array <<< "${normalized_gpu_ids}"
+        visible_gpus=${#gpu_array[@]}
+        echo "visible_gpus: ${visible_gpus}"
+    else
+        gpu_array=()
+    fi
+
+    mkdir -p "${output_video_path}"
+    shard_tmp_root="${output_video_path}/.prompt_shards"
+
+    # Print a shell-quoted main.py command line.
+    #   $1: dimension, $2: prompt folder override, $3: info json override
+    # Empty overrides fall back to the global --prompt_folder / --info_json.
+    function build_benchmark_cmd {
+        local cur_prompt_folder="$2"
+        local cur_info_json="$3"
+        # Resolve main.py next to this script so the caller's cwd does not matter.
+        local cmd=(
+            python3 "${SCRIPT_DIR}/main.py"
+            --model "${input_model}"
+            --task "${task}"
+            --scheme "${scheme}"
+            --output_video_path "${output_video_path}"
+            --limit "${limit}"
+            --inference
+        )
+
+        if [ -n "${cur_prompt_folder}" ]; then
+            cmd+=(--prompt_folder "${cur_prompt_folder}")
+        elif [ -n "${prompt_folder}" ]; then
+            cmd+=(--prompt_folder "${prompt_folder}")
+        fi
+        if [ -n "${image_folder}" ]; then
+            cmd+=(--image_folder "${image_folder}")
+        fi
+        if [ -n "${cur_info_json}" ]; then
+            cmd+=(--info_json "${cur_info_json}")
+        elif [ -n "${info_json}" ]; then
+            cmd+=(--info_json "${info_json}")
+        fi
+        if [ -n "$1" ]; then
+            cmd+=(--dimension "$1")
+        fi
+        if [ -n "${mxfp8_chunk_rows}" ]; then
+            cmd+=(--mxfp8_chunk_rows "${mxfp8_chunk_rows}")
+        fi
+        if [ "${disable_mxfp8_inplace_qdq}" = "true" ]; then
+            cmd+=(--disable_mxfp8_inplace_qdq)
+        fi
+
+        printf '%q ' "${cmd[@]}"
+    }
+
+    if [ ${#gpu_array[@]} -eq 0 ]; then
+        # Single-process path. Stop on a generation failure, matching the
+        # sharded path, rather than evaluating an incomplete set of videos.
+        if [ ${#dimension_list[@]} -eq 0 ]; then
+            eval "$(build_benchmark_cmd "" "" "")" || exit 1
+        else
+            for cur_dimension in "${dimension_list[@]}"; do
+                eval "$(build_benchmark_cmd "${cur_dimension}" "" "")" || exit 1
+            done
+        fi
+    else
+        if [ ${#dimension_list[@]} -eq 0 ]; then
+            echo "Error: multi-GPU sharding requires --dimension"
+            exit 1
+        fi
+
+        num_shards=${#gpu_array[@]}
+        for cur_dimension in "${dimension_list[@]}"; do
+            dim_shard_root="${shard_tmp_root}/${cur_dimension}"
+            rm -rf "${dim_shard_root}"
+            if [ "${task}" = "t2v" ]; then
+                prompt_file="${prompt_folder}/${cur_dimension}.txt"
+                python3 "${SCRIPT_DIR}/split_t2v_prompts.py" \
+                    --prompt_file "${prompt_file}" \
+                    --num_shards "${num_shards}" \
+                    --output_root "${dim_shard_root}" || exit 1
+            else
+                python3 "${SCRIPT_DIR}/split_i2v_info.py" \
+                    --info_json "${info_json}" \
+                    --dimension "${cur_dimension}" \
+                    --num_shards "${num_shards}" \
+                    --output_root "${dim_shard_root}" || exit 1
+            fi
+
+            program_pid=()
+            for shard_id in "${!gpu_array[@]}"; do
+                gpu_id="${gpu_array[$shard_id]}"
+                log_file="${output_video_path}/${cur_dimension}.gpu${gpu_id}.log"
+                shard_prompt_folder=""
+                shard_info_json=""
+
+                if [ "${task}" = "t2v" ]; then
+                    shard_prompt_folder="${dim_shard_root}/shard_${shard_id}"
+                else
+                    shard_info_json="${dim_shard_root}/shard_${shard_id}/info.json"
+                fi
+
+                cmd="$(build_benchmark_cmd "${cur_dimension}" "${shard_prompt_folder}" "${shard_info_json}")"
+                CUDA_VISIBLE_DEVICES="${gpu_id}" bash -lc "${cmd}" > "${log_file}" 2>&1 &
+                program_pid+=("$!")
+                echo "Start (PID: ${program_pid[-1]}, GPU: ${gpu_id}, dimension: ${cur_dimension})"
+            done
+
+            for pid in "${program_pid[@]}"; do
+                wait "${pid}" || exit 1
+            done
+        done
+    fi
+
+    if [ "${accuracy}" = "true" ]; then
+        # The dimensions are expanded from arrays so each name reaches the
+        # evaluation script as its own argument (as in the README's standalone
+        # commands) instead of one space-joined string.
+        if [ "${task}" = "t2v" ]; then
+            echo "Start VBench evaluation for t2v..."
+            eval_dimensions=(subject_consistency motion_smoothness aesthetic_quality imaging_quality overall_consistency)
+            pushd "${vbench_dir}" || exit 1
+            python evaluate.py \
+                --dimension "${eval_dimensions[@]}" \
+                --videos_path "${output_video_path}" \
+                --mode=vbench_standard
+            popd
+        elif [ "${task}" = "i2v" ]; then
+            echo "Start VBench evaluation for i2v..."
+            eval_dimensions=(i2v_background i2v_subject subject_consistency background_consistency motion_smoothness)
+            pushd "${vbench_dir}" || exit 1
+            python evaluate_i2v.py \
+                --dimension "${eval_dimensions[@]}" \
+                --videos_path "${output_video_path}" \
+                --ratio "16-9" \
+                --mode=vbench_standard
+            popd
+        else
+            echo "--accuracy does not support task=${task}. Supported tasks: t2v, i2v."
+            exit 1
+        fi
+    else
+        echo "Video generation finished. Use --accuracy to run VBench evaluation for t2v/i2v."
+    fi
+}
+
+main "$@"
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/run_quant.sh b/examples/pytorch/diffusion_model/diffusers/wan/run_quant.sh
new file mode 100644
index 00000000000..ae1ff41e1bb
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/run_quant.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+set -x
+
+# Entry point: parse the command-line options into shell variables, then run quantization.
+function main {
+ init_params "$@"
+ run_tuning
+}
+
+# Parse "--name=value" options into global shell variables:
+#   --topology -> topology, --input_model -> input_model,
+#   --task -> task, --output_model -> tuned_checkpoint.
+# Any other argument prints an error and exits with status 1.
+function init_params {
+    local var
+    for var in "$@"; do
+        # ${var#*=} keeps everything after the FIRST "=", so values that
+        # contain "=" or whitespace are preserved intact.
+        case "${var}" in
+            --topology=*)
+                topology="${var#*=}"
+                ;;
+            --input_model=*)
+                input_model="${var#*=}"
+                ;;
+            --task=*)
+                task="${var#*=}"
+                ;;
+            --output_model=*)
+                tuned_checkpoint="${var#*=}"
+                ;;
+            *)
+                echo "Error: No such parameter: ${var}"
+                exit 1
+                ;;
+        esac
+    done
+}
+
+# Quantize the model by running main.py with --quantize.
+# Defaults: output directory ./tmp_autoround, task t2v.
+# Exits with status 1 for an unsupported topology or a missing --input_model.
+function run_tuning {
+    tuned_checkpoint=${tuned_checkpoint:="./tmp_autoround"}
+    task=${task:="t2v"}
+
+    local scheme
+    case "${topology}" in
+        wan_fp8)
+            scheme="FP8"
+            ;;
+        wan_mxfp8)
+            scheme="MXFP8"
+            ;;
+        *)
+            echo "Error: unsupported topology ${topology}, use wan_fp8 or wan_mxfp8"
+            exit 1
+            ;;
+    esac
+
+    if [ -z "${input_model}" ]; then
+        echo "Error: --input_model is required"
+        exit 1
+    fi
+
+    # Quote every expansion so paths containing spaces stay one argument.
+    python3 main.py \
+        --model "${input_model}" \
+        --task "${task}" \
+        --output_dir "${tuned_checkpoint}" \
+        --quantize \
+        --scheme "${scheme}"
+}
+
+main "$@"
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/setup.sh b/examples/pytorch/diffusion_model/diffusers/wan/setup.sh
new file mode 100644
index 00000000000..c9f9700dbbc
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/setup.sh
@@ -0,0 +1,2 @@
+# Install the packages listed in requirements.txt without using pip's cache.
+pip install --no-cache-dir -r requirements.txt
+# Install the VBench package itself without installing its dependencies.
+pip install VBench --no-deps
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/split_i2v_info.py b/examples/pytorch/diffusion_model/diffusers/wan/split_i2v_info.py
new file mode 100644
index 00000000000..326666467ea
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/split_i2v_info.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+import argparse
+import json
+import os
+
+
+def parse_args():
+    """Parse the command-line options of the i2v info splitter.
+
+    All four options are required.
+    """
+    cli = argparse.ArgumentParser(description="Split i2v info_json into per-shard files for one dimension.")
+    required_options = (
+        ("--info_json", str, "Path to full i2v info json"),
+        ("--dimension", str, "Target dimension"),
+        ("--num_shards", int, "Total shard count"),
+        ("--output_root", str, "Root dir to write shard json files"),
+    )
+    for flag, value_type, help_text in required_options:
+        cli.add_argument(flag, required=True, type=value_type, help=help_text)
+    return cli.parse_args()
+
+
+def has_dimension(info, target_dimension):
+    """Tell whether ``info`` is tagged with ``target_dimension``.
+
+    The ``dimension`` field may be a list of names or a single string; a
+    missing field counts as no dimensions.
+    """
+    tagged = info.get("dimension", [])
+    return target_dimension in ([tagged] if isinstance(tagged, str) else tagged)
+
+
+def main():
+    """Write one ``shard_<i>/info.json`` per shard for the requested dimension.
+
+    Matching entries are distributed round-robin, so shard ``i`` receives the
+    entries at positions ``i, i + n, i + 2n, ...`` of the filtered list. A
+    shard file is written even when it receives no entries.
+
+    Raises:
+        ValueError: if ``--num_shards`` is below 1.
+        FileNotFoundError: if the info json does not exist.
+    """
+    args = parse_args()
+    shard_count = args.num_shards
+
+    if shard_count < 1:
+        raise ValueError("--num_shards must be >= 1")
+    if not os.path.isfile(args.info_json):
+        raise FileNotFoundError(f"Info json not found: {args.info_json}")
+
+    with open(args.info_json, "r", encoding="utf-8") as src:
+        entries = json.load(src)
+
+    selected = [entry for entry in entries if has_dimension(entry, args.dimension)]
+
+    os.makedirs(args.output_root, exist_ok=True)
+    for shard_id in range(shard_count):
+        shard_dir = os.path.join(args.output_root, f"shard_{shard_id}")
+        os.makedirs(shard_dir, exist_ok=True)
+        # Strided slicing yields the same round-robin assignment as idx % n.
+        with open(os.path.join(shard_dir, "info.json"), "w", encoding="utf-8") as dst:
+            json.dump(selected[shard_id::shard_count], dst, ensure_ascii=False, indent=2)
+
+    print(
+        f"Split {len(selected)} i2v entries for dimension '{args.dimension}' "
+        f"into {args.num_shards} shards under {args.output_root}"
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/pytorch/diffusion_model/diffusers/wan/split_t2v_prompts.py b/examples/pytorch/diffusion_model/diffusers/wan/split_t2v_prompts.py
new file mode 100644
index 00000000000..dffe19eb9e0
--- /dev/null
+++ b/examples/pytorch/diffusion_model/diffusers/wan/split_t2v_prompts.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+import argparse
+import os
+
+
+def parse_args():
+    """Parse the command-line options of the t2v prompt splitter.
+
+    All three options are required. The dimension name is later derived from
+    the prompt file's base name, hence the ``<dimension>.txt`` naming.
+    """
+    parser = argparse.ArgumentParser(description="Split t2v prompt file into per-shard prompt folders.")
+    parser.add_argument("--prompt_file", required=True, type=str, help="Path to the <dimension>.txt prompt file")
+    parser.add_argument("--num_shards", required=True, type=int, help="Total shard count")
+    parser.add_argument("--output_root", required=True, type=str, help="Root directory to write shard folders")
+    return parser.parse_args()
+
+
+def main():
+    """Write one ``shard_<i>/<dimension>.txt`` per shard from a prompt file.
+
+    The dimension name is the prompt file's base name without extension.
+    Blank lines are dropped, and the remaining prompts are distributed
+    round-robin across the shards. A shard file is written even when it
+    receives no prompts.
+
+    Raises:
+        ValueError: if ``--num_shards`` is below 1.
+        FileNotFoundError: if the prompt file does not exist.
+    """
+    args = parse_args()
+    shard_count = args.num_shards
+
+    if shard_count < 1:
+        raise ValueError("--num_shards must be >= 1")
+    if not os.path.isfile(args.prompt_file):
+        raise FileNotFoundError(f"Prompt file not found: {args.prompt_file}")
+
+    file_name = os.path.basename(args.prompt_file)
+    dimension, _ext = os.path.splitext(file_name)
+
+    prompts = []
+    with open(args.prompt_file, "r", encoding="utf-8") as src:
+        for raw_line in src:
+            text = raw_line.strip()
+            if text:
+                prompts.append(text)
+
+    os.makedirs(args.output_root, exist_ok=True)
+
+    for shard_id in range(shard_count):
+        shard_dir = os.path.join(args.output_root, f"shard_{shard_id}")
+        os.makedirs(shard_dir, exist_ok=True)
+        # Strided slicing yields the same round-robin assignment as idx % n.
+        with open(os.path.join(shard_dir, f"{dimension}.txt"), "w", encoding="utf-8") as dst:
+            for text in prompts[shard_id::shard_count]:
+                dst.write(text + "\n")
+
+    print(
+        f"Split {len(prompts)} prompts from {args.prompt_file} into {args.num_shards} shards under {args.output_root}"
+    )
+
+
+if __name__ == "__main__":
+    main()