diff --git a/examples/README.md b/examples/README.md index 26e4a5792d7..2b567ef4ef6 100644 --- a/examples/README.md +++ b/examples/README.md @@ -39,6 +39,19 @@ Intel® Neural Compressor validated examples with multiple compression technique Quantization (MXFP8/FP8) link + + Wan2.2-I2V-A14B-Diffusers + Image to Video + Quantization (MXFP8/FP8) + link + + + Wan2.2-T2V-A14B-Diffusers + Text to Video + Quantization (MXFP8/FP8) + link + + Llama-4-Scout-17B-16E-Instruct Multimodal Modeling diff --git a/examples/pytorch/diffusion_model/diffusers/wan/README.md b/examples/pytorch/diffusion_model/diffusers/wan/README.md new file mode 100644 index 00000000000..85883127570 --- /dev/null +++ b/examples/pytorch/diffusion_model/diffusers/wan/README.md @@ -0,0 +1,174 @@ +# Step-by-Step + +This example provides a unified Wan entry for quantization and evaluation, with both t2v and i2v support. + +# Prerequisite + +## 1. Environment + +```shell +# Use latest dev branch if needed before release +# INC_PT_ONLY=1 pip install git+https://github.com/intel/neural-compressor.git@master +# pip install git+https://github.com/intel/auto-round.git@main + +# install all runtime dependencies (including evaluation package VBench) +pip install -r requirements.txt +pip install VBench --no-deps +``` + +## 2. Prepare Model + +Use a local Wan diffusers model path, for example: + +- Wan2.2-T2V-A14B-Diffusers +- Wan2.2-I2V-A14B-Diffusers + +Download example (from Hugging Face): + +```bash +# optional: update CLI to latest version +pip install -U "huggingface_hub[cli]" + +# t2v model +hf download Wan-AI/Wan2.2-T2V-A14B-Diffusers \ + --local-dir /path/to/Wan2.2-T2V-A14B-Diffusers + +# i2v model +hf download Wan-AI/Wan2.2-I2V-A14B-Diffusers \ + --local-dir /path/to/Wan2.2-I2V-A14B-Diffusers +``` + +## 3. 
Prepare Dataset +Clone VBench to prepare the required dataset, then download i2v data: + +```bash +# required for dataset preparation +git clone https://github.com/Vchitect/VBench.git +cd VBench +bash vbench2_beta_i2v/download_data.sh +``` + +- t2v: pass prompt folder with --prompt_folder, and set --dimension to select `${prompt_folder}/${dimension}.txt` +- t2v/i2v: pass comma-separated values in `--dimension` to run multiple dimensions in one command (e.g., `subject_consistency,overall_consistency`) +- t2v: can pass --dimension for evaluation filtering (validated dimensions include `subject_consistency` and `overall_consistency`) +- i2v: pass --image_folder, --info_json, and --dimension (validated dimensions include `i2v_subject`, `i2v_background`, `subject_consistency`, `background_consistency`, and `motion_smoothness`) + +# Run + +## Quantization + +### t2v + +```bash +# topology supports wan_mxfp8 or wan_fp8 +bash run_quant.sh \ + --topology=wan_mxfp8 \ + --input_model=/path/to/Wan2.2-T2V-A14B-Diffusers \ + --task=t2v \ + --output_model=wan_mxfp8_model_t2v +``` + +### i2v + +```bash +# topology supports wan_mxfp8 or wan_fp8 +bash run_quant.sh \ + --topology=wan_mxfp8 \ + --input_model=/path/to/Wan2.2-I2V-A14B-Diffusers \ + --task=i2v \ + --output_model=wan_mxfp8_model_i2v +``` + +## Inference + Evaluation + +When `--accuracy` is enabled, `run_benchmark.sh` runs VBench evaluation scripts from a local VBench repo. + +- Default VBench path is `$(dirname run_benchmark.sh)/VBench`. +- If your VBench repo is elsewhere, pass `--vbench_dir=/path/to/VBench`. 
+ +### t2v bf16 + +```bash +bash run_benchmark.sh \ + --topology=wan_bf16 \ + --input_model=/path/to/Wan2.2-T2V-A14B-Diffusers \ + --task=t2v \ + --dimension=subject_consistency,overall_consistency \ + --prompt_folder=/path/to/VBench/prompts/prompts_per_dimension/ \ + --output_video_path=wan_t2v_bf16_video \ + --accuracy +``` + +### t2v mxfp8/fp8 + +```bash +# topology supports wan_mxfp8 or wan_fp8 +bash run_benchmark.sh \ + --topology=wan_mxfp8 \ + --input_model=wan_mxfp8_model_t2v \ + --task=t2v \ + --dimension=subject_consistency,overall_consistency \ + --prompt_folder=./VBench/prompts/prompts_per_dimension/ \ + --output_video_path=wan_t2v_mxfp8_video \ + --accuracy +``` + +### i2v bf16 + +```bash +bash run_benchmark.sh \ + --topology=wan_bf16 \ + --input_model=/path/to/Wan2.2-I2V-A14B-Diffusers \ + --task=i2v \ + --dimension=i2v_background,i2v_subject \ + --image_folder=/path/to/VBench/vbench2_beta_i2v/data/crop/16-9 \ + --info_json=/path/to/VBench/vbench2_beta_i2v/vbench2_i2v_full_info.json \ + --output_video_path=wan_i2v_bf16_video \ + --accuracy +``` + +### i2v mxfp8/fp8 + +```bash +# topology supports wan_mxfp8 or wan_fp8 +bash run_benchmark.sh \ + --topology=wan_mxfp8 \ + --input_model=wan_mxfp8_model_i2v \ + --task=i2v \ + --dimension=i2v_background,i2v_subject \ + --image_folder=./VBench/vbench2_beta_i2v/data/crop/16-9 \ + --info_json=./VBench/vbench2_beta_i2v/vbench2_i2v_full_info.json \ + --output_video_path=wan_i2v_mxfp8_video \ + --accuracy +``` + +Note: For sharding and multi-GPU execution, set `--gpu_ids` (for example `--gpu_ids=0,1,2,3`) or set `CUDA_VISIBLE_DEVICES` before running `run_benchmark.sh`. + +### Standalone Accuracy Evaluation (Optional) + +If you already use `--accuracy` in `run_benchmark.sh`, you can skip this section. +Use this section when you want to evaluate existing videos without re-running generation. 
def parse_args(argv=None):
    """Parse the command-line options of the Wan quantization/inference example.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is what the script entry
            point relies on; passing a list allows programmatic use.

    Returns:
        argparse.Namespace holding the parsed options.
    """
    parser = argparse.ArgumentParser(
        description="Wan quantization and evaluation example.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    add = parser.add_argument

    # Model, task and numeric scheme. The aliases all store into ``args.model``.
    add("--model", "--model_name", "--model_name_or_path", required=True, type=str, help="Wan model path")
    add("--task", default="t2v", choices=["t2v", "i2v"], help="Wan task type")
    add("--scheme", default="BF16", choices=["BF16", "FP8", "MXFP8"], type=str, help="Quantization scheme")

    # Stage switches; the entry point requires at least one of them.
    add("--quantize", action="store_true", help="Run the quantization stage")
    add("--inference", action="store_true", help="Run the video generation stage")
    add(
        "--output_dir",
        "--quantized_model_path",
        default="./tmp_autoround",
        type=str,
        help="Directory to save quantized transformer weights",
    )

    # Evaluation inputs.
    add("--prompt_folder", type=str, default=None, help="T2V prompt folder path")
    add("--image_folder", type=str, default=None, help="I2V image folder path")
    add("--info_json", type=str, default=None, help="I2V info json file path")
    add(
        "--dimension",
        type=str,
        default=None,
        help=(
            "VBench dimension used by t2v/i2v evaluation or input filtering "
            "(validated examples: t2v=subject_consistency,overall_consistency; "
            "i2v=i2v_subject,i2v_background)"
        ),
    )
    add("--output_video_path", default="./tmp_video", type=str, help="Directory to save generated videos")
    add("--limit", default=-1, type=int, help="Limit the number of prompts for evaluation")

    # Generation hyper-parameters.
    add("--seed", default=42, type=int, help="Random seed")
    add("--height", default=720, type=int)
    add("--width", default=1280, type=int)
    add("--num_frames", default=81, type=int)
    add("--num_inference_steps", default=40, type=int)
    add("--guidance_scale", default=4.0, type=float, help="Guidance scale for t2v/i2v")
    add("--guidance_scale_2", default=3.0, type=float, help="Second guidance scale for t2v only")
    add("--fps", default=16, type=int)
    add("--ratio", default="16-9", type=str, help="Aspect ratio used by i2v VBench dataset")
    add("--image_max_area", default=480 * 832, type=int, help="Maximum i2v image area")

    # MXFP8 activation quantize-dequantize (QDQ) memory knobs.
    add("--mxfp8_chunk_rows", default=2048, type=int, help="Row chunk size for MXFP8 activation QDQ")
    add("--disable_mxfp8_inplace_qdq", action="store_true", help="Disable in-place MXFP8 activation QDQ")

    return parser.parse_args(argv)
def setup_seed(seed):
    """Seed the Python, NumPy and PyTorch (CPU and, if present, CUDA) RNGs."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


def get_scheme_config(scheme):
    """Return the AutoRound scheme dict for ``scheme``.

    Returns:
        A fresh dict for "FP8" or "MXFP8"; ``None`` for anything else
        (including "BF16"), which callers treat as "no quantization".
    """
    if scheme == "FP8":
        return {"bits": 8, "data_type": "fp", "group_size": 0, "sym": True}
    if scheme == "MXFP8":
        return {"bits": 8, "data_type": "mx_fp", "group_size": 32}
    return None


def build_pipeline(args):
    """Load the Wan diffusers pipeline selected by ``args.task``.

    The t2v pipeline gets an explicitly loaded float32 VAE; everything else is
    loaded in bfloat16. Model CPU offload is enabled on the returned pipeline.

    Raises:
        ValueError: if ``args.task`` is neither "t2v" nor "i2v".
    """
    if args.task == "t2v":
        vae = AutoencoderKLWan.from_pretrained(args.model, subfolder="vae", torch_dtype=torch.float32)
        pipe = WanPipeline.from_pretrained(args.model, vae=vae, torch_dtype=torch.bfloat16)
    elif args.task == "i2v":
        pipe = WanImageToVideoPipeline.from_pretrained(args.model, torch_dtype=torch.bfloat16)
    else:
        raise ValueError(f"Unsupported task: {args.task}. Supported tasks are: i2v, t2v")

    pipe.enable_model_cpu_offload()
    return pipe


def quantize_pipeline(pipe, args):
    """Quantize ``pipe`` with AutoRound using the scheme in ``args.scheme``.

    The result is written under ``args.output_dir`` (passed to AutoRound as
    ``output_dir``); nothing is returned.

    Raises:
        ValueError: if the scheme has no quantization config (e.g. "BF16").
    """
    scheme_cfg = get_scheme_config(args.scheme)
    if scheme_cfg is None:
        raise ValueError("BF16 does not need quantization. Use --scheme FP8 or --scheme MXFP8 with --quantize.")

    # NOTE(review): iters=0 + disable_opt_rtn=True presumably selects plain
    # round-to-nearest without tuning -- confirm against the AutoRound docs.
    qconfig = AutoRoundConfig(
        iters=0,
        export_format="fake",
        output_dir=args.output_dir,
        disable_opt_rtn=True,
        scheme=scheme_cfg,
    )
    pipe = prepare(pipe, qconfig)
    convert(pipe, qconfig)


# Backward-compatible alias: the function was originally published under this
# misspelled name and existing callers still use it.
quantize_pipleine = quantize_pipeline
+ x_2d = x.reshape(-1, x.shape[-1]) + total_rows = x_2d.shape[0] + for start in range(0, total_rows, chunk_rows): + end = min(start + chunk_rows, total_rows) + qdq_chunk = quant_mx_rceil( + x_2d[start:end], + bits=8, + group_size=32, + data_type="mx_fp_rceil", + )[0] + x_2d[start:end].copy_(qdq_chunk) + del qdq_chunk + qdq_x = x + else: + qdq_x = quant_mx_rceil( + x, + bits=8, + group_size=32, + data_type="mx_fp_rceil", + )[0] + + return module.orig_forward(qdq_x, *f_args, **f_kwargs) + + for module_name in ["transformer", "transformer_2"]: + module = getattr(pipe, module_name) + for n, m in module.named_modules(): + if m.__class__.__name__ == "Linear" and "blocks" in n: + m.orig_forward = m.forward + m.forward = partial(act_qdq_forward, m) + + +def build_t2v_inputs(args): + prompt_folder = args.prompt_folder + + if not prompt_folder: + raise ValueError("--prompt_folder is required for t2v inference/eval") + if not args.dimension: + raise ValueError("--dimension is required for t2v inference/eval") + if not os.path.isdir(prompt_folder): + raise FileNotFoundError(f"Prompt folder not found: {prompt_folder}") + + prompt_file = os.path.join(prompt_folder, f"{args.dimension}.txt") + if not os.path.exists(prompt_file): + raise FileNotFoundError(f"Prompt file not found for dimension '{args.dimension}': {prompt_file}") + + with open(prompt_file, "r", encoding="utf-8") as f: + prompt_list = [line.strip() for line in f if line.strip()] + + if args.dimension not in {"subject_consistency", "overall_consistency"}: + print( + "[WARN] t2v --dimension is not in validated examples " + "(subject_consistency, overall_consistency). Continue anyway." 
def build_t2v_inputs(args):
    """Build the t2v work list from ``<prompt_folder>/<dimension>.txt``.

    Blank lines are dropped and each remaining line is stripped. A warning is
    printed for dimensions outside the validated examples. ``args.limit >= 0``
    truncates the list to that many prompts.

    Returns:
        list of ``{"prompt": str}`` dicts.

    Raises:
        ValueError: if ``--prompt_folder`` or ``--dimension`` is missing.
        FileNotFoundError: if the folder or the prompt file does not exist.
    """
    prompt_folder = args.prompt_folder

    if not prompt_folder:
        raise ValueError("--prompt_folder is required for t2v inference/eval")
    if not args.dimension:
        raise ValueError("--dimension is required for t2v inference/eval")
    if not os.path.isdir(prompt_folder):
        raise FileNotFoundError(f"Prompt folder not found: {prompt_folder}")

    prompt_file = os.path.join(prompt_folder, f"{args.dimension}.txt")
    if not os.path.exists(prompt_file):
        raise FileNotFoundError(f"Prompt file not found for dimension '{args.dimension}': {prompt_file}")

    with open(prompt_file, "r", encoding="utf-8") as f:
        prompt_list = [line.strip() for line in f if line.strip()]

    if args.dimension not in {"subject_consistency", "overall_consistency"}:
        print(
            "[WARN] t2v --dimension is not in validated examples "
            "(subject_consistency, overall_consistency). Continue anyway."
        )

    if args.limit >= 0:
        prompt_list = prompt_list[: args.limit]

    return [{"prompt": prompt} for prompt in prompt_list]


def _has_dimension(info, target_dimension):
    """Return True when ``info["dimension"]`` (a string or a list) names ``target_dimension``.

    A plain string is treated as a one-element list so that the check is an
    exact match rather than a substring match.
    """
    dims = info.get("dimension") or []
    if isinstance(dims, str):
        dims = [dims]
    return target_dimension in dims


def build_i2v_inputs(args):
    """Build the i2v work list from the info json, filtered by ``args.dimension``.

    Entries whose image file is missing under ``args.image_folder`` are skipped
    and reported in a single warning. ``args.limit >= 0`` truncates the list.

    Returns:
        list of ``{"prompt": str, "image_path": str}`` dicts.

    Raises:
        ValueError: if ``--image_folder``, ``--info_json`` or ``--dimension`` is missing.
        FileNotFoundError: if the image folder or the info json does not exist.
    """
    image_folder = args.image_folder
    info_json = args.info_json

    if not image_folder:
        raise ValueError("--image_folder is required for i2v inference/eval")
    if not info_json:
        raise ValueError("--info_json is required for i2v inference/eval")
    if not args.dimension:
        raise ValueError(
            "--dimension is required for i2v inference/eval "
            "(validated examples: i2v_subject, i2v_background)"
        )
    if not os.path.isdir(image_folder):
        raise FileNotFoundError(f"Image folder not found: {image_folder}")
    if not os.path.exists(info_json):
        raise FileNotFoundError(f"Info json not found: {info_json}")

    with open(info_json, "r", encoding="utf-8") as f:
        info_list = json.load(f)

    results = []
    missing_images = 0
    for info in info_list:
        if not _has_dimension(info, args.dimension):
            continue

        image_path = os.path.join(image_folder, info["image_name"])
        if not os.path.exists(image_path):
            missing_images += 1
            continue
        results.append({"prompt": info["prompt_en"], "image_path": image_path})

    if missing_images:
        print(f"[WARN] Skipped {missing_images} i2v entries whose image was not found under {image_folder}.")

    if args.limit >= 0:
        results = results[: args.limit]

    return results


def safe_output_path(base_dir, prompt):
    """Return ``<base_dir>/<prompt>-0.mp4``.

    The prompt is used verbatim as the file stem; no character sanitization is
    performed. NOTE(review): presumably this naming is what the VBench
    evaluation expects for generated videos -- confirm before changing it.
    """
    return os.path.join(base_dir, f"{prompt}-0.mp4")
@torch.no_grad()
def run_inference(args, pipe):
    """Generate one video per input item and save it under ``args.output_video_path``.

    Inputs come from ``build_t2v_inputs`` or ``build_i2v_inputs`` depending on
    ``args.task``. Items whose output file already exists are skipped, which
    lets an interrupted run be resumed. A single seeded generator is shared by
    all items.
    """
    setup_seed(args.seed)
    os.makedirs(args.output_video_path, exist_ok=True)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    gen = torch.Generator(device=device).manual_seed(args.seed)

    is_t2v = args.task == "t2v"
    inputs = build_t2v_inputs(args) if is_t2v else build_i2v_inputs(args)

    for item in inputs:
        prompt = item["prompt"]
        save_path = safe_output_path(args.output_video_path, prompt)
        if os.path.exists(save_path):
            continue

        # Arguments shared by both pipelines.
        call_kwargs = {
            "prompt": prompt,
            "num_frames": args.num_frames,
            "guidance_scale": args.guidance_scale,
            "num_inference_steps": args.num_inference_steps,
            "generator": gen,
        }

        if is_t2v:
            call_kwargs.update(
                height=args.height,
                width=args.width,
                guidance_scale_2=args.guidance_scale_2,
            )
        else:
            image = load_image(item["image_path"])
            # Keep the image's aspect ratio, target roughly image_max_area
            # pixels, and round each side down to a multiple of mod_value.
            aspect_ratio = image.height / image.width
            mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
            height = round(np.sqrt(args.image_max_area * aspect_ratio)) // mod_value * mod_value
            width = round(np.sqrt(args.image_max_area / aspect_ratio)) // mod_value * mod_value
            call_kwargs.update(image=image.resize((width, height)), height=height, width=width)

        frames = pipe(**call_kwargs).frames[0]

        export_to_video(frames, save_path, fps=args.fps)
        print(f"Saved: {save_path}")


def main():
    """Script entry point: optionally quantize, then optionally run inference.

    Raises:
        ValueError: if neither ``--quantize`` nor ``--inference`` is given, or
            if ``--quantize`` is combined with the BF16 scheme.
    """
    args = parse_args()

    if not (args.quantize or args.inference):
        raise ValueError("Please enable at least one stage: --quantize or --inference")
    if args.quantize and args.scheme == "BF16":
        # Fail before the (expensive) pipeline load rather than after it.
        raise ValueError("BF16 does not need quantization. Use --scheme FP8 or --scheme MXFP8 with --quantize.")

    # At least one stage is enabled here, so the pipeline is always needed.
    pipe = build_pipeline(args)

    if args.quantize:
        quantize_pipleine(pipe, args)

    if args.inference:
        if args.scheme in ["FP8", "MXFP8"]:
            apply_activation_qdq(pipe, args.scheme, args)
        run_inference(args, pipe)


if __name__ == "__main__":
    main()
#!/bin/bash
set -x

SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# Entry point: parse the command line, then generate videos (and optionally evaluate).
function main {
    init_params "$@"
    run_benchmark
}

# Clone VBench into ${vbench_dir} unless that directory already exists.
# Exits with status 1 when the clone fails.
function ensure_vbench_repo {
    if [ -d "${vbench_dir}" ]; then
        return 0
    fi

    echo "VBench directory not found. Start cloning https://github.com/Vchitect/VBench.git ..."
    if ! git clone https://github.com/Vchitect/VBench.git "${vbench_dir}"; then
        echo "Error: failed to clone VBench."
        exit 1
    fi
}

# Validate the task-specific inputs: required options must be set and any
# given folder/file must exist. Exits with status 1 on the first problem.
function prepare_vbench_inputs {
    local required_vars=()
    local var_name

    # Options each task cannot run without, in the order they are reported.
    case "${task}" in
        t2v) required_vars=(prompt_folder dimension) ;;
        i2v) required_vars=(image_folder info_json dimension) ;;
    esac

    for var_name in "${required_vars[@]}"; do
        if [ -z "${!var_name}" ]; then
            echo "Error: --${var_name} is required for task=${task}"
            exit 1
        fi
    done

    # Existence checks apply to whatever was provided, regardless of task.
    if [[ -n "${prompt_folder}" && ! -d "${prompt_folder}" ]]; then
        echo "Error: prompt_folder not found: ${prompt_folder}"
        exit 1
    fi
    if [[ -n "${image_folder}" && ! -d "${image_folder}" ]]; then
        echo "Error: image_folder not found: ${image_folder}"
        exit 1
    fi
    if [[ -n "${info_json}" && ! -f "${info_json}" ]]; then
        echo "Error: info_json not found: ${info_json}"
        exit 1
    fi
}
# Parse "--name=value" / "--name value" options and the boolean switches
# (--accuracy, --disable_mxfp8_inplace_qdq) into same-named shell variables.
# Exits with status 1 on an unknown option or an option missing its value.
function init_params {
    local opt name value

    while [[ $# -gt 0 ]]; do
        opt="$1"

        # Boolean switches take no value.
        case "${opt}" in
            --accuracy)
                accuracy=true
                shift
                continue
                ;;
            --disable_mxfp8_inplace_qdq)
                disable_mxfp8_inplace_qdq=true
                shift
                continue
                ;;
        esac

        name="${opt%%=*}"
        case "${name}" in
            --topology|--input_model|--task|--output_video_path|--prompt_folder|--image_folder|--info_json|--dimension|--gpu_ids|--limit|--mxfp8_chunk_rows|--vbench_dir)
                ;;
            *)
                echo "Error: No such parameter: ${opt}"
                exit 1
                ;;
        esac

        if [[ "${opt}" == *=* ]]; then
            value="${opt#*=}"
            shift
        elif [[ $# -ge 2 ]]; then
            value="$2"
            shift 2
        else
            # "shift 2" with one argument left fails without shifting, which
            # would otherwise make this loop spin forever.
            echo "Error: missing value for parameter: ${opt}"
            exit 1
        fi

        printf -v "${name#--}" '%s' "${value}"
    done
}

# Generate videos for every requested dimension (optionally sharded over
# several GPUs) and, with --accuracy, run the VBench evaluation afterwards.
function run_benchmark {
    task=${task:="t2v"}
    limit=${limit:=-1}
    output_video_path=${output_video_path:="./tmp_video"}
    accuracy=${accuracy:=false}
    disable_mxfp8_inplace_qdq=${disable_mxfp8_inplace_qdq:=false}

    # Helper scripts live next to this script; do not depend on the caller's cwd.
    local script_dir="${SCRIPT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)}"
    vbench_dir=${vbench_dir:="${script_dir}/VBench"}

    if [[ ! "${output_video_path}" = /* ]]; then
        output_video_path=$(realpath -s "$(pwd)/${output_video_path}")
    fi

    case "${topology}" in
        wan_bf16) scheme="BF16" ;;
        wan_fp8) scheme="FP8" ;;
        wan_mxfp8) scheme="MXFP8" ;;
        *)
            echo "Error: unsupported topology ${topology}, use wan_bf16/wan_fp8/wan_mxfp8"
            exit 1
            ;;
    esac

    ensure_vbench_repo

    prepare_vbench_inputs

    # "a,b,c" -> (a b c)
    normalized_dimensions="${dimension//,/ }"
    read -r -a dimension_list <<< "${normalized_dimensions}"

    # --gpu_ids wins over CUDA_VISIBLE_DEVICES; neither means a single unsharded run.
    if [ -n "${gpu_ids}" ]; then
        gpu_list="${gpu_ids}"
    else
        gpu_list="${CUDA_VISIBLE_DEVICES:-}"
    fi

    if [ -n "${gpu_list}" ]; then
        normalized_gpu_ids="${gpu_list//,/ }"
        read -r -a gpu_array <<< "${normalized_gpu_ids}"
        visible_gpus=${#gpu_array[@]}
        echo "visible_gpus: ${visible_gpus}"
    else
        gpu_array=()
    fi

    mkdir -p "${output_video_path}"
    shard_tmp_root="${output_video_path}/.prompt_shards"

    # Print a shell-quoted main.py command line.
    #   $1 dimension, $2 shard prompt folder (t2v), $3 shard info json (i2v);
    #   empty $2/$3 fall back to the user-provided paths.
    function build_benchmark_cmd {
        local cur_prompt_folder="$2"
        local cur_info_json="$3"
        local cmd=(
            python3 "${script_dir}/main.py"
            --model "${input_model}"
            --task "${task}"
            --scheme "${scheme}"
            --output_video_path "${output_video_path}"
            --limit "${limit}"
            --inference
        )

        if [ -n "${cur_prompt_folder}" ]; then
            cmd+=(--prompt_folder "${cur_prompt_folder}")
        elif [ -n "${prompt_folder}" ]; then
            cmd+=(--prompt_folder "${prompt_folder}")
        fi
        if [ -n "${image_folder}" ]; then
            cmd+=(--image_folder "${image_folder}")
        fi
        if [ -n "${cur_info_json}" ]; then
            cmd+=(--info_json "${cur_info_json}")
        elif [ -n "${info_json}" ]; then
            cmd+=(--info_json "${info_json}")
        fi
        if [ -n "$1" ]; then
            cmd+=(--dimension "$1")
        fi
        if [ -n "${mxfp8_chunk_rows}" ]; then
            cmd+=(--mxfp8_chunk_rows "${mxfp8_chunk_rows}")
        fi
        if [ "${disable_mxfp8_inplace_qdq}" = "true" ]; then
            cmd+=(--disable_mxfp8_inplace_qdq)
        fi

        printf '%q ' "${cmd[@]}"
    }

    if [ ${#gpu_array[@]} -eq 0 ]; then
        # Single run in the current environment; stop on the first failure so
        # evaluation never runs on an incomplete video set.
        if [ ${#dimension_list[@]} -eq 0 ]; then
            eval "$(build_benchmark_cmd "" "" "")" || exit 1
        else
            for cur_dimension in "${dimension_list[@]}"; do
                eval "$(build_benchmark_cmd "${cur_dimension}" "" "")" || exit 1
            done
        fi
    else
        if [ ${#dimension_list[@]} -eq 0 ]; then
            echo "Error: multi-GPU sharding requires --dimension"
            exit 1
        fi

        num_shards=${#gpu_array[@]}
        for cur_dimension in "${dimension_list[@]}"; do
            # One shard of the inputs per GPU, rebuilt from scratch each time.
            dim_shard_root="${shard_tmp_root}/${cur_dimension}"
            rm -rf "${dim_shard_root}"
            if [ "${task}" = "t2v" ]; then
                prompt_file="${prompt_folder}/${cur_dimension}.txt"
                python3 "${script_dir}/split_t2v_prompts.py" \
                    --prompt_file "${prompt_file}" \
                    --num_shards "${num_shards}" \
                    --output_root "${dim_shard_root}" || exit 1
            else
                python3 "${script_dir}/split_i2v_info.py" \
                    --info_json "${info_json}" \
                    --dimension "${cur_dimension}" \
                    --num_shards "${num_shards}" \
                    --output_root "${dim_shard_root}" || exit 1
            fi

            program_pid=()
            for shard_id in "${!gpu_array[@]}"; do
                gpu_id="${gpu_array[$shard_id]}"
                log_file="${output_video_path}/${cur_dimension}.gpu${gpu_id}.log"
                shard_prompt_folder=""
                shard_info_json=""

                if [ "${task}" = "t2v" ]; then
                    shard_prompt_folder="${dim_shard_root}/shard_${shard_id}"
                else
                    shard_info_json="${dim_shard_root}/shard_${shard_id}/info.json"
                fi

                cmd="$(build_benchmark_cmd "${cur_dimension}" "${shard_prompt_folder}" "${shard_info_json}")"
                CUDA_VISIBLE_DEVICES="${gpu_id}" bash -lc "${cmd}" > "${log_file}" 2>&1 &
                program_pid+=("$!")
                echo "Start (PID: ${program_pid[-1]}, GPU: ${gpu_id}, dimension: ${cur_dimension})"
            done

            for pid in "${program_pid[@]}"; do
                wait "${pid}" || exit 1
            done
        done
    fi

    if [ "${accuracy}" != "true" ]; then
        echo "Video generation finished. Use --accuracy to run VBench evaluation for t2v/i2v."
        return 0
    fi

    local eval_script
    local eval_dimensions=()
    local eval_extra=()
    case "${task}" in
        t2v)
            eval_script="evaluate.py"
            eval_dimensions=(subject_consistency motion_smoothness aesthetic_quality imaging_quality overall_consistency)
            ;;
        i2v)
            eval_script="evaluate_i2v.py"
            eval_dimensions=(i2v_background i2v_subject subject_consistency background_consistency motion_smoothness)
            eval_extra=(--ratio "16-9")
            ;;
        *)
            echo "--accuracy does not support task=${task}. Supported tasks: t2v, i2v."
            exit 1
            ;;
    esac

    echo "Start VBench evaluation for ${task}..."
    pushd "${vbench_dir}" || exit 1
    # Each dimension is its own argument, matching the standalone commands in
    # the README; a single quoted string would be one (invalid) dimension.
    python "${eval_script}" \
        --dimension "${eval_dimensions[@]}" \
        --videos_path "${output_video_path}" \
        "${eval_extra[@]}" \
        --mode=vbench_standard
    local eval_status=$?
    popd
    return "${eval_status}"
}
#!/bin/bash
set -x

# Entry point: parse the command line, then run quantization.
function main {
    init_params "$@"
    run_tuning
}

# Parse "--name=value" options into shell variables.
# Exits with status 1 on an unknown option.
function init_params {
    local var
    for var in "$@"; do
        # "${var#*=}" keeps everything after the first "=", so values that
        # themselves contain "=" (e.g. paths) are not truncated.
        case "${var}" in
            --topology=*)
                topology="${var#*=}"
                ;;
            --input_model=*)
                input_model="${var#*=}"
                ;;
            --task=*)
                task="${var#*=}"
                ;;
            --output_model=*)
                tuned_checkpoint="${var#*=}"
                ;;
            *)
                echo "Error: No such parameter: ${var}"
                exit 1
                ;;
        esac
    done
}

# Map the topology to a quantization scheme and run main.py with --quantize.
function run_tuning {
    tuned_checkpoint=${tuned_checkpoint:="./tmp_autoround"}
    task=${task:="t2v"}

    local scheme
    case "${topology}" in
        wan_fp8) scheme="FP8" ;;
        wan_mxfp8) scheme="MXFP8" ;;
        *)
            echo "Error: unsupported topology ${topology}, use wan_fp8 or wan_mxfp8"
            exit 1
            ;;
    esac

    # Quote every expansion so paths with spaces reach main.py as one argument.
    python3 main.py \
        --model "${input_model}" \
        --task "${task}" \
        --output_dir "${tuned_checkpoint}" \
        --quantize \
        --scheme "${scheme}"
}
#!/usr/bin/env python3
import argparse
import json
import os


def parse_args(argv=None):
    """Parse the options of the i2v info-json splitter.

    Args:
        argv: Optional list of argument strings; ``None`` reads ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Split i2v info_json into per-shard files for one dimension.")
    parser.add_argument("--info_json", required=True, type=str, help="Path to full i2v info json")
    parser.add_argument("--dimension", required=True, type=str, help="Target dimension")
    parser.add_argument("--num_shards", required=True, type=int, help="Total shard count")
    parser.add_argument("--output_root", required=True, type=str, help="Root dir to write shard json files")
    return parser.parse_args(argv)


def has_dimension(info, target_dimension):
    """Return True when ``info["dimension"]`` (a string or a list) names ``target_dimension``.

    A missing or null field counts as "no dimensions"; a plain string is
    matched exactly, not as a substring.
    """
    dims = info.get("dimension") or []
    if isinstance(dims, str):
        dims = [dims]
    return target_dimension in dims


def main(argv=None):
    """Write ``<output_root>/shard_<i>/info.json`` for each of ``num_shards`` shards.

    Entries matching the dimension are dealt round-robin, so shard ``i`` gets
    entries ``i, i + num_shards, ...``; shards may be empty.

    Raises:
        ValueError: if ``--num_shards`` is smaller than 1.
        FileNotFoundError: if the info json does not exist.
    """
    args = parse_args(argv)

    if args.num_shards < 1:
        raise ValueError("--num_shards must be >= 1")
    if not os.path.isfile(args.info_json):
        raise FileNotFoundError(f"Info json not found: {args.info_json}")

    with open(args.info_json, "r", encoding="utf-8") as f:
        info_list = json.load(f)

    filtered = [item for item in info_list if has_dimension(item, args.dimension)]

    os.makedirs(args.output_root, exist_ok=True)
    for shard_id in range(args.num_shards):
        shard_items = filtered[shard_id :: args.num_shards]
        shard_dir = os.path.join(args.output_root, f"shard_{shard_id}")
        os.makedirs(shard_dir, exist_ok=True)
        shard_info_json = os.path.join(shard_dir, "info.json")
        with open(shard_info_json, "w", encoding="utf-8") as f:
            json.dump(shard_items, f, ensure_ascii=False, indent=2)

    print(
        f"Split {len(filtered)} i2v entries for dimension '{args.dimension}' "
        f"into {args.num_shards} shards under {args.output_root}"
    )


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
import argparse
import os


def parse_args(argv=None):
    """Parse the options of the t2v prompt splitter.

    Args:
        argv: Optional list of argument strings; ``None`` reads ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Split t2v prompt file into per-shard prompt folders.")
    parser.add_argument("--prompt_file", required=True, type=str, help="Path to <dimension>.txt prompt file")
    parser.add_argument("--num_shards", required=True, type=int, help="Total shard count")
    parser.add_argument("--output_root", required=True, type=str, help="Root directory to write shard folders")
    return parser.parse_args(argv)


def main(argv=None):
    """Write ``<output_root>/shard_<i>/<dimension>.txt`` for each of ``num_shards`` shards.

    The dimension name is the prompt file's base name without its extension.
    Blank lines are dropped, prompts are stripped and dealt round-robin, so
    shard ``i`` gets prompts ``i, i + num_shards, ...``; shards may be empty.

    Raises:
        ValueError: if ``--num_shards`` is smaller than 1.
        FileNotFoundError: if the prompt file does not exist.
    """
    args = parse_args(argv)

    if args.num_shards < 1:
        raise ValueError("--num_shards must be >= 1")
    if not os.path.isfile(args.prompt_file):
        raise FileNotFoundError(f"Prompt file not found: {args.prompt_file}")

    dimension = os.path.splitext(os.path.basename(args.prompt_file))[0]

    with open(args.prompt_file, "r", encoding="utf-8") as f:
        prompts = [line.strip() for line in f if line.strip()]

    os.makedirs(args.output_root, exist_ok=True)

    for shard_id in range(args.num_shards):
        shard_prompts = prompts[shard_id :: args.num_shards]
        shard_dir = os.path.join(args.output_root, f"shard_{shard_id}")
        os.makedirs(shard_dir, exist_ok=True)
        shard_prompt_file = os.path.join(shard_dir, f"{dimension}.txt")
        with open(shard_prompt_file, "w", encoding="utf-8") as f:
            f.writelines(f"{prompt}\n" for prompt in shard_prompts)

    print(
        f"Split {len(prompts)} prompts from {args.prompt_file} into {args.num_shards} shards under {args.output_root}"
    )


if __name__ == "__main__":
    main()