
Commit d724670

[diffusion] refactor and added tests for Flux, T2V, TI2V, I2V (#13344)
1 parent efc5d8f commit d724670

File tree

5 files changed: +1596 −909 lines changed

Lines changed: 219 additions & 0 deletions
@@ -0,0 +1,219 @@
"""
Configuration and data structures for diffusion performance tests.
"""

from __future__ import annotations

import json
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Sequence


@dataclass
class ToleranceConfig:
    """Tolerance ratios for performance validation."""

    e2e: float
    stage: float
    denoise_step: float
    denoise_agg: float


@dataclass
class ScenarioConfig:
    """Expected performance metrics for a test scenario."""

    stages_ms: dict[str, float]
    denoise_step_ms: dict[int, float]
    expected_e2e_ms: float
    expected_avg_denoise_ms: float
    expected_median_denoise_ms: float


@dataclass
class BaselineConfig:
    """Full baseline configuration."""

    scenarios: dict[str, ScenarioConfig]
    step_fractions: Sequence[float]
    warmup_defaults: dict[str, int]
    tolerances: ToleranceConfig

    @classmethod
    def load(cls, path: Path) -> BaselineConfig:
        """Load baseline configuration from a JSON file."""
        with path.open("r", encoding="utf-8") as fh:
            data = json.load(fh)

        tol_data = data["tolerances"]
        tolerances = ToleranceConfig(
            e2e=float(os.getenv("SGLANG_E2E_TOLERANCE", tol_data["e2e"])),
            stage=float(os.getenv("SGLANG_STAGE_TIME_TOLERANCE", tol_data["stage"])),
            denoise_step=float(
                os.getenv("SGLANG_DENOISE_STEP_TOLERANCE", tol_data["denoise_step"])
            ),
            denoise_agg=float(
                os.getenv("SGLANG_DENOISE_AGG_TOLERANCE", tol_data["denoise_agg"])
            ),
        )

        scenarios = {}
        for name, cfg in data["scenarios"].items():
            scenarios[name] = ScenarioConfig(
                stages_ms=cfg["stages_ms"],
                denoise_step_ms={int(k): v for k, v in cfg["denoise_step_ms"].items()},
                expected_e2e_ms=float(cfg["expected_e2e_ms"]),
                expected_avg_denoise_ms=float(cfg["expected_avg_denoise_ms"]),
                expected_median_denoise_ms=float(cfg["expected_median_denoise_ms"]),
            )

        return cls(
            scenarios=scenarios,
            step_fractions=tuple(data["sampling"]["step_fractions"]),
            warmup_defaults=data["sampling"].get("warmup_requests", {}),
            tolerances=tolerances,
        )


@dataclass(frozen=True)
class DiffusionCase:
    """Configuration for a single model/scenario test case."""

    id: str  # pytest test id
    model_path: str  # HF repo or local path
    scenario_name: str  # key into BASELINE_CONFIG.scenarios
    modality: str = "image"  # "image", "video", or "3d"
    prompt: str | None = None  # text prompt for generation
    output_size: str = "1024x1024"  # output image dimensions (or video resolution)
    num_frames: int | None = None  # for video: number of frames
    fps: int | None = None  # for video: frames per second
    warmup_text: int = 1  # number of text-to-image/video warmups
    warmup_edit: int = 0  # number of image/video-edit warmups
    image_edit_prompt: str | None = None  # prompt for editing
    image_edit_path: Path | str | None = None  # input image/video for editing (Path or URL)
    startup_grace_seconds: float = 0.0  # wait time after server starts
    custom_validator: str | None = None  # optional custom validator name
    seconds: int = 4  # for video: duration in seconds

    def is_image_url(self) -> bool:
        """Check if image_edit_path is a URL."""
        if self.image_edit_path is None:
            return False
        return isinstance(self.image_edit_path, str) and (
            self.image_edit_path.startswith("http://")
            or self.image_edit_path.startswith("https://")
        )


@dataclass
class PerformanceSummary:
    """Summary of performance metrics."""

    e2e_ms: float
    avg_denoise_ms: float
    median_denoise_ms: float
    stage_metrics: dict[str, float]
    sampled_steps: dict[int, float]
    frames_per_second: float | None = None
    total_frames: int | None = None
    avg_frame_time_ms: float | None = None


# Common paths
IMAGE_INPUT_FILE = Path(__file__).resolve().parents[1] / "test_files" / "girl.jpg"

# All test cases with clean default values.
# To test different models, simply add more DiffusionCase entries.
DIFFUSION_CASES: list[DiffusionCase] = [
    # === Text to Image (T2I) ===
    DiffusionCase(
        id="qwen_image_t2i",
        model_path="Qwen/Qwen-Image",
        scenario_name="text_to_image",
        modality="image",
        prompt="A futuristic cityscape at sunset with flying cars",
        output_size="1024x1024",
        warmup_text=1,
        warmup_edit=0,
        startup_grace_seconds=30.0,
    ),
    DiffusionCase(
        id="flux_image_t2i",
        model_path="black-forest-labs/FLUX.1-dev",
        scenario_name="text_to_image",
        modality="image",
        prompt="A futuristic cityscape at sunset with flying cars",
        output_size="1024x1024",
        warmup_text=1,
        warmup_edit=0,
        startup_grace_seconds=30.0,
    ),
    # === Text and Image to Image (TI2I) ===
    DiffusionCase(
        id="qwen_image_edit_ti2i",
        model_path="Qwen/Qwen-Image-Edit",
        scenario_name="image_edit",
        modality="image",
        prompt=None,  # not used for editing
        output_size="1024x1536",
        warmup_text=0,
        warmup_edit=1,
        image_edit_prompt="Convert 2D style to 3D style",
        image_edit_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg",
        startup_grace_seconds=30.0,
    ),
    # === Text to Video (T2V) ===
    DiffusionCase(
        id="fastwan2_1_t2v",
        model_path="Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
        scenario_name="text_to_video",
        modality="video",
        prompt="A curious raccoon",
        output_size="848x480",
        seconds=4,
        warmup_text=0,  # warmups only for image gen models
        warmup_edit=0,
        startup_grace_seconds=30.0,
        custom_validator="video",
    ),
    # === Image to Video (I2V) ===
    # DiffusionCase(
    #     id="wan2_1_i2v_480p",
    #     model_path="Wan-AI/Wan2.1-I2V-14B-Diffusers",
    #     scenario_name="image_to_video",
    #     modality="video",
    #     prompt="generate",  # a prompt is required; the request fails without one
    #     warmup_text=0,  # warmups only for image gen models
    #     warmup_edit=0,
    #     output_size="1024x1536",
    #     image_edit_prompt="generate",
    #     image_edit_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg",
    #     startup_grace_seconds=30.0,
    #     custom_validator="video",
    #     seconds=4,
    # ),
    # === Text and Image to Video (TI2V) ===
    DiffusionCase(
        id="wan2_2_ti2v_5b",
        model_path="Wan-AI/Wan2.2-TI2V-5B-Diffusers",
        scenario_name="text_image_to_video",
        modality="video",
        prompt="Animate this image",
        output_size="832x1104",
        warmup_text=0,  # warmups only for image gen models
        warmup_edit=0,
        image_edit_prompt="Add dynamic motion to the scene",
        image_edit_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg",
        startup_grace_seconds=30.0,
        custom_validator="video",
        seconds=4,
    ),
]


# Load global configuration
BASELINE_CONFIG = BaselineConfig.load(Path(__file__).with_name("perf_baselines.json"))
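For context, BaselineConfig.load above reads three top-level keys from perf_baselines.json: "tolerances", "scenarios", and "sampling". A minimal sketch of that shape, written as a Python dict and using the "text_to_image" scenario name from DIFFUSION_CASES; the stage names, warmup-request keys, and all timings below are illustrative placeholders, not values from this commit:

# Illustrative only: placeholder numbers; keys mirror what BaselineConfig.load expects.
example_baselines = {
    "tolerances": {"e2e": 0.15, "stage": 0.20, "denoise_step": 0.25, "denoise_agg": 0.15},
    "scenarios": {
        "text_to_image": {
            "stages_ms": {"denoise": 4500.0},  # per-stage times; stage names assumed here
            "denoise_step_ms": {"0": 95.0, "24": 90.0},  # JSON keys are strings; load() casts them to int
            "expected_e2e_ms": 5200.0,
            "expected_avg_denoise_ms": 91.0,
            "expected_median_denoise_ms": 90.5,
        }
    },
    "sampling": {
        "step_fractions": [0.0, 0.5, 1.0],  # becomes BaselineConfig.step_fractions (as a tuple)
        "warmup_requests": {"text": 1, "edit": 0},  # optional; key names assumed
    },
}

Each tolerance ratio can also be overridden at runtime through the SGLANG_*_TOLERANCE environment variables read in load().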

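The test modules that consume these structures are part of the same commit but are not shown in this excerpt. As a rough sketch only, a test might parametrize over DIFFUSION_CASES and compare a measured PerformanceSummary against the scenario baselines; measure_case below is a hypothetical helper, and treating each tolerance as relative headroom is an assumption:

import pytest

@pytest.mark.parametrize("case", DIFFUSION_CASES, ids=lambda c: c.id)
def test_perf_within_baseline(case: DiffusionCase) -> None:
    scenario = BASELINE_CONFIG.scenarios[case.scenario_name]
    tol = BASELINE_CONFIG.tolerances
    # measure_case is a hypothetical helper, not part of this diff.
    summary: PerformanceSummary = measure_case(case)
    assert summary.e2e_ms <= scenario.expected_e2e_ms * (1 + tol.e2e)
    assert summary.avg_denoise_ms <= scenario.expected_avg_denoise_ms * (1 + tol.denoise_agg)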