"""
Configuration and data structures for diffusion performance tests.
"""

from __future__ import annotations

import json
import os
from collections.abc import Sequence
from dataclasses import dataclass
from pathlib import Path


@dataclass
class ToleranceConfig:
    """Tolerance ratios for performance validation."""

    e2e: float
    stage: float
    denoise_step: float
    denoise_agg: float
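
    # Each ratio can be overridden at runtime through the environment
    # variables read in BaselineConfig.load below. Illustrative invocation
    # (the pytest target name is hypothetical):
    #
    #   SGLANG_E2E_TOLERANCE=0.25 pytest test_diffusion_perf.py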


@dataclass
class ScenarioConfig:
    """Expected performance metrics for a test scenario."""

    stages_ms: dict[str, float]
    denoise_step_ms: dict[int, float]
    expected_e2e_ms: float
    expected_avg_denoise_ms: float
    expected_median_denoise_ms: float


@dataclass
class BaselineConfig:
    """Full baseline configuration."""

    scenarios: dict[str, ScenarioConfig]
    step_fractions: Sequence[float]
    warmup_defaults: dict[str, int]
    tolerances: ToleranceConfig

    @classmethod
    def load(cls, path: Path) -> BaselineConfig:
        """Load baseline configuration from a JSON file."""
        with path.open("r", encoding="utf-8") as fh:
            data = json.load(fh)

        tol_data = data["tolerances"]
        tolerances = ToleranceConfig(
            e2e=float(os.getenv("SGLANG_E2E_TOLERANCE", tol_data["e2e"])),
            stage=float(os.getenv("SGLANG_STAGE_TIME_TOLERANCE", tol_data["stage"])),
            denoise_step=float(
                os.getenv("SGLANG_DENOISE_STEP_TOLERANCE", tol_data["denoise_step"])
            ),
            denoise_agg=float(
                os.getenv("SGLANG_DENOISE_AGG_TOLERANCE", tol_data["denoise_agg"])
            ),
        )

        scenarios: dict[str, ScenarioConfig] = {}
        for name, cfg in data["scenarios"].items():
            scenarios[name] = ScenarioConfig(
                stages_ms=cfg["stages_ms"],
                denoise_step_ms={int(k): v for k, v in cfg["denoise_step_ms"].items()},
                expected_e2e_ms=float(cfg["expected_e2e_ms"]),
                expected_avg_denoise_ms=float(cfg["expected_avg_denoise_ms"]),
                expected_median_denoise_ms=float(cfg["expected_median_denoise_ms"]),
            )

        return cls(
            scenarios=scenarios,
            step_fractions=tuple(data["sampling"]["step_fractions"]),
            warmup_defaults=data["sampling"].get("warmup_requests", {}),
            tolerances=tolerances,
        )
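
    # For reference, a sketch of the JSON layout load() consumes. The keys
    # mirror the lookups above; all numeric values are illustrative
    # placeholders, and the stage names are examples only:
    #
    #   {
    #     "tolerances": {"e2e": 0.15, "stage": 0.20,
    #                    "denoise_step": 0.20, "denoise_agg": 0.10},
    #     "scenarios": {
    #       "text_to_image": {
    #         "stages_ms": {"text_encode": 120.0, "vae_decode": 310.0},
    #         "denoise_step_ms": {"0": 95.0, "24": 90.0},
    #         "expected_e2e_ms": 5200.0,
    #         "expected_avg_denoise_ms": 92.0,
    #         "expected_median_denoise_ms": 91.0
    #       }
    #     },
    #     "sampling": {
    #       "step_fractions": [0.0, 0.25, 0.5, 0.75, 1.0],
    #       "warmup_requests": {"text": 1, "edit": 0}
    #     }
    #   }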


@dataclass(frozen=True)
class DiffusionCase:
    """Configuration for a single model/scenario test case."""

    id: str  # pytest test id
    model_path: str  # HF repo or local path
    scenario_name: str  # key into BASELINE_CONFIG.scenarios
    modality: str = "image"  # "image", "video", or "3d"
    prompt: str | None = None  # text prompt for generation
    output_size: str = "1024x1024"  # output image dimensions (or video resolution)
    num_frames: int | None = None  # for video: number of frames
    fps: int | None = None  # for video: frames per second
    warmup_text: int = 1  # number of text-to-image/video warmups
    warmup_edit: int = 0  # number of image/video-edit warmups
    image_edit_prompt: str | None = None  # prompt for editing
    image_edit_path: Path | str | None = None  # input image/video for editing (Path or URL)
    startup_grace_seconds: float = 0.0  # wait time after server starts
    custom_validator: str | None = None  # optional custom validator name
    seconds: int = 4  # for video: duration in seconds

    def is_image_url(self) -> bool:
        """Check whether image_edit_path is an HTTP(S) URL."""
        path = self.image_edit_path
        return isinstance(path, str) and path.startswith(("http://", "https://"))
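
    # Illustrative usage (hypothetical field values):
    #   DiffusionCase(id="demo", model_path="org/model", scenario_name="text_to_image",
    #                 image_edit_path="https://example.com/in.jpg").is_image_url()  # True
    # A local Path (or None) yields False.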


@dataclass
class PerformanceSummary:
    """Summary of performance metrics."""

    e2e_ms: float
    avg_denoise_ms: float
    median_denoise_ms: float
    stage_metrics: dict[str, float]
    sampled_steps: dict[int, float]
    frames_per_second: float | None = None
    total_frames: int | None = None
    avg_frame_time_ms: float | None = None


# Common paths
IMAGE_INPUT_FILE = Path(__file__).resolve().parents[1] / "test_files" / "girl.jpg"

# All test cases with clean default values.
# To test different models, simply add more DiffusionCase entries.
DIFFUSION_CASES: list[DiffusionCase] = [
    # === Text to Image (T2I) ===
    DiffusionCase(
        id="qwen_image_t2i",
        model_path="Qwen/Qwen-Image",
        scenario_name="text_to_image",
        modality="image",
        prompt="A futuristic cityscape at sunset with flying cars",
        output_size="1024x1024",
        warmup_text=1,
        warmup_edit=0,
        startup_grace_seconds=30.0,
    ),
    DiffusionCase(
        id="flux_image_t2i",
        model_path="black-forest-labs/FLUX.1-dev",
        scenario_name="text_to_image",
        modality="image",
        prompt="A futuristic cityscape at sunset with flying cars",
        output_size="1024x1024",
        warmup_text=1,
        warmup_edit=0,
        startup_grace_seconds=30.0,
    ),
    # === Text and Image to Image (TI2I) ===
    DiffusionCase(
        id="qwen_image_edit_ti2i",
        model_path="Qwen/Qwen-Image-Edit",
        scenario_name="image_edit",
        modality="image",
        prompt=None,  # not used for editing
        output_size="1024x1536",
        warmup_text=0,
        warmup_edit=1,
        image_edit_prompt="Convert 2D style to 3D style",
        image_edit_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg",
        startup_grace_seconds=30.0,
    ),
    # === Text to Video (T2V) ===
    DiffusionCase(
        id="fastwan2_1_t2v",
        model_path="Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
        scenario_name="text_to_video",
        modality="video",
        prompt="A curious raccoon",
        output_size="848x480",
        seconds=4,
        warmup_text=0,  # warmups only for image gen models
        warmup_edit=0,
        startup_grace_seconds=30.0,
        custom_validator="video",
    ),
    # === Image to Video (I2V) ===
    # Disabled for now:
    # DiffusionCase(
    #     id="wan2_1_i2v_480p",
    #     model_path="Wan-AI/Wan2.1-I2V-14B-Diffusers",
    #     scenario_name="image_to_video",
    #     modality="video",
    #     prompt="generate",  # placeholder; the request fails if no prompt is passed
    #     warmup_text=0,  # warmups only for image gen models
    #     warmup_edit=0,
    #     output_size="1024x1536",
    #     image_edit_prompt="generate",
    #     image_edit_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg",
    #     startup_grace_seconds=30.0,
    #     custom_validator="video",
    #     seconds=4,
    # ),
    # === Text and Image to Video (TI2V) ===
    DiffusionCase(
        id="wan2_2_ti2v_5b",
        model_path="Wan-AI/Wan2.2-TI2V-5B-Diffusers",
        scenario_name="text_image_to_video",
        modality="video",
        prompt="Animate this image",
        output_size="832x1104",
        warmup_text=0,  # warmups only for image gen models
        warmup_edit=0,
        image_edit_prompt="Add dynamic motion to the scene",
        image_edit_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg",
        startup_grace_seconds=30.0,
        custom_validator="video",
        seconds=4,
    ),
]


# Load the global configuration from the baseline JSON next to this module.
BASELINE_CONFIG = BaselineConfig.load(Path(__file__).with_name("perf_baselines.json"))
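
# A minimal sketch of how a test might consume these objects. The comparison
# rule (measured <= expected * (1 + tolerance)) is an assumption about how the
# tolerance ratios are applied, not something defined in this module:
#
#   case = DIFFUSION_CASES[0]
#   expected = BASELINE_CONFIG.scenarios[case.scenario_name]
#   tol = BASELINE_CONFIG.tolerances
#   # summary: a PerformanceSummary produced by the test harness
#   assert summary.e2e_ms <= expected.expected_e2e_ms * (1 + tol.e2e)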