|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# ignore-tidy-linelength |
| 3 | + |
| 4 | +# Compatible with Python 3.6+ |
| 5 | + |
| 6 | +import contextlib |
| 7 | +import getpass |
| 8 | +import glob |
| 9 | +import logging |
| 10 | +import os |
| 11 | +import pprint |
| 12 | +import shutil |
| 13 | +import subprocess |
| 14 | +import sys |
| 15 | +import time |
| 16 | +import traceback |
| 17 | +import urllib.request |
| 18 | +from collections import OrderedDict |
| 19 | +from io import StringIO |
| 20 | +from pathlib import Path |
| 21 | +from typing import Callable, Dict, Iterable, List, Optional, Union |
| 22 | + |
# Value of the mandatory `PGO_HOST` environment variable (a missing variable raises
# `KeyError` as soon as this module is loaded). It is passed to x.py as `--target`/`--host`
# and names the build artifact directory (`build/<PGO_HOST>`).
PGO_HOST = os.environ["PGO_HOST"]

# Logger used by every helper in this script.
LOGGER = logging.getLogger("stage-build")
| 26 | + |
# rustc-perf benchmarks compiled while gathering LLVM PGO profiles.
LLVM_PGO_CRATES = [
    "syn-1.0.89",
    "cargo-0.60.0",
    "serde-1.0.136",
    "ripgrep-13.0.0",
    "regex-1.5.5",
    "clap-3.1.6",
    "hyper-0.14.18",
]

# rustc-perf benchmarks compiled while gathering rustc PGO profiles.
RUSTC_PGO_CRATES = [
    "externs",
    "ctfe-stress-5",
    "cargo-0.60.0",
    "token-stream-stress",
    "match-stress",
    "tuple-stress",
    "diesel-1.4.8",
    "bitmaps-3.1.0",
]

# BOLT profiling uses the very same list object as LLVM PGO.
LLVM_BOLT_CRATES = LLVM_PGO_CRATES
| 49 | + |
| 50 | + |
class Pipeline:
    """
    Describes where the multi-stage build keeps its inputs and outputs.

    Subclasses provide the platform-specific roots (and `build_rustc_perf`/`supports_bolt`);
    all remaining locations are derived from those roots.
    """

    # Paths
    def checkout_path(self) -> Path:
        """
        Root of the checkout, i.e. the place where the source lives.
        """
        raise NotImplementedError

    def downloaded_llvm_dir(self) -> Path:
        """
        Directory that contains the host LLVM.
        """
        raise NotImplementedError

    def build_root(self) -> Path:
        """
        Main directory in which the build takes place.
        """
        raise NotImplementedError

    def build_artifacts(self) -> Path:
        return self.build_root().joinpath("build", PGO_HOST)

    def rustc_stage_0(self) -> Path:
        return self.build_artifacts().joinpath("stage0", "bin", "rustc")

    def cargo_stage_0(self) -> Path:
        return self.build_artifacts().joinpath("stage0", "bin", "cargo")

    def rustc_stage_2(self) -> Path:
        return self.build_artifacts().joinpath("stage2", "bin", "rustc")

    def opt_artifacts(self) -> Path:
        raise NotImplementedError

    def llvm_profile_dir_root(self) -> Path:
        return self.opt_artifacts().joinpath("llvm-pgo")

    def llvm_profile_merged_file(self) -> Path:
        return self.opt_artifacts().joinpath("llvm-pgo.profdata")

    def rustc_perf_dir(self) -> Path:
        return self.opt_artifacts().joinpath("rustc-perf")

    def build_rustc_perf(self):
        raise NotImplementedError()

    def rustc_profile_dir_root(self) -> Path:
        return self.opt_artifacts().joinpath("rustc-pgo")

    def rustc_profile_merged_file(self) -> Path:
        return self.opt_artifacts().joinpath("rustc-pgo.profdata")

    def rustc_profile_template_path(self) -> Path:
        """
        By default the profile data goes into one file that is merged again every time a rustc
        invocation finishes, and in practice some profiling data gets lost that way. The
        template returned here therefore includes the PID: this yields many more profile
        files, but the resulting profile gives a slightly faster rustc binary.
        """
        return self.rustc_profile_dir_root().joinpath("default_%m_%p.profraw")

    def supports_bolt(self) -> bool:
        raise NotImplementedError

    def llvm_bolt_profile_merged_file(self) -> Path:
        return self.opt_artifacts().joinpath("bolt.profdata")
| 118 | + |
| 119 | + |
class LinuxPipeline(Pipeline):
    """
    Pipeline for Linux, built around fixed absolute paths (`/checkout`, `/rustroot`, `/tmp/...`).
    """

    def checkout_path(self) -> Path:
        return Path("/checkout")

    def downloaded_llvm_dir(self) -> Path:
        return Path("/rustroot")

    def build_root(self) -> Path:
        return self.checkout_path().joinpath("obj")

    def opt_artifacts(self) -> Path:
        return Path("/tmp/tmp-multistage/opt-artifacts")

    def build_rustc_perf(self):
        """
        Copy the rustc-perf sources into the artifact directory, hand them over to the current
        user and build the `collector` package with the stage 0 toolchain.
        """
        perf_dir = self.rustc_perf_dir()

        # /tmp/rustc-perf comes from the Dockerfile
        shutil.copytree("/tmp/rustc-perf", perf_dir)
        owner = f"{getpass.getuser()}:"
        cmd(["chown", "-R", owner, perf_dir])

        build_env = {
            "RUSTC": str(self.rustc_stage_0()),
            "RUSTC_BOOTSTRAP": "1",
        }
        with change_cwd(perf_dir):
            cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=build_env)

    def supports_bolt(self) -> bool:
        return True
| 146 | + |
| 147 | + |
class WindowsPipeline(Pipeline):
    """
    Pipeline for Windows; every location is derived from the directory the script was started in.
    """

    def __init__(self):
        # Captured once so that later working-directory changes do not move the checkout.
        self.checkout_dir = Path(os.getcwd())

    def checkout_path(self) -> Path:
        return self.checkout_dir

    def downloaded_llvm_dir(self) -> Path:
        return self.checkout_path().joinpath("citools", "clang-rust")

    def build_root(self) -> Path:
        return self.checkout_path()

    def opt_artifacts(self) -> Path:
        return self.checkout_path().joinpath("opt-artifacts")

    def rustc_stage_0(self) -> Path:
        unix_style = super().rustc_stage_0()
        return unix_style.with_suffix(".exe")

    def cargo_stage_0(self) -> Path:
        unix_style = super().cargo_stage_0()
        return unix_style.with_suffix(".exe")

    def rustc_stage_2(self) -> Path:
        unix_style = super().rustc_stage_2()
        return unix_style.with_suffix(".exe")

    def build_rustc_perf(self):
        """
        Download a pinned rustc-perf snapshot (with retries), unpack it into the artifact
        directory and build the `collector` package with the stage 0 toolchain.
        """
        # rustc-perf version from 2022-07-22
        perf_commit = "3c253134664fdcba862c539d37f0de18557a9a4c"
        archive_path = self.opt_artifacts() / "perf.zip"

        def fetch_and_extract():
            archive_url = f"https://github.com/rust-lang/rustc-perf/archive/{perf_commit}.zip"
            download_file(archive_url, archive_path)
            with change_cwd(self.opt_artifacts()):
                unpack_archive(archive_path)
                extracted_dir = Path(f"rustc-perf-{perf_commit}")
                move_path(extracted_dir, self.rustc_perf_dir())
                delete_file(archive_path)

        retry_action(fetch_and_extract, "Download rustc-perf")

        with change_cwd(self.rustc_perf_dir()):
            build_env = {
                "RUSTC": str(self.rustc_stage_0()),
                "RUSTC_BOOTSTRAP": "1",
            }
            cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=build_env)

    def rustc_profile_template_path(self) -> Path:
        """
        There is not enough space on Windows to keep a separate file per rustc invocation,
        so all generated profiles go into a single file.
        """
        return self.rustc_profile_dir_root().joinpath("default_%m.profraw")

    def supports_bolt(self) -> bool:
        return False
| 205 | + |
| 206 | + |
class Timer:
    """
    Records how long each named build stage took and logs a summary table.
    """

    def __init__(self):
        # We want this dictionary to be ordered by insertion.
        # We use `OrderedDict` for compatibility with older Python versions.
        self.stages = OrderedDict()

    @contextlib.contextmanager
    def stage(self, name: str):
        """
        Context manager that times its body and stores the duration (in seconds) under `name`.

        The duration is stored and an OK/FAIL line is logged even when the body raises; the
        exception itself is always re-raised. Each `name` may be used only once.
        """
        assert name not in self.stages

        start = time.time()
        exc = None
        try:
            LOGGER.info(f"Stage `{name}` starts")
            yield
        except BaseException as exception:
            # Remember the failure only to pick the right log message below.
            exc = exception
            raise
        finally:
            end = time.time()
            duration = end - start
            self.stages[name] = duration
            if exc is None:
                LOGGER.info(f"Stage `{name}` ended: OK ({duration:.2f}s)")
            else:
                LOGGER.info(f"Stage `{name}` ended: FAIL ({duration:.2f}s)")

    def print_stats(self):
        """
        Log a table with the duration of every recorded stage and its share of the total time.
        """
        total_duration = sum(self.stages.values())

        # 57 is the width of the whole table
        divider = "-" * 57

        with StringIO() as output:
            print(divider, file=output)
            for (name, duration) in self.stages.items():
                # The total can be zero (e.g. a single stage that failed faster than the clock
                # resolution). Reporting must not raise ZeroDivisionError in that case, because
                # it would replace the build error that is being propagated.
                pct = (duration / total_duration) * 100 if total_duration > 0 else 0.0
                name_str = f"{name}:"
                print(f"{name_str:<34} {duration:>12.2f}s ({pct:>5.2f}%)", file=output)

            total_duration_label = "Total duration:"
            print(f"{total_duration_label:<34} {total_duration:>12.2f}s", file=output)
            print(divider, file=output, end="")
            LOGGER.info(f"Timer results\n{output.getvalue()}")
| 251 | + |
| 252 | + |
@contextlib.contextmanager
def change_cwd(dir: Path):
    """
    Run the body of the `with` statement inside `dir`, then return to the previous
    working directory (also when the body raises).
    """
    previous_dir = os.getcwd()
    LOGGER.debug(f"Changing working dir from `{previous_dir}` to `{dir}`")
    os.chdir(dir)
    try:
        yield
    finally:
        LOGGER.debug(f"Reverting working dir to `{previous_dir}`")
        os.chdir(previous_dir)
| 266 | + |
| 267 | + |
def move_path(src: Path, dst: Path):
    """
    Move the file or directory `src` to `dst`, logging the operation.
    """
    LOGGER.info(f"Moving `{src}` to `{dst}`")
    # `shutil.move` only accepts path-like objects since Python 3.9, while this script
    # targets Python 3.6+, so plain strings are passed.
    shutil.move(str(src), str(dst))
| 271 | + |
| 272 | + |
def delete_file(path: Path):
    """
    Remove the single file at `path`, logging the operation.
    """
    LOGGER.info(f"Deleting file `{path}`")
    os.remove(path)
| 276 | + |
| 277 | + |
def delete_directory(path: Path):
    """
    Recursively remove the directory tree at `path`, logging the operation.
    Errors during removal are raised, not ignored.
    """
    LOGGER.info(f"Deleting directory `{path}`")
    shutil.rmtree(path, ignore_errors=False)
| 281 | + |
| 282 | + |
def unpack_archive(archive: Path):
    """
    Unpack `archive` into the current working directory, logging the operation.
    """
    LOGGER.info(f"Unpacking archive `{archive}`")
    # `shutil.unpack_archive` only accepts path-like objects since Python 3.7, while this
    # script targets Python 3.6+, so a plain string is passed.
    shutil.unpack_archive(str(archive))
| 286 | + |
| 287 | + |
def download_file(src: str, target: Path):
    """
    Fetch the URL `src` and store its content in the file `target`, logging the operation.
    """
    LOGGER.info(f"Downloading `{src}` into `{target}`")
    destination = str(target)
    urllib.request.urlretrieve(url=src, filename=destination)
| 291 | + |
| 292 | + |
def retry_action(action, name: str, max_fails: int = 5):
    """
    Call `action()` until it succeeds, at most `max_fails` times.

    Every failed attempt is logged together with its traceback. `name` is only used in log
    and error messages.

    Raises:
        Exception: when all `max_fails` attempts have failed.
    """
    LOGGER.info(f"Attempting to perform action `{name}` with retry")
    for iteration in range(max_fails):
        LOGGER.info(f"Attempt {iteration + 1}/{max_fails}")
        try:
            action()
            return
        except Exception:
            # Only ordinary errors are retried. KeyboardInterrupt and SystemExit are not
            # subclasses of Exception, so an abort request propagates immediately instead
            # of being swallowed and retried.
            LOGGER.error(f"Action `{name}` has failed\n{traceback.format_exc()}")

    raise Exception(f"Action `{name}` has failed after {max_fails} attempts")
| 304 | + |
| 305 | + |
def cmd(
        args: List[Union[str, Path]],
        env: Optional[Dict[str, str]] = None,
        output_path: Optional[Path] = None
):
    """
    Log and execute an external command, raising if it exits with a non-zero status.

    `env` is layered on top of a copy of the current process environment. When `output_path`
    is given, the standard output of the command is written to that file.
    Returns the result of `subprocess.run`.
    """
    argv = [str(arg) for arg in args]
    environment = os.environ.copy()

    # Build a shell-like rendering of the command for the log.
    displayed = []
    if env is not None:
        environment.update(env)
        displayed.append(" ".join(f"{key}={value}" for (key, value) in env.items()))
    displayed.append(" ".join(argv))
    cmd_str = " ".join(displayed)
    if output_path is not None:
        cmd_str += f" > {output_path}"
    LOGGER.info(f"Executing `{cmd_str}`")

    if output_path is None:
        return subprocess.run(argv, env=environment, check=True)

    with open(output_path, "w") as sink:
        return subprocess.run(argv, env=environment, check=True, stdout=sink)
| 334 | + |
| 335 | + |
def run_compiler_benchmarks(
        pipeline: Pipeline,
        profiles: List[str],
        scenarios: List[str],
        crates: List[str],
        env: Optional[Dict[str, str]] = None
):
    """
    Exercise the stage 2 rustc: compile libcore twice, then run the rustc-perf collector on
    `crates` with the given `profiles` and `scenarios`. `env` is added to the environment of
    every command that is started.
    """
    extra_env = {} if env is None else env

    # Compile libcore, both in opt-level=0 and opt-level=3
    libcore_source = str(pipeline.checkout_path() / "library/core/src/lib.rs")
    with change_cwd(pipeline.build_root()):
        for opt_flags in ([], ["-Copt-level=3"]):
            compile_args = (
                [pipeline.rustc_stage_2(), "--edition", "2021", "--crate-type", "lib"]
                + opt_flags
                + [libcore_source, "--out-dir", pipeline.opt_artifacts()]
            )
            cmd(compile_args, env=dict(RUSTC_BOOTSTRAP="1", **extra_env))

    # Run rustc-perf benchmarks
    # Benchmark using profile_local with eprintln, which essentially just means
    # don't actually benchmark -- just make sure we run rustc a bunch of times.
    with change_cwd(pipeline.rustc_perf_dir()):
        collector_args = [
            pipeline.cargo_stage_0(),
            "run",
            "-p", "collector", "--bin", "collector", "--",
            "profile_local", "eprintln",
            pipeline.rustc_stage_2(),
            "--id", "Test",
            "--cargo", pipeline.cargo_stage_0(),
            "--profiles", ",".join(profiles),
            "--scenarios", ",".join(scenarios),
            "--include", ",".join(crates),
        ]
        collector_env = dict(
            RUST_LOG="collector=debug",
            RUSTC=str(pipeline.rustc_stage_0()),
            RUSTC_BOOTSTRAP="1",
            **extra_env
        )
        cmd(collector_args, env=collector_env)
| 385 | + |
| 386 | + |
# Based on https://stackoverflow.com/a/31631711/1107768
def format_bytes(size: int) -> str:
    """
    Return the given number of bytes as a human friendly B, KiB, MiB, GiB or TiB string.

    Values below 1024 are printed as whole bytes; larger values use two decimals. Anything
    from 1 TiB upwards is expressed in TiB, so the result always carries a unit.
    """
    KB = 1024
    if size < KB:
        return f"{size} B"

    # (unit label, number of bytes in one unit), smallest unit first
    units = (("KiB", KB), ("MiB", KB ** 2), ("GiB", KB ** 3))
    for (label, factor) in units:
        if size < factor * KB:
            return f"{size / factor:.2f} {label}"
    return f"{size / KB ** 4:.2f} TiB"
| 405 | + |
| 406 | + |
# https://stackoverflow.com/a/63307131/1107768
def count_files(path: Path) -> int:
    """
    Return the number of regular files found anywhere below the directory `path`.
    """
    regular_files = [entry for entry in path.rglob("*") if entry.is_file()]
    return len(regular_files)
| 410 | + |
| 411 | + |
def count_files_with_prefix(path: Path) -> int:
    """
    Return the number of regular files whose path starts with the prefix `path`.
    """
    # Escape the prefix so that glob metacharacters inside it (`[`, `?`, `*`) are matched
    # literally; only the trailing `*` acts as a wildcard.
    pattern = f"{glob.escape(str(path))}*"
    return sum(1 for p in glob.glob(pattern) if Path(p).is_file())
| 414 | + |
| 415 | + |
# https://stackoverflow.com/a/55659577/1107768
def get_path_size(path: Path) -> int:
    """
    Return the size of `path` in bytes.

    For a directory this is the total size of all regular files below it; for anything else
    it is the size reported by `stat`.
    """
    if path.is_dir():
        # Only regular files are summed: the `st_size` of a directory entry is filesystem
        # bookkeeping, not content, and would inflate the result.
        return sum(p.stat().st_size for p in path.rglob("*") if p.is_file())
    return path.stat().st_size
| 421 | + |
| 422 | + |
def get_path_prefix_size(path: Path) -> int:
    """
    Get the total size in bytes of all regular files whose path begins with the prefix `path`.
    Rough alternative to shell `du -sh <path>*`, restricted to files.
    """
    # Escape the prefix so that glob metacharacters inside it are matched literally; only
    # the trailing `*` acts as a wildcard.
    pattern = f"{glob.escape(str(path))}*"
    matches = (Path(p) for p in glob.glob(pattern))
    return sum(match.stat().st_size for match in matches if match.is_file())
| 429 | + |
| 430 | + |
def get_files(directory: Path, filter: Optional[Callable[[Path], bool]] = None) -> Iterable[Path]:
    """
    Lazily yield the path of every direct entry of `directory` (not recursive).
    When `filter` is given, only paths for which it returns a truthy value are yielded.
    """
    for entry_name in os.listdir(directory):
        candidate = directory / entry_name
        if filter is not None and not filter(candidate):
            continue
        yield candidate
| 436 | + |
| 437 | + |
def build_rustc(
        pipeline: Pipeline,
        args: List[str],
        env: Optional[Dict[str, str]] = None
):
    """
    Run `x.py build` for stage 2 `library/std` with `PGO_HOST` as both target and host.
    `args` are appended to the command line; `env` is passed on to the command.
    """
    x_py = pipeline.checkout_path() / "x.py"
    base_command = [
        sys.executable, x_py, "build",
        "--target", PGO_HOST,
        "--host", PGO_HOST,
        "--stage", "2",
        "library/std",
    ]
    cmd(base_command + args, env=env)
| 453 | + |
| 454 | + |
def create_pipeline() -> Pipeline:
    """
    Return the pipeline matching the platform the script runs on.
    Raises an `Exception` for platforms other than Linux and Windows (incl. cygwin).
    """
    platform = sys.platform
    if platform == "linux":
        return LinuxPipeline()
    if platform in ("cygwin", "win32"):
        return WindowsPipeline()
    raise Exception(f"Optimized build is not supported for platform {sys.platform}")
| 462 | + |
| 463 | + |
def gather_llvm_profiles(pipeline: Pipeline):
    """
    Run the LLVM PGO benchmarks, merge the profiles found in the LLVM profile directory into
    one `.profdata` file with the host `llvm-profdata`, log statistics and delete the
    profile directory.
    """
    LOGGER.info("Running benchmarks with PGO instrumented LLVM")
    run_compiler_benchmarks(
        pipeline,
        profiles=["Debug", "Opt"],
        scenarios=["Full"],
        crates=LLVM_PGO_CRATES,
    )

    profile_path = pipeline.llvm_profile_merged_file()
    raw_profiles_dir = pipeline.llvm_profile_dir_root()
    LOGGER.info(f"Merging LLVM PGO profiles to {profile_path}")
    profdata_tool = pipeline.downloaded_llvm_dir() / "bin" / "llvm-profdata"
    cmd([profdata_tool, "merge", "-o", profile_path, raw_profiles_dir])

    LOGGER.info("LLVM PGO statistics")
    LOGGER.info(f"{profile_path}: {format_bytes(get_path_size(profile_path))}")
    LOGGER.info(f"{raw_profiles_dir}: {format_bytes(get_path_size(raw_profiles_dir))}")
    LOGGER.info(f"Profile file count: {count_files(raw_profiles_dir)}")

    # We don't need the individual .profraw files now that they have been merged
    # into a final .profdata
    delete_directory(raw_profiles_dir)
| 491 | + |
| 492 | + |
def gather_rustc_profiles(pipeline: Pipeline):
    """
    Run the rustc PGO benchmarks, merge the profiles found in the rustc profile directory into
    one `.profdata` file with the freshly built `llvm-profdata`, log statistics and delete the
    profile directory.
    """
    LOGGER.info("Running benchmarks with PGO instrumented rustc")

    # Here we're profiling the `rustc` frontend, so we also include `Check`.
    # The benchmark set includes various stress tests that put the frontend under pressure.
    profiling_env = {"LLVM_PROFILE_FILE": str(pipeline.rustc_profile_template_path())}
    run_compiler_benchmarks(
        pipeline,
        profiles=["Check", "Debug", "Opt"],
        scenarios=["All"],
        crates=RUSTC_PGO_CRATES,
        env=profiling_env,
    )

    profile_path = pipeline.rustc_profile_merged_file()
    raw_profiles_dir = pipeline.rustc_profile_dir_root()
    LOGGER.info(f"Merging Rustc PGO profiles to {profile_path}")
    profdata_tool = pipeline.build_artifacts() / "llvm" / "bin" / "llvm-profdata"
    cmd([profdata_tool, "merge", "-o", profile_path, raw_profiles_dir])

    LOGGER.info("Rustc PGO statistics")
    LOGGER.info(f"{profile_path}: {format_bytes(get_path_size(profile_path))}")
    LOGGER.info(f"{raw_profiles_dir}: {format_bytes(get_path_size(raw_profiles_dir))}")
    LOGGER.info(f"Profile file count: {count_files(raw_profiles_dir)}")

    # We don't need the individual .profraw files now that they have been merged
    # into a final .profdata
    delete_directory(raw_profiles_dir)
| 526 | + |
| 527 | + |
def gather_llvm_bolt_profiles(pipeline: Pipeline):
    """
    Run the BOLT benchmarks and merge every file whose path starts with `/tmp/prof.fdata`
    into the merged BOLT profile using `merge-fdata`, then log statistics.
    """
    LOGGER.info("Running benchmarks with BOLT instrumented LLVM")
    run_compiler_benchmarks(
        pipeline,
        profiles=["Check", "Debug", "Opt"],
        scenarios=["Full"],
        crates=LLVM_BOLT_CRATES,
    )

    merged_profile_path = pipeline.llvm_bolt_profile_merged_file()
    profile_files_path = Path("/tmp/prof.fdata")
    LOGGER.info(f"Merging LLVM BOLT profiles to {merged_profile_path}")

    # Sorted so that the command line is deterministic.
    raw_profiles = glob.glob(f"{profile_files_path}*")
    raw_profiles.sort()
    # `merge-fdata` writes the merged profile to stdout, which is redirected into the file.
    cmd(["merge-fdata"] + raw_profiles, output_path=merged_profile_path)

    LOGGER.info("LLVM BOLT statistics")
    LOGGER.info(f"{merged_profile_path}: {format_bytes(get_path_size(merged_profile_path))}")
    LOGGER.info(f"{profile_files_path}: {format_bytes(get_path_prefix_size(profile_files_path))}")
    LOGGER.info(f"Profile file count: {count_files_with_prefix(profile_files_path)}")
| 552 | + |
| 553 | + |
def clear_llvm_files(pipeline: Pipeline):
    """
    Remove the `llvm` and `lld` build directories by hand: rustbuild currently cannot rebuild
    LLVM when PGO options (or any other llvm-related options) change.
    """
    LOGGER.info("Clearing LLVM build files")
    for component in ("llvm", "lld"):
        delete_directory(pipeline.build_artifacts() / component)
| 563 | + |
| 564 | + |
def print_binary_sizes(pipeline: Pipeline):
    """
    Log the size of every entry of the stage 2 `bin` directory and of every `.so` file in the
    stage 2 `lib` directory, each group sorted by path.
    """
    stage2_dir = pipeline.build_artifacts() / "stage2"

    binaries = sorted(get_files(stage2_dir / "bin"))
    libraries = sorted(get_files(stage2_dir / "lib", lambda p: p.suffix == ".so"))

    rows = []
    for entry in binaries + libraries:
        label = f"{entry.name}:"
        rows.append(f"{label:<30}{format_bytes(entry.stat().st_size):>14}\n")
    report = "".join(rows)
    LOGGER.info(f"Rustc binary size\n{report}")
| 578 | + |
| 579 | + |
def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: List[str]):
    """
    Run the whole multi-stage optimized build.

    Instrumented builds are produced and benchmarked to gather LLVM PGO, rustc PGO and (where
    the pipeline supports it) LLVM BOLT profiles. `final_build_args` is then executed as the
    final build command, with the matching `--*-profile-use` flags appended.

    The list passed by the caller is not modified. Each timed step is recorded in `timer`.
    """
    # Work on a private copy so that the caller's list is not extended behind its back.
    final_build_args = list(final_build_args)

    # Clear and prepare tmp directory
    shutil.rmtree(pipeline.opt_artifacts(), ignore_errors=True)
    os.makedirs(pipeline.opt_artifacts(), exist_ok=True)

    pipeline.build_rustc_perf()

    # Stage 1: Build rustc + PGO instrumented LLVM
    with timer.stage("Build rustc (LLVM PGO)"):
        build_rustc(pipeline, args=[
            "--llvm-profile-generate"
        ], env=dict(
            LLVM_PROFILE_DIR=str(pipeline.llvm_profile_dir_root() / "prof-%p")
        ))

    with timer.stage("Gather profiles (LLVM PGO)"):
        gather_llvm_profiles(pipeline)

    clear_llvm_files(pipeline)
    final_build_args += [
        "--llvm-profile-use",
        str(pipeline.llvm_profile_merged_file())
    ]

    # Stage 2: Build PGO instrumented rustc + LLVM
    with timer.stage("Build rustc (rustc PGO)"):
        build_rustc(pipeline, args=[
            "--rust-profile-generate",
            str(pipeline.rustc_profile_dir_root())
        ])

    with timer.stage("Gather profiles (rustc PGO)"):
        gather_rustc_profiles(pipeline)

    clear_llvm_files(pipeline)
    final_build_args += [
        "--rust-profile-use",
        str(pipeline.rustc_profile_merged_file())
    ]

    # Stage 3: Build rustc + BOLT instrumented LLVM
    if pipeline.supports_bolt():
        with timer.stage("Build rustc (LLVM BOLT)"):
            build_rustc(pipeline, args=[
                "--llvm-profile-use",
                str(pipeline.llvm_profile_merged_file()),
                "--llvm-bolt-profile-generate",
            ])
        with timer.stage("Gather profiles (LLVM BOLT)"):
            gather_llvm_bolt_profiles(pipeline)

        clear_llvm_files(pipeline)
        final_build_args += [
            "--llvm-bolt-profile-use",
            str(pipeline.llvm_bolt_profile_merged_file())
        ]

    # Stage 4: Build PGO optimized rustc + PGO/BOLT optimized LLVM
    with timer.stage("Final build"):
        cmd(final_build_args)
| 640 | + |
| 641 | + |
if __name__ == "__main__":
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(name)s %(levelname)-4s: %(message)s",
    )

    LOGGER.info(f"Running multi-stage build using Python {sys.version}")
    # NOTE(review): this writes every environment variable verbatim to the log; confirm that
    # the environment holds no secrets that should stay out of the build output.
    LOGGER.info(f"Environment values\n{pprint.pformat(dict(os.environ), indent=2)}")

    # Everything after the script name is the command line of the final build.
    build_args = sys.argv[1:]

    timer = Timer()
    pipeline = create_pipeline()
    try:
        execute_build_pipeline(timer, pipeline, build_args)
    except BaseException:
        LOGGER.error("The multi-stage build has failed")
        # Bare `raise` re-raises the active exception with its traceback unchanged.
        raise
    finally:
        # Timings of the stages that did run are reported even when the build failed.
        timer.print_stats()

    # Only reached when the build succeeded.
    print_binary_sizes(pipeline)
0 commit comments