Skip to content

Commit ba5c4a7

Browse files
clee2000 and huydhn
authored and committed
Upload sccache stats into benchmark database with build step time (pytorch#140839)
Guinea pig benchmark database.

Pull Request resolved: pytorch#140839
Approved by: https://github.com/huydhn
Co-authored-by: Huy Do <[email protected]>
1 parent 7b2138b commit ba5c4a7

File tree

4 files changed

+111
-13
lines changed

4 files changed

+111
-13
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Upload sccache stats to artifacts, and also as benchmark data when on an aws
# linux or windows machine. Does not currently handle mac builds
name: Upload sccache stats

description: Upload sccache stats to artifacts

inputs:
  github-token:
    description: GITHUB_TOKEN
    required: true
  build-time:
    # Optional: forwarded to the formatting step as the BUILD_TIME env var.
    description: Build time in seconds
    required: false

runs:
  using: composite
  steps:
    # Keep the raw sccache stats JSON files as run artifacts.
    - name: Upload sccache to s3
      uses: seemethere/upload-artifact-s3@v5
      with:
        s3-prefix: |
          ${{ github.repository }}/${{ github.run_id }}/${{ github.run_attempt }}/artifact
        retention-days: 14
        if-no-files-found: warn
        path: sccache-stats-*.json

    # Convert the stats files into the benchmark results format.
    - name: Format sccache stats
      shell: bash
      run: |
        python3 -m tools.stats.sccache_stats_to_benchmark_format
      env:
        BUILD_TIME: ${{ inputs.build-time }}

    # benchmark-results-dir is expected to hold the output of the formatting
    # step above.
    - name: Upload sccache stats as benchmark
      uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
      with:
        benchmark-results-dir: test/test-reports
        dry-run: false
        schema-version: v3
        github-token: ${{ inputs.github-token }}

.github/workflows/_linux-build.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ jobs:
212212
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
213213
USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
214214
run: |
215+
START_TIME=$(date +%s)
215216
if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
216217
JENKINS_USER=
217218
USED_IMAGE="${DOCKER_IMAGE_S390X}"
@@ -256,6 +257,9 @@ jobs:
256257
)
257258
docker exec -t "${container_name}" sh -c '.ci/pytorch/build.sh'
258259
260+
END_TIME=$(date +%s)
261+
echo "build_time=$((END_TIME - START_TIME))" >> "$GITHUB_OUTPUT"
262+
259263
- name: Archive artifacts into zip
260264
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped'
261265
run: |
@@ -301,14 +305,10 @@ jobs:
301305

302306
- name: Upload sccache stats
303307
if: steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel'
304-
uses: seemethere/upload-artifact-s3@v5
308+
uses: ./.github/actions/upload-sccache-stats
305309
with:
306-
s3-prefix: |
307-
${{ github.repository }}/${{ github.run_id }}/${{ github.run_attempt }}/artifact
308-
retention-days: 365
309-
if-no-files-found: warn
310-
path: sccache-stats-*.json
311-
s3-bucket: ${{ inputs.s3-bucket }}
310+
github-token: ${{ secrets.GITHUB_TOKEN }}
311+
build-time: ${{ steps.build.outputs.build_time }}
312312

313313
- name: Teardown Linux
314314
uses: pytorch/test-infra/.github/actions/teardown-linux@main

.github/workflows/_win-build.yml

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -182,13 +182,9 @@ jobs:
182182

183183
- name: Upload sccache stats
184184
if: steps.build.outcome != 'skipped'
185-
uses: seemethere/upload-artifact-s3@v5
185+
uses: ./.github/actions/upload-sccache-stats
186186
with:
187-
s3-prefix: |
188-
${{ github.repository }}/${{ github.run_id }}/${{ github.run_attempt }}/artifact
189-
retention-days: 14
190-
if-no-files-found: warn
191-
path: sccache-stats-*.json
187+
github-token: ${{ secrets.GITHUB_TOKEN }}
192188

193189
- name: Teardown Windows
194190
uses: ./.github/actions/teardown-win
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import glob
2+
import json
3+
import os
4+
from pathlib import Path
5+
from typing import Any, Dict
6+
7+
8+
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
9+
10+
11+
def flatten_data(d: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten a possibly nested sccache stats dictionary into a flat one.

    Keys of nested dictionaries are joined to their parent key with an
    underscore, recursively. Values that are not dictionaries are copied
    unchanged. For example, the input::

        {
            "cache": {
                "hit": 1,
                "miss": 2,
            },
        }

    will be transformed to::

        {
            "cache_hit": 1,
            "cache_miss": 2,
        }

    Args:
        d: The (possibly nested) dictionary to flatten.

    Returns:
        A new flat dictionary; ``d`` itself is not modified. A nested
        dictionary that is empty contributes no keys to the result.
    """
    flat_data: Dict[str, Any] = {}
    for key, value in d.items():
        if isinstance(value, dict):
            # Recurse first, then prefix every flattened child key with the
            # parent key.
            for k, v in flatten_data(value).items():
                flat_data[f"{key}_{k}"] = v
        else:
            flat_data[key] = value
    return flat_data
33+
34+
35+
def main() -> None:
    """Convert sccache stats JSON files into one benchmark results file.

    Reads every ``sccache-stats-*.json`` file at the repository root and
    writes a JSON list with one record per file to
    ``test/test-reports/sccache-stats.json``, creating the directory if
    needed. Each record carries the build time taken from the ``BUILD_TIME``
    environment variable (0 when unset or empty) as its only benchmark value,
    and the flattened sccache stats as ``extra_info``.

    Raises:
        ValueError: If ``BUILD_TIME`` is set to a non-integer string.
    """
    # BUILD_TIME is the same for every stats file, so parse it once up front.
    build_time = int(os.environ.get("BUILD_TIME") or 0)

    records = []
    # Sort so the record order does not depend on directory listing order.
    for file in sorted(glob.glob(str(REPO_ROOT / "sccache-stats-*.json"))):
        # Explicit encoding: the platform default is not UTF-8 everywhere
        # (notably on Windows).
        with open(file, encoding="utf-8") as f:
            data = json.load(f)

        # I don't know what sccache info will be most useful yet, and the
        # sccache json has a decent number of keys, so just flatten the data
        # and store all of it
        records.append(
            {
                "benchmark": {
                    "name": "sccache_stats",
                },
                "metric": {
                    "name": "sccache_stats",
                    "benchmark_values": [build_time],
                    "extra_info": flatten_data(data),
                },
            }
        )

    output_file = REPO_ROOT / "test" / "test-reports" / "sccache-stats.json"
    os.makedirs(output_file.parent, exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(records, f)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)