Skip to content

Commit 6b548dd

Browse files
Merge pull request #62 from hilldani/main
Move LLC metrics under L2 and add pivoted CSVs for CPU and socket run modes
2 parents 676d3e4 + a95e419 commit 6b548dd

File tree

4 files changed

+131
-88
lines changed

4 files changed

+131
-88
lines changed

_version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.3.10
1+
1.3.11

events/metric_icx.json

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,42 @@
8686
"expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]",
8787
"expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]"
8888
},
89+
{
90+
"name": "metric_LLC code read MPI (demand+prefetch)",
91+
"name-txn": "metric_LLC code read (demand+prefetch) misses per txn",
92+
"expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]",
93+
"expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]"
94+
},
95+
{
96+
"name": "metric_LLC data read MPI (demand+prefetch)",
97+
"name-txn": "metric_LLC data read (demand+prefetch) misses per txn",
98+
"expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]",
99+
"expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]"
100+
},
101+
{
102+
"name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)",
103+
"name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)",
104+
"expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]",
105+
"expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]"
106+
},
107+
{
108+
"name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)",
109+
"name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)",
110+
"expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]",
111+
"expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]"
112+
},
113+
{
114+
"name": "metric_Average LLC demand data read miss latency (in ns)",
115+
"expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
116+
},
117+
{
118+
"name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)",
119+
"expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
120+
},
121+
{
122+
"name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)",
123+
"expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
124+
},
89125
{
90126
"name": "metric_UPI Data transmit BW (MB/sec) (only data)",
91127
"expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1"
@@ -138,42 +174,6 @@
138174
"name": "metric_memory bandwidth total (MB/sec)",
139175
"expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1"
140176
},
141-
{
142-
"name": "metric_LLC code read MPI (demand+prefetch)",
143-
"name-txn": "metric_LLC code read (demand+prefetch) misses per txn",
144-
"expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]",
145-
"expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]"
146-
},
147-
{
148-
"name": "metric_LLC data read MPI (demand+prefetch)",
149-
"name-txn": "metric_LLC data read (demand+prefetch) misses per txn",
150-
"expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]",
151-
"expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]"
152-
},
153-
{
154-
"name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)",
155-
"name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)",
156-
"expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]",
157-
"expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]"
158-
},
159-
{
160-
"name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)",
161-
"name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)",
162-
"expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]",
163-
"expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]"
164-
},
165-
{
166-
"name": "metric_Average LLC demand data read miss latency (in ns)",
167-
"expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
168-
},
169-
{
170-
"name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)",
171-
"expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
172-
},
173-
{
174-
"name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)",
175-
"expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
176-
},
177177
{
178178
"name": "metric_ITLB (2nd level) MPI",
179179
"name-txn": "metric_ITLB (2nd level) misses per txn",

events/metric_spr_emr.json

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,44 @@
8686
"expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]",
8787
"expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]"
8888
},
89+
{
90+
"name": "metric_LLC code read MPI (demand+prefetch)",
91+
"name-txn": "metric_LLC code read (demand+prefetch) misses per txn",
92+
"expression": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]",
93+
"expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]"
94+
},
95+
{
96+
"name": "metric_LLC data read MPI (demand+prefetch)",
97+
"name-txn": "metric_LLC data read (demand+prefetch) misses per txn",
98+
"expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]",
99+
"expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]"
100+
},
101+
{
102+
"name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)",
103+
"name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)",
104+
"expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]",
105+
"expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]",
106+
"origin": "perfspect"
107+
},
108+
{
109+
"name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)",
110+
"name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)",
111+
"expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]",
112+
"expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]",
113+
"origin": "perfspect"
114+
},
115+
{
116+
"name": "metric_Average LLC demand data read miss latency (in ns)",
117+
"expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
118+
},
119+
{
120+
"name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)",
121+
"expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
122+
},
123+
{
124+
"name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)",
125+
"expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
126+
},
89127
{
90128
"name": "metric_UPI Data transmit BW (MB/sec) (only data)",
91129
"expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1"
@@ -140,44 +178,6 @@
140178
"name": "metric_memory bandwidth total (MB/sec)",
141179
"expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1"
142180
},
143-
{
144-
"name": "metric_LLC code read MPI (demand+prefetch)",
145-
"name-txn": "metric_LLC code read (demand+prefetch) misses per txn",
146-
"expression": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]",
147-
"expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]"
148-
},
149-
{
150-
"name": "metric_LLC data read MPI (demand+prefetch)",
151-
"name-txn": "metric_LLC data read (demand+prefetch) misses per txn",
152-
"expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]",
153-
"expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]"
154-
},
155-
{
156-
"name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)",
157-
"name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)",
158-
"expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]",
159-
"expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]",
160-
"origin": "perfspect"
161-
},
162-
{
163-
"name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)",
164-
"name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)",
165-
"expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]",
166-
"expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]",
167-
"origin": "perfspect"
168-
},
169-
{
170-
"name": "metric_Average LLC demand data read miss latency (in ns)",
171-
"expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
172-
},
173-
{
174-
"name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)",
175-
"expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
176-
},
177-
{
178-
"name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)",
179-
"expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
180-
},
181181
{
182182
"name": "metric_ITLB (2nd level) MPI",
183183
"name-txn": "metric_ITLB (2nd level) misses per txn",

perf-postprocess.py

Lines changed: 56 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,28 @@ def get_extra_out_file(out_file, t):
3939
text = "socket"
4040
elif t == "sa":
4141
text = "socket.average"
42+
elif t == "savg":
43+
text = "socket.avg.pivot"
44+
elif t == "smax":
45+
text = "socket.max.pivot"
46+
elif t == "smin":
47+
text = "socket.min.pivot"
48+
elif t == "sp95":
49+
text = "socket.p95.pivot"
4250
elif t == "sr":
4351
text = "socket.raw"
4452
elif t == "c":
4553
text = "cpu"
4654
elif t == "ca":
4755
text = "cpu.average"
56+
elif t == "cavg":
57+
text = "cpu.avg.pivot"
58+
elif t == "cmax":
59+
text = "cpu.max.pivot"
60+
elif t == "cmin":
61+
text = "cpu.min.pivot"
62+
elif t == "cp95":
63+
text = "cpu.p95.pivot"
4864
elif t == "cr":
4965
text = "cpu.raw"
5066
elif t == "m":
@@ -571,16 +587,8 @@ def generate_metrics_time_series(time_series_df, perf_mode, out_file_path):
571587

572588

573589
def generate_metrics_averages(
574-
time_series_df: pd.DataFrame, perf_mode: Mode, out_file_path: str
590+
time_series_df: pd.DataFrame, perf_mode: Mode, out_file_path: str, metrics
575591
) -> None:
576-
average_metric_file_name = ""
577-
if perf_mode == Mode.System:
578-
average_metric_file_name = get_extra_out_file(out_file_path, "a")
579-
if perf_mode == Mode.Socket:
580-
average_metric_file_name = get_extra_out_file(out_file_path, "sa")
581-
if perf_mode == Mode.CPU:
582-
average_metric_file_name = get_extra_out_file(out_file_path, "ca")
583-
584592
time_series_df.index.name = "metrics"
585593
avgcol = time_series_df.mean(numeric_only=True, axis=1).to_frame().reset_index()
586594
p95col = time_series_df.quantile(q=0.95, axis=1).to_frame().reset_index()
@@ -591,15 +599,45 @@ def generate_metrics_averages(
591599
p95col.columns = ["metrics", "p95"]
592600
mincol.columns = ["metrics", "min"]
593601
maxcol.columns = ["metrics", "max"]
602+
594603
# merge columns
595604
time_series_df = time_series_df.merge(avgcol, on="metrics", how="outer")
596605
time_series_df = time_series_df.merge(p95col, on="metrics", how="outer")
597606
time_series_df = time_series_df.merge(mincol, on="metrics", how="outer")
598607
time_series_df = time_series_df.merge(maxcol, on="metrics", how="outer")
599608

609+
average_metric_file_name = ""
610+
if perf_mode == Mode.System:
611+
average_metric_file_name = get_extra_out_file(out_file_path, "a")
612+
elif perf_mode == Mode.CPU:
613+
average_metric_file_name = get_extra_out_file(out_file_path, "ca")
614+
elif perf_mode == Mode.Socket:
615+
average_metric_file_name = get_extra_out_file(out_file_path, "sa")
616+
600617
time_series_df[["metrics", "avg", "p95", "min", "max"]].to_csv(
601618
average_metric_file_name, index=False
602619
)
620+
if perf_mode != Mode.System:
621+
for table, type in [
622+
[avgcol, "avg"],
623+
[p95col, "p95"],
624+
[mincol, "min"],
625+
[maxcol, "max"],
626+
]:
627+
table["part"] = table["metrics"].map(
628+
lambda x: int("".join(filter(str.isdigit, x.split(".")[-1])))
629+
)
630+
table["metrics"] = table["metrics"].map(lambda x: x.rsplit(".", 1)[0])
631+
table = table.pivot_table(
632+
index=["metrics"], columns=["part"], values=table.columns[1]
633+
)
634+
table = table.reindex(index=metrics)
635+
table = table.reindex(sorted(table.columns), axis=1)
636+
637+
average_metric_file_name = get_extra_out_file(
638+
out_file_path, ("s" if perf_mode == Mode.Socket else "c") + type
639+
)
640+
table.to_csv(average_metric_file_name)
603641
return
604642

605643

@@ -994,9 +1032,9 @@ def generate_metrics(
9941032
verbose, filtered_metrics, metadata, group_to_event, group_to_df, errors
9951033
)
9961034

997-
time_series_df = pd.DataFrame(time_metrics_result).reindex(
998-
index=list(time_metrics_result[list(time_metrics_result.keys())[0]].keys())
999-
)
1035+
metrics = list(time_metrics_result[list(time_metrics_result.keys())[0]].keys())
1036+
1037+
time_series_df = pd.DataFrame(time_metrics_result).reindex(index=metrics)
10001038

10011039
if verbose:
10021040
for error in errors:
@@ -1025,7 +1063,12 @@ def generate_metrics(
10251063
]
10261064

10271065
generate_metrics_time_series(time_series_df, perf_mode, out_file_path)
1028-
generate_metrics_averages(time_series_df, perf_mode, out_file_path)
1066+
generate_metrics_averages(
1067+
time_series_df,
1068+
perf_mode,
1069+
out_file_path,
1070+
[*dict.fromkeys([e.rsplit(".", 1)[0] for e in metrics])],
1071+
)
10291072
if perf_mode == Mode.System:
10301073
write_html(time_series_df, perf_mode, out_file_path, meta_data, pertxn)
10311074
return

0 commit comments

Comments
 (0)