Skip to content

Commit 09bc9f2

Browse files
authored
Merge pull request #296 from opcm/push-2021-05-15
Push 2021 05 15
2 parents 442f410 + 79df99f commit 09bc9f2

File tree

11 files changed

+906
-309
lines changed

11 files changed

+906
-309
lines changed

cpucounters.cpp

Lines changed: 125 additions & 36 deletions
Large diffs are not rendered by default.

cpucounters.h

Lines changed: 62 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,38 @@ class PCM_API PCM
732732
IIO_STACK_COUNT = 6
733733
};
734734

735+
// Offsets/enumeration of IIO stacks for Skylake server.
// Enumerators 0..5 name the individual stacks; SKX_IIO_STACK_COUNT is the
// number of stacks, not a stack index.
736+
enum SkylakeIIOStacks {
737+
SKX_IIO_CBDMA_DMI = 0,
738+
SKX_IIO_PCIe0 = 1,
739+
SKX_IIO_PCIe1 = 2,
740+
SKX_IIO_PCIe2 = 3,
741+
SKX_IIO_MCP0 = 4,
742+
SKX_IIO_MCP1 = 5,
743+
SKX_IIO_STACK_COUNT = 6
744+
};
745+
746+
// Offsets/enumeration of IIO stacks for IceLake server.
// Enumerators 0..5 name the individual stacks; ICX_IIO_STACK_COUNT is the
// number of stacks, not a stack index.
747+
enum IcelakeIIOStacks {
748+
ICX_IIO_PCIe0 = 0,
749+
ICX_IIO_PCIe1 = 1,
750+
ICX_IIO_MCP0 = 2,
751+
ICX_IIO_PCIe2 = 3,
752+
ICX_IIO_PCIe3 = 4,
753+
ICX_IIO_CBDMA_DMI = 5,
754+
ICX_IIO_STACK_COUNT = 6
755+
};
756+
757+
// Offsets/enumeration of IIO stacks for Snowridge.
// Enumerators 0..4 name the individual stacks; SNR_IIO_STACK_COUNT is the
// number of stacks, not a stack index.
758+
enum SnowridgeIIOStacks {
759+
SNR_IIO_QAT = 0,
760+
SNR_IIO_CBDMA_DMI = 1,
761+
SNR_IIO_NIS = 2,
762+
SNR_IIO_HQM = 3,
763+
SNR_IIO_PCIe0 = 4,
764+
SNR_IIO_STACK_COUNT = 5
765+
};
766+
735767
struct SimplePCIeDevInfo
736768
{
737769
enum PCIeWidthMode width;
@@ -1259,6 +1291,7 @@ class PCM_API PCM
12591291
CHERRYTRAIL = 76,
12601292
APOLLO_LAKE = 92,
12611293
DENVERTON = 95,
1294+
SNOWRIDGE = 134,
12621295
CLARKDALE = 37,
12631296
WESTMERE_EP = 44,
12641297
NEHALEM_EX = 46,
@@ -1414,6 +1447,7 @@ class PCM_API PCM
14141447
case ICX:
14151448
case BDX:
14161449
case KNL:
1450+
case SNOWRIDGE:
14171451
return (server_pcicfg_uncore.size() && server_pcicfg_uncore[0].get()) ? (server_pcicfg_uncore[0]->getNumMCChannels()) : 0;
14181452
}
14191453
return 0;
@@ -1441,6 +1475,7 @@ class PCM_API PCM
14411475
case ICX:
14421476
case BDX:
14431477
case KNL:
1478+
case SNOWRIDGE:
14441479
return (socket < server_pcicfg_uncore.size() && server_pcicfg_uncore[socket].get()) ? (server_pcicfg_uncore[socket]->getNumMCChannels(controller)) : 0;
14451480
}
14461481
return 0;
@@ -1466,6 +1501,8 @@ class PCM_API PCM
14661501
if (ICL == cpu_model || TGL == cpu_model) return 5;
14671502
switch (cpu_model)
14681503
{
1504+
case SNOWRIDGE:
1505+
return 4;
14691506
case DENVERTON:
14701507
return 3;
14711508
case NEHALEM_EP:
@@ -1512,6 +1549,7 @@ class PCM_API PCM
15121549
return 1000000000ULL; // 1 GHz
15131550
case SKX:
15141551
case ICX:
1552+
case SNOWRIDGE:
15151553
return 1100000000ULL; // 1.1 GHz
15161554
}
15171555
return 0;
@@ -1533,6 +1571,7 @@ class PCM_API PCM
15331571
case BDX_DE:
15341572
case SKX:
15351573
case ICX:
1574+
case SNOWRIDGE:
15361575
case KNL:
15371576
return true;
15381577
default:
@@ -1711,6 +1750,7 @@ class PCM_API PCM
17111750
|| cpu_model_ == CHERRYTRAIL
17121751
|| cpu_model_ == APOLLO_LAKE
17131752
|| cpu_model_ == DENVERTON
1753+
// || cpu_model_ == SNOWRIDGE do not use Atom code for SNOWRIDGE
17141754
;
17151755
}
17161756

@@ -1733,6 +1773,7 @@ class PCM_API PCM
17331773
|| cpu_model == PCM::BAYTRAIL
17341774
|| cpu_model == PCM::APOLLO_LAKE
17351775
|| cpu_model == PCM::DENVERTON
1776+
|| cpu_model == PCM::SNOWRIDGE
17361777
|| cpu_model == PCM::HASWELLX
17371778
|| cpu_model == PCM::BROADWELL
17381779
|| cpu_model == PCM::BDX_DE
@@ -1807,10 +1848,8 @@ class PCM_API PCM
18071848

18081849
// Returns true unless isAtom() holds or cpu_model is PCM::CLARKDALE.
// The removed ('-') and added ('+') return statements below evaluate the
// same boolean expression; only the layout differs.
bool memoryTrafficMetricsAvailable() const
18091850
{
1810-
return !(
1811-
isAtom()
1812-
|| cpu_model == PCM::CLARKDALE
1813-
);
1851+
return (!(isAtom() || cpu_model == PCM::CLARKDALE))
1852+
;
18141853
}
18151854

18161855
bool MCDRAMmemoryTrafficMetricsAvailable() const
@@ -1835,6 +1874,7 @@ class PCM_API PCM
18351874
return (
18361875
cpu_model == PCM::SKX
18371876
|| cpu_model == PCM::ICX
1877+
|| cpu_model == PCM::SNOWRIDGE
18381878
);
18391879
}
18401880

@@ -1863,6 +1903,7 @@ class PCM_API PCM
18631903
isCLX()
18641904
|| isCPX()
18651905
|| cpu_model == PCM::ICX
1906+
|| cpu_model == PCM::SNOWRIDGE
18661907
);
18671908
}
18681909

@@ -1880,6 +1921,7 @@ class PCM_API PCM
18801921
|| ((SKX == cpu_model) && (num_sockets == 1))
18811922
#endif
18821923
|| ICX == cpu_model
1924+
|| SNOWRIDGE == cpu_model
18831925
);
18841926
}
18851927

@@ -1894,6 +1936,7 @@ class PCM_API PCM
18941936
{
18951937
return (
18961938
cpu_model == PCM::JAKETOWN
1939+
|| cpu_model == PCM::SNOWRIDGE
18971940
|| cpu_model == PCM::IVYTOWN
18981941
|| cpu_model == PCM::HASWELLX
18991942
|| cpu_model == PCM::BDX_DE
@@ -2284,7 +2327,7 @@ uint64 getDRAMClocks(uint32 channel, const CounterStateType & before, const Coun
22842327
{
22852328
const auto clk = after.DRAMClocks[channel] - before.DRAMClocks[channel];
22862329
const auto cpu_model = PCM::getInstance()->getCPUModel();
2287-
if (cpu_model == PCM::ICX)
2330+
if (cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE)
22882331
{
22892332
return 2 * clk;
22902333
}
@@ -3151,10 +3194,11 @@ uint64 getL2CacheMisses(const CounterStateType & before, const CounterStateType
31513194
{
31523195
auto pcm = PCM::getInstance();
31533196
if (pcm->isL2CacheMissesAvailable() == false) return 0ULL;
3154-
if (pcm->useSkylakeEvents()) {
3197+
const auto cpu_model = pcm->getCPUModel();
3198+
if (pcm->useSkylakeEvents() || cpu_model == PCM::SNOWRIDGE) {
31553199
return after.Event[BasicCounterState::SKLL2MissPos] - before.Event[BasicCounterState::SKLL2MissPos];
31563200
}
3157-
if (pcm->isAtom() || pcm->getCPUModel() == PCM::KNL)
3201+
if (pcm->isAtom() || cpu_model == PCM::KNL)
31583202
{
31593203
return after.Event[BasicCounterState::ArchLLCMissPos] - before.Event[BasicCounterState::ArchLLCMissPos];
31603204
}
@@ -3243,8 +3287,17 @@ uint64 getL3CacheHitsNoSnoop(const CounterStateType & before, const CounterState
32433287
template <class CounterStateType>
32443288
uint64 getL3CacheHitsSnoop(const CounterStateType & before, const CounterStateType & after)
32453289
{
3246-
if (!PCM::getInstance()->isL3CacheHitsSnoopAvailable()) return 0;
3247-
if (PCM::getInstance()->useSkylakeEvents()) {
3290+
auto pcm = PCM::getInstance();
3291+
if (!pcm->isL3CacheHitsSnoopAvailable()) return 0;
3292+
const auto cpu_model = pcm->getCPUModel();
3293+
if (cpu_model == PCM::SNOWRIDGE)
3294+
{
3295+
const int64 misses = getL3CacheMisses(before, after);
3296+
const int64 refs = after.Event[BasicCounterState::ArchLLCRefPos] - before.Event[BasicCounterState::ArchLLCRefPos];
3297+
const int64 hits = refs - misses;
3298+
return (hits > 0)? hits : 0;
3299+
}
3300+
if (pcm->useSkylakeEvents()) {
32483301
return after.Event[BasicCounterState::SKLL3HitPos] - before.Event[BasicCounterState::SKLL3HitPos];
32493302
}
32503303
return after.Event[BasicCounterState::L2HitMPos] - before.Event[BasicCounterState::L2HitMPos];

lspci.h

Lines changed: 54 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ struct bdf {
214214
uint8_t busno;
215215
uint8_t devno;
216216
uint8_t funcno;
217+
bdf () : busno(0), devno(0), funcno(0) {}
217218
};
218219

219220
struct pci {
@@ -251,6 +252,7 @@ struct pci {
251252
};
252253
uint32_t link_info;
253254
};
255+
pci () : exist(false), offset_0(0), header_type(0), offset_18(0), link_info(0) {}
254256
};
255257

256258
struct counter {
@@ -279,6 +281,34 @@ struct iio_skx {
279281
uint32_t socket_id;
280282
};
281283

284+
/* One part of an IIO stack: a root PCI device plus the devices found below it. */
struct iio_bifurcated_part {
285+
int part_id;
286+
/* A single device representing the root port */
287+
struct pci root_pci_dev;
288+
/* Contains the child switch and end-point devices */
289+
std::vector<struct pci> child_pci_devs;
290+
};
291+
292+
/*
 * Description of one IIO stack.
 *
 * Every scalar member carries a default member initializer so that a
 * default-constructed iio_stack never holds indeterminate values
 * (iio_unit_id is the key compared by operator< on iio_stack).
 */
struct iio_stack {
    /* Parts that make up this stack */
    std::vector<struct iio_bifurcated_part> parts;
    /* Unit id of the stack; ordering key for operator< */
    uint32_t iio_unit_id = 0;
    std::string stack_name;
    std::vector<uint64_t> values;
    bool flipped = false;
    /* holding busno for each IIO stack */
    uint8_t busno = 0;
};
301+
302+
bool operator<(const iio_stack& lh, const iio_stack& rh)
303+
{
304+
return lh.iio_unit_id < rh.iio_unit_id;
305+
}
306+
307+
/* A list of iio_stack entries together with the id of the socket they are recorded for. */
struct iio_stacks_on_socket {
308+
std::vector<struct iio_stack> stacks;
309+
uint32_t socket_id;
310+
};
311+
282312
bool operator < (const bdf &l, const bdf &r) {
283313
if (l.busno < r.busno)
284314
return true;
@@ -325,35 +355,38 @@ void probe_capability_pci_express(struct pci *p, uint32_t cap_ptr)
325355
}
326356
}
327357

328-
void probe_pci(struct pci *p)
358+
/*
 * Probe the PCI function addressed by p->bdf and fill in *p.
 *
 * New ('+') version of the function:
 *  - p->exist is cleared first and set to true only after a valid
 *    VID:DID has been read and the header fields have been stored.
 *  - offset_0 receives the dword at config offset 0x0.
 *  - header_type is taken from bits 16..22 of the dword at offset 0xc.
 *  - header_type 0: if bit 0x100000 of the dword at offset 0x4 is set,
 *    the dword at 0x34 is passed to probe_capability_pci_express().
 *  - header_type 1: offset_18 receives the dword at offset 0x18.
 *
 * Returns p->exist, i.e. true only when the device was found and its
 * VID:DID was not the all-ones value.
 */
bool probe_pci(struct pci *p)
329359
{
330360
uint32 value;
361+
p->exist = false;
331362
struct bdf *bdf = &p->bdf;
332363
if (PciHandleType::exists(0, bdf->busno, bdf->devno, bdf->funcno)) {
333-
p->exist = true;
334364
PciHandleType h(0, bdf->busno, bdf->devno, bdf->funcno);
335-
h.read32(0x0, &value); //VID:DID
336-
if (value == (std::numeric_limits<unsigned int>::max)()) // invalid VID::DID
337-
{
338-
p->exist = false;
339-
return;
340-
}
341-
p->offset_0 = value;
342-
h.read32(0xc, &value);
343-
p->header_type = (value >> 16) & 0x7f;
344-
if (p->header_type == 0) {
345-
h.read32(0x4, &value); //Status register
346-
if (value & 0x100000) {//Capability list == true
347-
h.read32(0x34, &value); //Capability pointer
348-
probe_capability_pci_express(p, value);
365+
// VID:DID
366+
h.read32(0x0, &value);
367+
// An all-ones value is an invalid VID:DID; continue only when the ID is valid
368+
if (value != (std::numeric_limits<unsigned int>::max)()) {
369+
p->offset_0 = value;
370+
h.read32(0xc, &value);
371+
p->header_type = (value >> 16) & 0x7f;
372+
if (p->header_type == 0) {
373+
// Status register
374+
h.read32(0x4, &value);
375+
// Capability list == true
376+
if (value & 0x100000) {
377+
// Capability pointer
378+
h.read32(0x34, &value);
379+
probe_capability_pci_express(p, value);
380+
}
381+
} else if (p->header_type == 1) {
382+
h.read32(0x18, &value);
383+
p->offset_18 = value;
349384
}
350-
} else if (p->header_type == 1) {
351-
h.read32(0x18, &value);
352-
p->offset_18 = value;
385+
// Mark the device as present only after all fields above are filled in
p->exist = true;
353386
}
354387
}
355-
else
356-
p->exist = false;
388+
389+
return p->exist;
357390
}
358391

359392
/*

msr.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
2323
#endif
2424
#include "types.h"
2525
#include "msr.h"
26+
#include "utils.h"
2627
#include <assert.h>
2728

2829
#ifdef _MSC_VER
@@ -214,6 +215,16 @@ int32 MsrHandle::read(uint64 msr_number, uint64 * value)
214215
// here comes a Linux version
215216
MsrHandle::MsrHandle(uint32 cpu) : fd(-1), cpu_id(cpu)
216217
{
218+
constexpr auto allowWritesPath = "/sys/module/msr/parameters/allow_writes";
219+
static bool writesEnabled = false;
220+
if (writesEnabled == false)
221+
{
222+
if (readSysFS(allowWritesPath, true).length() > 0)
223+
{
224+
writeSysFS(allowWritesPath, "on", false);
225+
}
226+
writesEnabled = true;
227+
}
217228
char * path = new char[200];
218229
snprintf(path, 200, "/dev/cpu/%d/msr", cpu);
219230
int handle = ::open(path, O_RDWR);

opCode-106.txt

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,35 +5,19 @@ ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hn
55
ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part1 (2nd x4)
66
ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part2 (2nd x8/3rd x4)
77
ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part3 (4th x4)
8-
ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part4 (1st x16/x8/x4)
9-
ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part5 (2nd x4)
10-
ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part6 (2nd x8/3rd x4)
11-
ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part7 (4th x4)
128
ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part0 (1st x16/x8/x4)
139
ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part1 (2nd x4)
1410
ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part2 (2nd x8/3rd x4)
1511
ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part3 (4th x4)
16-
ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part4 (1st x16/x8/x4)
17-
ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part5 (2nd x4)
18-
ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part6 (2nd x8/3rd x4)
19-
ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part7 (4th x4)
2012
# Outbound (CPU MMIO to the PCIe device) payload events
2113
ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=1,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part0 (1st x16/x8/x4)
2214
ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=2,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part1 (2nd x4)
2315
ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=4,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part2 (2nd x8/3rd x4)
2416
ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=8,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part3 (4th x4)
25-
ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=16,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part4 (1st x16/x8/x4)
26-
ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=32,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part5 (2nd x4)
27-
ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=64,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part6 (2nd x8/3rd x4)
28-
ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=128,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part7 (4th x4)
2917
ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=1,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part0 (1st x16/x8/x4)
3018
ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=2,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part1 (2nd x4)
3119
ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=4,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part2 (2nd x8/3rd x4)
3220
ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=8,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part3 (4th x4)
33-
ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=16,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part4 (1st x16/x8/x4)
34-
ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=32,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part5 (2nd x4)
35-
ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=64,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part6 (2nd x8/3rd x4)
36-
ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=128,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part7 (4th x4)
3721
# IOMMU events
3822
ctr=0,ev_sel=0x40,umask=0x02,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Lookup,vname=Total
3923
ctr=1,ev_sel=0x40,umask=0x20,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Miss,vname=Total
@@ -42,4 +26,4 @@ ctr=3,ev_sel=0x41,umask=0x10,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1
4226
ctr=0,ev_sel=0x41,umask=0x08,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=1G Cache Hit,vname=Total
4327
ctr=1,ev_sel=0x41,umask=0x04,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=2M Cache Hit,vname=Total
4428
ctr=2,ev_sel=0x41,umask=0x02,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=4K Cache Hit,vname=Total
45-
ctr=3,ev_sel=0x41,umask=0x40,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOMMU Mem Access,vname=Total
29+
ctr=3,ev_sel=0x41,umask=0x40,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOMMU Mem Access,vname=Total

0 commit comments

Comments
 (0)