Skip to content

Commit d151772

Browse files
committed
Added papi-rocm test (still needs validation)
1 parent 5326e8c commit d151772

File tree

11 files changed

+672
-0
lines changed

11 files changed

+672
-0
lines changed
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
PAPI_ROCM_ROOT ?= $(ROCM_ROOT) #/path/to/rocm/
2+
3+
CC = $(PAPI_ROCM_ROOT)/bin/hipcc
4+
CXX = $(PAPI_ROCM_ROOT)/bin/hipcc
5+
CXXFLAGS += -g -O2 -fopenmp
6+
CPPFLAGS += -I$(PAPI_ROCM_ROOT)/include -I$(PAPI_ROOT)/include
7+
LDFLAGS += -L$(PAPI_ROOT)/lib -lpapi -fopenmp
8+
9+
ALL: single_monitor multi_monitor overflow
10+
11+
single_monitor: single_monitor.o matmul.o
12+
13+
multi_monitor: multi_monitor.o matmul.o
14+
15+
overflow: overflow.o matmul.o
16+
17+
run:
18+
./run_rocm_tests.sh
19+
20+
clean:
21+
rm -f *.o single_monitor multi_monitor overflow
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/bin/bash
2+
3+
make clean
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/bash
2+
. ./setup.sh
3+
set -e
4+
set -x
5+
6+
make PAPIROOT=$PAPI_ROOT
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#include <stdio.h>
2+
#include "matmul.h"
3+
4+
#define BLOCK_DIM_X (16)
5+
#define BLOCK_DIM_Y (16)
6+
#define ROWS (4096)
7+
#define COLS (ROWS)
8+
9+
__global__ void matmul(float *A, float *B, float *C, int N)
10+
{
11+
int i = (hipBlockIdx_y * hipBlockDim_y) + hipThreadIdx_y;
12+
int j = (hipBlockIdx_x * hipBlockDim_x) + hipThreadIdx_x;
13+
14+
if (i < N && j < N) {
15+
float sum = 0.0;
16+
for (int k = 0; k < N; ++k) {
17+
sum += A[(i * N) + k] * B[(k * N) + j];
18+
}
19+
C[(i * N) + j] = sum;
20+
}
21+
}
22+
23+
struct matmul_arrays {
24+
float *h_A;
25+
float *h_B;
26+
float *h_C;
27+
float *d_A;
28+
float *d_B;
29+
float *d_C;
30+
};
31+
32+
int matmul_init(void **handle)
33+
{
34+
hipError_t hip_errno;
35+
36+
struct matmul_arrays *handle_p = (struct matmul_arrays *) malloc(sizeof(*handle_p));
37+
if (handle_p == NULL) {
38+
return MATMUL_ENOMEM;
39+
}
40+
41+
hip_errno = hipHostMalloc(&handle_p->h_A, sizeof(float) * ROWS * COLS);
42+
if (hip_errno != hipSuccess) {
43+
return MATMUL_ENOMEM;
44+
}
45+
46+
hip_errno = hipHostMalloc(&handle_p->h_B, sizeof(float) * ROWS * COLS);
47+
if (hip_errno != hipSuccess) {
48+
return MATMUL_ENOMEM;
49+
}
50+
51+
hip_errno = hipHostMalloc(&handle_p->h_C, sizeof(float) * ROWS * COLS);
52+
if (hip_errno != hipSuccess) {
53+
return MATMUL_ENOMEM;
54+
}
55+
56+
hip_errno = hipMalloc(&handle_p->d_A, sizeof(float) * ROWS * COLS);
57+
if (hip_errno != hipSuccess) {
58+
return MATMUL_ENOMEM;
59+
}
60+
61+
hip_errno = hipMalloc(&handle_p->d_B, sizeof(float) * ROWS * COLS);
62+
if (hip_errno != hipSuccess) {
63+
return MATMUL_ENOMEM;
64+
}
65+
66+
hip_errno = hipMalloc(&handle_p->d_C, sizeof(float) * ROWS * COLS);
67+
if (hip_errno != hipSuccess) {
68+
return MATMUL_ENOMEM;
69+
}
70+
71+
for (int i = 0; i < ROWS * COLS; ++i) {
72+
handle_p->h_A[i] = handle_p->h_B[i] = (float) (rand() % 1000);
73+
handle_p->h_C[i] = 0.0;
74+
}
75+
76+
*handle = handle_p;
77+
78+
return MATMUL_SUCCESS;
79+
}
80+
81+
int matmul_run(void *handle, hipStream_t stream)
82+
{
83+
hipError_t hip_errno;
84+
float *h_A, *h_B, *h_C, *d_A, *d_B, *d_C;
85+
86+
struct matmul_arrays *handle_p = (struct matmul_arrays *) handle;
87+
h_A = handle_p->h_A;
88+
h_B = handle_p->h_B;
89+
h_C = handle_p->h_C;
90+
d_A = handle_p->d_A;
91+
d_B = handle_p->d_B;
92+
d_C = handle_p->d_C;
93+
94+
hip_errno = hipMemcpyAsync(d_A, h_A, sizeof(float) * ROWS * COLS, hipMemcpyHostToDevice, stream);
95+
if (hip_errno != hipSuccess) {
96+
return MATMUL_EMISC;
97+
}
98+
99+
hip_errno = hipMemcpyAsync(d_B, h_B, sizeof(float) * ROWS * COLS, hipMemcpyHostToDevice, stream);
100+
if (hip_errno != hipSuccess) {
101+
return MATMUL_EMISC;
102+
}
103+
104+
dim3 grid_dim = dim3(ROWS / BLOCK_DIM_X, COLS / BLOCK_DIM_Y);
105+
dim3 block_dim = dim3(BLOCK_DIM_X, BLOCK_DIM_Y);
106+
107+
hipLaunchKernelGGL(matmul, grid_dim, block_dim, 0, stream, d_A, d_B, d_C, ROWS);
108+
hip_errno = hipGetLastError();
109+
if (hip_errno != hipSuccess) {
110+
return MATMUL_EMISC;
111+
}
112+
113+
hip_errno = hipMemcpyAsync(h_C, d_C, sizeof(float) * ROWS * COLS, hipMemcpyDeviceToHost, stream);
114+
if (hip_errno != hipSuccess) {
115+
return MATMUL_EMISC;
116+
}
117+
118+
return MATMUL_SUCCESS;
119+
}
120+
121+
int matmul_finalize(void **handle)
122+
{
123+
struct matmul_arrays *handle_p = (struct matmul_arrays *) (*handle);
124+
hipFree(handle_p->h_A);
125+
hipFree(handle_p->h_B);
126+
hipFree(handle_p->h_C);
127+
hipFree(handle_p->d_A);
128+
hipFree(handle_p->d_B);
129+
hipFree(handle_p->d_C);
130+
free(handle_p);
131+
*handle = NULL;
132+
return MATMUL_SUCCESS;
133+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#ifndef __MATMUL_H__
2+
#define __MATMUL_H__
3+
4+
#include <hip/hip_runtime.h>
5+
6+
#define MATMUL_SUCCESS ( 0)
7+
#define MATMUL_ENOMEM (-1)
8+
#define MATMUL_EMISC (-2)
9+
10+
int matmul_init(void **handle);
11+
int matmul_run(void *handle, hipStream_t stream);
12+
int matmul_finalize(void **handle);
13+
14+
#endif /* End of __MATMUL_H__ */
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
#include <stdio.h>
2+
#include <stdlib.h>
3+
#include <string.h>
4+
#include <unistd.h>
5+
#include <omp.h>
6+
#include "papi.h"
7+
#include "matmul.h"
8+
9+
int main(int argc, char *argv[])
10+
{
11+
int papi_errno;
12+
hipError_t hip_errno;
13+
14+
int num_threads = 1;
15+
if (argc > 1) {
16+
if (strncmp(argv[1], "--threads=", strlen("--threads=")) == 0) {
17+
num_threads = (int) strtol(argv[1] + strlen("--threads="), NULL, 10);
18+
} else if (strcmp(argv[1], "--help") == 0) {
19+
fprintf(stdout, "Usage %s [OPTIONS]\n", argv[0]);
20+
fprintf(stdout, "[OPTIONS]\n");
21+
fprintf(stdout, " --help\n");
22+
fprintf(stdout, " --threads=[N]\n");
23+
exit(EXIT_FAILURE);
24+
}
25+
}
26+
27+
papi_errno = PAPI_library_init(PAPI_VER_CURRENT);
28+
if (papi_errno != PAPI_VER_CURRENT) {
29+
fprintf(stderr, "ERROR: PAPI_library_init: runtime lib ver %d not equal to %d\n", papi_errno, PAPI_VER_CURRENT);
30+
exit(EXIT_FAILURE);
31+
}
32+
33+
papi_errno = PAPI_thread_init((unsigned long (*)(void)) omp_get_thread_num);
34+
if (papi_errno != PAPI_OK) {
35+
fprintf(stderr, "ERROR: PAPI_thread_init: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
36+
exit(EXIT_FAILURE);
37+
}
38+
39+
int num_devices;
40+
hip_errno = hipGetDeviceCount(&num_devices);
41+
if (hip_errno != hipSuccess) {
42+
fprintf(stderr, "ERROR: hipGetDeviceCount: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
43+
exit(EXIT_FAILURE);
44+
}
45+
46+
num_threads = (num_threads < num_devices) ? num_threads : num_devices;
47+
omp_set_num_threads(num_threads);
48+
fprintf(stdout, "Run rocm test with %d threads\n", num_threads);
49+
50+
#define NUM_EVENTS 2
51+
const char *events[NUM_EVENTS] = {
52+
"rocm:::SQ_WAVES",
53+
"rocm:::SQ_WAVES_RESTORED",
54+
};
55+
56+
#pragma omp parallel
57+
{
58+
int eventset = PAPI_NULL;
59+
papi_errno = PAPI_create_eventset(&eventset);
60+
if (papi_errno != PAPI_OK) {
61+
fprintf(stderr, "ERROR: PAPI_create_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
62+
exit(EXIT_FAILURE);
63+
}
64+
65+
int thread_num = omp_get_thread_num();
66+
for (int j = 0; j < NUM_EVENTS; ++j) {
67+
char named_event[PAPI_MAX_STR_LEN] = { 0 };
68+
sprintf(named_event, "%s:device=%d", events[j], thread_num);
69+
papi_errno = PAPI_add_named_event(eventset, (const char *) named_event);
70+
if (papi_errno != PAPI_OK && papi_errno != PAPI_ENOEVNT) {
71+
fprintf(stderr, "ERROR: PAPI_add_named_event: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
72+
exit(EXIT_FAILURE);
73+
}
74+
}
75+
76+
papi_errno = PAPI_start(eventset);
77+
if (papi_errno != PAPI_OK) {
78+
fprintf(stderr, "ERROR: PAPI_start: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
79+
exit(EXIT_FAILURE);
80+
}
81+
82+
hip_errno = hipSetDevice(thread_num);
83+
if (hip_errno != hipSuccess) {
84+
fprintf(stderr, "ERROR: hipSetDevice: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
85+
exit(EXIT_FAILURE);
86+
}
87+
88+
hipStream_t stream;
89+
hip_errno = hipStreamCreate(&stream);
90+
if (hip_errno != hipSuccess) {
91+
fprintf(stderr, "ERROR: hipStreamCreate: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
92+
exit(EXIT_FAILURE);
93+
}
94+
95+
void *handle;
96+
int matmul_errno;
97+
matmul_errno = matmul_init(&handle);
98+
if (matmul_errno != MATMUL_SUCCESS) {
99+
fprintf(stderr, "ERROR: matmul_init: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
100+
exit(EXIT_FAILURE);
101+
}
102+
103+
matmul_errno = matmul_run(handle, stream);
104+
if (matmul_errno != MATMUL_SUCCESS) {
105+
fprintf(stderr, "ERROR: matmul_run: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
106+
exit(EXIT_FAILURE);
107+
}
108+
109+
hip_errno = hipStreamSynchronize(stream);
110+
if (hip_errno != hipSuccess) {
111+
fprintf(stderr, "ERROR: hipStreamSynchronize: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
112+
exit(EXIT_FAILURE);
113+
}
114+
115+
hip_errno = hipStreamDestroy(stream);
116+
if (hip_errno != hipSuccess) {
117+
fprintf(stderr, "ERROR: hipStreamDestroy: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
118+
exit(EXIT_FAILURE);
119+
}
120+
121+
matmul_errno = matmul_finalize(&handle);
122+
if (matmul_errno != MATMUL_SUCCESS) {
123+
fprintf(stderr, "ERROR: matmul_finalize: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
124+
exit(EXIT_FAILURE);
125+
}
126+
127+
long long counters[NUM_EVENTS] = { 0 };
128+
papi_errno = PAPI_stop(eventset, counters);
129+
if (papi_errno != PAPI_OK) {
130+
fprintf(stderr, "ERROR: PAPI_stop: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
131+
exit(EXIT_FAILURE);
132+
}
133+
134+
for (int i = 0; i < NUM_EVENTS; ++i) {
135+
fprintf(stdout, "[tid:%d] %s:device=%d : %lld\n",
136+
omp_get_thread_num(), events[i], thread_num,
137+
counters[i]);
138+
}
139+
140+
papi_errno = PAPI_cleanup_eventset(eventset);
141+
if (papi_errno != PAPI_OK) {
142+
fprintf(stderr, "ERROR: PAPI_cleanup_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
143+
exit(EXIT_FAILURE);
144+
}
145+
146+
papi_errno = PAPI_destroy_eventset(&eventset);
147+
if (papi_errno != PAPI_OK) {
148+
fprintf(stderr, "ERROR: PAPI_destroy_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
149+
exit(EXIT_FAILURE);
150+
}
151+
}
152+
153+
PAPI_shutdown();
154+
155+
return EXIT_SUCCESS;
156+
}

0 commit comments

Comments
 (0)