Skip to content

Commit 86344a8

Browse files
committed
nv2a: Add swizzle test and benchmark
1 parent 7cb7bb6 commit 86344a8

File tree

2 files changed

+241
-0
lines changed

2 files changed

+241
-0
lines changed

tests/xbox/swizzle/Makefile

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Builds the swizzle cross-check/benchmark program against the in-tree
# nv2a swizzle implementation.
#
# To build with clang instead, override on the command line: make CC=clang
CC=gcc
CFLAGS=-O2 -Wall -g

swizzle-test: swizzle-test.o swizzle-a.o
	$(CC) -o $@ $^

swizzle-test.o: swizzle-test.c

# Rename the exported entry points to the "_A" names that swizzle-test.c
# declares for method A.
swizzle-a.o: swizzle.o
	objcopy \
		--redefine-sym swizzle_box=swizzle_box_A \
		--redefine-sym unswizzle_box=unswizzle_box_A \
		$< $@

swizzle.o: ../../../hw/xbox/nv2a/pgraph/swizzle.c
	$(CC) -o $@ $(CFLAGS) -c $<

%.o: %.c
	$(CC) -o $@ $(CFLAGS) -c $<

.PHONY: clean
clean:
	rm -f swizzle-test swizzle-test.o swizzle.o swizzle-a.o

tests/xbox/swizzle/swizzle-test.c

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
/*
2+
* Crosscheck and benchmark swizzle.
3+
*
4+
* Copyright (c) 2025 Matt Borgerson
5+
*
6+
* This library is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 2 of the License, or (at your option) any later version.
10+
*
11+
* This library is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public
17+
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
18+
*/
19+
#include <assert.h>
20+
#include <stdio.h>
21+
#include <stdint.h>
22+
#include <stdlib.h>
23+
#include <string.h>
24+
#include <unistd.h>
25+
#include <time.h>
26+
27+
/*
 * X-macro list of the swizzle implementations to test. Each X(m) entry is
 * expanded below into prototypes for swizzle_box_<m> / unswizzle_box_<m> and
 * into one row of the methods[] table.
 *
 * To compare a second implementation, append a continuation line such as
 * X(B) and link an object that provides swizzle_box_B / unswizzle_box_B.
 */
#define X_METHODS \
    X(A)
30+
31+
/*
 * Pointer to a swizzle or unswizzle routine. Both directions share this
 * signature: a source buffer, the box dimensions, a destination buffer,
 * the row and slice pitches, and the pixel size in bytes.
 */
typedef void (*swizzle_box_handler)(
    const uint8_t *src_buf,
    unsigned int width,
    unsigned int height,
    unsigned int depth,
    uint8_t *dst_buf,
    unsigned int row_pitch,
    unsigned int slice_pitch,
    unsigned int bytes_per_pixel);
40+
41+
typedef struct Method {
42+
const char *name;
43+
swizzle_box_handler swizzle, unswizzle;
44+
} Method;
45+
46+
/*
 * Declare a function named `m` with the swizzle_box_handler signature.
 * The expansion ends with its own semicolon, so invocations are written
 * without one.
 */
#define PROTO(m)                      \
    void m(                           \
        const uint8_t *src_buf,       \
        unsigned int width,           \
        unsigned int height,          \
        unsigned int depth,           \
        uint8_t *dst_buf,             \
        unsigned int row_pitch,       \
        unsigned int slice_pitch,     \
        unsigned int bytes_per_pixel);
56+
57+
#define X(m) \
58+
PROTO(swizzle_box_ ## m) \
59+
PROTO(unswizzle_box_ ## m)
60+
X_METHODS
61+
#undef X
62+
63+
/*
 * Table of implementations under test, one row per X_METHODS entry.
 * crosscheck() uses entry 0 as the reference that all later entries
 * must match.
 */
const Method methods[] = {
#define X(m) { #m, swizzle_box_ ## m, unswizzle_box_ ## m },
    X_METHODS
#undef X
};
68+
69+
/*
 * Number of elements in the array `x`. Only valid for true arrays, not for
 * pointers. The argument is parenthesized so any array-typed expression
 * expands safely.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
70+
71+
/*
 * Parameter grids walked by crosscheck(): every combination of width,
 * height, depth and bytes-per-pixel is tested. The tables are read-only
 * and private to this file.
 */
static const int widths[]  = { 1, 2, 4, 8, 16, 32 };
static const int heights[] = { 1, 2, 4, 8, 16, 32 };
static const int depths[]  = { 1, 2, 4, 8, 16, 32 };
static const int bpps[]    = { 1, 2, 3, 4 };
75+
76+
static void crosscheck(void)
77+
{
78+
assert(ARRAY_SIZE(methods) > 0);
79+
fprintf(stderr, "%s...", __func__);
80+
for (int row_pitch_adjust = 0; row_pitch_adjust < 4; row_pitch_adjust++)
81+
for (int slice_pitch_adjust = 0; slice_pitch_adjust < 4; slice_pitch_adjust++)
82+
for (int depth_idx = 0; depth_idx < ARRAY_SIZE(depths); depth_idx++)
83+
for (int width_idx = 0; width_idx < ARRAY_SIZE(widths); width_idx++)
84+
for (int height_idx = 0; height_idx < ARRAY_SIZE(heights); height_idx++)
85+
for (int bpp_idx = 0; bpp_idx < ARRAY_SIZE(bpps); bpp_idx++) {
86+
87+
int width = widths[width_idx];
88+
int height = heights[height_idx];
89+
int depth = depths[depth_idx];
90+
int bpp = bpps[bpp_idx];
91+
92+
size_t row_pitch = width * bpp + row_pitch_adjust;
93+
size_t slice_pitch = row_pitch * height;
94+
size_t size_bytes = slice_pitch * depth + slice_pitch_adjust;
95+
96+
uint8_t *original_data = malloc(size_bytes);
97+
for (int i = 0; i < size_bytes; i++) {
98+
original_data[i] = rand();
99+
}
100+
101+
void *swizzled_data_A = malloc(size_bytes);
102+
memcpy(swizzled_data_A, original_data, size_bytes);
103+
methods[0].swizzle(original_data, width, height, depth, swizzled_data_A,
104+
row_pitch, slice_pitch, bpp);
105+
106+
void *unswizzled_data_A = malloc(size_bytes);
107+
memcpy(unswizzled_data_A, original_data, size_bytes);
108+
methods[0].unswizzle(swizzled_data_A, width, height, depth,
109+
unswizzled_data_A, row_pitch, slice_pitch, bpp);
110+
assert(!memcmp(original_data, unswizzled_data_A, size_bytes));
111+
112+
for (int method_idx = 1;
113+
method_idx < ARRAY_SIZE(methods);
114+
method_idx++) {
115+
void *swizzled_data_B = malloc(size_bytes);
116+
memcpy(swizzled_data_B, original_data, size_bytes);
117+
methods[method_idx].swizzle(original_data, width, height, depth,
118+
swizzled_data_B, row_pitch, slice_pitch,
119+
bpp);
120+
assert(!memcmp(swizzled_data_B, swizzled_data_A, size_bytes));
121+
122+
void *unswizzled_data_B = malloc(size_bytes);
123+
memcpy(unswizzled_data_B, original_data, size_bytes);
124+
methods[method_idx].unswizzle(swizzled_data_B, width, height, depth,
125+
unswizzled_data_B, row_pitch,
126+
slice_pitch, bpp);
127+
assert(!memcmp(original_data, unswizzled_data_B, size_bytes));
128+
129+
free(unswizzled_data_B);
130+
free(swizzled_data_B);
131+
}
132+
133+
free(unswizzled_data_A);
134+
free(swizzled_data_A);
135+
free(original_data);
136+
137+
// fprintf(stderr, "w:%d, h:%d, d:%d, bpp:%d pitch:%d,%d\n", width, height, depth, bpp, row_pitch_adjust, slice_pitch_adjust);
138+
}
139+
140+
fprintf(stderr, "ok!\n");
141+
}
142+
143+
/* Number of timed runs collected per method by bench(). */
enum { NUM_ITERATIONS = 10 };
144+
145+
/*
 * qsort() comparator ordering ints ascending.
 *
 * Returns a negative, zero or positive value when *a is less than, equal
 * to or greater than *b. It compares rather than subtracts, so it cannot
 * overflow for operands that are far apart (e.g. INT_MAX vs INT_MIN).
 */
static int compare_ints(const void *a, const void *b)
{
    const int lhs = *(const int *)a;
    const int rhs = *(const int *)b;

    return (lhs > rhs) - (lhs < rhs);
}
149+
150+
static void bench(void)
151+
{
152+
fprintf(stderr, "%s...", __func__);
153+
154+
int width = 256;
155+
int height = 256;
156+
int depth = 256;
157+
int bpp = 4;
158+
159+
size_t row_pitch = width * bpp;
160+
size_t slice_pitch = row_pitch * height;
161+
size_t size_bytes = slice_pitch * depth;
162+
size_t size_mib = size_bytes / (1024*1024);
163+
fprintf(stderr, "with w: %d, h: %d, d: %d, bpp: %d, "
164+
"size: %zu MiB, iterations: %d\n",
165+
width, height, depth, bpp, size_mib, NUM_ITERATIONS);
166+
167+
void *original_data = malloc(size_bytes);
168+
memset(original_data, 0, size_bytes);
169+
170+
void *swizzled_data = malloc(size_bytes);
171+
memset(swizzled_data, 0, size_bytes);
172+
173+
174+
for (int method_idx = 0; method_idx < ARRAY_SIZE(methods); method_idx++) {
175+
const Method * const method = &methods[method_idx];
176+
fprintf(stderr, "[%6s] ", method->name);
177+
178+
int samples[NUM_ITERATIONS];
179+
int sum = 0;
180+
181+
for (int iter = 0; iter < NUM_ITERATIONS; iter++ ) {
182+
struct timespec start, end;
183+
184+
clock_gettime(CLOCK_MONOTONIC, &start);
185+
method->swizzle(original_data, width, height, depth, swizzled_data, row_pitch, slice_pitch, bpp);
186+
clock_gettime(CLOCK_MONOTONIC, &end);
187+
188+
uint64_t start_ns = (uint64_t)start.tv_sec * (uint64_t)1000000000 + start.tv_nsec;
189+
uint64_t end_ns = (uint64_t)end.tv_sec * (uint64_t)1000000000 + end.tv_nsec;
190+
191+
samples[iter] = (end_ns - start_ns) / 1000;
192+
sum += samples[iter];
193+
}
194+
195+
qsort(samples, ARRAY_SIZE(samples), sizeof(samples[0]), compare_ints);
196+
197+
int min = samples[0],
198+
max = samples[ARRAY_SIZE(samples) - 1],
199+
avg = sum / ARRAY_SIZE(samples),
200+
med = samples[ARRAY_SIZE(samples) / 2];
201+
fprintf(stderr, "min: %6d us, max: %6d us, avg: %6d us, med: %6d us -- %.2g GiB/s\n",
202+
min, max, avg, med, (size_mib / 1024.0) / (med / 1000000.0));
203+
}
204+
205+
free(swizzled_data);
206+
free(original_data);
207+
}
208+
209+
/*
 * Entry point: run the correctness cross-check first, then the benchmark.
 * Command-line arguments are ignored.
 */
int main(int argc, char const *argv[])
{
    (void)argc;
    (void)argv;

    /* Fixed seed so the random test data is the same on every run. */
    srand(1337);

    crosscheck();
    bench();

    return 0;
}

0 commit comments

Comments
 (0)