Skip to content

Commit f3ea6de

Browse files
committed
CPU (Linux): detects march on aarch64
1 parent 7611d67 commit f3ea6de

File tree

3 files changed

+251
-70
lines changed

3 files changed

+251
-70
lines changed

src/detection/cpu/cpu.c

Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,247 @@ const char* ffCPUQualcommCodeToName(uint32_t code)
6060
default: return NULL;
6161
}
6262
}
63+
64+
#if defined(__x86_64__) || defined(__i386__)
65+
66+
#include <cpuid.h>
67+
68+
/**
 * Fills in CPU details obtained through the CPUID instruction (x86 / x86_64).
 *
 * - Leaf 0x16 provides the base and maximum frequency, stored in
 *   cpu->frequencyBase and cpu->frequencyMax. Zero values are ignored so
 *   that whatever the caller stored before is kept.
 * - Leaf 1, leaf 7 and XCR0 are used to classify the CPU into an x86-64
 *   micro-architecture level, stored in cpu->march as a string literal.
 *
 * @param cpu Result structure to update. A field is only written when the
 *            CPUID leaf it depends on is available.
 */
void ffCPUDetectByCpuid(FFCPUResult* cpu)
{
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    if (__get_cpuid(0x16, &eax, &ebx, &ecx, &edx))
    {
        // WARNING: CPUID may report frequencies of efficient cores
        // cpuid returns 0 MHz when hypervisor is enabled
        if (eax) cpu->frequencyBase = eax;
        if (ebx) cpu->frequencyMax = ebx;
    }

    if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
    {
        // Feature tests (leaf1.edx, leaf1.ecx, leaf7.ebx)

        // SSE2 is reported in leaf1.EDX (bit 26). The same bit position in
        // ECX means XSAVE, so testing ECX here would check the wrong feature.
        bool sse2 = (edx & bit_SSE2) != 0;
        bool sse4_2 = (ecx & bit_SSE4_2) != 0;
        bool popcnt = (ecx & bit_POPCNT) != 0;
        bool fma = (ecx & bit_FMA) != 0;
        bool osxsave = (ecx & bit_OSXSAVE) != 0;

        // The return value is intentionally not checked: when leaf 7 is not
        // available the outputs keep their zero initializers, which reads as
        // "feature absent" for every test below.
        unsigned int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0;
        __get_cpuid_count(7, 0, &eax7, &ebx7, &ecx7, &edx7);

        bool avx2 = (ebx7 & bit_AVX2) != 0;
        bool bmi2 = (ebx7 & bit_BMI2) != 0;
        bool avx512f = (ebx7 & bit_AVX512F) != 0;
        bool avx512bw = (ebx7 & bit_AVX512BW) != 0;
        bool avx512dq = (ebx7 & bit_AVX512DQ) != 0;

        // OS support for AVX/AVX512: check XGETBV (requires OSXSAVE)
        bool avx_os = false;
        bool avx512_os = false;
        if (osxsave)
        {
            // eax/edx from leaf 1 are no longer needed at this point and are
            // reused as the low/high halves of XCR0.
            __asm__ __volatile__(
                "xgetbv"
                : "=a"(eax), "=d"(edx)
                : "c"(0)
                :
            );
            uint64_t xcr0 = ((uint64_t)edx << 32) | eax;

            // AVX requires XCR0[1:2] == 11b (XMM and YMM state)
            avx_os = (xcr0 & 0x6ULL) == 0x6ULL;
            // AVX512 additionally requires XCR0[5:7] (opmask, ZMM_Hi256, Hi16_ZMM): mask 0xE6 (bits 1,2,5,6,7)
            avx512_os = (xcr0 & 0xE6ULL) == 0xE6ULL;
        }

        // Pick the highest level whose (approximated) requirements are met.
        // PCLMUL is deliberately not required for v2: it is not part of the
        // x86-64-v2 feature set, and requiring it would classify CPUs with
        // SSE4.2 + POPCNT but without PCLMUL as v1.
        cpu->march = "unknown";
        if (avx512f && avx512bw && avx512dq && avx512_os) cpu->march = "x86_64-v4";
        else if (avx2 && fma && bmi2 && avx_os) cpu->march = "x86_64-v3";
        else if (sse4_2 && popcnt) cpu->march = "x86_64-v2";
        else if (sse2) cpu->march = "x86_64-v1";
    }
}
124+
125+
#elif defined(__aarch64__) || defined(__arm__)
126+
127+
#ifdef __linux__
128+
#include "common/io/io.h"
129+
#include <elf.h>
130+
#include <asm/hwcap.h>
131+
132+
// Fallback definitions for hwcap bits that older kernel headers do not provide.
// Bits above 31 use 1ULL so the shift is well defined even when
// `unsigned long` is only 32 bits wide.
#ifndef HWCAP_CMPBR
    #define HWCAP_CMPBR (1ULL << 33)
#endif
#ifndef HWCAP_FPRCVT
    #define HWCAP_FPRCVT (1ULL << 34)
#endif
#ifndef HWCAP2_SME
    #define HWCAP2_SME (1 << 23)
#endif
#ifndef HWCAP2_SME2
    #define HWCAP2_SME2 (1ULL << 37)
#endif
#ifndef HWCAP2_CSSC
    #define HWCAP2_CSSC (1ULL << 34)
#endif
#ifndef HWCAP2_SME2P1
    #define HWCAP2_SME2P1 (1ULL << 38)
#endif
#ifndef HWCAP2_MOPS
    #define HWCAP2_MOPS (1ULL << 43)
#endif
#ifndef HWCAP2_F8E4M3
    #define HWCAP2_F8E4M3 (1ULL << 55)
#endif
#ifndef HWCAP2_F8E5M2
    #define HWCAP2_F8E5M2 (1ULL << 56)
#endif
153+
154+
/**
 * Estimates the Arm architecture revision from the hardware capability bits
 * found in /proc/self/auxv (Linux only) and stores it in cpu->march.
 *
 * The AT_HWCAP and AT_HWCAP2 entries are extracted from the auxiliary vector
 * and matched, newest revision first, against sets of feature bits.
 *
 * @param cpu Result structure to update. cpu->march is left untouched when
 *            the file cannot be read or no non-zero AT_HWCAP entry is found;
 *            otherwise it is set to a string literal ("unknown" when no
 *            feature set matches).
 */
void ffCPUDetectByCpuid(FFCPUResult* cpu)
{
    // This is not accurate because a lot of flags are optional in old versions
    // https://developer.arm.com/documentation/109697/2025_06/Feature-descriptions?lang=en
    // https://en.wikipedia.org/wiki/AArch64#ARM-A_(application_architecture)
    // Worth noting: Apple M1 is marked as ARMv8.5-A on Wikipedia, but it lacks BTI (mandatory in v8.5)

    // Read directly into an array of auxv entries: this guarantees the
    // alignment Elf64_auxv_t needs, which a plain char buffer does not.
    Elf64_auxv_t entries[PROC_FILE_BUFFSIZ / sizeof(Elf64_auxv_t)];
    ssize_t nRead = ffReadFileData("/proc/self/auxv", sizeof(entries), (char*) entries);

    if (nRead < (ssize_t) sizeof(Elf64_auxv_t)) return;

    // Only walk entries that were read completely; a truncated trailing
    // entry would expose uninitialized bytes.
    size_t count = (size_t) nRead / sizeof(Elf64_auxv_t);

    uint64_t hwcap = 0, hwcap2 = 0;

    for (size_t i = 0; i < count; ++i)
    {
        if (entries[i].a_type == AT_HWCAP)
        {
            hwcap = entries[i].a_un.a_val;
        }
        else if (entries[i].a_type == AT_HWCAP2)
        {
            hwcap2 = entries[i].a_un.a_val;
        }
    }

    if (!hwcap) return;

    cpu->march = "unknown";

    // ARMv8-A
    bool has_fp = (hwcap & HWCAP_FP) != 0;
    bool has_asimd = (hwcap & HWCAP_ASIMD) != 0;

    // ARMv8.1-A
    bool has_atomics = (hwcap & HWCAP_ATOMICS) != 0; // optional in v8.0
    bool has_crc32 = (hwcap & HWCAP_CRC32) != 0; // optional in v8.0
    bool has_asimdrdm = (hwcap & HWCAP_ASIMDRDM) != 0; // optional in v8.0

    // ARMv8.2-A
    bool has_fphp = (hwcap & HWCAP_FPHP) != 0; // optional
    bool has_dcpop = (hwcap & HWCAP_DCPOP) != 0; // DC CVAP, optional in v8.1

    // ARMv8.3-A
    bool has_paca = (hwcap & HWCAP_PACA) != 0; // optional in v8.2
    bool has_pacg = (hwcap & HWCAP_PACG) != 0; // optional in v8.2
    bool has_lrcpc = (hwcap & HWCAP_LRCPC) != 0; // optional in v8.2
    bool has_fcma = (hwcap & HWCAP_FCMA) != 0; // optional in v8.2
    bool has_jscvt = (hwcap & HWCAP_JSCVT) != 0; // optional in v8.2

    // ARMv8.4-A
    bool has_dit = (hwcap & HWCAP_DIT) != 0; // optional in v8.3
    bool has_flagm = (hwcap & HWCAP_FLAGM) != 0; // optional in v8.1
    bool has_ilrcpc = (hwcap & HWCAP_ILRCPC) != 0; // optional in v8.2

    // ARMv8.5-A
    bool has_bti = (hwcap2 & HWCAP2_BTI) != 0; // optional in v8.4
    bool has_sb = (hwcap & HWCAP_SB) != 0; // optional in v8.0
    bool has_dcpodp = (hwcap2 & HWCAP2_DCPODP) != 0; // optional in v8.1
    bool has_flagm2 = (hwcap2 & HWCAP2_FLAGM2) != 0; // optional in v8.4
    bool has_frint = (hwcap2 & HWCAP2_FRINT) != 0; // optional in v8.4

    // ARMv9.0-A
    bool has_sve2 = (hwcap2 & HWCAP2_SVE2) != 0;

    // ARMv9.1-A and ARMv8.6-A (the same pair of flags is used for both)
    bool has_bf16 = (hwcap2 & HWCAP2_BF16) != 0; // optional in v8.2
    bool has_i8mm = (hwcap2 & HWCAP2_I8MM) != 0; // optional in v8.1

    // ARMv8.7-A
    bool has_afp = (hwcap2 & HWCAP2_AFP) != 0; // optional in v8.6

    // ARMv9.2-A
    bool has_sme = (hwcap2 & HWCAP2_SME) != 0;

    // ARMv9.3-A
    bool has_sme2 = (hwcap2 & HWCAP2_SME2) != 0; // optional in v9.2

    // ARMv8.8-A
    bool has_mops = (hwcap2 & HWCAP2_MOPS) != 0; // optional in v8.7

    // ARMv8.9-A
    bool has_cssc = (hwcap2 & HWCAP2_CSSC) != 0; // optional in v8.7

    // ARMv9.4-A
    bool has_sme2p1 = (hwcap2 & HWCAP2_SME2P1) != 0; // optional in v9.2

    // ARMv9.5-A
    bool has_f8e4m3 = (hwcap2 & HWCAP2_F8E4M3) != 0;
    bool has_f8e5m2 = (hwcap2 & HWCAP2_F8E5M2) != 0;

    // ARMv9.6-A
    bool has_cmpbr = (hwcap & HWCAP_CMPBR) != 0; // optional in v9.5
    bool has_fprcvt = (hwcap & HWCAP_FPRCVT) != 0; // optional in v9.5

    // SVE2 or SME is taken as the marker for the ARMv9 family; within each
    // family the newest revision whose marker flags are all present wins.
    if (has_sve2 || has_sme) {
        // ARMv9
        if (has_cmpbr && has_fprcvt) {
            cpu->march = "ARMv9.6-A";
        } else if (has_f8e5m2 && has_f8e4m3) {
            cpu->march = "ARMv9.5-A";
        } else if (has_sme2p1) {
            cpu->march = "ARMv9.4-A";
        } else if (has_sme2) {
            cpu->march = "ARMv9.3-A";
        } else if (has_sme) {
            cpu->march = "ARMv9.2-A";
        } else if (has_i8mm && has_bf16) {
            cpu->march = "ARMv9.1-A";
        } else {
            cpu->march = "ARMv9.0-A";
        }
    } else {
        // ARMv8
        if (has_cssc) {
            cpu->march = "ARMv8.9-A";
        } else if (has_mops) {
            cpu->march = "ARMv8.8-A";
        } else if (has_afp) {
            cpu->march = "ARMv8.7-A";
        } else if (has_i8mm && has_bf16) {
            cpu->march = "ARMv8.6-A";
        } else if (has_bti && has_sb && has_dcpodp && has_flagm2 && has_frint) {
            cpu->march = "ARMv8.5-A";
        } else if (has_dit && has_flagm && has_ilrcpc) {
            cpu->march = "ARMv8.4-A";
        } else if (has_pacg && has_paca && has_lrcpc && has_fcma && has_jscvt) {
            cpu->march = "ARMv8.3-A";
        } else if (has_fphp && has_dcpop) {
            cpu->march = "ARMv8.2-A";
        } else if (has_atomics && has_crc32 && has_asimdrdm) {
            cpu->march = "ARMv8.1-A";
        } else if (has_asimd && has_fp) {
            cpu->march = "ARMv8-A";
        }
    }
}
292+
#else
293+
// ARM build for an OS other than Linux: no detection is implemented here,
// so the result structure is handed back unchanged.
void ffCPUDetectByCpuid(FF_MAYBE_UNUSED FFCPUResult* cpu)
{
    // Unsupported platform: nothing to probe, *cpu is not modified
}
297+
#endif // __linux__
298+
299+
#else
300+
301+
// Architecture that is neither x86 nor ARM: no detection is implemented
// here, so the result structure is handed back unchanged.
void ffCPUDetectByCpuid(FF_MAYBE_UNUSED FFCPUResult* cpu)
{
    // Unsupported platform: nothing to probe, *cpu is not modified
}
305+
306+
#endif

src/detection/cpu/cpu.h

Lines changed: 1 addition & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -33,73 +33,4 @@ typedef struct FFCPUResult
3333
const char* ffDetectCPU(const FFCPUOptions* options, FFCPUResult* cpu);
3434
const char* ffCPUAppleCodeToName(uint32_t code);
3535
const char* ffCPUQualcommCodeToName(uint32_t code);
36-
37-
#if defined(__x86_64__) || defined(__i386__)
38-
39-
#include <cpuid.h>
40-
41-
inline static void ffCPUDetectByCpuid(FFCPUResult* cpu)
42-
{
43-
uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
44-
if (__get_cpuid(0x16, &eax, &ebx, &ecx, &edx))
45-
{
46-
// WARNING: CPUID may report frequencies of efficient cores
47-
// cpuid returns 0 MHz when hypervisor is enabled
48-
if (eax) cpu->frequencyBase = eax;
49-
if (ebx) cpu->frequencyMax = ebx;
50-
}
51-
52-
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
53-
{
54-
// Feature tests (leaf1.ecx, leaf7.ebx)
55-
bool sse2 = (ecx & bit_SSE2) != 0;
56-
bool sse4_2 = (ecx & bit_SSE4_2) != 0;
57-
bool pclmul = (ecx & bit_PCLMUL) != 0;
58-
bool popcnt = (ecx & bit_POPCNT) != 0;
59-
bool fma = (ecx & bit_FMA) != 0;
60-
bool osxsave = (ecx & bit_OSXSAVE) != 0;
61-
62-
unsigned int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0;
63-
__get_cpuid_count(7, 0, &eax7, &ebx7, &ecx7, &edx7);
64-
65-
bool avx2 = (ebx7 & bit_AVX2) != 0;
66-
bool bmi2 = (ebx7 & bit_BMI2) != 0;
67-
bool avx512f = (ebx7 & bit_AVX512F) != 0;
68-
bool avx512bw = (ebx7 & bit_AVX512BW) != 0;
69-
bool avx512dq = (ebx7 & bit_AVX512DQ) != 0;
70-
71-
// OS support for AVX/AVX512: check XGETBV (requires OSXSAVE)
72-
bool avx_os = false;
73-
bool avx512_os = false;
74-
if (osxsave)
75-
{
76-
__asm__ __volatile__(
77-
"xgetbv"
78-
: "=a"(eax), "=d"(edx)
79-
: "c"(0)
80-
:
81-
);
82-
uint64_t xcr0 = ((uint64_t)edx << 32) | eax;
83-
84-
// AVX requires XCR0[1:2] == 11b (XMM and YMM state)
85-
avx_os = (xcr0 & 0x6ULL) == 0x6ULL;
86-
// AVX512 requires XCR0[7,5,6] etc. common mask 0xE6 (bits 1,2,5,6,7)
87-
avx512_os = (xcr0 & 0xE6ULL) == 0xE6ULL;
88-
}
89-
90-
cpu->march = "unknown";
91-
if (avx512f && avx512bw && avx512dq && avx512_os) cpu->march = "x86_64-v4";
92-
else if (avx2 && fma && bmi2 && avx_os) cpu->march = "x86_64-v3";
93-
else if (sse4_2 && popcnt && pclmul) cpu->march = "x86_64-v2";
94-
else if (sse2) cpu->march = "x86_64-v1";
95-
}
96-
}
97-
98-
#else
99-
100-
inline static void ffCPUDetectByCpuid(FF_MAYBE_UNUSED FFCPUResult* cpu)
101-
{
102-
// Unsupported platform
103-
}
104-
105-
#endif
36+
// Updates *cpu with details probed from the running CPU. The definition in
// cpu.c is selected per architecture; on unsupported platforms it does nothing.
void ffCPUDetectByCpuid(FFCPUResult* cpu);

src/detection/cpu/cpu_linux.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,13 @@ static double parseHwmonDir(FFstrbuf* dir, FFstrbuf* buffer)
4646

4747
if(
4848
ffStrbufContainS(buffer, "cpu") ||
49+
#if __x86_64__ || __i386__
4950
ffStrbufEqualS(buffer, "k10temp") || // AMD
5051
ffStrbufEqualS(buffer, "fam15h_power") || // AMD
5152
ffStrbufEqualS(buffer, "coretemp") // Intel
53+
#else
54+
ffStrbufEqualS(buffer, "temp") // Asahi
55+
#endif
5256
) return value / 1000.;
5357

5458
return FF_CPU_TEMP_UNSET;
@@ -734,6 +738,8 @@ FF_MAYBE_UNUSED static const char* detectCPUOthers(const FFCPUOptions* options,
734738
if (cpu->coresPhysical == 0)
735739
detectPhysicalCores(cpu);
736740

741+
ffCPUDetectByCpuid(cpu);
742+
737743
return NULL;
738744
}
739745
#endif

0 commit comments

Comments
 (0)