@@ -60,3 +60,247 @@ const char* ffCPUQualcommCodeToName(uint32_t code)
60
60
default : return NULL ;
61
61
}
62
62
}
63
+
64
+ #if defined(__x86_64__ ) || defined(__i386__ )
65
+
66
+ #include <cpuid.h>
67
+
68
+ void ffCPUDetectByCpuid (FFCPUResult * cpu )
69
+ {
70
+ uint32_t eax = 0 , ebx = 0 , ecx = 0 , edx = 0 ;
71
+ if (__get_cpuid (0x16 , & eax , & ebx , & ecx , & edx ))
72
+ {
73
+ // WARNING: CPUID may report frequencies of efficient cores
74
+ // cpuid returns 0 MHz when hypervisor is enabled
75
+ if (eax ) cpu -> frequencyBase = eax ;
76
+ if (ebx ) cpu -> frequencyMax = ebx ;
77
+ }
78
+
79
+ if (__get_cpuid (1 , & eax , & ebx , & ecx , & edx ))
80
+ {
81
+ // Feature tests (leaf1.ecx, leaf7.ebx)
82
+ bool sse2 = (ecx & bit_SSE2 ) != 0 ;
83
+ bool sse4_2 = (ecx & bit_SSE4_2 ) != 0 ;
84
+ bool pclmul = (ecx & bit_PCLMUL ) != 0 ;
85
+ bool popcnt = (ecx & bit_POPCNT ) != 0 ;
86
+ bool fma = (ecx & bit_FMA ) != 0 ;
87
+ bool osxsave = (ecx & bit_OSXSAVE ) != 0 ;
88
+
89
+ unsigned int eax7 = 0 , ebx7 = 0 , ecx7 = 0 , edx7 = 0 ;
90
+ __get_cpuid_count (7 , 0 , & eax7 , & ebx7 , & ecx7 , & edx7 );
91
+
92
+ bool avx2 = (ebx7 & bit_AVX2 ) != 0 ;
93
+ bool bmi2 = (ebx7 & bit_BMI2 ) != 0 ;
94
+ bool avx512f = (ebx7 & bit_AVX512F ) != 0 ;
95
+ bool avx512bw = (ebx7 & bit_AVX512BW ) != 0 ;
96
+ bool avx512dq = (ebx7 & bit_AVX512DQ ) != 0 ;
97
+
98
+ // OS support for AVX/AVX512: check XGETBV (requires OSXSAVE)
99
+ bool avx_os = false;
100
+ bool avx512_os = false;
101
+ if (osxsave )
102
+ {
103
+ __asm__ __volatile__(
104
+ "xgetbv"
105
+ : "=a" (eax ), "=d" (edx )
106
+ : "c" (0 )
107
+ :
108
+ );
109
+ uint64_t xcr0 = ((uint64_t )edx << 32 ) | eax ;
110
+
111
+ // AVX requires XCR0[1:2] == 11b (XMM and YMM state)
112
+ avx_os = (xcr0 & 0x6ULL ) == 0x6ULL ;
113
+ // AVX512 requires XCR0[7,5,6] etc. common mask 0xE6 (bits 1,2,5,6,7)
114
+ avx512_os = (xcr0 & 0xE6ULL ) == 0xE6ULL ;
115
+ }
116
+
117
+ cpu -> march = "unknown" ;
118
+ if (avx512f && avx512bw && avx512dq && avx512_os ) cpu -> march = "x86_64-v4" ;
119
+ else if (avx2 && fma && bmi2 && avx_os ) cpu -> march = "x86_64-v3" ;
120
+ else if (sse4_2 && popcnt && pclmul ) cpu -> march = "x86_64-v2" ;
121
+ else if (sse2 ) cpu -> march = "x86_64-v1" ;
122
+ }
123
+ }
124
+
125
+ #elif defined(__aarch64__ ) || defined(__arm__ )
126
+
127
+ #ifdef __linux__
128
+ #include "common/io/io.h"
129
+ #include <elf.h>
130
+ #include <asm/hwcap.h>
131
+
132
// Fallback definitions for hwcap bits that older kernel headers may not provide.
// Values follow the Linux arm64 UAPI <asm/hwcap.h>. 1ULL keeps every shift
// well-defined even where unsigned long is only 32 bits wide.

// AT_HWCAP bits
#ifndef HWCAP_CMPBR
#define HWCAP_CMPBR (1ULL << 33)
#endif
#ifndef HWCAP_FPRCVT
#define HWCAP_FPRCVT (1ULL << 34)
#endif

// AT_HWCAP2 bits
#ifndef HWCAP2_SME
#define HWCAP2_SME (1ULL << 23)
#endif
#ifndef HWCAP2_SME2
#define HWCAP2_SME2 (1ULL << 37)
#endif
#ifndef HWCAP2_CSSC
#define HWCAP2_CSSC (1ULL << 34)
#endif
#ifndef HWCAP2_SME2P1
#define HWCAP2_SME2P1 (1ULL << 38)
#endif
#ifndef HWCAP2_MOPS
#define HWCAP2_MOPS (1ULL << 43)
#endif
#ifndef HWCAP2_F8E4M3
#define HWCAP2_F8E4M3 (1ULL << 55)
#endif
#ifndef HWCAP2_F8E5M2
#define HWCAP2_F8E5M2 (1ULL << 56)
#endif
153
+
154
+ void ffCPUDetectByCpuid (FFCPUResult * cpu )
155
+ {
156
+ // This is not accurate because a lot of flags are optional in old versions
157
+ // https://developer.arm.com/documentation/109697/2025_06/Feature-descriptions?lang=en
158
+ // https://en.wikipedia.org/wiki/AArch64#ARM-A_(application_architecture)
159
+ // Worth noting: Apple M1 is marked as ARMv8.5-A on Wikipedia, but it lacks BTI (mandatory in v8.5)
160
+
161
+ char buf [PROC_FILE_BUFFSIZ ];
162
+ ssize_t nRead = ffReadFileData ("/proc/self/auxv" , ARRAY_SIZE (buf ), buf );
163
+
164
+ if (nRead < (ssize_t ) sizeof (Elf64_auxv_t )) return ;
165
+
166
+ uint64_t hwcap = 0 , hwcap2 = 0 ;
167
+
168
+ for (Elf64_auxv_t * auxv = (Elf64_auxv_t * )buf ; (char * )auxv < buf + nRead ; ++ auxv )
169
+ {
170
+ if (auxv -> a_type == AT_HWCAP )
171
+ {
172
+ hwcap = auxv -> a_un .a_val ;
173
+ }
174
+ else if (auxv -> a_type == AT_HWCAP2 )
175
+ {
176
+ hwcap2 = auxv -> a_un .a_val ;
177
+ }
178
+ }
179
+
180
+ if (!hwcap ) return ;
181
+
182
+ cpu -> march = "unknown" ;
183
+
184
+ // ARMv8-A
185
+ bool has_fp = (hwcap & HWCAP_FP ) != 0 ;
186
+ bool has_asimd = (hwcap & HWCAP_ASIMD ) != 0 ;
187
+
188
+ // ARMv8.1-A
189
+ bool has_atomics = (hwcap & HWCAP_ATOMICS ) != 0 ; // optional in v8.0
190
+ bool has_crc32 = (hwcap & HWCAP_CRC32 ) != 0 ; // optional in v8.0
191
+ bool has_asimdrdm = (hwcap & HWCAP_ASIMDRDM ) != 0 ; // optional in v8.0
192
+
193
+ // ARMv8.2-A
194
+ bool has_fphp = (hwcap & HWCAP_FPHP ) != 0 ; // optional
195
+ bool has_dcpop = (hwcap & HWCAP_DCPOP ) != 0 ; // DC CVAP, optional in v8.1
196
+
197
+ // ARMv8.3-A
198
+ bool has_paca = (hwcap & HWCAP_PACA ) != 0 ; // optional in v8.2
199
+ bool has_pacg = (hwcap & HWCAP_PACG ) != 0 ; // optional in v8.2
200
+ bool has_lrcpc = (hwcap & HWCAP_LRCPC ) != 0 ; // optional in v8.2
201
+ bool has_fcma = (hwcap & HWCAP_FCMA ) != 0 ; // optional in v8.2
202
+ bool has_jscvt = (hwcap & HWCAP_JSCVT ) != 0 ; // optional in v8.2
203
+
204
+ // ARMv8.4-A
205
+ bool has_dit = (hwcap & HWCAP_DIT ) != 0 ; // optional in v8.3
206
+ bool has_flagm = (hwcap & HWCAP_FLAGM ) != 0 ; // optinal in v8.1
207
+ bool has_ilrcpc = (hwcap & HWCAP_ILRCPC ) != 0 ; // optinal in v8.2
208
+
209
+ // ARMv8.5-A
210
+ bool has_bti = (hwcap2 & HWCAP2_BTI ) != 0 ; // optional in v8.4
211
+ bool has_sb = (hwcap & HWCAP_SB ) != 0 ; // optional in v8.0
212
+ bool has_dcpodp = (hwcap2 & HWCAP2_DCPODP ) != 0 ; // optional in v8.1
213
+ bool has_flagm2 = (hwcap2 & HWCAP2_FLAGM2 ) != 0 ; // optional in v8.4
214
+ bool has_frint = (hwcap2 & HWCAP2_FRINT ) != 0 ; // optional in v8.4
215
+
216
+ // ARMv9.0-A
217
+ bool has_sve2 = (hwcap2 & HWCAP2_SVE2 ) != 0 ;
218
+
219
+ // ARMv9.1-A
220
+ // ARMv8.6-A
221
+ bool has_bf16 = (hwcap2 & HWCAP2_BF16 ) != 0 ; // optional in v8.2
222
+ bool has_i8mm = (hwcap2 & HWCAP2_I8MM ) != 0 ; // optional in v8.1
223
+
224
+ // ARMv8.7-A
225
+ bool has_afp = (hwcap2 & HWCAP2_AFP ) != 0 ; // optional in v8.6
226
+
227
+ // ARMv9.2-A
228
+ bool has_sme = (hwcap2 & HWCAP2_SME ) != 0 ;
229
+
230
+ // ARMv9.3-A
231
+ bool has_sme2 = (hwcap2 & HWCAP2_SME2 ) != 0 ; // optional in v9.2
232
+
233
+ // ARMv8.8-A
234
+ bool has_mops = (hwcap2 & HWCAP2_MOPS ) != 0 ; // optional in v8.7
235
+
236
+ // ARMv8.9-A
237
+ bool has_cssc = (hwcap2 & HWCAP2_CSSC ) != 0 ; // optional in v8.7
238
+
239
+ // ARMv9.4-A
240
+ bool has_sme2p1 = (hwcap2 & HWCAP2_SME2P1 ) != 0 ; // optional in v9.2
241
+
242
+ // ARMv9.5-A
243
+ bool has_f8e4m3 = (hwcap2 & HWCAP2_F8E4M3 ) != 0 ;
244
+ bool has_f8e5m2 = (hwcap2 & HWCAP2_F8E5M2 ) != 0 ;
245
+
246
+ // ARMv9.6-A
247
+ bool has_cmpbr = (hwcap & HWCAP_CMPBR ) != 0 ; // optional in v9.5
248
+ bool has_fprcvt = (hwcap & HWCAP_FPRCVT ) != 0 ; // optional in v9.5
249
+
250
+ if (has_sve2 || has_sme ) {
251
+ // ARMv9
252
+ if (has_cmpbr && has_fprcvt ) {
253
+ cpu -> march = "ARMv9.6-A" ;
254
+ } else if (has_f8e5m2 && has_f8e4m3 ) {
255
+ cpu -> march = "ARMv9.5-A" ;
256
+ } else if (has_sme2p1 ) {
257
+ cpu -> march = "ARMv9.4-A" ;
258
+ } else if (has_sme2 ) {
259
+ cpu -> march = "ARMv9.3-A" ;
260
+ } else if (has_sme ) {
261
+ cpu -> march = "ARMv9.2-A" ;
262
+ } else if (has_i8mm && has_bf16 ) {
263
+ cpu -> march = "ARMv9.1-A" ;
264
+ } else {
265
+ cpu -> march = "ARMv9.0-A" ;
266
+ }
267
+ } else {
268
+ // ARMv8
269
+ if (has_cssc ) {
270
+ cpu -> march = "ARMv8.9-A" ;
271
+ } else if (has_mops ) {
272
+ cpu -> march = "ARMv8.8-A" ;
273
+ } else if (has_afp ) {
274
+ cpu -> march = "ARMv8.7-A" ;
275
+ } else if (has_i8mm && has_bf16 ) {
276
+ cpu -> march = "ARMv8.6-A" ;
277
+ } else if (has_bti && has_sb && has_dcpodp && has_flagm2 && has_frint ) {
278
+ cpu -> march = "ARMv8.5-A" ;
279
+ } else if (has_dit && has_flagm && has_ilrcpc ) {
280
+ cpu -> march = "ARMv8.4-A" ;
281
+ } else if (has_pacg && has_paca && has_lrcpc && has_fcma && has_jscvt ) {
282
+ cpu -> march = "ARMv8.3-A" ;
283
+ } else if (has_fphp && has_dcpop ) {
284
+ cpu -> march = "ARMv8.2-A" ;
285
+ } else if (has_atomics && has_crc32 && has_asimdrdm ) {
286
+ cpu -> march = "ARMv8.1-A" ;
287
+ } else if (has_asimd && has_fp ) {
288
+ cpu -> march = "ARMv8-A" ;
289
+ }
290
+ }
291
+ }
292
+ #else
293
+ void ffCPUDetectByCpuid (FF_MAYBE_UNUSED FFCPUResult * cpu )
294
+ {
295
+ // Unsupported platform
296
+ }
297
+ #endif // __linux__
298
+
299
+ #else
300
+
301
+ void ffCPUDetectByCpuid (FF_MAYBE_UNUSED FFCPUResult * cpu )
302
+ {
303
+ // Unsupported platform
304
+ }
305
+
306
+ #endif
0 commit comments