Skip to content

Commit 1b3bb3f

Browse files
yok7nihui
authored and committed
Fix popcount64 linking issue and improve compatibility
- Fix undefined reference to __popcountdi2 by adding __POPCNT__ check
- Use Brian Kernighan's algorithm for better fallback performance
- Improve C compatibility by using NULL instead of nullptr
- Use stdint.h instead of cstdint for better C compatibility
- Prioritize MSVC __popcnt64 over GCC builtin for better reliability

This resolves linking errors in environments where compiler builtins are not properly linked, particularly affecting test compilation.
1 parent a356f6e commit 1b3bb3f

File tree

1 file changed

+17
-15
lines changed

1 file changed

+17
-15
lines changed

src/cpu.cpp

Lines changed: 17 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
#if !NCNN_SIMPLESTL
1818
#include <algorithm>
19-
#include <cstdint>
19+
#include <stdint.h>
2020
#include <utility>
2121
#include <vector>
2222
#endif
@@ -1775,7 +1775,7 @@ static void initialize_cpu_thread_affinity_mask(ncnn::CpuSet& mask_all, ncnn::Cp
17751775
if (glpie != NULL)
17761776
{
17771777
DWORD bufferSize = 0;
1778-
glpie(RelationProcessorCore, nullptr, &bufferSize);
1778+
glpie(RelationProcessorCore, NULL, &bufferSize);
17791779
std::vector<BYTE> buffer(bufferSize);
17801780
if (!glpie(RelationProcessorCore, (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(buffer.data()), &bufferSize))
17811781
{
@@ -2425,15 +2425,15 @@ namespace ncnn {
24252425

24262426
// New unified CpuSet implementation supporting >64 CPUs
24272427
CpuSet::CpuSet()
2428-
: fast_mask(0), extended_mask(nullptr), extended_capacity(0), use_extended(false)
2428+
: fast_mask(0), extended_mask(NULL), extended_capacity(0), use_extended(false)
24292429
#if defined _WIN32
24302430
,
24312431
legacy_mask_cache(0),
24322432
legacy_mask_valid(false)
24332433
#endif
24342434
#if defined __ANDROID__ || defined __linux__
24352435
,
2436-
cpu_set_cache(nullptr),
2436+
cpu_set_cache(NULL),
24372437
cpu_set_valid(false)
24382438
#endif
24392439
#if __APPLE__
@@ -2445,15 +2445,15 @@ CpuSet::CpuSet()
24452445
}
24462446

24472447
CpuSet::CpuSet(const CpuSet& other)
2448-
: fast_mask(0), extended_mask(nullptr), extended_capacity(0), use_extended(false)
2448+
: fast_mask(0), extended_mask(NULL), extended_capacity(0), use_extended(false)
24492449
#if defined _WIN32
24502450
,
24512451
legacy_mask_cache(0),
24522452
legacy_mask_valid(false)
24532453
#endif
24542454
#if defined __ANDROID__ || defined __linux__
24552455
,
2456-
cpu_set_cache(nullptr),
2456+
cpu_set_cache(NULL),
24572457
cpu_set_valid(false)
24582458
#endif
24592459
#if __APPLE__
@@ -2494,7 +2494,7 @@ void CpuSet::copy_from(const CpuSet& other)
24942494
if (extended_mask)
24952495
{
24962496
free(extended_mask);
2497-
extended_mask = nullptr;
2497+
extended_mask = NULL;
24982498
}
24992499
extended_capacity = 0;
25002500

@@ -2522,7 +2522,7 @@ void CpuSet::copy_from(const CpuSet& other)
25222522
if (cpu_set_cache)
25232523
{
25242524
CPU_FREE(cpu_set_cache);
2525-
cpu_set_cache = nullptr;
2525+
cpu_set_cache = NULL;
25262526
}
25272527
#endif
25282528
#if __APPLE__
@@ -2673,18 +2673,20 @@ bool CpuSet::is_enabled(int cpu) const
26732673
// Helper function to count bits in a 64-bit integer
26742674
static int popcount64(uint64_t x)
26752675
{
2676-
#if defined(__GNUC__) || defined(__clang__)
2677-
return __builtin_popcountll(x);
2678-
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
2676+
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
26792677
// __popcnt64 is only available on x86/x64, not on ARM
26802678
return (int)__popcnt64(x);
2679+
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__POPCNT__) && !defined(__FREESTANDING__) && !NCNN_SIMPLESTL
2680+
// Only use builtin if POPCNT instruction is available
2681+
return __builtin_popcountll(x);
26812682
#else
2682-
// Fallback implementation for ARM and other architectures
2683+
// Fallback implementation for compatibility
2684+
// Use Brian Kernighan's algorithm for better performance
26832685
int count = 0;
26842686
while (x)
26852687
{
2686-
count += x & 1;
2687-
x >>= 1;
2688+
x &= x - 1; // Clear the lowest set bit
2689+
count++;
26882690
}
26892691
return count;
26902692
#endif
@@ -2842,7 +2844,7 @@ const cpu_set_t* CpuSet::get_cpu_set() const
28422844
{
28432845
cpu_set_cache = CPU_ALLOC(CPU_SETSIZE);
28442846
if (!cpu_set_cache)
2845-
return nullptr;
2847+
return NULL;
28462848
}
28472849

28482850
CPU_ZERO_S(CPU_ALLOC_SIZE(CPU_SETSIZE), cpu_set_cache);

0 commit comments

Comments
 (0)