1616
1717#if !NCNN_SIMPLESTL
1818#include < algorithm>
19- #include < cstdint >
19+ #include < stdint.h >
2020#include < utility>
2121#include < vector>
2222#endif
@@ -1775,7 +1775,7 @@ static void initialize_cpu_thread_affinity_mask(ncnn::CpuSet& mask_all, ncnn::Cp
17751775 if (glpie != NULL )
17761776 {
17771777 DWORD bufferSize = 0 ;
1778- glpie (RelationProcessorCore, nullptr , &bufferSize);
1778+ glpie (RelationProcessorCore, NULL , &bufferSize);
17791779 std::vector<BYTE> buffer (bufferSize);
17801780 if (!glpie (RelationProcessorCore, (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(buffer.data ()), &bufferSize))
17811781 {
@@ -2425,15 +2425,15 @@ namespace ncnn {
24252425
24262426// New unified CpuSet implementation supporting >64 CPUs
24272427CpuSet::CpuSet ()
2428- : fast_mask(0 ), extended_mask(nullptr ), extended_capacity(0 ), use_extended(false )
2428+ : fast_mask(0 ), extended_mask(NULL ), extended_capacity(0 ), use_extended(false )
24292429#if defined _WIN32
24302430 ,
24312431 legacy_mask_cache (0 ),
24322432 legacy_mask_valid(false )
24332433#endif
24342434#if defined __ANDROID__ || defined __linux__
24352435 ,
2436- cpu_set_cache (nullptr ),
2436+ cpu_set_cache (NULL ),
24372437 cpu_set_valid(false )
24382438#endif
24392439#if __APPLE__
@@ -2445,15 +2445,15 @@ CpuSet::CpuSet()
24452445}
24462446
24472447CpuSet::CpuSet (const CpuSet& other)
2448- : fast_mask(0 ), extended_mask(nullptr ), extended_capacity(0 ), use_extended(false )
2448+ : fast_mask(0 ), extended_mask(NULL ), extended_capacity(0 ), use_extended(false )
24492449#if defined _WIN32
24502450 ,
24512451 legacy_mask_cache (0 ),
24522452 legacy_mask_valid(false )
24532453#endif
24542454#if defined __ANDROID__ || defined __linux__
24552455 ,
2456- cpu_set_cache (nullptr ),
2456+ cpu_set_cache (NULL ),
24572457 cpu_set_valid(false )
24582458#endif
24592459#if __APPLE__
@@ -2494,7 +2494,7 @@ void CpuSet::copy_from(const CpuSet& other)
24942494 if (extended_mask)
24952495 {
24962496 free (extended_mask);
2497- extended_mask = nullptr ;
2497+ extended_mask = NULL ;
24982498 }
24992499 extended_capacity = 0 ;
25002500
@@ -2522,7 +2522,7 @@ void CpuSet::copy_from(const CpuSet& other)
25222522 if (cpu_set_cache)
25232523 {
25242524 CPU_FREE (cpu_set_cache);
2525- cpu_set_cache = nullptr ;
2525+ cpu_set_cache = NULL ;
25262526 }
25272527#endif
25282528#if __APPLE__
@@ -2673,18 +2673,20 @@ bool CpuSet::is_enabled(int cpu) const
// Helper function to count the set bits in a 64-bit integer.
//
// x: value whose 1-bits are counted.
// Returns the number of 1-bits in x, in the range 0..64.
static int popcount64(uint64_t x)
{
#if defined(_MSC_VER) && defined(_M_X64)
    // __popcnt64 is only provided for x64 targets (not 32-bit x86, not ARM)
    return (int)__popcnt64(x);
#elif defined(_MSC_VER) && defined(_M_IX86)
    // 32-bit x86 MSVC has no __popcnt64; count each 32-bit half with __popcnt
    return (int)(__popcnt((unsigned int)(x >> 32)) + __popcnt((unsigned int)(x & 0xFFFFFFFFu)));
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__POPCNT__) && !defined(__FREESTANDING__) && !NCNN_SIMPLESTL
    // Only use the builtin when the POPCNT instruction is enabled for this build
    return __builtin_popcountll(x);
#else
    // Portable fallback using Brian Kernighan's algorithm:
    // each iteration clears the lowest set bit, so the loop runs once per 1-bit.
    int count = 0;
    while (x)
    {
        x &= x - 1; // clear the lowest set bit
        count++;
    }
    return count;
#endif
}
@@ -2842,7 +2844,7 @@ const cpu_set_t* CpuSet::get_cpu_set() const
28422844 {
28432845 cpu_set_cache = CPU_ALLOC (CPU_SETSIZE);
28442846 if (!cpu_set_cache)
2845- return nullptr ;
2847+ return NULL ;
28462848 }
28472849
28482850 CPU_ZERO_S (CPU_ALLOC_SIZE (CPU_SETSIZE), cpu_set_cache);
0 commit comments