53
53
#endif
54
54
55
55
#if defined __ANDROID__ || defined __OHOS__ || __linux__
56
- #include < cstring>
56
+ #include < cstring>
57
57
#if defined __ANDROID__
58
58
#if __ANDROID_API__ >= 18
59
59
#include < sys/auxv.h> // getauxval()
@@ -879,39 +879,46 @@ static int get_cpucount()
879
879
else
880
880
count = 1 ;
881
881
#elif defined _WIN32
882
- typedef BOOL (WINAPI *LPFN_GLPIEX)(LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
882
+ typedef BOOL (WINAPI * LPFN_GLPIEX)(LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
883
883
LPFN_GLPIEX glpiex = (LPFN_GLPIEX)GetProcAddress (GetModuleHandle (TEXT (" kernel32" )), " GetLogicalProcessorInformationEx" );
884
- if (glpiex != NULL ) {
884
+ if (glpiex != NULL )
885
+ {
885
886
DWORD length = 0 ;
886
887
glpiex (RelationAll, NULL , &length);
887
-
888
- if (length > 0 ) {
889
- PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer =
890
- (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc (length);
891
-
892
- if (buffer && glpiex (RelationAll, buffer, &length)) {
888
+
889
+ if (length > 0 )
890
+ {
891
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc (length);
892
+
893
+ if (buffer && glpiex (RelationAll, buffer, &length))
894
+ {
893
895
count = 0 ;
894
896
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX ptr = buffer;
895
897
DWORD offset = 0 ;
896
-
897
- while (offset < length) {
898
- if (ptr->Relationship == RelationProcessorCore) {
899
- for (WORD i = 0 ; i < ptr->Processor .GroupCount ; i++) {
898
+
899
+ while (offset < length)
900
+ {
901
+ if (ptr->Relationship == RelationProcessorCore)
902
+ {
903
+ for (WORD i = 0 ; i < ptr->Processor .GroupCount ; i++)
904
+ {
900
905
count += __popcnt64 (ptr->Processor .GroupMask [i].Mask );
901
906
}
902
907
}
903
908
offset += ptr->Size ;
904
909
ptr = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)((char *)ptr + ptr->Size );
905
910
}
906
911
}
907
-
908
- if (buffer) {
912
+
913
+ if (buffer)
914
+ {
909
915
free (buffer);
910
916
}
911
917
}
912
918
}
913
919
// If cpu's count <= 64, use the previouse version.
914
- if (count == 0 ) {
920
+ if (count == 0 )
921
+ {
915
922
SYSTEM_INFO system_info;
916
923
GetSystemInfo (&system_info);
917
924
count = system_info.dwNumberOfProcessors ;
@@ -1396,14 +1403,14 @@ static ncnn::CpuSet get_smt_cpu_mask()
1396
1403
{
1397
1404
DWORD length = 0 ;
1398
1405
glpiex (RelationProcessorCore, NULL , &length);
1399
-
1406
+
1400
1407
if (length > 0 )
1401
1408
{
1402
1409
std::vector<char > buffer (length);
1403
1410
if (glpiex (RelationProcessorCore, (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)buffer.data (), &length))
1404
1411
{
1405
1412
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX current = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)buffer.data ();
1406
-
1413
+
1407
1414
while ((char *)current < buffer.data () + length)
1408
1415
{
1409
1416
if (current->Relationship == RelationProcessorCore)
@@ -1413,7 +1420,7 @@ static ncnn::CpuSet get_smt_cpu_mask()
1413
1420
{
1414
1421
total_logical_count += __popcnt64 (current->Processor .GroupMask [group].Mask );
1415
1422
}
1416
-
1423
+
1417
1424
if (total_logical_count > 1 )
1418
1425
{
1419
1426
for (WORD group = 0 ; group < current->Processor .GroupCount ; group++)
@@ -1431,15 +1438,15 @@ static ncnn::CpuSet get_smt_cpu_mask()
1431
1438
}
1432
1439
}
1433
1440
}
1434
-
1441
+
1435
1442
current = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)((char *)current + current->Size );
1436
1443
}
1437
-
1444
+
1438
1445
return smt_cpu_mask;
1439
1446
}
1440
1447
}
1441
1448
}
1442
-
1449
+
1443
1450
// Under 64, use the old API
1444
1451
typedef BOOL (WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
1445
1452
LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress (GetModuleHandle (TEXT (" kernel32" )), " GetLogicalProcessorInformation" );
@@ -1465,8 +1472,10 @@ static ncnn::CpuSet get_smt_cpu_mask()
1465
1472
if (logical_count > 1 )
1466
1473
{
1467
1474
ULONG_PTR mask = ptr->ProcessorMask ;
1468
- for (int cpu = 0 ; cpu < 64 && mask; cpu++) {
1469
- if (mask & (1ULL << cpu)) {
1475
+ for (int cpu = 0 ; cpu < 64 && mask; cpu++)
1476
+ {
1477
+ if (mask & (1ULL << cpu))
1478
+ {
1470
1479
smt_cpu_mask.enable (cpu);
1471
1480
mask &= ~(1ULL << cpu);
1472
1481
}
@@ -1534,13 +1543,13 @@ static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask)
1534
1543
groupAffinity.Reserved [0 ] = 0 ;
1535
1544
groupAffinity.Reserved [1 ] = 0 ;
1536
1545
groupAffinity.Reserved [2 ] = 0 ;
1537
-
1546
+
1538
1547
if (!SetThreadGroupAffinity (GetCurrentThread (), &groupAffinity, NULL ))
1539
1548
{
1540
1549
NCNN_LOGE (" SetThreadGroupAffinity failed %d" , GetLastError ());
1541
1550
return -1 ;
1542
1551
}
1543
- break ;
1552
+ break ;
1544
1553
}
1545
1554
}
1546
1555
return 0 ;
@@ -1711,7 +1720,6 @@ static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask)
1711
1720
}
1712
1721
#endif // __APPLE__
1713
1722
1714
-
1715
1723
static void initialize_cpu_thread_affinity_mask (ncnn::CpuSet& mask_all, ncnn::CpuSet& mask_little, ncnn::CpuSet& mask_big)
1716
1724
{
1717
1725
mask_all.disable_all ();
@@ -2254,7 +2262,7 @@ static void initialize_global_cpu_info()
2254
2262
2255
2263
g_cpucount = get_cpucount ();
2256
2264
g_physical_cpucount = get_physical_cpucount ();
2257
- g_powersave = 0 ;
2265
+ g_powersave = 0 ;
2258
2266
initialize_cpu_thread_affinity_mask (g_cpu_affinity_mask_all, g_cpu_affinity_mask_little, g_cpu_affinity_mask_big);
2259
2267
2260
2268
#if (defined _WIN32 && (__aarch64__ || __arm__)) || ((defined __ANDROID__ || defined __linux__) && __riscv)
@@ -2380,42 +2388,46 @@ CpuSet::CpuSet()
2380
2388
void CpuSet::enable (int cpu)
2381
2389
{
2382
2390
if (cpu < 0 || cpu >= max_cpus) return ;
2383
-
2391
+
2384
2392
int group = cpu / 64 ;
2385
2393
int bit = cpu % 64 ;
2386
-
2387
- if (group < MAX_CPU_GROUPS) {
2394
+
2395
+ if (group < MAX_CPU_GROUPS)
2396
+ {
2388
2397
masks[group] |= (1ULL << bit);
2389
2398
}
2390
2399
}
2391
2400
2392
2401
void CpuSet::disable (int cpu)
2393
2402
{
2394
2403
if (cpu < 0 || cpu >= max_cpus) return ;
2395
-
2404
+
2396
2405
int group = cpu / 64 ;
2397
2406
int bit = cpu % 64 ;
2398
-
2399
- if (group < MAX_CPU_GROUPS) {
2407
+
2408
+ if (group < MAX_CPU_GROUPS)
2409
+ {
2400
2410
masks[group] &= ~(1ULL << bit);
2401
2411
}
2402
2412
}
2403
2413
2404
2414
void CpuSet::disable_all ()
2405
2415
{
2406
- for (int i = 0 ; i < MAX_CPU_GROUPS; i++) {
2416
+ for (int i = 0 ; i < MAX_CPU_GROUPS; i++)
2417
+ {
2407
2418
masks[i] = 0 ;
2408
2419
}
2409
2420
}
2410
2421
2411
2422
bool CpuSet::is_enabled (int cpu) const
2412
2423
{
2413
2424
if (cpu < 0 || cpu >= max_cpus) return false ;
2414
-
2425
+
2415
2426
int group = cpu / 64 ;
2416
2427
int bit = cpu % 64 ;
2417
-
2418
- if (group < MAX_CPU_GROUPS) {
2428
+
2429
+ if (group < MAX_CPU_GROUPS)
2430
+ {
2419
2431
return (masks[group] & (1ULL << bit)) != 0 ;
2420
2432
}
2421
2433
return false ;
@@ -2424,15 +2436,17 @@ bool CpuSet::is_enabled(int cpu) const
2424
2436
int CpuSet::num_enabled () const
2425
2437
{
2426
2438
int count = 0 ;
2427
- for (int i = 0 ; i < MAX_CPU_GROUPS; i++) {
2439
+ for (int i = 0 ; i < MAX_CPU_GROUPS; i++)
2440
+ {
2428
2441
count += __builtin_popcountll (masks[i]);
2429
2442
}
2430
2443
return count;
2431
2444
}
2432
2445
2433
2446
ULONG_PTR CpuSet::get_group_mask (int group) const
2434
2447
{
2435
- if (group < 0 || group >= MAX_CPU_GROUPS) {
2448
+ if (group < 0 || group >= MAX_CPU_GROUPS)
2449
+ {
2436
2450
return 0 ;
2437
2451
}
2438
2452
return masks[group];
@@ -2441,8 +2455,10 @@ ULONG_PTR CpuSet::get_group_mask(int group) const
2441
2455
int CpuSet::get_active_group_count () const
2442
2456
{
2443
2457
int count = 0 ;
2444
- for (int i = 0 ; i < MAX_CPU_GROUPS; i++) {
2445
- if (masks[i] != 0 ) {
2458
+ for (int i = 0 ; i < MAX_CPU_GROUPS; i++)
2459
+ {
2460
+ if (masks[i] != 0 )
2461
+ {
2446
2462
count++;
2447
2463
}
2448
2464
}
0 commit comments