Skip to content
156 changes: 128 additions & 28 deletions src/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1557,45 +1557,145 @@ static void initialize_cpu_thread_affinity_mask(ncnn::CpuSet& mask_all, ncnn::Cp
}

#if defined _WIN32
// get max freq mhz for all cores
int max_freq_mhz_min = INT_MAX;
int max_freq_mhz_max = 0;
std::vector<int> cpu_max_freq_mhz = get_max_freq_mhz();
for (int i = 0; i < g_cpucount; i++)
{
int max_freq_mhz = cpu_max_freq_mhz[i];

// NCNN_LOGE("%d max freq = %d khz", i, max_freq_mhz);

if (max_freq_mhz > max_freq_mhz_max)
max_freq_mhz_max = max_freq_mhz;
if (max_freq_mhz < max_freq_mhz_min)
max_freq_mhz_min = max_freq_mhz;
}
// Check SDK >= Win7
#if _WIN32_WINNT >= _WIN32_WINNT_WIN7 // win7

int max_freq_mhz_medium = (max_freq_mhz_min + max_freq_mhz_max) / 2;
if (max_freq_mhz_medium == max_freq_mhz_max)
// Load GetLogicalProcessorInformationEx
HMODULE kernel32 = LoadLibrary(TEXT("kernel32.dll"));
if (!kernel32)
{
mask_little.disable_all();
mask_big = mask_all;
NCNN_LOGE("LoadLibrary kernel32.dll failed");
return;
}

ncnn::CpuSet smt_cpu_mask = get_smt_cpu_mask();
typedef BOOL(WINAPI * LPFN_GLPIE)(LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
LPFN_GLPIE glpie = (LPFN_GLPIE)GetProcAddress(kernel32, "GetLogicalProcessorInformationEx");

for (int i = 0; i < g_cpucount; i++)
if (glpie != NULL)
{
if (smt_cpu_mask.is_enabled(i))
DWORD bufferSize = 0;
glpie(RelationProcessorCore, nullptr, &bufferSize);
std::vector<BYTE> buffer(bufferSize);
if (!GetLogicalProcessorInformationEx(RelationProcessorCore,
reinterpret_cast<SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*>(buffer.data()), &bufferSize))
{
// always treat smt core as big core
mask_big.enable(i);
continue;
NCNN_LOGE("GetLogicalProcessorInformationEx failed");
return;
}

if (cpu_max_freq_mhz[i] < max_freq_mhz_medium)
mask_little.enable(i);
// A map from processor number to whether it is an E core
std::vector<std::pair<DWORD, bool> > processorCoreType;
BYTE maxEfficiencyClass = 0; // In a system without E cores, all cores EfficiencyClass is 0

BYTE* ptr = buffer.data();
while (ptr < buffer.data() + bufferSize)
{
auto info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)ptr;
if (info->Relationship == RelationProcessorCore)
{
// Mingw and some old MSVC do not have EfficiencyClass in PROCESSOR_RELATIONSHIP
// So we should redefine PROCESSOR_RELATIONSHIP
// Because ncnn need to support c++98, so we can't use some new features in c++11
// So there is a ugly implementation

BYTE efficiencyClass = ((BYTE*)&info->Processor)[1];

bool isECore = (efficiencyClass == 0);
maxEfficiencyClass = (std::max)(maxEfficiencyClass, efficiencyClass);

for (WORD g = 0; g < info->Processor.GroupCount; ++g)
{
const GROUP_AFFINITY& ga = info->Processor.GroupMask[g];
KAFFINITY mask = ga.Mask;
WORD group = ga.Group;
for (int bit = 0; bit < 64; ++bit)
{ // for each bit in the mask
if (mask & (static_cast<KAFFINITY>(1) << bit))
{
DWORD processorNumber = group * 64 + bit;
processorCoreType.push_back(std::pair<DWORD, bool>(processorNumber, isECore));
}
}
}
}
ptr += info->Size;
}

if (maxEfficiencyClass == 0)
{
// All cores are P cores
mask_little.disable_all();
mask_big = mask_all;
}
else
mask_big.enable(i);
{
for (int i = 0; i < g_cpucount; i++)
{
bool isECore = false;
for (auto& p : processorCoreType)
{
if (p.first == i)
{
isECore = p.second;
break;
}
}
// fprintf(stderr, "processor %d is %s\n", i, isECore ? "E" : "P");

if (isECore)
{
mask_little.enable(i);
}
else
{
mask_big.enable(i);
}
}
}
}
else
#endif
{
// get max freq mhz for all cores
int max_freq_mhz_min = INT_MAX;
int max_freq_mhz_max = 0;
std::vector<int> cpu_max_freq_mhz = get_max_freq_mhz();
for (int i = 0; i < g_cpucount; i++)
{
int max_freq_mhz = cpu_max_freq_mhz[i];

// NCNN_LOGE("%d max freq = %d khz", i, max_freq_mhz);

if (max_freq_mhz > max_freq_mhz_max)
max_freq_mhz_max = max_freq_mhz;
if (max_freq_mhz < max_freq_mhz_min)
max_freq_mhz_min = max_freq_mhz;
}

int max_freq_mhz_medium = (max_freq_mhz_min + max_freq_mhz_max) / 2;
if (max_freq_mhz_medium == max_freq_mhz_max)
{
mask_little.disable_all();
mask_big = mask_all;
return;
}

ncnn::CpuSet smt_cpu_mask = get_smt_cpu_mask();

for (int i = 0; i < g_cpucount; i++)
{
if (smt_cpu_mask.is_enabled(i))
{
// always treat smt core as big core
mask_big.enable(i);
continue;
}

if (cpu_max_freq_mhz[i] < max_freq_mhz_medium)
mask_little.enable(i);
else
mask_big.enable(i);
}
}
#elif defined __ANDROID__ || defined __linux__
int max_freq_khz_min = INT_MAX;
Expand Down
22 changes: 22 additions & 0 deletions tests/test_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,33 @@ static int test_cpu_powersave()

#else

#if defined _WIN32
// Check SDK >= Win7
#if _WIN32_WINNT >= _WIN32_WINNT_WIN7 // win7

static int test_cpu_info()
{
int cpucount = ncnn::get_cpu_count();
int bigcpucount = ncnn::get_big_cpu_count();
int littlecpucount = ncnn::get_little_cpu_count();

fprintf(stderr, "cpucount = %d\n", cpucount);
fprintf(stderr, "bigcpucount = %d\n", bigcpucount);
fprintf(stderr, "littlecpucount = %d\n", littlecpucount);

return 0;
}

#endif
#else

static int test_cpu_info()
{
return 0;
}

#endif

static int test_cpu_omp()
{
return 0;
Expand Down
Loading