Skip to content

[RFC] Utilize shared memory to deduplicate the network system-wide #6173

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 57 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
6e247a1
basics but won't work for multiple reasons. The largest being we'd ne…
Sopel97 Jul 22, 2025
03359f5
leaky hacky but working
Sopel97 Jul 22, 2025
26eead5
asd
Sopel97 Jul 22, 2025
598c97e
LP
Sopel97 Jul 22, 2025
3bc2237
aa
Sopel97 Jul 23, 2025
c8cc652
proper hash
Sopel97 Jul 23, 2025
00562ca
const correct write
Sopel97 Jul 23, 2025
8bafc35
minor touches + comments
Sopel97 Jul 23, 2025
d01d5df
remove non-const accessor
Sopel97 Jul 23, 2025
2b1b765
actually use large pages
Sopel97 Jul 23, 2025
2ed8130
update linux
Disservin Jul 23, 2025
b179417
update exe
Disservin Jul 27, 2025
3c3630d
update linux
Disservin Jul 27, 2025
89bdf48
add fallback and windows
Disservin Jul 27, 2025
e749672
unlink
Disservin Jul 27, 2025
981e62b
add compile flag
Disservin Jul 27, 2025
25a3978
fix name
Disservin Jul 27, 2025
ad6bf6a
proper init and cleanup
Disservin Jul 30, 2025
88c9170
fix uninitialized value and stack allocation
Disservin Jul 31, 2025
5300fd5
remove unnecessary destructor call
Disservin Jul 31, 2025
712c35e
fix init
Disservin Jul 31, 2025
ce72e28
Fix warning
vondele Aug 4, 2025
e13d45c
Move -lrt so it works with debug=no optimize=no builds
vondele Aug 4, 2025
005fc3f
Try to duplicate nets only to HW numa domains
vondele Aug 4, 2025
c2cd0aa
Avoid copy on stack, fails on macos
vondele Aug 4, 2025
d4ca986
Add some of the used debug/warn flags, including stack-usage
vondele Aug 5, 2025
321d946
Only include shm_linux when not on windows
Sopel97 Aug 6, 2025
513e209
Fix windows compilation
Sopel97 Aug 6, 2025
ce31ddc
includes
Sopel97 Aug 6, 2025
b8c9b34
Disable shared memory on android because it does not exist
Sopel97 Aug 6, 2025
2aac07e
no lrt on mac
Sopel97 Aug 6, 2025
227d445
dsa
Sopel97 Aug 6, 2025
90665e6
macos...
Sopel97 Aug 6, 2025
9930437
remove printouts
Sopel97 Aug 6, 2025
0ffd1ee
includes again
Sopel97 Aug 6, 2025
df58908
win32
Sopel97 Aug 6, 2025
b1dc1df
dsa
Sopel97 Aug 6, 2025
f6956ab
make format
Sopel97 Aug 6, 2025
4102fe7
win32
Sopel97 Aug 6, 2025
b9129d6
sda
Sopel97 Aug 6, 2025
7c6ddc7
dumb
Sopel97 Aug 6, 2025
805e89c
win32 disable
Sopel97 Aug 6, 2025
e5f94f8
actually dont disable
Sopel97 Aug 6, 2025
8f6055b
dsa
Sopel97 Aug 6, 2025
0d4a86e
dsa
Sopel97 Aug 6, 2025
2f92e42
macos?!
Sopel97 Aug 6, 2025
3f2b2f9
fwd
Sopel97 Aug 6, 2025
98a4fa8
Use standard exit codes
vondele Aug 8, 2025
2cf4a89
Wait size
vondele Aug 8, 2025
8f9a337
Update src/misc.h
Sopel97 Aug 9, 2025
c961ea3
asdasd
Sopel97 Aug 9, 2025
3db0ed8
std::launder exising shm memory on linux
Sopel97 Aug 9, 2025
8bfb3ae
magic number
Sopel97 Aug 9, 2025
2ad139c
use semaphores again
Disservin Aug 9, 2025
bb34fd0
add posix_fallocate to ensure shm tmpfs is large enough for file
Disservin Aug 9, 2025
cd5a699
add max sem name length assert
Disservin Aug 9, 2025
8274fc5
fix compilation issues
Disservin Aug 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ endif
ifeq ($(COMP),gcc)
comp=gcc
CXX=g++
CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations
CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations -Wstack-usage=128000
Copy link
Preview

Copilot AI Aug 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The stack usage warning threshold of 128000 bytes (128KB) is extremely high and may indicate potential stack overflow issues. Consider using a lower threshold like 32KB or 64KB.

Suggested change
CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations -Wstack-usage=128000
CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations -Wstack-usage=65536

Copilot uses AI. Check for mistakes.


ifeq ($(arch),$(filter $(arch),armv7 armv8 riscv64))
ifeq ($(OS),Android)
Expand Down Expand Up @@ -631,6 +631,19 @@ ifneq ($(comp),mingw)
ifneq ($(KERNEL),Haiku)
ifneq ($(COMP),ndk)
LDFLAGS += -lpthread

add_lrt = yes
ifeq ($(target_windows),yes)
add_lrt = no
endif

ifeq ($(KERNEL),Darwin)
add_lrt = no
endif

ifeq ($(add_lrt),yes)
LDFLAGS += -lrt
endif
endif
endif
endif
Expand All @@ -641,6 +654,7 @@ ifeq ($(debug),no)
CXXFLAGS += -DNDEBUG
else
CXXFLAGS += -g
CXXFLAGS += -D_GLIBCXX_ASSERTIONS -D_GLIBCXX_DEBUG
endif

### 3.2.2 Debugging with undefined behavior sanitizers
Expand Down
11 changes: 7 additions & 4 deletions src/engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "misc.h"
#include "nnue/network.h"
#include "nnue/nnue_common.h"
#include "nnue/nnue_misc.h"
#include "numa.h"
#include "perft.h"
#include "position.h"
Expand All @@ -57,11 +58,13 @@ Engine::Engine(std::optional<std::string> path) :
threads(),
networks(
numaContext,
NN::Networks(
NN::NetworkBig({EvalFileDefaultNameBig, "None", ""}, NN::EmbeddedNNUEType::BIG),
NN::NetworkSmall({EvalFileDefaultNameSmall, "None", ""}, NN::EmbeddedNNUEType::SMALL))) {
pos.set(StartFEN, false, &states->back());
std::make_unique<NN::Networks>( // requires heap alloc due to sizeof
std::make_unique<NN::NetworkBig>(NN::EvalFile{EvalFileDefaultNameBig, "None", ""},
NN::EmbeddedNNUEType::BIG),
std::make_unique<NN::NetworkSmall>(NN::EvalFile{EvalFileDefaultNameSmall, "None", ""},
NN::EmbeddedNNUEType::SMALL))) {

pos.set(StartFEN, false, &states->back());

options.add( //
"Debug Log File", Option("", [](const Option& o) {
Expand Down
8 changes: 4 additions & 4 deletions src/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,10 @@ class Engine {
Position pos;
StateListPtr states;

OptionsMap options;
ThreadPool threads;
TranspositionTable tt;
LazyNumaReplicated<Eval::NNUE::Networks> networks;
OptionsMap options;
ThreadPool threads;
TranspositionTable tt;
LazyNumaReplicatedSystemWide<Eval::NNUE::Networks> networks;

Search::SearchManager::UpdateContext updateContext;
std::function<void(std::string_view)> onVerifyNetworks;
Expand Down
54 changes: 50 additions & 4 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,74 @@
*/

#include <iostream>
#include <memory>

#include "bitboard.h"
#include "misc.h"
#include "position.h"
#include "tune.h"
#include "types.h"
#include "uci.h"
#include "tune.h"

#include "shm.h"

#if defined(SHM_CLEANUP)
#include <cstdlib>
#include <cstdio>

#include <signal.h>

#include "shm_linux.h"
#endif

// Forward declaration only: this translation unit names the type as a
// template argument and does not need its full definition here.
namespace Stockfish::Eval::NNUE {
struct Networks;
}

using namespace Stockfish;

namespace {
#if defined(SHM_CLEANUP)

// Arranges for the shared memory used by the networks to be released when
// the process ends, including when it is terminated by a signal.
//
// Two hooks are installed:
//   1. A handler for the terminating signals listed below that calls
//      std::exit(128 + sig), so that handlers registered with std::atexit
//      run instead of the process being killed outright.
//   2. A std::atexit handler that calls
//      shm::SharedMemory<Eval::NNUE::Networks>::cleanup_all_instances().
//
// NOTE(review): std::exit is not on the POSIX list of async-signal-safe
// functions, so this is a deliberate best-effort hack rather than a
// strictly conforming handler.
void register_cleanup() {
    // hack to invoke atexit
    const int signals[] = {SIGHUP,  SIGINT,  SIGQUIT, SIGILL, SIGABRT, SIGFPE,
                           SIGSEGV, SIGTERM, SIGBUS,  SIGSYS, SIGXCPU, SIGXFSZ};

    // Value-initialize: struct sigaction may carry members beyond the three
    // assigned below, and they must not be passed to the kernel uninitialized.
    struct sigaction sa {};
    sa.sa_handler = [](int sig) { std::exit(128 + sig); };
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = 0;

    for (int sig : signals)
        if (sigaction(sig, &sa, nullptr) == -1)
            std::perror("sigaction");

    // Cleanup function to ensure shared memory is unlinked on exit.
    // std::atexit returns non-zero when the registration fails; report it
    // instead of silently running without cleanup.
    if (std::atexit([]() { shm::SharedMemory<Eval::NNUE::Networks>::cleanup_all_instances(); })
        != 0)
        std::fputs("atexit: failed to register shared memory cleanup\n", stderr);
}
#else
// Shared memory cleanup is not enabled for this build: nothing to register.
void register_cleanup() {}
#endif
}

int main(int argc, char* argv[]) {
register_cleanup();

std::cout << engine_info() << std::endl;

Bitboards::init();
Position::init();

UCIEngine uci(argc, argv);
auto uci = std::make_unique<UCIEngine>(argc, argv);

Tune::init(uci.engine_options());
Tune::init(uci->engine_options());

uci.loop();
uci->loop();

return 0;
}
85 changes: 8 additions & 77 deletions src/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,6 @@
// the calls at compile time), try to load them at runtime. To do this we need
// first to define the corresponding function pointers.

extern "C" {
using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE);
using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
using AdjustTokenPrivileges_t =
bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
}
#endif


Expand Down Expand Up @@ -106,77 +100,14 @@ void std_aligned_free(void* ptr) {

static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) {

#if !defined(_WIN64)
return nullptr;
#else

HANDLE hProcessToken{};
LUID luid{};
void* mem = nullptr;

const size_t largePageSize = GetLargePageMinimum();
if (!largePageSize)
return nullptr;

// Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges

HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));

if (!hAdvapi32)
hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));

auto OpenProcessToken_f =
OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
if (!OpenProcessToken_f)
return nullptr;
auto LookupPrivilegeValueA_f =
LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
if (!LookupPrivilegeValueA_f)
return nullptr;
auto AdjustTokenPrivileges_f =
AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
if (!AdjustTokenPrivileges_f)
return nullptr;

// We need SeLockMemoryPrivilege, so try to enable it for the process

if (!OpenProcessToken_f( // OpenProcessToken()
GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
return nullptr;

if (LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
{
TOKEN_PRIVILEGES tp{};
TOKEN_PRIVILEGES prevTp{};
DWORD prevTpLen = 0;

tp.PrivilegeCount = 1;
tp.Privileges[0].Luid = luid;
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;

// Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges()
// succeeds, we still need to query GetLastError() to ensure that the privileges
// were actually obtained.

if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
&prevTpLen)
&& GetLastError() == ERROR_SUCCESS)
{
// Round up size to full pages and allocate
allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
PAGE_READWRITE);

// Privilege no longer needed, restore previous state
AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
}
}

CloseHandle(hProcessToken);

return mem;

#endif
return windows_try_with_large_page_priviliges(
[&](size_t largePageSize) {
// Round up size to full pages and allocate
allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
return VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
PAGE_READWRITE);
},
[]() { return (void*) nullptr; });
}

void* aligned_large_pages_alloc(size_t allocSize) {
Expand Down
98 changes: 98 additions & 0 deletions src/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,29 @@

#include "types.h"

#if defined(_WIN64)

    #if _WIN32_WINNT < 0x0601
        #undef _WIN32_WINNT
        #define _WIN32_WINNT 0x0601  // Force to include needed API prototypes
    #endif

    #if !defined(NOMINMAX)
        #define NOMINMAX
    #endif
    #include <windows.h>

    #include <psapi.h>

// Function pointer types for the advapi32 entry points that are resolved at
// runtime with GetProcAddress(). The signatures mirror the Win32 prototypes:
// these functions return BOOL (a 32-bit int, "nonzero on success") and use
// the WINAPI calling convention. Declaring the return type as C++ bool would
// make the caller inspect only the low byte of the result.
extern "C" {
using OpenProcessToken_t      = BOOL(WINAPI*)(HANDLE, DWORD, PHANDLE);
using LookupPrivilegeValueA_t = BOOL(WINAPI*)(LPCSTR, LPCSTR, PLUID);
using AdjustTokenPrivileges_t =
  BOOL(WINAPI*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
}
#endif


namespace Stockfish {

void* std_aligned_alloc(size_t alignment, size_t size);
Expand Down Expand Up @@ -211,6 +234,81 @@ T* align_ptr_up(T* ptr) {
reinterpret_cast<char*>((ptrint + (Alignment - 1)) / Alignment * Alignment));
}

#if defined(_WIN32)

// Runs `fyes` with SeLockMemoryPrivilege temporarily enabled for the current
// process, or `fno` when large pages cannot be used.
//
//   fyes : callable invoked as fyes(largePageSize), where largePageSize is
//          the value returned by GetLargePageMinimum(). Only called once the
//          privilege has actually been obtained.
//   fno  : callable invoked as fno() on every failure path (32-bit build,
//          large pages unsupported, advapi32 entry points unavailable, or
//          the privilege could not be enabled).
//
// Returns whatever the invoked callable returns; both callables must yield
// the same type because the return type is deduced.
//
// The process token handle opened here is closed on every path that follows
// a successful OpenProcessToken(), and the previous privilege state is
// restored after `fyes` has run.
template<typename FuncYesT, typename FuncNoT>
auto windows_try_with_large_page_priviliges([[maybe_unused]] FuncYesT&& fyes, FuncNoT&& fno) {

    #if !defined(_WIN64)
    return fno();
    #else

    HANDLE hProcessToken{};
    LUID   luid{};

    const size_t largePageSize = GetLargePageMinimum();
    if (!largePageSize)
        return fno();

    // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges

    HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));

    if (!hAdvapi32)
        hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));

    // Without the module there is nothing to resolve; do not hand a null
    // module handle to GetProcAddress().
    if (!hAdvapi32)
        return fno();

    auto OpenProcessToken_f =
      OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
    if (!OpenProcessToken_f)
        return fno();
    auto LookupPrivilegeValueA_f =
      LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
    if (!LookupPrivilegeValueA_f)
        return fno();
    auto AdjustTokenPrivileges_f =
      AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
    if (!AdjustTokenPrivileges_f)
        return fno();

    // We need SeLockMemoryPrivilege, so try to enable it for the process

    if (!OpenProcessToken_f(  // OpenProcessToken()
          GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
        return fno();

    // From here on hProcessToken is a live handle and must be closed on
    // every exit path.

    if (!LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
    {
        CloseHandle(hProcessToken);
        return fno();
    }

    TOKEN_PRIVILEGES tp{};
    TOKEN_PRIVILEGES prevTp{};
    DWORD            prevTpLen = 0;

    tp.PrivilegeCount           = 1;
    tp.Privileges[0].Luid       = luid;
    tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;

    // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges()
    // succeeds, we still need to query GetLastError() to ensure that the privileges
    // were actually obtained.

    if (!AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
                                 &prevTpLen)
        || GetLastError() != ERROR_SUCCESS)
    {
        CloseHandle(hProcessToken);
        return fno();
    }

    auto&& ret = fyes(largePageSize);

    // Privilege no longer needed, restore previous state
    AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);

    CloseHandle(hProcessToken);

    return std::forward<decltype(ret)>(ret);

    #endif
}

#endif

} // namespace Stockfish

Expand Down
Loading
Loading