Skip to content

Commit b4f0cb4

Browse files
committed
refactor(libstore): Replace AWS SDK with curl-based S3 implementation
This commit replaces the AWS C++ SDK with a lighter curl-based approach for S3 binary cache operations. - Removed dependency on the heavy aws-cpp-sdk-s3 and aws-cpp-sdk-transfer - Added lightweight aws-crt-cpp for credential resolution only - Leverages curl's native AWS SigV4 authentication (requires curl >= 7.75.0) - S3BinaryCacheStore now delegates to HttpBinaryCacheStore - Function s3ToHttpsUrl converts ParsedS3URL to ParsedURL - Multipart uploads are no longer supported (may be reimplemented later) - Build now requires curl >= 7.75.0 for AWS SigV4 support Fixes: #13084, #12671, #11748, #12403, #5947
1 parent d76dc24 commit b4f0cb4

25 files changed

+850
-756
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
---
2+
synopsis: "Improved S3 binary cache support via HTTP"
3+
prs: [13752]
4+
issues: [13084, 12671, 11748, 12403, 5947]
5+
---
6+
7+
S3 binary cache operations now happen via HTTP, leveraging `libcurl`'s native AWS
8+
SigV4 authentication instead of the AWS C++ SDK, providing significant
9+
improvements:
10+
11+
- **Reduced memory usage**: Eliminates memory buffering issues that caused
12+
segfaults with large files (>3.5GB)
13+
- **Fixed upload reliability**: Resolves AWS SDK chunking errors
14+
(`InvalidChunkSizeError`)
15+
- **Resolved OpenSSL conflicts**: No more S2N engine override issues in
16+
sandboxed builds
17+
- **Lighter dependencies**: Uses lightweight `aws-crt-cpp` instead of full
18+
`aws-cpp-sdk`, reducing build complexity
19+
20+
The new implementation requires curl >= 7.75.0 and `aws-crt-cpp` for credential
21+
management.
22+
23+
All existing S3 URL formats and parameters remain supported.
24+
25+
## Breaking changes
26+
27+
The legacy `S3BinaryCacheStore` implementation has been removed in favor of the
28+
new curl-based approach; as a result, multipart uploads are no longer supported. They may be reimplemented in the future.
29+
30+
**Migration**: No action required for most users. S3 URLs continue to work
31+
with the same syntax. Users directly using the `S3BinaryCacheStore` class
32+
should migrate to standard HTTP binary cache stores with S3 endpoints.
33+
34+
**Build requirement**: S3 support now requires curl >= 7.75.0 for AWS SigV4
35+
authentication. Build configuration will warn if `aws-crt-cpp` is available
36+
but S3 support is disabled due to an insufficient curl version.

meson.options

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,10 @@ option(
2727
value : false,
2828
description : 'Build benchmarks (requires gbenchmark)',
2929
)
30+
31+
# Feature option controlling S3 binary cache store support; its default value
# is 'auto'. Per the description below, the store needs aws-crt-cpp and
# curl >= 7.75.0.
option(
32+
's3-store',
33+
type : 'feature',
34+
value : 'auto',
35+
description : 'Enable S3 binary cache store support (requires aws-crt-cpp and curl >= 7.75.0)',
36+
)

packaging/components.nix

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@ in
458458
459459
Example:
460460
```
461-
overrideScope (finalScope: prevScope: { aws-sdk-cpp = null; })
461+
overrideScope (finalScope: prevScope: { aws-crt-cpp = null; })
462462
```
463463
*/
464464
overrideScope = f: (scope.overrideScope f).nix-everything;

packaging/dependencies.nix

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,6 @@ in
1616
scope: {
1717
inherit stdenv;
1818

19-
aws-sdk-cpp =
20-
(pkgs.aws-sdk-cpp.override {
21-
apis = [
22-
"identity-management"
23-
"s3"
24-
"transfer"
25-
];
26-
customMemoryManagement = false;
27-
}).overrideAttrs
28-
{
29-
# only a stripped down version is built, which takes a lot less resources
30-
# to build, so we don't need a "big-parallel" machine.
31-
requiredSystemFeatures = [ ];
32-
};
33-
3419
boehmgc =
3520
(pkgs.boehmgc.override {
3621
enableLargeConfig = true;

src/libstore-tests/meson.build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ sources = files(
7777
'realisation.cc',
7878
'references.cc',
7979
's3-binary-cache-store.cc',
80-
's3.cc',
80+
's3-url.cc',
8181
'serve-protocol.cc',
8282
'ssh-store.cc',
8383
'store-reference.cc',

src/libstore-tests/s3-binary-cache-store.cc

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
#include "nix/store/s3-binary-cache-store.hh"
2+
#include "nix/store/http-binary-cache-store.hh"
3+
#include "nix/store/filetransfer.hh"
4+
#include "nix/store/s3-url.hh"
25

36
#if NIX_WITH_S3_SUPPORT
47

@@ -10,7 +13,62 @@ TEST(S3BinaryCacheStore, constructConfig)
1013
{
1114
S3BinaryCacheStoreConfig config{"s3", "foobar", {}};
1215

13-
EXPECT_EQ(config.bucketName, "foobar");
16+
// The bucket name is stored as the host part of the authority in cacheUri
17+
ASSERT_TRUE(config.cacheUri.authority.has_value());
18+
EXPECT_EQ(config.cacheUri.authority->host, "foobar");
19+
EXPECT_EQ(config.cacheUri.scheme, "s3");
20+
}
21+
22+
/**
 * A `region` parameter passed at construction must be reflected by the
 * `region` setting, while the bucket name still ends up as the authority host.
 */
TEST(S3BinaryCacheStore, constructConfigWithRegion)
{
    Store::Config::Params regionOnly{{"region", "eu-west-1"}};
    S3BinaryCacheStoreConfig cfg{"s3", "my-bucket", regionOnly};

    // Stop here if there is no authority; the next check dereferences it.
    const auto & authority = cfg.cacheUri.authority;
    ASSERT_TRUE(authority.has_value());

    EXPECT_EQ(authority->host, "my-bucket");
    EXPECT_EQ(cfg.region.get(), "eu-west-1");
}
31+
32+
/**
 * With no parameters supplied, the S3-specific settings must carry their
 * defaults: region "us-east-1", empty profile, empty endpoint.
 */
TEST(S3BinaryCacheStore, defaultSettings)
{
    S3BinaryCacheStoreConfig cfg{"s3", "test-bucket", {}};

    const auto & region = cfg.region.get();
    const auto & profile = cfg.profile.get();
    const auto & endpoint = cfg.endpoint.get();

    EXPECT_EQ(region, "us-east-1");
    EXPECT_EQ(profile, "");
    EXPECT_EQ(endpoint, "");
}
41+
42+
/**
43+
* Test that S3BinaryCacheStore properly preserves S3-specific parameters
44+
*/
45+
TEST(S3BinaryCacheStore, s3StoreConfigPreservesParameters)
46+
{
47+
StringMap params;
48+
params["region"] = "eu-west-1";
49+
params["endpoint"] = "custom.s3.com";
50+
51+
S3BinaryCacheStoreConfig config("s3", "test-bucket", params);
52+
53+
// The config should preserve S3-specific parameters
54+
EXPECT_EQ(config.cacheUri.scheme, "s3");
55+
EXPECT_EQ(config.cacheUri.authority->host, "test-bucket");
56+
EXPECT_FALSE(config.cacheUri.query.empty());
57+
EXPECT_EQ(config.cacheUri.query["region"], "eu-west-1");
58+
EXPECT_EQ(config.cacheUri.query["endpoint"], "custom.s3.com");
59+
}
60+
61+
/**
62+
* Test that S3 store scheme is properly registered
63+
*/
64+
TEST(S3BinaryCacheStore, s3SchemeRegistration)
65+
{
66+
auto schemes = S3BinaryCacheStoreConfig::uriSchemes();
67+
EXPECT_TRUE(schemes.count("s3") > 0) << "S3 scheme should be supported";
68+
69+
// Verify HttpBinaryCacheStoreConfig doesn't directly list S3
70+
auto httpSchemes = HttpBinaryCacheStoreConfig::uriSchemes();
71+
EXPECT_FALSE(httpSchemes.count("s3") > 0) << "HTTP store shouldn't directly list S3 scheme";
1472
}
1573

1674
} // namespace nix

src/libstore-tests/s3.cc renamed to src/libstore-tests/s3-url.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#include "nix/store/s3.hh"
1+
#include "nix/store/s3-url.hh"
22
#include "nix/util/tests/gmock-matchers.hh"
33

44
#if NIX_WITH_S3_SUPPORT

src/libstore/aws-creds.cc

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
#include "nix/store/aws-creds.hh"
2+
3+
#if NIX_WITH_S3_SUPPORT
4+
5+
# include <aws/crt/Types.h>
6+
# include "nix/store/s3-url.hh"
7+
# include "nix/util/finally.hh"
8+
# include "nix/util/logging.hh"
9+
# include "nix/util/sync.hh"
10+
# include "nix/util/url.hh"
11+
# include "nix/util/util.hh"
12+
13+
# include <aws/crt/Api.h>
14+
# include <aws/crt/auth/Credentials.h>
15+
# include <aws/crt/io/Bootstrap.h>
16+
17+
# include <boost/unordered/concurrent_flat_map.hpp>
18+
19+
# include <chrono>
20+
# include <condition_variable>
21+
# include <mutex>
22+
# include <unistd.h>
23+
24+
namespace nix {
25+
26+
namespace {
27+
28+
static void initAwsCrt()
29+
{
30+
struct CrtWrapper
31+
{
32+
Aws::Crt::ApiHandle apiHandle;
33+
34+
CrtWrapper()
35+
{
36+
apiHandle.InitializeLogging(Aws::Crt::LogLevel::Warn, static_cast<FILE *>(nullptr));
37+
}
38+
39+
~CrtWrapper()
40+
{
41+
try {
42+
// CRITICAL: Clear credential provider cache BEFORE AWS CRT shuts down
43+
// This ensures all providers (which hold references to ClientBootstrap)
44+
// are destroyed while AWS CRT is still valid
45+
clearAwsCredentialsCache();
46+
// Now it's safe for ApiHandle destructor to run
47+
} catch (...) {
48+
ignoreExceptionInDestructor();
49+
}
50+
}
51+
};
52+
53+
static CrtWrapper crt;
54+
}
55+
56+
/**
 * Synchronously resolve credentials from an AWS CRT credentials provider.
 *
 * `GetCredentials` is asynchronous, so this function starts the request and
 * blocks until the callback reports a result or a 30 second deadline passes.
 *
 * @param provider Provider to query; must be non-null and valid.
 * @return The resolved credentials. The session token is only set when the
 *         provider returned a non-empty one.
 * @throws AwsAuthError if the provider is invalid, the request could not be
 *         started, no callback arrived before the deadline, or the callback
 *         reported an error code / no credentials.
 */
static AwsCredentials getCredentialsFromProvider(std::shared_ptr<Aws::Crt::Auth::ICredentialsProvider> provider)
{
    if (!provider || !provider->IsValid()) {
        throw AwsAuthError("AWS credential provider is invalid");
    }

    struct State
    {
        std::optional<AwsCredentials> result;
        int resolvedErrorCode = 0;
        bool resolved = false;
    };

    // Everything the callback touches lives in one heap object that is owned
    // jointly by this function and by the callback. If we give up after the
    // timeout and unwind, a callback that fires later still writes to live
    // memory instead of to destroyed stack variables.
    struct Pending
    {
        Sync<State> state;
        std::condition_variable cv;
    };

    auto pending = std::make_shared<Pending>();

    const bool started =
        provider->GetCredentials([pending](std::shared_ptr<Aws::Crt::Auth::Credentials> credentials, int errorCode) {
            auto state_ = pending->state.lock();

            if (errorCode != 0 || !credentials) {
                state_->resolvedErrorCode = errorCode;
            } else {
                auto accessKeyId = Aws::Crt::ByteCursorToStringView(credentials->GetAccessKeyId());
                auto secretAccessKey = Aws::Crt::ByteCursorToStringView(credentials->GetSecretAccessKey());
                auto sessionToken = Aws::Crt::ByteCursorToStringView(credentials->GetSessionToken());

                // An empty token means "no session token", not an empty one.
                std::optional<std::string> sessionTokenStr;
                if (!sessionToken.empty()) {
                    sessionTokenStr = std::string(sessionToken.data(), sessionToken.size());
                }

                state_->result = AwsCredentials(
                    std::string(accessKeyId.data(), accessKeyId.size()),
                    std::string(secretAccessKey.data(), secretAccessKey.size()),
                    sessionTokenStr);
            }

            state_->resolved = true;
            pending->cv.notify_one();
        });

    // AWS CRT only guarantees that the callback will be invoked if the initial
    // call returns success, so fail right away instead of waiting out the
    // whole timeout for a callback that is not coming.
    if (!started) {
        throw AwsAuthError("Failed to start AWS credential resolution");
    }

    // There's no documented timeout mechanism in AWS CRT, so we add a timeout
    // to prevent hanging indefinitely if the callback is never called. An
    // absolute deadline keeps spurious wakeups from extending the total wait.
    const auto timeout = std::chrono::seconds(30);
    const auto deadline = std::chrono::steady_clock::now() + timeout;

    auto state_ = pending->state.lock();

    while (!state_->resolved) {
        // Re-check `resolved` after a timeout: the callback may have finished
        // just as the deadline expired.
        if (state_.wait_until(pending->cv, deadline) == std::cv_status::timeout && !state_->resolved) {
            throw AwsAuthError("Timeout waiting for AWS credentials (%d seconds)", timeout.count());
        }
    }

    if (!state_->result) {
        throw AwsAuthError("Failed to resolve AWS credentials: error code %d", state_->resolvedErrorCode);
    }

    return *state_->result;
}
126+
127+
// Global credential provider cache using boost's concurrent map
128+
// Key: profile name (empty string for default profile)
// Value: the provider created for that profile. Entries are added by
// getAwsCredentials() and removed by invalidateAwsCredentials() (one profile)
// or clearAwsCredentialsCache() (all profiles).
129+
using CredentialProviderCache =
130+
boost::concurrent_flat_map<std::string, std::shared_ptr<Aws::Crt::Auth::ICredentialsProvider>>;
131+
132+
static CredentialProviderCache credentialProviderCache;
133+
134+
} // anonymous namespace
135+
136+
/**
 * Resolve AWS credentials for a profile, reusing a cached credentials provider
 * when one exists.
 *
 * @param profile Profile name; the empty string selects the default
 *        provider chain.
 * @return The credentials obtained from the (cached or newly created) provider.
 * @throws AwsAuthError if no provider could be created; errors raised while
 *         resolving the credentials themselves propagate unchanged.
 */
AwsCredentials getAwsCredentials(const std::string & profile)
{
    using ProviderPtr = std::shared_ptr<Aws::Crt::Auth::ICredentialsProvider>;

    // Fast path: a provider for this profile is already cached.
    ProviderPtr cached;
    credentialProviderCache.visit(profile, [&cached](const auto & entry) { cached = entry.second; });
    if (cached) {
        return getCredentialsFromProvider(cached);
    }

    debug(
        "[pid=%d] creating new AWS credential provider for profile '%s'",
        getpid(),
        profile.empty() ? "(default)" : profile.c_str());

    // Only evaluated on the error paths below.
    const auto describeProfile = [&profile]() -> std::string {
        return profile.empty() ? "default profile" : fmt("profile '%s'", profile);
    };

    ProviderPtr fresh;
    try {
        initAwsCrt();

        auto bootstrap = Aws::Crt::ApiHandle::GetOrCreateStaticDefaultClientBootstrap();

        if (!profile.empty()) {
            Aws::Crt::Auth::CredentialsProviderProfileConfig profileConfig;
            profileConfig.Bootstrap = bootstrap;
            profileConfig.ProfileNameOverride = Aws::Crt::ByteCursorFromCString(profile.c_str());
            fresh = Aws::Crt::Auth::CredentialsProvider::CreateCredentialsProviderProfile(profileConfig);
        } else {
            Aws::Crt::Auth::CredentialsProviderChainDefaultConfig chainConfig;
            chainConfig.Bootstrap = bootstrap;
            fresh = Aws::Crt::Auth::CredentialsProvider::CreateCredentialsProviderChainDefault(chainConfig);
        }
    } catch (Error & e) {
        e.addTrace({}, "while creating AWS credentials provider for %s", describeProfile());
        throw;
    }

    if (!fresh) {
        throw AwsAuthError("Failed to create AWS credentials provider for %s", describeProfile());
    }

    // try_emplace is thread-safe and keeps an entry another thread may have
    // inserted in the meantime; this call still uses the provider it created.
    credentialProviderCache.try_emplace(profile, fresh);

    return getCredentialsFromProvider(fresh);
}
184+
185+
/**
 * Drop the cached credentials provider for `profile` (empty string for the
 * default profile). The next getAwsCredentials() call for that profile will
 * not find a cached provider and creates a new one.
 */
void invalidateAwsCredentials(const std::string & profile)
186+
{
187+
credentialProviderCache.erase(profile);
188+
}
189+
190+
/**
 * Remove every cached credentials provider. Also called from the destructor of
 * the static wrapper in initAwsCrt(), so the providers are released before
 * that wrapper's ApiHandle is destroyed.
 */
void clearAwsCredentialsCache()
191+
{
192+
credentialProviderCache.clear();
193+
}
194+
195+
std::optional<AwsCredentials> preResolveAwsCredentials(const std::string & url)
196+
{
197+
try {
198+
auto parsedUrl = parseURL(url);
199+
if (parsedUrl.scheme != "s3") {
200+
return std::nullopt;
201+
}
202+
203+
auto s3Url = ParsedS3URL::parse(parsedUrl);
204+
std::string profile = s3Url.profile.value_or("");
205+
206+
// Get credentials (automatically cached)
207+
return getAwsCredentials(profile);
208+
} catch (const AwsAuthError & e) {
209+
debug("Failed to pre-resolve AWS credentials: %s", e.what());
210+
return std::nullopt;
211+
} catch (const std::exception & e) {
212+
debug("Failed to pre-resolve AWS credentials: %s", e.what());
213+
return std::nullopt;
214+
}
215+
}
216+
217+
} // namespace nix
218+
219+
#endif

0 commit comments

Comments
 (0)