Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 25 additions & 13 deletions cpp/src/arrow/filesystem/azurefs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ bool AzureOptions::Equals(const AzureOptions& other) const {
blob_storage_scheme == other.blob_storage_scheme &&
dfs_storage_scheme == other.dfs_storage_scheme &&
default_metadata == other.default_metadata &&
account_name_ == other.account_name_ &&
account_name == other.account_name &&
credential_kind_ == other.credential_kind_;
if (!equals) {
return false;
Expand Down Expand Up @@ -104,17 +104,17 @@ std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const {
return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name);
}

Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name,
const std::string& account_key) {
Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_key) {
credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
account_name_ = account_name;
if (account_name.empty()) {
return Status::Invalid("AzureOptions doesn't contain a valid account name");
}
storage_shared_key_credential_ =
std::make_shared<Storage::StorageSharedKeyCredential>(account_name, account_key);
return Status::OK();
}

Status AzureOptions::ConfigureClientSecretCredential(const std::string& account_name,
const std::string& tenant_id,
Status AzureOptions::ConfigureClientSecretCredential(const std::string& tenant_id,
const std::string& client_id,
const std::string& client_secret) {
credential_kind_ = CredentialKind::kTokenCredential;
Expand All @@ -123,45 +123,57 @@ Status AzureOptions::ConfigureClientSecretCredential(const std::string& account_
return Status::OK();
}

Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) {
Status AzureOptions::ConfigureDefaultCredential() {
credential_kind_ = CredentialKind::kTokenCredential;
token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
return Status::OK();
}

Status AzureOptions::ConfigureWorkloadIdentityCredential(
const std::string& account_name) {
Status AzureOptions::ConfigureManagedIdentityCredential(const std::string& client_id) {
credential_kind_ = CredentialKind::kTokenCredential;
token_credential_ =
std::make_shared<Azure::Identity::ManagedIdentityCredential>(client_id);
return Status::OK();
}

Status AzureOptions::ConfigureWorkloadIdentityCredential() {
credential_kind_ = CredentialKind::kTokenCredential;
token_credential_ = std::make_shared<Azure::Identity::WorkloadIdentityCredential>();
return Status::OK();
}

Result<std::unique_ptr<Blobs::BlobServiceClient>> AzureOptions::MakeBlobServiceClient()
const {
if (account_name.empty()) {
return Status::Invalid("AzureOptions doesn't contain a valid account name");
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another simplification we can make:

  • Rename kAnonymous to kDefaultCredential
  • On the kDefaultCredential case here, initialize token_credential_ to std::make_shared<Azure::Identity::DefaultAzureCredential>() if it's not initialized already.

In the kTokenCredential case you can add a DCHECK(token_credential_) as documentation that when credential_kind_ is set to kTokenCredential we expect it to be set already (something that is guaranteed by credential_kind_ being private and set only by functions that also initialize token_credential_.

What you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think its useful to support both anonymous and default credential as distinct things. Admittedly, its a niche use-case but storage accounts can be public, which is when anonymous is useful https://learn.microsoft.com/en-us/azure/storage/blobs/anonymous-read-access-configure?tabs=portal.

Its a good question though which should be the default. I know adlfs deafults to anonomous rather than default credential. In some respects it might be good to be consistent but also this choice of default has caused issues for me in the past e.g. flyteorg/flyte#3962

Copy link
Contributor

@felipecrv felipecrv Dec 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok. Makes sense to keep kAnonymous then. But what would you return in AzureOptions::MakeBlobServiceClient() when credential_kind_ is anonymous? Lazy-init token_credential_ with DefaultAzureCredential and create the client with it? can you create the client without any credentials?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked and there is a constructor that doesn't take any credentials. I guess that's what we should call :-)

Copy link
Contributor Author

@Tom-Newton Tom-Newton Dec 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 Do you mind if we do that in a separate PR to fix up anonymous and change the default to default credential. This was supposed to be a super simple PR until kou realised all the auths were broken 😅

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No problem.

switch (credential_kind_) {
case CredentialKind::kAnonymous:
break;
case CredentialKind::kTokenCredential:
return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name_),
return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name),
token_credential_);
case CredentialKind::kStorageSharedKeyCredential:
return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name_),
return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name),
storage_shared_key_credential_);
}
return Status::Invalid("AzureOptions doesn't contain a valid auth configuration");
}

Result<std::unique_ptr<DataLake::DataLakeServiceClient>>
AzureOptions::MakeDataLakeServiceClient() const {
if (account_name.empty()) {
return Status::Invalid("AzureOptions doesn't contain a valid account name");
}
switch (credential_kind_) {
case CredentialKind::kAnonymous:
break;
case CredentialKind::kTokenCredential:
return std::make_unique<DataLake::DataLakeServiceClient>(
AccountDfsUrl(account_name_), token_credential_);
AccountDfsUrl(account_name), token_credential_);
case CredentialKind::kStorageSharedKeyCredential:
return std::make_unique<DataLake::DataLakeServiceClient>(
AccountDfsUrl(account_name_), storage_shared_key_credential_);
AccountDfsUrl(account_name), storage_shared_key_credential_);
}
return Status::Invalid("AzureOptions doesn't contain a valid auth configuration");
}
Expand Down
16 changes: 9 additions & 7 deletions cpp/src/arrow/filesystem/azurefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ class TestAzureFileSystem;

/// Options for the AzureFileSystem implementation.
struct ARROW_EXPORT AzureOptions {
/// \brief account name of the Azure Storage account.
std::string account_name;

/// \brief hostname[:port] of the Azure Blob Storage Service.
///
/// If the hostname is a relative domain name (one that starts with a '.'), then storage
Expand Down Expand Up @@ -94,7 +97,6 @@ struct ARROW_EXPORT AzureOptions {
kStorageSharedKeyCredential,
} credential_kind_ = CredentialKind::kAnonymous;

std::string account_name_;
std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;
std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
storage_shared_key_credential_;
Expand All @@ -103,15 +105,15 @@ struct ARROW_EXPORT AzureOptions {
AzureOptions();
~AzureOptions();

Status ConfigureDefaultCredential(const std::string& account_name);
Status ConfigureDefaultCredential();

Status ConfigureManagedIdentityCredential(const std::string& client_id = std::string());

Status ConfigureWorkloadIdentityCredential(const std::string& account_name);
Status ConfigureWorkloadIdentityCredential();

Status ConfigureAccountKeyCredential(const std::string& account_name,
const std::string& account_key);
Status ConfigureAccountKeyCredential(const std::string& account_key);

Status ConfigureClientSecretCredential(const std::string& account_name,
const std::string& tenant_id,
Status ConfigureClientSecretCredential(const std::string& tenant_id,
const std::string& client_id,
const std::string& client_secret);

Expand Down
34 changes: 28 additions & 6 deletions cpp/src/arrow/filesystem/azurefs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -271,22 +271,44 @@ class AzureHierarchicalNSEnv : public AzureEnvImpl<AzureHierarchicalNSEnv> {
bool WithHierarchicalNamespace() const final { return true; }
};

TEST(AzureFileSystem, InitializingFilesystemWithoutAccountNameFails) {
AzureOptions options;
ASSERT_RAISES(Invalid, options.ConfigureAccountKeyCredential("account_key"));

ARROW_EXPECT_OK(
options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret"));
ASSERT_RAISES(Invalid, AzureFileSystem::Make(options));
}

TEST(AzureFileSystem, InitializeFilesystemWithClientSecretCredential) {
AzureOptions options;
ARROW_EXPECT_OK(options.ConfigureClientSecretCredential(
"dummy-account-name", "tenant_id", "client_id", "client_secret"));
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(
options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret"));
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}

TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) {
AzureOptions options;
ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name"));
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(options.ConfigureDefaultCredential());
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}

TEST(AzureFileSystem, InitializeFilesystemWithManagedIdentityCredential) {
AzureOptions options;
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential());
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));

ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential("specific-client-id"));
EXPECT_OK_AND_ASSIGN(fs, AzureFileSystem::Make(options));
}

TEST(AzureFileSystem, InitializeFilesystemWithWorkloadIdentityCredential) {
AzureOptions options;
ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential("dummy-account-name"));
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential());
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}

Expand Down Expand Up @@ -383,6 +405,7 @@ class TestAzureFileSystem : public ::testing::Test {

static Result<AzureOptions> MakeOptions(BaseAzureEnv* env) {
AzureOptions options;
options.account_name = env->account_name();
switch (env->backend()) {
case AzureBackend::kAzurite:
options.blob_storage_authority = "127.0.0.1:10000";
Expand All @@ -394,8 +417,7 @@ class TestAzureFileSystem : public ::testing::Test {
// Use the default values
break;
}
ARROW_EXPECT_OK(
options.ConfigureAccountKeyCredential(env->account_name(), env->account_key()));
ARROW_EXPECT_OK(options.ConfigureAccountKeyCredential(env->account_key()));
return options;
}

Expand Down