Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions cpp/src/arrow/filesystem/azurefs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "arrow/filesystem/azurefs.h"
#include "arrow/filesystem/azurefs_internal.h"

#include <azure/identity.hpp>
#include <azure/storage/blobs.hpp>
#include <azure/storage/files/datalake.hpp>

Expand Down Expand Up @@ -61,6 +62,8 @@ bool AzureOptions::Equals(const AzureOptions& other) const {
switch (credential_kind_) {
case CredentialKind::kAnonymous:
return true;
case CredentialKind::kTokenCredential:
return token_credential_ == other.token_credential_;
case CredentialKind::kStorageSharedKeyCredential:
return storage_shared_key_credential_->AccountName ==
other.storage_shared_key_credential_->AccountName;
Expand All @@ -69,15 +72,26 @@ bool AzureOptions::Equals(const AzureOptions& other) const {
return false;
}

Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name,
const std::string& account_key) {
void AzureOptions::SetUrlsForAccountName(const std::string& account_name) {
if (this->backend == AzureBackend::kAzurite) {
account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/";
account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/";
} else {
account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/";
account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/";
}
}

Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) {
AzureOptions::SetUrlsForAccountName(account_name);
credential_kind_ = CredentialKind::kTokenCredential;
token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
return Status::OK();
}

Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name,
const std::string& account_key) {
AzureOptions::SetUrlsForAccountName(account_name);
credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
storage_shared_key_credential_ =
std::make_shared<Storage::StorageSharedKeyCredential>(account_name, account_key);
Expand All @@ -89,6 +103,9 @@ Result<std::unique_ptr<Blobs::BlobServiceClient>> AzureOptions::MakeBlobServiceC
switch (credential_kind_) {
case CredentialKind::kAnonymous:
break;
case CredentialKind::kTokenCredential:
return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
token_credential_);
case CredentialKind::kStorageSharedKeyCredential:
return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
storage_shared_key_credential_);
Expand All @@ -101,6 +118,9 @@ AzureOptions::MakeDataLakeServiceClient() const {
switch (credential_kind_) {
case CredentialKind::kAnonymous:
break;
case CredentialKind::kTokenCredential:
return std::make_unique<DataLake::DataLakeServiceClient>(account_dfs_url_,
token_credential_);
case CredentialKind::kStorageSharedKeyCredential:
return std::make_unique<DataLake::DataLakeServiceClient>(
account_dfs_url_, storage_shared_key_credential_);
Expand Down
7 changes: 7 additions & 0 deletions cpp/src/arrow/filesystem/azurefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,23 @@ struct ARROW_EXPORT AzureOptions {

enum class CredentialKind {
kAnonymous,
kTokenCredential,
kStorageSharedKeyCredential,
} credential_kind_ = CredentialKind::kAnonymous;

std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
storage_shared_key_credential_;

std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;

void SetUrlsForAccountName(const std::string& account_name);

public:
AzureOptions();
~AzureOptions();

Status ConfigureDefaultCredential(const std::string& account_name);

Status ConfigureAccountKeyCredential(const std::string& account_name,
const std::string& account_key);

Expand Down
18 changes: 6 additions & 12 deletions cpp/src/arrow/filesystem/azurefs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,6 @@
#include <gmock/gmock-matchers.h>
#include <gmock/gmock-more-matchers.h>
#include <gtest/gtest.h>
#include <azure/identity/client_secret_credential.hpp>
#include <azure/identity/default_azure_credential.hpp>
#include <azure/identity/managed_identity_credential.hpp>
#include <azure/storage/blobs.hpp>
#include <azure/storage/common/storage_credential.hpp>
#include <azure/storage/files/datalake.hpp>
Expand Down Expand Up @@ -266,15 +263,12 @@ class AzureHierarchicalNSEnv : public AzureEnvImpl<AzureHierarchicalNSEnv> {
bool WithHierarchicalNamespace() const final { return true; }
};

// Placeholder tests
// TODO: GH-18014 Remove once a proper test is added
TEST(AzureFileSystem, InitializeCredentials) {
auto default_credential = std::make_shared<Azure::Identity::DefaultAzureCredential>();
auto managed_identity_credential =
std::make_shared<Azure::Identity::ManagedIdentityCredential>();
auto service_principal_credential =
std::make_shared<Azure::Identity::ClientSecretCredential>("tenant_id", "client_id",
"client_secret");
TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) {
AzureOptions options;
options.backend = AzureBackend::kAzurite; // Irrelevant for this test because it
// doesn't connect to the server.
ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name"));
EXPECT_OK_AND_ASSIGN(auto default_credential_fs, AzureFileSystem::Make(options));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that we can use Azurite with the DefaultAzureCredential: https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite?tabs=visual-studio%2Cblob-storage#azure-sdks

Can we do an operation (CreateDir()?) and check the result to verify whether this filesystem is valid or not?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had a bit of a try but I couldn't get the SSL setup working. I'll give it another go but I don't have great hope for making it work in CI even if I get it working locally.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. I'll also try it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried and understand why you said SSL.
Azure::Identity::DefaultAzureCredential uses a Bearer token and Azure SDK for C++ rejects it with http: Bearer token authentication is not permitted for non TLS protected (https) endpoints.
If we want to use DefaultAzureCredential with Azurite, we need to generate a key and certificate pair and use it.

I looked at how to set it to Azure SDK for C++. It seems that we need to BlobClientOptions::Transport::Transport:

If we set BlobClientOptions::Transport::Transport, we need to specify curl based HTTP transport implementation or WinHTTP based HTTP transport implementation. They have different configurations for TLS...

How about using TestAzureHierarchicalNSFileSystem to test DefaultAzureCredential? If we use the real Azure service, we don't need to custom TLS configuration.

Copy link
Contributor Author

@Tom-Newton Tom-Newton Dec 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would definitely be possible to test against real blob storage but there will be a significant amount of manual configuration for all the identities to test all the different authentications.

Then we need to provide details of these identities to TestAzureHierarchicalNSFileSystem either they are required always or we need to add new versions for each auth e.g. TestAzureHierarchicalNSFileSystemWithServicePrincipal.

Also there are some Auth methods that are not feasible to test. For example managed identity can only work on Azure VMs and workload identity can only work in kubernetes.

Personally I don't think this is worthwhile to make a more comprehensive test because of how little complexity there is outside the Azure SDK.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this test is enough, because as @pitrou said the other day: "we are not re-implementing the Azure SDK".

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also there are some Auth methods that are not feasible to test. For example managed identity can only work on Azure VMs and workload identity can only work in kubernetes.

Oh...

OK. I see.

}

TEST(AzureFileSystem, OptionsCompare) {
Expand Down