Skip to content

[azidentity] Implement support for custom token endpoint mode in WorkloadIdentityCredential with CA data support #25057

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
94a696b
Initial plan
Copilot Aug 6, 2025
c857416
Implement workload identity binding mode support
Copilot Aug 6, 2025
153a718
Address code review feedback: improve path checking, parse URL during…
Copilot Aug 6, 2025
b693895
Validate CA file and certificate during credential construction, fix …
Copilot Aug 6, 2025
aa168cc
Address code review feedback: remove host checks, early return, extra…
Copilot Aug 6, 2025
6ad38c9
refactor: push down the CA loading logic to ib transport object
bcho Aug 11, 2025
7153143
Add comprehensive unit tests for WorkloadIdentityCredential identity …
Copilot Aug 11, 2025
f7ac5a7
Address code review feedback: check errors, use 0600 permissions, add…
Copilot Aug 11, 2025
0642c65
fix: fallback to http.DefaultClient
bcho Aug 11, 2025
0ec7258
fix: replace path
bcho Aug 11, 2025
67144d7
Address review comments: rename to customTokenEndpoint, simplify logi…
Copilot Aug 13, 2025
2997901
Simplify custom token endpoint transport implementation
Copilot Aug 13, 2025
bc8788d
Error out when CA file is empty in custom token endpoint transport
Copilot Aug 13, 2025
9e50159
Address code review feedback: add CA data support, improve validation…
Copilot Aug 13, 2025
d4c9718
refactor: extract CA loading logic to separate method
Copilot Aug 13, 2025
bdac69f
Simplify CA data validation by consolidating empty checks
Copilot Aug 13, 2025
69166e1
refactor: simplify custom token endpoint transport setup flow
bcho Aug 14, 2025
e788d20
Add comprehensive unit tests for custom token endpoint functionality
Copilot Aug 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions sdk/azidentity/azidentity.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ const (
azureClientID = "AZURE_CLIENT_ID"
azureClientSecret = "AZURE_CLIENT_SECRET"
azureFederatedTokenFile = "AZURE_FEDERATED_TOKEN_FILE"
azureKubernetesCAFile = "AZURE_KUBERNETES_CA_FILE"
azureKubernetesSNIName = "AZURE_KUBERNETES_SNI_NAME"
azureKubernetesTokenEndpoint = "AZURE_KUBERNETES_TOKEN_ENDPOINT"
azurePassword = "AZURE_PASSWORD"
azureRegionalAuthorityName = "AZURE_REGIONAL_AUTHORITY_NAME"
azureTenantID = "AZURE_TENANT_ID"
Expand Down
221 changes: 219 additions & 2 deletions sdk/azidentity/workload_identity.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,16 @@
package azidentity

import (
"bytes"
"context"
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"net/http"
"net/url"
"os"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -87,14 +94,22 @@ func NewWorkloadIdentityCredential(options *WorkloadIdentityCredentialOptions) (
return nil, errors.New("no tenant ID specified. Check pod configuration or set TenantID in the options")
}
}

w := WorkloadIdentityCredential{file: file, mtx: &sync.RWMutex{}}
caco := ClientAssertionCredentialOptions{
caco := &ClientAssertionCredentialOptions{
AdditionallyAllowedTenants: options.AdditionallyAllowedTenants,
Cache: options.Cache,
ClientOptions: options.ClientOptions,
DisableInstanceDiscovery: options.DisableInstanceDiscovery,
}
cred, err := NewClientAssertionCredential(tenantID, clientID, w.getAssertion, &caco)

// configure identity binding if environment variables are present.
// In identity binding enabled mode, a dedicated transport will be used for proxying token requests to a dedicated endpoint.
if err := w.configureIdentityBinding(caco); err != nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The SDK should not mention any AKS-specific things. Identity bindings is an AKS-specific API.

See #24442 (review) for my last review in this repo.

return nil, err
}

cred, err := NewClientAssertionCredential(tenantID, clientID, w.getAssertion, caco)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -139,3 +154,205 @@ func (w *WorkloadIdentityCredential) getAssertion(context.Context) (string, erro
}
return w.assertion, nil
}

// configureIdentityBinding configures identity binding mode if the required environment variables are present
func (w *WorkloadIdentityCredential) configureIdentityBinding(caco *ClientAssertionCredentialOptions) error {
// check for identity binding mode environment variables
kubernetesTokenEndpointStr := os.Getenv(azureKubernetesTokenEndpoint)
kubernetesSNIName := os.Getenv(azureKubernetesSNIName)
kubernetesCAFile := os.Getenv(azureKubernetesCAFile)

if kubernetesTokenEndpointStr == "" && kubernetesSNIName == "" && kubernetesCAFile == "" {
// identity binding is not set
return nil
}

// All three variables must be present for identity binding mode
if kubernetesTokenEndpointStr == "" || kubernetesSNIName == "" || kubernetesCAFile == "" {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only kubernetesTokenEndpointStr is mandatory.

return errors.New("identity binding mode requires all three environment variables: AZURE_KUBERNETES_TOKEN_ENDPOINT, AZURE_KUBERNETES_SNI_NAME, and AZURE_KUBERNETES_CA_FILE")
}

transporter, err := newIdentityBindingTransport(
kubernetesCAFile, kubernetesSNIName, kubernetesTokenEndpointStr,
caco.Transport,
)
if err != nil {
return err
}
caco.Transport = transporter
return nil
}

const (
tokenEndpointSuffix = "/oauth2/v2.0/token"
caReloadInterval = 10 * time.Minute
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should only be hitting the token endpoint when we need a new token, so why do we need caching at this layer at all?

)

// identityBindingTransport is a custom HTTP transport that redirects token requests
// to the Kubernetes token endpoint when in identity binding mode.
// Requests whose URL path does not end in tokenEndpointSuffix are handed to
// fallbackTransporter unchanged.
type identityBindingTransport struct {
// caFile is the path of the CA file read by reloadCA.
caFile string
// sniName is set as the TLS ServerName on the transport used for token requests.
sniName string
// tokenEndpoint supplies the scheme and host that replace those of a token request.
tokenEndpoint *url.URL
// fallbackTransporter handles every request that is not a token request.
fallbackTransporter policy.Transporter

// mtx guards the mutable fields below (nextRead, currentCA, transport).
mtx *sync.RWMutex

// nextRead is the time after which getTokenTransporter re-reads the CA file.
nextRead time.Time
// currentCA holds the CA file content the current transport was built from.
currentCA []byte
// transport is the transport used to send token requests.
transport *http.Transport
}

func newIdentityBindingTransport(
caFile, sniName, tokenEndpointStr string,
fallbackTransporter policy.Transporter,
) (*identityBindingTransport, error) {
tokenEndpoint, err := url.Parse(tokenEndpointStr)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Require this to use https.

if err != nil {
return nil, fmt.Errorf("failed to parse token endpoint URL %q: %w", tokenEndpointStr, err)
}

if fallbackTransporter == nil {
// FIXME: can we callback to the defaultHTTPClient from azcore/runtime?
fallbackTransporter = http.DefaultClient
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah this looks wrong to me.

}

initialTransport := func() *http.Transport {
// try reusing the user provided transport if available
if httpClient, ok := fallbackTransporter.(*http.Client); ok {
if transport, ok := httpClient.Transport.(*http.Transport); ok {
return transport.Clone()
}
}

// if the user did not provide a policy.Transporter or it's not a *http.Client,
// we fall back to the default one.
// FIXME: can we callback to the defaultHTTPClient from azcore/runtime?
if transport, ok := http.DefaultTransport.(*http.Transport); ok {
return transport.Clone()
}

// this should not happen, but if the user mutates the net/http.DefaultTransport
// to something else, we fall back to a sane default
return &http.Transport{
Comment on lines +207 to +209
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Require the fallbackTransporter to implement unwrapping instead of falling back to a hard coded default.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see a good way to declare such a thing because *http.Client doesn't come with a method for extracting the underlying *http.Transport. Given that this route should only be used inside the Kubernetes cluster, which should not involve a complex network environment setup, I simplified the logic as follows:

  1. try clone from the global default http transport
  2. if the global http transport is not a *http.Transport type, fallback to a minimal default transport with timeout settings.

WDYT?

ForceAttemptHTTP2: true,
MaxIdleConns: 100,
IdleConnTimeout: 90 * time.Second,
TLSHandshakeTimeout: 10 * time.Second,
}
}()

tr := &identityBindingTransport{
caFile: caFile,
sniName: sniName,
tokenEndpoint: tokenEndpoint,
fallbackTransporter: fallbackTransporter,
mtx: &sync.RWMutex{},
transport: initialTransport,
}

// perform an initial load to surface any issues with the CA file and transport settings.
// Lock is not held here as this is called in the constructor
if err := tr.reloadCA(); err != nil {
return nil, err
}

return tr, nil
}

func (i *identityBindingTransport) Do(req *http.Request) (*http.Response, error) {
if !strings.HasSuffix(req.URL.Path, tokenEndpointSuffix) {
// not a token request, fallback to the original transporter
return i.fallbackTransporter.Do(req)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the client does discovery and figures out the actual token endpoint at runtime, so to me it would be safer to check for that exact value instead of hard coding something like this. Entra is free to change its token endpoint at any time.

}

tr, err := i.getTokenTransporter()
if err != nil {
return nil, err
}

newReq := req.Clone(req.Context())
newReq.URL.Scheme = i.tokenEndpoint.Scheme // this will always be https
newReq.URL.Host = i.tokenEndpoint.Host
newReq.URL.Path = ""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the path from the token endpoint as well.

newReq.Host = i.tokenEndpoint.Host

return tr.RoundTrip(newReq)
}

func (i *identityBindingTransport) getTokenTransporter() (*http.Transport, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would expect this to just make a new transport every time it is called with the assumption that that will only happen when the AD access token expires.

i.mtx.RLock()
if i.nextRead.Before(time.Now()) {
i.mtx.RUnlock()
i.mtx.Lock()
defer i.mtx.Unlock()
// double check on the read time
if now := time.Now(); i.nextRead.Before(now) {
if err := i.reloadCA(); err != nil {
// we return error if any attempt of reloading CA fails
// This should surface in the token calls and we expect the caller to
// have proper error handling / rate limit so we don't fall into deadloop here
// due to scenario like broken CA file.
return nil, err
}
}
} else {
defer i.mtx.RUnlock()
}
return i.transport, nil
}

// createTransportWithCAPool returns a clone of fromTransport whose TLS client
// configuration uses caPool as its root CAs and i.sniName as its server name.
// A TLS configuration is created on the clone when fromTransport has none.
func (i *identityBindingTransport) createTransportWithCAPool(
	fromTransport *http.Transport,
	caPool *x509.CertPool,
) *http.Transport {
	cloned := fromTransport.Clone()

	tlsConfig := cloned.TLSClientConfig
	if tlsConfig == nil {
		tlsConfig = &tls.Config{}
		cloned.TLSClientConfig = tlsConfig
	}

	// Only these two settings are overridden; everything else is inherited from the clone.
	tlsConfig.ServerName = i.sniName
	tlsConfig.RootCAs = caPool

	return cloned
}

// reloadCA reads the CA bundle from i.caFile and, when its content differs from
// the bundle currently in use, replaces i.transport with a new transport that
// uses the new bundle.
//
// Outcomes:
//   - the file cannot be read, or contains no parsable PEM certificate: an error
//     is returned and no state is changed.
//   - the file is empty and no CA has been loaded yet: an error is returned.
//     Without this, the constructor's initial load would succeed and leave a
//     transport that carries neither the custom root CAs nor the SNI override.
//   - the file is empty but a CA is already loaded: nil is returned and nothing
//     changes (the file may be mid-rotation); nextRead is not advanced, so the
//     next call reads the file again.
//   - the content is unchanged: nil is returned and nextRead moves forward by
//     caReloadInterval.
//   - the content is new: transport, currentCA and nextRead are updated and the
//     previous transport's idle connections are closed.
//
// It expects the caller to hold the write lock on i.mtx to ensure thread safety.
func (i *identityBindingTransport) reloadCA() error {
	newCA, err := os.ReadFile(i.caFile)
	if err != nil {
		return fmt.Errorf("read CA file %q: %w", i.caFile, err)
	}

	if len(newCA) == 0 {
		if len(i.currentCA) == 0 {
			// nothing has been loaded so far, so there is no previous CA to keep using
			return fmt.Errorf("read CA file %q: file is empty", i.caFile)
		}
		// the CA file might be in the middle of rotation without the content written.
		// Keep the current CA and rely on the next check.
		return nil
	}

	if bytes.Equal(i.currentCA, newCA) {
		// no change in CA content, no need to replace
		i.nextRead = time.Now().Add(caReloadInterval)
		return nil
	}

	newCAPool := x509.NewCertPool()
	if !newCAPool.AppendCertsFromPEM(newCA) {
		return fmt.Errorf("parse CA file %q: no valid certificates found", i.caFile)
	}

	oldTransport := i.transport
	i.transport = i.createTransportWithCAPool(oldTransport, newCAPool)
	i.currentCA = newCA
	i.nextRead = time.Now().Add(caReloadInterval)

	if oldTransport != nil {
		// drop any idle connections from previous transport so new requests can be
		// moved to the new transport
		oldTransport.CloseIdleConnections()
	}

	return nil
}
Loading