Skip to content

Commit ad34794

Browse files
committed
feature(azure): Add Azure Key Management Service (KMS) integration
- Add Azure KMS configuration support to provisioner and VM provider - Pre-create Azure KMS keys per region and reuse SCT configuration option 'enterprise_disable_kms' to enable EaR with managed identity. - Add SCT configuration option 'enable_kms_key_rotation' for key rotation. Signed-off-by: Lakshmipathi.Ganapathi <[email protected]>
1 parent 52ab6fb commit ad34794

File tree

12 files changed

+301
-5
lines changed

12 files changed

+301
-5
lines changed

docs/configuration_options.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2834,13 +2834,22 @@ options will be used for enable encryption at-rest for tables
28342834

28352835
## **kms_key_rotation_interval** / SCT_KMS_KEY_ROTATION_INTERVAL
28362836

2837-
The time interval in minutes which gets waited before the KMS key rotation happens. Applied when the AWS KMS service is configured to be used.
2837+
The time interval, in minutes, to wait between KMS key rotations. Applies when the AWS KMS or Azure KMS service is configured to be used. NOTE: Be aware that each Azure key rotation costs $1.
28382838

28392839
**default:** N/A
28402840

28412841
**type:** int
28422842

28432843

2844+
## **enable_kms_key_rotation** / SCT_ENABLE_KMS_KEY_ROTATION
2845+
2846+
Enables or disables KMS key rotation. Applicable only to the Azure backend; on the AWS backend, KMS keys are currently always rotated.
2847+
2848+
**default:** N/A
2849+
2850+
**type:** boolean
2851+
2852+
28442853
## **enterprise_disable_kms** / SCT_ENTERPRISE_DISABLE_KMS
28452854

28462855
An escape hatch to disable KMS for enterprise run, when needed, we enable kms by default since if we use scylla 2023.1.3 and up

sdcm/cluster.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@
8787
from sdcm.utils import properties
8888
from sdcm.utils.adaptive_timeouts import Operations, adaptive_timeout
8989
from sdcm.utils.aws_kms import AwsKms
90+
from sdcm.utils.azure_utils import AzureService
91+
from sdcm.provision.azure.kms_provider import AzureKmsProvider
92+
from azure.core.exceptions import ResourceNotFoundError as AzureResourceNotFoundError
9093
from sdcm.utils.cql_utils import cql_quote_if_needed
9194
from sdcm.utils.benchmarks import ScyllaClusterBenchmarkManager
9295
from sdcm.utils.common import (
@@ -4790,6 +4793,31 @@ def _rotate_kms_key(kms_key_alias_name, kms_key_rotation_interval, db_cluster):
47904793
kms_key_rotation_thread.start()
47914794
return None
47924795

4796+
def start_azure_kms_key_rotation_thread(self) -> None:
    """Start a daemon thread that periodically rotates the Azure KMS key of this test.

    Nothing is started unless ``cluster_backend`` is ``'azure'``,
    ``enable_kms_key_rotation`` is truthy and ``kms_key_rotation_interval``
    (minutes) is a positive number.  The worker sleeps for the interval,
    rotates the key returned by ``AzureKmsProvider.get_key_uri_for_test`` for
    the first entry of ``azure_region_name``, and repeats until the process
    exits.

    Returns:
        Always ``None``.
    """
    if self.params.get("cluster_backend") != 'azure':
        return None
    if not self.params.get("enable_kms_key_rotation"):
        return None

    # Validate the interval before spawning the worker: a missing value would
    # raise TypeError inside the thread (killing it silently), zero would rotate
    # in a tight loop, and a negative value makes time.sleep() raise ValueError.
    rotation_interval = self.params.get("kms_key_rotation_interval")
    if not rotation_interval or rotation_interval <= 0:
        self.log.warning(
            "Azure KMS key rotation is enabled but 'kms_key_rotation_interval' is not a positive number (%s), "
            "skipping key rotation", rotation_interval)
        return None
    sleep_seconds = rotation_interval * 60

    test_id = str(self.test_config.test_id())
    region = self.params.get('azure_region_name')[0]

    def _rotate():
        azure_service = AzureService()

        while True:
            time.sleep(sleep_seconds)
            try:
                key_uri = AzureKmsProvider.get_key_uri_for_test(region, test_id)
                rotated_key_id = azure_service.rotate_vault_key(key_uri)
                self.log.info("Azure KMS key rotated for test %s: %s", test_id, rotated_key_id)
            except AzureResourceNotFoundError as e:
                # A missing key is logged and retried on the next cycle.
                self.log.error("Azure KMS key not found for rotation: %s", e)

    threading.Thread(target=_rotate, daemon=True, name='AzureKmsRotationThread').start()
    self.log.info("Started Azure KMS rotation thread for test: %s", test_id)
    return None
4820+
47934821
def scylla_configure_non_root_installation(self, node, devname):
47944822
node.stop_scylla_server(verify_down=False)
47954823
node.remoter.run(f'{node.offline_install_dir}/sbin/scylla_setup --nic {devname} --no-raid-setup',

sdcm/keystore.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ def get_docker_hub_credentials(self):
114114
def get_azure_credentials(self):
115115
return self.get_json("azure.json")
116116

117+
def get_azure_kms_config(self):
    """Return the parsed content of the ``azure_kms_config.json`` keystore entry."""
    kms_config = self.get_json("azure_kms_config.json")
    return kms_config
119+
117120
def get_argusdb_credentials(self):
118121
return self.get_json("argusdb_config_v2.json")
119122

sdcm/provision/azure/kms_provider.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
2+
# This program is free software; you can redistribute it and/or modify
3+
# it under the terms of the GNU Affero General Public License as published by
4+
# the Free Software Foundation; either version 3 of the License, or
5+
# (at your option) any later version.
6+
#
7+
# This program is distributed in the hope that it will be useful,
8+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10+
#
11+
# See LICENSE for more details.
12+
#
13+
# Copyright (c) 2025 ScyllaDB
14+
import logging
15+
from dataclasses import dataclass
16+
17+
from azure.core.exceptions import AzureError
18+
from sdcm.utils.azure_utils import AzureService
19+
from sdcm.keystore import KeyStore
20+
21+
LOGGER = logging.getLogger(__name__)
22+
23+
24+
@dataclass
class AzureKmsProvider:
    """Prepare the shared per-region Azure Key Vault and select the key a test uses.

    The KMS settings (vault name prefix, resource group, managed identity,
    SCT service principal and number of pre-created keys) are read from the
    keystore entry returned by ``KeyStore().get_azure_kms_config()``.
    """
    _resource_group_name: str
    _region: str
    _az: str
    # NOTE(review): this default is evaluated once, when the class body runs, so all
    # instances created without an explicit service share the same AzureService object.
    _azure_service: AzureService = AzureService()

    def __post_init__(self):
        # Load the KMS configuration once per provider instance.
        self._kms_config = KeyStore().get_azure_kms_config()

    @property
    def managed_identity_config(self):
        """Resource group, name and principal id of the managed identity, taken from the KMS config."""
        return {
            'resource_group': self._kms_config['resource_group'],
            'identity_name': self._kms_config['identity_name'],
            'principal_id': self._kms_config['managed_identity_principal_id'],
        }

    @property
    def sct_service_principal_id(self):
        """Object id of the SCT service principal, taken from the KMS config."""
        return self._kms_config['sct_service_principal_id']

    @classmethod
    def _get_vault_name(cls, region: str) -> str:
        """Generate vault name for the given region"""
        kms_config = KeyStore().get_azure_kms_config()
        return f"{kms_config['shared_vault_name']}-{region}"

    @staticmethod
    def _select_key_number(test_id: str, num_of_keys: int) -> int:
        """Map ``test_id`` to a key number in ``1..num_of_keys``, stable across processes.

        The built-in ``hash()`` is salted per interpreter process for ``str``
        values, so it could select different keys in the process that provisions
        the VMs and in the one that configures Scylla / rotates the key.  CRC32
        gives the same result everywhere.

        Raises:
            ValueError: if ``num_of_keys`` is smaller than 1.
        """
        import zlib  # local import: keeps this block self-contained

        if num_of_keys < 1:
            raise ValueError(f"Azure KMS config 'num_of_keys' must be >= 1, got {num_of_keys}")
        return zlib.crc32(str(test_id).encode("utf-8")) % num_of_keys + 1

    def _get_managed_identity_id(self) -> str:
        """Build the ARM resource id of the user-assigned managed identity."""
        return (
            f"/subscriptions/{self._azure_service.subscription_id}"
            f"/resourcegroups/{self.managed_identity_config['resource_group']}"
            "/providers/Microsoft.ManagedIdentity"
            f"/userAssignedIdentities/{self.managed_identity_config['identity_name']}"
        )

    @classmethod
    def get_key_uri_for_test(cls, region: str, test_id: str) -> str:
        """Return the URI of the vault key assigned to ``test_id`` in ``region``.

        No Azure call is made here; the URI is composed from the configured
        vault name and the key number derived from ``test_id``.
        """
        vault_name = cls._get_vault_name(region)
        vault_uri = f"https://{vault_name}.vault.azure.net/"
        kms_config = KeyStore().get_azure_kms_config()
        key_number = cls._select_key_number(test_id, kms_config['num_of_keys'])
        return f"{vault_uri}scylla-key-{key_number}"

    def get_or_create_keyvault_and_identity(self, test_id: str):
        """Create or update the region vault, make sure its keys exist and pick one for the test.

        Returns:
            A dict with ``identity_id``, ``vault_uri`` and ``key_uri``, or
            ``None`` when an ``AzureError`` is raised during the setup (the
            error is logged as a warning).
        """
        vault_name = self._get_vault_name(self._region)
        try:
            tenant_id = self._azure_service.azure_credentials["tenant_id"]
            vault = self._azure_service.keyvault.vaults.begin_create_or_update(
                resource_group_name=self._kms_config['resource_group'], vault_name=vault_name,
                parameters={
                    "location": self._region,
                    "properties": {
                        "tenant_id": tenant_id,
                        "sku": {"name": "standard", "family": "A"},
                        "enabled_for_disk_encryption": True,
                        "enable_rbac_authorization": False,
                        "access_policies": [{
                            # Managed identity
                            "tenant_id": tenant_id,
                            "object_id": self.managed_identity_config['principal_id'],
                            "permissions": {
                                "keys": ["get", "encrypt", "decrypt", "wrapKey", "unwrapKey"],
                                "secrets": ["get"],
                                "certificates": ["get"]
                            }
                        }, {
                            # SCT service principal
                            "tenant_id": tenant_id,
                            "object_id": self.sct_service_principal_id,
                            "permissions": {
                                "keys": ["create", "get", "list", "update", "import", "delete", "rotate"],
                                "secrets": ["get"],
                                "certificates": ["get"]
                            }
                        }],
                    }
                }
            ).result()

            vault_uri = vault.properties.vault_uri

            # Pick one key, if required create keys.
            num_of_keys = self._kms_config['num_of_keys']
            for i in range(1, num_of_keys + 1):
                key_name = f"scylla-key-{i}"
                if not self._azure_service.get_vault_key(vault_uri, key_name):
                    self._azure_service.create_vault_key(vault_uri, key_name)
                    LOGGER.info("Created key: %s", key_name)

            # Must match the selection done by get_key_uri_for_test().
            key_number = self._select_key_number(test_id, num_of_keys)
            key_uri = f"{vault_uri}scylla-key-{key_number}"
            vault_info = {
                'identity_id': self._get_managed_identity_id(),
                'vault_uri': vault_uri,
                'key_uri': key_uri
            }
            return vault_info
        except AzureError as e:
            LOGGER.warning("Failed to setup Azure KMS: %s", e)
            return None

sdcm/provision/azure/provisioner.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,11 @@ class AzureProvisioner(Provisioner):
3838
"""Provides api for VM provisioning in Azure cloud, tuned for Scylla QA. """
3939

4040
def __init__(self, test_id: str, region: str, availability_zone: str,
41-
azure_service: AzureService = AzureService(), **_):
41+
azure_service: AzureService = AzureService(), **config):
4242
availability_zone = self._convert_az_to_zone(availability_zone)
4343
super().__init__(test_id, region, availability_zone)
44+
# NOTE: disable Azure KMS if not configured explicitly
45+
self._enable_azure_kms = not config.get('enterprise_disable_kms') in (True, None)
4446
self._azure_service: AzureService = azure_service
4547
self._cache: Dict[str, VmInstance] = {}
4648
LOGGER.debug("getting resources for %s...", self._resource_group_name)
@@ -54,7 +56,7 @@ def __init__(self, test_id: str, region: str, availability_zone: str,
5456
self._ip_provider = IpAddressProvider(self._resource_group_name, self._region, self._az, self._azure_service)
5557
self._nic_provider = NetworkInterfaceProvider(self._resource_group_name, self._region, self._azure_service)
5658
self._vm_provider = VirtualMachineProvider(
57-
self._resource_group_name, self._region, self._az, self._azure_service)
59+
self._resource_group_name, self._region, self._az, self._enable_azure_kms, self._azure_service)
5860
for v_m in self._vm_provider.list():
5961
try:
6062
self._cache[v_m.name] = self._vm_to_instance(v_m)
@@ -145,7 +147,7 @@ def get_or_create_instances(self,
145147
self._resource_group_name, self._region, self._az, self._azure_service)
146148
self._nic_provider = NetworkInterfaceProvider(self._resource_group_name, self._region, self._azure_service)
147149
self._vm_provider = VirtualMachineProvider(
148-
self._resource_group_name, self._region, self._az, self._azure_service)
150+
self._resource_group_name, self._region, self._az, self._enable_azure_kms, self._azure_service)
149151
raise
150152
for definition, v_m in zip(definitions, v_ms):
151153
instance = self._vm_to_instance(v_m)

sdcm/provision/azure/virtual_machine_provider.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,22 @@
2424
from azure.mgmt.compute.models import VirtualMachine, RunCommandInput
2525
from invoke import Result
2626

27+
from sdcm.provision.azure.kms_provider import AzureKmsProvider
2728
from sdcm.provision.provisioner import InstanceDefinition, PricingModel, ProvisionError, OperationPreemptedError
2829
from sdcm.provision.user_data import UserDataBuilder
2930
from sdcm.utils.azure_utils import AzureService
3031

3132
LOGGER = logging.getLogger(__name__)
3233

34+
SCT_RESOURCE_GROUP_PREFIX = "SCT-"
35+
3336

3437
@dataclass
3538
class VirtualMachineProvider:
3639
_resource_group_name: str
3740
_region: str
3841
_az: str
42+
_enable_azure_kms: bool = False
3943
_azure_service: AzureService = AzureService()
4044
_cache: Dict[str, VirtualMachine] = field(default_factory=dict)
4145

@@ -80,6 +84,19 @@ def get_or_create(self, definitions: List[InstanceDefinition], nics_ids: List[st
8084
}],
8185
},
8286
}
87+
88+
if self._enable_azure_kms:
89+
self._kms_provider = AzureKmsProvider(
90+
self._resource_group_name, self._region, self._az, self._azure_service)
91+
# Extract test_id from resource group name
92+
test_id = self._resource_group_name.split(SCT_RESOURCE_GROUP_PREFIX)[-1][:36]
93+
vault_info = self._kms_provider.get_or_create_keyvault_and_identity(test_id)
94+
params["identity"] = {"type": "UserAssigned",
95+
"user_assigned_identities": {vault_info['identity_id']: {}}}
96+
LOGGER.info(f"Azure Key Vault enabled for {definition.name}")
97+
else:
98+
LOGGER.info(f"Azure Key Vault disabled for {definition.name}")
99+
83100
if definition.user_data is None:
84101
# in case we use specialized image, we don't change things like computer_name, usernames, ssh_keys
85102
os_profile = {}

sdcm/sct_config.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1440,7 +1440,14 @@ class SCTConfiguration(dict):
14401440

14411441
dict(name="kms_key_rotation_interval", env="SCT_KMS_KEY_ROTATION_INTERVAL", type=int,
14421442
help="The time interval in minutes which gets waited before the KMS key rotation happens."
1443-
" Applied when the AWS KMS service is configured to be used."),
1443+
" Applied when AWS KMS or Azure KMS service is configured to be used."
1444+
" NOTE: Be aware that Azure Key rotations cost $1/rotation."),
1445+
1446+
# TODO: AWS KMS needs to support the enable_kms_key_rotation config option
1447+
1448+
dict(name="enable_kms_key_rotation", env="SCT_ENABLE_KMS_KEY_ROTATION", type=boolean,
1449+
help="Allows to disable KMS keys rotation. Applicable only to Azure backend. "
1450+
"In case of AWS backend its KMS keys will always be rotated as of now."),
14441451

14451452
dict(name="enterprise_disable_kms", env="SCT_ENTERPRISE_DISABLE_KMS", type=boolean,
14461453
help="An escape hatch to disable KMS for enterprise run, when needed, "

sdcm/tester.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
from argus.common.enums import TestStatus
5050
from sdcm import nemesis, cluster_docker, cluster_k8s, cluster_baremetal, db_stats, wait
5151
from sdcm.cloud_api_client import ScyllaCloudAPIClient
52+
from sdcm.provision.azure.kms_provider import AzureKmsProvider
5253
from sdcm.cluster import BaseCluster, NoMonitorSet, SCYLLA_DIR, TestConfig, UserRemoteCredentials, BaseLoaderSet, BaseMonitorSet, \
5354
BaseScyllaCluster, BaseNode, MINUTE_IN_SEC
5455
from sdcm.cluster_azure import ScyllaAzureCluster, LoaderSetAzure, MonitorSetAzure
@@ -885,6 +886,56 @@ def prepare_kms_host(self) -> None:
885886
self.params["append_scylla_yaml"] = append_scylla_yaml
886887
return None
887888

889+
def prepare_azure_kms(self) -> None:
    """Inject Azure KMS encryption-at-rest settings into ``append_scylla_yaml``.

    When the Scylla version is at least ``2025.4.0~dev``, the backend is Azure,
    no ``scylla_encryption_options`` are configured, the test does not use the
    ``mixed_scylla`` db type and ``enterprise_disable_kms`` is not set, default
    Azure encryption options are installed first.  Then, if the encryption
    options name an ``azure_host``, that host is registered under
    ``azure_hosts`` with the key URI selected for this test, and user/system
    info encryption are switched to the Azure key provider.

    Raises:
        NotImplementedError: if more than one Azure region is configured.
    """
    scylla_version = self.params.scylla_version
    if not scylla_version:
        return None

    kms_supported = ComparableScyllaVersion(scylla_version) >= '2025.4.0~dev'
    on_azure = self.params.get('cluster_backend') in ('azure',)
    preconfigured = self.params.get('scylla_encryption_options')
    with_oracle = self.params.get("db_type") == "mixed_scylla"

    if kms_supported and on_azure and not (
            preconfigured or with_oracle or self.params.get('enterprise_disable_kms')):
        self.params['scylla_encryption_options'] = "{ 'cipher_algorithm' : 'AES/ECB/PKCS5Padding', 'secret_key_strength' : 128, 'key_provider': 'AzureKeyProviderFactory', 'azure_host': 'scylla-azure-kms'}"

    encryption_options = self.params.get("scylla_encryption_options") or ''
    if not encryption_options:
        return None
    parsed_options = yaml.safe_load(encryption_options) or {}
    azure_host = parsed_options.get("azure_host") or ''
    if not azure_host:
        return None

    test_id = str(self.test_config.test_id())
    scylla_yaml_extra = self.params.get("append_scylla_yaml") or {}
    scylla_yaml_extra.setdefault("azure_hosts", {})

    regions = self.params.get('azure_region_name')
    if len(regions) > 1:
        raise NotImplementedError("Azure KMS multi-dc support is not yet implemented")

    scylla_yaml_extra["azure_hosts"][azure_host] = {
        'master_key': AzureKmsProvider.get_key_uri_for_test(regions[0], test_id),
    }
    # Both sections get their own dict with identical content.
    for section in ('user_info_encryption', 'system_info_encryption'):
        scylla_yaml_extra[section] = {
            'enabled': True,
            'key_provider': 'AzureKeyProviderFactory',
            'azure_host': azure_host,
        }

    self.params["append_scylla_yaml"] = scylla_yaml_extra
    return None
938+
888939
def kafka_configure(self):
889940
if self.kafka_cluster:
890941
for connector_config in self.params.get('kafka_connectors'):
@@ -1014,6 +1065,7 @@ def setUp(self): # noqa: PLR0912, PLR0915
10141065
if self.is_encrypt_keys_needed:
10151066
self.download_encrypt_keys()
10161067
self.prepare_kms_host()
1068+
self.prepare_azure_kms()
10171069

10181070
self.nemesis_allocator = NemesisNodeAllocator(self)
10191071

@@ -1108,6 +1160,7 @@ def _db_post_validation():
11081160
for db_cluster in self.db_clusters_multitenant:
11091161
if db_cluster:
11101162
db_cluster.start_kms_key_rotation_thread()
1163+
db_cluster.start_azure_kms_key_rotation_thread()
11111164

11121165
for future in as_completed(futures):
11131166
future.result()

0 commit comments

Comments
 (0)