Skip to content

Commit 30b3069

Browse files
authored
chore: allow automatically replacing a node even if it is active as an API BN (dfinity#3707)
When a node is redeployed, but hasn't been removed from the registry before that, it is automatically replaced: the old node record is removed and the new one created. Initially, this was only possible for unassigned nodes. Then, functionality was added such that this also worked for nodes assigned to a subnet. This PR enables the same behavior for API boundary nodes. When a node that is currently active as API boundary node is redeployed, the old node record and the old API boundary node record are removed, and a new node record and a new API boundary node record are created. We can't just use any node as API boundary node. The node needs to be configured with a domain name. This case will fail as we have an invariant in `invariants/api_boundary_node.rs` that ensures that each API boundary node record has an associated node record with a domain name. In addition, I removed the test `should_replace_node_in_subnet` in `do_remove_node_directly.rs` as it is a literal copy of `should_replace_node_in_subnet_and_update_allowance` with one difference in the comment of line 458 and 528.
1 parent 65a50e4 commit 30b3069

File tree

4 files changed

+214
-73
lines changed

4 files changed

+214
-73
lines changed

rs/nns/governance/unreleased_changelog.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ on the process that this file is part of, see
1313

1414
Two new fields are added to the request, and one to the response.
1515

16-
The request now supports `page_size` and `page_number`. If `page_size` is greater than
16+
The request now supports `page_size` and `page_number`. If `page_size` is greater than
1717
`MAX_LIST_NEURONS_RESULTS` (currently 500), the API will treat it as `MAX_LIST_NEURONS_RESULTS`, and
1818
continue procesisng the request. If `page_number` is None, the API will treat it as Some(0)
1919

@@ -22,7 +22,7 @@ additional requests need to be made.
2222

2323
This will only affect neuron holders with more than 500 neurons, which is a small minority.
2424

25-
This allows neuron holders with many neurons to list all of their neurons, whereas before,
25+
This allows neuron holders with many neurons to list all of their neurons, whereas before,
2626
responses could be too large to be sent by the protocol.
2727

2828
### Migrating Active Neurons to Stable Memory

rs/registry/canister/src/mutations/node_management/do_add_node.rs

Lines changed: 63 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,7 @@ impl Registry {
5555
println!("{}do_add_node: The node id is {:?}", LOG_PREFIX, node_id);
5656

5757
// 2. Clear out any nodes that already exist at this IP.
58-
// This will only succeed if:
59-
// - the same NO was in control of the original nodes.
60-
// - the nodes are no longer in subnets.
58+
// This will only succeed if the same NO was in control of the original nodes.
6159
//
6260
// (We use the http endpoint to be in line with what is used by the
6361
// release dashboard.)
@@ -118,7 +116,7 @@ impl Registry {
118116
.transpose()?
119117
.map(|node_reward_type| node_reward_type as i32);
120118

121-
// 5. Validate the domain is valid
119+
// 5. Validate the domain
122120
let domain: Option<String> = payload
123121
.domain
124122
.as_ref()
@@ -132,7 +130,7 @@ impl Registry {
132130
})
133131
.transpose()?;
134132

135-
// 6. If there is an IPv4 config, make sure that the IPv4 is not used by anyone else
133+
// 6. If there is an IPv4 config, make sure that the same IPv4 address is not used by any other node
136134
let ipv4_intf_config = payload.public_ipv4_config.clone().map(|ipv4_config| {
137135
ipv4_config.panic_on_invalid();
138136
IPv4InterfaceConfig {
@@ -322,10 +320,15 @@ mod tests {
322320
use ic_base_types::{NodeId, PrincipalId};
323321
use ic_config::crypto::CryptoConfig;
324322
use ic_crypto_node_key_generation::generate_node_keys_once;
325-
use ic_protobuf::registry::node_operator::v1::NodeOperatorRecord;
323+
use ic_protobuf::registry::{
324+
api_boundary_node::v1::ApiBoundaryNodeRecord, node_operator::v1::NodeOperatorRecord,
325+
};
326326
use ic_registry_canister_api::IPv4Config;
327-
use ic_registry_keys::{make_node_operator_record_key, make_node_record_key};
327+
use ic_registry_keys::{
328+
make_api_boundary_node_record_key, make_node_operator_record_key, make_node_record_key,
329+
};
328330
use ic_registry_transport::insert;
331+
use ic_types::ReplicaVersion;
329332
use itertools::Itertools;
330333
use lazy_static::lazy_static;
331334
use prost::Message;
@@ -859,6 +862,59 @@ mod tests {
859862
}
860863
}
861864

865+
#[test]
866+
fn should_add_node_and_replace_existing_api_boundary_node() {
867+
// This test verifies that adding a new node replaces an existing node in a subnet
868+
let mut registry = invariant_compliant_registry(0);
869+
870+
// Add a node to the registry
871+
let (mutate_request, node_ids_and_dkg_pks) = prepare_registry_with_nodes(1, 1);
872+
registry.maybe_apply_mutation_internal(mutate_request.mutations);
873+
let node_ids: Vec<NodeId> = node_ids_and_dkg_pks.keys().cloned().collect();
874+
875+
let old_node_id = node_ids[0];
876+
let old_node = registry.get_node(old_node_id).unwrap();
877+
878+
let node_operator_id = registry_add_node_operator_for_node(&mut registry, old_node_id, 0);
879+
880+
// Turn that node into an API boundary node
881+
let api_bn = ApiBoundaryNodeRecord {
882+
version: ReplicaVersion::default().to_string(),
883+
};
884+
registry.maybe_apply_mutation_internal(vec![insert(
885+
make_api_boundary_node_record_key(old_node_id),
886+
api_bn.encode_to_vec(),
887+
)]);
888+
889+
// Add a new node with the same IP address and port as an existing node, which should replace the existing node
890+
let (mut payload, _valid_pks) = prepare_add_node_payload(2);
891+
let http = old_node.http.unwrap();
892+
payload
893+
.http_endpoint
894+
.clone_from(&format!("[{}]:{}", http.ip_addr, http.port));
895+
let new_node_id = registry
896+
.do_add_node_(payload.clone(), node_operator_id)
897+
.expect("failed to add a node");
898+
899+
// Verify that there is an API boundary node record for the new node
900+
assert!(registry
901+
.get(
902+
make_api_boundary_node_record_key(new_node_id).as_bytes(),
903+
registry.latest_version()
904+
)
905+
.is_some());
906+
907+
// Verify the old node is removed from the registry
908+
assert!(registry.get_node(old_node_id).is_none());
909+
910+
// Verify the new node is present in the registry
911+
assert!(registry.get_node(new_node_id).is_some());
912+
913+
// Verify node operator allowance is unchanged
914+
let updated_operator = get_node_operator_record(&registry, node_operator_id).unwrap();
915+
assert_eq!(updated_operator.node_allowance, 0);
916+
}
917+
862918
#[test]
863919
#[should_panic(expected = "Node allowance for this Node Operator is exhausted")]
864920
fn should_panic_if_node_allowance_is_exhausted() {

rs/registry/canister/src/mutations/node_management/do_remove_node_directly.rs

Lines changed: 143 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use dfn_core::println;
1010
use ic_base_types::{NodeId, PrincipalId};
1111
use ic_registry_keys::{make_api_boundary_node_record_key, make_subnet_record_key};
1212
use ic_registry_transport::pb::v1::RegistryMutation;
13-
use ic_registry_transport::upsert;
13+
use ic_registry_transport::{delete, insert, upsert};
1414
use prost::Message;
1515

1616
impl Registry {
@@ -51,8 +51,9 @@ impl Registry {
5151
}
5252

5353
// Prepare mutations for removing or replacing a node in the registry.
54-
// If new_node_id is Some, the old node is in-place replaced with the new node, even if the old node is in a subnet.
55-
// If new_node_id is None, the old node is only removed from the registry and is not allowed to be in a subnet.
54+
// * If new_node_id is Some, the old node is in-place replaced with the new node, even if the old node is
55+
// in active use (i.e., assigned to a subnet or acts as an API boundary node).
56+
// * If new_node_id is None, the old node is only removed from the registry and is not allowed to be in active use.
5657
pub fn make_remove_or_replace_node_mutations(
5758
&mut self,
5859
payload: RemoveNodeDirectlyPayload,
@@ -121,21 +122,32 @@ impl Registry {
121122
);
122123
}
123124

124-
// 3. Ensure the node is not an API Boundary Node.
125-
// In order to succeed, a corresponding ApiBoundaryNodeRecord should be removed first via proposal.
126-
let api_bn_id = self.get_api_boundary_node_record(payload.node_id);
127-
if api_bn_id.is_some() {
128-
panic!(
129-
"{}do_remove_node_directly: Cannot remove a node, as it has ApiBoundaryNodeRecord with record_key: {}",
130-
LOG_PREFIX,
131-
make_api_boundary_node_record_key(payload.node_id)
132-
);
125+
let mut mutations = vec![];
126+
127+
// 3. Check if the node is an API Boundary Node. If there is a replacement node, remove the existing node
128+
// and try to assign the new one to act as API boundary node. This will only work if the new node meets all
129+
// the requirements of an API boundary node (e.g., it is configured with a domain name).
130+
if let Some(api_bn_record) = self.get_api_boundary_node_record(payload.node_id) {
131+
let Some(replacement_node_id) = new_node_id else {
132+
panic!(
133+
"{}do_remove_node_directly: Cannot remove this node, as it is an active API boundary node: {}",
134+
LOG_PREFIX,
135+
make_api_boundary_node_record_key(payload.node_id)
136+
);
137+
};
138+
139+
// remove the existing API boundary node record
140+
let old_key = make_api_boundary_node_record_key(payload.node_id);
141+
mutations.push(delete(old_key));
142+
143+
// create the new API boundary node record by just cloning the old one and inserting it with the new key
144+
let new_key = make_api_boundary_node_record_key(replacement_node_id);
145+
mutations.push(insert(new_key, api_bn_record.clone().encode_to_vec()));
133146
}
134147

135148
// 4. Check if node is in a subnet, and if so, replace it in the subnet by updating the membership in the subnet record.
136149
let subnet_list_record = get_subnet_list_record(self);
137150
let is_node_in_subnet = find_subnet_for_node(self, payload.node_id, &subnet_list_record);
138-
let mut mutations = vec![];
139151
if let Some(subnet_id) = is_node_in_subnet {
140152
if new_node_id.is_some() {
141153
// The node is in a subnet and is being replaced with a new node.
@@ -157,10 +169,10 @@ impl Registry {
157169
&mut subnet_record,
158170
subnet_membership,
159171
);
160-
mutations = vec![upsert(
172+
mutations.push(upsert(
161173
make_subnet_record_key(subnet_id),
162174
subnet_record.encode_to_vec(),
163-
)];
175+
));
164176
} else {
165177
panic!("{}do_remove_node_directly: Cannot remove a node that is a member of a subnet. This node is a member of Subnet: {}",
166178
LOG_PREFIX,
@@ -217,7 +229,7 @@ mod tests {
217229
api_boundary_node::v1::ApiBoundaryNodeRecord, node_operator::v1::NodeOperatorRecord,
218230
};
219231
use ic_registry_keys::{make_node_operator_record_key, make_node_record_key};
220-
use ic_registry_transport::insert;
232+
use ic_registry_transport::{insert, update};
221233
use ic_types::ReplicaVersion;
222234
use prost::Message;
223235
use std::str::FromStr;
@@ -234,8 +246,8 @@ mod tests {
234246
}
235247

236248
#[test]
237-
#[should_panic(expected = "Cannot remove a node, as it has ApiBoundaryNodeRecord")]
238-
fn should_panic_if_node_is_api_boundary_node() {
249+
#[should_panic(expected = "Cannot remove this node, as it is an active API boundary node")]
250+
fn should_panic_if_node_is_api_boundary_node_and_no_replacement() {
239251
let mut registry = invariant_compliant_registry(0);
240252
// Add node to registry
241253
let (mutate_request, node_ids_and_dkg_pks) =
@@ -261,6 +273,119 @@ mod tests {
261273
registry.do_remove_node(payload, node_operator_id);
262274
}
263275

276+
#[test]
277+
fn should_succeed_to_replace_api_boundary_node() {
278+
let mut registry = invariant_compliant_registry(0);
279+
// Add node to registry
280+
let (mutate_request, node_ids_and_dkg_pks) =
281+
prepare_registry_with_nodes(1 /* mutation id */, 2 /* node count */);
282+
registry.maybe_apply_mutation_internal(mutate_request.mutations);
283+
let mut node_ids = node_ids_and_dkg_pks.keys();
284+
let old_node_id = node_ids
285+
.next()
286+
.expect("should contain at least one node ID")
287+
.to_owned();
288+
let new_node_id = node_ids
289+
.next()
290+
.expect("should contain at least two node IDs")
291+
.to_owned();
292+
293+
let node_operator_id = registry_add_node_operator_for_node(&mut registry, old_node_id, 0);
294+
295+
// turn first node into an API BN by adding the record to the registry
296+
let api_bn = ApiBoundaryNodeRecord {
297+
version: ReplicaVersion::default().to_string(),
298+
};
299+
registry.maybe_apply_mutation_internal(vec![insert(
300+
make_api_boundary_node_record_key(old_node_id),
301+
api_bn.encode_to_vec(),
302+
)]);
303+
let payload = RemoveNodeDirectlyPayload {
304+
node_id: old_node_id,
305+
};
306+
307+
registry.do_replace_node_with_another(payload, node_operator_id, new_node_id);
308+
309+
// Verify the removed node's API boundary node record has been removed
310+
assert!(registry
311+
.get(
312+
make_api_boundary_node_record_key(old_node_id).as_bytes(),
313+
registry.latest_version()
314+
)
315+
.is_none());
316+
317+
// Verify the replacement node has been turned into an API boundary node
318+
assert!(registry
319+
.get(
320+
make_api_boundary_node_record_key(new_node_id).as_bytes(),
321+
registry.latest_version()
322+
)
323+
.is_some());
324+
325+
// Verify the old node is removed from the registry
326+
assert!(registry
327+
.get(
328+
make_node_record_key(old_node_id).as_bytes(),
329+
registry.latest_version()
330+
)
331+
.is_none());
332+
333+
// Verify the new node is present in the registry
334+
assert!(registry.get_node(new_node_id).is_some());
335+
336+
// Verify node operator allowance set to 1
337+
let updated_operator = get_node_operator_record(&registry, node_operator_id).unwrap();
338+
assert_eq!(updated_operator.node_allowance, 1);
339+
}
340+
341+
#[test]
342+
#[should_panic(
343+
expected = "invariant check failed with message: domain field of the NodeRecord"
344+
)]
345+
fn should_panic_to_replace_api_boundary_node_if_new_node_has_no_domain() {
346+
let mut registry = invariant_compliant_registry(0);
347+
// Add node to registry
348+
let (mutate_request, node_ids_and_dkg_pks) =
349+
prepare_registry_with_nodes(1 /* mutation id */, 2 /* node count */);
350+
registry.maybe_apply_mutation_internal(mutate_request.mutations);
351+
let mut node_ids = node_ids_and_dkg_pks.keys();
352+
let old_node_id = node_ids
353+
.next()
354+
.expect("should contain at least one node ID")
355+
.to_owned();
356+
let new_node_id = node_ids
357+
.next()
358+
.expect("should contain at least two node IDs")
359+
.to_owned();
360+
361+
let node_operator_id = registry_add_node_operator_for_node(&mut registry, old_node_id, 0);
362+
363+
// turn first node into an API BN by adding the record to the registry
364+
let api_bn = ApiBoundaryNodeRecord {
365+
version: ReplicaVersion::default().to_string(),
366+
};
367+
registry.maybe_apply_mutation_internal(vec![insert(
368+
make_api_boundary_node_record_key(old_node_id),
369+
api_bn.encode_to_vec(),
370+
)]);
371+
372+
// remove the default domain name from the replacement node such that the API boundary node invariant check fails
373+
let mut node_record = registry.get_node_or_panic(new_node_id);
374+
node_record.domain = None;
375+
let update_node_record = update(
376+
make_node_record_key(new_node_id).as_bytes(),
377+
node_record.encode_to_vec(),
378+
);
379+
let mutations = vec![update_node_record];
380+
registry.maybe_apply_mutation_internal(mutations);
381+
382+
let payload = RemoveNodeDirectlyPayload {
383+
node_id: old_node_id,
384+
};
385+
386+
registry.do_replace_node_with_another(payload, node_operator_id, new_node_id);
387+
}
388+
264389
#[test]
265390
fn should_succeed() {
266391
let mut registry = invariant_compliant_registry(0);
@@ -441,52 +566,6 @@ mod tests {
441566
// Should fail because the DC of operator1 and operator2 does not match
442567
registry.do_remove_node(payload, operator2_id);
443568
}
444-
#[test]
445-
fn should_replace_node_in_subnet() {
446-
let mut registry = invariant_compliant_registry(0);
447-
448-
// Add nodes to the registry
449-
let (mutate_request, node_ids_and_dkg_pks) = prepare_registry_with_nodes(1, 2);
450-
registry.maybe_apply_mutation_internal(mutate_request.mutations);
451-
let node_ids = node_ids_and_dkg_pks.keys().cloned().collect::<Vec<_>>();
452-
let node_operator_id = registry_add_node_operator_for_node(&mut registry, node_ids[0], 0);
453-
454-
// Create a subnet with the first node
455-
let subnet_id =
456-
registry_create_subnet_with_nodes(&mut registry, &node_ids_and_dkg_pks, &[0]);
457-
458-
// Replace the node_ids[0] with node_ids[1], while node_ids[0] is in a subnet
459-
let payload = RemoveNodeDirectlyPayload {
460-
node_id: node_ids[0],
461-
};
462-
463-
registry.do_replace_node_with_another(payload, node_operator_id, node_ids[1]);
464-
465-
// Verify the subnet record is updated with the new node
466-
let expected_membership: Vec<NodeId> = vec![node_ids[1]];
467-
let actual_membership: Vec<NodeId> = registry
468-
.get_subnet_or_panic(subnet_id)
469-
.membership
470-
.iter()
471-
.map(|bytes| NodeId::from(PrincipalId::try_from(bytes).unwrap()))
472-
.collect();
473-
assert_eq!(actual_membership, expected_membership);
474-
475-
// Verify the old node is removed from the registry
476-
assert!(registry
477-
.get(
478-
make_node_record_key(node_ids[0]).as_bytes(),
479-
registry.latest_version()
480-
)
481-
.is_none());
482-
483-
// Verify the new node is present in the registry
484-
assert!(registry.get_node(node_ids[1]).is_some());
485-
486-
// Verify node operator allowance increased by 1
487-
let updated_operator = get_node_operator_record(&registry, node_operator_id).unwrap();
488-
assert_eq!(updated_operator.node_allowance, 1);
489-
}
490569

491570
#[test]
492571
#[should_panic(expected = "Cannot remove a node that is a member of a subnet")]

0 commit comments

Comments
 (0)