Skip to content

Commit 826e090

Browse files
committed
Update node health endpoint (#4310)
## Issue Addressed [#4292](#4292) ## Proposed Changes Updated the node health endpoint so that it: returns a 200 status code if `!syncing && !el_offline && !optimistic`; returns a 206 status code if `(syncing || optimistic) && !el_offline`; returns a 503 status code if `el_offline`. ## Additional Info
1 parent edd0932 commit 826e090

File tree

3 files changed

+125
-22
lines changed

3 files changed

+125
-22
lines changed

beacon_node/http_api/src/lib.rs

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2418,24 +2418,41 @@ pub fn serve<T: BeaconChainTypes>(
24182418
.and(warp::path("health"))
24192419
.and(warp::path::end())
24202420
.and(network_globals.clone())
2421-
.and_then(|network_globals: Arc<NetworkGlobals<T::EthSpec>>| {
2422-
blocking_response_task(move || match *network_globals.sync_state.read() {
2423-
SyncState::SyncingFinalized { .. }
2424-
| SyncState::SyncingHead { .. }
2425-
| SyncState::SyncTransition
2426-
| SyncState::BackFillSyncing { .. } => Ok(warp::reply::with_status(
2427-
warp::reply(),
2428-
warp::http::StatusCode::PARTIAL_CONTENT,
2429-
)),
2430-
SyncState::Synced => Ok(warp::reply::with_status(
2431-
warp::reply(),
2432-
warp::http::StatusCode::OK,
2433-
)),
2434-
SyncState::Stalled => Err(warp_utils::reject::not_synced(
2435-
"sync stalled, beacon chain may not yet be initialized.".to_string(),
2436-
)),
2437-
})
2438-
});
2421+
.and(chain_filter.clone())
2422+
.and_then(
2423+
|network_globals: Arc<NetworkGlobals<T::EthSpec>>, chain: Arc<BeaconChain<T>>| {
2424+
async move {
2425+
let el_offline = if let Some(el) = &chain.execution_layer {
2426+
el.is_offline_or_erroring().await
2427+
} else {
2428+
true
2429+
};
2430+
2431+
blocking_response_task(move || {
2432+
let is_optimistic = chain
2433+
.is_optimistic_or_invalid_head()
2434+
.map_err(warp_utils::reject::beacon_chain_error)?;
2435+
2436+
let is_syncing = !network_globals.sync_state.read().is_synced();
2437+
2438+
if el_offline {
2439+
Err(warp_utils::reject::not_synced("execution layer is offline".to_string()))
2440+
} else if is_syncing || is_optimistic {
2441+
Ok(warp::reply::with_status(
2442+
warp::reply(),
2443+
warp::http::StatusCode::PARTIAL_CONTENT,
2444+
))
2445+
} else {
2446+
Ok(warp::reply::with_status(
2447+
warp::reply(),
2448+
warp::http::StatusCode::OK,
2449+
))
2450+
}
2451+
})
2452+
.await
2453+
}
2454+
},
2455+
);
24392456

24402457
// GET node/peers/{peer_id}
24412458
let get_node_peers_by_id = eth_v1

beacon_node/http_api/tests/status_tests.rs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use beacon_chain::{
33
test_utils::{AttestationStrategy, BlockStrategy, SyncCommitteeStrategy},
44
BlockError,
55
};
6+
use eth2::StatusCode;
67
use execution_layer::{PayloadStatusV1, PayloadStatusV1Status};
78
use http_api::test_utils::InteractiveTester;
89
use types::{EthSpec, ExecPayload, ForkName, MinimalEthSpec, Slot};
@@ -143,3 +144,82 @@ async fn el_error_on_new_payload() {
143144
assert_eq!(api_response.is_optimistic, Some(false));
144145
assert_eq!(api_response.is_syncing, false);
145146
}
147+
148+
/// Check `node health` endpoint when the EL is offline.
149+
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
150+
async fn node_health_el_offline() {
151+
let num_blocks = E::slots_per_epoch() / 2;
152+
let num_validators = E::slots_per_epoch();
153+
let tester = post_merge_tester(num_blocks, num_validators).await;
154+
let harness = &tester.harness;
155+
let mock_el = harness.mock_execution_layer.as_ref().unwrap();
156+
157+
// EL offline
158+
mock_el.server.set_syncing_response(Err("offline".into()));
159+
mock_el.el.upcheck().await;
160+
161+
let status = tester.client.get_node_health().await;
162+
match status {
163+
Ok(_) => {
164+
panic!("should return 503 error status code");
165+
}
166+
Err(e) => {
167+
assert_eq!(e.status().unwrap(), 503);
168+
}
169+
}
170+
}
171+
172+
/// Check `node health` endpoint when the EL is online and synced.
173+
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
174+
async fn node_health_el_online_and_synced() {
175+
let num_blocks = E::slots_per_epoch() / 2;
176+
let num_validators = E::slots_per_epoch();
177+
let tester = post_merge_tester(num_blocks, num_validators).await;
178+
let harness = &tester.harness;
179+
let mock_el = harness.mock_execution_layer.as_ref().unwrap();
180+
181+
// EL synced
182+
mock_el.server.set_syncing_response(Ok(false));
183+
mock_el.el.upcheck().await;
184+
185+
let status = tester.client.get_node_health().await;
186+
match status {
187+
Ok(response) => {
188+
assert_eq!(response, StatusCode::OK);
189+
}
190+
Err(_) => {
191+
panic!("should return 200 status code");
192+
}
193+
}
194+
}
195+
196+
/// Check `node health` endpoint when the EL is online but not synced.
197+
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
198+
async fn node_health_el_online_and_not_synced() {
199+
let num_blocks = E::slots_per_epoch() / 2;
200+
let num_validators = E::slots_per_epoch();
201+
let tester = post_merge_tester(num_blocks, num_validators).await;
202+
let harness = &tester.harness;
203+
let mock_el = harness.mock_execution_layer.as_ref().unwrap();
204+
205+
// EL not synced
206+
harness.advance_slot();
207+
mock_el.server.all_payloads_syncing(true);
208+
harness
209+
.extend_chain(
210+
1,
211+
BlockStrategy::OnCanonicalHead,
212+
AttestationStrategy::AllValidators,
213+
)
214+
.await;
215+
216+
let status = tester.client.get_node_health().await;
217+
match status {
218+
Ok(response) => {
219+
assert_eq!(response, StatusCode::PARTIAL_CONTENT);
220+
}
221+
Err(_) => {
222+
panic!("should return 206 status code");
223+
}
224+
}
225+
}

beacon_node/http_api/tests/tests.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use eth2::{
88
mixin::{RequestAccept, ResponseForkName, ResponseOptional},
99
reqwest::RequestBuilder,
1010
types::{BlockId as CoreBlockId, ForkChoiceNode, StateId as CoreStateId, *},
11-
BeaconNodeHttpClient, Error, StatusCode, Timeouts,
11+
BeaconNodeHttpClient, Error, Timeouts,
1212
};
1313
use execution_layer::test_utils::TestingBuilder;
1414
use execution_layer::test_utils::DEFAULT_BUILDER_THRESHOLD_WEI;
@@ -1762,9 +1762,15 @@ impl ApiTester {
17621762
}
17631763

17641764
pub async fn test_get_node_health(self) -> Self {
1765-
let status = self.client.get_node_health().await.unwrap();
1766-
assert_eq!(status, StatusCode::OK);
1767-
1765+
let status = self.client.get_node_health().await;
1766+
match status {
1767+
Ok(_) => {
1768+
panic!("should return 503 error status code");
1769+
}
1770+
Err(e) => {
1771+
assert_eq!(e.status().unwrap(), 503);
1772+
}
1773+
}
17681774
self
17691775
}
17701776

0 commit comments

Comments
 (0)