Skip to content

Commit fdaeec6

Browse files
committed
Monitoring service api (sigp#2251)
## Issue Addressed

N/A

## Proposed Changes

Adds a client-side API for collecting system and process metrics and pushing them to a monitoring service.
1 parent 55aada0 commit fdaeec6

File tree

30 files changed

+1103
-60
lines changed

30 files changed

+1103
-60
lines changed

Cargo.lock

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ members = [
4040
"common/validator_dir",
4141
"common/warp_utils",
4242
"common/fallback",
43+
"common/monitoring_api",
4344

4445
"consensus/cached_tree_hash",
4546
"consensus/int_to_bytes",

beacon_node/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,5 @@ hyper = "0.14.4"
4444
lighthouse_version = { path = "../common/lighthouse_version" }
4545
hex = "0.4.2"
4646
slasher = { path = "../slasher" }
47+
monitoring_api = { path = "../common/monitoring_api" }
4748
sensitive_url = { path = "../common/sensitive_url" }

beacon_node/client/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,4 @@ http_api = { path = "../http_api" }
4444
http_metrics = { path = "../http_metrics" }
4545
slasher = { path = "../../slasher" }
4646
slasher_service = { path = "../../slasher/service" }
47+
monitoring_api = {path = "../../common/monitoring_api"}

beacon_node/client/src/builder.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use environment::RuntimeContext;
1414
use eth1::{Config as Eth1Config, Service as Eth1Service};
1515
use eth2_libp2p::NetworkGlobals;
1616
use genesis::{interop_genesis_state, Eth1GenesisService};
17+
use monitoring_api::{MonitoringHttpClient, ProcessType};
1718
use network::{NetworkConfig, NetworkMessage, NetworkService};
1819
use slasher::Slasher;
1920
use slasher_service::SlasherService;
@@ -374,6 +375,22 @@ where
374375
SlasherService::new(beacon_chain, network_send).run(&context.executor)
375376
}
376377

378+
/// Starts the monitoring client, which periodically sends beacon node
379+
/// and system metrics to the configured monitoring endpoint.
///
/// # Errors
///
/// Returns an error if no `runtime_context` has been set on the builder, or if
/// `MonitoringHttpClient::new` fails for the supplied `config`.
380+
pub fn monitoring_client(self, config: &monitoring_api::Config) -> Result<Self, String> {
381+
let context = self
382+
.runtime_context
383+
.as_ref()
384+
.ok_or("monitoring_client requires a runtime_context")?
385+
.service_context("monitoring_client".into());
386+
let monitoring_client = MonitoringHttpClient::new(config, context.log().clone())?;
387+
monitoring_client.auto_update(
388+
context.executor,
389+
vec![ProcessType::BeaconNode, ProcessType::System],
390+
);
391+
Ok(self)
392+
}
393+
377394
/// Immediately starts the service that periodically logs information each slot.
378395
pub fn notifier(self) -> Result<Self, String> {
379396
let context = self

beacon_node/client/src/config.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ pub struct Config {
6666
pub eth1: eth1::Config,
6767
pub http_api: http_api::Config,
6868
pub http_metrics: http_metrics::Config,
69+
pub monitoring_api: Option<monitoring_api::Config>,
6970
pub slasher: Option<slasher::Config>,
7071
}
7172

@@ -87,6 +88,7 @@ impl Default for Config {
8788
graffiti: Graffiti::default(),
8889
http_api: <_>::default(),
8990
http_metrics: <_>::default(),
91+
monitoring_api: None,
9092
slasher: None,
9193
validator_monitor_auto: false,
9294
validator_monitor_pubkeys: vec![],

beacon_node/client/src/metrics.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,14 @@ lazy_static! {
66
"sync_slots_per_second",
77
"The number of blocks being imported per second"
88
);
9+
10+
pub static ref IS_SYNCED: Result<IntGauge> = try_create_int_gauge(
11+
"sync_eth2_synced",
12+
"Metric to check if the beacon chain is synced to head. 0 if not synced and non-zero if synced"
13+
);
14+
15+
pub static ref NOTIFIER_HEAD_SLOT: Result<IntGauge> = try_create_int_gauge(
16+
"notifier_head_slot",
17+
"The head slot sourced from the beacon chain notifier"
18+
);
919
}

beacon_node/client/src/notifier.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
7777
};
7878

7979
let head_slot = head_info.slot;
80+
81+
metrics::set_gauge(&metrics::NOTIFIER_HEAD_SLOT, head_slot.as_u64() as i64);
82+
8083
let current_slot = match beacon_chain.slot() {
8184
Ok(slot) => slot,
8285
Err(e) => {
@@ -123,6 +126,7 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
123126

124127
// Log if we are syncing
125128
if sync_state.is_syncing() {
129+
metrics::set_gauge(&metrics::IS_SYNCED, 0);
126130
let distance = format!(
127131
"{} slots ({})",
128132
head_distance.as_u64(),
@@ -151,6 +155,7 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
151155
);
152156
}
153157
} else if sync_state.is_synced() {
158+
metrics::set_gauge(&metrics::IS_SYNCED, 1);
154159
let block_info = if current_slot > head_slot {
155160
" … empty".to_string()
156161
} else {
@@ -167,6 +172,7 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
167172
"slot" => current_slot,
168173
);
169174
} else {
175+
metrics::set_gauge(&metrics::IS_SYNCED, 0);
170176
info!(
171177
log,
172178
"Searching for peers";

beacon_node/eth1/src/metrics.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,23 @@ lazy_static! {
2626
pub static ref ENDPOINT_REQUESTS: Result<IntCounterVec> = try_create_int_counter_vec(
2727
"eth1_endpoint_requests", "The number of eth1 requests for each endpoint", &["endpoint"]
2828
);
29+
30+
/*
31+
* Eth1 rpc connection
32+
*/
33+
34+
pub static ref ETH1_CONNECTED: Result<IntGauge> = try_create_int_gauge(
35+
"sync_eth1_connected", "Set to 1 if connected to an eth1 node, otherwise set to 0"
36+
);
37+
38+
pub static ref ETH1_FALLBACK_CONFIGURED: Result<IntGauge> = try_create_int_gauge(
39+
"sync_eth1_fallback_configured", "Number of configured eth1 fallbacks"
40+
);
41+
42+
// Note: This metric only checks if an eth1 fallback is configured, not if it is connected and synced.
43+
// Checking for liveness of the fallback would require moving away from lazy checking of fallbacks.
44+
pub static ref ETH1_FALLBACK_CONNECTED: Result<IntGauge> = try_create_int_gauge(
45+
"sync_eth1_fallback_connected", "Set to 1 if an eth1 fallback is connected, otherwise set to 0"
46+
);
47+
2948
}

beacon_node/eth1/src/service.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ impl EndpointsCache {
9494
&crate::metrics::ENDPOINT_ERRORS,
9595
&[&endpoint.0.to_string()],
9696
);
97+
crate::metrics::set_gauge(&metrics::ETH1_CONNECTED, 0);
98+
} else {
99+
crate::metrics::set_gauge(&metrics::ETH1_CONNECTED, 1);
97100
}
98101
state
99102
}
@@ -730,13 +733,23 @@ impl Service {
730733

731734
let mut interval = interval_at(Instant::now(), update_interval);
732735

736+
// The first endpoint is the primary; `saturating_sub` avoids a usize underflow if no endpoints are configured.
let num_fallbacks = self.config().endpoints.len().saturating_sub(1);
733737
let update_future = async move {
734738
loop {
735739
interval.tick().await;
736740
self.do_update(update_interval).await.ok();
737741
}
738742
};
739743

744+
// Set the number of configured eth1 fallback endpoints
745+
metrics::set_gauge(&metrics::ETH1_FALLBACK_CONFIGURED, num_fallbacks as i64);
746+
// Since we lazily update eth1 fallbacks, it's not possible to know the connection status of a fallback.
747+
// Hence, we set it to 1 if we have at least one configured fallback.
748+
if num_fallbacks > 0 {
749+
metrics::set_gauge(&metrics::ETH1_FALLBACK_CONNECTED, 1);
750+
} else {
751+
metrics::set_gauge(&metrics::ETH1_FALLBACK_CONNECTED, 0);
752+
}
740753
handle.spawn(update_future, "eth1");
741754
}
742755

0 commit comments

Comments
 (0)