Skip to content

Commit 3cbf8ac

Browse files
michaelsproulWoodpile37
authored andcommitted
Fix bug in block root storage (sigp#4663)
Fix a bug in the storage of the linear block roots array in the freezer DB. Previously this array was always written as part of state storage (or block backfill). With state pruning enabled by sigp#4610, these states were no longer being written and as a result neither were the block roots. The impact is quite low, we would just log an error when trying to forwards-iterate the block roots, which for validating nodes only happens when they try to look up blocks for peers: > Aug 25 03:42:36.980 ERRO Missing chunk in forwards iterator chunk index: 49726, service: freezer_db Any node checkpoint synced off `unstable` is affected and has a corrupt database. If you see the log above, you need to re-sync with the fix. Nodes that haven't checkpoint synced recently should _not_ be corrupted, even if they ran the buggy version. - Use a `ChunkWriter` to write the block roots when states are not being stored. - Tweak the usage of `get_latest_restore_point` so that it doesn't return a nonsense value when state pruning is enabled. - Tweak the guarantee on the block roots array so that block roots are assumed available up to the split slot (exclusive). This is a bit nicer than relying on anything to do with the latest restore point, which is a nonsensical concept when there aren't any restore points. I'm looking forward to deleting the chunked vector code for good when we merge tree-states :grin:
1 parent a39b05a commit 3cbf8ac

File tree

7 files changed

+79
-25
lines changed

7 files changed

+79
-25
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

beacon_node/beacon_chain/src/builder.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,21 @@ where
485485
let (_, updated_builder) = self.set_genesis_state(genesis_state)?;
486486
self = updated_builder;
487487

488+
// Fill in the linear block roots between the checkpoint block's slot and the aligned
489+
// state's slot. All slots less than the block's slot will be handled by block backfill,
490+
// while states greater or equal to the checkpoint state will be handled by `migrate_db`.
491+
let block_root_batch = store
492+
.store_frozen_block_root_at_skip_slots(
493+
weak_subj_block.slot(),
494+
weak_subj_state.slot(),
495+
weak_subj_block_root,
496+
)
497+
.map_err(|e| format!("Error writing frozen block roots: {e:?}"))?;
498+
store
499+
.cold_db
500+
.do_atomically(block_root_batch)
501+
.map_err(|e| format!("Error writing frozen block roots: {e:?}"))?;
502+
488503
// Write the state and block non-atomically, it doesn't matter if they're forgotten
489504
// about on a crash restart.
490505
store

beacon_node/beacon_chain/tests/store_tests.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ async fn forwards_iter_block_and_state_roots_until() {
417417

418418
// The last restore point slot is the point at which the hybrid forwards iterator behaviour
419419
// changes.
420-
let last_restore_point_slot = store.get_latest_restore_point_slot();
420+
let last_restore_point_slot = store.get_latest_restore_point_slot().unwrap();
421421
assert!(last_restore_point_slot > 0);
422422

423423
let chain = &harness.chain;

beacon_node/store/src/chunked_iter.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,16 @@ where
3030
/// Create a new iterator which can yield elements from `start_vindex` up to the last
3131
/// index stored by the restore point at `last_restore_point_slot`.
3232
///
33-
/// The `last_restore_point` slot should be the slot of a recent restore point as obtained from
34-
/// `HotColdDB::get_latest_restore_point_slot`. We pass it as a parameter so that the caller can
33+
/// The `freezer_upper_limit` slot should be the slot of a recent restore point as obtained from
34+
/// `Root::freezer_upper_limit`. We pass it as a parameter so that the caller can
3535
/// maintain a stable view of the database (see `HybridForwardsBlockRootsIterator`).
3636
pub fn new(
3737
store: &'a HotColdDB<E, Hot, Cold>,
3838
start_vindex: usize,
39-
last_restore_point_slot: Slot,
39+
freezer_upper_limit: Slot,
4040
spec: &ChainSpec,
4141
) -> Self {
42-
let (_, end_vindex) = F::start_and_end_vindex(last_restore_point_slot, spec);
42+
let (_, end_vindex) = F::start_and_end_vindex(freezer_upper_limit, spec);
4343

4444
// Set the next chunk to the one containing `start_vindex`.
4545
let next_cindex = start_vindex / F::chunk_size();

beacon_node/store/src/forwards_iter.rs

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,14 @@ pub trait Root<E: EthSpec>: Field<E, Value = Hash256> {
1919
end_state: BeaconState<E>,
2020
end_root: Hash256,
2121
) -> Result<SimpleForwardsIterator>;
22+
23+
/// The first slot for which this field is *no longer* stored in the freezer database.
24+
///
25+
/// If `None`, then this field is not stored in the freezer database at all due to pruning
26+
/// configuration.
27+
fn freezer_upper_limit<Hot: ItemStore<E>, Cold: ItemStore<E>>(
28+
store: &HotColdDB<E, Hot, Cold>,
29+
) -> Option<Slot>;
2230
}
2331

2432
impl<E: EthSpec> Root<E> for BlockRoots {
@@ -39,6 +47,13 @@ impl<E: EthSpec> Root<E> for BlockRoots {
3947
)?;
4048
Ok(SimpleForwardsIterator { values })
4149
}
50+
51+
fn freezer_upper_limit<Hot: ItemStore<E>, Cold: ItemStore<E>>(
52+
store: &HotColdDB<E, Hot, Cold>,
53+
) -> Option<Slot> {
54+
// Block roots are stored for all slots up to the split slot (exclusive).
55+
Some(store.get_split_slot())
56+
}
4257
}
4358

4459
impl<E: EthSpec> Root<E> for StateRoots {
@@ -59,6 +74,15 @@ impl<E: EthSpec> Root<E> for StateRoots {
5974
)?;
6075
Ok(SimpleForwardsIterator { values })
6176
}
77+
78+
fn freezer_upper_limit<Hot: ItemStore<E>, Cold: ItemStore<E>>(
79+
store: &HotColdDB<E, Hot, Cold>,
80+
) -> Option<Slot> {
81+
// State roots are stored for all slots up to the latest restore point (exclusive).
82+
// There may not be a latest restore point if state pruning is enabled, in which
83+
// case this function will return `None`.
84+
store.get_latest_restore_point_slot()
85+
}
6286
}
6387

6488
/// Forwards root iterator that makes use of a flat field table in the freezer DB.
@@ -118,6 +142,7 @@ impl Iterator for SimpleForwardsIterator {
118142
pub enum HybridForwardsIterator<'a, E: EthSpec, F: Root<E>, Hot: ItemStore<E>, Cold: ItemStore<E>> {
119143
PreFinalization {
120144
iter: Box<FrozenForwardsIterator<'a, E, F, Hot, Cold>>,
145+
end_slot: Option<Slot>,
121146
/// Data required by the `PostFinalization` iterator when we get to it.
122147
continuation_data: Option<Box<(BeaconState<E>, Hash256)>>,
123148
},
@@ -129,6 +154,7 @@ pub enum HybridForwardsIterator<'a, E: EthSpec, F: Root<E>, Hot: ItemStore<E>, C
129154
PostFinalization {
130155
iter: SimpleForwardsIterator,
131156
},
157+
Finished,
132158
}
133159

134160
impl<'a, E: EthSpec, F: Root<E>, Hot: ItemStore<E>, Cold: ItemStore<E>>
@@ -138,8 +164,8 @@ impl<'a, E: EthSpec, F: Root<E>, Hot: ItemStore<E>, Cold: ItemStore<E>>
138164
///
139165
/// The `get_state` closure should return a beacon state and final block/state root to backtrack
140166
/// from in the case where the iterated range does not lie entirely within the frozen portion of
141-
/// the database. If an `end_slot` is provided and it is before the database's latest restore
142-
/// point slot then the `get_state` closure will not be called at all.
167+
/// the database. If an `end_slot` is provided and it is before the database's freezer upper
168+
/// limit for the field then the `get_state` closure will not be called at all.
143169
///
144170
/// It is OK for `get_state` to hold a lock while this function is evaluated, as the returned
145171
/// iterator is as lazy as possible and won't do any work apart from calling `get_state`.
@@ -155,27 +181,30 @@ impl<'a, E: EthSpec, F: Root<E>, Hot: ItemStore<E>, Cold: ItemStore<E>>
155181
) -> Result<Self> {
156182
use HybridForwardsIterator::*;
157183

158-
let latest_restore_point_slot = store.get_latest_restore_point_slot();
184+
// First slot at which this field is *not* available in the freezer. i.e. all slots less
185+
// than this slot have their data available in the freezer.
186+
let freezer_upper_limit = F::freezer_upper_limit(store).unwrap_or(Slot::new(0));
159187

160-
let result = if start_slot < latest_restore_point_slot {
188+
let result = if start_slot < freezer_upper_limit {
161189
let iter = Box::new(FrozenForwardsIterator::new(
162190
store,
163191
start_slot,
164-
latest_restore_point_slot,
192+
freezer_upper_limit,
165193
spec,
166194
));
167195

168196
// No continuation data is needed if the forwards iterator plans to halt before
169197
// `end_slot`. If it tries to continue further a `NoContinuationData` error will be
170198
// returned.
171199
let continuation_data =
172-
if end_slot.map_or(false, |end_slot| end_slot < latest_restore_point_slot) {
200+
if end_slot.map_or(false, |end_slot| end_slot < freezer_upper_limit) {
173201
None
174202
} else {
175203
Some(Box::new(get_state()?))
176204
};
177205
PreFinalization {
178206
iter,
207+
end_slot,
179208
continuation_data,
180209
}
181210
} else {
@@ -195,6 +224,7 @@ impl<'a, E: EthSpec, F: Root<E>, Hot: ItemStore<E>, Cold: ItemStore<E>>
195224
match self {
196225
PreFinalization {
197226
iter,
227+
end_slot,
198228
continuation_data,
199229
} => {
200230
match iter.next() {
@@ -203,10 +233,17 @@ impl<'a, E: EthSpec, F: Root<E>, Hot: ItemStore<E>, Cold: ItemStore<E>>
203233
// to a post-finalization iterator beginning from the last slot
204234
// of the pre iterator.
205235
None => {
236+
// If the iterator has an end slot (inclusive) which has already been
237+
// covered by the (exclusive) frozen forwards iterator, then we're done!
238+
let iter_end_slot = Slot::from(iter.inner.end_vindex);
239+
if end_slot.map_or(false, |end_slot| iter_end_slot == end_slot + 1) {
240+
*self = Finished;
241+
return Ok(None);
242+
}
243+
206244
let continuation_data = continuation_data.take();
207245
let store = iter.inner.store;
208-
let start_slot = Slot::from(iter.inner.end_vindex);
209-
246+
let start_slot = iter_end_slot;
210247
*self = PostFinalizationLazy {
211248
continuation_data,
212249
store,
@@ -230,6 +267,7 @@ impl<'a, E: EthSpec, F: Root<E>, Hot: ItemStore<E>, Cold: ItemStore<E>>
230267
self.do_next()
231268
}
232269
PostFinalization { iter } => iter.next().transpose(),
270+
Finished => Ok(None),
233271
}
234272
}
235273
}

watch/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,4 @@ network = { path = "../beacon_node/network" }
4545
testcontainers = { git = "https://github.com/testcontainers/testcontainers-rs/", rev = "0f2c9851" }
4646
unused_port = { path = "../common/unused_port" }
4747
task_executor = { path = "../common/task_executor" }
48+
logging = { path = "../common/logging" }

watch/tests/tests.rs

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,21 @@ use beacon_chain::{
77
};
88
use eth2::{types::BlockId, BeaconNodeHttpClient, SensitiveUrl, Timeouts};
99
use http_api::test_utils::{create_api_server, ApiServer};
10+
use log::error;
11+
use logging::test_logger;
1012
use network::NetworkReceivers;
11-
1213
use rand::distributions::Alphanumeric;
1314
use rand::{thread_rng, Rng};
15+
use std::collections::HashMap;
16+
use std::env;
17+
use std::net::SocketAddr;
18+
use std::time::Duration;
19+
use testcontainers::{clients::Cli, core::WaitFor, Image, RunnableImage};
1420
use tokio::sync::oneshot;
21+
use tokio::{runtime, task::JoinHandle};
22+
use tokio_postgres::{config::Config as PostgresConfig, Client, NoTls};
1523
use types::{Hash256, MainnetEthSpec, Slot};
24+
use unused_port::unused_tcp4_port;
1625
use url::Url;
1726
use watch::{
1827
client::WatchHttpClient,
@@ -22,17 +31,6 @@ use watch::{
2231
updater::{handler::*, run_updater, Config as UpdaterConfig, WatchSpec},
2332
};
2433

25-
use log::error;
26-
use std::collections::HashMap;
27-
use std::env;
28-
use std::net::SocketAddr;
29-
use std::time::Duration;
30-
use tokio::{runtime, task::JoinHandle};
31-
use tokio_postgres::{config::Config as PostgresConfig, Client, NoTls};
32-
use unused_port::unused_tcp4_port;
33-
34-
use testcontainers::{clients::Cli, core::WaitFor, Image, RunnableImage};
35-
3634
#[derive(Debug)]
3735
pub struct Postgres(HashMap<String, String>);
3836

@@ -132,6 +130,7 @@ impl TesterBuilder {
132130
reconstruct_historic_states: true,
133131
..ChainConfig::default()
134132
})
133+
.logger(test_logger())
135134
.deterministic_keypairs(VALIDATOR_COUNT)
136135
.fresh_ephemeral_store()
137136
.build();

0 commit comments

Comments
 (0)