Skip to content

Commit 080fa4d

Browse files
authored
add docs/example and Vec<u32> values to sstable (#2660)
1 parent 988c2b3 commit 080fa4d

File tree

4 files changed

+175
-4
lines changed

4 files changed

+175
-4
lines changed

sstable/src/lib.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,40 @@
1+
//! `tantivy_sstable` is a crate that provides a sorted string table data structure.
2+
//!
3+
//! It is used in `tantivy` to store the term dictionary.
4+
//!
5+
//! An `sstable` is a map from sorted `&[u8]` keys to values.
6+
//! The keys are encoded using incremental encoding.
7+
//!
8+
//! Values and keys are compressed using zstd with the default feature flag `zstd-compression`.
9+
//!
10+
//! # Example
11+
//!
12+
//! Here is an example of how to create and search an `sstable`:
13+
//!
14+
//! ```rust
15+
//! use common::OwnedBytes;
16+
//! use tantivy_sstable::{Dictionary, MonotonicU64SSTable};
17+
//!
18+
//! // Create a new sstable in memory.
19+
//! let mut builder = Dictionary::<MonotonicU64SSTable>::builder(Vec::new()).unwrap();
20+
//! builder.insert(b"apple", &1).unwrap();
21+
//! builder.insert(b"banana", &2).unwrap();
22+
//! builder.insert(b"orange", &3).unwrap();
23+
//! let sstable_bytes = builder.finish().unwrap();
24+
//!
25+
//! // Open the sstable.
26+
//! let sstable =
27+
//! Dictionary::<MonotonicU64SSTable>::from_bytes(OwnedBytes::new(sstable_bytes)).unwrap();
28+
//!
29+
//! // Search for a key.
30+
//! let value = sstable.get(b"banana").unwrap();
31+
//! assert_eq!(value, Some(2));
32+
//!
33+
//! // Search for a non-existent key.
34+
//! let value = sstable.get(b"grape").unwrap();
35+
//! assert_eq!(value, None);
36+
//! ```
37+
138
use std::io::{self, Write};
239
use std::ops::Range;
340

@@ -19,6 +56,7 @@ pub use streamer::{Streamer, StreamerBuilder};
1956

2057
mod block_reader;
2158
use common::{BinarySerializable, OwnedBytes};
59+
use value::{VecU32ValueReader, VecU32ValueWriter};
2260

2361
pub use self::block_reader::BlockReader;
2462
pub use self::delta::{DeltaReader, DeltaWriter};
@@ -130,6 +168,15 @@ impl SSTable for RangeSSTable {
130168
type ValueWriter = RangeValueWriter;
131169
}
132170

171+
/// SSTable associating keys to `Vec<u32>` values.
///
/// Values are decoded by `VecU32ValueReader` and encoded by `VecU32ValueWriter`.
172+
pub struct VecU32ValueSSTable;
173+
174+
impl SSTable for VecU32ValueSSTable {
175+
type Value = Vec<u32>;
176+
type ValueReader = VecU32ValueReader;
177+
type ValueWriter = VecU32ValueWriter;
178+
}
179+
133180
/// SSTable reader.
134181
pub struct Reader<TValueReader> {
135182
key: Vec<u8>,

sstable/src/value/mod.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
11
pub(crate) mod index;
22
mod range;
33
mod u64_monotonic;
4+
mod vec_u32;
45
mod void;
56

67
use std::io;
78

9+
pub use range::{RangeValueReader, RangeValueWriter};
10+
pub use u64_monotonic::{U64MonotonicValueReader, U64MonotonicValueWriter};
11+
pub use vec_u32::{VecU32ValueReader, VecU32ValueWriter};
12+
pub use void::{VoidValueReader, VoidValueWriter};
13+
814
/// `ValueReader` is a trait describing the contract of something
915
/// reading blocks of values, and offering random access within those values.
1016
pub trait ValueReader: Default {
@@ -40,10 +46,6 @@ pub trait ValueWriter: Default {
4046
fn clear(&mut self);
4147
}
4248

43-
pub use range::{RangeValueReader, RangeValueWriter};
44-
pub use u64_monotonic::{U64MonotonicValueReader, U64MonotonicValueWriter};
45-
pub use void::{VoidValueReader, VoidValueWriter};
46-
4749
fn deserialize_vint_u64(data: &mut &[u8]) -> u64 {
4850
let (num_bytes, val) = super::vint::deserialize_read(data);
4951
*data = &data[num_bytes..];

sstable/src/value/vec_u32.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
use std::io;
2+
3+
use super::{ValueReader, ValueWriter};
4+
5+
#[derive(Default)]
6+
pub struct VecU32ValueReader {
7+
vals: Vec<Vec<u32>>,
8+
}
9+
10+
impl ValueReader for VecU32ValueReader {
11+
type Value = Vec<u32>;
12+
13+
#[inline(always)]
14+
fn value(&self, idx: usize) -> &Self::Value {
15+
&self.vals[idx]
16+
}
17+
18+
fn load(&mut self, mut data: &[u8]) -> io::Result<usize> {
19+
let original_num_bytes = data.len();
20+
self.vals.clear();
21+
22+
// The first 4 bytes are the number of blocks
23+
let num_blocks = u32::from_le_bytes(data[..4].try_into().unwrap()) as usize;
24+
data = &data[4..];
25+
26+
for _ in 0..num_blocks {
27+
// Each block starts with a 4-byte length
28+
let segment_len = u32::from_le_bytes(data[..4].try_into().unwrap()) as usize;
29+
data = &data[4..];
30+
31+
// Read the segment IDs for this block
32+
let mut segment_ids = Vec::with_capacity(segment_len);
33+
for _ in 0..segment_len {
34+
let segment_id = u32::from_le_bytes(data[..4].try_into().unwrap());
35+
segment_ids.push(segment_id);
36+
data = &data[4..];
37+
}
38+
self.vals.push(segment_ids);
39+
}
40+
41+
// Return the number of bytes consumed
42+
Ok(original_num_bytes - data.len())
43+
}
44+
}
45+
46+
#[derive(Default)]
47+
pub struct VecU32ValueWriter {
48+
vals: Vec<Vec<u32>>,
49+
}
50+
51+
impl ValueWriter for VecU32ValueWriter {
52+
type Value = Vec<u32>;
53+
54+
fn write(&mut self, val: &Self::Value) {
55+
self.vals.push(val.to_vec());
56+
}
57+
58+
fn serialize_block(&self, output: &mut Vec<u8>) {
59+
let num_blocks = self.vals.len() as u32;
60+
output.extend_from_slice(&num_blocks.to_le_bytes());
61+
for vals in &self.vals {
62+
let len = vals.len() as u32;
63+
output.extend_from_slice(&len.to_le_bytes());
64+
for &segment_id in vals.iter() {
65+
output.extend_from_slice(&segment_id.to_le_bytes());
66+
}
67+
}
68+
}
69+
70+
fn clear(&mut self) {
71+
self.vals.clear();
72+
}
73+
}

sstable/tests/sstable_test.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
use common::OwnedBytes;
2+
use tantivy_sstable::{Dictionary, MonotonicU64SSTable, VecU32ValueSSTable};
3+
4+
/// Builds an in-memory `MonotonicU64SSTable` dictionary, reopens it from its
/// serialized bytes, and checks the lookup of a present and of an absent key.
#[test]
fn test_create_and_search_sstable() {
    // Write three entries, in sorted key order, into an in-memory buffer.
    let mut writer = Dictionary::<MonotonicU64SSTable>::builder(Vec::new()).unwrap();
    for (key, value) in [(&b"apple"[..], 1u64), (&b"banana"[..], 2), (&b"orange"[..], 3)] {
        writer.insert(key, &value).unwrap();
    }
    let serialized = writer.finish().unwrap();

    // Reopen the dictionary from the bytes that were just produced.
    let dictionary =
        Dictionary::<MonotonicU64SSTable>::from_bytes(OwnedBytes::new(serialized)).unwrap();

    // A key that was inserted resolves to its value.
    assert_eq!(dictionary.get(b"banana").unwrap(), Some(2));

    // A key that was never inserted resolves to `None`.
    assert_eq!(dictionary.get(b"blub").unwrap(), None);
}
25+
26+
/// Round-trips two `Vec<u32>` values through a `VecU32ValueSSTable` dictionary
/// and checks that streaming yields them back in key order.
#[test]
fn test_custom_value_sstable() {
    let entries = [
        (&b"first"[..], vec![1, 2, 3]),
        (&b"second"[..], vec![4, 5]),
    ];

    // Create a new sstable with custom values.
    let mut writer = Dictionary::<VecU32ValueSSTable>::builder(Vec::new()).unwrap();
    writer.set_block_len(4096); // Ensure both values are in the same block
    for (key, value) in &entries {
        writer.insert(*key, value).unwrap();
    }
    let serialized = writer.finish().unwrap();

    // Open the sstable.
    let dictionary =
        Dictionary::<VecU32ValueSSTable>::from_bytes(OwnedBytes::new(serialized)).unwrap();

    // Every inserted entry must come back from the stream, in insertion order.
    let mut stream = dictionary.stream().unwrap();
    for (key, value) in &entries {
        assert!(stream.advance());
        assert_eq!(stream.key(), *key);
        assert_eq!(stream.value(), value);
    }

    // The stream is exhausted after the last entry.
    assert!(!stream.advance());
}

0 commit comments

Comments
 (0)