Skip to content

Commit 7312fae

Browse files
swehner authored and indygreg committed
frameparams: support defining frame format for get_frame_parameters()
Closes #217.
1 parent 7b6ae78 commit 7312fae

File tree

10 files changed

+216
-169
lines changed

10 files changed

+216
-169
lines changed

c-ext/decompressor.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ PyObject *Decompressor_decompress(ZstdDecompressor *self, PyObject *args,
281281
size_t zresult;
282282
ZSTD_outBuffer outBuffer;
283283
ZSTD_inBuffer inBuffer;
284+
ZSTD_frameHeader frameHeader;
284285

285286
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|nOO:decompress", kwlist,
286287
&source, &maxOutputSize, &readAcrossFrames,
@@ -299,15 +300,14 @@ PyObject *Decompressor_decompress(ZstdDecompressor *self, PyObject *args,
299300
goto finally;
300301
}
301302

302-
decompressedSize = ZSTD_getFrameContentSize(source.buf, source.len);
303-
304-
if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
303+
if (ZSTD_getFrameHeader_advanced(&frameHeader, source.buf, source.len, self->format) != 0) {
305304
PyErr_SetString(ZstdError,
306305
"error determining content size from frame header");
307306
goto finally;
308307
}
308+
decompressedSize = frameHeader.frameContentSize;
309309
/* Special case of empty frame. */
310-
else if (0 == decompressedSize) {
310+
if (0 == decompressedSize) {
311311
result = PyBytes_FromStringAndSize("", 0);
312312
goto finally;
313313
}

c-ext/frameparams.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,20 @@ extern PyObject *ZstdError;
1212

1313
FrameParametersObject *get_frame_parameters(PyObject *self, PyObject *args,
1414
PyObject *kwargs) {
15-
static char *kwlist[] = {"data", NULL};
15+
static char *kwlist[] = {"data", "format", NULL};
1616

1717
Py_buffer source;
1818
ZSTD_frameHeader header;
19+
ZSTD_format_e format = ZSTD_f_zstd1;
1920
FrameParametersObject *result = NULL;
2021
size_t zresult;
2122

22-
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:get_frame_parameters",
23-
kwlist, &source)) {
23+
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|I:get_frame_parameters",
24+
kwlist, &source, &format)) {
2425
return NULL;
2526
}
2627

27-
zresult = ZSTD_getFrameHeader(&header, source.buf, source.len);
28+
zresult = ZSTD_getFrameHeader_advanced(&header, source.buf, source.len, format);
2829

2930
if (ZSTD_isError(zresult)) {
3031
PyErr_Format(ZstdError, "cannot get frame parameters: %s",

docs/news.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ Changes
7070
an apparent conda bug.
7171
* Added CI coverage for Anaconda 3.12 and 3.13 on Linux and Windows x86-64
7272
(but the Windows builds no-op due to above issue).
73+
* ``get_frame_parameters()`` now accepts an optional ``format`` argument
74+
defining the zstandard frame type. You can pass e.g.
75+
``zstandard.FORMAT_ZSTD1_MAGICLESS`` to decode frames without header magic.
76+
(#217)
7377

7478
Backwards Compatibility Notes
7579
-----------------------------

rust-ext/src/decompressor.rs

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,7 @@ use {
1919
types::{PyBytes, PyList},
2020
wrap_pyfunction,
2121
},
22-
std::{
23-
ffi::c_ulonglong,
24-
sync::Arc
25-
},
22+
std::{ffi::c_ulonglong, sync::Arc},
2623
};
2724

2825
#[pyclass(module = "zstandard.backend_rust")]
@@ -181,27 +178,35 @@ impl ZstdDecompressor {
181178

182179
self.setup_dctx(py, true)?;
183180

184-
let output_size =
185-
unsafe { zstd_sys::ZSTD_getFrameContentSize(buffer.buf_ptr(), buffer.len_bytes()) };
181+
let mut header: zstd_sys::ZSTD_FrameHeader = unsafe { std::mem::zeroed() };
182+
let zresult = unsafe {
183+
zstd_sys::ZSTD_getFrameHeader_advanced(
184+
&mut header,
185+
buffer.buf_ptr(),
186+
buffer.len_bytes(),
187+
self.format,
188+
)
189+
};
190+
191+
if zresult != 0 {
192+
return Err(ZstdError::new_err(
193+
"error determining content size from frame header",
194+
));
195+
}
186196

187-
let (output_buffer_size, output_size) =
188-
if output_size == zstd_sys::ZSTD_CONTENTSIZE_ERROR as c_ulonglong {
197+
let (output_buffer_size, output_size) = if header.frameContentSize == 0 {
198+
return Ok(PyBytes::new(py, &[]));
199+
} else if header.frameContentSize == zstd_sys::ZSTD_CONTENTSIZE_UNKNOWN as c_ulonglong {
200+
if max_output_size == 0 {
189201
return Err(ZstdError::new_err(
190-
"error determining content size from frame header",
202+
"could not determine content size in frame header",
191203
));
192-
} else if output_size == 0 {
193-
return Ok(PyBytes::new(py, &[]));
194-
} else if output_size == zstd_sys::ZSTD_CONTENTSIZE_UNKNOWN as c_ulonglong {
195-
if max_output_size == 0 {
196-
return Err(ZstdError::new_err(
197-
"could not determine content size in frame header",
198-
));
199-
}
204+
}
200205

201-
(max_output_size, 0)
202-
} else {
203-
(output_size as _, output_size)
204-
};
206+
(max_output_size, 0)
207+
} else {
208+
(header.frameContentSize as _, header.frameContentSize)
209+
};
205210

206211
let mut dest_buffer: Vec<u8> = Vec::new();
207212
dest_buffer

rust-ext/src/frame_parameters.rs

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
use {
88
crate::ZstdError,
9-
pyo3::{buffer::PyBuffer, prelude::*, wrap_pyfunction},
9+
pyo3::{buffer::PyBuffer, exceptions::PyValueError, prelude::*, wrap_pyfunction},
1010
std::ffi::c_ulonglong,
1111
};
1212

@@ -70,10 +70,22 @@ fn frame_header_size(data: PyBuffer<u8>) -> PyResult<usize> {
7070
}
7171

7272
#[pyfunction]
73-
fn get_frame_parameters(py: Python, buffer: PyBuffer<u8>) -> PyResult<Py<FrameParameters>> {
73+
#[pyo3(signature = (buffer, format=zstd_sys::ZSTD_format_e::ZSTD_f_zstd1 as u32))]
74+
fn get_frame_parameters(
75+
py: Python,
76+
buffer: PyBuffer<u8>,
77+
format: u32,
78+
) -> PyResult<Py<FrameParameters>> {
7479
let raw_data = unsafe {
7580
std::slice::from_raw_parts::<u8>(buffer.buf_ptr() as *const _, buffer.len_bytes())
7681
};
82+
let format = if format == zstd_sys::ZSTD_format_e::ZSTD_f_zstd1 as u32 {
83+
zstd_sys::ZSTD_format_e::ZSTD_f_zstd1
84+
} else if format == zstd_sys::ZSTD_format_e::ZSTD_f_zstd1_magicless as u32 {
85+
zstd_sys::ZSTD_format_e::ZSTD_f_zstd1_magicless
86+
} else {
87+
return Err(PyValueError::new_err(format!("invalid format value")));
88+
};
7789

7890
let mut header = zstd_sys::ZSTD_FrameHeader {
7991
frameContentSize: 0,
@@ -87,7 +99,12 @@ fn get_frame_parameters(py: Python, buffer: PyBuffer<u8>) -> PyResult<Py<FramePa
8799
_reserved2: 0,
88100
};
89101
let zresult = unsafe {
90-
zstd_sys::ZSTD_getFrameHeader(&mut header, raw_data.as_ptr() as *const _, raw_data.len())
102+
zstd_sys::ZSTD_getFrameHeader_advanced(
103+
&mut header,
104+
raw_data.as_ptr() as *const _,
105+
raw_data.len(),
106+
format,
107+
)
91108
};
92109

93110
if unsafe { zstd_sys::ZSTD_isError(zresult) } != 0 {

tests/test_compressor_fuzzing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -767,7 +767,7 @@ def test_data_equivalence(self, original, threads, use_dict):
767767
dctx = zstd.ZstdDecompressor(**kwargs)
768768

769769
for i, frame in enumerate(result):
770-
self.assertEqual(dctx.decompress(frame), original[i])
770+
self.assertEqual(dctx.decompress(frame.tobytes()), original[i])
771771

772772

773773
@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")

tests/test_data_structures.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,16 @@ def test_attributes(self):
190190
self.assertEqual(params.dict_id, 15)
191191
self.assertTrue(params.has_checksum)
192192

193+
def test_attributes_without_header(self):
194+
# Set multiple things without magic header
195+
params = zstd.get_frame_parameters(
196+
b"\x45\x40\x0f\x10\x00", format=zstd.FORMAT_ZSTD1_MAGICLESS
197+
)
198+
self.assertEqual(params.content_size, 272)
199+
self.assertEqual(params.window_size, 262144)
200+
self.assertEqual(params.dict_id, 15)
201+
self.assertTrue(params.has_checksum)
202+
193203
def test_input_types(self):
194204
v = zstd.FRAME_HEADER + b"\x00\x00"
195205

tests/test_decompressor_decompress.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,16 @@ def test_input_types(self):
3737
for source in sources:
3838
self.assertEqual(dctx.decompress(source), b"foo")
3939

40+
def test_headerless(self):
41+
compression_params = zstd.ZstdCompressionParameters(
42+
format=zstd.FORMAT_ZSTD1_MAGICLESS,
43+
)
44+
cctx = zstd.ZstdCompressor(compression_params=compression_params)
45+
compressed = cctx.compress(b"foo")
46+
47+
dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS)
48+
self.assertEqual(dctx.decompress(compressed), b"foo")
49+
4050
def test_no_content_size_in_frame(self):
4151
cctx = zstd.ZstdCompressor(write_content_size=False)
4252
compressed = cctx.compress(b"foobar")

zstandard/__init__.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,7 @@ class FrameParameters(object):
450450
def estimate_decompression_context_size() -> int: ...
451451
def frame_content_size(data: ByteString) -> int: ...
452452
def frame_header_size(data: ByteString) -> int: ...
453-
def get_frame_parameters(data: ByteString) -> FrameParameters: ...
453+
def get_frame_parameters(data: ByteString, format: Optional[int] = None) -> FrameParameters: ...
454454
def train_dictionary(
455455
dict_size: int,
456456
samples: list[ByteString],

0 commit comments

Comments
 (0)