Skip to content

Commit c686d00

Browse files
committed
Implement gzseek for reads, as well as gzrewind
1 parent 525490f commit c686d00

File tree

2 files changed

+414
-11
lines changed

2 files changed

+414
-11
lines changed

libz-rs-sys/src/gz.rs

Lines changed: 212 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,16 @@
33
use zlib_rs::allocate::*;
44
pub use zlib_rs::c_api::*;
55

6+
use crate::gz::GzMode::GZ_READ;
67
use crate::{
78
deflate, deflateEnd, deflateInit2_, deflateReset, inflate, inflateEnd, inflateInit2,
89
inflateReset, z_off_t, zlibVersion,
910
};
1011
use core::ffi::{c_char, c_int, c_uint, c_void, CStr};
1112
use core::ptr;
13+
use libc::off_t;
1214
use libc::size_t; // FIXME: Switch to core::ffi::c_size_t when it's stable.
13-
use libc::{O_APPEND, O_CREAT, O_EXCL, O_RDONLY, O_TRUNC, O_WRONLY, SEEK_CUR, SEEK_END};
15+
use libc::{O_APPEND, O_CREAT, O_EXCL, O_RDONLY, O_TRUNC, O_WRONLY, SEEK_CUR, SEEK_END, SEEK_SET};
1416
use std::cmp;
1517
use std::cmp::Ordering;
1618
use zlib_rs::deflate::Strategy;
@@ -1031,16 +1033,13 @@ unsafe fn gz_read(state: &mut GzState, mut buf: *mut u8, mut len: usize) -> usiz
10311033
return 0;
10321034
}
10331035

1034-
/* FIXME: Uncomment this when seek support is added:
1035-
1036-
// process a skip request
1036+
// Process a skip request.
10371037
if state.seek {
10381038
state.seek = false;
1039-
if gz_skip(state, state.skip) == -1 {
1039+
if gz_skip(state, state.skip).is_err() {
10401040
return 0;
10411041
}
10421042
}
1043-
*/
10441043

10451044
// Loop until we get enough bytes or reach the end of the file.
10461045
let mut got = 0;
@@ -1118,6 +1117,57 @@ unsafe fn gz_read(state: &mut GzState, mut buf: *mut u8, mut len: usize) -> usiz
11181117
got
11191118
}
11201119

// Evaluates to `true` when the unsigned value `$x` cannot be represented as a
// non-negative signed 64-bit offset, i.e. when it exceeds `i64::MAX`.
//
// The width check comes first: a value whose type is not the same size as `i64`
// is never reported as too large. In practice this means the macro can yield
// `true` only on targets where the C `unsigned int` is a 64-bit type.
macro_rules! gt_off {
    ($x:expr) => {{
        // Only a value as wide as `i64` is considered for the range check.
        let same_width_as_i64 = core::mem::size_of_val(&$x) == core::mem::size_of::<i64>();
        same_width_as_i64 && ($x as usize) > (i64::MAX as usize)
    }};
}
1129+
1130+
// Skip len uncompressed bytes of output.
1131+
//
1132+
// # Returns
1133+
//
1134+
// - `Ok` on success.
1135+
// - `Err` on error.
1136+
fn gz_skip(state: &mut GzState, mut len: i64) -> Result<(), ()> {
1137+
/* skip over len bytes or reach end-of-file, whichever comes first */
1138+
while len != 0 {
1139+
// Skip over whatever is in output buffer.
1140+
if state.have != 0 {
1141+
// For consistency with zlib-ng, we use `gt_off` to check whether the value
1142+
// of `state.have` is too large to be represented as a signed 64-bit offset.
1143+
// This case can be triggered only if the platform has 64-bit C ints and
1144+
// `state.have` is >= 2^63.
1145+
let n = if gt_off!(state.have) || state.have as i64 > len {
1146+
len as usize
1147+
} else {
1148+
state.have as usize
1149+
};
1150+
state.have -= n as c_uint;
1151+
// Safety: `n` <= `state.have` and there are at least `state.have` accessible
1152+
// bytes after `state.next` in the buffer.
1153+
state.next = unsafe { state.next.add(n) };
1154+
state.pos += n as i64;
1155+
len -= n as i64;
1156+
} else if state.eof && state.stream.avail_in == 0 {
1157+
// Output buffer empty -- return if we're at the end of the input.
1158+
break;
1159+
} else {
1160+
// Need more data to skip -- load up output buffer.
1161+
// Get more output, looking for header if required.
1162+
// Safety: `state` is valid, and `state.have` is zero in this branch.
1163+
if unsafe { gz_fetch(state) }.is_err() {
1164+
return Err(());
1165+
}
1166+
}
1167+
}
1168+
Ok(())
1169+
}
1170+
11211171
// Given a gzip file opened for reading, check for a gzip header, and set
11221172
// `state.direct` accordingly.
11231173
//
@@ -1423,7 +1473,7 @@ unsafe fn gz_decomp(state: &mut GzState) -> Result<(), ()> {
14231473
///
14241474
/// # Returns
14251475
///
1426-
/// - The number of uncompress bytes written, on success.
1476+
/// - The number of uncompressed bytes written, on success.
14271477
/// - Or 0 in case of error.
14281478
///
14291479
/// # Safety
@@ -2190,15 +2240,13 @@ pub unsafe extern "C-unwind" fn gzgets(file: gzFile, buf: *mut c_char, len: c_in
21902240
return ptr::null_mut();
21912241
}
21922242

2193-
/* FIXME uncomment when seek support is implemented.
21942243
// Process a skip request.
21952244
if state.seek {
21962245
state.seek = false;
2197-
if gz_skip(state, state.skip) == -1 {
2246+
if gz_skip(state, state.skip).is_err() {
21982247
return ptr::null_mut();
21992248
}
22002249
}
2201-
*/
22022250

22032251
// Copy output bytes up to newline or `len - 1`, whichever comes first.
22042252
let mut left = len as usize - 1;
@@ -2331,6 +2379,160 @@ pub unsafe extern "C-unwind" fn gzsetparams(file: gzFile, level: c_int, strategy
23312379
Z_OK
23322380
}
23332381

2382+
/// Set the starting position to `offset` relative to `whence` for the next [`gzread`]
2383+
/// or [`gzwrite`] on `file`. The `offset` represents a number of bytes in the
2384+
/// uncompressed data stream. The `whence` parameter is defined as in `lseek(2)`,
2385+
/// but only `SEEK_CUR` (relative to current position) and `SEEK_SET` (absolute from
2386+
/// start of the uncompressed data stream) are supported.
2387+
///
2388+
/// If `file` is open for reading, this function is emulated but can extremely
2389+
/// slow (because it operates on the decompressed data stream). If `file` is open
2390+
/// for writing, only forward seeks are supported; `gzseek` then compresses a sequence
2391+
/// of zeroes up to the new starting position. If a negative `offset` is specified in
2392+
/// write mode, `gzseek` returns -1.
2393+
///
2394+
/// <div class="warning">
2395+
///
2396+
/// Warning: `gzseek` currently is implemented only for reads. Write support will be added
2397+
/// in the future.
2398+
///
2399+
/// </div>
2400+
///
2401+
/// # Returns
2402+
///
2403+
/// - The resulting offset location as measured in bytes from the beginning of the uncompressed
2404+
/// stream, on success.
2405+
/// - `-1` on error.
2406+
///
2407+
/// # Safety
2408+
///
2409+
/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
2410+
#[cfg_attr(feature = "export-symbols", export_name = crate::prefix!(gzseek))]
2411+
pub unsafe extern "C-unwind" fn gzseek(file: gzFile, offset: z_off_t, whence: c_int) -> z_off_t {
2412+
let Some(state) = (unsafe { file.cast::<GzState>().as_mut() }) else {
2413+
return -1;
2414+
};
2415+
if state.mode != GzMode::GZ_READ && state.mode != GzMode::GZ_WRITE {
2416+
// Unreachable if `file` was initialized with `gzopen` or `gzdopen`.
2417+
return -1;
2418+
}
2419+
2420+
// Check that there's no error.
2421+
if state.err != Z_OK && state.err != Z_BUF_ERROR {
2422+
return -1;
2423+
}
2424+
2425+
// Can only seek from start or relative to current position.
2426+
if whence != SEEK_SET && whence != SEEK_CUR {
2427+
return -1;
2428+
}
2429+
2430+
let mut offset: i64 = offset as _;
2431+
2432+
// Normalize offset to a SEEK_CUR specification (i.e., relative to current position).
2433+
if whence == SEEK_SET {
2434+
offset -= state.pos;
2435+
} else if state.seek {
2436+
offset += state.skip;
2437+
}
2438+
state.seek = false;
2439+
2440+
// If we are reading non-compressed content, just lseek to the right location.
2441+
if state.mode == GZ_READ && state.how == How::Copy && state.pos + offset >= 0 {
2442+
let ret = unsafe { libc::lseek(state.fd, offset as off_t - state.have as off_t, SEEK_CUR) };
2443+
if ret == -1 {
2444+
return -1;
2445+
}
2446+
state.have = 0;
2447+
state.eof = false;
2448+
state.past = false;
2449+
state.seek = false;
2450+
// Safety: `state` was validated above.
2451+
unsafe { gz_error(state, None) };
2452+
state.stream.avail_in = 0;
2453+
state.pos += offset;
2454+
return state.pos as _;
2455+
}
2456+
2457+
// Calculate the skip amount. If we're seeking backwards in a compressed file, we'll
2458+
// need to rewind to the start and decompress content until we arrive at the right spot.
2459+
if offset < 0 {
2460+
if state.mode != GzMode::GZ_READ {
2461+
// Can't go backwards when writing.
2462+
return -1;
2463+
}
2464+
offset += state.pos;
2465+
if offset < 0 {
2466+
// Before start of file!
2467+
return -1;
2468+
}
2469+
2470+
// Rewind, then skip to offset.
2471+
// Safety: `file` points to an initialized `GzState`.
2472+
if unsafe { gzrewind(file) } == -1 {
2473+
return -1;
2474+
}
2475+
}
2476+
2477+
// If reading, skip what's in output buffer. (This simplifies `gzgetc`.)
2478+
if state.mode == GzMode::GZ_READ {
2479+
// For consistency with zlib-ng, we use `gt_off` to check whether the value
2480+
// of `state.have` is too large to be represented as a signed 64-bit offset.
2481+
// This case can be triggered only if the platform has 64-bit C ints and
2482+
// `state.have` is >= 2^63.
2483+
let n = if gt_off!(state.have) || state.have as i64 > offset {
2484+
offset as usize
2485+
} else {
2486+
state.have as usize
2487+
};
2488+
state.have -= n as c_uint;
2489+
// Safety: `n` <= `state.have`, and `state.next` points to at least `state.have`
2490+
// accessible bytes within the buffer.
2491+
state.next = unsafe { state.next.add(n) };
2492+
state.pos += n as i64;
2493+
offset -= n as i64;
2494+
}
2495+
2496+
// Request skip (if not zero). The actual seek will happen on the next read or write operation.
2497+
if offset != 0 {
2498+
state.seek = true;
2499+
state.skip = offset;
2500+
}
2501+
2502+
(state.pos + offset) as _
2503+
}
2504+
2505+
/// Rewind `file` to the start. This function is supported only for reading.
2506+
///
2507+
/// Note: `gzrewind(file)` is equivalent to [`gzseek`]`(file, 0, SEEK_SET)`
2508+
///
2509+
/// # Returns
2510+
///
2511+
/// - `0` on success.
2512+
/// - `-1` on error.
2513+
///
2514+
/// # Safety
2515+
///
2516+
/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
2517+
#[cfg_attr(feature = "export-symbols", export_name = crate::prefix!(gzrewind))]
2518+
pub unsafe extern "C-unwind" fn gzrewind(file: gzFile) -> c_int {
2519+
let Some(state) = (unsafe { file.cast::<GzState>().as_mut() }) else {
2520+
return -1;
2521+
};
2522+
2523+
// Check that we're reading and that there's no error.
2524+
if state.mode != GzMode::GZ_READ || (state.err != Z_OK && state.err != Z_BUF_ERROR) {
2525+
return -1;
2526+
}
2527+
2528+
// Back up and start over.
2529+
if unsafe { libc::lseek(state.fd, state.start as _, SEEK_SET) } == -1 {
2530+
return -1;
2531+
}
2532+
gz_reset(state);
2533+
0
2534+
}
2535+
23342536
// Create a deep copy of a C string using `ALLOCATOR`
23352537
//
23362538
// # Safety

0 commit comments

Comments
 (0)