Skip to content

Implement gzungetc #355

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 113 additions & 6 deletions libz-rs-sys/src/gz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ struct GzState {
// fields directly.
have: c_uint, // number of bytes available at next
next: *const Bytef, // next byte of uncompressed data
pos: u64, // current offset in uncompressed data stream
pos: i64, // current offset in uncompressed data stream

// End of public interface:
// All fields after this point are opaque to C code using this library,
Expand Down Expand Up @@ -1108,7 +1108,7 @@ unsafe fn gz_read(state: &mut GzState, mut buf: *mut u8, mut len: usize) -> usiz
len -= n;
buf = unsafe { buf.add(n) };
got += n;
state.pos += n as u64;
state.pos += n as i64;

if len == 0 {
break;
Expand Down Expand Up @@ -1561,7 +1561,7 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
// bytes, and copy is <= len.
unsafe { ptr::copy(buf, state.input.add(have).cast::<c_void>(), copy) };
state.stream.avail_in += copy as c_uint;
state.pos += copy as u64;
state.pos += copy as i64;
buf = unsafe { buf.add(copy) };
len -= copy;
if len != 0 && gz_comp(state, Z_NO_FLUSH).is_err() {
Expand All @@ -1585,7 +1585,7 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
loop {
let n = cmp::min(len, c_uint::MAX as usize) as c_uint;
state.stream.avail_in = n;
state.pos += n as u64;
state.pos += n as i64;
if gz_comp(state, Z_NO_FLUSH).is_err() {
return 0;
}
Expand Down Expand Up @@ -1838,7 +1838,7 @@ pub unsafe extern "C-unwind" fn gztell(file: gzFile) -> z_off_t {

// Return position.
match state.seek {
true => (state.pos + state.skip as u64) as z_off_t,
true => (state.pos + state.skip) as z_off_t,
false => state.pos as z_off_t,
}
}
Expand Down Expand Up @@ -2045,6 +2045,113 @@ pub unsafe extern "C-unwind" fn gzgetc_(file: gzFile) -> c_int {
unsafe { gzgetc(file) }
}

/// Push `c` back onto the stream for `file`, so that it is the first character
/// returned by the next read. At least one character of push-back is always allowed.
///
/// `gzungetc` will fail if `c` is negative (for example `-1`, the end-of-file marker),
/// and may fail if a character has been pushed but not read yet. If `gzungetc` is used
/// immediately after [`gzopen`] or [`gzdopen`], at least the output buffer size of
/// pushed characters is allowed. (See [`gzbuffer`].)
///
/// The pushed character will be discarded if the stream is repositioned with
/// [`gzseek`] or [`gzrewind`].
///
/// # Returns
///
/// - The character pushed, on success.
/// - `-1` on failure: `file` is null, `c` is negative, `file` is not open for reading,
///   the stream is in a (serious) error state, the buffers could not be set up, or
///   the output buffer has no room left for another pushed character.
///
/// # Safety
///
/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
#[cfg_attr(feature = "export-symbols", export_name = crate::prefix!(gzungetc))]
pub unsafe extern "C-unwind" fn gzungetc(c: c_int, file: gzFile) -> c_int {
    let Some(state) = (unsafe { file.cast::<GzState>().as_mut() }) else {
        return -1;
    };

    // Validate the input: EOF (or any other negative value) cannot be pushed.
    if c < 0 {
        return -1;
    }

    // Check that we're reading and that there's no (serious) error.
    if state.mode != GzMode::GZ_READ || (state.err != Z_OK && state.err != Z_BUF_ERROR) {
        return -1;
    }

    // In case this was just opened, set up the input buffer.
    if state.how == How::Look && state.have == 0 {
        // Safety: We have verified that `state` is valid.
        let _ = unsafe { gz_look(state) };

        // `gz_look` can fail (e.g. on an allocation or read failure). Its return value
        // is not inspected here, so re-check the error state before touching buffers.
        // NOTE(review): presumably `gz_look` records every failure in `state.err`, as
        // the C implementation in zlib does -- confirm against `gz_look`.
        if state.err != Z_OK && state.err != Z_BUF_ERROR {
            return -1;
        }
    }

    /* FIXME uncomment when seek support is implemented.
    // Process a skip request.
    if state.seek {
        state.seek = false;
        if gz_skip(state, state.skip) == -1 {
            return -1;
        }
    }
    */

    if state.have == 0 {
        // The output buffer is empty: put the byte at the very end of the buffer,
        // which leaves the maximum room in front of it for further pushes.

        // Never compute a pointer from a missing or zero-sized buffer. This can only
        // happen if buffer setup failed without being caught by the check above.
        if state.output.is_null() || state.out_size == 0 {
            return -1;
        }

        state.have = 1;
        // Safety: `state.output` is non-null and `state.out_size` is at least 1 (both
        // checked just above), so `output + out_size - 1` is the last byte of the
        // buffer, assuming `out_size` is the size of the `output` allocation.
        state.next = unsafe { state.output.add(state.out_size - 1) };
    } else {
        // If no room, give up (must have already done a `gzungetc`).
        if state.have as usize == state.out_size {
            const MSG: &str = "out of room to push characters";
            // Safety: We have verified that `state` is valid.
            unsafe { gz_error(state, Some((Z_DATA_ERROR, MSG))) };
            return -1;
        }

        // Slide output data if needed and insert byte before existing data.
        if state.next == state.output {
            // There are `state.have` bytes of usable content at the front of the buffer
            // `state.output`, which has capacity `state.out_size`. We want to move that
            // content to the end of the buffer, so we copy from `state.output` to
            // `state.output + (state.out_size - state.have)` and update `state.next`
            // to point to the content's new location within the buffer.
            let offset = state.out_size - state.have as usize;

            // Safety: `state.have` < `state.out_size`, or we would have returned in the
            // check for the == case above. Therefore, `offset`, which is `out_size - have`,
            // is in the range `1..=(out_size - 1)`. When we add that to `output`, the result
            // is within the buffer's allocation of `out_size` bytes.
            let dst = unsafe { state.output.add(offset) };

            // Safety: `state.next` points to a sequence of `state.have` initialized bytes
            // within the `state.output` buffer. And because `dst` was computed as
            // `state.output + state.out_size - state.have`, we can write `state.have`
            // bytes starting at `dst` and they will all be within the buffer.
            // Note that this may be an overlapping copy, hence `ptr::copy` rather than
            // `ptr::copy_nonoverlapping`.
            unsafe { ptr::copy(state.next, dst as _, state.have as _) };
            state.next = dst;
        }

        state.have += 1;
        // Safety: `state.next` > `state.output`: either it already pointed past the
        // start of the buffer, or the slide above moved it forward by `offset >= 1`.
        // So it is safe to decrease `state.next` by 1.
        state.next = unsafe { state.next.sub(1) };
    }

    // Store the pushed byte (truncated to `u8`, like the C `(unsigned char)c` cast).
    // Safety: in both branches above `state.next` was left pointing at a byte inside
    // the `state.output` buffer that is not part of the previously buffered content.
    unsafe { *(state.next as *mut u8) = c as u8 };
    // The stream position moves back by the one byte that was un-read.
    state.pos -= 1;
    // There is data to read again, so a previous read past end-of-file no longer applies.
    state.past = false;
    c
}

/// Read decompressed bytes from `file` into `buf`, until `len-1` characters are
/// read, or until a newline character is read and transferred to `buf`, or an
/// end-of-file condition is encountered. If any characters are read or if `len`
Expand Down Expand Up @@ -2138,7 +2245,7 @@ pub unsafe extern "C-unwind" fn gzgets(file: gzFile, buf: *mut c_char, len: c_in
// Safety: As described above, `state.next` pointed to at least `n` readable bytes, so
// when we increase it by `n` it will still point into the `output` buffer.
state.next = unsafe { state.next.add(n) };
state.pos += n as u64;
state.pos += n as i64;
left -= n;
// Safety: `dst` pointed to at least `n` writable bytes, so when we increase it by `n`
// it will still point into `buf`.
Expand Down
128 changes: 127 additions & 1 deletion test-libz-rs-sys/src/gz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use zlib_rs::c_api::*;
use libz_rs_sys::{
gzFile_s, gzbuffer, gzclearerr, gzclose, gzclose_r, gzclose_w, gzdirect, gzdopen, gzerror,
gzflush, gzfread, gzfwrite, gzgetc, gzgetc_, gzgets, gzoffset, gzopen, gzputc, gzputs, gzread,
gztell, gzwrite,
gztell, gzungetc, gzwrite,
};

use libc::size_t;
Expand Down Expand Up @@ -1075,6 +1075,132 @@ fn gzgetc_error() {
}
}

#[test]
fn gzungetc_basic() {
    // Open the gzip-compressed fixture in read mode.
    let file_name = crate_path("src/test-data/text.gz");
    let path = CString::new(file_name.as_str()).unwrap();
    let mode = CString::new("r").unwrap();
    let file = unsafe { gzopen(path.as_ptr(), mode.as_ptr()) };
    assert!(!file.is_null());

    // Thin wrappers around the two calls this test repeats many times.
    let push = |byte: u8| unsafe { gzungetc(byte as c_int, file) };
    let read = |dst: &mut [u8], len: usize| unsafe {
        gzread(file, dst.as_mut_ptr().cast::<c_void>(), len as c_uint)
    };

    // Use a tiny buffer so that every edge case is easy to reach. In read mode,
    // `gzbuffer(file, 8)` yields an 8-byte input buffer and a 16-byte output buffer.
    // gzungetc works on the output buffer, so everything below deals with 16 bytes.
    assert_eq!(unsafe { gzbuffer(file, 8) }, 0);

    // Before any read has happened, gzungetc should accept exactly as many bytes as
    // the output buffer holds (16), returning the pushed character each time.
    const CONTENT: &[u8] = b"0123456789abcdef";
    for &byte in CONTENT.iter().rev() {
        assert_eq!(push(byte), byte as c_int);
    }

    // The bytes were pushed in reverse order, so reading them back must produce
    // CONTENT in its original order.
    let mut buf = [0u8; CONTENT.len()];
    assert_eq!(read(&mut buf[..], CONTENT.len()), CONTENT.len() as c_int);
    assert_eq!(&buf[..], CONTENT);

    // A 16-byte read moves us toward the end of the file and drains the output buffer.
    assert_eq!(read(&mut buf[..], 16), 16);
    assert_eq!(&buf, b"gzip\nexample dat");

    // Fewer bytes remain in the file than fit in the output buffer. A one-byte read
    // decompresses all of them into the output buffer and consumes just the first.
    assert_eq!(read(&mut buf[..], 1), 1);
    assert_eq!(buf[0], b'a');

    // Output buffer layout at this point (16 bytes):
    // - 1 free byte (previously the 'a' that was just consumed),
    // - 10 bytes of decompressed data ("\nfor tests"),
    // - 5 free bytes.
    //
    // The first push below fits into the free byte at the front. The second one
    // finds no room in front and forces the content to be slid to the end of the
    // buffer first.
    assert_eq!(push(b'6'), '6' as c_int);
    assert_eq!(push(b'5'), '5' as c_int);

    // Output buffer layout now:
    // - 4 free bytes,
    // - the two bytes pushed above, '5' then '6' in buffer order,
    // - the pre-existing content "\nfor tests".
    //
    // Exactly four more pushes should fit into the free space at the front.
    for &byte in b"4321" {
        assert_eq!(push(byte), byte as c_int);
    }

    // Reading now must return the 6 pushed bytes followed by the 10 remaining bytes
    // of file content: 16 bytes in total.
    const EXPECTED: &[u8] = b"123456\nfor tests";
    // Request one byte more than expected to prove that nothing else follows.
    let mut tail = [0u8; EXPECTED.len() + 1];
    let tail_len = tail.len();
    assert_eq!(read(&mut tail[..], tail_len), EXPECTED.len() as c_int);
    assert_eq!(&tail[..EXPECTED.len()], EXPECTED);

    // With the 16-byte output buffer empty again, 16 pushes must succeed and the
    // 17th must be rejected.
    for _ in 0..16 {
        assert_eq!(push(b'-'), '-' as c_int);
    }
    assert_eq!(push(b'-'), -1);

    assert_eq!(unsafe { gzclose(file) }, Z_OK);
}

#[test]
fn gzungetc_error() {
    let star = '*' as c_int;

    // gzungetc on a null file handle should return -1.
    assert_eq!(unsafe { gzungetc(star, ptr::null_mut()) }, -1);

    // gzungetc on a write-only file handle should return -1.
    let write_mode = CString::new("w").unwrap();
    let writer = unsafe { gzdopen(-2, write_mode.as_ptr()) };
    assert_eq!(unsafe { gzungetc(star, writer) }, -1);
    // The handle wraps file descriptor -2, and closing it is expected to report Z_ERRNO.
    assert_eq!(unsafe { gzclose(writer) }, Z_ERRNO);

    // gzungetc with a negative character value should return -1, even on a
    // perfectly good handle that is open for reading.
    let file_name = crate_path("src/test-data/text.gz");
    let path = CString::new(file_name.as_str()).unwrap();
    let read_mode = CString::new("r").unwrap();
    let reader = unsafe { gzopen(path.as_ptr(), read_mode.as_ptr()) };
    assert!(!reader.is_null());
    assert_eq!(unsafe { gzungetc(-1, reader) }, -1);
    assert_eq!(unsafe { gzclose(reader) }, Z_OK);
}

#[test]
fn gzgets_basic() {
// Open a file containing gzip-compressed text.
Expand Down
Loading