Skip to content

Implement gzputc and gzputs #351

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 130 additions & 7 deletions libz-rs-sys/src/gz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use core::ffi::{c_char, c_int, c_uint, c_void, CStr};
use core::ptr;
use libc::{O_APPEND, O_CREAT, O_EXCL, O_RDONLY, O_TRUNC, O_WRONLY, SEEK_CUR, SEEK_END};
use std::cmp;
use std::cmp::Ordering;
use zlib_rs::deflate::Strategy;
use zlib_rs::MAX_WBITS;

Expand Down Expand Up @@ -105,6 +106,32 @@ impl GzState {

Ok((exclusive, cloexec))
}

/// Return the number of input bytes currently buffered in `self`, measured
/// from the start of `self.input` to the end of the stream's pending input.
///
/// Returns `0` when the input buffer has not been allocated
/// (`self.input` is null).
///
/// # Safety
///
/// At least one of the following must hold:
/// - `self.input` is null.
/// - `self.stream.next_in .. self.stream.next_in + self.stream.avail_in`
///   is contained in `self.input .. self.input + self.in_size`.
///
/// One of these is almost always true inside this module. The notable
/// exception is a specific block within `gz_write`, which temporarily points
/// `self.stream.next_in` at a caller-supplied buffer as a zero-copy
/// optimization when compressing large inputs; this method must not be
/// called while that block is active.
unsafe fn input_len(&self) -> usize {
    let base = self.input;
    if base.is_null() {
        return 0;
    }

    let pending = self.stream.avail_in as usize;
    // SAFETY: `next_in .. next_in + avail_in` is the stream's input window, so
    // advancing by `avail_in` lands at most one past the end of that window.
    let window_end = unsafe { self.stream.next_in.add(pending) };

    // SAFETY: the caller guarantees that the stream's input window is a
    // subslice of the buffer starting at `input`, so both pointers belong to
    // the same allocation.
    let distance = unsafe { window_end.offset_from(base) };
    distance as usize
}
}

// Gzip operating modes
Expand Down Expand Up @@ -1408,13 +1435,8 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
if state.stream.avail_in == 0 {
state.stream.next_in = state.input;
}
let have = unsafe {
state
.stream
.next_in
.add(state.stream.avail_in as usize)
.offset_from(state.input)
} as usize;
// Safety: `state.stream.next_in` points into the buffer starting at `state.input`.
let have = unsafe { state.input_len() };
let copy = cmp::min(state.in_size.saturating_sub(have), len);
// Safety: The caller is responsible for ensuring that buf points to at least len readable
// bytes, and copy is <= len.
Expand All @@ -1437,6 +1459,9 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
}

// Directly compress user buffer to file.
// Note: For this operation, we temporarily break the invariant that
// `state.stream.next_in` points to somewhere in the `state.input` buffer.
let save_next_in = state.stream.next_in;
state.stream.next_in = buf.cast::<_>();
loop {
let n = cmp::min(len, c_uint::MAX as usize) as c_uint;
Expand All @@ -1450,6 +1475,7 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
break;
}
}
state.stream.next_in = save_next_in;
}

// Input was all buffered or compressed.
Expand Down Expand Up @@ -1738,6 +1764,103 @@ pub unsafe extern "C-unwind" fn gzoffset(file: gzFile) -> z_off_t {
}
}

/// Compress and write `c`, converted to an unsigned 8-bit char, into `file`.
///
/// The byte is appended to the internal input buffer when that buffer exists
/// and has room; otherwise it is handed to `gz_write`.
///
/// # Returns
///
/// - The value that was written (`c & 0xff`), on success.
/// - `-1` on error, including a null `file`, a handle that is not open for
///   writing, or a handle that already has an error recorded.
///
/// # Safety
///
/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
pub unsafe extern "C-unwind" fn gzputc(file: gzFile, c: c_int) -> c_int {
    // SAFETY: the caller guarantees that a non-null `file` is a valid handle,
    // and such handles point to a `GzState`.
    let Some(state) = (unsafe { file.cast::<GzState>().as_mut() }) else {
        return -1;
    };

    // Only a write-mode handle without a recorded error may be written to.
    if state.mode != GzMode::GZ_WRITE {
        return -1;
    }
    if state.err != Z_OK {
        return -1;
    }

    /* FIXME: Uncomment when seek support is implemented.
    // Check for seek request.
    if state.seek {
        state.seek = false;
        if gz_zero(state, state.skip) == -1 {
            return -1;
        }
    }
    */

    // The byte to store, and the same value widened back for the return
    // value (equivalent to `c & 0xff`).
    let byte = c as u8;
    let written = c_int::from(byte);

    // Fast path: append directly to the input buffer. `state.input` is null
    // while the buffer has not been initialized yet.
    if !state.input.is_null() {
        // With nothing pending, restart the input window at the buffer start.
        if state.stream.avail_in == 0 {
            state.stream.next_in = state.input;
        }
        // SAFETY: `state.stream.next_in` points into the buffer starting at `state.input`.
        // (This is an invariant maintained throughout this module, except for a specific
        // block within `gz_write` that does not call any function that might call `gzputc`.)
        let used = unsafe { state.input_len() };
        if used < state.in_size {
            // SAFETY: `input` has `in_size` bytes, and `used` < `in_size`.
            unsafe { *state.input.add(used) = byte };
            state.stream.avail_in += 1;
            state.pos += 1;
            return written;
        }
    }

    // Slow path: the buffer is full or not yet allocated, so go through `gz_write`.
    let single = [byte];
    // SAFETY: `state` has been confirmed valid, and `single` holds 1 readable byte.
    let count = unsafe { gz_write(state, single.as_ptr().cast::<c_void>(), 1) };
    if count == 1 {
        written
    } else {
        -1
    }
}

/// Compress and write the given null-terminated string `s` to `file`,
/// excluding the terminating null character.
///
/// # Returns
///
/// - The number of characters written, on success.
/// - `-1` in case of error: a null `file` or `s`, a handle that is not open
///   for writing or already has an error recorded, a string whose length does
///   not fit in a C `int`, or a short write.
///
/// # Safety
///
/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
/// - `s`, if non-null, must point to a null-terminated C string.
pub unsafe extern "C-unwind" fn gzputs(file: gzFile, s: *const c_char) -> c_int {
    // SAFETY: the caller guarantees that a non-null `file` is a valid handle,
    // and such handles point to a `GzState`.
    let Some(state) = (unsafe { file.cast::<GzState>().as_mut() }) else {
        return -1;
    };

    // Reject a null string, a handle that is not in write mode, and a handle
    // that already carries an error. None of these checks has side effects.
    if s.is_null() || state.mode != GzMode::GZ_WRITE || state.err != Z_OK {
        return -1;
    }

    // SAFETY: `s` is non-null, and the caller guarantees it is null-terminated.
    let len = unsafe { libc::strlen(s) };

    // The return type is a C `int`, so longer strings cannot be reported.
    let Ok(expected) = c_int::try_from(len) else {
        const MSG: &str = "string length does not fit in int";
        // SAFETY: `state` is a valid, exclusively borrowed `GzState`.
        // NOTE(review): `gz_error`'s full contract is defined outside this view.
        unsafe { gz_error(state, Some((Z_STREAM_ERROR, MSG))) };
        return -1;
    };

    // SAFETY: `s` points to at least `len` readable bytes (the string contents).
    let put = unsafe { gz_write(state, s.cast::<c_void>(), len) };

    // A short write is an error; otherwise report the string length.
    if put < expected {
        -1
    } else {
        expected
    }
}

// Create a deep copy of a C string using `ALLOCATOR`
//
// # Safety
Expand Down
146 changes: 145 additions & 1 deletion test-libz-rs-sys/src/gz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use zlib_rs::c_api::*;

use libz_rs_sys::{
gzFile_s, gzbuffer, gzclearerr, gzclose, gzclose_r, gzclose_w, gzdirect, gzdopen, gzerror,
gzflush, gzoffset, gzopen, gzread, gztell, gzwrite,
gzflush, gzoffset, gzopen, gzputc, gzputs, gzread, gztell, gzwrite,
};

use std::ffi::{c_char, c_int, c_uint, c_void, CString};
Expand Down Expand Up @@ -878,6 +878,150 @@ fn gzoffset_gztell_error() {
}
}

#[test]
fn gzputc_basic() {
    const CONTENT: &[u8] = b"sample text to test gzputc implementation";

    // The temporary directory is removed automatically when `scratch` goes
    // out of scope at the end of the test.
    let scratch = tempfile::TempDir::new_in(temp_base()).unwrap();
    let file_name = path(scratch.path(), "output");
    let c_file_name = CString::new(file_name.as_str()).unwrap();
    let c_open_mode = CString::new("wT").unwrap();

    // Open a new gzip file for writing. Direct (uncompressed) mode keeps the
    // on-disk bytes identical to what was written, which simplifies validation.
    let file = unsafe { gzopen(c_file_name.as_ptr(), c_open_mode.as_ptr()) };
    assert!(!file.is_null());
    // A small buffer size exercises more internal code paths.
    assert_eq!(unsafe { gzbuffer(file, 8) }, 0);

    // Write the content one byte at a time; each call must echo the byte back.
    for &byte in CONTENT {
        let value = c_int::from(byte);
        assert_eq!(unsafe { gzputc(file, value) }, value);
    }

    // Closing the file flushes any buffered writes.
    assert_eq!(unsafe { gzclose(file) }, Z_OK);

    // Read the file back through the raw OS interface and compare the bytes.
    #[cfg(target_os = "windows")]
    let flags = libc::O_RDONLY | libc::O_BINARY;
    #[cfg(not(target_os = "windows"))]
    let flags = libc::O_RDONLY;
    let fd = unsafe { libc::open(c_file_name.as_ptr(), flags) };
    assert_ne!(fd, -1);

    // Ask for one byte more than expected so that trailing data would be noticed.
    let mut buf = [0u8; CONTENT.len() + 1];
    let bytes_read = unsafe {
        libc::read(fd, buf.as_mut_ptr().cast::<c_void>(), buf.len() as _)
    };
    assert_ne!(bytes_read, -1);
    assert_eq!(&buf[..bytes_read as usize], CONTENT);
    assert_eq!(unsafe { libc::close(fd) }, 0);
}

#[test]
fn gzputc_error() {
    // gzputc on a null file handle should return -1.
    assert_eq!(unsafe { gzputc(ptr::null_mut(), 1) }, -1);

    // gzputc on a read-only file handle should return -1.
    // The handle itself (not a null pointer) must be passed here; otherwise
    // this would only repeat the null-handle check above.
    let file = unsafe { gzdopen(-2, CString::new("r").unwrap().as_ptr()) };
    assert!(!file.is_null());
    assert_eq!(unsafe { gzputc(file, 1) }, -1);
    // Closing fails with Z_ERRNO because the underlying descriptor is invalid.
    assert_eq!(unsafe { gzclose(file) }, Z_ERRNO);

    // Open an invalid file descriptor as a gzip write stream, with a small buffer,
    // and use gzputc to write enough bytes to overflow the buffer and cause file I/O.
    // The last gzputc call should return -1.
    let file = unsafe { gzdopen(-2, CString::new("wT").unwrap().as_ptr()) };
    assert!(!file.is_null());
    const BUF_SIZE: usize = 10;
    assert_eq!(unsafe { gzbuffer(file, BUF_SIZE as _) }, 0);
    // In write mode, the internal input buffer is 2x the size specified via gzbuffer.
    for _ in 0..BUF_SIZE * 2 {
        assert_eq!(unsafe { gzputc(file, 1) }, 1);
    }
    assert_eq!(unsafe { gzputc(file, 1) }, -1);
    assert_eq!(unsafe { gzclose(file) }, Z_ERRNO);
}

#[test]
fn gzputs_basic() {
    // Pieces written one gzputs call at a time, and their concatenation.
    const CONTENT: [&str; 3] = ["zlib ", "", "string larger than the buffer size"];
    const EXPECTED: &str = "zlib string larger than the buffer size";

    // The temporary directory is removed automatically when `scratch` goes
    // out of scope at the end of the test.
    let scratch = tempfile::TempDir::new_in(temp_base()).unwrap();
    let file_name = path(scratch.path(), "output");
    let c_file_name = CString::new(file_name.as_str()).unwrap();
    let c_open_mode = CString::new("wT").unwrap();

    // Open a new gzip file for writing. Direct (uncompressed) mode keeps the
    // on-disk bytes identical to what was written, which simplifies validation.
    let file = unsafe { gzopen(c_file_name.as_ptr(), c_open_mode.as_ptr()) };
    assert!(!file.is_null());
    // A small buffer size exercises more internal code paths.
    assert_eq!(unsafe { gzbuffer(file, 8) }, 0);

    // gzputs of a null string should return -1 rather than crashing.
    assert_eq!(unsafe { gzputs(file, ptr::null()) }, -1);

    // Each call must report the length of the piece it wrote, including the
    // empty piece and the piece that is longer than the buffer.
    for piece in CONTENT {
        let c_piece = CString::new(piece).unwrap();
        let written = unsafe { gzputs(file, c_piece.as_ptr()) };
        assert_eq!(written, piece.len() as _);
    }

    // Closing the file flushes any buffered writes.
    assert_eq!(unsafe { gzclose(file) }, Z_OK);

    // Read the file back through the raw OS interface and compare the bytes.
    #[cfg(target_os = "windows")]
    let flags = libc::O_RDONLY | libc::O_BINARY;
    #[cfg(not(target_os = "windows"))]
    let flags = libc::O_RDONLY;
    let fd = unsafe { libc::open(c_file_name.as_ptr(), flags) };
    assert_ne!(fd, -1);

    // Ask for one byte more than expected so that trailing data would be noticed.
    let mut buf = [0u8; EXPECTED.len() + 1];
    let bytes_read = unsafe {
        libc::read(fd, buf.as_mut_ptr().cast::<c_void>(), buf.len() as _)
    };
    assert_ne!(bytes_read, -1);
    assert_eq!(&buf[..bytes_read as usize], EXPECTED.as_bytes());
    assert_eq!(unsafe { libc::close(fd) }, 0);
}

#[test]
fn gzputs_error() {
    // Null-terminated input shared by every call below.
    const CONTENT: &[u8] = b"example\0";

    // gzputs on a null file handle should return -1.
    assert_eq!(
        unsafe { gzputs(ptr::null_mut(), CONTENT.as_ptr().cast::<c_char>()) },
        -1
    );

    // gzputs on a read-only file handle should return -1.
    // The handle itself (not a null pointer) must be passed here; otherwise
    // this would only repeat the null-handle check above.
    let file = unsafe { gzdopen(-2, CString::new("r").unwrap().as_ptr()) };
    assert!(!file.is_null());
    assert_eq!(
        unsafe { gzputs(file, CONTENT.as_ptr().cast::<c_char>()) },
        -1
    );
    // Closing fails with Z_ERRNO because the underlying descriptor is invalid.
    assert_eq!(unsafe { gzclose(file) }, Z_ERRNO);
}

// Get the size in bytes of a file.
//
// # Returns
Expand Down
Loading