trifectatechfoundation · folkertdev · Apr 24, 2025 · Apr 23, 2025 · Apr 24, 2025
diff --git a/libz-rs-sys/src/gz.rs b/libz-rs-sys/src/gz.rs
@@ -11,6 +11,7 @@ use core::ffi::{c_char, c_int, c_uint, c_void, CStr};
 use core::ptr;
 use libc::{O_APPEND, O_CREAT, O_EXCL, O_RDONLY, O_TRUNC, O_WRONLY, SEEK_CUR, SEEK_END};
 use std::cmp;
+use std::cmp::Ordering;
 use zlib_rs::deflate::Strategy;
 use zlib_rs::MAX_WBITS;
 
@@ -105,6 +106,32 @@ impl GzState {
 
         Ok((exclusive, cloexec))
     }
+
+    /// Compute how far the end of the buffered input lies past the start of `self.input`.
+    ///
+    /// # Safety
+    ///
+    /// Either
+    /// - `self.input` is null (in which case this returns 0), or
+    /// - `self.stream.next_in .. self.stream.next_in + self.stream.avail_in`
+    ///   is contained in `self.input .. self.input + self.in_size`.
+    ///
+    /// It is almost always the case that one of those two conditions is true
+    /// inside this module. The notable exception is in a specific block within
+    /// `gz_write`, where we temporarily set `state.stream.next_in` to point to a
+    /// caller-supplied buffer to do a zero-copy optimization when compressing
+    /// large inputs.
+    unsafe fn input_len(&self) -> usize {
+        if self.input.is_null() {
+            return 0;
+        }
+
+        // Safety: per the contract above, `next_in + avail_in` does not pass the end of `input`.
+        let end = unsafe { self.stream.next_in.add(self.stream.avail_in as usize) };
+
+        // Safety: the caller guarantees that the input slice of `stream` is a subslice of `input`.
+        (unsafe { end.offset_from(self.input) }) as _
+    }
 }
 
 // Gzip operating modes
@@ -1408,13 +1435,8 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
             if state.stream.avail_in == 0 {
                 state.stream.next_in = state.input;
             }
-            let have = unsafe {
-                state
-                    .stream
-                    .next_in
-                    .add(state.stream.avail_in as usize)
-                    .offset_from(state.input)
-            } as usize;
+            // Safety: `state.stream.next_in` points into the buffer starting at `state.input`.
+            let have = unsafe { state.input_len() };
             let copy = cmp::min(state.in_size.saturating_sub(have), len);
             // Safety: The caller is responsible for ensuring that buf points to at least len readable
             // bytes, and copy is <= len.
@@ -1437,6 +1459,9 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
         }
 
         // Directly compress user buffer to file.
+        // Note: For this operation, we temporarily break the invariant that
+        // `state.stream.next_in` points to somewhere in the `state.input` buffer.
+        let save_next_in = state.stream.next_in;
         state.stream.next_in = buf.cast::<_>();
         loop {
             let n = cmp::min(len, c_uint::MAX as usize) as c_uint;
@@ -1450,6 +1475,7 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
                 break;
             }
         }
+        state.stream.next_in = save_next_in;
     }
 
     // Input was all buffered or compressed.
@@ -1738,6 +1764,103 @@ pub unsafe extern "C-unwind" fn gzoffset(file: gzFile) -> z_off_t {
     }
 }
 
+/// Compress and write `c`, converted to an unsigned 8-bit char, into `file`.
+///
+/// # Returns
+///
+///  - The byte that was written (`c & 0xff`), on success.
+///  - `-1` on error, including when `file` is null or is not open for writing.
+///
+/// # Safety
+///
+/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
+pub unsafe extern "C-unwind" fn gzputc(file: gzFile, c: c_int) -> c_int {
+    let Some(state) = (unsafe { file.cast::<GzState>().as_mut() }) else {
+        return -1;
+    };
+
+    // Only proceed for a handle in write mode that has no error recorded on it.
+    if state.mode != GzMode::GZ_WRITE || state.err != Z_OK {
+        return -1;
+    }
+
+    /* FIXME: Uncomment when seek support is implemented.
+    // Check for seek request.
+    if state.seek {
+        state.seek = false;
+        if gz_zero(state, state.skip) == -1 {
+            return -1;
+        }
+    }
+     */
+
+    // Fast path: append to the input buffer directly (state.input == null if not initialized).
+    if !state.input.is_null() {
+        if state.stream.avail_in == 0 {
+            state.stream.next_in = state.input;
+        }
+        // Safety: `state.stream.next_in` points into the buffer starting at `state.input`.
+        // (This is an invariant maintained throughout this module, except for a specific
+        // block within `gz_write` that does not call any function that might call `gzputc`.)
+        let have = unsafe { state.input_len() };
+        if have < state.in_size {
+            // Safety: `input` has `in_size` bytes, and `have` < `in_size`.
+            unsafe { *state.input.add(have) = c as u8 };
+            state.stream.avail_in += 1;
+            state.pos += 1;
+            return c & 0xff;
+        }
+    }
+
+    // Slow path: the buffer is full or not initialized yet, so hand the byte to gz_write.
+    let buf = [c as u8];
+    // Safety: We have confirmed that `state` is valid, and `buf` contains 1 readable byte of data.
+    match unsafe { gz_write(state, buf.as_ptr().cast::<c_void>(), 1) } {
+        1 => c & 0xff,
+        _ => -1,
+    }
+}
+
+/// Compress and write the given null-terminated string `s` to file, excluding
+/// the terminating null character.
+///
+/// # Returns
+///
+/// - the number of characters written, on success.
+/// - `-1` in case of error, including when `file` or `s` is null.
+///
+/// # Safety
+///
+/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
+/// - `s`, if non-null, must point to a null-terminated C string.
+pub unsafe extern "C-unwind" fn gzputs(file: gzFile, s: *const c_char) -> c_int {
+    let Some(state) = (unsafe { file.cast::<GzState>().as_mut() }) else {
+        return -1;
+    };
+    if s.is_null() {
+        return -1;
+    }
+
+    // Check that we're writing and that there's no error.
+    if state.mode != GzMode::GZ_WRITE || state.err != Z_OK {
+        return -1;
+    }
+
+    // Safety: `s` is non-null, so the caller guarantees it is a null-terminated C string.
+    let len = unsafe { libc::strlen(s) };
+    let Ok(len_int) = c_int::try_from(len) else {
+        const MSG: &str = "string length does not fit in int";
+        unsafe { gz_error(state, Some((Z_STREAM_ERROR, MSG))) };
+        return -1;
+    };
+    // Safety: `strlen` measured `len` readable bytes at `s`, and `state` is a live `GzState`.
+    let put = unsafe { gz_write(state, s.cast::<c_void>(), len) };
+    match put.cmp(&len_int) {
+        Ordering::Less => -1, // A short write is reported as an error.
+        Ordering::Equal | Ordering::Greater => len_int,
+    }
+}
+
 // Create a deep copy of a C string using `ALLOCATOR`
 //
 // # Safety

diff --git a/test-libz-rs-sys/src/gz.rs b/test-libz-rs-sys/src/gz.rs
@@ -2,7 +2,7 @@ use zlib_rs::c_api::*;
 
 use libz_rs_sys::{
     gzFile_s, gzbuffer, gzclearerr, gzclose, gzclose_r, gzclose_w, gzdirect, gzdopen, gzerror,
-    gzflush, gzoffset, gzopen, gzread, gztell, gzwrite,
+    gzflush, gzoffset, gzopen, gzputc, gzputs, gzread, gztell, gzwrite,
 };
 
 use std::ffi::{c_char, c_int, c_uint, c_void, CString};
@@ -878,6 +878,150 @@ fn gzoffset_gztell_error() {
     }
 }
 
+#[test]
+fn gzputc_basic() {
+    // Work inside a scratch directory; it is deleted automatically once `scratch_dir`
+    // is dropped at the end of the test.
+    let scratch_dir = tempfile::TempDir::new_in(temp_base()).unwrap();
+    let file_name = path(scratch_dir.path(), "output");
+
+    // Bind the C strings to locals so the pointers handed to the C API stay valid.
+    let c_file_name = CString::new(file_name.as_str()).unwrap();
+    let c_open_mode = CString::new("wT").unwrap();
+
+    // Create the output file in direct (uncompressed) mode, so that the bytes on disk can be
+    // compared against the input verbatim.
+    let file = unsafe { gzopen(c_file_name.as_ptr(), c_open_mode.as_ptr()) };
+    assert!(!file.is_null());
+    // Set a small buffer size to exercise more internal code paths.
+    assert_eq!(unsafe { gzbuffer(file, 8) }, 0);
+
+    // Feed the content through gzputc one byte per call; each call must echo its byte back.
+    const CONTENT: &[u8] = b"sample text to test gzputc implementation";
+    for &byte in CONTENT {
+        let expected = c_int::from(byte);
+        assert_eq!(unsafe { gzputc(file, expected) }, expected);
+    }
+
+    // Closing the file flushes whatever is still buffered.
+    assert_eq!(unsafe { gzclose(file) }, Z_OK);
+
+    // Reopen the file with plain OS calls and check the bytes that reached the disk.
+    #[cfg(target_os = "windows")]
+    let open_flags = libc::O_RDONLY | libc::O_BINARY;
+    #[cfg(not(target_os = "windows"))]
+    let open_flags = libc::O_RDONLY;
+    let fd = unsafe { libc::open(c_file_name.as_ptr(), open_flags) };
+    assert_ne!(fd, -1);
+
+    // The read buffer is one byte larger than the content, so any extra output would show up.
+    let mut data = [0u8; CONTENT.len() + 1];
+    let n_read = unsafe { libc::read(fd, data.as_mut_ptr() as *mut c_void, data.len() as _) };
+    assert_ne!(n_read, -1);
+    assert_eq!(&data[..n_read as usize], CONTENT);
+
+    // Release the descriptor now that the comparison is done.
+    assert_eq!(unsafe { libc::close(fd) }, 0);
+}
+
+#[test]
+fn gzputc_error() {
+    // gzputc on a null file handle should return -1.
+    assert_eq!(unsafe { gzputc(ptr::null_mut(), 1) }, -1);
+
+    // gzputc on a read-only file handle should return -1.
+    let file = unsafe { gzdopen(-2, CString::new("r").unwrap().as_ptr()) };
+    assert!(!file.is_null());
+    assert_eq!(unsafe { gzputc(file, 1) }, -1);
+    assert_eq!(unsafe { gzclose(file) }, Z_ERRNO);
+
+    // Open an invalid file descriptor as a gzip write stream, with a small buffer,
+    // and use gzputc to write enough bytes to overflow the buffer and cause file I/O.
+    // The last gzputc call should return -1.
+    let file = unsafe { gzdopen(-2, CString::new("wT").unwrap().as_ptr()) };
+    const BUF_SIZE: usize = 10;
+    assert_eq!(unsafe { gzbuffer(file, BUF_SIZE as _) }, 0);
+    // In write mode, the internal input buffer is 2x the size specified via gzbuffer.
+    for _ in 0..BUF_SIZE * 2 {
+        assert_eq!(unsafe { gzputc(file, 1) }, 1);
+    }
+    assert_eq!(unsafe { gzputc(file, 1) }, -1);
+    assert_eq!(unsafe { gzclose(file) }, Z_ERRNO);
+}
+
+#[test]
+fn gzputs_basic() {
+    // Work inside a scratch directory; it is deleted automatically once `scratch_dir`
+    // is dropped at the end of the test.
+    let scratch_dir = tempfile::TempDir::new_in(temp_base()).unwrap();
+    let file_name = path(scratch_dir.path(), "output");
+
+    // Bind the C strings to locals so the pointers handed to the C API stay valid.
+    let c_file_name = CString::new(file_name.as_str()).unwrap();
+    let c_open_mode = CString::new("wT").unwrap();
+
+    // Create the output file in direct (uncompressed) mode, so that the bytes on disk can be
+    // compared against the input verbatim.
+    let file = unsafe { gzopen(c_file_name.as_ptr(), c_open_mode.as_ptr()) };
+    assert!(!file.is_null());
+
+    // Set a small buffer size to exercise more internal code paths.
+    assert_eq!(unsafe { gzbuffer(file, 8) }, 0);
+
+    // A null string must be rejected with -1 instead of being dereferenced.
+    assert_eq!(unsafe { gzputs(file, ptr::null()) }, -1);
+
+    // Write several strings with gzputs: a short one, an empty one, and one longer than the
+    // configured buffer size. Each call must report the full length of its string.
+    const CONTENT: [&str; 3] = ["zlib ", "", "string larger than the buffer size"];
+    for piece in CONTENT {
+        let c_piece = CString::new(piece).unwrap();
+        let written = unsafe { gzputs(file, c_piece.as_ptr()) };
+        assert_eq!(written, piece.len() as c_int);
+    }
+
+    // Closing the file flushes whatever is still buffered.
+    assert_eq!(unsafe { gzclose(file) }, Z_OK);
+
+    // Reopen the file with plain OS calls and check the bytes that reached the disk.
+    const EXPECTED: &str = "zlib string larger than the buffer size";
+    #[cfg(target_os = "windows")]
+    let open_flags = libc::O_RDONLY | libc::O_BINARY;
+    #[cfg(not(target_os = "windows"))]
+    let open_flags = libc::O_RDONLY;
+    let fd = unsafe { libc::open(c_file_name.as_ptr(), open_flags) };
+    assert_ne!(fd, -1);
+
+    // The read buffer is one byte larger than the content, so any extra output would show up.
+    let mut data = [0u8; EXPECTED.len() + 1];
+    let n_read = unsafe { libc::read(fd, data.as_mut_ptr() as *mut c_void, data.len() as _) };
+    assert_ne!(n_read, -1);
+    assert_eq!(&data[..n_read as usize], EXPECTED.as_bytes());
+
+    // Release the descriptor now that the comparison is done.
+    assert_eq!(unsafe { libc::close(fd) }, 0);
+}
+
+#[test]
+fn gzputs_error() {
+    const CONTENT: &[u8] = b"example\0";
+
+    // gzputs on a null file handle should return -1.
+    assert_eq!(
+        unsafe { gzputs(ptr::null_mut(), CONTENT.as_ptr().cast::<c_char>()) },
+        -1
+    );
+
+    // gzputs on a read-only file handle should return -1.
+    let file = unsafe { gzdopen(-2, CString::new("r").unwrap().as_ptr()) };
+    assert!(!file.is_null());
+    assert_eq!(
+        unsafe { gzputs(file, CONTENT.as_ptr().cast::<c_char>()) },
+        -1
+    );
+    assert_eq!(unsafe { gzclose(file) }, Z_ERRNO);
+}
+
 // Get the size in bytes of a file.
 //
 // # Returns