Skip to content

Commit 41afb3d

Browse files
brianpane authored and folkertdev committed
Implement gzputc and gzputs
1 parent f72e720 commit 41afb3d

File tree

2 files changed

+277
-8
lines changed

2 files changed

+277
-8
lines changed

libz-rs-sys/src/gz.rs

Lines changed: 132 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use core::ffi::{c_char, c_int, c_uint, c_void, CStr};
1111
use core::ptr;
1212
use libc::{O_APPEND, O_CREAT, O_EXCL, O_RDONLY, O_TRUNC, O_WRONLY, SEEK_CUR, SEEK_END};
1313
use std::cmp;
14+
use std::cmp::Ordering;
1415
use zlib_rs::deflate::Strategy;
1516
use zlib_rs::MAX_WBITS;
1617

@@ -105,6 +106,34 @@ impl GzState {
105106

106107
Ok((exclusive, cloexec))
107108
}
109+
110+
// Compute the number of bytes of input buffered in `self`.
111+
//
112+
// # Safety
113+
//
114+
// Either
115+
// - `state.next_in` points into the buffer that starts at `state.input`, or
116+
// - `state.input` is null.
117+
//
118+
// It is almost always the case that one of those two conditions is true
119+
// inside this module. The notable exception is in a specific block within
120+
// `gz_write`, where we temporarily set `state.next_in` to point to a
121+
// caller-supplied bufferto do a zero-copy optimization when compressing
122+
// large inputs.
123+
unsafe fn input_len(&self) -> usize {
124+
if self.input.is_null() {
125+
return 0;
126+
}
127+
// Safety: As long as the caller has verified that `stream.next_in` points inside
128+
// the buffer that starts at `input`, `stream.next_in + stream.avail_in` will be within
129+
// that buffer too.
130+
(unsafe {
131+
self.stream
132+
.next_in
133+
.add(self.stream.avail_in as usize)
134+
.offset_from(self.input)
135+
}) as _
136+
}
108137
}
109138

110139
// Gzip operating modes
@@ -1408,13 +1437,8 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
14081437
if state.stream.avail_in == 0 {
14091438
state.stream.next_in = state.input;
14101439
}
1411-
let have = unsafe {
1412-
state
1413-
.stream
1414-
.next_in
1415-
.add(state.stream.avail_in as usize)
1416-
.offset_from(state.input)
1417-
} as usize;
1440+
// Safety: `state.stream.next_in` points into the buffer starting at `state.input`.
1441+
let have = unsafe { state.input_len() };
14181442
let copy = cmp::min(state.in_size.saturating_sub(have), len);
14191443
// Safety: The caller is responsible for ensuring that buf points to at least len readable
14201444
// bytes, and copy is <= len.
@@ -1437,6 +1461,9 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
14371461
}
14381462

14391463
// Directly compress user buffer to file.
1464+
// Note: For this operation, we temporarily break the invariant that
1465+
// `state.stream.next_in` points to somewhere in the `state.input` buffer.
1466+
let save_next_in = state.stream.next_in;
14401467
state.stream.next_in = buf.cast::<_>();
14411468
loop {
14421469
let n = cmp::min(len, c_uint::MAX as usize) as c_uint;
@@ -1450,6 +1477,7 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
14501477
break;
14511478
}
14521479
}
1480+
state.stream.next_in = save_next_in;
14531481
}
14541482

14551483
// Input was all buffered or compressed.
@@ -1738,6 +1766,103 @@ pub unsafe extern "C-unwind" fn gzoffset(file: gzFile) -> z_off_t {
17381766
}
17391767
}
17401768

1769+
/// Compress and write `c`, converted to an unsigned 8-bit char, into `file`.
1770+
///
1771+
/// # Returns
1772+
///
1773+
/// - The value that was written, on success.
1774+
/// - `-1` on error.
1775+
///
1776+
/// # Safety
1777+
///
1778+
/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
1779+
pub unsafe extern "C-unwind" fn gzputc(file: gzFile, c: c_int) -> c_int {
1780+
let Some(state) = (unsafe { file.cast::<GzState>().as_mut() }) else {
1781+
return -1;
1782+
};
1783+
1784+
// Check that we're writing and that there's no error.
1785+
if state.mode != GzMode::GZ_WRITE || state.err != Z_OK {
1786+
return -1;
1787+
}
1788+
1789+
/* FIXME: Uncomment when seek support is implemented.
1790+
// Check for seek request.
1791+
if state.seek {
1792+
state.seek = false;
1793+
if gz_zero(state, state.skip) == -1 {
1794+
return -1;
1795+
}
1796+
}
1797+
*/
1798+
1799+
// Try writing to input buffer for speed (state.input == null if buffer not initialized).
1800+
if !state.input.is_null() {
1801+
if state.stream.avail_in == 0 {
1802+
state.stream.next_in = state.input;
1803+
}
1804+
// Safety: `state.stream.next_in` points into the buffer starting at `state.input`.
1805+
// (This is an invariant maintained throughout this module, except for a specific
1806+
// block within `gz_write` that does not call any function that might call `gzputc`.)
1807+
let have = unsafe { state.input_len() };
1808+
if have < state.in_size {
1809+
// Safety: `input` has `in_size` bytes, and `have` < `in_size`.
1810+
unsafe { *state.input.add(have) = c as u8 };
1811+
state.stream.avail_in += 1;
1812+
state.pos += 1;
1813+
return c & 0xff;
1814+
}
1815+
}
1816+
1817+
// No room in buffer or not initialized, use gz_write.
1818+
let buf = [c as u8];
1819+
// Safety: We have confirmed that `state` is valid, and `buf` contains 1 readable byte of data.
1820+
match unsafe { gz_write(state, buf.as_ptr().cast::<c_void>(), 1) } {
1821+
1 => c & 0xff,
1822+
_ => -1,
1823+
}
1824+
}
1825+
1826+
/// Compress and write the given null-terminated string `s` to file, excluding
1827+
/// the terminating null character.
1828+
///
1829+
/// # Returns
1830+
///
1831+
/// - the number of characters written, on success.
1832+
/// - `-1` in case of error.
1833+
///
1834+
/// # Safety
1835+
///
1836+
/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
1837+
/// - `s` must point to a null-terminated C string.
1838+
pub unsafe extern "C-unwind" fn gzputs(file: gzFile, s: *const c_char) -> c_int {
1839+
let Some(state) = (unsafe { file.cast::<GzState>().as_mut() }) else {
1840+
return -1;
1841+
};
1842+
1843+
if s.is_null() {
1844+
return -1;
1845+
}
1846+
1847+
// Check that we're writing and that there's no error.
1848+
if state.mode != GzMode::GZ_WRITE || state.err != Z_OK {
1849+
return -1;
1850+
}
1851+
1852+
// Write string.
1853+
let len = unsafe { libc::strlen(s) };
1854+
if c_int::try_from(len).is_err() {
1855+
const MSG: &str = "string length does not fit in int";
1856+
unsafe { gz_error(state, Some((Z_STREAM_ERROR, MSG))) };
1857+
return -1;
1858+
}
1859+
let put = unsafe { gz_write(state, s.cast::<c_void>(), len) };
1860+
match put.cmp(&(len as i32)) {
1861+
Ordering::Less => -1,
1862+
Ordering::Equal | Ordering::Greater => len as _,
1863+
}
1864+
}
1865+
17411866
// Create a deep copy of a C string using `ALLOCATOR`
17421867
//
17431868
// # Safety

test-libz-rs-sys/src/gz.rs

Lines changed: 145 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use zlib_rs::c_api::*;
22

33
use libz_rs_sys::{
44
gzFile_s, gzbuffer, gzclearerr, gzclose, gzclose_r, gzclose_w, gzdirect, gzdopen, gzerror,
5-
gzflush, gzoffset, gzopen, gzread, gztell, gzwrite,
5+
gzflush, gzoffset, gzopen, gzputc, gzputs, gzread, gztell, gzwrite,
66
};
77

88
use std::ffi::{c_char, c_int, c_uint, c_void, CString};
@@ -878,6 +878,150 @@ fn gzoffset_gztell_error() {
878878
}
879879
}
880880

881+
#[test]
fn gzputc_basic() {
    const CONTENT: &[u8] = b"sample text to test gzputc implementation";

    // The temporary directory is removed automatically when `temp_dir` is dropped
    // at the end of this test.
    let temp_dir = tempfile::TempDir::new_in(temp_base()).unwrap();
    let file_name = path(temp_dir.path(), "output");
    let c_file_name = CString::new(file_name.as_str()).unwrap();

    // Open a new gzip file for writing, in direct (uncompressed) mode so that the
    // file contents can be compared byte-for-byte below.
    let c_mode = CString::new("wT").unwrap();
    let file = unsafe { gzopen(c_file_name.as_ptr(), c_mode.as_ptr()) };
    assert!(!file.is_null());
    // A small buffer size exercises more internal code paths.
    assert_eq!(unsafe { gzbuffer(file, 8) }, 0);

    // Write the content one byte at a time; each call must echo the byte written.
    for &byte in CONTENT {
        let expected = c_int::from(byte);
        assert_eq!(unsafe { gzputc(file, expected) }, expected);
    }

    // Closing the file flushes any buffered writes.
    assert_eq!(unsafe { gzclose(file) }, Z_OK);

    // Read the file back and check that it holds exactly the expected bytes.
    #[cfg(target_os = "windows")]
    let open_flags = libc::O_BINARY | libc::O_RDONLY;
    #[cfg(not(target_os = "windows"))]
    let open_flags = libc::O_RDONLY;
    let fd = unsafe { libc::open(c_file_name.as_ptr(), open_flags) };
    assert_ne!(fd, -1);
    // Ask for one byte more than expected, so that any trailing extra data is detected.
    let mut readback = [0u8; CONTENT.len() + 1];
    let bytes_read =
        unsafe { libc::read(fd, readback.as_mut_ptr().cast::<c_void>(), readback.len() as _) };
    assert_ne!(bytes_read, -1);
    assert_eq!(&readback[..bytes_read as usize], CONTENT);
    assert_eq!(unsafe { libc::close(fd) }, 0);
}
926+
927+
#[test]
fn gzputc_error() {
    // gzputc on a null file handle should return -1.
    assert_eq!(unsafe { gzputc(ptr::null_mut(), 1) }, -1);

    // gzputc on a read-only file handle should return -1.
    // Note: this must pass `file` (not a null pointer); otherwise the check merely
    // repeats the null-handle case above and never reaches the mode check.
    let file = unsafe { gzdopen(-2, CString::new("r").unwrap().as_ptr()) };
    assert!(!file.is_null());
    assert_eq!(unsafe { gzputc(file, 1) }, -1);
    assert_eq!(unsafe { gzclose(file) }, Z_ERRNO);

    // Open an invalid file descriptor as a gzip write stream, with a small buffer,
    // and use gzputc to write enough bytes to overflow the buffer and cause file I/O.
    // The last gzputc call should return -1.
    let file = unsafe { gzdopen(-2, CString::new("wT").unwrap().as_ptr()) };
    assert!(!file.is_null());
    const BUF_SIZE: usize = 10;
    assert_eq!(unsafe { gzbuffer(file, BUF_SIZE as _) }, 0);
    // In write mode, the internal input buffer is 2x the size specified via gzbuffer.
    for _ in 0..BUF_SIZE * 2 {
        assert_eq!(unsafe { gzputc(file, 1) }, 1);
    }
    assert_eq!(unsafe { gzputc(file, 1) }, -1);
    assert_eq!(unsafe { gzclose(file) }, Z_ERRNO);
}
951+
952+
#[test]
fn gzputs_basic() {
    const CONTENT: [&str; 3] = ["zlib ", "", "string larger than the buffer size"];
    const EXPECTED: &str = "zlib string larger than the buffer size";

    // The temporary directory is removed automatically when `temp_dir` is dropped
    // at the end of this test.
    let temp_dir = tempfile::TempDir::new_in(temp_base()).unwrap();
    let file_name = path(temp_dir.path(), "output");
    let c_file_name = CString::new(file_name.as_str()).unwrap();

    // Open a new gzip file for writing, in direct (uncompressed) mode so that the
    // file contents can be compared byte-for-byte below.
    let c_mode = CString::new("wT").unwrap();
    let file = unsafe { gzopen(c_file_name.as_ptr(), c_mode.as_ptr()) };
    assert!(!file.is_null());
    // A small buffer size exercises more internal code paths.
    assert_eq!(unsafe { gzbuffer(file, 8) }, 0);

    // gzputs of a null string should return -1 rather than crashing.
    assert_eq!(unsafe { gzputs(file, ptr::null()) }, -1);

    // Write each piece with gzputs; every call must report the piece's full length.
    for piece in CONTENT {
        let c_piece = CString::new(piece).unwrap();
        let written = unsafe { gzputs(file, c_piece.as_ptr()) };
        assert_eq!(written, piece.len() as c_int);
    }

    // Closing the file flushes any buffered writes.
    assert_eq!(unsafe { gzclose(file) }, Z_OK);

    // Read the file back and check that it holds exactly the expected bytes.
    #[cfg(target_os = "windows")]
    let open_flags = libc::O_BINARY | libc::O_RDONLY;
    #[cfg(not(target_os = "windows"))]
    let open_flags = libc::O_RDONLY;
    let fd = unsafe { libc::open(c_file_name.as_ptr(), open_flags) };
    assert_ne!(fd, -1);
    // Ask for one byte more than expected, so that any trailing extra data is detected.
    let mut readback = [0u8; EXPECTED.len() + 1];
    let bytes_read =
        unsafe { libc::read(fd, readback.as_mut_ptr().cast::<c_void>(), readback.len() as _) };
    assert_ne!(bytes_read, -1);
    assert_eq!(&readback[..bytes_read as usize], EXPECTED.as_bytes());
    assert_eq!(unsafe { libc::close(fd) }, 0);
}
1004+
1005+
#[test]
fn gzputs_error() {
    const CONTENT: &[u8] = b"example\0";

    // gzputs on a null file handle should return -1.
    assert_eq!(
        unsafe { gzputs(ptr::null_mut(), CONTENT.as_ptr().cast::<c_char>()) },
        -1
    );

    // gzputs on a read-only file handle should return -1.
    // Note: this must pass `file` (not a null pointer); otherwise the check merely
    // repeats the null-handle case above and never reaches the mode check.
    let file = unsafe { gzdopen(-2, CString::new("r").unwrap().as_ptr()) };
    assert!(!file.is_null());
    assert_eq!(
        unsafe { gzputs(file, CONTENT.as_ptr().cast::<c_char>()) },
        -1
    );
    assert_eq!(unsafe { gzclose(file) }, Z_ERRNO);
}
1024+
8811025
// Get the size in bytes of a file.
8821026
//
8831027
// # Returns

0 commit comments

Comments
 (0)