Skip to content

Commit d66e3aa

Browse files
committed
Implement gzungetc
1 parent bd042e5 commit d66e3aa

File tree

2 files changed

+214
-7
lines changed

2 files changed

+214
-7
lines changed

libz-rs-sys/src/gz.rs

Lines changed: 113 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ struct GzState {
3535
// fields directly.
3636
have: c_uint, // number of bytes available at next
3737
next: *const Bytef, // next byte of uncompressed data
38-
pos: u64, // current offset in uncompressed data stream
38+
pos: i64, // current offset in uncompressed data stream
3939

4040
// End of public interface:
4141
// All fields after this point are opaque to C code using this library,
@@ -1108,7 +1108,7 @@ unsafe fn gz_read(state: &mut GzState, mut buf: *mut u8, mut len: usize) -> usiz
11081108
len -= n;
11091109
buf = unsafe { buf.add(n) };
11101110
got += n;
1111-
state.pos += n as u64;
1111+
state.pos += n as i64;
11121112

11131113
if len == 0 {
11141114
break;
@@ -1561,7 +1561,7 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
15611561
// bytes, and copy is <= len.
15621562
unsafe { ptr::copy(buf, state.input.add(have).cast::<c_void>(), copy) };
15631563
state.stream.avail_in += copy as c_uint;
1564-
state.pos += copy as u64;
1564+
state.pos += copy as i64;
15651565
buf = unsafe { buf.add(copy) };
15661566
len -= copy;
15671567
if len != 0 && gz_comp(state, Z_NO_FLUSH).is_err() {
@@ -1585,7 +1585,7 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
15851585
loop {
15861586
let n = cmp::min(len, c_uint::MAX as usize) as c_uint;
15871587
state.stream.avail_in = n;
1588-
state.pos += n as u64;
1588+
state.pos += n as i64;
15891589
if gz_comp(state, Z_NO_FLUSH).is_err() {
15901590
return 0;
15911591
}
@@ -1838,7 +1838,7 @@ pub unsafe extern "C-unwind" fn gztell(file: gzFile) -> z_off_t {
18381838

18391839
// Return position.
18401840
match state.seek {
1841-
true => (state.pos + state.skip as u64) as z_off_t,
1841+
true => (state.pos + state.skip) as z_off_t,
18421842
false => state.pos as z_off_t,
18431843
}
18441844
}
@@ -2045,6 +2045,113 @@ pub unsafe extern "C-unwind" fn gzgetc_(file: gzFile) -> c_int {
20452045
unsafe { gzgetc(file) }
20462046
}
20472047

2048+
/// Push `c` back onto the stream for file to be read as the first character on
2049+
/// the next read. At least one character of push-back is always allowed.
2050+
///
2051+
/// `gzungetc` will fail if `c` is `-1`, and may fail if a character has been pushed
2052+
/// but not read yet. If `gzungetc` is used immediately after [`gzopen`] or [`gzdopen`],
2053+
/// at least the output buffer size of pushed characters is allowed. (See [`gzbuffer`].)
2054+
///
2055+
/// The pushed character will be discarded if the stream is repositioned with
2056+
/// [`gzseek`] or [`gzrewind`].
2057+
///
2058+
/// # Returns
2059+
///
2060+
/// - The character pushed, on success.
2061+
/// - `-1` on failure.
2062+
///
2063+
/// # Safety
2064+
///
2065+
/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
2066+
#[cfg_attr(feature = "export-symbols", export_name = crate::prefix!(gzungetc))]
2067+
pub unsafe extern "C-unwind" fn gzungetc(c: c_int, file: gzFile) -> c_int {
2068+
let Some(state) = (unsafe { file.cast::<GzState>().as_mut() }) else {
2069+
return -1;
2070+
};
2071+
2072+
// Validate the input.
2073+
if c < 0 {
2074+
return -1;
2075+
}
2076+
2077+
// Check that we're reading and that there's no (serious) error.
2078+
if state.mode != GzMode::GZ_READ || (state.err != Z_OK && state.err != Z_BUF_ERROR) {
2079+
return -1;
2080+
}
2081+
2082+
// In case this was just opened, set up the input buffer.
2083+
if state.how == How::Look && state.have == 0 {
2084+
// We have verified that `state` is valid.
2085+
let _ = unsafe { gz_look(state) };
2086+
}
2087+
2088+
/* FIXME uncomment when seek support is implemented.
2089+
// Process a skip request.
2090+
if state.seek {
2091+
state.seek = false;
2092+
if gz_skip(state, state.skip) == -1 {
2093+
return -1;
2094+
}
2095+
}
2096+
*/
2097+
2098+
// If output buffer empty, put byte at end (allows more pushing).
2099+
if state.have == 0 {
2100+
state.have = 1;
2101+
// Safety: because `state.have` is nonzero, the `state.output` buffer has been
2102+
// allocated. And because the buffer's size is `state.out_size`, a pointer to
2103+
// `output + out_size - 1` points within the buffer.
2104+
state.next = unsafe { state.output.add(state.out_size - 1) };
2105+
// Safety: from the addition above, `state.next` currently points within the
2106+
// `state.output` buffer.
2107+
unsafe { *(state.next as *mut u8) = c as u8 };
2108+
state.pos -= 1;
2109+
state.past = false;
2110+
return c;
2111+
}
2112+
2113+
// If no room, give up (must have already done a `gzungetc`).
2114+
if state.have as usize == state.out_size {
2115+
const MSG: &str = "out of room to push characters";
2116+
// Safety: We have verified that `state` is valid.
2117+
unsafe { gz_error(state, Some((Z_DATA_ERROR, MSG))) };
2118+
return -1;
2119+
}
2120+
2121+
// Slide output data if needed and insert byte before existing data.
2122+
if state.next == state.output {
2123+
// There are `state.have` bytes of usable content at the front of the buffer
2124+
// `state.output`, which has capacity `state.out_size`. We want to move that
2125+
// content to the end of the buffer, so we copy from `state.output` to
2126+
// `state.output + (state.out_size - state.have)` and update `state.next`
2127+
// to point to the content's new location within the buffer.
2128+
let offset = state.out_size - state.have as usize;
2129+
2130+
// Safety: `state.have` < `state.out_size`, or we would have returned in the
2131+
// check for the == case above. Therefore, `offset`, which is `out_size - have`,
2132+
// is in the range `1..=(out_size - 1)`. When we add that to `output`, the result
2133+
// is within the buffer's allocation of `out_size` bytes.
2134+
let dst = unsafe { state.output.add(offset) };
2135+
2136+
// Safety: `state.next` points a sequence of `state.have` initialized bytes
2137+
// within the `state.output` buffer. And because `dst` was computed as
2138+
// `state.output + state.out_size - state.have`, we can write `state.have`
2139+
// bytes starting at `dst` and they will all be within the buffer.
2140+
// Note that this may be an overlapping copy.
2141+
unsafe { ptr::copy(state.next, dst as _, state.have as _) };
2142+
state.next = dst;
2143+
}
2144+
state.have += 1;
2145+
// Safety: `state.next` > `state.output`, due to the `state.next = dst` above, so it
2146+
// is safe to decrease `state.next` by 1.
2147+
state.next = unsafe { state.next.sub(1) };
2148+
// Safety: `state.next` >= `state.output` following the subtraction.
2149+
unsafe { *(state.next as *mut u8) = c as u8 };
2150+
state.pos -= 1;
2151+
state.past = false;
2152+
c
2153+
}
2154+
20482155
/// Read decompressed bytes from `file` into `buf`, until `len-1` characters are
20492156
/// read, or until a newline character is read and transferred to `buf`, or an
20502157
/// end-of-file condition is encountered. If any characters are read or if `len`
@@ -2138,7 +2245,7 @@ pub unsafe extern "C-unwind" fn gzgets(file: gzFile, buf: *mut c_char, len: c_in
21382245
// Safety: As described above, `state.next` pointed to at least `n` readable bytes, so
21392246
// when we increase it by `n` it will still point into the `output` buffer.
21402247
state.next = unsafe { state.next.add(n) };
2141-
state.pos += n as u64;
2248+
state.pos += n as i64;
21422249
left -= n;
21432250
// Safety: `dst` pointed to at least `n` writable bytes, so when we increase it by `n`
21442251
// it will still point into `buf`.

test-libz-rs-sys/src/gz.rs

Lines changed: 101 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use zlib_rs::c_api::*;
33
use libz_rs_sys::{
44
gzFile_s, gzbuffer, gzclearerr, gzclose, gzclose_r, gzclose_w, gzdirect, gzdopen, gzerror,
55
gzflush, gzfread, gzfwrite, gzgetc, gzgetc_, gzgets, gzoffset, gzopen, gzputc, gzputs, gzread,
6-
gztell, gzwrite,
6+
gztell, gzungetc, gzwrite,
77
};
88

99
use libc::size_t;
@@ -1075,6 +1075,106 @@ fn gzgetc_error() {
10751075
}
10761076
}
10771077

1078+
#[test]
fn gzungetc_basic() {
    // Open the gzip-compressed text fixture in read mode. The path and mode strings
    // are bound to locals so that they are alive for the whole `gzopen` call.
    let file_name = crate_path("src/test-data/text.gz");
    let path = CString::new(file_name.as_str()).unwrap();
    let mode = CString::new("r").unwrap();
    let file = unsafe { gzopen(path.as_ptr(), mode.as_ptr()) };
    assert!(!file.is_null());
    assert_eq!(unsafe { gzbuffer(file, 8) }, 0);

    // Push characters back before any read has happened. Each call should return the
    // character it pushed. In read mode the output buffer is internally twice the size
    // requested through gzbuffer, so exactly 16 pushes fit at this point.
    const CONTENT: &[u8] = b"0123456789abcdef";
    for &byte in CONTENT.iter().rev() {
        let pushed = c_int::from(byte);
        assert_eq!(unsafe { gzungetc(pushed, file) }, pushed);
    }

    // Reading now must hand back the pushed bytes. They were pushed in reverse order,
    // so they come out in the order of CONTENT.
    let mut buf = [0u8; CONTENT.len()];
    let n_read = unsafe {
        gzread(
            file,
            buf.as_mut_ptr().cast::<c_void>(),
            CONTENT.len() as c_uint,
        )
    };
    assert_eq!(n_read, CONTENT.len() as c_int);
    assert_eq!(&buf, CONTENT);

    // Consume some real file content so the output buffer ends up partially filled.
    let n_read = unsafe { gzread(file, buf.as_mut_ptr().cast::<c_void>(), 16) };
    assert_eq!(n_read, 16);
    assert_eq!(&buf, b"gzip\nexample dat");
    let n_read = unsafe { gzread(file, buf.as_mut_ptr().cast::<c_void>(), 1) };
    assert_eq!(n_read, 1);
    assert_eq!(buf[0], b'a');

    // At this point the buffer holds unread data with room for a single byte in front
    // of it. The first push uses that slot; the second one has to go through the
    // gzungetc path that shifts the buffered data right to make room.
    let dash = c_int::from(b'-');
    let star = c_int::from(b'*');
    assert_eq!(unsafe { gzungetc(dash, file) }, dash);
    assert_eq!(unsafe { gzungetc(star, file) }, star);

    // The next read must return the two pushed bytes (most recent first) followed by
    // whatever was left of the uncompressed file. The destination is one byte larger
    // than the expected data, so the read stops at end of file.
    const EXPECTED: &[u8] = b"*-\nfor tests";
    let mut tail = [0u8; EXPECTED.len() + 1];
    let n_read = unsafe {
        gzread(
            file,
            tail.as_mut_ptr().cast::<c_void>(),
            tail.len() as c_uint,
        )
    };
    assert_eq!(n_read, EXPECTED.len() as c_int);
    assert_eq!(&tail[..EXPECTED.len()], EXPECTED);

    // The 16-byte output buffer has been drained. Sixteen pushes should fill it
    // completely, and a seventeenth must be rejected.
    for _ in 0..16 {
        assert_eq!(unsafe { gzungetc(dash, file) }, dash);
    }
    assert_eq!(unsafe { gzungetc(dash, file) }, -1);

    assert_eq!(unsafe { gzclose(file) }, Z_OK);
}
1154+
1155+
#[test]
fn gzungetc_error() {
    let star = c_int::from(b'*');

    // gzungetc on a null file handle should return -1.
    assert_eq!(unsafe { gzungetc(star, ptr::null_mut()) }, -1);

    // gzungetc on a write-only file handle should return -1. The handle wraps the
    // file descriptor -2, and closing it is expected to report Z_ERRNO.
    let write_mode = CString::new("w").unwrap();
    let file = unsafe { gzdopen(-2, write_mode.as_ptr()) };
    assert_eq!(unsafe { gzungetc(star, file) }, -1);
    assert_eq!(unsafe { gzclose(file) }, Z_ERRNO);

    // gzungetc with a negative character value should return -1, even on a handle
    // that was opened successfully for reading.
    let file_name = crate_path("src/test-data/text.gz");
    let path = CString::new(file_name.as_str()).unwrap();
    let read_mode = CString::new("r").unwrap();
    let file = unsafe { gzopen(path.as_ptr(), read_mode.as_ptr()) };
    assert!(!file.is_null());
    assert_eq!(unsafe { gzungetc(-1, file) }, -1);
    assert_eq!(unsafe { gzclose(file) }, Z_OK);
}
1177+
10781178
#[test]
10791179
fn gzgets_basic() {
10801180
// Open a file containing gzip-compressed text.

0 commit comments

Comments
 (0)