trifectatechfoundation · folkertdev · Apr 22, 2025 · Apr 17, 2025 · Apr 17, 2025 · Apr 17, 2025
diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml
@@ -211,7 +211,7 @@ jobs:
         run: |
           cargo fuzz build --no-default-features --features="${{ matrix.features }}"
           for target in $(cargo fuzz list); do
-            if [ "$target" = "uncompress2" ]; then
+            if [ "$target" = "uncompress" ]; then
               features="${{ matrix.features }} disable-checksum"
             else
               features="${{ matrix.features }}"
@@ -246,7 +246,7 @@ jobs:
         run: |
           cargo fuzz build --no-default-features --features="${{ matrix.features }}"
           for target in $(cargo fuzz list); do
-            if [ "$target" = "uncompress2" ]; then
+            if [ "$target" = "uncompress" ]; then
               features="${{ matrix.features }} disable-checksum"
             else
               features="${{ matrix.features }}"
@@ -260,9 +260,9 @@ jobs:
     strategy:
       matrix:
         include:
-          - fuzz_target: uncompress2
+          - fuzz_target: uncompress
             corpus: "gzip-files/compressed"
-            features: '--no-default-features --features="disable-checksum"'
+            features: '--no-default-features --features="disable-checksum,keep-invalid-in-corpus"'
             flags: fuzz-decompress
           - fuzz_target: compress
             corpus: ""
@@ -290,6 +290,8 @@ jobs:
       - name: Run `cargo fuzz`
         env:
           RUST_BACKTRACE: "1"
+          # prevents `cargo fuzz coverage` from rebuilding everything
+          RUSTFLAGS: "-C instrument-coverage"
         run: |
           cargo fuzz run ${{matrix.features}} ${{matrix.fuzz_target}} ${{matrix.corpus}} -- -max_total_time=10
       - name: Fuzz codecov
@@ -299,7 +301,7 @@ jobs:
               target/$(rustc --print host-tuple)/coverage/$(rustc --print host-tuple)/release/${{matrix.fuzz_target}} \
               -instr-profile=fuzz/coverage/${{matrix.fuzz_target}}/coverage.profdata \
               --format=lcov \
-              -ignore-filename-regex="\.cargo|\.rustup" > lcov.info
+              -ignore-filename-regex="\.cargo|\.rustup|fuzz_targets" > lcov.info
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@eaaf4bedf32dbdc6b720b63067d99c4d77d6047d
         with:

diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
@@ -16,6 +16,7 @@ default = ["rust-allocator", "libz-rs-sys/testing-prefix"]
 c-allocator = ["libz-rs-sys/c-allocator", "test-libz-rs-sys/c-allocator", "libz-rs-sys/std"]
 rust-allocator = ["libz-rs-sys/rust-allocator",  "test-libz-rs-sys/rust-allocator", "libz-rs-sys/std"]
 disable-checksum = ["zlib-rs/__internal-fuzz-disable-checksum"]
+keep-invalid-in-corpus = [] # For code coverage (on CI), we want to keep inputs that triggered the error branches
 
 [dependencies.libfuzzer-sys]
 version = "0.4"
@@ -47,12 +48,6 @@ members = ["."]
 [[bin]]
 name = "uncompress"
 path = "fuzz_targets/uncompress.rs"
-test = false
-doc = false
-
-[[bin]]
-name = "uncompress2"
-path = "fuzz_targets/uncompress2.rs"
 test = true
 doc = false
 required-features = ["disable-checksum"]

diff --git a/fuzz/fuzz_targets/uncompress.rs b/fuzz/fuzz_targets/uncompress.rs
@@ -1,54 +1,147 @@
-#![no_main]
-use libfuzzer_sys::fuzz_target;
+//! This fuzzer is intended to find memory safety bugs and undefined behavior. The entire
+//! input is processed, allowing for analysis of files of arbitrary size. It also speeds up
+//! coverage by disabling checksum validation and disregarding correctness of the results.
+//!
+//! This test must be run with `--features disable-checksum`. It's also suggested to initialize
+//! fuzzing with a corpus of real zlib/gzip files. Place the corpus in directory
+//! `corpus/uncompress` (the default corpus location).
+//!
+//! Then, the fuzzer can be run like:
+//!
+//! ```
+//! cargo fuzz run uncompress --features disable-checksum -j$(nproc)
+//! ```
+//!
+//! If not starting with an initial corpus, consider using the `-- -max_len=1048576` argument to
+//! test larger inputs.
+//!
+//! libfuzzer uses LLVM sanitizers to detect some classes of bugs and UB. For detecting
+//! Rust-specific UB, use Miri. Once a corpus with suitable coverage has been built, you can run
+//! Miri against the corpus by executing:
+//! ```
+//! MIRIFLAGS=-Zmiri-disable-isolation cargo miri nextest run --bin uncompress --features disable-checksum
+//! ```
+//! This assumes the corpus is located in the default directory of `corpus/uncompress`. If it
+//! isn't, specify the corpus directory with the `ZLIB_RS_CORPUS_DIR` environment variable.
+#![cfg_attr(not(any(miri, test)), no_main)]
 
-use zlib_rs::ReturnCode;
+use libfuzzer_sys::{fuzz_target, Corpus};
+use libz_rs_sys::{
+    gz_header, inflate, inflateEnd, inflateGetHeader, inflateInit2_, z_stream, zlibVersion,
+};
+use zlib_rs::{InflateFlush, ReturnCode};
 
-fn uncompress_help(input: &[u8]) -> Vec<u8> {
-    let mut dest_vec = vec![0u8; 1 << 16];
+fuzz_target!(|input: &[u8]| -> Corpus { run(input) });
 
-    let mut dest_len = dest_vec.len() as std::ffi::c_ulong;
-    let dest = dest_vec.as_mut_ptr();
+fn run(input: &[u8]) -> Corpus {
+    if input.is_empty() {
+        return Corpus::Reject;
+    }
 
-    let source = input.as_ptr();
-    let source_len = input.len() as _;
+    let mut stream = z_stream::default();
 
-    let err = unsafe { ::libz_rs_sys::uncompress(dest, &mut dest_len, source, source_len) };
+    let err = unsafe {
+        inflateInit2_(
+            &mut stream,
+            15 + 32, // Support both zlib and gzip files.
+            zlibVersion(),
+            size_of::<z_stream>() as _,
+        )
+    };
+    assert_eq!(ReturnCode::from(err), ReturnCode::Ok);
 
-    if err != 0 {
-        panic!("error {:?}", ReturnCode::from(err));
-    }
+    let mut extra = vec![0; 64];
+    let mut name = vec![0; 64];
+    let mut comment = vec![0; 64];
+    let mut header = gz_header {
+        text: 0,
+        time: 0,
+        xflags: 0,
+        os: 0,
+        extra: extra.as_mut_ptr(),
+        extra_len: 0,
+        extra_max: 64,
+        name: name.as_mut_ptr(),
+        name_max: 64,
+        comment: comment.as_mut_ptr(),
+        comm_max: 64,
+        hcrc: 0,
+        done: 0,
+    };
 
-    dest_vec.truncate(dest_len as usize);
+    let err = unsafe { inflateGetHeader(&mut stream, &mut header) };
+    assert_eq!(ReturnCode::from(err), ReturnCode::Ok);
 
-    dest_vec
-}
+    let mut output = vec![0; input.len()];
+    let input_len: u64 = input.len().try_into().unwrap();
+    stream.next_out = output.as_mut_ptr();
+    stream.avail_out = output.len().try_into().unwrap();
 
-fuzz_target!(|data: String| {
-    // first, deflate the data using the standard zlib
-    let mut length = 8 * 1024;
-    let mut deflated = vec![0; length];
-
-    let error = unsafe {
-        libz_ng_sys::compress(
-            deflated.as_mut_ptr().cast(),
-            &mut length,
-            data.as_ptr().cast(),
-            data.len(),
-        )
+    // Small enough to hit interesting cases, but large enough to hit the fast path
+    let chunk_size = 64;
+
+    // For code coverage (on CI), we want to keep inputs that triggered the error
+    // branches, to get an accurate picture of what error paths we actually hit.
+    //
+    // It helps that on CI we start with a corpus of valid files: a mutation of such an
+    // input is not a sequence of random bytes, but rather quite close to correct and
+    // hence likely to hit interesting error conditions.
+    let invalid_input = if cfg!(feature = "keep-invalid-in-corpus") {
+        Corpus::Keep
+    } else {
+        Corpus::Reject
     };
 
-    let error = ReturnCode::from(error as i32);
-    assert_eq!(ReturnCode::Ok, error);
+    for chunk in input.chunks(chunk_size) {
+        stream.next_in = chunk.as_ptr() as *mut u8;
+        stream.avail_in = chunk.len() as _;
 
-    deflated.truncate(length as _);
+        let err = unsafe { inflate(&mut stream, InflateFlush::NoFlush as _) };
+        match ReturnCode::from(err) {
+            ReturnCode::StreamEnd => {
+                break;
+            }
+            ReturnCode::Ok => {
+                continue;
+            }
+            ReturnCode::BufError => {
+                let add_space: u32 = Ord::max(1024, output.len().try_into().unwrap());
+                output.resize(output.len() + add_space as usize, 0);
 
-    let output = uncompress_help(&deflated);
+                // resize() may reallocate and move the buffer, so re-point next_out at it.
+                stream.next_out = output.as_mut_ptr();
+                stream.avail_out += add_space;
+            }
+            _ => {
+                unsafe { inflateEnd(&mut stream) };
+                return invalid_input;
+            }
+        }
+    }
 
-    if output != data.as_bytes() {
-        let path = std::env::temp_dir().join("deflate.txt");
-        std::fs::write(&path, &data).unwrap();
-        eprintln!("saved input file to {path:?}");
+    let err = unsafe { inflateEnd(&mut stream) };
+    match ReturnCode::from(err) {
+        ReturnCode::Ok => Corpus::Keep,
+        _ => invalid_input,
     }
+}
+
+#[cfg(test)]
+mod tests {
+    #[cfg(miri)]
+    use {
+        crate::run,
+        rstest::rstest,
+        std::{fs::File, io::Read, path::PathBuf},
+    };
 
-    assert_eq!(output, data.as_bytes());
-});
+    #[rstest]
+    #[cfg(miri)]
+    fn miri_corpus(#[files("${ZLIB_RS_CORPUS_DIR:-corpus/uncompress}/*")] path: PathBuf) {
+        let mut input = File::open(path).unwrap();
+        let mut buf = Vec::new();
+        input.read_to_end(&mut buf).unwrap();
+
+        run(&buf);
+    }
+}
diff --git a/fuzz/fuzz_targets/uncompress2.rs b/fuzz/fuzz_targets/uncompress2.rs