use core::{ffi::CStr, marker::PhantomData, mem::MaybeUninit, ops::ControlFlow};
use crate::{
adler32::adler32,
allocate::Allocator,
c_api::{gz_header, internal_state, z_checksum, z_stream},
crc32::{crc32, Crc32Fold},
read_buf::ReadBuf,
trace, DeflateFlush, ReturnCode, ADLER32_INITIAL_VALUE, CRC32_INITIAL_VALUE, MAX_WBITS,
MIN_WBITS,
};
use self::{
algorithm::CONFIGURATION_TABLE,
hash_calc::{Crc32HashCalc, HashCalcVariant, RollHashCalc, StandardHashCalc},
pending::Pending,
trees_tbl::STATIC_LTREE,
window::Window,
};
mod algorithm;
mod compare256;
mod hash_calc;
mod longest_match;
mod pending;
mod slide_hash;
mod trees_tbl;
mod window;
#[repr(C)]
pub struct DeflateStream<'a> {
pub(crate) next_in: *mut crate::c_api::Bytef,
pub(crate) avail_in: crate::c_api::uInt,
pub(crate) total_in: crate::c_api::z_size,
pub(crate) next_out: *mut crate::c_api::Bytef,
pub(crate) avail_out: crate::c_api::uInt,
pub(crate) total_out: crate::c_api::z_size,
pub(crate) msg: *const core::ffi::c_char,
pub(crate) state: &'a mut State<'a>,
pub(crate) alloc: Allocator<'a>,
pub(crate) data_type: core::ffi::c_int,
pub(crate) adler: crate::c_api::z_checksum,
pub(crate) reserved: crate::c_api::uLong,
}
impl<'a> DeflateStream<'a> {
const _S: () = assert!(core::mem::size_of::<z_stream>() == core::mem::size_of::<Self>());
const _A: () = assert!(core::mem::align_of::<z_stream>() == core::mem::align_of::<Self>());
/// # Safety
///
/// Behavior is undefined if any of the following conditions are violated:
///
/// - `strm` satisfies the conditions of [`pointer::as_mut`]
/// - if not `NULL`, `strm` was initialized using [`init`] or similar
///
/// [`pointer::as_mut`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.as_mut
#[inline(always)]
pub unsafe fn from_stream_mut(strm: *mut z_stream) -> Option<&'a mut Self> {
{
// Safety: ptr points to a valid value of type z_stream (if non-null)
let stream = unsafe { strm.as_ref() }?;
if stream.zalloc.is_none() || stream.zfree.is_none() {
return None;
}
if stream.state.is_null() {
return None;
}
}
// Safety: DeflateStream has an equivalent layout as z_stream
unsafe { strm.cast::<DeflateStream>().as_mut() }
}
fn as_z_stream_mut(&mut self) -> &mut z_stream {
// safety: a valid &mut DeflateStream is also a valid &mut z_stream
unsafe { &mut *(self as *mut DeflateStream as *mut z_stream) }
}
pub fn pending(&self) -> (usize, u8) {
(
self.state.bit_writer.pending.pending,
self.state.bit_writer.bits_used,
)
}
}
/// number of elements in hash table
pub(crate) const HASH_SIZE: usize = 65536;
/// log2(HASH_SIZE)
const HASH_BITS: usize = 16;
/// Maximum value for memLevel in deflateInit2
const MAX_MEM_LEVEL: i32 = 9;
const DEF_MEM_LEVEL: i32 = if MAX_MEM_LEVEL > 8 { 8 } else { MAX_MEM_LEVEL };
#[repr(i32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[cfg_attr(feature = "__internal-fuzz", derive(arbitrary::Arbitrary))]
pub enum Method {
#[default]
Deflated = 8,
}
impl TryFrom<i32> for Method {
type Error = ();
fn try_from(value: i32) -> Result<Self, Self::Error> {
match value {
8 => Ok(Self::Deflated),
_ => Err(()),
}
}
}
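/// Compression parameters, mirroring the arguments of zlib's `deflateInit2`.
/// `window_bits` follows the zlib convention: 9..=15 selects a zlib wrapper,
/// 25..=31 (i.e. window_bits + 16) a gzip wrapper, and -15..=-9 raw deflate
/// without a wrapper.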
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "__internal-fuzz", derive(arbitrary::Arbitrary))]
pub struct DeflateConfig {
pub level: i32,
pub method: Method,
pub window_bits: i32,
pub mem_level: i32,
pub strategy: Strategy,
}
#[cfg(any(test, feature = "__internal-test"))]
impl quickcheck::Arbitrary for DeflateConfig {
fn arbitrary(g: &mut quickcheck::Gen) -> Self {
let mem_levels: Vec<_> = (1..=9).collect();
let levels: Vec<_> = (0..=9).collect();
let mut window_bits = Vec::new();
window_bits.extend(9..=15); // zlib
window_bits.extend(9 + 16..=15 + 16); // gzip
window_bits.extend(-15..=-9); // raw
Self {
level: *g.choose(&levels).unwrap(),
method: Method::Deflated,
window_bits: *g.choose(&window_bits).unwrap(),
mem_level: *g.choose(&mem_levels).unwrap(),
strategy: *g
.choose(&[
Strategy::Default,
Strategy::Filtered,
Strategy::HuffmanOnly,
Strategy::Rle,
Strategy::Fixed,
])
.unwrap(),
}
}
}
impl DeflateConfig {
pub fn new(level: i32) -> Self {
Self {
level,
..Self::default()
}
}
}
impl Default for DeflateConfig {
fn default() -> Self {
Self {
level: crate::c_api::Z_DEFAULT_COMPRESSION,
method: Method::Deflated,
window_bits: MAX_WBITS,
mem_level: DEF_MEM_LEVEL,
strategy: Strategy::Default,
}
}
}
// TODO: This could use `MaybeUninit::slice_assume_init_mut` when it is stable.
// Safety: the caller must guarantee that `slice` is fully initialized.
unsafe fn slice_assume_init_mut<T>(slice: &mut [MaybeUninit<T>]) -> &mut [T] {
// safety: &mut [T] and &mut [MaybeUninit<T>] have the same layout
&mut *(slice as *mut [MaybeUninit<T>] as *mut [T])
}
// when stable, use MaybeUninit::write_slice
fn slice_to_uninit<T>(slice: &[T]) -> &[MaybeUninit<T>] {
// safety: &[T] and &[MaybeUninit<T>] have the same layout
unsafe { &*(slice as *const [T] as *const [MaybeUninit<T>]) }
}
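/// Initializes `stream` for compression according to `config`, allocating the internal
/// `State`. This is the equivalent of zlib's `deflateInit2`.
///
/// A minimal usage sketch (not part of the original docs; it assumes an allocator
/// feature such as `rust-allocator` is enabled so default `zalloc`/`zfree` exist):
///
/// ```ignore
/// let mut stream = z_stream::default();
/// assert_eq!(init(&mut stream, DeflateConfig::new(6)), ReturnCode::Ok);
/// ```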
pub fn init(stream: &mut z_stream, config: DeflateConfig) -> ReturnCode {
let DeflateConfig {
mut level,
method: _,
mut window_bits,
mem_level,
strategy,
} = config;
/* Todo: ignore strm->next_in if we use it as window */
stream.msg = core::ptr::null_mut();
// for safety we must really make sure that alloc and free are consistent
// this is a (slight) deviation from stock zlib. In this crate we pick the rust
// allocator as the default, but `libz-rs-sys` always explicitly sets an allocator,
// and can configure the C allocator
#[cfg(feature = "rust-allocator")]
if stream.zalloc.is_none() || stream.zfree.is_none() {
stream.configure_default_rust_allocator()
}
#[cfg(feature = "c-allocator")]
if stream.zalloc.is_none() || stream.zfree.is_none() {
stream.configure_default_c_allocator()
}
if stream.zalloc.is_none() || stream.zfree.is_none() {
return ReturnCode::StreamError;
}
if level == crate::c_api::Z_DEFAULT_COMPRESSION {
level = 6;
}
let wrap = if window_bits < 0 {
if window_bits < -MAX_WBITS {
return ReturnCode::StreamError;
}
window_bits = -window_bits;
0
} else if window_bits > MAX_WBITS {
window_bits -= 16;
2
} else {
1
};
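// `wrap` now follows zlib's convention: 0 = raw deflate (no wrapper),
// 1 = zlib wrapper, 2 = gzip wrapper.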
if (!(1..=MAX_MEM_LEVEL).contains(&mem_level))
|| !(MIN_WBITS..=MAX_WBITS).contains(&window_bits)
|| !(0..=9).contains(&level)
|| (window_bits == 8 && wrap != 1)
{
return ReturnCode::StreamError;
}
let window_bits = if window_bits == 8 {
9 /* until 256-byte window bug fixed */
} else {
window_bits as usize
};
let alloc = Allocator {
zalloc: stream.zalloc.unwrap(),
zfree: stream.zfree.unwrap(),
opaque: stream.opaque,
_marker: PhantomData,
};
// allocated here to have the same order as zlib
let Some(state_allocation) = alloc.allocate::<State>() else {
return ReturnCode::MemError;
};
let w_size = 1 << window_bits;
let window = Window::new_in(&alloc, window_bits);
let prev = alloc.allocate_slice::<u16>(w_size);
let head = alloc.allocate::<[u16; HASH_SIZE]>();
let lit_bufsize = 1 << (mem_level + 6); // 16K elements by default
let pending = Pending::new_in(&alloc, 4 * lit_bufsize);
// zlib-ng overlays the pending_buf and sym_buf. We cannot really do that safely
let sym_buf = ReadBuf::new_in(&alloc, 3 * lit_bufsize);
// if any allocation failed, clean up allocations that did succeed
let (window, prev, head, pending, sym_buf) = match (window, prev, head, pending, sym_buf) {
(Some(window), Some(prev), Some(head), Some(pending), Some(sym_buf)) => {
(window, prev, head, pending, sym_buf)
}
(window, prev, head, pending, sym_buf) => {
unsafe {
if let Some(mut sym_buf) = sym_buf {
alloc.deallocate(sym_buf.as_mut_ptr(), sym_buf.capacity())
}
if let Some(pending) = pending {
pending.drop_in(&alloc);
}
if let Some(head) = head {
alloc.deallocate(head.as_mut_ptr(), 1)
}
if let Some(prev) = prev {
alloc.deallocate(prev.as_mut_ptr(), prev.len())
}
if let Some(mut window) = window {
window.drop_in(&alloc);
}
alloc.deallocate(state_allocation.as_mut_ptr(), 1);
}
return ReturnCode::MemError;
}
};
prev.fill(MaybeUninit::zeroed());
let prev = unsafe { slice_assume_init_mut(prev) };
*head = MaybeUninit::zeroed();
let head = unsafe { head.assume_init_mut() };
let state = State {
status: Status::Init,
// window
w_bits: window_bits,
w_size,
w_mask: w_size - 1,
// allocated values
window,
prev,
head,
bit_writer: BitWriter::from_pending(pending),
//
lit_bufsize,
//
sym_buf,
//
level: level as i8, // set to zero again for testing?
strategy,
// these fields are not set explicitly at this point
last_flush: 0,
wrap,
strstart: 0,
block_start: 0,
block_open: 0,
window_size: 0,
insert: 0,
matches: 0,
opt_len: 0,
static_len: 0,
lookahead: 0,
ins_h: 0,
max_chain_length: 0,
max_lazy_match: 0,
good_match: 0,
nice_match: 0,
//
l_desc: TreeDesc::EMPTY,
d_desc: TreeDesc::EMPTY,
bl_desc: TreeDesc::EMPTY,
bl_count: [0u16; MAX_BITS + 1],
//
heap: Heap::new(),
//
crc_fold: Crc32Fold::new(),
gzhead: None,
gzindex: 0,
//
match_start: 0,
match_length: 0,
prev_match: 0,
match_available: false,
prev_length: 0,
// just provide a valid default; gets set properly later
hash_calc_variant: HashCalcVariant::Standard,
};
let state = state_allocation.write(state);
stream.state = state as *mut _ as *mut internal_state;
let Some(stream) = (unsafe { DeflateStream::from_stream_mut(stream) }) else {
if cfg!(debug_assertions) {
unreachable!("we should have initialized the stream properly");
}
return ReturnCode::StreamError;
};
reset(stream)
}
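/// Changes the compression `level` and `strategy` of a live stream, like zlib's
/// `deflateParams`. If the new parameters select a different algorithm, any buffered
/// input is first flushed as a block.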
pub fn params(stream: &mut DeflateStream, level: i32, strategy: Strategy) -> ReturnCode {
let level = if level == crate::c_api::Z_DEFAULT_COMPRESSION {
6
} else {
level
};
if !(0..=9).contains(&level) {
return ReturnCode::StreamError;
}
let level = level as i8;
let func = CONFIGURATION_TABLE[stream.state.level as usize].func;
let state = &mut stream.state;
if (strategy != state.strategy || func != CONFIGURATION_TABLE[level as usize].func)
&& state.last_flush != -2
{
// Flush the last buffer.
let err = deflate(stream, DeflateFlush::Block);
if err == ReturnCode::StreamError {
return err;
}
let state = &mut stream.state;
if stream.avail_in != 0
|| ((state.strstart as isize - state.block_start) + state.lookahead as isize) != 0
{
return ReturnCode::BufError;
}
}
let state = &mut stream.state;
if state.level != level {
if state.level == 0 && state.matches != 0 {
if state.matches == 1 {
self::slide_hash::slide_hash(state);
} else {
state.head.fill(0);
}
state.matches = 0;
}
lm_set_level(state, level);
}
state.strategy = strategy;
ReturnCode::Ok
}
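/// Presets the compression dictionary, like zlib's `deflateSetDictionary`: the
/// dictionary (or its tail, if it is larger than the window) is inserted into the
/// sliding window and the hash chains as if it had just been compressed.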
pub fn set_dictionary(stream: &mut DeflateStream, mut dictionary: &[u8]) -> ReturnCode {
let state = &mut stream.state;
let wrap = state.wrap;
if wrap == 2 || (wrap == 1 && state.status != Status::Init) || state.lookahead != 0 {
return ReturnCode::StreamError;
}
// when using zlib wrappers, compute Adler-32 for provided dictionary
if wrap == 1 {
stream.adler = adler32(stream.adler as u32, dictionary) as z_checksum;
}
// avoid computing Adler-32 in read_buf
state.wrap = 0;
// if dictionary would fill window, just replace the history
if dictionary.len() >= state.window.capacity() {
if wrap == 0 {
// clear the hash table
state.head.fill(0);
state.strstart = 0;
state.block_start = 0;
state.insert = 0;
} else {
/* already empty otherwise */
}
// use the tail
dictionary = &dictionary[dictionary.len() - state.w_size..];
}
// insert dictionary into window and hash
let avail = stream.avail_in;
let next = stream.next_in;
stream.avail_in = dictionary.len() as _;
stream.next_in = dictionary.as_ptr() as *mut u8;
fill_window(stream);
while stream.state.lookahead >= STD_MIN_MATCH {
let str = stream.state.strstart;
let n = stream.state.lookahead - (STD_MIN_MATCH - 1);
stream.state.insert_string(str, n);
stream.state.strstart = str + n;
stream.state.lookahead = STD_MIN_MATCH - 1;
fill_window(stream);
}
let state = &mut stream.state;
state.strstart += state.lookahead;
state.block_start = state.strstart as _;
state.insert = state.lookahead;
state.lookahead = 0;
state.prev_length = 0;
state.match_available = false;
// restore the state
stream.next_in = next;
stream.avail_in = avail;
state.wrap = wrap;
ReturnCode::Ok
}
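/// Inserts `bits` low-order bits of `value` into the pending deflate output,
/// mirroring zlib's `deflatePrime`.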
pub fn prime(stream: &mut DeflateStream, mut bits: i32, value: i32) -> ReturnCode {
// our logic actually supports up to 32 bits.
debug_assert!(bits <= 16, "zlib only supports up to 16 bits here");
let mut value64 = value as u64;
let state = &mut stream.state;
if bits < 0
|| bits > BitWriter::BIT_BUF_SIZE as i32
|| bits > (core::mem::size_of_val(&value) << 3) as i32
{
return ReturnCode::BufError;
}
let mut put;
loop {
put = BitWriter::BIT_BUF_SIZE - state.bit_writer.bits_used;
let put = Ord::min(put as i32, bits);
if state.bit_writer.bits_used == 0 {
state.bit_writer.bit_buffer = value64;
} else {
state.bit_writer.bit_buffer |=
(value64 & ((1 << put) - 1)) << state.bit_writer.bits_used;
}
state.bit_writer.bits_used += put as u8;
state.bit_writer.flush_bits();
value64 >>= put;
bits -= put;
if bits == 0 {
break;
}
}
ReturnCode::Ok
}
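/// Deep-copies the compression state from `source` into `dest`, cloning the window,
/// hash tables and pending output: the equivalent of zlib's `deflateCopy`.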
pub fn copy<'a>(
dest: &mut MaybeUninit<DeflateStream<'a>>,
source: &mut DeflateStream<'a>,
) -> ReturnCode {
// Safety: source and dest are both mutable references, so guaranteed not to overlap.
// dest being a reference to maybe uninitialized memory makes a copy of 1 DeflateStream valid.
unsafe {
core::ptr::copy_nonoverlapping(source, dest.as_mut_ptr(), 1);
}
let alloc = &source.alloc;
// allocated here to have the same order as zlib
let Some(state_allocation) = alloc.allocate::<State>() else {
return ReturnCode::MemError;
};
let source_state = &source.state;
let window = source_state.window.clone_in(alloc);
let prev = alloc.allocate_slice::<u16>(source_state.w_size);
let head = alloc.allocate::<[u16; HASH_SIZE]>();
let pending = source_state.bit_writer.pending.clone_in(alloc);
let sym_buf = source_state.sym_buf.clone_in(alloc);
// if any allocation failed, clean up allocations that did succeed
let (window, prev, head, pending, sym_buf) = match (window, prev, head, pending, sym_buf) {
(Some(window), Some(prev), Some(head), Some(pending), Some(sym_buf)) => {
(window, prev, head, pending, sym_buf)
}
(window, prev, head, pending, sym_buf) => {
// Safety: this access is in-bounds
let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state) };
unsafe { core::ptr::write(field_ptr as *mut *mut State, core::ptr::null_mut()) };
// Safety: it is an assumption on DeflateStream that (de)allocation does not cause UB.
unsafe {
if let Some(mut sym_buf) = sym_buf {
alloc.deallocate(sym_buf.as_mut_ptr(), sym_buf.capacity())
}
if let Some(pending) = pending {
pending.drop_in(alloc);
}
if let Some(head) = head {
alloc.deallocate(head.as_mut_ptr(), 1)
}
if let Some(prev) = prev {
alloc.deallocate(prev.as_mut_ptr(), prev.len())
}
if let Some(mut window) = window {
window.drop_in(alloc);
}
alloc.deallocate(state_allocation.as_mut_ptr(), 1);
}
return ReturnCode::MemError;
}
};
prev.copy_from_slice(slice_to_uninit(source_state.prev));
let prev = unsafe { core::slice::from_raw_parts_mut(prev.as_mut_ptr().cast(), prev.len()) };
let head = head.write(*source_state.head);
let mut bit_writer = BitWriter::from_pending(pending);
bit_writer.bits_used = source_state.bit_writer.bits_used;
bit_writer.bit_buffer = source_state.bit_writer.bit_buffer;
let dest_state = State {
status: source_state.status,
bit_writer,
last_flush: source_state.last_flush,
wrap: source_state.wrap,
strategy: source_state.strategy,
level: source_state.level,
good_match: source_state.good_match,
nice_match: source_state.nice_match,
l_desc: source_state.l_desc.clone(),
d_desc: source_state.d_desc.clone(),
bl_desc: source_state.bl_desc.clone(),
bl_count: source_state.bl_count,
match_length: source_state.match_length,
prev_match: source_state.prev_match,
match_available: source_state.match_available,
strstart: source_state.strstart,
match_start: source_state.match_start,
prev_length: source_state.prev_length,
max_chain_length: source_state.max_chain_length,
max_lazy_match: source_state.max_lazy_match,
block_start: source_state.block_start,
block_open: source_state.block_open,
window,
sym_buf,
lit_bufsize: source_state.lit_bufsize,
window_size: source_state.window_size,
matches: source_state.matches,
opt_len: source_state.opt_len,
static_len: source_state.static_len,
insert: source_state.insert,
w_size: source_state.w_size,
w_bits: source_state.w_bits,
w_mask: source_state.w_mask,
lookahead: source_state.lookahead,
prev,
head,
ins_h: source_state.ins_h,
heap: source_state.heap.clone(),
hash_calc_variant: source_state.hash_calc_variant,
crc_fold: source_state.crc_fold,
gzhead: None,
gzindex: source_state.gzindex,
};
// write the cloned state into state_ptr
let state_ptr = state_allocation.write(dest_state);
// insert the state_ptr into `dest`
let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state) };
unsafe { core::ptr::write(field_ptr as *mut *mut State, state_ptr) };
// update the gzhead field (it contains a mutable reference, so we need to be careful)
let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state.gzhead) };
unsafe { core::ptr::copy(&source_state.gzhead, field_ptr, 1) };
ReturnCode::Ok
}
/// # Returns
///
/// - Err when deflate is not done. A common cause is insufficient output space
/// - Ok otherwise
pub fn end<'a>(stream: &'a mut DeflateStream) -> Result<&'a mut z_stream, &'a mut z_stream> {
let status = stream.state.status;
let alloc = stream.alloc;
// deallocate in reverse order of allocations
unsafe {
// safety: we make sure that these fields are not used (by invalidating the state pointer)
stream.state.sym_buf.drop_in(&alloc);
stream.state.bit_writer.pending.drop_in(&alloc);
alloc.deallocate(stream.state.head, 1);
if !stream.state.prev.is_empty() {
alloc.deallocate(stream.state.prev.as_mut_ptr(), stream.state.prev.len());
}
stream.state.window.drop_in(&alloc);
}
let state = stream.state as *mut State;
let stream = stream.as_z_stream_mut();
stream.state = core::ptr::null_mut();
// safety: `state` is not used later
unsafe {
alloc.deallocate(state, 1);
}
match status {
Status::Busy => Err(stream),
_ => Ok(stream),
}
}
pub fn reset(stream: &mut DeflateStream) -> ReturnCode {
let ret = reset_keep(stream);
if ret == ReturnCode::Ok {
lm_init(stream.state);
}
ret
}
fn reset_keep(stream: &mut DeflateStream) -> ReturnCode {
stream.total_in = 0;
stream.total_out = 0;
stream.msg = core::ptr::null_mut();
stream.data_type = crate::c_api::Z_UNKNOWN;
let state = &mut stream.state;
state.bit_writer.pending.reset_keep();
// can be made negative by deflate(..., Z_FINISH);
state.wrap = state.wrap.abs();
state.status = match state.wrap {
2 => Status::GZip,
_ => Status::Init,
};
stream.adler = match state.wrap {
2 => {
state.crc_fold = Crc32Fold::new();
CRC32_INITIAL_VALUE as _
}
_ => ADLER32_INITIAL_VALUE as _,
};
state.last_flush = -2;
state.zng_tr_init();
ReturnCode::Ok
}
fn lm_init(state: &mut State) {
state.window_size = 2 * state.w_size;
// zlib uses CLEAR_HASH here
state.head.fill(0);
// Set the default configuration parameters:
lm_set_level(state, state.level);
state.strstart = 0;
state.block_start = 0;
state.lookahead = 0;
state.insert = 0;
state.prev_length = 0;
state.match_available = false;
state.match_start = 0;
state.ins_h = 0;
}
fn lm_set_level(state: &mut State, level: i8) {
state.max_lazy_match = CONFIGURATION_TABLE[level as usize].max_lazy as usize;
state.good_match = CONFIGURATION_TABLE[level as usize].good_length as usize;
state.nice_match = CONFIGURATION_TABLE[level as usize].nice_length as usize;
state.max_chain_length = CONFIGURATION_TABLE[level as usize].max_chain as usize;
state.hash_calc_variant = HashCalcVariant::for_max_chain_length(state.max_chain_length);
state.level = level;
}
pub fn tune(
stream: &mut DeflateStream,
good_length: usize,
max_lazy: usize,
nice_length: usize,
max_chain: usize,
) -> ReturnCode {
stream.state.good_match = good_length;
stream.state.max_lazy_match = max_lazy;
stream.state.nice_match = nice_length;
stream.state.max_chain_length = max_chain;
ReturnCode::Ok
}
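/// A Huffman tree node of two `u16` fields, mirroring zlib's `ct_data` union: `a` holds
/// the frequency while symbols are counted and the code once the tree is built, while
/// `b` holds the parent ("dad") index during construction and the bit length afterwards.
/// That is why `freq_mut`/`code_mut` and `dad_mut`/`len_mut` below alias the same fields.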
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct Value {
a: u16,
b: u16,
}
impl Value {
pub(crate) const fn new(a: u16, b: u16) -> Self {
Self { a, b }
}
pub(crate) fn freq_mut(&mut self) -> &mut u16 {
&mut self.a
}
pub(crate) fn code_mut(&mut self) -> &mut u16 {
&mut self.a
}
pub(crate) fn dad_mut(&mut self) -> &mut u16 {
&mut self.b
}
pub(crate) fn len_mut(&mut self) -> &mut u16 {
&mut self.b
}
#[inline(always)]
pub(crate) const fn freq(self) -> u16 {
self.a
}
pub(crate) fn code(self) -> u16 {
self.a
}
pub(crate) fn dad(self) -> u16 {
self.b
}
pub(crate) fn len(self) -> u16 {
self.b
}
}
/// number of length codes, not counting the special END_BLOCK code
pub(crate) const LENGTH_CODES: usize = 29;
/// number of literal bytes 0..255
const LITERALS: usize = 256;
/// number of Literal or Length codes, including the END_BLOCK code
pub(crate) const L_CODES: usize = LITERALS + 1 + LENGTH_CODES;
/// number of distance codes
pub(crate) const D_CODES: usize = 30;
/// number of codes used to transfer the bit lengths
const BL_CODES: usize = 19;
/// maximum heap size
const HEAP_SIZE: usize = 2 * L_CODES + 1;
/// all codes must not exceed MAX_BITS bits
const MAX_BITS: usize = 15;
/// Bit length codes must not exceed MAX_BL_BITS bits
const MAX_BL_BITS: usize = 7;
pub(crate) const DIST_CODE_LEN: usize = 512;
struct BitWriter<'a> {
pub(crate) pending: Pending<'a>, // output still pending
pub(crate) bit_buffer: u64,
pub(crate) bits_used: u8,
/// total bit length of compressed file (NOTE: zlib-ng uses a 32-bit integer here)
#[cfg(feature = "ZLIB_DEBUG")]
compressed_len: usize,
/// bit length of compressed data sent (NOTE: zlib-ng uses a 32-bit integer here)
#[cfg(feature = "ZLIB_DEBUG")]
bits_sent: usize,
}
impl<'a> BitWriter<'a> {
pub(crate) const BIT_BUF_SIZE: u8 = 64;
fn from_pending(pending: Pending<'a>) -> Self {
Self {
pending,
bit_buffer: 0,
bits_used: 0,
#[cfg(feature = "ZLIB_DEBUG")]
compressed_len: 0,
#[cfg(feature = "ZLIB_DEBUG")]
bits_sent: 0,
}
}
fn flush_bits(&mut self) {
debug_assert!(self.bits_used <= 64);
let removed = self.bits_used.saturating_sub(7).next_multiple_of(8);
let keep_bytes = self.bits_used / 8; // can never divide by zero
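// e.g. with 13 bits buffered: `keep_bytes` is 1, so one full byte (8 bits) is
// appended to `pending`; `removed` is 8, leaving the remaining 5 bits buffered.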
let src = &self.bit_buffer.to_le_bytes();
self.pending.extend(&src[..keep_bytes as usize]);
self.bits_used -= removed;
self.bit_buffer = self.bit_buffer.checked_shr(removed as u32).unwrap_or(0);
}
fn emit_align(&mut self) {
debug_assert!(self.bits_used <= 64);
let keep_bytes = self.bits_used.div_ceil(8);
let src = &self.bit_buffer.to_le_bytes();
self.pending.extend(&src[..keep_bytes as usize]);
self.bits_used = 0;
self.bit_buffer = 0;
self.sent_bits_align();
}
fn send_bits_trace(&self, _value: u64, _len: u8) {
trace!(" l {:>2} v {:>4x} ", _len, _value);
}
fn cmpr_bits_add(&mut self, _len: usize) {
#[cfg(feature = "ZLIB_DEBUG")]
{
self.compressed_len += _len;
}
}
fn cmpr_bits_align(&mut self) {
#[cfg(feature = "ZLIB_DEBUG")]
{
self.compressed_len = self.compressed_len.next_multiple_of(8);
}
}
fn sent_bits_add(&mut self, _len: usize) {
#[cfg(feature = "ZLIB_DEBUG")]
{
self.bits_sent += _len;
}
}
fn sent_bits_align(&mut self) {
#[cfg(feature = "ZLIB_DEBUG")]
{
self.bits_sent = self.bits_sent.next_multiple_of(8);
}
}
fn send_bits(&mut self, val: u64, len: u8) {
debug_assert!(len <= 64);
debug_assert!(self.bits_used <= 64);
let total_bits = len + self.bits_used;
self.send_bits_trace(val, len);
self.sent_bits_add(len as usize);
if total_bits < Self::BIT_BUF_SIZE {
self.bit_buffer |= val << self.bits_used;
self.bits_used = total_bits;
} else if self.bits_used == Self::BIT_BUF_SIZE {
// with how send_bits is called, this is unreachable in practice
self.pending.extend(&self.bit_buffer.to_le_bytes());
self.bit_buffer = val;
self.bits_used = len;
} else {
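// the value straddles the buffer boundary: fill and flush the 64-bit buffer,
// then start a fresh buffer with the bits that spilled over.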
self.bit_buffer |= val << self.bits_used;
self.pending.extend(&self.bit_buffer.to_le_bytes());
self.bit_buffer = val >> (Self::BIT_BUF_SIZE - self.bits_used);
self.bits_used = total_bits - Self::BIT_BUF_SIZE;
}
}
fn send_code(&mut self, code: usize, tree: &[Value]) {
let node = tree[code];
self.send_bits(node.code() as u64, node.len() as u8)
}
/// Send one empty static block to give enough lookahead for inflate.
/// This takes 10 bits, of which 7 may remain in the bit buffer.
pub fn align(&mut self) {
self.emit_tree(BlockType::StaticTrees, false);
self.emit_end_block(&STATIC_LTREE, false);
self.flush_bits();
}
pub(crate) fn emit_tree(&mut self, block_type: BlockType, is_last_block: bool) {
let header_bits = (block_type as u64) << 1 | (is_last_block as u64);
self.send_bits(header_bits, 3);
trace!("\n--- Emit Tree: Last: {}\n", is_last_block as u8);
}
pub(crate) fn emit_end_block_and_align(&mut self, ltree: &[Value], is_last_block: bool) {
self.emit_end_block(ltree, is_last_block);
if is_last_block {
self.emit_align();
}
}
fn emit_end_block(&mut self, ltree: &[Value], _is_last_block: bool) {
const END_BLOCK: usize = 256;
self.send_code(END_BLOCK, ltree);
trace!(
"\n+++ Emit End Block: Last: {} Pending: {} Total Out: {}\n",
_is_last_block as u8,
self.pending.pending().len(),
"<unknown>"
);
}
pub(crate) fn emit_lit(&mut self, ltree: &[Value], c: u8) -> u16 {
self.send_code(c as usize, ltree);
#[cfg(feature = "ZLIB_DEBUG")]
if let Some(c) = char::from_u32(c as u32) {
if isgraph(c as u8) {
trace!(" '{}' ", c);
}
}
ltree[c as usize].len()
}
pub(crate) fn emit_dist(
&mut self,
ltree: &[Value],
dtree: &[Value],
lc: u8,
mut dist: usize,
) -> usize {
let mut lc = lc as usize;
/* Send the length code, len is the match length - STD_MIN_MATCH */
let mut code = self::trees_tbl::LENGTH_CODE[lc] as usize;
let c = code + LITERALS + 1;
assert!(c < L_CODES, "bad l_code");
// send_code_trace(s, c);
let lnode = ltree[c];
let mut match_bits: u64 = lnode.code() as u64;
let mut match_bits_len = lnode.len() as usize;
let mut extra = StaticTreeDesc::EXTRA_LBITS[code] as usize;
if extra != 0 {
lc -= self::trees_tbl::BASE_LENGTH[code] as usize;
match_bits |= (lc as u64) << match_bits_len;
match_bits_len += extra;
}
dist -= 1; /* dist is now the match distance - 1 */
code = State::d_code(dist) as usize;
assert!(code < D_CODES, "bad d_code");
// send_code_trace(s, code);
/* Send the distance code */
let dnode = dtree[code];
match_bits |= (dnode.code() as u64) << match_bits_len;
match_bits_len += dnode.len() as usize;
extra = StaticTreeDesc::EXTRA_DBITS[code] as usize;
if extra != 0 {
dist -= self::trees_tbl::BASE_DIST[code] as usize;
match_bits |= (dist as u64) << match_bits_len;
match_bits_len += extra;
}
self.send_bits(match_bits, match_bits_len as u8);
match_bits_len
}
fn compress_block_help(&mut self, sym_buf: &[u8], ltree: &[Value], dtree: &[Value]) {
for chunk in sym_buf.chunks_exact(3) {
let [dist_low, dist_high, lc] = *chunk else {
unreachable!("out of bound access on the symbol buffer");
};
match u16::from_be_bytes([dist_high, dist_low]) as usize {
0 => self.emit_lit(ltree, lc) as usize,
dist => self.emit_dist(ltree, dtree, lc, dist),
};
}
self.emit_end_block(ltree, false)
}
fn send_tree(&mut self, tree: &[Value], bl_tree: &[Value], max_code: usize) {
/* tree: the tree to be scanned */
/* max_code: largest code of non zero frequency */
let mut prevlen: isize = -1; /* last emitted length */
let mut curlen; /* length of current code */
let mut nextlen = tree[0].len(); /* length of next code */
let mut count = 0; /* repeat count of the current code */
let mut max_count = 7; /* max repeat count */
let mut min_count = 4; /* min repeat count */
/* tree[max_code+1].Len = -1; */
/* guard already set */
if nextlen == 0 {
max_count = 138;
min_count = 3;
}
for n in 0..=max_code {
curlen = nextlen;
nextlen = tree[n + 1].len();
count += 1;
if count < max_count && curlen == nextlen {
continue;
} else if count < min_count {
loop {
self.send_code(curlen as usize, bl_tree);
count -= 1;
if count == 0 {
break;
}
}
} else if curlen != 0 {
if curlen as isize != prevlen {
self.send_code(curlen as usize, bl_tree);
count -= 1;
}
assert!((3..=6).contains(&count), " 3_6?");
self.send_code(REP_3_6, bl_tree);
self.send_bits(count - 3, 2);
} else if count <= 10 {
self.send_code(REPZ_3_10, bl_tree);
self.send_bits(count - 3, 3);
} else {
self.send_code(REPZ_11_138, bl_tree);
self.send_bits(count - 11, 7);
}
count = 0;
prevlen = curlen as isize;
if nextlen == 0 {
max_count = 138;
min_count = 3;
} else if curlen == nextlen {
max_count = 6;
min_count = 3;
} else {
max_count = 7;
min_count = 4;
}
}
}
}
#[repr(C)]
pub(crate) struct State<'a> {
status: Status,
last_flush: i32, /* value of flush param for previous deflate call */
bit_writer: BitWriter<'a>,
pub(crate) wrap: i8, /* bit 0 true for zlib, bit 1 true for gzip */
pub(crate) strategy: Strategy,
pub(crate) level: i8,
/// Use a faster search when the previous match is longer than this
pub(crate) good_match: usize,
/// Stop searching when current match exceeds this
pub(crate) nice_match: usize,
// part of the fields below
// dyn_ltree: [Value; ],
// dyn_dtree: [Value; ],
// bl_tree: [Value; ],
l_desc: TreeDesc<HEAP_SIZE>, /* literal and length tree */
d_desc: TreeDesc<{ 2 * D_CODES + 1 }>, /* distance tree */
bl_desc: TreeDesc<{ 2 * BL_CODES + 1 }>, /* Huffman tree for bit lengths */
pub(crate) bl_count: [u16; MAX_BITS + 1],
pub(crate) match_length: usize, /* length of best match */
pub(crate) prev_match: u16, /* previous match */
pub(crate) match_available: bool, /* set if previous match exists */
pub(crate) strstart: usize, /* start of string to insert */
pub(crate) match_start: usize, /* start of matching string */
/// Length of the best match at previous step. Matches not greater than this
/// are discarded. This is used in the lazy match evaluation.
pub(crate) prev_length: usize,
/// To speed up deflation, hash chains are never searched beyond this length.
/// A higher limit improves compression ratio but degrades the speed.
pub(crate) max_chain_length: usize,
// TODO untangle this mess! zlib uses the same field differently based on compression level
// we should just have 2 fields for clarity!
//
// Insert new strings in the hash table only if the match length is not
// greater than this length. This saves time but degrades compression.
// max_insert_length is used only for compression levels <= 3.
// define max_insert_length max_lazy_match
/// Attempt to find a better match only when the current match is strictly smaller
/// than this value. This mechanism is used only for compression levels >= 4.
pub(crate) max_lazy_match: usize,
/// Window position at the beginning of the current output block. Gets
/// negative when the window is moved backwards.
pub(crate) block_start: isize,
/// Whether or not a block is currently open for the QUICK deflation scheme.
/// true if there is an active block, or false if the block was just closed
pub(crate) block_open: u8,
pub(crate) window: Window<'a>,
pub(crate) sym_buf: ReadBuf<'a>,
/// Size of match buffer for literals/lengths. There are 4 reasons for
/// limiting lit_bufsize to 64K:
/// - frequencies can be kept in 16 bit counters
/// - if compression is not successful for the first block, all input
/// data is still in the window so we can still emit a stored block even
/// when input comes from standard input. (This can also be done for
/// all blocks if lit_bufsize is not greater than 32K.)
/// - if compression is not successful for a file smaller than 64K, we can
/// even emit a stored file instead of a stored block (saving 5 bytes).
/// This is applicable only for zip (not gzip or zlib).
/// - creating new Huffman trees less frequently may not provide fast
/// adaptation to changes in the input data statistics. (Take for
/// example a binary file with poorly compressible code followed by
/// a highly compressible string table.) Smaller buffer sizes give
/// fast adaptation but have of course the overhead of transmitting
/// trees more frequently.
/// - I can't count above 4
lit_bufsize: usize,
/// Actual size of window: 2*wSize, except when the user input buffer is directly used as sliding window.
pub(crate) window_size: usize,
/// number of string matches in current block
pub(crate) matches: usize,
/// bit length of current block with optimal trees
opt_len: usize,
/// bit length of current block with static trees
static_len: usize,
/// bytes at end of window left to insert
pub(crate) insert: usize,
pub(crate) w_size: usize, /* LZ77 window size (32K by default) */
pub(crate) w_bits: usize, /* log2(w_size) (8..16) */
pub(crate) w_mask: usize, /* w_size - 1 */
pub(crate) lookahead: usize, /* number of valid bytes ahead in window */
pub(crate) prev: &'a mut [u16],
pub(crate) head: &'a mut [u16; HASH_SIZE],
/// hash index of string to be inserted
pub(crate) ins_h: usize,
heap: Heap,
pub(crate) hash_calc_variant: HashCalcVariant,
crc_fold: crate::crc32::Crc32Fold,
gzhead: Option<&'a mut gz_header>,
gzindex: usize,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
#[cfg_attr(feature = "__internal-fuzz", derive(arbitrary::Arbitrary))]
pub enum Strategy {
#[default]
Default = 0,
Filtered = 1,
HuffmanOnly = 2,
Rle = 3,
Fixed = 4,
}
impl TryFrom<i32> for Strategy {
type Error = ();
fn try_from(value: i32) -> Result<Self, Self::Error> {
match value {
0 => Ok(Strategy::Default),
1 => Ok(Strategy::Filtered),
2 => Ok(Strategy::HuffmanOnly),
3 => Ok(Strategy::Rle),
4 => Ok(Strategy::Fixed),
_ => Err(()),
}
}
}
#[derive(Debug, PartialEq, Eq)]
enum DataType {
Binary = 0,
Text = 1,
Unknown = 2,
}
impl<'a> State<'a> {
pub const BIT_BUF_SIZE: u8 = BitWriter::BIT_BUF_SIZE;
pub(crate) fn max_dist(&self) -> usize {
self.w_size - MIN_LOOKAHEAD
}
// TODO untangle this mess! zlib uses the same field differently based on compression level
// we should just have 2 fields for clarity!
pub(crate) fn max_insert_length(&self) -> usize {
self.max_lazy_match
}
/// Total size of the pending buffer. In zlib-ng, `pending` overlays `sym_buf` (here they
/// are separate allocations), so this is not the number of bytes actually in `pending`!
pub(crate) fn pending_buf_size(&self) -> usize {
self.lit_bufsize * 4
}
pub(crate) fn update_hash(&self, h: u32, val: u32) -> u32 {
match self.hash_calc_variant {
HashCalcVariant::Standard => StandardHashCalc::update_hash(h, val),
HashCalcVariant::Crc32 => unsafe { Crc32HashCalc::update_hash(h, val) },
HashCalcVariant::Roll => RollHashCalc::update_hash(h, val),
}
}
pub(crate) fn quick_insert_string(&mut self, string: usize) -> u16 {
match self.hash_calc_variant {
HashCalcVariant::Standard => StandardHashCalc::quick_insert_string(self, string),
HashCalcVariant::Crc32 => unsafe { Crc32HashCalc::quick_insert_string(self, string) },
HashCalcVariant::Roll => RollHashCalc::quick_insert_string(self, string),
}
}
pub(crate) fn insert_string(&mut self, string: usize, count: usize) {
match self.hash_calc_variant {
HashCalcVariant::Standard => StandardHashCalc::insert_string(self, string, count),
HashCalcVariant::Crc32 => unsafe { Crc32HashCalc::insert_string(self, string, count) },
HashCalcVariant::Roll => RollHashCalc::insert_string(self, string, count),
}
}
pub(crate) fn tally_lit(&mut self, unmatched: u8) -> bool {
self.sym_buf.push(0);
self.sym_buf.push(0);
self.sym_buf.push(unmatched);
*self.l_desc.dyn_tree[unmatched as usize].freq_mut() += 1;
assert!(
unmatched as usize <= STD_MAX_MATCH - STD_MIN_MATCH,
"zng_tr_tally: bad literal"
);
// signal that the current block should be flushed
self.sym_buf.len() == self.sym_buf.capacity() - 3
}
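/// Maps a distance (given as `dist - 1`) to a distance code. Distances below 256 index
/// the first half of `DIST_CODE` directly; larger ones are indexed at `256 + (dist >> 7)`.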
const fn d_code(dist: usize) -> u8 {
let index = if dist < 256 { dist } else { 256 + (dist >> 7) };
self::trees_tbl::DIST_CODE[index]
}
pub(crate) fn tally_dist(&mut self, mut dist: usize, len: usize) -> bool {
let symbols = [dist as u8, (dist >> 8) as u8, len as u8];
self.sym_buf.extend(&symbols);
self.matches += 1;
dist -= 1;
assert!(
dist < self.max_dist() && Self::d_code(dist) < D_CODES as u8,
"tally_dist: bad match"
);
let index = self::trees_tbl::LENGTH_CODE[len] as usize + LITERALS + 1;
*self.l_desc.dyn_tree[index].freq_mut() += 1;
*self.d_desc.dyn_tree[Self::d_code(dist) as usize].freq_mut() += 1;
// signal that the current block should be flushed
self.sym_buf.len() == self.sym_buf.capacity() - 3
}
fn detect_data_type(dyn_tree: &[Value]) -> DataType {
// set bits 0..6, 14..25, and 28..31
// 0xf3ffc07f = binary 11110011111111111100000001111111
const NON_TEXT: u64 = 0xf3ffc07f;
let mut mask = NON_TEXT;
/* Check for non-textual bytes. */
for value in &dyn_tree[0..32] {
if (mask & 1) != 0 && value.freq() != 0 {
return DataType::Binary;
}
mask >>= 1;
}
/* Check for textual bytes. */
if dyn_tree[9].freq() != 0 || dyn_tree[10].freq() != 0 || dyn_tree[13].freq() != 0 {
return DataType::Text;
}
if dyn_tree[32..LITERALS].iter().any(|v| v.freq() != 0) {
return DataType::Text;
}
// there are no explicit text or non-text bytes. The stream is either empty or has only
// tolerated bytes
DataType::Binary
}
fn compress_block_static_trees(&mut self) {
self.bit_writer.compress_block_help(
self.sym_buf.filled(),
self::trees_tbl::STATIC_LTREE.as_slice(),
self::trees_tbl::STATIC_DTREE.as_slice(),
)
}
fn compress_block_dynamic_trees(&mut self) {
self.bit_writer.compress_block_help(
self.sym_buf.filled(),
&self.l_desc.dyn_tree,
&self.d_desc.dyn_tree,
);
}
fn header(&self) -> u16 {
// preset dictionary flag in zlib header
const PRESET_DICT: u16 = 0x20;
// The deflate compression method (the only one supported in this version)
const Z_DEFLATED: u16 = 8;
let dict = match self.strstart {
0 => 0,
_ => PRESET_DICT,
};
let h =
(Z_DEFLATED + ((self.w_bits as u16 - 8) << 4)) << 8 | (self.level_flags() << 6) | dict;
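// per RFC 1950 the 16-bit header must be a multiple of 31; the low "FCHECK"
// bits are chosen to round it up to the next multiple.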
h + 31 - (h % 31)
}
fn level_flags(&self) -> u16 {
if self.strategy >= Strategy::HuffmanOnly || self.level < 2 {
0
} else if self.level < 6 {
1
} else if self.level == 6 {
2
} else {
3
}
}
fn zng_tr_init(&mut self) {
self.l_desc.stat_desc = &StaticTreeDesc::L;
self.d_desc.stat_desc = &StaticTreeDesc::D;
self.bl_desc.stat_desc = &StaticTreeDesc::BL;
self.bit_writer.bit_buffer = 0;
self.bit_writer.bits_used = 0;
#[cfg(feature = "ZLIB_DEBUG")]
{
self.bit_writer.compressed_len = 0;
self.bit_writer.bits_sent = 0;
}
// Initialize the first block of the first file:
self.init_block();
}
/// initializes a new block
fn init_block(&mut self) {
// Initialize the trees.
// TODO would a memset also work here?
for value in &mut self.l_desc.dyn_tree[..L_CODES] {
*value.freq_mut() = 0;
}
for value in &mut self.d_desc.dyn_tree[..D_CODES] {
*value.freq_mut() = 0;
}
for value in &mut self.bl_desc.dyn_tree[..BL_CODES] {
*value.freq_mut() = 0;
}
// end of block literal code
const END_BLOCK: usize = 256;
*self.l_desc.dyn_tree[END_BLOCK].freq_mut() = 1;
self.opt_len = 0;
self.static_len = 0;
self.sym_buf.clear();
self.matches = 0;
}
}
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Status {
Init = 1,
GZip = 4,
Extra = 5,
Name = 6,
Comment = 7,
Hcrc = 8,
Busy = 2,
Finish = 3,
}
const fn rank_flush(f: i32) -> i32 {
// rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH
((f) * 2) - (if (f) > 4 { 9 } else { 0 })
}
#[derive(Debug)]
pub(crate) enum BlockState {
/// block not completed, need more input or more output
NeedMore = 0,
/// block flush performed
BlockDone = 1,
/// finish started, need only more output at next deflate
FinishStarted = 2,
/// finish done, accept no more input or output
FinishDone = 3,
}
// Maximum stored block length in deflate format (not including header).
pub(crate) const MAX_STORED: usize = 65535; // i.e. u16::MAX
pub(crate) fn read_buf_window(stream: &mut DeflateStream, offset: usize, size: usize) -> usize {
let len = Ord::min(stream.avail_in as usize, size);
if len == 0 {
return 0;
}
stream.avail_in -= len as u32;
if stream.state.wrap == 2 {
// we likely cannot fuse the crc32 and the copy here because the input can be changed by
// a concurrent thread. Therefore it cannot be converted into a slice!
let window = &mut stream.state.window;
window.initialize_at_least(offset + len);
unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) };
let data = &stream.state.window.filled()[offset..][..len];
stream.state.crc_fold.fold(data, CRC32_INITIAL_VALUE);
} else if stream.state.wrap == 1 {
// we likely cannot fuse the adler32 and the copy here because the input can be changed by
// a concurrent thread. Therefore it cannot be converted into a slice!
let window = &mut stream.state.window;
window.initialize_at_least(offset + len);
unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) };
let data = &stream.state.window.filled()[offset..][..len];
stream.adler = adler32(stream.adler as u32, data) as _;
} else {
let window = &mut stream.state.window;
window.initialize_at_least(offset + len);
unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) };
}
stream.next_in = stream.next_in.wrapping_add(len);
stream.total_in += len as crate::c_api::z_size;
len
}
pub(crate) enum BlockType {
StoredBlock = 0,
StaticTrees = 1,
DynamicTrees = 2,
}
pub(crate) fn zng_tr_stored_block(
state: &mut State,
window_range: core::ops::Range<usize>,
is_last: bool,
) {
// send block type
state.bit_writer.emit_tree(BlockType::StoredBlock, is_last);
// align on byte boundary
state.bit_writer.emit_align();
state.bit_writer.cmpr_bits_align();
let input_block: &[u8] = &state.window.filled()[window_range];
let stored_len = input_block.len() as u16;
state.bit_writer.pending.extend(&stored_len.to_le_bytes());
state
.bit_writer
.pending
.extend(&(!stored_len).to_le_bytes());
state.bit_writer.cmpr_bits_add(32);
state.bit_writer.sent_bits_add(32);
if stored_len > 0 {
state.bit_writer.pending.extend(input_block);
state.bit_writer.cmpr_bits_add((stored_len << 3) as usize);
state.bit_writer.sent_bits_add((stored_len << 3) as usize);
}
}
/// The minimum match length mandated by the deflate standard
pub(crate) const STD_MIN_MATCH: usize = 3;
/// The maximum match length mandated by the deflate standard
pub(crate) const STD_MAX_MATCH: usize = 258;
/// The minimum wanted match length, affects deflate_quick, deflate_fast, deflate_medium and deflate_slow
pub(crate) const WANT_MIN_MATCH: usize = 4;
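/// Minimum amount of lookahead, except at the end of the input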
pub(crate) const MIN_LOOKAHEAD: usize = STD_MAX_MATCH + STD_MIN_MATCH + 1;
pub(crate) fn fill_window(stream: &mut DeflateStream) {
debug_assert!(stream.state.lookahead < MIN_LOOKAHEAD);
let wsize = stream.state.w_size;
loop {
let state = &mut stream.state;
let mut more = state.window_size - state.lookahead - state.strstart;
// If the window is almost full and there is insufficient lookahead,
// move the upper half to the lower one to make room in the upper half.
if state.strstart >= wsize + state.max_dist() {
// in some cases zlib-ng copies uninitialized bytes here. We cannot have that, so
// explicitly initialize them with zeros.
//
// see also the "fill_window_out_of_bounds" test.
state.window.initialize_at_least(2 * wsize);
state.window.filled_mut().copy_within(wsize..2 * wsize, 0);
if state.match_start >= wsize {
state.match_start -= wsize;
} else {
state.match_start = 0;
state.prev_length = 0;
}
state.strstart -= wsize; /* we now have strstart >= MAX_DIST */
state.block_start -= wsize as isize;
if state.insert > state.strstart {
state.insert = state.strstart;
}
self::slide_hash::slide_hash(state);
more += wsize;
}
if stream.avail_in == 0 {
break;
}
// If there was no sliding:
// strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
// more == window_size - lookahead - strstart
// => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
// => more >= window_size - 2*WSIZE + 2
// In the BIG_MEM or MMAP case (not yet supported),
// window_size == input_size + MIN_LOOKAHEAD &&
// strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
// Otherwise, window_size == 2*WSIZE so more >= 2.
// If there was sliding, more >= WSIZE. So in all cases, more >= 2.
assert!(more >= 2, "more < 2");
let n = read_buf_window(stream, stream.state.strstart + stream.state.lookahead, more);
let state = &mut stream.state;
state.lookahead += n;
// Initialize the hash value now that we have some input:
if state.lookahead + state.insert >= STD_MIN_MATCH {
let string = state.strstart - state.insert;
if state.max_chain_length > 1024 {
let v0 = state.window.filled()[string] as u32;
let v1 = state.window.filled()[string + 1] as u32;
state.ins_h = state.update_hash(v0, v1) as usize;
} else if string >= 1 {
state.quick_insert_string(string + 2 - STD_MIN_MATCH);
}
let mut count = state.insert;
if state.lookahead == 1 {
count -= 1;
}
if count > 0 {
state.insert_string(string, count);
state.insert -= count;
}
}
// If the whole input has less than STD_MIN_MATCH bytes, ins_h is garbage,
// but this is not important since only literal bytes will be emitted.
if !(stream.state.lookahead < MIN_LOOKAHEAD && stream.avail_in != 0) {
break;
}
}
// initialize some memory at the end of the (filled) window, so SIMD operations can go "out of
// bounds" without violating any requirements. The window allocation is already slightly bigger
// to allow for this.
stream.state.window.initialize_out_of_bounds();
assert!(
stream.state.strstart <= stream.state.window_size - MIN_LOOKAHEAD,
"not enough room for search"
);
}
pub(crate) struct StaticTreeDesc {
/// static tree or NULL
pub(crate) static_tree: &'static [Value],
/// extra bits for each code or NULL
extra_bits: &'static [u8],
/// base index for extra_bits
extra_base: usize,
/// max number of elements in the tree
elems: usize,
/// max bit length for the codes
max_length: u16,
}
impl StaticTreeDesc {
const EMPTY: Self = Self {
static_tree: &[],
extra_bits: &[],
extra_base: 0,
elems: 0,
max_length: 0,
};
/// extra bits for each length code
const EXTRA_LBITS: [u8; LENGTH_CODES] = [
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0,
];
/// extra bits for each distance code
const EXTRA_DBITS: [u8; D_CODES] = [
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
13, 13,
];
/// extra bits for each bit length code
const EXTRA_BLBITS: [u8; BL_CODES] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7];
/// The lengths of the bit length codes are sent in order of decreasing
/// probability, to avoid transmitting the lengths for unused bit length codes.
const BL_ORDER: [u8; BL_CODES] = [
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
];
pub(crate) const L: Self = Self {
static_tree: &self::trees_tbl::STATIC_LTREE,
extra_bits: &Self::EXTRA_LBITS,
extra_base: LITERALS + 1,
elems: L_CODES,
max_length: MAX_BITS as u16,
};
pub(crate) const D: Self = Self {
static_tree: &self::trees_tbl::STATIC_DTREE,
extra_bits: &Self::EXTRA_DBITS,
extra_base: 0,
elems: D_CODES,
max_length: MAX_BITS as u16,
};
pub(crate) const BL: Self = Self {
static_tree: &[],
extra_bits: &Self::EXTRA_BLBITS,
extra_base: 0,
elems: BL_CODES,
max_length: MAX_BL_BITS as u16,
};
}
#[derive(Clone)]
struct TreeDesc<const N: usize> {
dyn_tree: [Value; N],
max_code: usize,
stat_desc: &'static StaticTreeDesc,
}
impl<const N: usize> TreeDesc<N> {
const EMPTY: Self = Self {
dyn_tree: [Value::new(0, 0); N],
max_code: 0,
stat_desc: &StaticTreeDesc::EMPTY,
};
}
fn build_tree<const N: usize>(state: &mut State, desc: &mut TreeDesc<N>) {
let tree = &mut desc.dyn_tree;
let stree = desc.stat_desc.static_tree;
let elements = desc.stat_desc.elems;
let mut max_code = state.heap.initialize(&mut tree[..elements]);
// The pkzip format requires that at least one distance code exists,
// and that at least one bit should be sent even if there is only one
// possible code. So to avoid special checks later on we force at least
// two codes of non zero frequency.
while state.heap.heap_len < 2 {
state.heap.heap_len += 1;
let node = if max_code < 2 {
max_code += 1;
max_code
} else {
0
};
debug_assert!(node >= 0);
let node = node as usize;
state.heap.heap[state.heap.heap_len] = node as u32;
*tree[node].freq_mut() = 1;
state.heap.depth[node] = 0;
state.opt_len -= 1;
if !stree.is_empty() {
state.static_len -= stree[node].len() as usize;
}
/* node is 0 or 1 so it does not have extra bits */
}
debug_assert!(max_code >= 0);
let max_code = max_code as usize;
desc.max_code = max_code;
// The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
// establish sub-heaps of increasing lengths:
let mut n = state.heap.heap_len / 2;
while n >= 1 {
state.heap.pqdownheap(tree, n);
n -= 1;
}
state.heap.construct_huffman_tree(tree, elements);
// At this point, the fields freq and dad are set. We can now
// generate the bit lengths.
gen_bitlen(state, desc);
// The field len is now set, we can generate the bit codes
gen_codes(&mut desc.dyn_tree, max_code, &state.bl_count);
}
fn gen_bitlen<const N: usize>(state: &mut State, desc: &mut TreeDesc<N>) {
let heap = &mut state.heap;
let tree = &mut desc.dyn_tree;
let max_code = desc.max_code;
let stree = desc.stat_desc.static_tree;
let extra = desc.stat_desc.extra_bits;
let base = desc.stat_desc.extra_base;
let max_length = desc.stat_desc.max_length;
state.bl_count.fill(0);
// In a first pass, compute the optimal bit lengths (which may
// overflow in the case of the bit length tree).
*tree[heap.heap[heap.heap_max] as usize].len_mut() = 0; /* root of the heap */
// number of elements with bit length too large
let mut overflow: i32 = 0;
for h in heap.heap_max + 1..HEAP_SIZE {
let n = heap.heap[h] as usize;
let mut bits = tree[tree[n].dad() as usize].len() + 1;
if bits > max_length {
bits = max_length;
overflow += 1;
}
// We overwrite tree[n].Dad which is no longer needed
*tree[n].len_mut() = bits;
// not a leaf node
if n > max_code {
continue;
}
state.bl_count[bits as usize] += 1;
let mut xbits = 0;
if n >= base {
xbits = extra[n - base] as usize;
}
let f = tree[n].freq() as usize;
state.opt_len += f * (bits as usize + xbits);
if !stree.is_empty() {
state.static_len += f * (stree[n].len() as usize + xbits);
}
}
if overflow == 0 {
return;
}
/* Find the first bit length which could increase: */
loop {
let mut bits = max_length as usize - 1;
while state.bl_count[bits] == 0 {
bits -= 1;
}
state.bl_count[bits] -= 1; /* move one leaf down the tree */
state.bl_count[bits + 1] += 2; /* move one overflow item as its brother */
state.bl_count[max_length as usize] -= 1;
/* The brother of the overflow item also moves one step up,
* but this does not affect bl_count[max_length]
*/
overflow -= 2;
if overflow <= 0 {
break;
}
}
// Now recompute all bit lengths, scanning in increasing frequency.
// h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
// lengths instead of fixing only the wrong ones. This idea is taken
// from 'ar' written by Haruhiko Okumura.)
let mut h = HEAP_SIZE;
for bits in (1..=max_length).rev() {
let mut n = state.bl_count[bits as usize];
while n != 0 {
h -= 1;
let m = heap.heap[h] as usize;
if m > max_code {
continue;
}
if tree[m].len() != bits {
// Tracev((stderr, "code %d bits %d->%u\n", m, tree[m].Len, bits));
state.opt_len += (bits * tree[m].freq()) as usize;
state.opt_len -= (tree[m].len() * tree[m].freq()) as usize;
*tree[m].len_mut() = bits;
}
n -= 1;
}
}
}
/// Checks that symbol is a printing character (excluding space)
#[allow(unused)]
fn isgraph(c: u8) -> bool {
(c > 0x20) && (c <= 0x7E)
}
fn gen_codes(tree: &mut [Value], max_code: usize, bl_count: &[u16]) {
/* tree: the tree to decorate */
/* max_code: largest code with non zero frequency */
/* bl_count: number of codes at each bit length */
let mut next_code = [0; MAX_BITS + 1]; /* next code value for each bit length */
let mut code = 0; /* running code value */
/* The distribution counts are first used to generate the code values
* without bit reversal.
*/
for bits in 1..=MAX_BITS {
code = (code + bl_count[bits - 1]) << 1;
next_code[bits] = code;
}
/* Check that the bit counts in bl_count are consistent. The last code
* must be all ones.
*/
assert!(
code + bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1,
"inconsistent bit counts"
);
trace!("\ngen_codes: max_code {max_code} ");
for n in 0..=max_code {
let len = tree[n].len();
if len == 0 {
continue;
}
/* Now reverse the bits */
assert!((1..=15).contains(&len), "code length must be 1-15");
*tree[n].code_mut() = next_code[len as usize].reverse_bits() >> (16 - len);
next_code[len as usize] += 1;
if tree != self::trees_tbl::STATIC_LTREE.as_slice() {
trace!(
"\nn {:>3} {} l {:>2} c {:>4x} ({:x}) ",
n,
if isgraph(n as u8) {
char::from_u32(n as u32).unwrap()
} else {
' '
},
len,
tree[n].code(),
next_code[len as usize] - 1
);
}
}
}
/// repeat previous bit length 3-6 times (2 bits of repeat count)
const REP_3_6: usize = 16;
/// repeat a zero length 3-10 times (3 bits of repeat count)
const REPZ_3_10: usize = 17;
/// repeat a zero length 11-138 times (7 bits of repeat count)
const REPZ_11_138: usize = 18;
fn scan_tree(bl_desc: &mut TreeDesc<{ 2 * BL_CODES + 1 }>, tree: &mut [Value], max_code: usize) {
/* tree: the tree to be scanned */
/* max_code: largest code of non zero frequency */
let mut prevlen = -1isize; /* last emitted length */
let mut curlen: isize; /* length of current code */
let mut nextlen = tree[0].len(); /* length of next code */
let mut count = 0; /* repeat count of the current code */
let mut max_count = 7; /* max repeat count */
let mut min_count = 4; /* min repeat count */
if nextlen == 0 {
max_count = 138;
min_count = 3;
}
*tree[max_code + 1].len_mut() = 0xffff; /* guard */
let bl_tree = &mut bl_desc.dyn_tree;
for n in 0..=max_code {
curlen = nextlen as isize;
nextlen = tree[n + 1].len();
count += 1;
if count < max_count && curlen == nextlen as isize {
continue;
} else if count < min_count {
*bl_tree[curlen as usize].freq_mut() += count;
} else if curlen != 0 {
if curlen != prevlen {
*bl_tree[curlen as usize].freq_mut() += 1;
}
*bl_tree[REP_3_6].freq_mut() += 1;
} else if count <= 10 {
*bl_tree[REPZ_3_10].freq_mut() += 1;
} else {
*bl_tree[REPZ_11_138].freq_mut() += 1;
}
count = 0;
prevlen = curlen;
if nextlen == 0 {
max_count = 138;
min_count = 3;
} else if curlen == nextlen as isize {
max_count = 6;
min_count = 3;
} else {
max_count = 7;
min_count = 4;
}
}
}