kernel-ark/lib/zlib_deflate/defutil.h
Jim Keniston 565d76cb7d zlib: slim down zlib_deflate() workspace when possible
Instead of always creating a huge (268K) deflate_workspace with the
maximum compression parameters (windowBits=15, memLevel=8), allow the
caller to obtain a smaller workspace by specifying smaller parameter
values.

For example, when capturing oops and panic reports to a medium with
limited capacity, such as NVRAM, compression may be the only way to
capture the whole report.  In this case, a small workspace (24K works
fine) is a win, whether you allocate the workspace when you need it (i.e.,
during an oops or panic) or at boot time.

I've verified that this patch works with all accepted values of windowBits
(positive and negative), memLevel, and compression level.

Signed-off-by: Jim Keniston <jkenisto@us.ibm.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: David Miller <davem@davemloft.net>
Cc: Chris Mason <chris.mason@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-03-22 17:44:17 -07:00

344 lines
12 KiB
C

#define Assert(err, str)
#define Trace(dummy)
#define Tracev(dummy)
#define Tracecv(err, dummy)
#define Tracevv(dummy)
#define LENGTH_CODES 29
/* number of length codes, not counting the special END_BLOCK code */
#define LITERALS 256
/* number of literal bytes 0..255 */
#define L_CODES (LITERALS+1+LENGTH_CODES)
/* number of Literal or Length codes, including the END_BLOCK code */
#define D_CODES 30
/* number of distance codes */
#define BL_CODES 19
/* number of codes used to transfer the bit lengths */
#define HEAP_SIZE (2*L_CODES+1)
/* maximum heap size */
#define MAX_BITS 15
/* All codes must not exceed MAX_BITS bits */
#define INIT_STATE 42
#define BUSY_STATE 113
#define FINISH_STATE 666
/* Stream status */
/* Data structure describing a single value and its code string. */
typedef struct ct_data_s {
union {
ush freq; /* frequency count */
ush code; /* bit string */
} fc;
union {
ush dad; /* father node in Huffman tree */
ush len; /* length of bit string */
} dl;
} ct_data;
#define Freq fc.freq
#define Code fc.code
#define Dad dl.dad
#define Len dl.len
typedef struct static_tree_desc_s static_tree_desc;
typedef struct tree_desc_s {
ct_data *dyn_tree; /* the dynamic tree */
int max_code; /* largest code with non zero frequency */
static_tree_desc *stat_desc; /* the corresponding static tree */
} tree_desc;
typedef ush Pos;
typedef unsigned IPos;
/* A Pos is an index in the character window. We use short instead of int to
* save space in the various tables. IPos is used only for parameter passing.
*/
typedef struct deflate_state {
z_streamp strm; /* pointer back to this zlib stream */
int status; /* as the name implies */
Byte *pending_buf; /* output still pending */
ulg pending_buf_size; /* size of pending_buf */
Byte *pending_out; /* next pending byte to output to the stream */
int pending; /* nb of bytes in the pending buffer */
int noheader; /* suppress zlib header and adler32 */
Byte data_type; /* UNKNOWN, BINARY or ASCII */
Byte method; /* STORED (for zip only) or DEFLATED */
int last_flush; /* value of flush param for previous deflate call */
/* used by deflate.c: */
uInt w_size; /* LZ77 window size (32K by default) */
uInt w_bits; /* log2(w_size) (8..16) */
uInt w_mask; /* w_size - 1 */
Byte *window;
/* Sliding window. Input bytes are read into the second half of the window,
* and move to the first half later to keep a dictionary of at least wSize
* bytes. With this organization, matches are limited to a distance of
* wSize-MAX_MATCH bytes, but this ensures that IO is always
* performed with a length multiple of the block size. Also, it limits
* the window size to 64K, which is quite useful on MSDOS.
* To do: use the user input buffer as sliding window.
*/
ulg window_size;
/* Actual size of window: 2*wSize, except when the user input buffer
* is directly used as sliding window.
*/
Pos *prev;
/* Link to older string with same hash index. To limit the size of this
* array to 64K, this link is maintained only for the last 32K strings.
* An index in this array is thus a window index modulo 32K.
*/
Pos *head; /* Heads of the hash chains or NIL. */
uInt ins_h; /* hash index of string to be inserted */
uInt hash_size; /* number of elements in hash table */
uInt hash_bits; /* log2(hash_size) */
uInt hash_mask; /* hash_size-1 */
uInt hash_shift;
/* Number of bits by which ins_h must be shifted at each input
* step. It must be such that after MIN_MATCH steps, the oldest
* byte no longer takes part in the hash key, that is:
* hash_shift * MIN_MATCH >= hash_bits
*/
long block_start;
/* Window position at the beginning of the current output block. Gets
* negative when the window is moved backwards.
*/
uInt match_length; /* length of best match */
IPos prev_match; /* previous match */
int match_available; /* set if previous match exists */
uInt strstart; /* start of string to insert */
uInt match_start; /* start of matching string */
uInt lookahead; /* number of valid bytes ahead in window */
uInt prev_length;
/* Length of the best match at previous step. Matches not greater than this
* are discarded. This is used in the lazy match evaluation.
*/
uInt max_chain_length;
/* To speed up deflation, hash chains are never searched beyond this
* length. A higher limit improves compression ratio but degrades the
* speed.
*/
uInt max_lazy_match;
/* Attempt to find a better match only when the current match is strictly
* smaller than this value. This mechanism is used only for compression
* levels >= 4.
*/
# define max_insert_length max_lazy_match
/* Insert new strings in the hash table only if the match length is not
* greater than this length. This saves time but degrades compression.
* max_insert_length is used only for compression levels <= 3.
*/
int level; /* compression level (1..9) */
int strategy; /* favor or force Huffman coding*/
uInt good_match;
/* Use a faster search when the previous match is longer than this */
int nice_match; /* Stop searching when current match exceeds this */
/* used by trees.c: */
/* Didn't use ct_data typedef below to suppress compiler warning */
struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */
struct tree_desc_s l_desc; /* desc. for literal tree */
struct tree_desc_s d_desc; /* desc. for distance tree */
struct tree_desc_s bl_desc; /* desc. for bit length tree */
ush bl_count[MAX_BITS+1];
/* number of codes at each bit length for an optimal tree */
int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
int heap_len; /* number of elements in the heap */
int heap_max; /* element of largest frequency */
/* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
* The same heap array is used to build all trees.
*/
uch depth[2*L_CODES+1];
/* Depth of each subtree used as tie breaker for trees of equal frequency
*/
uch *l_buf; /* buffer for literals or lengths */
uInt lit_bufsize;
/* Size of match buffer for literals/lengths. There are 4 reasons for
* limiting lit_bufsize to 64K:
* - frequencies can be kept in 16 bit counters
* - if compression is not successful for the first block, all input
* data is still in the window so we can still emit a stored block even
* when input comes from standard input. (This can also be done for
* all blocks if lit_bufsize is not greater than 32K.)
* - if compression is not successful for a file smaller than 64K, we can
* even emit a stored file instead of a stored block (saving 5 bytes).
* This is applicable only for zip (not gzip or zlib).
* - creating new Huffman trees less frequently may not provide fast
* adaptation to changes in the input data statistics. (Take for
* example a binary file with poorly compressible code followed by
* a highly compressible string table.) Smaller buffer sizes give
* fast adaptation but have of course the overhead of transmitting
* trees more frequently.
* - I can't count above 4
*/
uInt last_lit; /* running index in l_buf */
ush *d_buf;
/* Buffer for distances. To simplify the code, d_buf and l_buf have
* the same number of elements. To use different lengths, an extra flag
* array would be necessary.
*/
ulg opt_len; /* bit length of current block with optimal trees */
ulg static_len; /* bit length of current block with static trees */
ulg compressed_len; /* total bit length of compressed file */
uInt matches; /* number of string matches in current block */
int last_eob_len; /* bit length of EOB code for last block */
#ifdef DEBUG_ZLIB
ulg bits_sent; /* bit length of the compressed data */
#endif
ush bi_buf;
/* Output buffer. bits are inserted starting at the bottom (least
* significant bits).
*/
int bi_valid;
/* Number of valid bits in bi_buf. All bits above the last valid bit
* are always zero.
*/
} deflate_state;
typedef struct deflate_workspace {
/* State memory for the deflator */
deflate_state deflate_memory;
Byte *window_memory;
Pos *prev_memory;
Pos *head_memory;
char *overlay_memory;
} deflate_workspace;
#define zlib_deflate_window_memsize(windowBits) \
(2 * (1 << (windowBits)) * sizeof(Byte))
#define zlib_deflate_prev_memsize(windowBits) \
((1 << (windowBits)) * sizeof(Pos))
#define zlib_deflate_head_memsize(memLevel) \
((1 << ((memLevel)+7)) * sizeof(Pos))
#define zlib_deflate_overlay_memsize(memLevel) \
((1 << ((memLevel)+6)) * (sizeof(ush)+2))
/* Output a byte on the stream.
* IN assertion: there is enough room in pending_buf.
*/
#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);}
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
/* Minimum amount of lookahead, except at the end of the input file.
* See deflate.c for comments about the MIN_MATCH+1.
*/
#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD)
/* In order to simplify the code, particularly on 16 bit machines, match
* distances are limited to MAX_DIST instead of WSIZE.
*/
/* in trees.c */
void zlib_tr_init (deflate_state *s);
int zlib_tr_tally (deflate_state *s, unsigned dist, unsigned lc);
ulg zlib_tr_flush_block (deflate_state *s, char *buf, ulg stored_len,
int eof);
void zlib_tr_align (deflate_state *s);
void zlib_tr_stored_block (deflate_state *s, char *buf, ulg stored_len,
int eof);
void zlib_tr_stored_type_only (deflate_state *);
/* ===========================================================================
* Output a short LSB first on the stream.
* IN assertion: there is enough room in pendingBuf.
*/
#define put_short(s, w) { \
put_byte(s, (uch)((w) & 0xff)); \
put_byte(s, (uch)((ush)(w) >> 8)); \
}
/* ===========================================================================
* Reverse the first len bits of a code, using straightforward code (a faster
* method would use a table)
* IN assertion: 1 <= len <= 15
*/
static inline unsigned bi_reverse(unsigned code, /* the value to invert */
int len) /* its bit length */
{
register unsigned res = 0;
do {
res |= code & 1;
code >>= 1, res <<= 1;
} while (--len > 0);
return res >> 1;
}
/* ===========================================================================
* Flush the bit buffer, keeping at most 7 bits in it.
*/
static inline void bi_flush(deflate_state *s)
{
if (s->bi_valid == 16) {
put_short(s, s->bi_buf);
s->bi_buf = 0;
s->bi_valid = 0;
} else if (s->bi_valid >= 8) {
put_byte(s, (Byte)s->bi_buf);
s->bi_buf >>= 8;
s->bi_valid -= 8;
}
}
/* ===========================================================================
* Flush the bit buffer and align the output on a byte boundary
*/
static inline void bi_windup(deflate_state *s)
{
if (s->bi_valid > 8) {
put_short(s, s->bi_buf);
} else if (s->bi_valid > 0) {
put_byte(s, (Byte)s->bi_buf);
}
s->bi_buf = 0;
s->bi_valid = 0;
#ifdef DEBUG_ZLIB
s->bits_sent = (s->bits_sent+7) & ~7;
#endif
}