diff options
Diffstat (limited to 'source4/lib/zlib/examples')
| -rw-r--r-- | source4/lib/zlib/examples/README.examples | 42 | ||||
| -rw-r--r-- | source4/lib/zlib/examples/fitblk.c | 233 | ||||
| -rw-r--r-- | source4/lib/zlib/examples/gun.c | 693 | ||||
| -rw-r--r-- | source4/lib/zlib/examples/gzappend.c | 500 | ||||
| -rw-r--r-- | source4/lib/zlib/examples/gzjoin.c | 448 | ||||
| -rw-r--r-- | source4/lib/zlib/examples/gzlog.c | 413 | ||||
| -rw-r--r-- | source4/lib/zlib/examples/gzlog.h | 58 | ||||
| -rw-r--r-- | source4/lib/zlib/examples/zlib_how.html | 523 | ||||
| -rw-r--r-- | source4/lib/zlib/examples/zpipe.c | 191 | ||||
| -rw-r--r-- | source4/lib/zlib/examples/zran.c | 404 | 
10 files changed, 3505 insertions, 0 deletions
diff --git a/source4/lib/zlib/examples/README.examples b/source4/lib/zlib/examples/README.examples new file mode 100644 index 0000000000..5632d7a4cc --- /dev/null +++ b/source4/lib/zlib/examples/README.examples @@ -0,0 +1,42 @@ +This directory contains examples of the use of zlib. + +fitblk.c +    compress just enough input to nearly fill a requested output size +    - zlib isn't designed to do this, but fitblk does it anyway + +gun.c +    uncompress a gzip file +    - illustrates the use of inflateBack() for high speed file-to-file +      decompression using call-back functions +    - is approximately twice as fast as gzip -d +    - also provides Unix uncompress functionality, again twice as fast + +gzappend.c +    append to a gzip file +    - illustrates the use of the Z_BLOCK flush parameter for inflate() +    - illustrates the use of deflatePrime() to start at any bit + +gzjoin.c +    join gzip files without recalculating the crc or recompressing +    - illustrates the use of the Z_BLOCK flush parameter for inflate() +    - illustrates the use of crc32_combine() + +gzlog.c +gzlog.h +    efficiently maintain a message log file in gzip format +    - illustrates use of raw deflate and Z_SYNC_FLUSH +    - illustrates use of gzip header extra field + +zlib_how.html +    painfully comprehensive description of zpipe.c (see below) +    - describes in excruciating detail the use of deflate() and inflate() + +zpipe.c +    reads and writes zlib streams from stdin to stdout +    - illustrates the proper use of deflate() and inflate() +    - deeply commented in zlib_how.html (see above) + +zran.c +    index a zlib or gzip stream and randomly access it +    - illustrates the use of Z_BLOCK, inflatePrime(), and +      inflateSetDictionary() to provide random access diff --git a/source4/lib/zlib/examples/fitblk.c b/source4/lib/zlib/examples/fitblk.c new file mode 100644 index 0000000000..c61de5c996 --- /dev/null +++ b/source4/lib/zlib/examples/fitblk.c @@ -0,0 +1,233 @@ +/* 
fitblk.c: example of fitting compressed output to a specified size +   Not copyrighted -- provided to the public domain +   Version 1.1  25 November 2004  Mark Adler */ + +/* Version history: +   1.0  24 Nov 2004  First version +   1.1  25 Nov 2004  Change deflateInit2() to deflateInit() +                     Use fixed-size, stack-allocated raw buffers +                     Simplify code moving compression to subroutines +                     Use assert() for internal errors +                     Add detailed description of approach + */ + +/* Approach to just fitting a requested compressed size: + +   fitblk performs three compression passes on a portion of the input +   data in order to determine how much of that input will compress to +   nearly the requested output block size.  The first pass generates +   enough deflate blocks to produce output to fill the requested +   output size plus a specified excess amount (see the EXCESS define +   below).  The last deflate block may go quite a bit past that, but +   is discarded.  The second pass decompresses and recompresses just +   the compressed data that fit in the requested plus excess sized +   buffer.  The deflate process is terminated after that amount of +   input, which is less than the amount consumed on the first pass. +   The last deflate block of the result will be of a comparable size +   to the final product, so that the header for that deflate block and +   the compression ratio for that block will be about the same as in +   the final product.  The third compression pass decompresses the +   result of the second step, but only the compressed data up to the +   requested size minus an amount to allow the compressed stream to +   complete (see the MARGIN define below).  That will result in a +   final compressed stream whose length is less than or equal to the +   requested size.  
Assuming sufficient input and a requested size +   greater than a few hundred bytes, the shortfall will typically be +   less than ten bytes. + +   If the input is short enough that the first compression completes +   before filling the requested output size, then that compressed +   stream is return with no recompression. + +   EXCESS is chosen to be just greater than the shortfall seen in a +   two pass approach similar to the above.  That shortfall is due to +   the last deflate block compressing more efficiently with a smaller +   header on the second pass.  EXCESS is set to be large enough so +   that there is enough uncompressed data for the second pass to fill +   out the requested size, and small enough so that the final deflate +   block of the second pass will be close in size to the final deflate +   block of the third and final pass.  MARGIN is chosen to be just +   large enough to assure that the final compression has enough room +   to complete in all cases. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include "zlib.h" + +#define local static + +/* print nastygram and leave */ +local void quit(char *why) +{ +    fprintf(stderr, "fitblk abort: %s\n", why); +    exit(1); +} + +#define RAWLEN 4096    /* intermediate uncompressed buffer size */ + +/* compress from file to def until provided buffer is full or end of +   input reached; return last deflate() return value, or Z_ERRNO if +   there was read error on the file */ +local int partcompress(FILE *in, z_streamp def) +{ +    int ret, flush; +    unsigned char raw[RAWLEN]; + +    flush = Z_NO_FLUSH; +    do { +        def->avail_in = fread(raw, 1, RAWLEN, in); +        if (ferror(in)) +            return Z_ERRNO; +        def->next_in = raw; +        if (feof(in)) +            flush = Z_FINISH; +        ret = deflate(def, flush); +        assert(ret != Z_STREAM_ERROR); +    } while (def->avail_out != 0 && flush == Z_NO_FLUSH); +    return ret; +} + +/* recompress from inf's 
input to def's output; the input for inf and +   the output for def are set in those structures before calling; +   return last deflate() return value, or Z_MEM_ERROR if inflate() +   was not able to allocate enough memory when it needed to */ +local int recompress(z_streamp inf, z_streamp def) +{ +    int ret, flush; +    unsigned char raw[RAWLEN]; + +    flush = Z_NO_FLUSH; +    do { +        /* decompress */ +        inf->avail_out = RAWLEN; +        inf->next_out = raw; +        ret = inflate(inf, Z_NO_FLUSH); +        assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && +               ret != Z_NEED_DICT); +        if (ret == Z_MEM_ERROR) +            return ret; + +        /* compress what was decompresed until done or no room */ +        def->avail_in = RAWLEN - inf->avail_out; +        def->next_in = raw; +        if (inf->avail_out != 0) +            flush = Z_FINISH; +        ret = deflate(def, flush); +        assert(ret != Z_STREAM_ERROR); +    } while (ret != Z_STREAM_END && def->avail_out != 0); +    return ret; +} + +#define EXCESS 256      /* empirically determined stream overage */ +#define MARGIN 8        /* amount to back off for completion */ + +/* compress from stdin to fixed-size block on stdout */ +int main(int argc, char **argv) +{ +    int ret;                /* return code */ +    unsigned size;          /* requested fixed output block size */ +    unsigned have;          /* bytes written by deflate() call */ +    unsigned char *blk;     /* intermediate and final stream */ +    unsigned char *tmp;     /* close to desired size stream */ +    z_stream def, inf;      /* zlib deflate and inflate states */ + +    /* get requested output size */ +    if (argc != 2) +        quit("need one argument: size of output block"); +    ret = strtol(argv[1], argv + 1, 10); +    if (argv[1][0] != 0) +        quit("argument must be a number"); +    if (ret < 8)            /* 8 is minimum zlib stream size */ +        quit("need positive size of 8 or 
greater"); +    size = (unsigned)ret; + +    /* allocate memory for buffers and compression engine */ +    blk = malloc(size + EXCESS); +    def.zalloc = Z_NULL; +    def.zfree = Z_NULL; +    def.opaque = Z_NULL; +    ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); +    if (ret != Z_OK || blk == NULL) +        quit("out of memory"); + +    /* compress from stdin until output full, or no more input */ +    def.avail_out = size + EXCESS; +    def.next_out = blk; +    ret = partcompress(stdin, &def); +    if (ret == Z_ERRNO) +        quit("error reading input"); + +    /* if it all fit, then size was undersubscribed -- done! */ +    if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { +        /* write block to stdout */ +        have = size + EXCESS - def.avail_out; +        if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) +            quit("error writing output"); + +        /* clean up and print results to stderr */ +        ret = deflateEnd(&def); +        assert(ret != Z_STREAM_ERROR); +        free(blk); +        fprintf(stderr, +                "%u bytes unused out of %u requested (all input)\n", +                size - have, size); +        return 0; +    } + +    /* it didn't all fit -- set up for recompression */ +    inf.zalloc = Z_NULL; +    inf.zfree = Z_NULL; +    inf.opaque = Z_NULL; +    inf.avail_in = 0; +    inf.next_in = Z_NULL; +    ret = inflateInit(&inf); +    tmp = malloc(size + EXCESS); +    if (ret != Z_OK || tmp == NULL) +        quit("out of memory"); +    ret = deflateReset(&def); +    assert(ret != Z_STREAM_ERROR); + +    /* do first recompression close to the right amount */ +    inf.avail_in = size + EXCESS; +    inf.next_in = blk; +    def.avail_out = size + EXCESS; +    def.next_out = tmp; +    ret = recompress(&inf, &def); +    if (ret == Z_MEM_ERROR) +        quit("out of memory"); + +    /* set up for next reocmpression */ +    ret = inflateReset(&inf); +    assert(ret != Z_STREAM_ERROR); +    ret = deflateReset(&def); + 
   assert(ret != Z_STREAM_ERROR); + +    /* do second and final recompression (third compression) */ +    inf.avail_in = size - MARGIN;   /* assure stream will complete */ +    inf.next_in = tmp; +    def.avail_out = size; +    def.next_out = blk; +    ret = recompress(&inf, &def); +    if (ret == Z_MEM_ERROR) +        quit("out of memory"); +    assert(ret == Z_STREAM_END);    /* otherwise MARGIN too small */ + +    /* done -- write block to stdout */ +    have = size - def.avail_out; +    if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) +        quit("error writing output"); + +    /* clean up and print results to stderr */ +    free(tmp); +    ret = inflateEnd(&inf); +    assert(ret != Z_STREAM_ERROR); +    ret = deflateEnd(&def); +    assert(ret != Z_STREAM_ERROR); +    free(blk); +    fprintf(stderr, +            "%u bytes unused out of %u requested (%lu input)\n", +            size - have, size, def.total_in); +    return 0; +} diff --git a/source4/lib/zlib/examples/gun.c b/source4/lib/zlib/examples/gun.c new file mode 100644 index 0000000000..bfec590a00 --- /dev/null +++ b/source4/lib/zlib/examples/gun.c @@ -0,0 +1,693 @@ +/* gun.c -- simple gunzip to give an example of the use of inflateBack() + * Copyright (C) 2003, 2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h +   Version 1.3  12 June 2005  Mark Adler */ + +/* Version history: +   1.0  16 Feb 2003  First version for testing of inflateBack() +   1.1  21 Feb 2005  Decompress concatenated gzip streams +                     Remove use of "this" variable (C++ keyword) +                     Fix return value for in() +                     Improve allocation failure checking +                     Add typecasting for void * structures +                     Add -h option for command version and usage +                     Add a bunch of comments +   1.2  20 Mar 2005  Add Unix compress (LZW) decompression +                     Copy file attributes from input 
file to output file +   1.3  12 Jun 2005  Add casts for error messages [Oberhumer] + */ + +/* +   gun [ -t ] [ name ... ] + +   decompresses the data in the named gzip files.  If no arguments are given, +   gun will decompress from stdin to stdout.  The names must end in .gz, -gz, +   .z, -z, _z, or .Z.  The uncompressed data will be written to a file name +   with the suffix stripped.  On success, the original file is deleted.  On +   failure, the output file is deleted.  For most failures, the command will +   continue to process the remaining names on the command line.  A memory +   allocation failure will abort the command.  If -t is specified, then the +   listed files or stdin will be tested as gzip files for integrity (without +   checking for a proper suffix), no output will be written, and no files +   will be deleted. + +   Like gzip, gun allows concatenated gzip streams and will decompress them, +   writing all of the uncompressed data to the output.  Unlike gzip, gun allows +   an empty file on input, and will produce no error writing an empty output +   file. + +   gun will also decompress files made by Unix compress, which uses LZW +   compression.  These files are automatically detected by virtue of their +   magic header bytes.  Since the end of Unix compress stream is marked by the +   end-of-file, they cannot be concatenated.  If a Unix compress stream is +   encountered in an input file, it is the last stream in that file. + +   Like gunzip and uncompress, the file attributes of the original compressed +   file are maintained in the final uncompressed file, to the extent that the +   user permissions allow it. + +   On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version +   1.2.4) is on the same file, when gun is linked with zlib 1.2.2.  Also the +   LZW decompression provided by gun is about twice as fast as the standard +   Unix uncompress command. 
+ */ + +/* external functions and related types and constants */ +#include <stdio.h>          /* fprintf() */ +#include <stdlib.h>         /* malloc(), free() */ +#include <string.h>         /* strerror(), strcmp(), strlen(), memcpy() */ +#include <errno.h>          /* errno */ +#include <fcntl.h>          /* open() */ +#include <unistd.h>         /* read(), write(), close(), chown(), unlink() */ +#include <sys/types.h> +#include <sys/stat.h>       /* stat(), chmod() */ +#include <utime.h>          /* utime() */ +#include "zlib.h"           /* inflateBackInit(), inflateBack(), */ +                            /* inflateBackEnd(), crc32() */ + +/* function declaration */ +#define local static + +/* buffer constants */ +#define SIZE 32768U         /* input and output buffer sizes */ +#define PIECE 16384         /* limits i/o chunks for 16-bit int case */ + +/* structure for infback() to pass to input function in() -- it maintains the +   input file and a buffer of size SIZE */ +struct ind { +    int infile; +    unsigned char *inbuf; +}; + +/* Load input buffer, assumed to be empty, and return bytes loaded and a +   pointer to them.  read() is called until the buffer is full, or until it +   returns end-of-file or error.  Return 0 on error. 
*/ +local unsigned in(void *in_desc, unsigned char **buf) +{ +    int ret; +    unsigned len; +    unsigned char *next; +    struct ind *me = (struct ind *)in_desc; + +    next = me->inbuf; +    *buf = next; +    len = 0; +    do { +        ret = PIECE; +        if ((unsigned)ret > SIZE - len) +            ret = (int)(SIZE - len); +        ret = (int)read(me->infile, next, ret); +        if (ret == -1) { +            len = 0; +            break; +        } +        next += ret; +        len += ret; +    } while (ret != 0 && len < SIZE); +    return len; +} + +/* structure for infback() to pass to output function out() -- it maintains the +   output file, a running CRC-32 check on the output and the total number of +   bytes output, both for checking against the gzip trailer.  (The length in +   the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and +   the output is greater than 4 GB.) */ +struct outd { +    int outfile; +    int check;                  /* true if checking crc and total */ +    unsigned long crc; +    unsigned long total; +}; + +/* Write output buffer and update the CRC-32 and total bytes written.  write() +   is called until all of the output is written or an error is encountered. +   On success out() returns 0.  For a write failure, out() returns 1.  If the +   output file descriptor is -1, then nothing is written. 
+ */ +local int out(void *out_desc, unsigned char *buf, unsigned len) +{ +    int ret; +    struct outd *me = (struct outd *)out_desc; + +    if (me->check) { +        me->crc = crc32(me->crc, buf, len); +        me->total += len; +    } +    if (me->outfile != -1) +        do { +            ret = PIECE; +            if ((unsigned)ret > len) +                ret = (int)len; +            ret = (int)write(me->outfile, buf, ret); +            if (ret == -1) +                return 1; +            buf += ret; +            len -= ret; +        } while (len != 0); +    return 0; +} + +/* next input byte macro for use inside lunpipe() and gunpipe() */ +#define NEXT() (have ? 0 : (have = in(indp, &next)), \ +                last = have ? (have--, (int)(*next++)) : -1) + +/* memory for gunpipe() and lunpipe() -- +   the first 256 entries of prefix[] and suffix[] are never used, could +   have offset the index, but it's faster to waste the memory */ +unsigned char inbuf[SIZE];              /* input buffer */ +unsigned char outbuf[SIZE];             /* output buffer */ +unsigned short prefix[65536];           /* index to LZW prefix string */ +unsigned char suffix[65536];            /* one-character LZW suffix */ +unsigned char match[65280 + 2];         /* buffer for reversed match or gzip +                                           32K sliding window */ + +/* throw out what's left in the current bits byte buffer (this is a vestigial +   aspect of the compressed data format derived from an implementation that +   made use of a special VAX machine instruction!) 
*/ +#define FLUSHCODE() \ +    do { \ +        left = 0; \ +        rem = 0; \ +        if (chunk > have) { \ +            chunk -= have; \ +            have = 0; \ +            if (NEXT() == -1) \ +                break; \ +            chunk--; \ +            if (chunk > have) { \ +                chunk = have = 0; \ +                break; \ +            } \ +        } \ +        have -= chunk; \ +        next += chunk; \ +        chunk = 0; \ +    } while (0) + +/* Decompress a compress (LZW) file from indp to outfile.  The compress magic +   header (two bytes) has already been read and verified.  There are have bytes +   of buffered input at next.  strm is used for passing error information back +   to gunpipe(). + +   lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of +   file, read error, or write error (a write error indicated by strm->next_in +   not equal to Z_NULL), or Z_DATA_ERROR for invalid input. + */ +local int lunpipe(unsigned have, unsigned char *next, struct ind *indp, +                  int outfile, z_stream *strm) +{ +    int last;                   /* last byte read by NEXT(), or -1 if EOF */ +    int chunk;                  /* bytes left in current chunk */ +    int left;                   /* bits left in rem */ +    unsigned rem;               /* unused bits from input */ +    int bits;                   /* current bits per code */ +    unsigned code;              /* code, table traversal index */ +    unsigned mask;              /* mask for current bits codes */ +    int max;                    /* maximum bits per code for this stream */ +    int flags;                  /* compress flags, then block compress flag */ +    unsigned end;               /* last valid entry in prefix/suffix tables */ +    unsigned temp;              /* current code */ +    unsigned prev;              /* previous code */ +    unsigned final;             /* last character written for previous code */ +    unsigned stack;             /* next 
position for reversed string */ +    unsigned outcnt;            /* bytes in output buffer */ +    struct outd outd;           /* output structure */ + +    /* set up output */ +    outd.outfile = outfile; +    outd.check = 0; + +    /* process remainder of compress header -- a flags byte */ +    flags = NEXT(); +    if (last == -1) +        return Z_BUF_ERROR; +    if (flags & 0x60) { +        strm->msg = (char *)"unknown lzw flags set"; +        return Z_DATA_ERROR; +    } +    max = flags & 0x1f; +    if (max < 9 || max > 16) { +        strm->msg = (char *)"lzw bits out of range"; +        return Z_DATA_ERROR; +    } +    if (max == 9)                           /* 9 doesn't really mean 9 */ +        max = 10; +    flags &= 0x80;                          /* true if block compress */ + +    /* clear table */ +    bits = 9; +    mask = 0x1ff; +    end = flags ? 256 : 255; + +    /* set up: get first 9-bit code, which is the first decompressed byte, but +       don't create a table entry until the next code */ +    if (NEXT() == -1)                       /* no compressed data is ok */ +        return Z_OK; +    final = prev = (unsigned)last;          /* low 8 bits of code */ +    if (NEXT() == -1)                       /* missing a bit */ +        return Z_BUF_ERROR; +    if (last & 1) {                         /* code must be < 256 */ +        strm->msg = (char *)"invalid lzw code"; +        return Z_DATA_ERROR; +    } +    rem = (unsigned)last >> 1;              /* remaining 7 bits */ +    left = 7; +    chunk = bits - 2;                       /* 7 bytes left in this chunk */ +    outbuf[0] = (unsigned char)final;       /* write first decompressed byte */ +    outcnt = 1; + +    /* decode codes */ +    stack = 0; +    for (;;) { +        /* if the table will be full after this, increment the code size */ +        if (end >= mask && bits < max) { +            FLUSHCODE(); +            bits++; +            mask <<= 1; +            mask++; +        } + +        /* 
get a code of length bits */ +        if (chunk == 0)                     /* decrement chunk modulo bits */ +            chunk = bits; +        code = rem;                         /* low bits of code */ +        if (NEXT() == -1) {                 /* EOF is end of compressed data */ +            /* write remaining buffered output */ +            if (outcnt && out(&outd, outbuf, outcnt)) { +                strm->next_in = outbuf;     /* signal write error */ +                return Z_BUF_ERROR; +            } +            return Z_OK; +        } +        code += (unsigned)last << left;     /* middle (or high) bits of code */ +        left += 8; +        chunk--; +        if (bits > left) {                  /* need more bits */ +            if (NEXT() == -1)               /* can't end in middle of code */ +                return Z_BUF_ERROR; +            code += (unsigned)last << left; /* high bits of code */ +            left += 8; +            chunk--; +        } +        code &= mask;                       /* mask to current code length */ +        left -= bits;                       /* number of unused bits */ +        rem = (unsigned)last >> (8 - left); /* unused bits from last byte */ + +        /* process clear code (256) */ +        if (code == 256 && flags) { +            FLUSHCODE(); +            bits = 9;                       /* initialize bits and mask */ +            mask = 0x1ff; +            end = 255;                      /* empty table */ +            continue;                       /* get next code */ +        } + +        /* special code to reuse last match */ +        temp = code;                        /* save the current code */ +        if (code > end) { +            /* Be picky on the allowed code here, and make sure that the code +               we drop through (prev) will be a valid index so that random +               input does not cause an exception.  
The code != end + 1 check is +               empirically derived, and not checked in the original uncompress +               code.  If this ever causes a problem, that check could be safely +               removed.  Leaving this check in greatly improves gun's ability +               to detect random or corrupted input after a compress header. +               In any case, the prev > end check must be retained. */ +            if (code != end + 1 || prev > end) { +                strm->msg = (char *)"invalid lzw code"; +                return Z_DATA_ERROR; +            } +            match[stack++] = (unsigned char)final; +            code = prev; +        } + +        /* walk through linked list to generate output in reverse order */ +        while (code >= 256) { +            match[stack++] = suffix[code]; +            code = prefix[code]; +        } +        match[stack++] = (unsigned char)code; +        final = code; + +        /* link new table entry */ +        if (end < mask) { +            end++; +            prefix[end] = (unsigned short)prev; +            suffix[end] = (unsigned char)final; +        } + +        /* set previous code for next iteration */ +        prev = temp; + +        /* write output in forward order */ +        while (stack > SIZE - outcnt) { +            while (outcnt < SIZE) +                outbuf[outcnt++] = match[--stack]; +            if (out(&outd, outbuf, outcnt)) { +                strm->next_in = outbuf; /* signal write error */ +                return Z_BUF_ERROR; +            } +            outcnt = 0; +        } +        do { +            outbuf[outcnt++] = match[--stack]; +        } while (stack); + +        /* loop for next code with final and prev as the last match, rem and +           left provide the first 0..7 bits of the next code, end is the last +           valid table entry */ +    } +} + +/* Decompress a gzip file from infile to outfile.  
strm is assumed to have been +   successfully initialized with inflateBackInit().  The input file may consist +   of a series of gzip streams, in which case all of them will be decompressed +   to the output file.  If outfile is -1, then the gzip stream(s) integrity is +   checked and nothing is written. + +   The return value is a zlib error code: Z_MEM_ERROR if out of memory, +   Z_DATA_ERROR if the header or the compressed data is invalid, or if the +   trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends +   prematurely or a write error occurs, or Z_ERRNO if junk (not a another gzip +   stream) follows a valid gzip stream. + */ +local int gunpipe(z_stream *strm, int infile, int outfile) +{ +    int ret, first, last; +    unsigned have, flags, len; +    unsigned char *next; +    struct ind ind, *indp; +    struct outd outd; + +    /* setup input buffer */ +    ind.infile = infile; +    ind.inbuf = inbuf; +    indp = &ind; + +    /* decompress concatenated gzip streams */ +    have = 0;                               /* no input data read in yet */ +    first = 1;                              /* looking for first gzip header */ +    strm->next_in = Z_NULL;                 /* so Z_BUF_ERROR means EOF */ +    for (;;) { +        /* look for the two magic header bytes for a gzip stream */ +        if (NEXT() == -1) { +            ret = Z_OK; +            break;                          /* empty gzip stream is ok */ +        } +        if (last != 31 || (NEXT() != 139 && last != 157)) { +            strm->msg = (char *)"incorrect header check"; +            ret = first ? 
Z_DATA_ERROR : Z_ERRNO; +            break;                          /* not a gzip or compress header */ +        } +        first = 0;                          /* next non-header is junk */ + +        /* process a compress (LZW) file -- can't be concatenated after this */ +        if (last == 157) { +            ret = lunpipe(have, next, indp, outfile, strm); +            break; +        } + +        /* process remainder of gzip header */ +        ret = Z_BUF_ERROR; +        if (NEXT() != 8) {                  /* only deflate method allowed */ +            if (last == -1) break; +            strm->msg = (char *)"unknown compression method"; +            ret = Z_DATA_ERROR; +            break; +        } +        flags = NEXT();                     /* header flags */ +        NEXT();                             /* discard mod time, xflgs, os */ +        NEXT(); +        NEXT(); +        NEXT(); +        NEXT(); +        NEXT(); +        if (last == -1) break; +        if (flags & 0xe0) { +            strm->msg = (char *)"unknown header flags set"; +            ret = Z_DATA_ERROR; +            break; +        } +        if (flags & 4) {                    /* extra field */ +            len = NEXT(); +            len += (unsigned)(NEXT()) << 8; +            if (last == -1) break; +            while (len > have) { +                len -= have; +                have = 0; +                if (NEXT() == -1) break; +                len--; +            } +            if (last == -1) break; +            have -= len; +            next += len; +        } +        if (flags & 8)                      /* file name */ +            while (NEXT() != 0 && last != -1) +                ; +        if (flags & 16)                     /* comment */ +            while (NEXT() != 0 && last != -1) +                ; +        if (flags & 2) {                    /* header crc */ +            NEXT(); +            NEXT(); +        } +        if (last == -1) break; + +        /* set up output */ 
+        outd.outfile = outfile; +        outd.check = 1; +        outd.crc = crc32(0L, Z_NULL, 0); +        outd.total = 0; + +        /* decompress data to output */ +        strm->next_in = next; +        strm->avail_in = have; +        ret = inflateBack(strm, in, indp, out, &outd); +        if (ret != Z_STREAM_END) break; +        next = strm->next_in; +        have = strm->avail_in; +        strm->next_in = Z_NULL;             /* so Z_BUF_ERROR means EOF */ + +        /* check trailer */ +        ret = Z_BUF_ERROR; +        if (NEXT() != (outd.crc & 0xff) || +            NEXT() != ((outd.crc >> 8) & 0xff) || +            NEXT() != ((outd.crc >> 16) & 0xff) || +            NEXT() != ((outd.crc >> 24) & 0xff)) { +            /* crc error */ +            if (last != -1) { +                strm->msg = (char *)"incorrect data check"; +                ret = Z_DATA_ERROR; +            } +            break; +        } +        if (NEXT() != (outd.total & 0xff) || +            NEXT() != ((outd.total >> 8) & 0xff) || +            NEXT() != ((outd.total >> 16) & 0xff) || +            NEXT() != ((outd.total >> 24) & 0xff)) { +            /* length error */ +            if (last != -1) { +                strm->msg = (char *)"incorrect length check"; +                ret = Z_DATA_ERROR; +            } +            break; +        } + +        /* go back and look for another gzip stream */ +    } + +    /* clean up and return */ +    return ret; +} + +/* Copy file attributes, from -> to, as best we can.  This is best effort, so +   no errors are reported.  The mode bits, including suid, sgid, and the sticky +   bit are copied (if allowed), the owner's user id and group id are copied +   (again if allowed), and the access and modify times are copied. 
*/ +local void copymeta(char *from, char *to) +{ +    struct stat was; +    struct utimbuf when; + +    /* get all of from's Unix meta data, return if not a regular file */ +    if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG) +        return; + +    /* set to's mode bits, ignore errors */ +    (void)chmod(to, was.st_mode & 07777); + +    /* copy owner's user and group, ignore errors */ +    (void)chown(to, was.st_uid, was.st_gid); + +    /* copy access and modify times, ignore errors */ +    when.actime = was.st_atime; +    when.modtime = was.st_mtime; +    (void)utime(to, &when); +} + +/* Decompress the file inname to the file outnname, of if test is true, just +   decompress without writing and check the gzip trailer for integrity.  If +   inname is NULL or an empty string, read from stdin.  If outname is NULL or +   an empty string, write to stdout.  strm is a pre-initialized inflateBack +   structure.  When appropriate, copy the file attributes from inname to +   outname. + +   gunzip() returns 1 if there is an out-of-memory error or an unexpected +   return code from gunpipe().  Otherwise it returns 0. 
+ */ +local int gunzip(z_stream *strm, char *inname, char *outname, int test) +{ +    int ret; +    int infile, outfile; + +    /* open files */ +    if (inname == NULL || *inname == 0) { +        inname = "-"; +        infile = 0;     /* stdin */ +    } +    else { +        infile = open(inname, O_RDONLY, 0); +        if (infile == -1) { +            fprintf(stderr, "gun cannot open %s\n", inname); +            return 0; +        } +    } +    if (test) +        outfile = -1; +    else if (outname == NULL || *outname == 0) { +        outname = "-"; +        outfile = 1;    /* stdout */ +    } +    else { +        outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666); +        if (outfile == -1) { +            close(infile); +            fprintf(stderr, "gun cannot create %s\n", outname); +            return 0; +        } +    } +    errno = 0; + +    /* decompress */ +    ret = gunpipe(strm, infile, outfile); +    if (outfile > 2) close(outfile); +    if (infile > 2) close(infile); + +    /* interpret result */ +    switch (ret) { +    case Z_OK: +    case Z_ERRNO: +        if (infile > 2 && outfile > 2) { +            copymeta(inname, outname);          /* copy attributes */ +            unlink(inname); +        } +        if (ret == Z_ERRNO) +            fprintf(stderr, "gun warning: trailing garbage ignored in %s\n", +                    inname); +        break; +    case Z_DATA_ERROR: +        if (outfile > 2) unlink(outname); +        fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg); +        break; +    case Z_MEM_ERROR: +        if (outfile > 2) unlink(outname); +        fprintf(stderr, "gun out of memory error--aborting\n"); +        return 1; +    case Z_BUF_ERROR: +        if (outfile > 2) unlink(outname); +        if (strm->next_in != Z_NULL) { +            fprintf(stderr, "gun write error on %s: %s\n", +                    outname, strerror(errno)); +        } +        else if (errno) { +            fprintf(stderr, "gun read error 
on %s: %s\n", +                    inname, strerror(errno)); +        } +        else { +            fprintf(stderr, "gun unexpected end of file on %s\n", +                    inname); +        } +        break; +    default: +        if (outfile > 2) unlink(outname); +        fprintf(stderr, "gun internal error--aborting\n"); +        return 1; +    } +    return 0; +} + +/* Process the gun command line arguments.  See the command syntax near the +   beginning of this source file. */ +int main(int argc, char **argv) +{ +    int ret, len, test; +    char *outname; +    unsigned char *window; +    z_stream strm; + +    /* initialize inflateBack state for repeated use */ +    window = match;                         /* reuse LZW match buffer */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    ret = inflateBackInit(&strm, 15, window); +    if (ret != Z_OK) { +        fprintf(stderr, "gun out of memory error--aborting\n"); +        return 1; +    } + +    /* decompress each file to the same name with the suffix removed */ +    argc--; +    argv++; +    test = 0; +    if (argc && strcmp(*argv, "-h") == 0) { +        fprintf(stderr, "gun 1.3 (12 Jun 2005)\n"); +        fprintf(stderr, "Copyright (c) 2005 Mark Adler\n"); +        fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n"); +        return 0; +    } +    if (argc && strcmp(*argv, "-t") == 0) { +        test = 1; +        argc--; +        argv++; +    } +    if (argc) +        do { +            if (test) +                outname = NULL; +            else { +                len = (int)strlen(*argv); +                if (strcmp(*argv + len - 3, ".gz") == 0 || +                    strcmp(*argv + len - 3, "-gz") == 0) +                    len -= 3; +                else if (strcmp(*argv + len - 2, ".z") == 0 || +                    strcmp(*argv + len - 2, "-z") == 0 || +                    strcmp(*argv + len - 2, "_z") == 0 || +                    strcmp(*argv + len - 2, 
".Z") == 0) +                    len -= 2; +                else { +                    fprintf(stderr, "gun error: no gz type on %s--skipping\n", +                            *argv); +                    continue; +                } +                outname = malloc(len + 1); +                if (outname == NULL) { +                    fprintf(stderr, "gun out of memory error--aborting\n"); +                    ret = 1; +                    break; +                } +                memcpy(outname, *argv, len); +                outname[len] = 0; +            } +            ret = gunzip(&strm, *argv, outname, test); +            if (outname != NULL) free(outname); +            if (ret) break; +        } while (argv++, --argc); +    else +        ret = gunzip(&strm, NULL, NULL, test); + +    /* clean up */ +    inflateBackEnd(&strm); +    return ret; +} diff --git a/source4/lib/zlib/examples/gzappend.c b/source4/lib/zlib/examples/gzappend.c new file mode 100644 index 0000000000..e9e878e116 --- /dev/null +++ b/source4/lib/zlib/examples/gzappend.c @@ -0,0 +1,500 @@ +/* gzappend -- command to append to a gzip file + +  Copyright (C) 2003 Mark Adler, all rights reserved +  version 1.1, 4 Nov 2003 + +  This software is provided 'as-is', without any express or implied +  warranty.  In no event will the author be held liable for any damages +  arising from the use of this software. + +  Permission is granted to anyone to use this software for any purpose, +  including commercial applications, and to alter it and redistribute it +  freely, subject to the following restrictions: + +  1. The origin of this software must not be misrepresented; you must not +     claim that you wrote the original software. If you use this software +     in a product, an acknowledgment in the product documentation would be +     appreciated but is not required. +  2. Altered source versions must be plainly marked as such, and must not be +     misrepresented as being the original software. +  3. 
This notice may not be removed or altered from any source distribution. + +  Mark Adler    madler@alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0  19 Oct 2003     - First version + * 1.1   4 Nov 2003     - Expand and clarify some comments and notes + *                      - Add version and copyright to help + *                      - Send help to stdout instead of stderr + *                      - Add some preemptive typecasts + *                      - Add L to constants in lseek() calls + *                      - Remove some debugging information in error messages + *                      - Use new data_type definition for zlib 1.2.1 + *                      - Simplify and unify file operations + *                      - Finish off gzip file in gztack() + *                      - Use deflatePrime() instead of adding empty blocks + *                      - Keep gzip file clean on appended file read errors + *                      - Use in-place rotate instead of auxiliary buffer + *                        (Why you ask?  Because it was fun to write!) + */ + +/* +   gzappend takes a gzip file and appends to it, compressing files from the +   command line or data from stdin.  The gzip file is written to directly, to +   avoid copying that file, in case it's large.  Note that this results in the +   unfriendly behavior that if gzappend fails, the gzip file is corrupted. + +   This program was written to illustrate the use of the new Z_BLOCK option of +   zlib 1.2.x's inflate() function.  This option returns from inflate() at each +   block boundary to facilitate locating and modifying the last block bit at +   the start of the final deflate block.  Also whether using Z_BLOCK or not, +   another required feature of zlib 1.2.x is that inflate() now provides the +   number of unused bits in the last input byte used.  gzappend will not work +   with versions of zlib earlier than 1.2.1. 
+ +   gzappend first decompresses the gzip file internally, discarding all but +   the last 32K of uncompressed data, and noting the location of the last block +   bit and the number of unused bits in the last byte of the compressed data. +   The gzip trailer containing the CRC-32 and length of the uncompressed data +   is verified.  This trailer will be later overwritten. + +   Then the last block bit is cleared by seeking back in the file and rewriting +   the byte that contains it.  Seeking forward, the last byte of the compressed +   data is saved along with the number of unused bits to initialize deflate. + +   A deflate process is initialized, using the last 32K of the uncompressed +   data from the gzip file to initialize the dictionary.  If the total +   uncompressed data was less than 32K, then all of it is used to initialize +   the dictionary.  The deflate output bit buffer is also initialized with the +   last bits from the original deflate stream.  From here on, the data to +   append is simply compressed using deflate, and written to the gzip file. +   When that is complete, the new CRC-32 and uncompressed length are written +   as the trailer of the gzip file. 
#define local static
#define LGCHUNK 14
#define CHUNK (1U << LGCHUNK)
#define DSIZE 32768U

/* print an error message and terminate with extreme prejudice */
local void bye(char *msg1, char *msg2)
{
    fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2);
    exit(1);
}

/* return the greatest common divisor of a and b using Euclid's algorithm,
   modified to be fast when one argument much greater than the other, and
   coded to avoid unnecessary swapping -- if either argument is zero, the
   other one is returned */
local unsigned gcd(unsigned a, unsigned b)
{
    unsigned c;

    while (a && b)
        if (a > b) {
            /* subtract the largest power-of-two multiple of b that fits */
            c = b;
            while (a - c >= c)
                c <<= 1;
            a -= c;
        }
        else {
            c = a;
            while (b - c >= c)
                c <<= 1;
            b -= c;
        }
    return a + b;           /* one of them is zero here */
}

/* rotate list[0..len-1] left by rot positions, in place -- rot is reduced
   modulo len, and lists shorter than two entries are left untouched */
local void rotate(unsigned char *list, unsigned len, unsigned rot)
{
    unsigned char tmp;
    unsigned cycles;
    unsigned char *start, *last, *to, *from;

    /* normalize rot and handle degenerate cases */
    if (len < 2) return;
    if (rot >= len) rot %= len;
    if (rot == 0) return;

    /* pointer to last entry in list */
    last = list + (len - 1);

    /* do simple left shift by one -- source and destination overlap, so
       this has to be memmove(), not memcpy() */
    if (rot == 1) {
        tmp = *list;
        memmove(list, list + 1, len - 1);
        *last = tmp;
        return;
    }

    /* do simple right shift by one */
    if (rot == len - 1) {
        tmp = *last;
        memmove(list + 1, list, len - 1);
        *list = tmp;
        return;
    }

    /* otherwise do rotate as a set of cycles in place */
    cycles = gcd(len, rot);             /* number of cycles */
    do {
        start = from = list + cycles;   /* start index is arbitrary */
        tmp = *from;                    /* save entry to be overwritten */
        for (;;) {
            to = from;                  /* next step in cycle */
            from += rot;                /* go right rot positions */
            if (from > last) from -= len;   /* (pointer better not wrap) */
            if (from == start) break;   /* all but one shifted */
            *to = *from;                /* shift left */
        }
        *to = tmp;                      /* complete the circle */
    } while (--cycles);
}

/* structure for gzip file read operations */
typedef struct {
    int fd;                     /* file descriptor */
    int size;                   /* 1 << size is bytes in buf */
    unsigned left;              /* bytes available at next */
    unsigned char *buf;         /* buffer */
    unsigned char *next;        /* next byte in buffer */
    char *name;                 /* file name for error messages */
} file;

/* reload buffer -- returns the number of bytes read, which is zero at end of
   file; a read error terminates the program */
local int readin(file *in)
{
    int len;

    len = read(in->fd, in->buf, 1 << in->size);
    if (len == -1) bye("error reading ", in->name);
    in->left = (unsigned)len;
    in->next = in->buf;
    return len;
}

/* read from file in, exit if end-of-file -- always returns 0 so that it can
   be used in the read1() expression below */
local int readmore(file *in)
{
    if (readin(in) == 0) bye("unexpected end of ", in->name);
    return 0;
}

/* get the next byte from in, reloading the buffer first if it is empty */
#define read1(in) ((in)->left == 0 ? readmore(in) : 0, \
                   (in)->left--, *((in)->next)++)
readmore(in) : 0, \ +                   in->left--, *(in->next)++) + +/* skip over n bytes of in */ +local void skip(file *in, unsigned n) +{ +    unsigned bypass; + +    if (n > in->left) { +        n -= in->left; +        bypass = n & ~((1U << in->size) - 1); +        if (bypass) { +            if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1) +                bye("seeking ", in->name); +            n -= bypass; +        } +        readmore(in); +        if (n > in->left) +            bye("unexpected end of ", in->name); +    } +    in->left -= n; +    in->next += n; +} + +/* read a four-byte unsigned integer, little-endian, from in */ +unsigned long read4(file *in) +{ +    unsigned long val; + +    val = read1(in); +    val += (unsigned)read1(in) << 8; +    val += (unsigned long)read1(in) << 16; +    val += (unsigned long)read1(in) << 24; +    return val; +} + +/* skip over gzip header */ +local void gzheader(file *in) +{ +    int flags; +    unsigned n; + +    if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file"); +    if (read1(in) != 8) bye("unknown compression method in", in->name); +    flags = read1(in); +    if (flags & 0xe0) bye("unknown header flags set in", in->name); +    skip(in, 6); +    if (flags & 4) { +        n = read1(in); +        n += (unsigned)(read1(in)) << 8; +        skip(in, n); +    } +    if (flags & 8) while (read1(in) != 0) ; +    if (flags & 16) while (read1(in) != 0) ; +    if (flags & 2) skip(in, 2); +} + +/* decompress gzip file "name", return strm with a deflate stream ready to +   continue compression of the data in the gzip file, and return a file +   descriptor pointing to where to write the compressed data -- the deflate +   stream is initialized to compress using level "level" */ +local int gzscan(char *name, z_stream *strm, int level) +{ +    int ret, lastbit, left, full; +    unsigned have; +    unsigned long crc, tot; +    unsigned char *window; +    off_t lastoff, end; +    file gz; + +    /* open 
gzip file */ +    gz.name = name; +    gz.fd = open(name, O_RDWR, 0); +    if (gz.fd == -1) bye("cannot open ", name); +    gz.buf = malloc(CHUNK); +    if (gz.buf == NULL) bye("out of memory", ""); +    gz.size = LGCHUNK; +    gz.left = 0; + +    /* skip gzip header */ +    gzheader(&gz); + +    /* prepare to decompress */ +    window = malloc(DSIZE); +    if (window == NULL) bye("out of memory", ""); +    strm->zalloc = Z_NULL; +    strm->zfree = Z_NULL; +    strm->opaque = Z_NULL; +    ret = inflateInit2(strm, -15); +    if (ret != Z_OK) bye("out of memory", " or library mismatch"); + +    /* decompress the deflate stream, saving append information */ +    lastbit = 0; +    lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; +    left = 0; +    strm->avail_in = gz.left; +    strm->next_in = gz.next; +    crc = crc32(0L, Z_NULL, 0); +    have = full = 0; +    do { +        /* if needed, get more input */ +        if (strm->avail_in == 0) { +            readmore(&gz); +            strm->avail_in = gz.left; +            strm->next_in = gz.next; +        } + +        /* set up output to next available section of sliding window */ +        strm->avail_out = DSIZE - have; +        strm->next_out = window + have; + +        /* inflate and check for errors */ +        ret = inflate(strm, Z_BLOCK); +        if (ret == Z_STREAM_ERROR) bye("internal stream error!", ""); +        if (ret == Z_MEM_ERROR) bye("out of memory", ""); +        if (ret == Z_DATA_ERROR) +            bye("invalid compressed data--format violated in", name); + +        /* update crc and sliding window pointer */ +        crc = crc32(crc, window + have, DSIZE - have - strm->avail_out); +        if (strm->avail_out) +            have = DSIZE - strm->avail_out; +        else { +            have = 0; +            full = 1; +        } + +        /* process end of block */ +        if (strm->data_type & 128) { +            if (strm->data_type & 64) +                left = strm->data_type & 0x1f; +            
else { +                lastbit = strm->data_type & 0x1f; +                lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in; +            } +        } +    } while (ret != Z_STREAM_END); +    inflateEnd(strm); +    gz.left = strm->avail_in; +    gz.next = strm->next_in; + +    /* save the location of the end of the compressed data */ +    end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; + +    /* check gzip trailer and save total for deflate */ +    if (crc != read4(&gz)) +        bye("invalid compressed data--crc mismatch in ", name); +    tot = strm->total_out; +    if ((tot & 0xffffffffUL) != read4(&gz)) +        bye("invalid compressed data--length mismatch in", name); + +    /* if not at end of file, warn */ +    if (gz.left || readin(&gz)) +        fprintf(stderr, +            "gzappend warning: junk at end of gzip file overwritten\n"); + +    /* clear last block bit */ +    lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET); +    if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); +    *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7))); +    lseek(gz.fd, -1L, SEEK_CUR); +    if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name); + +    /* if window wrapped, build dictionary from window by rotating */ +    if (full) { +        rotate(window, DSIZE, have); +        have = DSIZE; +    } + +    /* set up deflate stream with window, crc, total_in, and leftover bits */ +    ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); +    if (ret != Z_OK) bye("out of memory", ""); +    deflateSetDictionary(strm, window, have); +    strm->adler = crc; +    strm->total_in = tot; +    if (left) { +        lseek(gz.fd, --end, SEEK_SET); +        if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); +        deflatePrime(strm, 8 - left, *gz.buf); +    } +    lseek(gz.fd, end, SEEK_SET); + +    /* clean up and return */ +    free(window); +    free(gz.buf); +    return gz.fd; +} + +/* append file 
"name" to gzip file gd using deflate stream strm -- if last +   is true, then finish off the deflate stream at the end */ +local void gztack(char *name, int gd, z_stream *strm, int last) +{ +    int fd, len, ret; +    unsigned left; +    unsigned char *in, *out; + +    /* open file to compress and append */ +    fd = 0; +    if (name != NULL) { +        fd = open(name, O_RDONLY, 0); +        if (fd == -1) +            fprintf(stderr, "gzappend warning: %s not found, skipping ...\n", +                    name); +    } + +    /* allocate buffers */ +    in = fd == -1 ? NULL : malloc(CHUNK); +    out = malloc(CHUNK); +    if (out == NULL) bye("out of memory", ""); + +    /* compress input file and append to gzip file */ +    do { +        /* get more input */ +        len = fd == -1 ? 0 : read(fd, in, CHUNK); +        if (len == -1) { +            fprintf(stderr, +                    "gzappend warning: error reading %s, skipping rest ...\n", +                    name); +            len = 0; +        } +        strm->avail_in = (unsigned)len; +        strm->next_in = in; +        if (len) strm->adler = crc32(strm->adler, in, (unsigned)len); + +        /* compress and write all available output */ +        do { +            strm->avail_out = CHUNK; +            strm->next_out = out; +            ret = deflate(strm, last && len == 0 ? 
Z_FINISH : Z_NO_FLUSH); +            left = CHUNK - strm->avail_out; +            while (left) { +                len = write(gd, out + CHUNK - strm->avail_out - left, left); +                if (len == -1) bye("writing gzip file", ""); +                left -= (unsigned)len; +            } +        } while (strm->avail_out == 0 && ret != Z_STREAM_END); +    } while (len != 0); + +    /* write trailer after last entry */ +    if (last) { +        deflateEnd(strm); +        out[0] = (unsigned char)(strm->adler); +        out[1] = (unsigned char)(strm->adler >> 8); +        out[2] = (unsigned char)(strm->adler >> 16); +        out[3] = (unsigned char)(strm->adler >> 24); +        out[4] = (unsigned char)(strm->total_in); +        out[5] = (unsigned char)(strm->total_in >> 8); +        out[6] = (unsigned char)(strm->total_in >> 16); +        out[7] = (unsigned char)(strm->total_in >> 24); +        len = 8; +        do { +            ret = write(gd, out + 8 - len, len); +            if (ret == -1) bye("writing gzip file", ""); +            len -= ret; +        } while (len); +        close(gd); +    } + +    /* clean up and return */ +    free(out); +    if (in != NULL) free(in); +    if (fd > 0) close(fd); +} + +/* process the compression level option if present, scan the gzip file, and +   append the specified files, or append the data from stdin if no other file +   names are provided on the command line -- the gzip file must be writable +   and seekable */ +int main(int argc, char **argv) +{ +    int gd, level; +    z_stream strm; + +    /* ignore command name */ +    argv++; + +    /* provide usage if no arguments */ +    if (*argv == NULL) { +        printf("gzappend 1.1 (4 Nov 2003) Copyright (C) 2003 Mark Adler\n"); +        printf( +            "usage: gzappend [-level] file.gz [ addthis [ andthis ... 
]]\n"); +        return 0; +    } + +    /* set compression level */ +    level = Z_DEFAULT_COMPRESSION; +    if (argv[0][0] == '-') { +        if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0) +            bye("invalid compression level", ""); +        level = argv[0][1] - '0'; +        if (*++argv == NULL) bye("no gzip file name after options", ""); +    } + +    /* prepare to append to gzip file */ +    gd = gzscan(*argv++, &strm, level); + +    /* append files on command line, or from stdin if none */ +    if (*argv == NULL) +        gztack(NULL, gd, &strm, 1); +    else +        do { +            gztack(*argv, gd, &strm, argv[1] == NULL); +        } while (*++argv != NULL); +    return 0; +} diff --git a/source4/lib/zlib/examples/gzjoin.c b/source4/lib/zlib/examples/gzjoin.c new file mode 100644 index 0000000000..129347ce3c --- /dev/null +++ b/source4/lib/zlib/examples/gzjoin.c @@ -0,0 +1,448 @@ +/* gzjoin -- command to join gzip files into one gzip file + +  Copyright (C) 2004 Mark Adler, all rights reserved +  version 1.0, 11 Dec 2004 + +  This software is provided 'as-is', without any express or implied +  warranty.  In no event will the author be held liable for any damages +  arising from the use of this software. + +  Permission is granted to anyone to use this software for any purpose, +  including commercial applications, and to alter it and redistribute it +  freely, subject to the following restrictions: + +  1. The origin of this software must not be misrepresented; you must not +     claim that you wrote the original software. If you use this software +     in a product, an acknowledgment in the product documentation would be +     appreciated but is not required. +  2. Altered source versions must be plainly marked as such, and must not be +     misrepresented as being the original software. +  3. This notice may not be removed or altered from any source distribution. 
+ +  Mark Adler    madler@alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0  11 Dec 2004     - First version + * 1.1  12 Jun 2005     - Changed ssize_t to long for portability + */ + +/* +   gzjoin takes one or more gzip files on the command line and writes out a +   single gzip file that will uncompress to the concatenation of the +   uncompressed data from the individual gzip files.  gzjoin does this without +   having to recompress any of the data and without having to calculate a new +   crc32 for the concatenated uncompressed data.  gzjoin does however have to +   decompress all of the input data in order to find the bits in the compressed +   data that need to be modified to concatenate the streams. + +   gzjoin does not do an integrity check on the input gzip files other than +   checking the gzip header and decompressing the compressed data.  They are +   otherwise assumed to be complete and correct. + +   Each joint between gzip files removes at least 18 bytes of previous trailer +   and subsequent header, and inserts an average of about three bytes to the +   compressed data in order to connect the streams.  The output gzip file +   has a minimal ten-byte gzip header with no file name or modification time. + +   This program was written to illustrate the use of the Z_BLOCK option of +   inflate() and the crc32_combine() function.  gzjoin will not compile with +   versions of zlib earlier than 1.2.3. 
#define local static

/* exit with an error (return a value to allow use in an expression) */
local int bail(char *why1, char *why2)
{
    fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
    exit(1);
    return 0;
}

/* -- simple buffered file input with access to the buffer -- */

#define CHUNK 32768         /* must be a power of two and fit in unsigned */

/* bin buffered input file type */
typedef struct {
    char *name;             /* name of file for error messages */
    int fd;                 /* file descriptor */
    unsigned left;          /* bytes remaining at next */
    unsigned char *next;    /* next byte to read */
    unsigned char *buf;     /* allocated buffer of length CHUNK */
} bin;

/* close a buffered file and free allocated memory -- a NULL pointer and a
   partially set up structure (fd of -1, NULL buf) are both accepted */
local void bclose(bin *in)
{
    if (in != NULL) {
        if (in->fd != -1)
            close(in->fd);
        if (in->buf != NULL)
            free(in->buf);
        free(in);
    }
}

/* open a buffered file for input, return a pointer to type bin, or NULL on
   failure */
local bin *bopen(char *name)
{
    bin *in;

    in = malloc(sizeof(bin));
    if (in == NULL)
        return NULL;
    in->buf = malloc(CHUNK);
    in->fd = open(name, O_RDONLY, 0);
    if (in->buf == NULL || in->fd == -1) {
        bclose(in);         /* releases whichever of the two succeeded */
        return NULL;
    }
    in->left = 0;
    in->next = in->buf;
    in->name = name;
    return in;
}

/* load buffer from file, return -1 on read error, 0 or 1 on success, with
   1 indicating that end-of-file was reached -- nothing is read if there are
   still unread bytes in the buffer */
local int bload(bin *in)
{
    long len;

    if (in == NULL)
        return -1;
    if (in->left != 0)
        return 0;
    in->next = in->buf;
    do {
        /* keep reading until the buffer is full or the file is exhausted */
        len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
        if (len < 0)
            return -1;
        in->left += (unsigned)len;
    } while (len != 0 && in->left < CHUNK);
    return len == 0 ? 1 : 0;
}

/* get a byte from the file, bail if end of file */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                    bail("unexpected end of file on ", (in)->name))

/* get a four-byte little-endian unsigned integer from file */
local unsigned long bget4(bin *in)
{
    unsigned long val;

    val = bget(in);
    val += (unsigned long)(bget(in)) << 8;
    val += (unsigned long)(bget(in)) << 16;
    val += (unsigned long)(bget(in)) << 24;
    return val;
}

/* skip bytes in file -- bails if the file ends before skip bytes have been
   passed over, or if the file cannot be repositioned */
local void bskip(bin *in, unsigned skip)
{
    /* check pointer */
    if (in == NULL)
        return;

    /* easy case -- skip bytes in buffer */
    if (skip <= in->left) {
        in->left -= skip;
        in->next += skip;
        return;
    }

    /* skip what's in buffer, discard buffer contents */
    skip -= in->left;
    in->left = 0;

    /* seek past multiples of CHUNK bytes */
    if (skip > CHUNK) {
        unsigned left;

        left = skip & (CHUNK - 1);
        if (left == 0) {
            /* exact number of chunks: seek all the way minus one byte to check
               for end-of-file with a read */
            if (lseek(in->fd, (off_t)(skip - 1), SEEK_CUR) == -1)
                bail("seek error on ", in->name);
            if (read(in->fd, in->buf, 1) != 1)
                bail("unexpected end of file on ", in->name);
            return;
        }

        /* skip the integral chunks, update skip with remainder */
        if (lseek(in->fd, (off_t)(skip - left), SEEK_CUR) == -1)
            bail("seek error on ", in->name);
        skip = left;
    }

    /* read more input and skip remainder */
    bload(in);
    if (skip > in->left)
        bail("unexpected end of file on ", in->name);
    in->left -= skip;
    in->next += skip;
}
of file on ", in->name); +    in->left -= skip; +    in->next += skip; +} + +/* -- end of buffered input functions -- */ + +/* skip the gzip header from file in */ +local void gzhead(bin *in) +{ +    int flags; + +    /* verify gzip magic header and compression method */ +    if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8) +        bail(in->name, " is not a valid gzip file"); + +    /* get and verify flags */ +    flags = bget(in); +    if ((flags & 0xe0) != 0) +        bail("unknown reserved bits set in ", in->name); + +    /* skip modification time, extra flags, and os */ +    bskip(in, 6); + +    /* skip extra field if present */ +    if (flags & 4) { +        unsigned len; + +        len = bget(in); +        len += (unsigned)(bget(in)) << 8; +        bskip(in, len); +    } + +    /* skip file name if present */ +    if (flags & 8) +        while (bget(in) != 0) +            ; + +    /* skip comment if present */ +    if (flags & 16) +        while (bget(in) != 0) +            ; + +    /* skip header crc if present */ +    if (flags & 2) +        bskip(in, 2); +} + +/* write a four-byte little-endian unsigned integer to out */ +local void put4(unsigned long val, FILE *out) +{ +    putc(val & 0xff, out); +    putc((val >> 8) & 0xff, out); +    putc((val >> 16) & 0xff, out); +    putc((val >> 24) & 0xff, out); +} + +/* Load up zlib stream from buffered input, bail if end of file */ +local void zpull(z_streamp strm, bin *in) +{ +    if (in->left == 0) +        bload(in); +    if (in->left == 0) +        bail("unexpected end of file on ", in->name); +    strm->avail_in = in->left; +    strm->next_in = in->next; +} + +/* Write header for gzip file to out and initialize trailer. 
*/ +local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out) +{ +    fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); +    *crc = crc32(0L, Z_NULL, 0); +    *tot = 0; +} + +/* Copy the compressed data from name, zeroing the last block bit of the last +   block if clr is true, and adding empty blocks as needed to get to a byte +   boundary.  If clr is false, then the last block becomes the last block of +   the output, and the gzip trailer is written.  crc and tot maintains the +   crc and length (modulo 2^32) of the output for the trailer.  The resulting +   gzip file is written to out.  gzinit() must be called before the first call +   of gzcopy() to write the gzip header and to initialize crc and tot. */ +local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot, +                  FILE *out) +{ +    int ret;                /* return value from zlib functions */ +    int pos;                /* where the "last block" bit is in byte */ +    int last;               /* true if processing the last block */ +    bin *in;                /* buffered input file */ +    unsigned char *start;   /* start of compressed data in buffer */ +    unsigned char *junk;    /* buffer for uncompressed data -- discarded */ +    z_off_t len;            /* length of uncompressed data (support > 4 GB) */ +    z_stream strm;          /* zlib inflate stream */ + +    /* open gzip file and skip header */ +    in = bopen(name); +    if (in == NULL) +        bail("could not open ", name); +    gzhead(in); + +    /* allocate buffer for uncompressed data and initialize raw inflate +       stream */ +    junk = malloc(CHUNK); +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    strm.avail_in = 0; +    strm.next_in = Z_NULL; +    ret = inflateInit2(&strm, -15); +    if (junk == NULL || ret != Z_OK) +        bail("out of memory", ""); + +    /* inflate and copy compressed data, clear last-block bit if requested */ +    len = 0; +    
zpull(&strm, in); +    start = strm.next_in; +    last = start[0] & 1; +    if (last && clr) +        start[0] &= ~1; +    strm.avail_out = 0; +    for (;;) { +        /* if input used and output done, write used input and get more */ +        if (strm.avail_in == 0 && strm.avail_out != 0) { +            fwrite(start, 1, strm.next_in - start, out); +            start = in->buf; +            in->left = 0; +            zpull(&strm, in); +        } + +        /* decompress -- return early when end-of-block reached */ +        strm.avail_out = CHUNK; +        strm.next_out = junk; +        ret = inflate(&strm, Z_BLOCK); +        switch (ret) { +        case Z_MEM_ERROR: +            bail("out of memory", ""); +        case Z_DATA_ERROR: +            bail("invalid compressed data in ", in->name); +        } + +        /* update length of uncompressed data */ +        len += CHUNK - strm.avail_out; + +        /* check for block boundary (only get this when block copied out) */ +        if (strm.data_type & 128) { +            /* if that was the last block, then done */ +            if (last) +                break; + +            /* number of unused bits in last byte */ +            pos = strm.data_type & 7; + +            /* find the next last-block bit */ +            if (pos != 0) { +                /* next last-block bit is in last used byte */ +                pos = 0x100 >> pos; +                last = strm.next_in[-1] & pos; +                if (last && clr) +                    strm.next_in[-1] &= ~pos; +            } +            else { +                /* next last-block bit is in next unused byte */ +                if (strm.avail_in == 0) { +                    /* don't have that byte yet -- get it */ +                    fwrite(start, 1, strm.next_in - start, out); +                    start = in->buf; +                    in->left = 0; +                    zpull(&strm, in); +                } +                last = strm.next_in[0] & 1; +                if 
(last && clr) +                    strm.next_in[0] &= ~1; +            } +        } +    } + +    /* update buffer with unused input */ +    in->left = strm.avail_in; +    in->next = strm.next_in; + +    /* copy used input, write empty blocks to get to byte boundary */ +    pos = strm.data_type & 7; +    fwrite(start, 1, in->next - start - 1, out); +    last = in->next[-1]; +    if (pos == 0 || !clr) +        /* already at byte boundary, or last file: write last byte */ +        putc(last, out); +    else { +        /* append empty blocks to last byte */ +        last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */ +        if (pos & 1) { +            /* odd -- append an empty stored block */ +            putc(last, out); +            if (pos == 1) +                putc(0, out);               /* two more bits in block header */ +            fwrite("\0\0\xff\xff", 1, 4, out); +        } +        else { +            /* even -- append 1, 2, or 3 empty fixed blocks */ +            switch (pos) { +            case 6: +                putc(last | 8, out); +                last = 0; +            case 4: +                putc(last | 0x20, out); +                last = 0; +            case 2: +                putc(last | 0x80, out); +                putc(0, out); +            } +        } +    } + +    /* update crc and tot */ +    *crc = crc32_combine(*crc, bget4(in), len); +    *tot += (unsigned long)len; + +    /* clean up */ +    inflateEnd(&strm); +    free(junk); +    bclose(in); + +    /* write trailer if this is the last gzip file */ +    if (!clr) { +        put4(*crc, out); +        put4(*tot, out); +    } +} + +/* join the gzip files on the command line, write result to stdout */ +int main(int argc, char **argv) +{ +    unsigned long crc, tot;     /* running crc and total uncompressed length */ + +    /* skip command name */ +    argc--; +    argv++; + +    /* show usage if no arguments */ +    if (argc == 0) { +        fputs("gzjoin usage: 
gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n", +              stderr); +        return 0; +    } + +    /* join gzip files on command line and write to stdout */ +    gzinit(&crc, &tot, stdout); +    while (argc--) +        gzcopy(*argv++, argc, &crc, &tot, stdout); + +    /* done */ +    return 0; +} diff --git a/source4/lib/zlib/examples/gzlog.c b/source4/lib/zlib/examples/gzlog.c new file mode 100644 index 0000000000..f71f817c8b --- /dev/null +++ b/source4/lib/zlib/examples/gzlog.c @@ -0,0 +1,413 @@ +/* + * gzlog.c + * Copyright (C) 2004 Mark Adler + * For conditions of distribution and use, see copyright notice in gzlog.h + * version 1.0, 26 Nov 2004 + * + */ + +#include <string.h>             /* memcmp() */ +#include <stdlib.h>             /* malloc(), free(), NULL */ +#include <sys/types.h>          /* size_t, off_t */ +#include <unistd.h>             /* read(), close(), sleep(), ftruncate(), */ +                                /* lseek() */ +#include <fcntl.h>              /* open() */ +#include <sys/file.h>           /* flock() */ +#include "zlib.h"               /* deflateInit2(), deflate(), deflateEnd() */ + +#include "gzlog.h"              /* interface */ +#define local static + +/* log object structure */ +typedef struct { +    int id;                 /* object identifier */ +    int fd;                 /* log file descriptor */ +    off_t extra;            /* offset of extra "ap" subfield */ +    off_t mark_off;         /* offset of marked data */ +    off_t last_off;         /* offset of last block */ +    unsigned long crc;      /* uncompressed crc */ +    unsigned long len;      /* uncompressed length (modulo 2^32) */ +    unsigned stored;        /* length of current stored block */ +} gz_log; + +#define GZLOGID 19334       /* gz_log object identifier */ + +#define LOCK_RETRY 1            /* retry lock once a second */ +#define LOCK_PATIENCE 1200      /* try about twenty minutes before forcing */ + +/* acquire a lock on a file */ +local int 
lock(int fd) +{ +    int patience; + +    /* try to lock every LOCK_RETRY seconds for LOCK_PATIENCE seconds */ +    patience = LOCK_PATIENCE; +    do { +        if (flock(fd, LOCK_EX + LOCK_NB) == 0) +            return 0; +        (void)sleep(LOCK_RETRY); +        patience -= LOCK_RETRY; +    } while (patience > 0); + +    /* we've run out of patience -- give up */ +    return -1; +} + +/* release lock */ +local void unlock(int fd) +{ +    (void)flock(fd, LOCK_UN); +} + +/* release a log object */ +local void log_clean(gz_log *log) +{ +    unlock(log->fd); +    (void)close(log->fd); +    free(log); +} + +/* read an unsigned long from a byte buffer little-endian */ +local unsigned long make_ulg(unsigned char *buf) +{ +    int n; +    unsigned long val; + +    val = (unsigned long)(*buf++); +    for (n = 8; n < 32; n += 8) +        val += (unsigned long)(*buf++) << n; +    return val; +} + +/* read an off_t from a byte buffer little-endian */ +local off_t make_off(unsigned char *buf) +{ +    int n; +    off_t val; + +    val = (off_t)(*buf++); +    for (n = 8; n < 64; n += 8) +        val += (off_t)(*buf++) << n; +    return val; +} + +/* write an unsigned long little-endian to byte buffer */ +local void dice_ulg(unsigned long val, unsigned char *buf) +{ +    int n; + +    for (n = 0; n < 4; n++) { +        *buf++ = val & 0xff; +        val >>= 8; +    } +} + +/* write an off_t little-endian to byte buffer */ +local void dice_off(off_t val, unsigned char *buf) +{ +    int n; + +    for (n = 0; n < 8; n++) { +        *buf++ = val & 0xff; +        val >>= 8; +    } +} + +/* initial, empty gzip file for appending */ +local char empty_gz[] = { +    0x1f, 0x8b,                 /* magic gzip id */ +    8,                          /* compression method is deflate */ +    4,                          /* there is an extra field */ +    0, 0, 0, 0,                 /* no modification time provided */ +    0, 0xff,                    /* no extra flags, no OS */ +    20, 0, 'a', 
'p', 16, 0,     /* extra field with "ap" subfield */ +    32, 0, 0, 0, 0, 0, 0, 0,    /* offset of uncompressed data */ +    32, 0, 0, 0, 0, 0, 0, 0,    /* offset of last block */ +    1, 0, 0, 0xff, 0xff,        /* empty stored block (last) */ +    0, 0, 0, 0,                 /* crc */ +    0, 0, 0, 0                  /* uncompressed length */ +}; + +/* initialize a log object with locking */ +void *gzlog_open(char *path) +{ +    unsigned xlen; +    unsigned char temp[20]; +    unsigned sub_len; +    int good; +    gz_log *log; + +    /* allocate log structure */ +    log = malloc(sizeof(gz_log)); +    if (log == NULL) +        return NULL; +    log->id = GZLOGID; + +    /* open file, creating it if necessary, and locking it */ +    log->fd = open(path, O_RDWR | O_CREAT, 0600); +    if (log->fd < 0) { +        free(log); +        return NULL; +    } +    if (lock(log->fd)) { +        close(log->fd); +        free(log); +        return NULL; +    } + +    /* if file is empty, write new gzip stream */ +    if (lseek(log->fd, 0, SEEK_END) == 0) { +        if (write(log->fd, empty_gz, sizeof(empty_gz)) != sizeof(empty_gz)) { +            log_clean(log); +            return NULL; +        } +    } + +    /* check gzip header */ +    (void)lseek(log->fd, 0, SEEK_SET); +    if (read(log->fd, temp, 12) != 12 || temp[0] != 0x1f || +        temp[1] != 0x8b || temp[2] != 8 || (temp[3] & 4) == 0) { +        log_clean(log); +        return NULL; +    } + +    /* process extra field to find "ap" sub-field */ +    xlen = temp[10] + (temp[11] << 8); +    good = 0; +    while (xlen) { +        if (xlen < 4 || read(log->fd, temp, 4) != 4) +            break; +        sub_len = temp[2]; +        sub_len += temp[3] << 8; +        xlen -= 4; +        if (memcmp(temp, "ap", 2) == 0 && sub_len == 16) { +            good = 1; +            break; +        } +        if (xlen < sub_len) +            break; +        (void)lseek(log->fd, sub_len, SEEK_CUR); +        xlen -= sub_len; +    } + 
   if (!good) { +        log_clean(log); +        return NULL; +    } + +    /* read in "ap" sub-field */ +    log->extra = lseek(log->fd, 0, SEEK_CUR); +    if (read(log->fd, temp, 16) != 16) { +        log_clean(log); +        return NULL; +    } +    log->mark_off = make_off(temp); +    log->last_off = make_off(temp + 8); + +    /* get crc, length of gzip file */ +    (void)lseek(log->fd, log->last_off, SEEK_SET); +    if (read(log->fd, temp, 13) != 13 || +        memcmp(temp, "\001\000\000\377\377", 5) != 0) { +        log_clean(log); +        return NULL; +    } +    log->crc = make_ulg(temp + 5); +    log->len = make_ulg(temp + 9); + +    /* set up to write over empty last block */ +    (void)lseek(log->fd, log->last_off + 5, SEEK_SET); +    log->stored = 0; +    return (void *)log; +} + +/* maximum amount to put in a stored block before starting a new one */ +#define MAX_BLOCK 16384 + +/* write a block to a log object */ +int gzlog_write(void *obj, char *data, size_t len) +{ +    size_t some; +    unsigned char temp[5]; +    gz_log *log; + +    /* check object */ +    log = (gz_log *)obj; +    if (log == NULL || log->id != GZLOGID) +        return 1; + +    /* write stored blocks until all of the input is written */ +    do { +        some = MAX_BLOCK - log->stored; +        if (some > len) +            some = len; +        if (write(log->fd, data, some) != some) +            return 1; +        log->crc = crc32(log->crc, data, some); +        log->len += some; +        len -= some; +        data += some; +        log->stored += some; + +        /* if the stored block is full, end it and start another */ +        if (log->stored == MAX_BLOCK) { +            (void)lseek(log->fd, log->last_off, SEEK_SET); +            temp[0] = 0; +            dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16), +                     temp + 1); +            if (write(log->fd, temp, 5) != 5) +                return 1; +            log->last_off = lseek(log->fd, 
log->stored, SEEK_CUR); +            (void)lseek(log->fd, 5, SEEK_CUR); +            log->stored = 0; +        } +    } while (len); +    return 0; +} + +/* recompress the remaining stored deflate data in place */ +local int recomp(gz_log *log) +{ +    z_stream strm; +    size_t len, max; +    unsigned char *in; +    unsigned char *out; +    unsigned char temp[16]; + +    /* allocate space and read it all in (it's around 1 MB) */ +    len = log->last_off - log->mark_off; +    max = len + (len >> 12) + (len >> 14) + 11; +    out = malloc(max); +    if (out == NULL) +        return 1; +    in = malloc(len); +    if (in == NULL) { +        free(out); +        return 1; +    } +    (void)lseek(log->fd, log->mark_off, SEEK_SET); +    if (read(log->fd, in, len) != len) { +        free(in); +        free(out); +        return 1; +    } + +    /* recompress in memory, decoding stored data as we go */ +    /* note: this assumes that unsigned is four bytes or more */ +    /*       consider not making that assumption */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    if (deflateInit2(&strm, Z_BEST_COMPRESSION, Z_DEFLATED, -15, 8, +        Z_DEFAULT_STRATEGY) != Z_OK) { +        free(in); +        free(out); +        return 1; +    } +    strm.next_in = in; +    strm.avail_out = max; +    strm.next_out = out; +    while (len >= 5) { +        if (strm.next_in[0] != 0) +            break; +        strm.avail_in = strm.next_in[1] + (strm.next_in[2] << 8); +        strm.next_in += 5; +        len -= 5; +        if (strm.avail_in != 0) { +            if (len < strm.avail_in) +                break; +            len -= strm.avail_in; +            (void)deflate(&strm, Z_NO_FLUSH); +            if (strm.avail_in != 0 || strm.avail_out == 0) +                break; +        } +    } +    (void)deflate(&strm, Z_SYNC_FLUSH); +    (void)deflateEnd(&strm); +    free(in); +    if (len != 0 || strm.avail_out == 0) { +        free(out); +        return 1; 
+    } + +    /* overwrite stored data with compressed data */ +    (void)lseek(log->fd, log->mark_off, SEEK_SET); +    len = max - strm.avail_out; +    if (write(log->fd, out, len) != len) { +        free(out); +        return 1; +    } +    free(out); + +    /* write last empty block, crc, and length */ +    log->mark_off = log->last_off = lseek(log->fd, 0, SEEK_CUR); +    temp[0] = 1; +    dice_ulg(0xffffL << 16, temp + 1); +    dice_ulg(log->crc, temp + 5); +    dice_ulg(log->len, temp + 9); +    if (write(log->fd, temp, 13) != 13) +        return 1; + +    /* truncate file to discard remaining stored data and old trailer */ +    ftruncate(log->fd, lseek(log->fd, 0, SEEK_CUR)); + +    /* update extra field to point to new last empty block */ +    (void)lseek(log->fd, log->extra, SEEK_SET); +    dice_off(log->mark_off, temp); +    dice_off(log->last_off, temp + 8); +    if (write(log->fd, temp, 16) != 16) +        return 1; +    return 0; +} + +/* maximum accumulation of stored blocks before compressing */ +#define MAX_STORED 1048576 + +/* close log object */ +int gzlog_close(void *obj) +{ +    unsigned char temp[8]; +    gz_log *log; + +    /* check object */ +    log = (gz_log *)obj; +    if (log == NULL || log->id != GZLOGID) +        return 1; + +    /* go to start of most recent block being written */ +    (void)lseek(log->fd, log->last_off, SEEK_SET); + +    /* if some stuff was put there, update block */ +    if (log->stored) { +        temp[0] = 0; +        dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16), +                 temp + 1); +        if (write(log->fd, temp, 5) != 5) +            return 1; +        log->last_off = lseek(log->fd, log->stored, SEEK_CUR); +    } + +    /* write last block (empty) */ +    if (write(log->fd, "\001\000\000\377\377", 5) != 5) +        return 1; + +    /* write updated crc and uncompressed length */ +    dice_ulg(log->crc, temp); +    dice_ulg(log->len, temp + 4); +    if (write(log->fd, temp, 8) != 8) +    
    return 1; + +    /* put offset of that last block in gzip extra block */ +    (void)lseek(log->fd, log->extra + 8, SEEK_SET); +    dice_off(log->last_off, temp); +    if (write(log->fd, temp, 8) != 8) +        return 1; + +    /* if more than 1 MB stored, then time to compress it */ +    if (log->last_off - log->mark_off > MAX_STORED) { +        if (recomp(log)) +            return 1; +    } + +    /* unlock and close file */ +    log_clean(log); +    return 0; +} diff --git a/source4/lib/zlib/examples/gzlog.h b/source4/lib/zlib/examples/gzlog.h new file mode 100644 index 0000000000..a800bd5391 --- /dev/null +++ b/source4/lib/zlib/examples/gzlog.h @@ -0,0 +1,58 @@ +/* gzlog.h +  Copyright (C) 2004 Mark Adler, all rights reserved +  version 1.0, 26 Nov 2004 + +  This software is provided 'as-is', without any express or implied +  warranty.  In no event will the author be held liable for any damages +  arising from the use of this software. + +  Permission is granted to anyone to use this software for any purpose, +  including commercial applications, and to alter it and redistribute it +  freely, subject to the following restrictions: + +  1. The origin of this software must not be misrepresented; you must not +     claim that you wrote the original software. If you use this software +     in a product, an acknowledgment in the product documentation would be +     appreciated but is not required. +  2. Altered source versions must be plainly marked as such, and must not be +     misrepresented as being the original software. +  3. This notice may not be removed or altered from any source distribution. + +  Mark Adler    madler@alumni.caltech.edu + */ + +/* +   The gzlog object allows writing short messages to a gzipped log file, +   opening the log file locked for small bursts, and then closing it.  The log +   object works by appending stored data to the gzip file until 1 MB has been +   accumulated.  
At that time, the stored data is compressed, and replaces the +   uncompressed data in the file.  The log file is truncated to its new size at +   that time.  After closing, the log file is always a valid gzip file that can +   be decompressed to recover what was written. + +   A gzip header "extra" field contains two file offsets for appending.  The +   first points to just after the last compressed data.  The second points to +   the last stored block in the deflate stream, which is empty.  All of the +   data between those pointers is uncompressed. + */ + +/* Open a gzlog object, creating the log file if it does not exist.  Return +   NULL on error.  Note that gzlog_open() could take a long time to return if +   there is difficulty in locking the file. */ +void *gzlog_open(char *path); + +/* Write to a gzlog object.  Return non-zero on error.  This function will +   simply write data to the file uncompressed.  Compression of the data +   will not occur until gzlog_close() is called.  It is expected that +   gzlog_write() is used for a short message, and then gzlog_close() is +   called.  If a large amount of data is to be written, then the application +   should write no more than 1 MB at a time with gzlog_write() before +   calling gzlog_close() and then gzlog_open() again. */ +int gzlog_write(void *log, char *data, size_t len); + +/* Close a gzlog object.  Return non-zero on error.  The log file is locked +   until this function is called.  This function will compress stored data +   at the end of the gzip file if at least 1 MB has been accumulated.  Note +   that the file will not be a valid gzip file until this function completes. 
+ */ +int gzlog_close(void *log); diff --git a/source4/lib/zlib/examples/zlib_how.html b/source4/lib/zlib/examples/zlib_how.html new file mode 100644 index 0000000000..40998dbf08 --- /dev/null +++ b/source4/lib/zlib/examples/zlib_how.html @@ -0,0 +1,523 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" +  "http://www.w3.org/TR/REC-html40/loose.dtd"> +<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> +<title>zlib Usage Example</title> +<!--  Copyright (c) 2004 Mark Adler.  --> +</head> +<body bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#00A000"> +<h2 align="center"> zlib Usage Example </h2> +We often get questions about how the <tt>deflate()</tt> and <tt>inflate()</tt> functions should be used. +Users wonder when they should provide more input, when they should use more output, +what to do with a <tt>Z_BUF_ERROR</tt>, how to make sure the process terminates properly, and +so on.  So for those who have read <tt>zlib.h</tt> (a few times), and +would like further edification, below is an annotated example in C of simple routines to compress and decompress +from an input file to an output file using <tt>deflate()</tt> and <tt>inflate()</tt> respectively.  The +annotations are interspersed between lines of the code.  So please read between the lines. +We hope this helps explain some of the intricacies of <em>zlib</em>. 
+<p> +Without further ado, here is the program <a href="zpipe.c"><tt>zpipe.c</tt></a>: +<pre><b> +/* zpipe.c: example of proper use of zlib's inflate() and deflate() +   Not copyrighted -- provided to the public domain +   Version 1.2  9 November 2004  Mark Adler */ + +/* Version history: +   1.0  30 Oct 2004  First version +   1.1   8 Nov 2004  Add void casting for unused return values +                     Use switch statement for inflate() return values +   1.2   9 Nov 2004  Add assertions to document zlib guarantees + */ +</b></pre><!-- --> +We now include the header files for the required definitions.  From +<tt>stdio.h</tt> we use <tt>fopen()</tt>, <tt>fread()</tt>, <tt>fwrite()</tt>, +<tt>feof()</tt>, <tt>ferror()</tt>, and <tt>fclose()</tt> for file i/o, and +<tt>fputs()</tt> for error messages.  From <tt>string.h</tt> we use +<tt>strcmp()</tt> for command line argument processing. +From <tt>assert.h</tt> we use the <tt>assert()</tt> macro. +From <tt>zlib.h</tt> +we use the basic compression functions <tt>deflateInit()</tt>, +<tt>deflate()</tt>, and <tt>deflateEnd()</tt>, and the basic decompression +functions <tt>inflateInit()</tt>, <tt>inflate()</tt>, and +<tt>inflateEnd()</tt>. +<pre><b> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "zlib.h" +</b></pre><!-- --> +<tt>CHUNK</tt> is simply the buffer size for feeding data to and pulling data +from the <em>zlib</em> routines.  Larger buffer sizes would be more efficient, +especially for <tt>inflate()</tt>.  If the memory is available, buffer sizes +on the order of 128K or 256K bytes should be used. +<pre><b> +#define CHUNK 16384 +</b></pre><!-- --> +The <tt>def()</tt> routine compresses data from an input file to an output file.  The output data +will be in the <em>zlib</em> format, which is different from the <em>gzip</em> or <em>zip</em> +formats.  
The <em>zlib</em> format has a very small header of only two bytes to identify it as +a <em>zlib</em> stream and to provide decoding information, and a four-byte trailer with a fast +check value to verify the integrity of the uncompressed data after decoding. +<pre><b> +/* Compress from file source to file dest until EOF on source. +   def() returns Z_OK on success, Z_MEM_ERROR if memory could not be +   allocated for processing, Z_STREAM_ERROR if an invalid compression +   level is supplied, Z_VERSION_ERROR if the version of zlib.h and the +   version of the library linked do not match, or Z_ERRNO if there is +   an error reading or writing the files. */ +int def(FILE *source, FILE *dest, int level) +{ +</b></pre> +Here are the local variables for <tt>def()</tt>.  <tt>ret</tt> will be used for <em>zlib</em> +return codes.  <tt>flush</tt> will keep track of the current flushing state for <tt>deflate()</tt>, +which is either no flushing, or flush to completion after the end of the input file is reached. +<tt>have</tt> is the amount of data returned from <tt>deflate()</tt>.  The <tt>strm</tt> structure +is used to pass information to and from the <em>zlib</em> routines, and to maintain the +<tt>deflate()</tt> state.  <tt>in</tt> and <tt>out</tt> are the input and output buffers for +<tt>deflate()</tt>. +<pre><b> +    int ret, flush; +    unsigned have; +    z_stream strm; +    char in[CHUNK]; +    char out[CHUNK]; +</b></pre><!-- --> +The first thing we do is to initialize the <em>zlib</em> state for compression using +<tt>deflateInit()</tt>.  This must be done before the first use of <tt>deflate()</tt>. +The <tt>zalloc</tt>, <tt>zfree</tt>, and <tt>opaque</tt> fields in the <tt>strm</tt> +structure must be initialized before calling <tt>deflateInit()</tt>.  Here they are +set to the <em>zlib</em> constant <tt>Z_NULL</tt> to request that <em>zlib</em> use +the default memory allocation routines.  
An application may also choose to provide +custom memory allocation routines here.  <tt>deflateInit()</tt> will allocate on the +order of 256K bytes for the internal state. +(See <a href="zlib_tech.html"><em>zlib Technical Details</em></a>.) +<p> +<tt>deflateInit()</tt> is called with a pointer to the structure to be initialized and +the compression level, which is an integer in the range of -1 to 9.  Lower compression +levels result in faster execution, but less compression.  Higher levels result in +greater compression, but slower execution.  The <em>zlib</em> constant Z_DEFAULT_COMPRESSION, +equal to -1, +provides a good compromise between compression and speed and is equivalent to level 6. +Level 0 actually does no compression at all, and in fact expands the data slightly to produce +the <em>zlib</em> format (it is not a byte-for-byte copy of the input). +More advanced applications of <em>zlib</em> +may use <tt>deflateInit2()</tt> here instead.  Such an application may want to reduce how +much memory will be used, at some price in compression.  Or it may need to request a +<em>gzip</em> header and trailer instead of a <em>zlib</em> header and trailer, or raw +encoding with no header or trailer at all. +<p> +We must check the return value of <tt>deflateInit()</tt> against the <em>zlib</em> constant +<tt>Z_OK</tt> to make sure that it was able to +allocate memory for the internal state, and that the provided arguments were valid. +<tt>deflateInit()</tt> will also check that the version of <em>zlib</em> that the <tt>zlib.h</tt> +file came from matches the version of <em>zlib</em> actually linked with the program.  This +is especially important for environments in which <em>zlib</em> is a shared library. +<p> +Note that an application can initialize multiple, independent <em>zlib</em> streams, which can +operate in parallel.  The state information maintained in the structure allows the <em>zlib</em> +routines to be reentrant. 
+<pre><b> +    /* allocate deflate state */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    ret = deflateInit(&strm, level); +    if (ret != Z_OK) +        return ret; +</b></pre><!-- --> +With the pleasantries out of the way, now we can get down to business.  The outer <tt>do</tt>-loop +reads all of the input file and exits at the bottom of the loop once end-of-file is reached. +This loop contains the only call of <tt>deflate()</tt>.  So we must make sure that all of the +input data has been processed and that all of the output data has been generated and consumed +before we fall out of the loop at the bottom. +<pre><b> +    /* compress until end of file */ +    do { +</b></pre> +We start off by reading data from the input file.  The number of bytes read is put directly +into <tt>avail_in</tt>, and a pointer to those bytes is put into <tt>next_in</tt>.  We also +check to see if end-of-file on the input has been reached.  If we are at the end of file, then <tt>flush</tt> is set to the +<em>zlib</em> constant <tt>Z_FINISH</tt>, which is later passed to <tt>deflate()</tt> to +indicate that this is the last chunk of input data to compress.  We need to use <tt>feof()</tt> +to check for end-of-file as opposed to seeing if fewer than <tt>CHUNK</tt> bytes have been read.  The +reason is that if the input file length is an exact multiple of <tt>CHUNK</tt>, we will miss +the fact that we got to the end-of-file, and not know to tell <tt>deflate()</tt> to finish +up the compressed stream.  If we are not yet at the end of the input, then the <em>zlib</em> +constant <tt>Z_NO_FLUSH</tt> will be passed to <tt>deflate</tt> to indicate that we are still +in the middle of the uncompressed data. +<p> +If there is an error in reading from the input file, the process is aborted with +<tt>deflateEnd()</tt> being called to free the allocated <em>zlib</em> state before returning +the error.  We wouldn't want a memory leak, now would we?  
<tt>deflateEnd()</tt> can be called +at any time after the state has been initialized.  Once that's done, <tt>deflateInit()</tt> (or +<tt>deflateInit2()</tt>) would have to be called to start a new compression process.  There is +no point here in checking the <tt>deflateEnd()</tt> return code.  The deallocation can't fail. +<pre><b> +        strm.avail_in = fread(in, 1, CHUNK, source); +        if (ferror(source)) { +            (void)deflateEnd(&strm); +            return Z_ERRNO; +        } +        flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; +        strm.next_in = in; +</b></pre><!-- --> +The inner <tt>do</tt>-loop passes our chunk of input data to <tt>deflate()</tt>, and then +keeps calling <tt>deflate()</tt> until it is done producing output.  Once there is no more +new output, <tt>deflate()</tt> is guaranteed to have consumed all of the input, i.e., +<tt>avail_in</tt> will be zero. +<pre><b> +        /* run deflate() on input until output buffer not full, finish +           compression if all of source has been read in */ +        do { +</b></pre> +Output space is provided to <tt>deflate()</tt> by setting <tt>avail_out</tt> to the number +of available output bytes and <tt>next_out</tt> to a pointer to that space. +<pre><b> +            strm.avail_out = CHUNK; +            strm.next_out = out; +</b></pre> +Now we call the compression engine itself, <tt>deflate()</tt>.  It takes as many of the +<tt>avail_in</tt> bytes at <tt>next_in</tt> as it can process, and writes as many as +<tt>avail_out</tt> bytes to <tt>next_out</tt>.  Those counters and pointers are then +updated past the input data consumed and the output data written.  It is the amount of +output space available that may limit how much input is consumed. +Hence the inner loop to make sure that +all of the input is consumed by providing more output space each time.  
Since <tt>avail_in</tt> +and <tt>next_in</tt> are updated by <tt>deflate()</tt>, we don't have to mess with those +between <tt>deflate()</tt> calls until it's all used up. +<p> +The parameters to <tt>deflate()</tt> are a pointer to the <tt>strm</tt> structure containing +the input and output information and the internal compression engine state, and a parameter +indicating whether and how to flush data to the output.  Normally <tt>deflate</tt> will consume +several K bytes of input data before producing any output (except for the header), in order +to accumulate statistics on the data for optimum compression.  It will then put out a burst of +compressed data, and proceed to consume more input before the next burst.  Eventually, +<tt>deflate()</tt> +must be told to terminate the stream, complete the compression with provided input data, and +write out the trailer check value.  <tt>deflate()</tt> will continue to compress normally as long +as the flush parameter is <tt>Z_NO_FLUSH</tt>.  Once the <tt>Z_FINISH</tt> parameter is provided, +<tt>deflate()</tt> will begin to complete the compressed output stream.  However depending on how +much output space is provided, <tt>deflate()</tt> may have to be called several times until it +has provided the complete compressed stream, even after it has consumed all of the input.  The flush +parameter must continue to be <tt>Z_FINISH</tt> for those subsequent calls. +<p> +There are other values of the flush parameter that are used in more advanced applications.  You can +force <tt>deflate()</tt> to produce a burst of output that encodes all of the input data provided +so far, even if it wouldn't have otherwise, for example to control data latency on a link with +compressed data.  You can also ask that <tt>deflate()</tt> do that as well as erase any history up to +that point so that what follows can be decompressed independently, for example for random access +applications.  
Both requests will degrade compression by an amount depending on how often such +requests are made. +<p> +<tt>deflate()</tt> has a return value that can indicate errors, yet we do not check it here.  Why +not?  Well, it turns out that <tt>deflate()</tt> can do no wrong here.  Let's go through +<tt>deflate()</tt>'s return values and dispense with them one by one.  The possible values are +<tt>Z_OK</tt>, <tt>Z_STREAM_END</tt>, <tt>Z_STREAM_ERROR</tt>, or <tt>Z_BUF_ERROR</tt>.  <tt>Z_OK</tt> +is, well, ok.  <tt>Z_STREAM_END</tt> is also ok and will be returned for the last call of +<tt>deflate()</tt>.  This is already guaranteed by calling <tt>deflate()</tt> with <tt>Z_FINISH</tt> +until it has no more output.  <tt>Z_STREAM_ERROR</tt> is only possible if the stream is not +initialized properly, but we did initialize it properly.  There is no harm in checking for +<tt>Z_STREAM_ERROR</tt> here, for example to check for the possibility that some +other part of the application inadvertently clobbered the memory containing the <em>zlib</em> state. +<tt>Z_BUF_ERROR</tt> will be explained further below, but +suffice it to say that this is simply an indication that <tt>deflate()</tt> could not consume +more input or produce more output.  <tt>deflate()</tt> can be called again with more output space +or more available input, which it will be in this code. +<pre><b> +            ret = deflate(&strm, flush);    /* no bad return value */ +            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */ +</b></pre> +Now we compute how much output <tt>deflate()</tt> provided on the last call, which is the +difference between how much space was provided before the call, and how much output space +is still available after the call.  Then that data, if any, is written to the output file. +We can then reuse the output buffer for the next call of <tt>deflate()</tt>.  Again if there +is a file i/o error, we call <tt>deflateEnd()</tt> before returning to avoid a memory leak. 
+<pre><b> +            have = CHUNK - strm.avail_out; +            if (fwrite(out, 1, have, dest) != have || ferror(dest)) { +                (void)deflateEnd(&strm); +                return Z_ERRNO; +            } +</b></pre> +The inner <tt>do</tt>-loop is repeated until the last <tt>deflate()</tt> call fails to fill the +provided output buffer.  Then we know that <tt>deflate()</tt> has done as much as it can with +the provided input, and that all of that input has been consumed.  We can then fall out of this +loop and reuse the input buffer. +<p> +The way we tell that <tt>deflate()</tt> has no more output is by seeing that it did not fill +the output buffer, leaving <tt>avail_out</tt> greater than zero.  However suppose that +<tt>deflate()</tt> has no more output, but just so happened to exactly fill the output buffer! +<tt>avail_out</tt> is zero, and we can't tell that <tt>deflate()</tt> has done all it can. +As far as we know, <tt>deflate()</tt> +has more output for us.  So we call it again.  But now <tt>deflate()</tt> produces no output +at all, and <tt>avail_out</tt> remains unchanged as <tt>CHUNK</tt>.  That <tt>deflate()</tt> call +wasn't able to do anything, either consume input or produce output, and so it returns +<tt>Z_BUF_ERROR</tt>.  (See, I told you I'd cover this later.)  However this is not a problem at +all.  Now we finally have the desired indication that <tt>deflate()</tt> is really done, +and so we drop out of the inner loop to provide more input to <tt>deflate()</tt>. +<p> +With <tt>flush</tt> set to <tt>Z_FINISH</tt>, this final set of <tt>deflate()</tt> calls will +complete the output stream.  Once that is done, subsequent calls of <tt>deflate()</tt> would return +<tt>Z_STREAM_ERROR</tt> if the flush parameter is not <tt>Z_FINISH</tt>, and do no more processing +until the state is reinitialized. +<p> +Some applications of <em>zlib</em> have two loops that call <tt>deflate()</tt> +instead of the single inner loop we have here.  
The first loop would call +without flushing and feed all of the data to <tt>deflate()</tt>.  The second loop would call +<tt>deflate()</tt> with no more +data and the <tt>Z_FINISH</tt> parameter to complete the process.  As you can see from this +example, that can be avoided by simply keeping track of the current flush state. +<pre><b> +        } while (strm.avail_out == 0); +        assert(strm.avail_in == 0);     /* all input will be used */ +</b></pre><!-- --> +Now we check to see if we have already processed all of the input file.  That information was +saved in the <tt>flush</tt> variable, so we see if that was set to <tt>Z_FINISH</tt>.  If so, +then we're done and we fall out of the outer loop.  We're guaranteed to get <tt>Z_STREAM_END</tt> +from the last <tt>deflate()</tt> call, since we ran it until the last chunk of input was +consumed and all of the output was generated. +<pre><b> +        /* done when last data in file processed */ +    } while (flush != Z_FINISH); +    assert(ret == Z_STREAM_END);        /* stream will be complete */ +</b></pre><!-- --> +The process is complete, but we still need to deallocate the state to avoid a memory leak +(or rather more like a memory hemorrhage if you didn't do this).  Then +finally we can return with a happy return value. +<pre><b> +    /* clean up and return */ +    (void)deflateEnd(&strm); +    return Z_OK; +} +</b></pre><!-- --> +Now we do the same thing for decompression in the <tt>inf()</tt> routine. <tt>inf()</tt> +decompresses what is hopefully a valid <em>zlib</em> stream from the input file and writes the +uncompressed data to the output file.  Much of the discussion above for <tt>def()</tt> +applies to <tt>inf()</tt> as well, so the discussion here will focus on the differences between +the two. +<pre><b> +/* Decompress from file source to file dest until stream ends or EOF. 
+   inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be +   allocated for processing, Z_DATA_ERROR if the deflate data is +   invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and +   the version of the library linked do not match, or Z_ERRNO if there +   is an error reading or writing the files. */ +int inf(FILE *source, FILE *dest) +{ +</b></pre> +The local variables have the same functionality as they do for <tt>def()</tt>.  The +only difference is that there is no <tt>flush</tt> variable, since <tt>inflate()</tt> +can tell from the <em>zlib</em> stream itself when the stream is complete. +<pre><b> +    int ret; +    unsigned have; +    z_stream strm; +    char in[CHUNK]; +    char out[CHUNK]; +</b></pre><!-- --> +The initialization of the state is the same, except that there is no compression level, +of course, and two more elements of the structure are initialized.  <tt>avail_in</tt> +and <tt>next_in</tt> must be initialized before calling <tt>inflateInit()</tt>.  This +is because the application has the option to provide the start of the zlib stream in +order for <tt>inflateInit()</tt> to have access to information about the compression +method to aid in memory allocation.  In the current implementation of <em>zlib</em> +(up through versions 1.2.x), the method-dependent memory allocations are deferred to the first call of +<tt>inflate()</tt> anyway.  However those fields must be initialized since later versions +of <em>zlib</em> that provide more compression methods may take advantage of this interface. +In any case, no decompression is performed by <tt>inflateInit()</tt>, so the +<tt>avail_out</tt> and <tt>next_out</tt> fields do not need to be initialized before calling. +<p> +Here <tt>avail_in</tt> is set to zero and <tt>next_in</tt> is set to <tt>Z_NULL</tt> to +indicate that no input data is being provided. 
+<pre><b> +    /* allocate inflate state */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    strm.avail_in = 0; +    strm.next_in = Z_NULL; +    ret = inflateInit(&strm); +    if (ret != Z_OK) +        return ret; +</b></pre><!-- --> +The outer <tt>do</tt>-loop decompresses input until <tt>inflate()</tt> indicates +that it has reached the end of the compressed data and has produced all of the uncompressed +output.  This is in contrast to <tt>def()</tt> which processes all of the input file. +If end-of-file is reached before the compressed data self-terminates, then the compressed +data is incomplete and an error is returned. +<pre><b> +    /* decompress until deflate stream ends or end of file */ +    do { +</b></pre> +We read input data and set the <tt>strm</tt> structure accordingly.  If we've reached the +end of the input file, then we leave the outer loop and report an error, since the +compressed data is incomplete.  Note that we may read more data than is eventually consumed +by <tt>inflate()</tt>, if the input file continues past the <em>zlib</em> stream. +For applications where <em>zlib</em> streams are embedded in other data, this routine would +need to be modified to return the unused data, or at least indicate how much of the input +data was not used, so the application would know where to pick up after the <em>zlib</em> stream. +<pre><b> +        strm.avail_in = fread(in, 1, CHUNK, source); +        if (ferror(source)) { +            (void)inflateEnd(&strm); +            return Z_ERRNO; +        } +        if (strm.avail_in == 0) +            break; +        strm.next_in = in; +</b></pre><!-- --> +The inner <tt>do</tt>-loop has the same function it did in <tt>def()</tt>, which is to +keep calling <tt>inflate()</tt> until it has generated all of the output it can with the +provided input. 
+<pre><b> +        /* run inflate() on input until output buffer not full */ +        do { +</b></pre> +Just like in <tt>def()</tt>, the same output space is provided for each call of <tt>inflate()</tt>. +<pre><b> +            strm.avail_out = CHUNK; +            strm.next_out = out; +</b></pre> +Now we run the decompression engine itself.  There is no need to adjust the flush parameter, since +the <em>zlib</em> format is self-terminating. The main difference here is that there are +return values that we need to pay attention to.  <tt>Z_DATA_ERROR</tt> +indicates that <tt>inflate()</tt> detected an error in the <em>zlib</em> compressed data format, +which means that either the data is not a <em>zlib</em> stream to begin with, or that the data was +corrupted somewhere along the way since it was compressed.  The other error to be processed is +<tt>Z_MEM_ERROR</tt>, which can occur since memory allocation is deferred until <tt>inflate()</tt> +needs it, unlike <tt>deflate()</tt>, whose memory is allocated at the start by <tt>deflateInit()</tt>. +<p> +Advanced applications may use +<tt>deflateSetDictionary()</tt> to prime <tt>deflate()</tt> with a set of likely data to improve the +first 32K or so of compression.  This is noted in the <em>zlib</em> header, so <tt>inflate()</tt> +requests that that dictionary be provided before it can start to decompress.  Without the dictionary, +correct decompression is not possible.  For this routine, we have no idea what the dictionary is, +so the <tt>Z_NEED_DICT</tt> indication is converted to a <tt>Z_DATA_ERROR</tt>. +<p> +<tt>inflate()</tt> can also return <tt>Z_STREAM_ERROR</tt>, which should not be possible here, +but could be checked for as noted above for <tt>def()</tt>.  <tt>Z_BUF_ERROR</tt> does not need to be +checked for here, for the same reasons noted for <tt>def()</tt>.  <tt>Z_STREAM_END</tt> will be +checked for later. 
+<pre><b> +            ret = inflate(&strm, Z_NO_FLUSH); +            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */ +            switch (ret) { +            case Z_NEED_DICT: +                ret = Z_DATA_ERROR;     /* and fall through */ +            case Z_DATA_ERROR: +            case Z_MEM_ERROR: +                (void)inflateEnd(&strm); +                return ret; +            } +</b></pre> +The output of <tt>inflate()</tt> is handled identically to that of <tt>deflate()</tt>. +<pre><b> +            have = CHUNK - strm.avail_out; +            if (fwrite(out, 1, have, dest) != have || ferror(dest)) { +                (void)inflateEnd(&strm); +                return Z_ERRNO; +            } +</b></pre> +The inner <tt>do</tt>-loop ends when <tt>inflate()</tt> has no more output as indicated +by not filling the output buffer, just as for <tt>deflate()</tt>.  In this case, we cannot +assert that <tt>strm.avail_in</tt> will be zero, since the deflate stream may end before the file +does. +<pre><b> +        } while (strm.avail_out == 0); +</b></pre><!-- --> +The outer <tt>do</tt>-loop ends when <tt>inflate()</tt> reports that it has reached the +end of the input <em>zlib</em> stream, has completed the decompression and integrity +check, and has provided all of the output.  This is indicated by the <tt>inflate()</tt> +return value <tt>Z_STREAM_END</tt>.  The inner loop is guaranteed to leave <tt>ret</tt> +equal to <tt>Z_STREAM_END</tt> if the last chunk of the input file read contained the end +of the <em>zlib</em> stream.  So if the return value is not <tt>Z_STREAM_END</tt>, the +loop continues to read more input. +<pre><b> +        /* done when inflate() says it's done */ +    } while (ret != Z_STREAM_END); +</b></pre><!-- --> +At this point, decompression successfully completed, or we broke out of the loop due to no +more data being available from the input file.  
If the last <tt>inflate()</tt> return value +is not <tt>Z_STREAM_END</tt>, then the <em>zlib</em> stream was incomplete and a data error +is returned.  Otherwise, we return with a happy return value.  Of course, <tt>inflateEnd()</tt> +is called first to avoid a memory leak. +<pre><b> +    /* clean up and return */ +    (void)inflateEnd(&strm); +    return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; +} +</b></pre><!-- --> +That ends the routines that directly use <em>zlib</em>.  The following routines make this +a command-line program by running data through the above routines from <tt>stdin</tt> to +<tt>stdout</tt>, and handling any errors reported by <tt>def()</tt> or <tt>inf()</tt>. +<p> +<tt>zerr()</tt> is used to interpret the possible error codes from <tt>def()</tt> +and <tt>inf()</tt>, as detailed in their comments above, and print out an error message. +Note that these are only a subset of the possible return values from <tt>deflate()</tt> +and <tt>inflate()</tt>. +<pre><b> +/* report a zlib or i/o error */ +void zerr(int ret) +{ +    fputs("zpipe: ", stderr); +    switch (ret) { +    case Z_ERRNO: +        if (ferror(stdin)) +            fputs("error reading stdin\n", stderr); +        if (ferror(stdout)) +            fputs("error writing stdout\n", stderr); +        break; +    case Z_STREAM_ERROR: +        fputs("invalid compression level\n", stderr); +        break; +    case Z_DATA_ERROR: +        fputs("invalid or incomplete deflate data\n", stderr); +        break; +    case Z_MEM_ERROR: +        fputs("out of memory\n", stderr); +        break; +    case Z_VERSION_ERROR: +        fputs("zlib version mismatch!\n", stderr); +    } +} +</b></pre><!-- --> +Here is the <tt>main()</tt> routine used to test <tt>def()</tt> and <tt>inf()</tt>.  The +<tt>zpipe</tt> command is simply a compression pipe from <tt>stdin</tt> to <tt>stdout</tt>, if +no arguments are given, or it is a decompression pipe if <tt>zpipe -d</tt> is used.  
If any other +arguments are provided, no compression or decompression is performed.  Instead a usage +message is displayed.  Examples are <tt>zpipe < foo.txt > foo.txt.z</tt> to compress, and +<tt>zpipe -d < foo.txt.z > foo.txt</tt> to decompress. +<pre><b> +/* compress or decompress from stdin to stdout */ +int main(int argc, char **argv) +{ +    int ret; + +    /* do compression if no arguments */ +    if (argc == 1) { +        ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); +        if (ret != Z_OK) +            zerr(ret); +        return ret; +    } + +    /* do decompression if -d specified */ +    else if (argc == 2 && strcmp(argv[1], "-d") == 0) { +        ret = inf(stdin, stdout); +        if (ret != Z_OK) +            zerr(ret); +        return ret; +    } + +    /* otherwise, report usage */ +    else { +        fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); +        return 1; +    } +} +</b></pre> +<hr> +<i>Copyright (c) 2004 by Mark Adler<br>Last modified 13 November 2004</i> +</body> +</html> diff --git a/source4/lib/zlib/examples/zpipe.c b/source4/lib/zlib/examples/zpipe.c new file mode 100644 index 0000000000..26abb56a9c --- /dev/null +++ b/source4/lib/zlib/examples/zpipe.c @@ -0,0 +1,191 @@ +/* zpipe.c: example of proper use of zlib's inflate() and deflate() +   Not copyrighted -- provided to the public domain +   Version 1.2  9 November 2004  Mark Adler */ + +/* Version history: +   1.0  30 Oct 2004  First version +   1.1   8 Nov 2004  Add void casting for unused return values +                     Use switch statement for inflate() return values +   1.2   9 Nov 2004  Add assertions to document zlib guarantees +   1.3   6 Apr 2005  Remove incorrect assertion in inf() + */ + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "zlib.h" + +#define CHUNK 16384 + +/* Compress from file source to file dest until EOF on source. 
+   def() returns Z_OK on success, Z_MEM_ERROR if memory could not be +   allocated for processing, Z_STREAM_ERROR if an invalid compression +   level is supplied, Z_VERSION_ERROR if the version of zlib.h and the +   version of the library linked do not match, or Z_ERRNO if there is +   an error reading or writing the files. */ +int def(FILE *source, FILE *dest, int level) +{ +    int ret, flush; +    unsigned have; +    z_stream strm; +    char in[CHUNK]; +    char out[CHUNK]; + +    /* allocate deflate state */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    ret = deflateInit(&strm, level); +    if (ret != Z_OK) +        return ret; + +    /* compress until end of file */ +    do { +        strm.avail_in = fread(in, 1, CHUNK, source); +        if (ferror(source)) { +            (void)deflateEnd(&strm); +            return Z_ERRNO; +        } +        flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; +        strm.next_in = in; + +        /* run deflate() on input until output buffer not full, finish +           compression if all of source has been read in */ +        do { +            strm.avail_out = CHUNK; +            strm.next_out = out; +            ret = deflate(&strm, flush);    /* no bad return value */ +            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */ +            have = CHUNK - strm.avail_out; +            if (fwrite(out, 1, have, dest) != have || ferror(dest)) { +                (void)deflateEnd(&strm); +                return Z_ERRNO; +            } +        } while (strm.avail_out == 0); +        assert(strm.avail_in == 0);     /* all input will be used */ + +        /* done when last data in file processed */ +    } while (flush != Z_FINISH); +    assert(ret == Z_STREAM_END);        /* stream will be complete */ + +    /* clean up and return */ +    (void)deflateEnd(&strm); +    return Z_OK; +} + +/* Decompress from file source to file dest until stream ends or EOF. 
+   inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be +   allocated for processing, Z_DATA_ERROR if the deflate data is +   invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and +   the version of the library linked do not match, or Z_ERRNO if there +   is an error reading or writing the files. */ +int inf(FILE *source, FILE *dest) +{ +    int ret; +    unsigned have; +    z_stream strm; +    char in[CHUNK]; +    char out[CHUNK]; + +    /* allocate inflate state */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    strm.avail_in = 0; +    strm.next_in = Z_NULL; +    ret = inflateInit(&strm); +    if (ret != Z_OK) +        return ret; + +    /* decompress until deflate stream ends or end of file */ +    do { +        strm.avail_in = fread(in, 1, CHUNK, source); +        if (ferror(source)) { +            (void)inflateEnd(&strm); +            return Z_ERRNO; +        } +        if (strm.avail_in == 0) +            break; +        strm.next_in = in; + +        /* run inflate() on input until output buffer not full */ +        do { +            strm.avail_out = CHUNK; +            strm.next_out = out; +            ret = inflate(&strm, Z_NO_FLUSH); +            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */ +            switch (ret) { +            case Z_NEED_DICT: +                ret = Z_DATA_ERROR;     /* and fall through */ +            case Z_DATA_ERROR: +            case Z_MEM_ERROR: +                (void)inflateEnd(&strm); +                return ret; +            } +            have = CHUNK - strm.avail_out; +            if (fwrite(out, 1, have, dest) != have || ferror(dest)) { +                (void)inflateEnd(&strm); +                return Z_ERRNO; +            } +        } while (strm.avail_out == 0); + +        /* done when inflate() says it's done */ +    } while (ret != Z_STREAM_END); + +    /* clean up and return */ +    (void)inflateEnd(&strm); +    return ret == Z_STREAM_END ? 
Z_OK : Z_DATA_ERROR; +} + +/* report a zlib or i/o error */ +void zerr(int ret) +{ +    fputs("zpipe: ", stderr); +    switch (ret) { +    case Z_ERRNO: +        if (ferror(stdin)) +            fputs("error reading stdin\n", stderr); +        if (ferror(stdout)) +            fputs("error writing stdout\n", stderr); +        break; +    case Z_STREAM_ERROR: +        fputs("invalid compression level\n", stderr); +        break; +    case Z_DATA_ERROR: +        fputs("invalid or incomplete deflate data\n", stderr); +        break; +    case Z_MEM_ERROR: +        fputs("out of memory\n", stderr); +        break; +    case Z_VERSION_ERROR: +        fputs("zlib version mismatch!\n", stderr); +    } +} + +/* compress or decompress from stdin to stdout */ +int main(int argc, char **argv) +{ +    int ret; + +    /* do compression if no arguments */ +    if (argc == 1) { +        ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); +        if (ret != Z_OK) +            zerr(ret); +        return ret; +    } + +    /* do decompression if -d specified */ +    else if (argc == 2 && strcmp(argv[1], "-d") == 0) { +        ret = inf(stdin, stdout); +        if (ret != Z_OK) +            zerr(ret); +        return ret; +    } + +    /* otherwise, report usage */ +    else { +        fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); +        return 1; +    } +} diff --git a/source4/lib/zlib/examples/zran.c b/source4/lib/zlib/examples/zran.c new file mode 100644 index 0000000000..8c7717eb2c --- /dev/null +++ b/source4/lib/zlib/examples/zran.c @@ -0,0 +1,404 @@ +/* zran.c -- example of zlib/gzip stream indexing and random access + * Copyright (C) 2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h +   Version 1.0  29 May 2005  Mark Adler */ + +/* Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary() +   for random access of a compressed file.  A file containing a zlib or gzip +   stream is provided on the command line.  
The compressed stream is decoded in +   its entirety, and an index built with access points about every SPAN bytes +   in the uncompressed output.  The compressed file is left open, and can then +   be read randomly, having to decompress on the average SPAN/2 uncompressed +   bytes before getting to the desired block of data. + +   An access point can be created at the start of any deflate block, by saving +   the starting file offset and bit of that block, and the 32K bytes of +   uncompressed data that precede that block.  Also the uncompressed offset of +   that block is saved to provide a reference for locating a desired starting +   point in the uncompressed stream.  build_index() works by decompressing the +   input zlib or gzip stream a block at a time, and at the end of each block +   deciding if enough uncompressed data has gone by to justify the creation of +   a new access point.  If so, that point is saved in a data structure that +   grows as needed to accommodate the points. + +   To use the index, an offset in the uncompressed data is provided, for which +   the latest access point at or preceding that offset is located in the index. +   The input file is positioned to the specified location in the index, and if +   necessary the first few bits of the compressed data are read from the file. +   inflate is initialized with those bits and the 32K of uncompressed data, and +   the decompression then proceeds until the desired offset in the file is +   reached.  Then the decompression continues to read the desired uncompressed +   data from the file. + +   Another approach would be to generate the index on demand.  In that case, +   requests for random access reads from the compressed data would try to use +   the index, but if a read far enough past the end of the index is required, +   then further index entries would be generated and added. 
+ +   There is some fair bit of overhead to starting inflation for the random +   access, mainly copying the 32K byte dictionary.  So if small pieces of the +   file are being accessed, it would make sense to implement a cache to hold +   some lookahead and avoid many calls to extract() for small lengths. + +   Another way to build an index would be to use inflateCopy().  That would +   not be constrained to have access points at block boundaries, but requires +   more memory per access point, and also cannot be saved to file due to the +   use of pointers in the state.  The approach here allows for storage of the +   index in a file. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "zlib.h" + +#define local static + +#define SPAN 1048576L       /* desired distance between access points */ +#define WINSIZE 32768U      /* sliding window size */ +#define CHUNK 16384         /* file input buffer size */ + +/* access point entry */ +struct point { +    off_t out;          /* corresponding offset in uncompressed data */ +    off_t in;           /* offset in input file of first full byte */ +    int bits;           /* number of bits (1-7) from byte at in - 1, or 0 */ +    unsigned char window[WINSIZE];  /* preceding 32K of uncompressed data */ +}; + +/* access point list */ +struct access { +    int have;           /* number of list entries filled in */ +    int size;           /* number of list entries allocated */ +    struct point *list; /* allocated list */ +}; + +/* Deallocate an index built by build_index() */ +local void free_index(struct access *index) +{ +    if (index != NULL) { +        free(index->list); +        free(index); +    } +} + +/* Add an entry to the access point list.  If out of memory, deallocate the +   existing list and return NULL. 
*/ +local struct access *addpoint(struct access *index, int bits, +    off_t in, off_t out, unsigned left, unsigned char *window) +{ +    struct point *next; + +    /* if list is empty, create it (start with eight points) */ +    if (index == NULL) { +        index = malloc(sizeof(struct access)); +        if (index == NULL) return NULL; +        index->list = malloc(sizeof(struct point) << 3); +        if (index->list == NULL) { +            free(index); +            return NULL; +        } +        index->size = 8; +        index->have = 0; +    } + +    /* if list is full, make it bigger */ +    else if (index->have == index->size) { +        index->size <<= 1; +        next = realloc(index->list, sizeof(struct point) * index->size); +        if (next == NULL) { +            free_index(index); +            return NULL; +        } +        index->list = next; +    } + +    /* fill in entry and increment how many we have */ +    next = index->list + index->have; +    next->bits = bits; +    next->in = in; +    next->out = out; +    if (left) +        memcpy(next->window, window + WINSIZE - left, left); +    if (left < WINSIZE) +        memcpy(next->window + left, window, WINSIZE - left); +    index->have++; + +    /* return list, possibly reallocated */ +    return index; +} + +/* Make one entire pass through the compressed stream and build an index, with +   access points about every span bytes of uncompressed output -- span is +   chosen to balance the speed of random access against the memory requirements +   of the list, about 32K bytes per access point.  Note that data after the end +   of the first zlib or gzip stream in the file is ignored.  build_index() +   returns the number of access points on success (>= 1), Z_MEM_ERROR for out +   of memory, Z_DATA_ERROR for an error in the input file, or Z_ERRNO for a +   file read error.  On success, *built points to the resulting index. 
*/ +local int build_index(FILE *in, off_t span, struct access **built) +{ +    int ret; +    off_t totin, totout;        /* our own total counters to avoid 4GB limit */ +    off_t last;                 /* totout value of last access point */ +    struct access *index;       /* access points being generated */ +    z_stream strm; +    unsigned char input[CHUNK]; +    unsigned char window[WINSIZE]; + +    /* initialize inflate */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    strm.avail_in = 0; +    strm.next_in = Z_NULL; +    ret = inflateInit2(&strm, 47);      /* automatic zlib or gzip decoding */ +    if (ret != Z_OK) +        return ret; + +    /* inflate the input, maintain a sliding window, and build an index -- this +       also validates the integrity of the compressed data using the check +       information at the end of the gzip or zlib stream */ +    totin = totout = last = 0; +    index = NULL;               /* will be allocated by first addpoint() */ +    strm.avail_out = 0; +    do { +        /* get some compressed data from input file */ +        strm.avail_in = fread(input, 1, CHUNK, in); +        if (ferror(in)) { +            ret = Z_ERRNO; +            goto build_index_error; +        } +        if (strm.avail_in == 0) { +            ret = Z_DATA_ERROR; +            goto build_index_error; +        } +        strm.next_in = input; + +        /* process all of that, or until end of stream */ +        do { +            /* reset sliding window if necessary */ +            if (strm.avail_out == 0) { +                strm.avail_out = WINSIZE; +                strm.next_out = window; +            } + +            /* inflate until out of input, output, or at end of block -- +               update the total input and output counters */ +            totin += strm.avail_in; +            totout += strm.avail_out; +            ret = inflate(&strm, Z_BLOCK);      /* return at end of block */ +            totin -= 
strm.avail_in; +            totout -= strm.avail_out; +            if (ret == Z_NEED_DICT) +                ret = Z_DATA_ERROR; +            if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) +                goto build_index_error; +            if (ret == Z_STREAM_END) +                break; + +            /* if at end of block, consider adding an index entry (note that if +               data_type indicates an end-of-block, then all of the +               uncompressed data from that block has been delivered, and none +               of the compressed data after that block has been consumed, +               except for up to seven bits) -- the totout == 0 provides an +               entry point after the zlib or gzip header, and assures that the +               index always has at least one access point; we avoid creating an +               access point after the last block by checking bit 6 of data_type +             */ +            if ((strm.data_type & 128) && !(strm.data_type & 64) && +                (totout == 0 || totout - last > span)) { +                index = addpoint(index, strm.data_type & 7, totin, +                                 totout, strm.avail_out, window); +                if (index == NULL) { +                    ret = Z_MEM_ERROR; +                    goto build_index_error; +                } +                last = totout; +            } +        } while (strm.avail_in != 0); +    } while (ret != Z_STREAM_END); + +    /* clean up and return index (release unused entries in list) -- it is the +       point list that is shrunk, not the access header; if realloc() fails +       the original, larger list is still valid and is kept as is */ +    (void)inflateEnd(&strm); +    { +        struct point *list = realloc(index->list, +                                     sizeof(struct point) * index->have); +        if (list != NULL) { +            index->list = list; +            index->size = index->have; +        } +    } +    *built = index; +    return index->have; + +    /* return error */ +  build_index_error: +    (void)inflateEnd(&strm); +    if (index != NULL) +        free_index(index); +    return ret; +} + +/* Use the index to read len bytes from offset into buf, return bytes read or +   negative for error (Z_DATA_ERROR or Z_MEM_ERROR).  
If data is requested past +   the end of the uncompressed data, then extract() will return a value less +   than len, indicating how much as actually read into buf.  This function +   should not return a data error unless the file was modified since the index +   was generated.  extract() may also return Z_ERRNO if there is an error on +   reading or seeking the input file. */ +local int extract(FILE *in, struct access *index, off_t offset, +                  unsigned char *buf, int len) +{ +    int ret, skip; +    z_stream strm; +    struct point *here; +    unsigned char input[CHUNK]; +    unsigned char discard[WINSIZE]; + +    /* proceed only if something reasonable to do */ +    if (len < 0) +        return 0; + +    /* find where in stream to start */ +    here = index->list; +    ret = index->have; +    while (--ret && here[1].out <= offset) +        here++; + +    /* initialize file and inflate state to start there */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    strm.avail_in = 0; +    strm.next_in = Z_NULL; +    ret = inflateInit2(&strm, -15);         /* raw inflate */ +    if (ret != Z_OK) +        return ret; +    ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET); +    if (ret == -1) +        goto extract_ret; +    if (here->bits) { +        ret = getc(in); +        if (ret == -1) { +            ret = ferror(in) ? 
Z_ERRNO : Z_DATA_ERROR; +            goto extract_ret; +        } +        (void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits)); +    } +    (void)inflateSetDictionary(&strm, here->window, WINSIZE); + +    /* skip uncompressed bytes until offset reached, then satisfy request */ +    offset -= here->out; +    strm.avail_in = 0; +    skip = 1;                               /* while skipping to offset */ +    do { +        /* define where to put uncompressed data, and how much */ +        if (offset == 0 && skip) {          /* at offset now */ +            strm.avail_out = len; +            strm.next_out = buf; +            skip = 0;                       /* only do this once */ +        } +        if (offset > WINSIZE) {             /* skip WINSIZE bytes */ +            strm.avail_out = WINSIZE; +            strm.next_out = discard; +            offset -= WINSIZE; +        } +        else if (offset != 0) {             /* last skip */ +            strm.avail_out = (unsigned)offset; +            strm.next_out = discard; +            offset = 0; +        } + +        /* uncompress until avail_out filled, or end of stream */ +        do { +            if (strm.avail_in == 0) { +                strm.avail_in = fread(input, 1, CHUNK, in); +                if (ferror(in)) { +                    ret = Z_ERRNO; +                    goto extract_ret; +                } +                if (strm.avail_in == 0) { +                    ret = Z_DATA_ERROR; +                    goto extract_ret; +                } +                strm.next_in = input; +            } +            ret = inflate(&strm, Z_NO_FLUSH);       /* normal inflate */ +            if (ret == Z_NEED_DICT) +                ret = Z_DATA_ERROR; +            if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) +                goto extract_ret; +            if (ret == Z_STREAM_END) +                break; +        } while (strm.avail_out != 0); + +        /* if reach end of stream, then don't keep trying to get 
more */ +        if (ret == Z_STREAM_END) +            break; + +        /* do until offset reached and requested data read, or stream ends */ +    } while (skip); + +    /* compute number of uncompressed bytes read after offset */ +    ret = skip ? 0 : len - strm.avail_out; + +    /* clean up and return bytes read or error */ +  extract_ret: +    (void)inflateEnd(&strm); +    return ret; +} + +/* Demonstrate the use of build_index() and extract() by processing the file +   provided on the command line, and the extracting 16K from about 2/3rds of +   the way through the uncompressed output, and writing that to stdout. */ +int main(int argc, char **argv) +{ +    int len; +    off_t offset; +    FILE *in; +    struct access *index; +    unsigned char buf[CHUNK]; + +    /* open input file */ +    if (argc != 2) { +        fprintf(stderr, "usage: zran file.gz\n"); +        return 1; +    } +    in = fopen(argv[1], "rb"); +    if (in == NULL) { +        fprintf(stderr, "zran: could not open %s for reading\n", argv[1]); +        return 1; +    } + +    /* build index */ +    len = build_index(in, SPAN, &index); +    if (len < 0) { +        fclose(in); +        switch (len) { +        case Z_MEM_ERROR: +            fprintf(stderr, "zran: out of memory\n"); +            break; +        case Z_DATA_ERROR: +            fprintf(stderr, "zran: compressed data error in %s\n", argv[1]); +            break; +        case Z_ERRNO: +            fprintf(stderr, "zran: read error on %s\n", argv[1]); +            break; +        default: +            fprintf(stderr, "zran: error %d while building index\n", len); +        } +        return 1; +    } +    fprintf(stderr, "zran: built index with %d access points\n", len); + +    /* use index by reading some bytes from an arbitrary offset */ +    offset = (index->list[index->have - 1].out << 1) / 3; +    len = extract(in, index, offset, buf, CHUNK); +    if (len < 0) +        fprintf(stderr, "zran: extraction failed: %s error\n", +        
        len == Z_MEM_ERROR ? "out of memory" : "input corrupted"); +    else { +        fwrite(buf, 1, len, stdout); +        fprintf(stderr, "zran: extracted %d bytes at %llu\n", len, offset); +    } + +    /* clean up and exit */ +    free_index(index); +    fclose(in); +    return 0; +}  | 
