| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449 | 
							- /* gzjoin -- command to join gzip files into one gzip file
 
-   Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved
 
-   version 1.2, 14 Aug 2012
 
-   This software is provided 'as-is', without any express or implied
 
-   warranty.  In no event will the author be held liable for any damages
 
-   arising from the use of this software.
 
-   Permission is granted to anyone to use this software for any purpose,
 
-   including commercial applications, and to alter it and redistribute it
 
-   freely, subject to the following restrictions:
 
-   1. The origin of this software must not be misrepresented; you must not
 
-      claim that you wrote the original software. If you use this software
 
-      in a product, an acknowledgment in the product documentation would be
 
-      appreciated but is not required.
 
-   2. Altered source versions must be plainly marked as such, and must not be
 
-      misrepresented as being the original software.
 
-   3. This notice may not be removed or altered from any source distribution.
 
-   Mark Adler    madler@alumni.caltech.edu
 
-  */
 
- /*
 
-  * Change history:
 
-  *
 
-  * 1.0  11 Dec 2004     - First version
 
-  * 1.1  12 Jun 2005     - Changed ssize_t to long for portability
 
-  * 1.2  14 Aug 2012     - Clean up for z_const usage
 
-  */
 
- /*
 
-    gzjoin takes one or more gzip files on the command line and writes out a
 
-    single gzip file that will uncompress to the concatenation of the
 
-    uncompressed data from the individual gzip files.  gzjoin does this without
 
-    having to recompress any of the data and without having to calculate a new
 
-    crc32 for the concatenated uncompressed data.  gzjoin does however have to
 
-    decompress all of the input data in order to find the bits in the compressed
 
-    data that need to be modified to concatenate the streams.
 
-    gzjoin does not do an integrity check on the input gzip files other than
 
-    checking the gzip header and decompressing the compressed data.  They are
 
-    otherwise assumed to be complete and correct.
 
-    Each joint between gzip files removes at least 18 bytes of previous trailer
 
-    and subsequent header, and inserts an average of about three bytes to the
 
-    compressed data in order to connect the streams.  The output gzip file
 
-    has a minimal ten-byte gzip header with no file name or modification time.
 
-    This program was written to illustrate the use of the Z_BLOCK option of
 
-    inflate() and the crc32_combine() function.  gzjoin will not compile with
 
-    versions of zlib earlier than 1.2.3.
 
-  */
 
- #include <stdio.h>      /* fputs(), fprintf(), fwrite(), putc() */
 
- #include <stdlib.h>     /* exit(), malloc(), free() */
 
- #include <fcntl.h>      /* open() */
 
- #include <unistd.h>     /* close(), read(), lseek() */
 
- #include "zlib.h"
 
-     /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
 
- #define local static
 
/* Report a fatal error on stderr and terminate the program with exit status
   1.  The message printed is why1 immediately followed by why2.  The function
   is declared to return int (the return statement is never reached) so that a
   call can be used as an operand in an expression, as the bget() macro does.
   Neither string is modified, so both are const-qualified and string literals
   can be passed without a warning. */
local int bail(const char *why1, const char *why2)
{
    fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
    exit(1);
    return 0;
}
 
- /* -- simple buffered file input with access to the buffer -- */
 
/* size in bytes of the input buffer -- has to be a power of two, and the value
   has to fit in an unsigned */
#define CHUNK 32768

/* state of one buffered input file */
typedef struct {
    char *name;             /* file name, kept for use in error messages */
    int fd;                 /* descriptor of the open file */
    unsigned left;          /* count of unread bytes available at next */
    unsigned char *next;    /* position of the next unread byte */
    unsigned char *buf;     /* allocated buffer, CHUNK bytes long */
} bin;
 
- /* close a buffered file and free allocated memory */
 
- local void bclose(bin *in)
 
- {
 
-     if (in != NULL) {
 
-         if (in->fd != -1)
 
-             close(in->fd);
 
-         if (in->buf != NULL)
 
-             free(in->buf);
 
-         free(in);
 
-     }
 
- }
 
- /* open a buffered file for input, return a pointer to type bin, or NULL on
 
-    failure */
 
- local bin *bopen(char *name)
 
- {
 
-     bin *in;
 
-     in = malloc(sizeof(bin));
 
-     if (in == NULL)
 
-         return NULL;
 
-     in->buf = malloc(CHUNK);
 
-     in->fd = open(name, O_RDONLY, 0);
 
-     if (in->buf == NULL || in->fd == -1) {
 
-         bclose(in);
 
-         return NULL;
 
-     }
 
-     in->left = 0;
 
-     in->next = in->buf;
 
-     in->name = name;
 
-     return in;
 
- }
 
- /* load buffer from file, return -1 on read error, 0 or 1 on success, with
 
-    1 indicating that end-of-file was reached */
 
- local int bload(bin *in)
 
- {
 
-     long len;
 
-     if (in == NULL)
 
-         return -1;
 
-     if (in->left != 0)
 
-         return 0;
 
-     in->next = in->buf;
 
-     do {
 
-         len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
 
-         if (len < 0)
 
-             return -1;
 
-         in->left += (unsigned)len;
 
-     } while (len != 0 && in->left < CHUNK);
 
-     return len == 0 ? 1 : 0;
 
- }
 
/* Get the next byte from the buffered file in, calling bload() first when the
   buffer is empty.  If there is still nothing to read after that, bail() is
   called, which exits the program.  The argument is parenthesized wherever it
   is used so that any pointer-valued expression can be passed, but it is
   evaluated more than once, so it must not have side effects. */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                    bail("unexpected end of file on ", (in)->name))
 
- /* get a four-byte little-endian unsigned integer from file */
 
- local unsigned long bget4(bin *in)
 
- {
 
-     unsigned long val;
 
-     val = bget(in);
 
-     val += (unsigned long)(bget(in)) << 8;
 
-     val += (unsigned long)(bget(in)) << 16;
 
-     val += (unsigned long)(bget(in)) << 24;
 
-     return val;
 
- }
 
- /* skip bytes in file */
 
- local void bskip(bin *in, unsigned skip)
 
- {
 
-     /* check pointer */
 
-     if (in == NULL)
 
-         return;
 
-     /* easy case -- skip bytes in buffer */
 
-     if (skip <= in->left) {
 
-         in->left -= skip;
 
-         in->next += skip;
 
-         return;
 
-     }
 
-     /* skip what's in buffer, discard buffer contents */
 
-     skip -= in->left;
 
-     in->left = 0;
 
-     /* seek past multiples of CHUNK bytes */
 
-     if (skip > CHUNK) {
 
-         unsigned left;
 
-         left = skip & (CHUNK - 1);
 
-         if (left == 0) {
 
-             /* exact number of chunks: seek all the way minus one byte to check
 
-                for end-of-file with a read */
 
-             lseek(in->fd, skip - 1, SEEK_CUR);
 
-             if (read(in->fd, in->buf, 1) != 1)
 
-                 bail("unexpected end of file on ", in->name);
 
-             return;
 
-         }
 
-         /* skip the integral chunks, update skip with remainder */
 
-         lseek(in->fd, skip - left, SEEK_CUR);
 
-         skip = left;
 
-     }
 
-     /* read more input and skip remainder */
 
-     bload(in);
 
-     if (skip > in->left)
 
-         bail("unexpected end of file on ", in->name);
 
-     in->left -= skip;
 
-     in->next += skip;
 
- }
 
- /* -- end of buffered input functions -- */
 
- /* skip the gzip header from file in */
 
- local void gzhead(bin *in)
 
- {
 
-     int flags;
 
-     /* verify gzip magic header and compression method */
 
-     if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
 
-         bail(in->name, " is not a valid gzip file");
 
-     /* get and verify flags */
 
-     flags = bget(in);
 
-     if ((flags & 0xe0) != 0)
 
-         bail("unknown reserved bits set in ", in->name);
 
-     /* skip modification time, extra flags, and os */
 
-     bskip(in, 6);
 
-     /* skip extra field if present */
 
-     if (flags & 4) {
 
-         unsigned len;
 
-         len = bget(in);
 
-         len += (unsigned)(bget(in)) << 8;
 
-         bskip(in, len);
 
-     }
 
-     /* skip file name if present */
 
-     if (flags & 8)
 
-         while (bget(in) != 0)
 
-             ;
 
-     /* skip comment if present */
 
-     if (flags & 16)
 
-         while (bget(in) != 0)
 
-             ;
 
-     /* skip header crc if present */
 
-     if (flags & 2)
 
-         bskip(in, 2);
 
- }
 
/* emit the low 32 bits of val to out as four bytes, least significant byte
   first */
local void put4(unsigned long val, FILE *out)
{
    int n;

    for (n = 0; n < 4; n++) {
        putc((int)(val & 0xff), out);
        val >>= 8;
    }
}
 
- /* Load up zlib stream from buffered input, bail if end of file */
 
- local void zpull(z_streamp strm, bin *in)
 
- {
 
-     if (in->left == 0)
 
-         bload(in);
 
-     if (in->left == 0)
 
-         bail("unexpected end of file on ", in->name);
 
-     strm->avail_in = in->left;
 
-     strm->next_in = in->next;
 
- }
 
- /* Write header for gzip file to out and initialize trailer. */
 
- local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
 
- {
 
-     fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
 
-     *crc = crc32(0L, Z_NULL, 0);
 
-     *tot = 0;
 
- }
 
- /* Copy the compressed data from name, zeroing the last block bit of the last
 
-    block if clr is true, and adding empty blocks as needed to get to a byte
 
-    boundary.  If clr is false, then the last block becomes the last block of
 
-    the output, and the gzip trailer is written.  crc and tot maintains the
 
-    crc and length (modulo 2^32) of the output for the trailer.  The resulting
 
-    gzip file is written to out.  gzinit() must be called before the first call
 
-    of gzcopy() to write the gzip header and to initialize crc and tot. */
 
- local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
 
-                   FILE *out)
 
- {
 
-     int ret;                /* return value from zlib functions */
 
-     int pos;                /* where the "last block" bit is in byte */
 
-     int last;               /* true if processing the last block */
 
-     bin *in;                /* buffered input file */
 
-     unsigned char *start;   /* start of compressed data in buffer */
 
-     unsigned char *junk;    /* buffer for uncompressed data -- discarded */
 
-     z_off_t len;            /* length of uncompressed data (support > 4 GB) */
 
-     z_stream strm;          /* zlib inflate stream */
 
-     /* open gzip file and skip header */
 
-     in = bopen(name);
 
-     if (in == NULL)
 
-         bail("could not open ", name);
 
-     gzhead(in);
 
-     /* allocate buffer for uncompressed data and initialize raw inflate
 
-        stream */
 
-     junk = malloc(CHUNK);
 
-     strm.zalloc = Z_NULL;
 
-     strm.zfree = Z_NULL;
 
-     strm.opaque = Z_NULL;
 
-     strm.avail_in = 0;
 
-     strm.next_in = Z_NULL;
 
-     ret = inflateInit2(&strm, -15);
 
-     if (junk == NULL || ret != Z_OK)
 
-         bail("out of memory", "");
 
-     /* inflate and copy compressed data, clear last-block bit if requested */
 
-     len = 0;
 
-     zpull(&strm, in);
 
-     start = in->next;
 
-     last = start[0] & 1;
 
-     if (last && clr)
 
-         start[0] &= ~1;
 
-     strm.avail_out = 0;
 
-     for (;;) {
 
-         /* if input used and output done, write used input and get more */
 
-         if (strm.avail_in == 0 && strm.avail_out != 0) {
 
-             fwrite(start, 1, strm.next_in - start, out);
 
-             start = in->buf;
 
-             in->left = 0;
 
-             zpull(&strm, in);
 
-         }
 
-         /* decompress -- return early when end-of-block reached */
 
-         strm.avail_out = CHUNK;
 
-         strm.next_out = junk;
 
-         ret = inflate(&strm, Z_BLOCK);
 
-         switch (ret) {
 
-         case Z_MEM_ERROR:
 
-             bail("out of memory", "");
 
-         case Z_DATA_ERROR:
 
-             bail("invalid compressed data in ", in->name);
 
-         }
 
-         /* update length of uncompressed data */
 
-         len += CHUNK - strm.avail_out;
 
-         /* check for block boundary (only get this when block copied out) */
 
-         if (strm.data_type & 128) {
 
-             /* if that was the last block, then done */
 
-             if (last)
 
-                 break;
 
-             /* number of unused bits in last byte */
 
-             pos = strm.data_type & 7;
 
-             /* find the next last-block bit */
 
-             if (pos != 0) {
 
-                 /* next last-block bit is in last used byte */
 
-                 pos = 0x100 >> pos;
 
-                 last = strm.next_in[-1] & pos;
 
-                 if (last && clr)
 
-                     in->buf[strm.next_in - in->buf - 1] &= ~pos;
 
-             }
 
-             else {
 
-                 /* next last-block bit is in next unused byte */
 
-                 if (strm.avail_in == 0) {
 
-                     /* don't have that byte yet -- get it */
 
-                     fwrite(start, 1, strm.next_in - start, out);
 
-                     start = in->buf;
 
-                     in->left = 0;
 
-                     zpull(&strm, in);
 
-                 }
 
-                 last = strm.next_in[0] & 1;
 
-                 if (last && clr)
 
-                     in->buf[strm.next_in - in->buf] &= ~1;
 
-             }
 
-         }
 
-     }
 
-     /* update buffer with unused input */
 
-     in->left = strm.avail_in;
 
-     in->next = in->buf + (strm.next_in - in->buf);
 
-     /* copy used input, write empty blocks to get to byte boundary */
 
-     pos = strm.data_type & 7;
 
-     fwrite(start, 1, in->next - start - 1, out);
 
-     last = in->next[-1];
 
-     if (pos == 0 || !clr)
 
-         /* already at byte boundary, or last file: write last byte */
 
-         putc(last, out);
 
-     else {
 
-         /* append empty blocks to last byte */
 
-         last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */
 
-         if (pos & 1) {
 
-             /* odd -- append an empty stored block */
 
-             putc(last, out);
 
-             if (pos == 1)
 
-                 putc(0, out);               /* two more bits in block header */
 
-             fwrite("\0\0\xff\xff", 1, 4, out);
 
-         }
 
-         else {
 
-             /* even -- append 1, 2, or 3 empty fixed blocks */
 
-             switch (pos) {
 
-             case 6:
 
-                 putc(last | 8, out);
 
-                 last = 0;
 
-             case 4:
 
-                 putc(last | 0x20, out);
 
-                 last = 0;
 
-             case 2:
 
-                 putc(last | 0x80, out);
 
-                 putc(0, out);
 
-             }
 
-         }
 
-     }
 
-     /* update crc and tot */
 
-     *crc = crc32_combine(*crc, bget4(in), len);
 
-     *tot += (unsigned long)len;
 
-     /* clean up */
 
-     inflateEnd(&strm);
 
-     free(junk);
 
-     bclose(in);
 
-     /* write trailer if this is the last gzip file */
 
-     if (!clr) {
 
-         put4(*crc, out);
 
-         put4(*tot, out);
 
-     }
 
- }
 
/* Join the gzip files named on the command line and write the result to
   stdout.  With no arguments, a usage message is written to stderr and the
   exit status is 0.  Otherwise the exit status is 0 on success; any failure,
   including a failure to write the output, ends the program through bail(). */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* skip command name */
    argc--;
    argv++;

    /* show usage if no arguments */
    if (argc == 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* join gzip files on command line and write to stdout -- argc has
       already been decremented when it is passed, so it is the number of
       files still to come: non-zero (clear the last-block bit) for every file
       but the last one */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* push out buffered output and make sure all of it was written, so that
       a full disk or closed pipe does not go unreported */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("write error on ", "stdout");

    /* done */
    return 0;
}
 
 
  |