1205194Sdelphij
2205194Sdelphij
3205194Sdelphij
4205194Sdelphij
5205194Sdelphij
6205194Sdelphij
7205194SdelphijNetwork Working Group                                         P. Deutsch
8205194SdelphijRequest for Comments: 1952                           Aladdin Enterprises
9205194SdelphijCategory: Informational                                         May 1996
10205194Sdelphij
11205194Sdelphij
12205194Sdelphij               GZIP file format specification version 4.3
13205194Sdelphij
14205194SdelphijStatus of This Memo
15205194Sdelphij
16205194Sdelphij   This memo provides information for the Internet community.  This memo
17205194Sdelphij   does not specify an Internet standard of any kind.  Distribution of
18205194Sdelphij   this memo is unlimited.
19205194Sdelphij
20205194SdelphijIESG Note:
21205194Sdelphij
22205194Sdelphij   The IESG takes no position on the validity of any Intellectual
23205194Sdelphij   Property Rights statements contained in this document.
24205194Sdelphij
25205194SdelphijNotices
26205194Sdelphij
27205194Sdelphij   Copyright (c) 1996 L. Peter Deutsch
28205194Sdelphij
29205194Sdelphij   Permission is granted to copy and distribute this document for any
30205194Sdelphij   purpose and without charge, including translations into other
31205194Sdelphij   languages and incorporation into compilations, provided that the
32205194Sdelphij   copyright notice and this notice are preserved, and that any
33205194Sdelphij   substantive changes or deletions from the original are clearly
34205194Sdelphij   marked.
35205194Sdelphij
36205194Sdelphij   A pointer to the latest version of this and related documentation in
37205194Sdelphij   HTML format can be found at the URL
38205194Sdelphij   <ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>.
39205194Sdelphij
40205194SdelphijAbstract
41205194Sdelphij
42205194Sdelphij   This specification defines a lossless compressed data format that is
43205194Sdelphij   compatible with the widely used GZIP utility.  The format includes a
44205194Sdelphij   cyclic redundancy check value for detecting data corruption.  The
45205194Sdelphij   format presently uses the DEFLATE method of compression but can be
46205194Sdelphij   easily extended to use other compression methods.  The format can be
47205194Sdelphij   implemented readily in a manner not covered by patents.
48205194Sdelphij
49205194Sdelphij
50205194Sdelphij
51205194Sdelphij
52205194Sdelphij
53205194Sdelphij
54205194Sdelphij
55205194Sdelphij
56205194Sdelphij
57205194Sdelphij
58205194SdelphijDeutsch                      Informational                      [Page 1]
59205194Sdelphij
60205194SdelphijRFC 1952             GZIP File Format Specification             May 1996
61205194Sdelphij
62205194Sdelphij
63205194SdelphijTable of Contents
64205194Sdelphij
65205194Sdelphij   1. Introduction ................................................... 2
66205194Sdelphij      1.1. Purpose ................................................... 2
67205194Sdelphij      1.2. Intended audience ......................................... 3
68205194Sdelphij      1.3. Scope ..................................................... 3
69205194Sdelphij      1.4. Compliance ................................................ 3
70205194Sdelphij      1.5. Definitions of terms and conventions used ................. 3
71205194Sdelphij      1.6. Changes from previous versions ............................ 3
72205194Sdelphij   2. Detailed specification ......................................... 4
73205194Sdelphij      2.1. Overall conventions ....................................... 4
74205194Sdelphij      2.2. File format ............................................... 5
75205194Sdelphij      2.3. Member format ............................................. 5
76205194Sdelphij          2.3.1. Member header and trailer ........................... 6
77205194Sdelphij              2.3.1.1. Extra field ................................... 8
78205194Sdelphij              2.3.1.2. Compliance .................................... 9
79205194Sdelphij   3. References ..................................................... 9
80205194Sdelphij   4. Security Considerations ....................................... 10
81205194Sdelphij   5. Acknowledgements .............................................. 10
82205194Sdelphij   6. Author's Address .............................................. 10
83205194Sdelphij   7. Appendix: Jean-Loup Gailly's gzip utility ..................... 11
84205194Sdelphij   8. Appendix: Sample CRC Code ..................................... 11
85205194Sdelphij
86205194Sdelphij1. Introduction
87205194Sdelphij
88205194Sdelphij   1.1. Purpose
89205194Sdelphij
90205194Sdelphij      The purpose of this specification is to define a lossless
91205194Sdelphij      compressed data format that:
92205194Sdelphij
93205194Sdelphij          * Is independent of CPU type, operating system, file system,
94205194Sdelphij            and character set, and hence can be used for interchange;
95205194Sdelphij          * Can compress or decompress a data stream (as opposed to a
96205194Sdelphij            randomly accessible file) to produce another data stream,
97205194Sdelphij            using only an a priori bounded amount of intermediate
98205194Sdelphij            storage, and hence can be used in data communications or
99205194Sdelphij            similar structures such as Unix filters;
100205194Sdelphij          * Compresses data with efficiency comparable to the best
101205194Sdelphij            currently available general-purpose compression methods,
102205194Sdelphij            and in particular considerably better than the "compress"
103205194Sdelphij            program;
104205194Sdelphij          * Can be implemented readily in a manner not covered by
105205194Sdelphij            patents, and hence can be practiced freely;
106205194Sdelphij          * Is compatible with the file format produced by the current
107205194Sdelphij            widely used gzip utility, in that conforming decompressors
108205194Sdelphij            will be able to read data produced by the existing gzip
109205194Sdelphij            compressor.
110205194Sdelphij
111205194Sdelphij
112205194Sdelphij
113205194Sdelphij
114205194SdelphijDeutsch                      Informational                      [Page 2]
115205194Sdelphij
116205194SdelphijRFC 1952             GZIP File Format Specification             May 1996
117205194Sdelphij
118205194Sdelphij
119205194Sdelphij      The data format defined by this specification does not attempt to:
120205194Sdelphij
121205194Sdelphij          * Provide random access to compressed data;
122205194Sdelphij          * Compress specialized data (e.g., raster graphics) as well as
123205194Sdelphij            the best currently available specialized algorithms.
124205194Sdelphij
125205194Sdelphij   1.2. Intended audience
126205194Sdelphij
127205194Sdelphij      This specification is intended for use by implementors of software
128205194Sdelphij      to compress data into gzip format and/or decompress data from gzip
129205194Sdelphij      format.
130205194Sdelphij
131205194Sdelphij      The text of the specification assumes a basic background in
132205194Sdelphij      programming at the level of bits and other primitive data
133205194Sdelphij      representations.
134205194Sdelphij
135205194Sdelphij   1.3. Scope
136205194Sdelphij
137205194Sdelphij      The specification specifies a compression method and a file format
138205194Sdelphij      (the latter assuming only that a file can store a sequence of
139205194Sdelphij      arbitrary bytes).  It does not specify any particular interface to
140205194Sdelphij      a file system or anything about character sets or encodings
141205194Sdelphij      (except for file names and comments, which are optional).
142205194Sdelphij
143205194Sdelphij   1.4. Compliance
144205194Sdelphij
145205194Sdelphij      Unless otherwise indicated below, a compliant decompressor must be
146205194Sdelphij      able to accept and decompress any file that conforms to all the
147205194Sdelphij      specifications presented here; a compliant compressor must produce
148205194Sdelphij      files that conform to all the specifications presented here.  The
149205194Sdelphij      material in the appendices is not part of the specification per se
150205194Sdelphij      and is not relevant to compliance.
151205194Sdelphij
152205194Sdelphij   1.5. Definitions of terms and conventions used
153205194Sdelphij
154205194Sdelphij      byte: 8 bits stored or transmitted as a unit (same as an octet).
155205194Sdelphij      (For this specification, a byte is exactly 8 bits, even on
156205194Sdelphij      machines which store a character on a number of bits different
157205194Sdelphij      from 8.)  See below for the numbering of bits within a byte.
158205194Sdelphij
159205194Sdelphij   1.6. Changes from previous versions
160205194Sdelphij
161205194Sdelphij      There have been no technical changes to the gzip format since
162205194Sdelphij      version 4.1 of this specification.  In version 4.2, some
163205194Sdelphij      terminology was changed, and the sample CRC code was rewritten for
164205194Sdelphij      clarity and to eliminate the requirement for the caller to do pre-
165205194Sdelphij      and post-conditioning.  Version 4.3 is a conversion of the
166205194Sdelphij      specification to RFC style.
167205194Sdelphij
168205194Sdelphij
169205194Sdelphij
170205194SdelphijDeutsch                      Informational                      [Page 3]
171205194Sdelphij
172205194SdelphijRFC 1952             GZIP File Format Specification             May 1996
173205194Sdelphij
174205194Sdelphij
175205194Sdelphij2. Detailed specification
176205194Sdelphij
177205194Sdelphij   2.1. Overall conventions
178205194Sdelphij
179205194Sdelphij      In the diagrams below, a box like this:
180205194Sdelphij
181205194Sdelphij         +---+
182205194Sdelphij         |   | <-- the vertical bars might be missing
183205194Sdelphij         +---+
184205194Sdelphij
185205194Sdelphij      represents one byte; a box like this:
186205194Sdelphij
187205194Sdelphij         +==============+
188205194Sdelphij         |              |
189205194Sdelphij         +==============+
190205194Sdelphij
191205194Sdelphij      represents a variable number of bytes.
192205194Sdelphij
193205194Sdelphij      Bytes stored within a computer do not have a "bit order", since
194205194Sdelphij      they are always treated as a unit.  However, a byte considered as
195205194Sdelphij      an integer between 0 and 255 does have a most- and least-
196205194Sdelphij      significant bit, and since we write numbers with the most-
197205194Sdelphij      significant digit on the left, we also write bytes with the most-
198205194Sdelphij      significant bit on the left.  In the diagrams below, we number the
199205194Sdelphij      bits of a byte so that bit 0 is the least-significant bit, i.e.,
200205194Sdelphij      the bits are numbered:
201205194Sdelphij
202205194Sdelphij         +--------+
203205194Sdelphij         |76543210|
204205194Sdelphij         +--------+
205205194Sdelphij
206205194Sdelphij      This document does not address the issue of the order in which
207205194Sdelphij      bits of a byte are transmitted on a bit-sequential medium, since
208205194Sdelphij      the data format described here is byte- rather than bit-oriented.
209205194Sdelphij
210205194Sdelphij      Within a computer, a number may occupy multiple bytes.  All
211205194Sdelphij      multi-byte numbers in the format described here are stored with
212205194Sdelphij      the least-significant byte first (at the lower memory address).
213205194Sdelphij      For example, the decimal number 520 is stored as:
214205194Sdelphij
215205194Sdelphij             0        1
216205194Sdelphij         +--------+--------+
217205194Sdelphij         |00001000|00000010|
218205194Sdelphij         +--------+--------+
219205194Sdelphij          ^        ^
220205194Sdelphij          |        |
221205194Sdelphij          |        + more significant byte = 2 x 256
222205194Sdelphij          + less significant byte = 8
223205194Sdelphij
224205194Sdelphij
225205194Sdelphij
226205194SdelphijDeutsch                      Informational                      [Page 4]
227205194Sdelphij
228205194SdelphijRFC 1952             GZIP File Format Specification             May 1996
229205194Sdelphij
230205194Sdelphij
231205194Sdelphij   2.2. File format
232205194Sdelphij
233205194Sdelphij      A gzip file consists of a series of "members" (compressed data
234205194Sdelphij      sets).  The format of each member is specified in the following
235205194Sdelphij      section.  The members simply appear one after another in the file,
236205194Sdelphij      with no additional information before, between, or after them.
237205194Sdelphij
238205194Sdelphij   2.3. Member format
239205194Sdelphij
240205194Sdelphij      Each member has the following structure:
241205194Sdelphij
242205194Sdelphij         +---+---+---+---+---+---+---+---+---+---+
243205194Sdelphij         |ID1|ID2|CM |FLG|     MTIME     |XFL|OS | (more-->)
244205194Sdelphij         +---+---+---+---+---+---+---+---+---+---+
245205194Sdelphij
246205194Sdelphij      (if FLG.FEXTRA set)
247205194Sdelphij
248205194Sdelphij         +---+---+=================================+
249205194Sdelphij         | XLEN  |...XLEN bytes of "extra field"...| (more-->)
250205194Sdelphij         +---+---+=================================+
251205194Sdelphij
252205194Sdelphij      (if FLG.FNAME set)
253205194Sdelphij
254205194Sdelphij         +=========================================+
255205194Sdelphij         |...original file name, zero-terminated...| (more-->)
256205194Sdelphij         +=========================================+
257205194Sdelphij
258205194Sdelphij      (if FLG.FCOMMENT set)
259205194Sdelphij
260205194Sdelphij         +===================================+
261205194Sdelphij         |...file comment, zero-terminated...| (more-->)
262205194Sdelphij         +===================================+
263205194Sdelphij
264205194Sdelphij      (if FLG.FHCRC set)
265205194Sdelphij
266205194Sdelphij         +---+---+
267205194Sdelphij         | CRC16 |
268205194Sdelphij         +---+---+
269205194Sdelphij
270205194Sdelphij         +=======================+
271205194Sdelphij         |...compressed blocks...| (more-->)
272205194Sdelphij         +=======================+
273205194Sdelphij
274205194Sdelphij           0   1   2   3   4   5   6   7
275205194Sdelphij         +---+---+---+---+---+---+---+---+
276205194Sdelphij         |     CRC32     |     ISIZE     |
277205194Sdelphij         +---+---+---+---+---+---+---+---+
278205194Sdelphij
279205194Sdelphij
280205194Sdelphij
281205194Sdelphij
282205194SdelphijDeutsch                      Informational                      [Page 5]
283205194Sdelphij
284205194SdelphijRFC 1952             GZIP File Format Specification             May 1996
285205194Sdelphij
286205194Sdelphij
287205194Sdelphij      2.3.1. Member header and trailer
288205194Sdelphij
289205194Sdelphij         ID1 (IDentification 1)
290205194Sdelphij         ID2 (IDentification 2)
291205194Sdelphij            These have the fixed values ID1 = 31 (0x1f, \037), ID2 = 139
292205194Sdelphij            (0x8b, \213), to identify the file as being in gzip format.
293205194Sdelphij
294205194Sdelphij         CM (Compression Method)
295205194Sdelphij            This identifies the compression method used in the file.  CM
296205194Sdelphij            = 0-7 are reserved.  CM = 8 denotes the "deflate"
297205194Sdelphij            compression method, which is the one customarily used by
298205194Sdelphij            gzip and which is documented elsewhere.
299205194Sdelphij
300205194Sdelphij         FLG (FLaGs)
301205194Sdelphij            This flag byte is divided into individual bits as follows:
302205194Sdelphij
303205194Sdelphij               bit 0   FTEXT
304205194Sdelphij               bit 1   FHCRC
305205194Sdelphij               bit 2   FEXTRA
306205194Sdelphij               bit 3   FNAME
307205194Sdelphij               bit 4   FCOMMENT
308205194Sdelphij               bit 5   reserved
309205194Sdelphij               bit 6   reserved
310205194Sdelphij               bit 7   reserved
311205194Sdelphij
312205194Sdelphij            If FTEXT is set, the file is probably ASCII text.  This is
313205194Sdelphij            an optional indication, which the compressor may set by
314205194Sdelphij            checking a small amount of the input data to see whether any
315205194Sdelphij            non-ASCII characters are present.  In case of doubt, FTEXT
316205194Sdelphij            is cleared, indicating binary data. For systems which have
317205194Sdelphij            different file formats for ASCII text and binary data, the
318205194Sdelphij            decompressor can use FTEXT to choose the appropriate format.
319205194Sdelphij            We deliberately do not specify the algorithm used to set
320205194Sdelphij            this bit, since a compressor always has the option of
321205194Sdelphij            leaving it cleared and a decompressor always has the option
322205194Sdelphij            of ignoring it and letting some other program handle issues
323205194Sdelphij            of data conversion.
324205194Sdelphij
325205194Sdelphij            If FHCRC is set, a CRC16 for the gzip header is present,
326205194Sdelphij            immediately before the compressed data. The CRC16 consists
327205194Sdelphij            of the two least significant bytes of the CRC32 for all
328205194Sdelphij            bytes of the gzip header up to but not including the CRC16.
329205194Sdelphij            [The FHCRC bit was never set by versions of gzip up to
330205194Sdelphij            1.2.4, even though it was documented with a different
331205194Sdelphij            meaning in gzip 1.2.4.]
332205194Sdelphij
333205194Sdelphij            If FEXTRA is set, optional extra fields are present, as
334205194Sdelphij            described in a following section.
335205194Sdelphij
336205194Sdelphij
337205194Sdelphij
338205194SdelphijDeutsch                      Informational                      [Page 6]
339205194Sdelphij
340205194SdelphijRFC 1952             GZIP File Format Specification             May 1996
341205194Sdelphij
342205194Sdelphij
343205194Sdelphij            If FNAME is set, an original file name is present,
344205194Sdelphij            terminated by a zero byte.  The name must consist of ISO
345205194Sdelphij            8859-1 (LATIN-1) characters; on operating systems using
346205194Sdelphij            EBCDIC or any other character set for file names, the name
347205194Sdelphij            must be translated to the ISO LATIN-1 character set.  This
348205194Sdelphij            is the original name of the file being compressed, with any
349205194Sdelphij            directory components removed, and, if the file being
350205194Sdelphij            compressed is on a file system with case insensitive names,
351205194Sdelphij            forced to lower case. There is no original file name if the
352205194Sdelphij            data was compressed from a source other than a named file;
353205194Sdelphij            for example, if the source was stdin on a Unix system, there
354205194Sdelphij            is no file name.
355205194Sdelphij
356205194Sdelphij            If FCOMMENT is set, a zero-terminated file comment is
357205194Sdelphij            present.  This comment is not interpreted; it is only
358205194Sdelphij            intended for human consumption.  The comment must consist of
359205194Sdelphij            ISO 8859-1 (LATIN-1) characters.  Line breaks should be
360205194Sdelphij            denoted by a single line feed character (10 decimal).
361205194Sdelphij
362205194Sdelphij            Reserved FLG bits must be zero.
363205194Sdelphij
364205194Sdelphij         MTIME (Modification TIME)
365205194Sdelphij            This gives the most recent modification time of the original
366205194Sdelphij            file being compressed.  The time is in Unix format, i.e.,
367205194Sdelphij            seconds since 00:00:00 GMT, Jan.  1, 1970.  (Note that this
368205194Sdelphij            may cause problems for MS-DOS and other systems that use
369205194Sdelphij            local rather than Universal time.)  If the compressed data
370205194Sdelphij            did not come from a file, MTIME is set to the time at which
371205194Sdelphij            compression started.  MTIME = 0 means no time stamp is
372205194Sdelphij            available.
373205194Sdelphij
374205194Sdelphij         XFL (eXtra FLags)
375205194Sdelphij            These flags are available for use by specific compression
376205194Sdelphij            methods.  The "deflate" method (CM = 8) sets these flags as
377205194Sdelphij            follows:
378205194Sdelphij
379205194Sdelphij               XFL = 2 - compressor used maximum compression,
380205194Sdelphij                         slowest algorithm
381205194Sdelphij               XFL = 4 - compressor used fastest algorithm
382205194Sdelphij
383205194Sdelphij         OS (Operating System)
384205194Sdelphij            This identifies the type of file system on which compression
385205194Sdelphij            took place.  This may be useful in determining end-of-line
386205194Sdelphij            convention for text files.  The currently defined values are
387205194Sdelphij            as follows:
388205194Sdelphij
389205194Sdelphij
390205194Sdelphij
391205194Sdelphij
392205194Sdelphij
393205194Sdelphij
394205194SdelphijDeutsch                      Informational                      [Page 7]
395205194Sdelphij
396205194SdelphijRFC 1952             GZIP File Format Specification             May 1996
397205194Sdelphij
398205194Sdelphij
399205194Sdelphij                 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
400205194Sdelphij                 1 - Amiga
401205194Sdelphij                 2 - VMS (or OpenVMS)
402205194Sdelphij                 3 - Unix
403205194Sdelphij                 4 - VM/CMS
404205194Sdelphij                 5 - Atari TOS
405205194Sdelphij                 6 - HPFS filesystem (OS/2, NT)
406205194Sdelphij                 7 - Macintosh
407205194Sdelphij                 8 - Z-System
408205194Sdelphij                 9 - CP/M
409205194Sdelphij                10 - TOPS-20
410205194Sdelphij                11 - NTFS filesystem (NT)
411205194Sdelphij                12 - QDOS
412205194Sdelphij                13 - Acorn RISCOS
413205194Sdelphij               255 - unknown
414205194Sdelphij
415205194Sdelphij         XLEN (eXtra LENgth)
416205194Sdelphij            If FLG.FEXTRA is set, this gives the length of the optional
417205194Sdelphij            extra field.  See below for details.
418205194Sdelphij
419205194Sdelphij         CRC32 (CRC-32)
420205194Sdelphij            This contains a Cyclic Redundancy Check value of the
421205194Sdelphij            uncompressed data computed according to the CRC-32 algorithm
422205194Sdelphij            used in the ISO 3309 standard and in section 8.1.1.6.2 of
423205194Sdelphij            ITU-T recommendation V.42.  (See http://www.iso.ch for
424205194Sdelphij            ordering ISO documents. See gopher://info.itu.ch for an
425205194Sdelphij            online version of ITU-T V.42.)
426205194Sdelphij
427205194Sdelphij         ISIZE (Input SIZE)
428205194Sdelphij            This contains the size of the original (uncompressed) input
429205194Sdelphij            data modulo 2^32.
430205194Sdelphij
431205194Sdelphij      2.3.1.1. Extra field
432205194Sdelphij
433205194Sdelphij         If the FLG.FEXTRA bit is set, an "extra field" is present in
434205194Sdelphij         the header, with total length XLEN bytes.  It consists of a
435205194Sdelphij         series of subfields, each of the form:
436205194Sdelphij
437205194Sdelphij            +---+---+---+---+==================================+
438205194Sdelphij            |SI1|SI2|  LEN  |... LEN bytes of subfield data ...|
439205194Sdelphij            +---+---+---+---+==================================+
440205194Sdelphij
441205194Sdelphij         SI1 and SI2 provide a subfield ID, typically two ASCII letters
442205194Sdelphij         with some mnemonic value.  Jean-Loup Gailly
443205194Sdelphij         <gzip@prep.ai.mit.edu> is maintaining a registry of subfield
444205194Sdelphij         IDs; please send him any subfield ID you wish to use.  Subfield
445205194Sdelphij         IDs with SI2 = 0 are reserved for future use.  The following
446205194Sdelphij         IDs are currently defined:
447205194Sdelphij
448205194Sdelphij
449205194Sdelphij
450205194SdelphijDeutsch                      Informational                      [Page 8]
451205194Sdelphij
452205194SdelphijRFC 1952             GZIP File Format Specification             May 1996
453205194Sdelphij
454205194Sdelphij
455205194Sdelphij            SI1         SI2         Data
456205194Sdelphij            ----------  ----------  ----
457205194Sdelphij            0x41 ('A')  0x70 ('p')  Apollo file type information
458205194Sdelphij
459205194Sdelphij         LEN gives the length of the subfield data, excluding the 4
460205194Sdelphij         initial bytes.
461205194Sdelphij
462205194Sdelphij      2.3.1.2. Compliance
463205194Sdelphij
464205194Sdelphij         A compliant compressor must produce files with correct ID1,
465205194Sdelphij         ID2, CM, CRC32, and ISIZE, but may set all the other fields in
466205194Sdelphij         the fixed-length part of the header to default values (255 for
467205194Sdelphij         OS, 0 for all others).  The compressor must set all reserved
468205194Sdelphij         bits to zero.
469205194Sdelphij
470205194Sdelphij         A compliant decompressor must check ID1, ID2, and CM, and
471205194Sdelphij         provide an error indication if any of these have incorrect
472205194Sdelphij         values.  It must examine FEXTRA/XLEN, FNAME, FCOMMENT and FHCRC
473205194Sdelphij         at least so it can skip over the optional fields if they are
474205194Sdelphij         present.  It need not examine any other part of the header or
475205194Sdelphij         trailer; in particular, a decompressor may ignore FTEXT and OS
476205194Sdelphij         and always produce binary output, and still be compliant.  A
477205194Sdelphij         compliant decompressor must give an error indication if any
478205194Sdelphij         reserved bit is non-zero, since such a bit could indicate the
479205194Sdelphij         presence of a new field that would cause subsequent data to be
480205194Sdelphij         interpreted incorrectly.
481205194Sdelphij
482205194Sdelphij3. References
483205194Sdelphij
484205194Sdelphij   [1] "Information Processing - 8-bit single-byte coded graphic
485205194Sdelphij       character sets - Part 1: Latin alphabet No.1" (ISO 8859-1:1987).
486205194Sdelphij       The ISO 8859-1 (Latin-1) character set is a superset of 7-bit
487205194Sdelphij       ASCII. Files defining this character set are available as
488205194Sdelphij       iso_8859-1.* in ftp://ftp.uu.net/graphics/png/documents/
489205194Sdelphij
490205194Sdelphij   [2] ISO 3309
491205194Sdelphij
492205194Sdelphij   [3] ITU-T recommendation V.42
493205194Sdelphij
494205194Sdelphij   [4] Deutsch, L.P.,"DEFLATE Compressed Data Format Specification",
495205194Sdelphij       available in ftp://ftp.uu.net/pub/archiving/zip/doc/
496205194Sdelphij
497205194Sdelphij   [5] Gailly, J.-L., GZIP documentation, available as gzip-*.tar in
498205194Sdelphij       ftp://prep.ai.mit.edu/pub/gnu/
499205194Sdelphij
500205194Sdelphij   [6] Sarwate, D.V., "Computation of Cyclic Redundancy Checks via Table
501205194Sdelphij       Look-Up", Communications of the ACM, 31(8), pp.1008-1013.
502205194Sdelphij
503205194Sdelphij
504205194Sdelphij
505205194Sdelphij
506205194SdelphijDeutsch                      Informational                      [Page 9]
507205194Sdelphij
508205194SdelphijRFC 1952             GZIP File Format Specification             May 1996
509205194Sdelphij
510205194Sdelphij
511205194Sdelphij   [7] Schwaderer, W.D., "CRC Calculation", April 85 PC Tech Journal,
512205194Sdelphij       pp.118-133.
513205194Sdelphij
514205194Sdelphij   [8] ftp://ftp.adelaide.edu.au/pub/rocksoft/papers/crc_v3.txt,
515205194Sdelphij       describing the CRC concept.
516205194Sdelphij
517205194Sdelphij4. Security Considerations
518205194Sdelphij
519205194Sdelphij   Any data compression method involves the reduction of redundancy in
520205194Sdelphij   the data.  Consequently, any corruption of the data is likely to have
521205194Sdelphij   severe effects and be difficult to correct.  Uncompressed text, on
522205194Sdelphij   the other hand, will probably still be readable despite the presence
523205194Sdelphij   of some corrupted bytes.
524205194Sdelphij
525205194Sdelphij   It is recommended that systems using this data format provide some
526205194Sdelphij   means of validating the integrity of the compressed data, such as by
527205194Sdelphij   setting and checking the CRC-32 check value.
528205194Sdelphij
529205194Sdelphij5. Acknowledgements
530205194Sdelphij
531205194Sdelphij   Trademarks cited in this document are the property of their
532205194Sdelphij   respective owners.
533205194Sdelphij
534205194Sdelphij   Jean-Loup Gailly designed the gzip format and wrote, with Mark Adler,
535205194Sdelphij   the related software described in this specification.  Glenn
536205194Sdelphij   Randers-Pehrson converted this document to RFC and HTML format.
537205194Sdelphij
538205194Sdelphij6. Author's Address
539205194Sdelphij
540205194Sdelphij   L. Peter Deutsch
541205194Sdelphij   Aladdin Enterprises
542205194Sdelphij   203 Santa Margarita Ave.
543205194Sdelphij   Menlo Park, CA 94025
544205194Sdelphij
545205194Sdelphij   Phone: (415) 322-0103 (AM only)
546205194Sdelphij   FAX:   (415) 322-1734
547205194Sdelphij   EMail: <ghost@aladdin.com>
548205194Sdelphij
549205194Sdelphij   Questions about the technical content of this specification can be
550205194Sdelphij   sent by email to:
551205194Sdelphij
552205194Sdelphij   Jean-Loup Gailly <gzip@prep.ai.mit.edu> and
553205194Sdelphij   Mark Adler <madler@alumni.caltech.edu>
554205194Sdelphij
555205194Sdelphij   Editorial comments on this specification can be sent by email to:
556205194Sdelphij
557205194Sdelphij   L. Peter Deutsch <ghost@aladdin.com> and
558205194Sdelphij   Glenn Randers-Pehrson <randeg@alumni.rpi.edu>
559205194Sdelphij
560205194Sdelphij
561205194Sdelphij
562205194SdelphijDeutsch                      Informational                     [Page 10]
563205194Sdelphij
564205194SdelphijRFC 1952             GZIP File Format Specification             May 1996
565205194Sdelphij
566205194Sdelphij
567205194Sdelphij7. Appendix: Jean-Loup Gailly's gzip utility
568205194Sdelphij
569205194Sdelphij   The most widely used implementation of gzip compression, and the
570205194Sdelphij   original documentation on which this specification is based, were
571205194Sdelphij   created by Jean-Loup Gailly <gzip@prep.ai.mit.edu>.  Since this
572205194Sdelphij   implementation is a de facto standard, we mention some more of its
573205194Sdelphij   features here.  Again, the material in this section is not part of
574205194Sdelphij   the specification per se, and implementations need not follow it to
575205194Sdelphij   be compliant.
576205194Sdelphij
577205194Sdelphij   When compressing or decompressing a file, gzip preserves the
578205194Sdelphij   protection, ownership, and modification time attributes on the local
579205194Sdelphij   file system, since there is no provision for representing protection
580205194Sdelphij   attributes in the gzip file format itself.  Since the file format
581205194Sdelphij   includes a modification time, the gzip decompressor provides a
582205194Sdelphij   command line switch that assigns the modification time from the file,
583205194Sdelphij   rather than the local modification time of the compressed input, to
584205194Sdelphij   the decompressed output.
585205194Sdelphij
586205194Sdelphij8. Appendix: Sample CRC Code
587205194Sdelphij
588205194Sdelphij   The following sample code represents a practical implementation of
589205194Sdelphij   the CRC (Cyclic Redundancy Check). (See also ISO 3309 and ITU-T V.42
590205194Sdelphij   for a formal specification.)
591205194Sdelphij
592205194Sdelphij   The sample code is in the ANSI C programming language. Non-C users
593205194Sdelphij   may find it easier to read with these hints:
594205194Sdelphij
595205194Sdelphij      &      Bitwise AND operator.
596205194Sdelphij      ^      Bitwise exclusive-OR operator.
597205194Sdelphij      >>     Bitwise right shift operator. When applied to an
598205194Sdelphij             unsigned quantity, as here, right shift inserts zero
599205194Sdelphij             bit(s) at the left.
600205194Sdelphij      !      Logical NOT operator.
601205194Sdelphij      ++     "n++" increments the variable n.
602205194Sdelphij      0xNNN  0x introduces a hexadecimal (base 16) constant.
603205194Sdelphij             Suffix L indicates a long value (at least 32 bits).
604205194Sdelphij
605205194Sdelphij      /* Table of CRCs of all 8-bit messages. */
606205194Sdelphij      unsigned long crc_table[256];
607205194Sdelphij
608205194Sdelphij      /* Flag: has the table been computed? Initially false. */
609205194Sdelphij      int crc_table_computed = 0;
610205194Sdelphij
611205194Sdelphij      /* Make the table for a fast CRC. */
612205194Sdelphij      void make_crc_table(void)
613205194Sdelphij      {
614205194Sdelphij        unsigned long c;
615205194Sdelphij
616205194Sdelphij
617205194Sdelphij
618205194SdelphijDeutsch                      Informational                     [Page 11]
619205194Sdelphij
620205194SdelphijRFC 1952             GZIP File Format Specification             May 1996
621205194Sdelphij
622205194Sdelphij
623205194Sdelphij        int n, k;
624205194Sdelphij        for (n = 0; n < 256; n++) {
625205194Sdelphij          c = (unsigned long) n;
626205194Sdelphij          for (k = 0; k < 8; k++) {
627205194Sdelphij            if (c & 1) {
628205194Sdelphij              c = 0xedb88320L ^ (c >> 1);
629205194Sdelphij            } else {
630205194Sdelphij              c = c >> 1;
631205194Sdelphij            }
632205194Sdelphij          }
633205194Sdelphij          crc_table[n] = c;
634205194Sdelphij        }
635205194Sdelphij        crc_table_computed = 1;
636205194Sdelphij      }
637205194Sdelphij
638205194Sdelphij      /*
639205194Sdelphij         Update a running crc with the bytes buf[0..len-1] and return
640205194Sdelphij       the updated crc. The crc should be initialized to zero. Pre- and
641205194Sdelphij       post-conditioning (one's complement) is performed within this
642205194Sdelphij       function so it shouldn't be done by the caller. Usage example:
643205194Sdelphij
644205194Sdelphij         unsigned long crc = 0L;
645205194Sdelphij
646205194Sdelphij         while (read_buffer(buffer, length) != EOF) {
647205194Sdelphij           crc = update_crc(crc, buffer, length);
648205194Sdelphij         }
649205194Sdelphij         if (crc != original_crc) error();
650205194Sdelphij      */
651205194Sdelphij      unsigned long update_crc(unsigned long crc,
652205194Sdelphij                      unsigned char *buf, int len)
653205194Sdelphij      {
654205194Sdelphij        unsigned long c = crc ^ 0xffffffffL;
655205194Sdelphij        int n;
656205194Sdelphij
657205194Sdelphij        if (!crc_table_computed)
658205194Sdelphij          make_crc_table();
659205194Sdelphij        for (n = 0; n < len; n++) {
660205194Sdelphij          c = crc_table[(c ^ buf[n]) & 0xff] ^ (c >> 8);
661205194Sdelphij        }
662205194Sdelphij        return c ^ 0xffffffffL;
663205194Sdelphij      }
664205194Sdelphij
665205194Sdelphij      /* Return the CRC of the bytes buf[0..len-1]. */
666205194Sdelphij      unsigned long crc(unsigned char *buf, int len)
667205194Sdelphij      {
668205194Sdelphij        return update_crc(0L, buf, len);
669205194Sdelphij      }
670205194Sdelphij
671205194Sdelphij
672205194Sdelphij
673205194Sdelphij
674205194SdelphijDeutsch                      Informational                     [Page 12]
675205194Sdelphij
676