source: code/trunk/vendor/github.com/klauspost/compress/gzip/gunzip.go@ 145

Last change on this file since 145 was 145, checked in by Izuru Yakumo, 22 months ago

Updated the Makefile and vendored dependencies

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 9.6 KB
Line 
1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package gzip implements reading and writing of gzip format compressed files,
6// as specified in RFC 1952.
7package gzip
8
9import (
10 "bufio"
11 "compress/gzip"
12 "encoding/binary"
13 "hash/crc32"
14 "io"
15 "time"
16
17 "github.com/klauspost/compress/flate"
18)
19
// Fixed values checked at the start of every gzip member header.
const (
	gzipID1     = 0x1f // first identification byte
	gzipID2     = 0x8b // second identification byte
	gzipDeflate = 8    // the only compression method readHeader accepts
)

// Bit masks for the header flag byte. Each flag occupies the next
// higher bit, starting at bit 0.
const (
	flagText    = 1 << iota // bit 0
	flagHdrCrc              // bit 1: a 16-bit header CRC is present
	flagExtra               // bit 2: an "extra" field is present
	flagName                // bit 3: a NUL-terminated file name is present
	flagComment             // bit 4: a NUL-terminated comment is present
)
30
// Sentinel errors. Both are the very same values exported by the standard
// library's compress/gzip package, so callers may compare against either.
var (
	// ErrChecksum reports that GZIP data failed checksum verification.
	ErrChecksum = gzip.ErrChecksum
	// ErrHeader reports that GZIP data carried an invalid header.
	ErrHeader = gzip.ErrHeader
)
37
// le is shorthand for the little-endian byte order used when decoding the
// multi-byte integer fields of the gzip header and trailer.
var le = binary.LittleEndian
39
// noEOF maps io.EOF to io.ErrUnexpectedEOF and returns every other error
// (including nil) unchanged. It is used where running out of input means
// the stream was truncated rather than cleanly finished.
func noEOF(err error) error {
	switch err {
	case io.EOF:
		return io.ErrUnexpectedEOF
	default:
		return err
	}
}
47
// Header is the metadata a gzip file stores about the compressed file.
// It is exposed as the fields of the Writer and Reader structs.
//
// Strings must be UTF-8 encoded and, because of limitations of the GZIP
// file format, may only contain Unicode code points U+0001 through U+00FF.
type Header struct {
	Comment string    // comment
	Extra   []byte    // "extra data"
	ModTime time.Time // modification time
	Name    string    // file name
	OS      byte      // operating system type
}
60
61// A Reader is an io.Reader that can be read to retrieve
62// uncompressed data from a gzip-format compressed file.
63//
64// In general, a gzip file can be a concatenation of gzip files,
65// each with its own header. Reads from the Reader
66// return the concatenation of the uncompressed data of each.
67// Only the first header is recorded in the Reader fields.
68//
69// Gzip files store a length and checksum of the uncompressed data.
70// The Reader will return a ErrChecksum when Read
71// reaches the end of the uncompressed data if it does not
72// have the expected length or checksum. Clients should treat data
73// returned by Read as tentative until they receive the io.EOF
74// marking the end of the data.
75type Reader struct {
76 Header // valid after NewReader or Reader.Reset
77 r flate.Reader
78 br *bufio.Reader
79 decompressor io.ReadCloser
80 digest uint32 // CRC-32, IEEE polynomial (section 8)
81 size uint32 // Uncompressed size (section 2.3.1)
82 buf [512]byte
83 err error
84 multistream bool
85}
86
87// NewReader creates a new Reader reading the given reader.
88// If r does not also implement io.ByteReader,
89// the decompressor may read more data than necessary from r.
90//
91// It is the caller's responsibility to call Close on the Reader when done.
92//
93// The Reader.Header fields will be valid in the Reader returned.
94func NewReader(r io.Reader) (*Reader, error) {
95 z := new(Reader)
96 if err := z.Reset(r); err != nil {
97 return nil, err
98 }
99 return z, nil
100}
101
102// Reset discards the Reader z's state and makes it equivalent to the
103// result of its original state from NewReader, but reading from r instead.
104// This permits reusing a Reader rather than allocating a new one.
105func (z *Reader) Reset(r io.Reader) error {
106 *z = Reader{
107 decompressor: z.decompressor,
108 multistream: true,
109 }
110 if rr, ok := r.(flate.Reader); ok {
111 z.r = rr
112 } else {
113 // Reuse if we can.
114 if z.br != nil {
115 z.br.Reset(r)
116 } else {
117 z.br = bufio.NewReader(r)
118 }
119 z.r = z.br
120 }
121 z.Header, z.err = z.readHeader()
122 return z.err
123}
124
125// Multistream controls whether the reader supports multistream files.
126//
127// If enabled (the default), the Reader expects the input to be a sequence
128// of individually gzipped data streams, each with its own header and
129// trailer, ending at EOF. The effect is that the concatenation of a sequence
130// of gzipped files is treated as equivalent to the gzip of the concatenation
131// of the sequence. This is standard behavior for gzip readers.
132//
133// Calling Multistream(false) disables this behavior; disabling the behavior
134// can be useful when reading file formats that distinguish individual gzip
135// data streams or mix gzip data streams with other data streams.
136// In this mode, when the Reader reaches the end of the data stream,
137// Read returns io.EOF. If the underlying reader implements io.ByteReader,
138// it will be left positioned just after the gzip stream.
139// To start the next stream, call z.Reset(r) followed by z.Multistream(false).
140// If there is no next stream, z.Reset(r) will return io.EOF.
141func (z *Reader) Multistream(ok bool) {
142 z.multistream = ok
143}
144
145// readString reads a NUL-terminated string from z.r.
146// It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and
147// will output a string encoded using UTF-8.
148// This method always updates z.digest with the data read.
149func (z *Reader) readString() (string, error) {
150 var err error
151 needConv := false
152 for i := 0; ; i++ {
153 if i >= len(z.buf) {
154 return "", ErrHeader
155 }
156 z.buf[i], err = z.r.ReadByte()
157 if err != nil {
158 return "", err
159 }
160 if z.buf[i] > 0x7f {
161 needConv = true
162 }
163 if z.buf[i] == 0 {
164 // Digest covers the NUL terminator.
165 z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:i+1])
166
167 // Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1).
168 if needConv {
169 s := make([]rune, 0, i)
170 for _, v := range z.buf[:i] {
171 s = append(s, rune(v))
172 }
173 return string(s), nil
174 }
175 return string(z.buf[:i]), nil
176 }
177 }
178}
179
180// readHeader reads the GZIP header according to section 2.3.1.
181// This method does not set z.err.
182func (z *Reader) readHeader() (hdr Header, err error) {
183 if _, err = io.ReadFull(z.r, z.buf[:10]); err != nil {
184 // RFC 1952, section 2.2, says the following:
185 // A gzip file consists of a series of "members" (compressed data sets).
186 //
187 // Other than this, the specification does not clarify whether a
188 // "series" is defined as "one or more" or "zero or more". To err on the
189 // side of caution, Go interprets this to mean "zero or more".
190 // Thus, it is okay to return io.EOF here.
191 return hdr, err
192 }
193 if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
194 return hdr, ErrHeader
195 }
196 flg := z.buf[3]
197 hdr.ModTime = time.Unix(int64(le.Uint32(z.buf[4:8])), 0)
198 // z.buf[8] is XFL and is currently ignored.
199 hdr.OS = z.buf[9]
200 z.digest = crc32.ChecksumIEEE(z.buf[:10])
201
202 if flg&flagExtra != 0 {
203 if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil {
204 return hdr, noEOF(err)
205 }
206 z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:2])
207 data := make([]byte, le.Uint16(z.buf[:2]))
208 if _, err = io.ReadFull(z.r, data); err != nil {
209 return hdr, noEOF(err)
210 }
211 z.digest = crc32.Update(z.digest, crc32.IEEETable, data)
212 hdr.Extra = data
213 }
214
215 var s string
216 if flg&flagName != 0 {
217 if s, err = z.readString(); err != nil {
218 return hdr, err
219 }
220 hdr.Name = s
221 }
222
223 if flg&flagComment != 0 {
224 if s, err = z.readString(); err != nil {
225 return hdr, err
226 }
227 hdr.Comment = s
228 }
229
230 if flg&flagHdrCrc != 0 {
231 if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil {
232 return hdr, noEOF(err)
233 }
234 digest := le.Uint16(z.buf[:2])
235 if digest != uint16(z.digest) {
236 return hdr, ErrHeader
237 }
238 }
239
240 z.digest = 0
241 if z.decompressor == nil {
242 z.decompressor = flate.NewReader(z.r)
243 } else {
244 z.decompressor.(flate.Resetter).Reset(z.r, nil)
245 }
246 return hdr, nil
247}
248
249// Read implements io.Reader, reading uncompressed bytes from its underlying Reader.
250func (z *Reader) Read(p []byte) (n int, err error) {
251 if z.err != nil {
252 return 0, z.err
253 }
254
255 n, z.err = z.decompressor.Read(p)
256 z.digest = crc32.Update(z.digest, crc32.IEEETable, p[:n])
257 z.size += uint32(n)
258 if z.err != io.EOF {
259 // In the normal case we return here.
260 return n, z.err
261 }
262
263 // Finished file; check checksum and size.
264 if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil {
265 z.err = noEOF(err)
266 return n, z.err
267 }
268 digest := le.Uint32(z.buf[:4])
269 size := le.Uint32(z.buf[4:8])
270 if digest != z.digest || size != z.size {
271 z.err = ErrChecksum
272 return n, z.err
273 }
274 z.digest, z.size = 0, 0
275
276 // File is ok; check if there is another.
277 if !z.multistream {
278 return n, io.EOF
279 }
280 z.err = nil // Remove io.EOF
281
282 if _, z.err = z.readHeader(); z.err != nil {
283 return n, z.err
284 }
285
286 // Read from next file, if necessary.
287 if n > 0 {
288 return n, nil
289 }
290 return z.Read(p)
291}
292
293// Support the io.WriteTo interface for io.Copy and friends.
294func (z *Reader) WriteTo(w io.Writer) (int64, error) {
295 total := int64(0)
296 crcWriter := crc32.NewIEEE()
297 for {
298 if z.err != nil {
299 if z.err == io.EOF {
300 return total, nil
301 }
302 return total, z.err
303 }
304
305 // We write both to output and digest.
306 mw := io.MultiWriter(w, crcWriter)
307 n, err := z.decompressor.(io.WriterTo).WriteTo(mw)
308 total += n
309 z.size += uint32(n)
310 if err != nil {
311 z.err = err
312 return total, z.err
313 }
314
315 // Finished file; check checksum + size.
316 if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil {
317 if err == io.EOF {
318 err = io.ErrUnexpectedEOF
319 }
320 z.err = err
321 return total, err
322 }
323 z.digest = crcWriter.Sum32()
324 digest := le.Uint32(z.buf[:4])
325 size := le.Uint32(z.buf[4:8])
326 if digest != z.digest || size != z.size {
327 z.err = ErrChecksum
328 return total, z.err
329 }
330 z.digest, z.size = 0, 0
331
332 // File is ok; check if there is another.
333 if !z.multistream {
334 return total, nil
335 }
336 crcWriter.Reset()
337 z.err = nil // Remove io.EOF
338
339 if _, z.err = z.readHeader(); z.err != nil {
340 if z.err == io.EOF {
341 return total, nil
342 }
343 return total, z.err
344 }
345 }
346}
347
348// Close closes the Reader. It does not close the underlying io.Reader.
349// In order for the GZIP checksum to be verified, the reader must be
350// fully consumed until the io.EOF.
351func (z *Reader) Close() error { return z.decompressor.Close() }
Note: See TracBrowser for help on using the repository browser.