source: code/trunk/vendor/github.com/klauspost/compress/flate/inflate.go@ 822

Last change on this file since 822 was 822, checked in by yakumo.izuru, 22 months ago

Prefer immortal.run over runit and rc.d, use vendored modules
for convenience.

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 24.3 KB
Line 
1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package flate implements the DEFLATE compressed data format, described in
6// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
7// formats.
8package flate
9
10import (
11 "bufio"
12 "fmt"
13 "io"
14 "math/bits"
15 "strconv"
16 "sync"
17)
18
19const (
20 maxCodeLen = 16 // max length of Huffman code
21 maxCodeLenMask = 15 // mask for max length of Huffman code
22 // The next three numbers come from the RFC section 3.2.7, with the
23 // additional proviso in section 3.2.5 which implies that distance codes
24 // 30 and 31 should never occur in compressed data.
25 maxNumLit = 286
26 maxNumDist = 30
27 numCodes = 19 // number of codes in Huffman meta-code
28
29 debugDecode = false
30)
31
32// Initialize the fixedHuffmanDecoder only once upon first use.
33var fixedOnce sync.Once
34var fixedHuffmanDecoder huffmanDecoder
35
36// A CorruptInputError reports the presence of corrupt input at a given offset.
37type CorruptInputError int64
38
39func (e CorruptInputError) Error() string {
40 return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
41}
42
43// An InternalError reports an error in the flate code itself.
44type InternalError string
45
46func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
47
48// A ReadError reports an error encountered while reading input.
49//
50// Deprecated: No longer returned.
51type ReadError struct {
52 Offset int64 // byte offset where error occurred
53 Err error // error returned by underlying Read
54}
55
56func (e *ReadError) Error() string {
57 return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
58}
59
60// A WriteError reports an error encountered while writing output.
61//
62// Deprecated: No longer returned.
63type WriteError struct {
64 Offset int64 // byte offset where error occurred
65 Err error // error returned by underlying Write
66}
67
68func (e *WriteError) Error() string {
69 return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
70}
71
72// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
73// to switch to a new underlying Reader. This permits reusing a ReadCloser
74// instead of allocating a new one.
75type Resetter interface {
76 // Reset discards any buffered data and resets the Resetter as if it was
77 // newly initialized with the given reader.
78 Reset(r io.Reader, dict []byte) error
79}
80
81// The data structure for decoding Huffman tables is based on that of
82// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
83// For codes smaller than the table width, there are multiple entries
84// (each combination of trailing bits has the same value). For codes
85// larger than the table width, the table contains a link to an overflow
86// table. The width of each entry in the link table is the maximum code
87// size minus the chunk width.
88//
89// Note that you can do a lookup in the table even without all bits
90// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
91// have the property that shorter codes come before longer ones, the
92// bit length estimate in the result is a lower bound on the actual
93// number of bits.
94//
95// See the following:
96// http://www.gzip.org/algorithm.txt
97
98// chunk & 15 is number of bits
99// chunk >> 4 is value, including table link
100
101const (
102 huffmanChunkBits = 9
103 huffmanNumChunks = 1 << huffmanChunkBits
104 huffmanCountMask = 15
105 huffmanValueShift = 4
106)
107
108type huffmanDecoder struct {
109 maxRead int // the maximum number of bits we can read and not overread
110 chunks *[huffmanNumChunks]uint16 // chunks as described above
111 links [][]uint16 // overflow links
112 linkMask uint32 // mask the width of the link table
113}
114
115// Initialize Huffman decoding tables from array of code lengths.
116// Following this function, h is guaranteed to be initialized into a complete
117// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
118// degenerate case where the tree has only a single symbol with length 1. Empty
119// trees are permitted.
120func (h *huffmanDecoder) init(lengths []int) bool {
121 // Sanity enables additional runtime tests during Huffman
122 // table construction. It's intended to be used during
123 // development to supplement the currently ad-hoc unit tests.
124 const sanity = false
125
126 if h.chunks == nil {
127 h.chunks = &[huffmanNumChunks]uint16{}
128 }
129 if h.maxRead != 0 {
130 *h = huffmanDecoder{chunks: h.chunks, links: h.links}
131 }
132
133 // Count number of codes of each length,
134 // compute maxRead and max length.
135 var count [maxCodeLen]int
136 var min, max int
137 for _, n := range lengths {
138 if n == 0 {
139 continue
140 }
141 if min == 0 || n < min {
142 min = n
143 }
144 if n > max {
145 max = n
146 }
147 count[n&maxCodeLenMask]++
148 }
149
150 // Empty tree. The decompressor.huffSym function will fail later if the tree
151 // is used. Technically, an empty tree is only valid for the HDIST tree and
152 // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
153 // is guaranteed to fail since it will attempt to use the tree to decode the
154 // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
155 // guaranteed to fail later since the compressed data section must be
156 // composed of at least one symbol (the end-of-block marker).
157 if max == 0 {
158 return true
159 }
160
161 code := 0
162 var nextcode [maxCodeLen]int
163 for i := min; i <= max; i++ {
164 code <<= 1
165 nextcode[i&maxCodeLenMask] = code
166 code += count[i&maxCodeLenMask]
167 }
168
169 // Check that the coding is complete (i.e., that we've
170 // assigned all 2-to-the-max possible bit sequences).
171 // Exception: To be compatible with zlib, we also need to
172 // accept degenerate single-code codings. See also
173 // TestDegenerateHuffmanCoding.
174 if code != 1<<uint(max) && !(code == 1 && max == 1) {
175 if debugDecode {
176 fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
177 }
178 return false
179 }
180
181 h.maxRead = min
182 chunks := h.chunks[:]
183 for i := range chunks {
184 chunks[i] = 0
185 }
186
187 if max > huffmanChunkBits {
188 numLinks := 1 << (uint(max) - huffmanChunkBits)
189 h.linkMask = uint32(numLinks - 1)
190
191 // create link tables
192 link := nextcode[huffmanChunkBits+1] >> 1
193 if cap(h.links) < huffmanNumChunks-link {
194 h.links = make([][]uint16, huffmanNumChunks-link)
195 } else {
196 h.links = h.links[:huffmanNumChunks-link]
197 }
198 for j := uint(link); j < huffmanNumChunks; j++ {
199 reverse := int(bits.Reverse16(uint16(j)))
200 reverse >>= uint(16 - huffmanChunkBits)
201 off := j - uint(link)
202 if sanity && h.chunks[reverse] != 0 {
203 panic("impossible: overwriting existing chunk")
204 }
205 h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
206 if cap(h.links[off]) < numLinks {
207 h.links[off] = make([]uint16, numLinks)
208 } else {
209 links := h.links[off][:0]
210 h.links[off] = links[:numLinks]
211 }
212 }
213 } else {
214 h.links = h.links[:0]
215 }
216
217 for i, n := range lengths {
218 if n == 0 {
219 continue
220 }
221 code := nextcode[n]
222 nextcode[n]++
223 chunk := uint16(i<<huffmanValueShift | n)
224 reverse := int(bits.Reverse16(uint16(code)))
225 reverse >>= uint(16 - n)
226 if n <= huffmanChunkBits {
227 for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
228 // We should never need to overwrite
229 // an existing chunk. Also, 0 is
230 // never a valid chunk, because the
231 // lower 4 "count" bits should be
232 // between 1 and 15.
233 if sanity && h.chunks[off] != 0 {
234 panic("impossible: overwriting existing chunk")
235 }
236 h.chunks[off] = chunk
237 }
238 } else {
239 j := reverse & (huffmanNumChunks - 1)
240 if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
241 // Longer codes should have been
242 // associated with a link table above.
243 panic("impossible: not an indirect chunk")
244 }
245 value := h.chunks[j] >> huffmanValueShift
246 linktab := h.links[value]
247 reverse >>= huffmanChunkBits
248 for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
249 if sanity && linktab[off] != 0 {
250 panic("impossible: overwriting existing chunk")
251 }
252 linktab[off] = chunk
253 }
254 }
255 }
256
257 if sanity {
258 // Above we've sanity checked that we never overwrote
259 // an existing entry. Here we additionally check that
260 // we filled the tables completely.
261 for i, chunk := range h.chunks {
262 if chunk == 0 {
263 // As an exception, in the degenerate
264 // single-code case, we allow odd
265 // chunks to be missing.
266 if code == 1 && i%2 == 1 {
267 continue
268 }
269 panic("impossible: missing chunk")
270 }
271 }
272 for _, linktab := range h.links {
273 for _, chunk := range linktab {
274 if chunk == 0 {
275 panic("impossible: missing chunk")
276 }
277 }
278 }
279 }
280
281 return true
282}
283
284// The actual read interface needed by NewReader.
285// If the passed in io.Reader does not also have ReadByte,
286// the NewReader will introduce its own buffering.
287type Reader interface {
288 io.Reader
289 io.ByteReader
290}
291
292// Decompress state.
293type decompressor struct {
294 // Input source.
295 r Reader
296 roffset int64
297
298 // Input bits, in top of b.
299 b uint32
300 nb uint
301
302 // Huffman decoders for literal/length, distance.
303 h1, h2 huffmanDecoder
304
305 // Length arrays used to define Huffman codes.
306 bits *[maxNumLit + maxNumDist]int
307 codebits *[numCodes]int
308
309 // Output history, buffer.
310 dict dictDecoder
311
312 // Temporary buffer (avoids repeated allocation).
313 buf [4]byte
314
315 // Next step in the decompression,
316 // and decompression state.
317 step func(*decompressor)
318 stepState int
319 final bool
320 err error
321 toRead []byte
322 hl, hd *huffmanDecoder
323 copyLen int
324 copyDist int
325}
326
327func (f *decompressor) nextBlock() {
328 for f.nb < 1+2 {
329 if f.err = f.moreBits(); f.err != nil {
330 return
331 }
332 }
333 f.final = f.b&1 == 1
334 f.b >>= 1
335 typ := f.b & 3
336 f.b >>= 2
337 f.nb -= 1 + 2
338 switch typ {
339 case 0:
340 f.dataBlock()
341 case 1:
342 // compressed, fixed Huffman tables
343 f.hl = &fixedHuffmanDecoder
344 f.hd = nil
345 f.huffmanBlockDecoder()()
346 case 2:
347 // compressed, dynamic Huffman tables
348 if f.err = f.readHuffman(); f.err != nil {
349 break
350 }
351 f.hl = &f.h1
352 f.hd = &f.h2
353 f.huffmanBlockDecoder()()
354 default:
355 // 3 is reserved.
356 if debugDecode {
357 fmt.Println("reserved data block encountered")
358 }
359 f.err = CorruptInputError(f.roffset)
360 }
361}
362
363func (f *decompressor) Read(b []byte) (int, error) {
364 for {
365 if len(f.toRead) > 0 {
366 n := copy(b, f.toRead)
367 f.toRead = f.toRead[n:]
368 if len(f.toRead) == 0 {
369 return n, f.err
370 }
371 return n, nil
372 }
373 if f.err != nil {
374 return 0, f.err
375 }
376 f.step(f)
377 if f.err != nil && len(f.toRead) == 0 {
378 f.toRead = f.dict.readFlush() // Flush what's left in case of error
379 }
380 }
381}
382
383// Support the io.WriteTo interface for io.Copy and friends.
384func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
385 total := int64(0)
386 flushed := false
387 for {
388 if len(f.toRead) > 0 {
389 n, err := w.Write(f.toRead)
390 total += int64(n)
391 if err != nil {
392 f.err = err
393 return total, err
394 }
395 if n != len(f.toRead) {
396 return total, io.ErrShortWrite
397 }
398 f.toRead = f.toRead[:0]
399 }
400 if f.err != nil && flushed {
401 if f.err == io.EOF {
402 return total, nil
403 }
404 return total, f.err
405 }
406 if f.err == nil {
407 f.step(f)
408 }
409 if len(f.toRead) == 0 && f.err != nil && !flushed {
410 f.toRead = f.dict.readFlush() // Flush what's left in case of error
411 flushed = true
412 }
413 }
414}
415
416func (f *decompressor) Close() error {
417 if f.err == io.EOF {
418 return nil
419 }
420 return f.err
421}
422
423// RFC 1951 section 3.2.7.
424// Compression with dynamic Huffman codes
425
426var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
427
428func (f *decompressor) readHuffman() error {
429 // HLIT[5], HDIST[5], HCLEN[4].
430 for f.nb < 5+5+4 {
431 if err := f.moreBits(); err != nil {
432 return err
433 }
434 }
435 nlit := int(f.b&0x1F) + 257
436 if nlit > maxNumLit {
437 if debugDecode {
438 fmt.Println("nlit > maxNumLit", nlit)
439 }
440 return CorruptInputError(f.roffset)
441 }
442 f.b >>= 5
443 ndist := int(f.b&0x1F) + 1
444 if ndist > maxNumDist {
445 if debugDecode {
446 fmt.Println("ndist > maxNumDist", ndist)
447 }
448 return CorruptInputError(f.roffset)
449 }
450 f.b >>= 5
451 nclen := int(f.b&0xF) + 4
452 // numCodes is 19, so nclen is always valid.
453 f.b >>= 4
454 f.nb -= 5 + 5 + 4
455
456 // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
457 for i := 0; i < nclen; i++ {
458 for f.nb < 3 {
459 if err := f.moreBits(); err != nil {
460 return err
461 }
462 }
463 f.codebits[codeOrder[i]] = int(f.b & 0x7)
464 f.b >>= 3
465 f.nb -= 3
466 }
467 for i := nclen; i < len(codeOrder); i++ {
468 f.codebits[codeOrder[i]] = 0
469 }
470 if !f.h1.init(f.codebits[0:]) {
471 if debugDecode {
472 fmt.Println("init codebits failed")
473 }
474 return CorruptInputError(f.roffset)
475 }
476
477 // HLIT + 257 code lengths, HDIST + 1 code lengths,
478 // using the code length Huffman code.
479 for i, n := 0, nlit+ndist; i < n; {
480 x, err := f.huffSym(&f.h1)
481 if err != nil {
482 return err
483 }
484 if x < 16 {
485 // Actual length.
486 f.bits[i] = x
487 i++
488 continue
489 }
490 // Repeat previous length or zero.
491 var rep int
492 var nb uint
493 var b int
494 switch x {
495 default:
496 return InternalError("unexpected length code")
497 case 16:
498 rep = 3
499 nb = 2
500 if i == 0 {
501 if debugDecode {
502 fmt.Println("i==0")
503 }
504 return CorruptInputError(f.roffset)
505 }
506 b = f.bits[i-1]
507 case 17:
508 rep = 3
509 nb = 3
510 b = 0
511 case 18:
512 rep = 11
513 nb = 7
514 b = 0
515 }
516 for f.nb < nb {
517 if err := f.moreBits(); err != nil {
518 if debugDecode {
519 fmt.Println("morebits:", err)
520 }
521 return err
522 }
523 }
524 rep += int(f.b & uint32(1<<nb-1))
525 f.b >>= nb
526 f.nb -= nb
527 if i+rep > n {
528 if debugDecode {
529 fmt.Println("i+rep > n", i, rep, n)
530 }
531 return CorruptInputError(f.roffset)
532 }
533 for j := 0; j < rep; j++ {
534 f.bits[i] = b
535 i++
536 }
537 }
538
539 if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
540 if debugDecode {
541 fmt.Println("init2 failed")
542 }
543 return CorruptInputError(f.roffset)
544 }
545
546 // As an optimization, we can initialize the maxRead bits to read at a time
547 // for the HLIT tree to the length of the EOB marker since we know that
548 // every block must terminate with one. This preserves the property that
549 // we never read any extra bytes after the end of the DEFLATE stream.
550 if f.h1.maxRead < f.bits[endBlockMarker] {
551 f.h1.maxRead = f.bits[endBlockMarker]
552 }
553 if !f.final {
554 // If not the final block, the smallest block possible is
555 // a predefined table, BTYPE=01, with a single EOB marker.
556 // This will take up 3 + 7 bits.
557 f.h1.maxRead += 10
558 }
559
560 return nil
561}
562
563// Decode a single Huffman block from f.
564// hl and hd are the Huffman states for the lit/length values
565// and the distance values, respectively. If hd == nil, using the
566// fixed distance encoding associated with fixed Huffman blocks.
567func (f *decompressor) huffmanBlockGeneric() {
568 const (
569 stateInit = iota // Zero value must be stateInit
570 stateDict
571 )
572
573 switch f.stepState {
574 case stateInit:
575 goto readLiteral
576 case stateDict:
577 goto copyHistory
578 }
579
580readLiteral:
581 // Read literal and/or (length, distance) according to RFC section 3.2.3.
582 {
583 var v int
584 {
585 // Inlined v, err := f.huffSym(f.hl)
586 // Since a huffmanDecoder can be empty or be composed of a degenerate tree
587 // with single element, huffSym must error on these two edge cases. In both
588 // cases, the chunks slice will be 0 for the invalid sequence, leading it
589 // satisfy the n == 0 check below.
590 n := uint(f.hl.maxRead)
591 // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
592 // but is smart enough to keep local variables in registers, so use nb and b,
593 // inline call to moreBits and reassign b,nb back to f on return.
594 nb, b := f.nb, f.b
595 for {
596 for nb < n {
597 c, err := f.r.ReadByte()
598 if err != nil {
599 f.b = b
600 f.nb = nb
601 f.err = noEOF(err)
602 return
603 }
604 f.roffset++
605 b |= uint32(c) << (nb & 31)
606 nb += 8
607 }
608 chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
609 n = uint(chunk & huffmanCountMask)
610 if n > huffmanChunkBits {
611 chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
612 n = uint(chunk & huffmanCountMask)
613 }
614 if n <= nb {
615 if n == 0 {
616 f.b = b
617 f.nb = nb
618 if debugDecode {
619 fmt.Println("huffsym: n==0")
620 }
621 f.err = CorruptInputError(f.roffset)
622 return
623 }
624 f.b = b >> (n & 31)
625 f.nb = nb - n
626 v = int(chunk >> huffmanValueShift)
627 break
628 }
629 }
630 }
631
632 var n uint // number of bits extra
633 var length int
634 var err error
635 switch {
636 case v < 256:
637 f.dict.writeByte(byte(v))
638 if f.dict.availWrite() == 0 {
639 f.toRead = f.dict.readFlush()
640 f.step = (*decompressor).huffmanBlockGeneric
641 f.stepState = stateInit
642 return
643 }
644 goto readLiteral
645 case v == 256:
646 f.finishBlock()
647 return
648 // otherwise, reference to older data
649 case v < 265:
650 length = v - (257 - 3)
651 n = 0
652 case v < 269:
653 length = v*2 - (265*2 - 11)
654 n = 1
655 case v < 273:
656 length = v*4 - (269*4 - 19)
657 n = 2
658 case v < 277:
659 length = v*8 - (273*8 - 35)
660 n = 3
661 case v < 281:
662 length = v*16 - (277*16 - 67)
663 n = 4
664 case v < 285:
665 length = v*32 - (281*32 - 131)
666 n = 5
667 case v < maxNumLit:
668 length = 258
669 n = 0
670 default:
671 if debugDecode {
672 fmt.Println(v, ">= maxNumLit")
673 }
674 f.err = CorruptInputError(f.roffset)
675 return
676 }
677 if n > 0 {
678 for f.nb < n {
679 if err = f.moreBits(); err != nil {
680 if debugDecode {
681 fmt.Println("morebits n>0:", err)
682 }
683 f.err = err
684 return
685 }
686 }
687 length += int(f.b & uint32(1<<n-1))
688 f.b >>= n
689 f.nb -= n
690 }
691
692 var dist int
693 if f.hd == nil {
694 for f.nb < 5 {
695 if err = f.moreBits(); err != nil {
696 if debugDecode {
697 fmt.Println("morebits f.nb<5:", err)
698 }
699 f.err = err
700 return
701 }
702 }
703 dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
704 f.b >>= 5
705 f.nb -= 5
706 } else {
707 if dist, err = f.huffSym(f.hd); err != nil {
708 if debugDecode {
709 fmt.Println("huffsym:", err)
710 }
711 f.err = err
712 return
713 }
714 }
715
716 switch {
717 case dist < 4:
718 dist++
719 case dist < maxNumDist:
720 nb := uint(dist-2) >> 1
721 // have 1 bit in bottom of dist, need nb more.
722 extra := (dist & 1) << nb
723 for f.nb < nb {
724 if err = f.moreBits(); err != nil {
725 if debugDecode {
726 fmt.Println("morebits f.nb<nb:", err)
727 }
728 f.err = err
729 return
730 }
731 }
732 extra |= int(f.b & uint32(1<<nb-1))
733 f.b >>= nb
734 f.nb -= nb
735 dist = 1<<(nb+1) + 1 + extra
736 default:
737 if debugDecode {
738 fmt.Println("dist too big:", dist, maxNumDist)
739 }
740 f.err = CorruptInputError(f.roffset)
741 return
742 }
743
744 // No check on length; encoding can be prescient.
745 if dist > f.dict.histSize() {
746 if debugDecode {
747 fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
748 }
749 f.err = CorruptInputError(f.roffset)
750 return
751 }
752
753 f.copyLen, f.copyDist = length, dist
754 goto copyHistory
755 }
756
757copyHistory:
758 // Perform a backwards copy according to RFC section 3.2.3.
759 {
760 cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
761 if cnt == 0 {
762 cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
763 }
764 f.copyLen -= cnt
765
766 if f.dict.availWrite() == 0 || f.copyLen > 0 {
767 f.toRead = f.dict.readFlush()
768 f.step = (*decompressor).huffmanBlockGeneric // We need to continue this work
769 f.stepState = stateDict
770 return
771 }
772 goto readLiteral
773 }
774}
775
776// Copy a single uncompressed data block from input to output.
777func (f *decompressor) dataBlock() {
778 // Uncompressed.
779 // Discard current half-byte.
780 left := (f.nb) & 7
781 f.nb -= left
782 f.b >>= left
783
784 offBytes := f.nb >> 3
785 // Unfilled values will be overwritten.
786 f.buf[0] = uint8(f.b)
787 f.buf[1] = uint8(f.b >> 8)
788 f.buf[2] = uint8(f.b >> 16)
789 f.buf[3] = uint8(f.b >> 24)
790
791 f.roffset += int64(offBytes)
792 f.nb, f.b = 0, 0
793
794 // Length then ones-complement of length.
795 nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
796 f.roffset += int64(nr)
797 if err != nil {
798 f.err = noEOF(err)
799 return
800 }
801 n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
802 nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
803 if nn != ^n {
804 if debugDecode {
805 ncomp := ^n
806 fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
807 }
808 f.err = CorruptInputError(f.roffset)
809 return
810 }
811
812 if n == 0 {
813 f.toRead = f.dict.readFlush()
814 f.finishBlock()
815 return
816 }
817
818 f.copyLen = int(n)
819 f.copyData()
820}
821
822// copyData copies f.copyLen bytes from the underlying reader into f.hist.
823// It pauses for reads when f.hist is full.
824func (f *decompressor) copyData() {
825 buf := f.dict.writeSlice()
826 if len(buf) > f.copyLen {
827 buf = buf[:f.copyLen]
828 }
829
830 cnt, err := io.ReadFull(f.r, buf)
831 f.roffset += int64(cnt)
832 f.copyLen -= cnt
833 f.dict.writeMark(cnt)
834 if err != nil {
835 f.err = noEOF(err)
836 return
837 }
838
839 if f.dict.availWrite() == 0 || f.copyLen > 0 {
840 f.toRead = f.dict.readFlush()
841 f.step = (*decompressor).copyData
842 return
843 }
844 f.finishBlock()
845}
846
847func (f *decompressor) finishBlock() {
848 if f.final {
849 if f.dict.availRead() > 0 {
850 f.toRead = f.dict.readFlush()
851 }
852 f.err = io.EOF
853 }
854 f.step = (*decompressor).nextBlock
855}
856
857// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
858func noEOF(e error) error {
859 if e == io.EOF {
860 return io.ErrUnexpectedEOF
861 }
862 return e
863}
864
865func (f *decompressor) moreBits() error {
866 c, err := f.r.ReadByte()
867 if err != nil {
868 return noEOF(err)
869 }
870 f.roffset++
871 f.b |= uint32(c) << f.nb
872 f.nb += 8
873 return nil
874}
875
876// Read the next Huffman-encoded symbol from f according to h.
877func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
878 // Since a huffmanDecoder can be empty or be composed of a degenerate tree
879 // with single element, huffSym must error on these two edge cases. In both
880 // cases, the chunks slice will be 0 for the invalid sequence, leading it
881 // satisfy the n == 0 check below.
882 n := uint(h.maxRead)
883 // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
884 // but is smart enough to keep local variables in registers, so use nb and b,
885 // inline call to moreBits and reassign b,nb back to f on return.
886 nb, b := f.nb, f.b
887 for {
888 for nb < n {
889 c, err := f.r.ReadByte()
890 if err != nil {
891 f.b = b
892 f.nb = nb
893 return 0, noEOF(err)
894 }
895 f.roffset++
896 b |= uint32(c) << (nb & 31)
897 nb += 8
898 }
899 chunk := h.chunks[b&(huffmanNumChunks-1)]
900 n = uint(chunk & huffmanCountMask)
901 if n > huffmanChunkBits {
902 chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
903 n = uint(chunk & huffmanCountMask)
904 }
905 if n <= nb {
906 if n == 0 {
907 f.b = b
908 f.nb = nb
909 if debugDecode {
910 fmt.Println("huffsym: n==0")
911 }
912 f.err = CorruptInputError(f.roffset)
913 return 0, f.err
914 }
915 f.b = b >> (n & 31)
916 f.nb = nb - n
917 return int(chunk >> huffmanValueShift), nil
918 }
919 }
920}
921
922func makeReader(r io.Reader) Reader {
923 if rr, ok := r.(Reader); ok {
924 return rr
925 }
926 return bufio.NewReader(r)
927}
928
929func fixedHuffmanDecoderInit() {
930 fixedOnce.Do(func() {
931 // These come from the RFC section 3.2.6.
932 var bits [288]int
933 for i := 0; i < 144; i++ {
934 bits[i] = 8
935 }
936 for i := 144; i < 256; i++ {
937 bits[i] = 9
938 }
939 for i := 256; i < 280; i++ {
940 bits[i] = 7
941 }
942 for i := 280; i < 288; i++ {
943 bits[i] = 8
944 }
945 fixedHuffmanDecoder.init(bits[:])
946 })
947}
948
949func (f *decompressor) Reset(r io.Reader, dict []byte) error {
950 *f = decompressor{
951 r: makeReader(r),
952 bits: f.bits,
953 codebits: f.codebits,
954 h1: f.h1,
955 h2: f.h2,
956 dict: f.dict,
957 step: (*decompressor).nextBlock,
958 }
959 f.dict.init(maxMatchOffset, dict)
960 return nil
961}
962
963// NewReader returns a new ReadCloser that can be used
964// to read the uncompressed version of r.
965// If r does not also implement io.ByteReader,
966// the decompressor may read more data than necessary from r.
967// It is the caller's responsibility to call Close on the ReadCloser
968// when finished reading.
969//
970// The ReadCloser returned by NewReader also implements Resetter.
971func NewReader(r io.Reader) io.ReadCloser {
972 fixedHuffmanDecoderInit()
973
974 var f decompressor
975 f.r = makeReader(r)
976 f.bits = new([maxNumLit + maxNumDist]int)
977 f.codebits = new([numCodes]int)
978 f.step = (*decompressor).nextBlock
979 f.dict.init(maxMatchOffset, nil)
980 return &f
981}
982
983// NewReaderDict is like NewReader but initializes the reader
984// with a preset dictionary. The returned Reader behaves as if
985// the uncompressed data stream started with the given dictionary,
986// which has already been read. NewReaderDict is typically used
987// to read data compressed by NewWriterDict.
988//
989// The ReadCloser returned by NewReader also implements Resetter.
990func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
991 fixedHuffmanDecoderInit()
992
993 var f decompressor
994 f.r = makeReader(r)
995 f.bits = new([maxNumLit + maxNumDist]int)
996 f.codebits = new([numCodes]int)
997 f.step = (*decompressor).nextBlock
998 f.dict.init(maxMatchOffset, dict)
999 return &f
1000}
Note: See TracBrowser for help on using the repository browser.