1 | // Copyright 2009 The Go Authors. All rights reserved.
|
---|
2 | // Copyright (c) 2015 Klaus Post
|
---|
3 | // Use of this source code is governed by a BSD-style
|
---|
4 | // license that can be found in the LICENSE file.
|
---|
5 |
|
---|
6 | package flate
|
---|
7 |
|
---|
8 | import (
|
---|
9 | "encoding/binary"
|
---|
10 | "fmt"
|
---|
11 | "io"
|
---|
12 | "math"
|
---|
13 | )
|
---|
14 |
|
---|
const (
	// Compression levels accepted by NewWriter; mirror compress/flate.
	NoCompression      = 0
	BestSpeed          = 1
	BestCompression    = 9
	DefaultCompression = -1

	// HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman
	// entropy encoding. This mode is useful in compressing data that has
	// already been compressed with an LZ style algorithm (e.g. Snappy or LZ4)
	// that lacks an entropy encoder. Compression gains are achieved when
	// certain bytes in the input stream occur more frequently than others.
	//
	// Note that HuffmanOnly produces a compressed output that is
	// RFC 1951 compliant. That is, any valid DEFLATE decompressor will
	// continue to be able to decompress this output.
	HuffmanOnly         = -2
	ConstantCompression = HuffmanOnly // compatibility alias.

	logWindowSize    = 15
	windowSize       = 1 << logWindowSize
	windowMask       = windowSize - 1
	logMaxOffsetSize = 15  // Standard DEFLATE
	minMatchLength   = 4   // The smallest match that the compressor looks for
	maxMatchLength   = 258 // The longest match for the compressor
	minOffsetSize    = 1   // The shortest offset that makes any sense

	// The maximum number of tokens we will encode at the time.
	// Smaller sizes usually creates less optimal blocks.
	// Bigger can make context switching slow.
	// We use this for levels 7-9, so we make it big.
	maxFlateBlockTokens = 1 << 15
	maxStoreBlockSize   = 65535
	hashBits            = 17 // After 17 performance degrades
	hashSize            = 1 << hashBits
	hashMask            = (1 << hashBits) - 1
	hashShift           = (hashBits + minMatchLength - 1) / minMatchLength
	maxHashOffset       = 1 << 28

	// skipNever disables match skipping (used by levels 7-9).
	skipNever = math.MaxInt32

	// debugDeflate enables extra sanity checks during development.
	debugDeflate = false
)
|
---|
57 |
|
---|
// compressionLevel holds the tuning parameters for one compression level.
// good/lazy/nice/chain control the match search (see deflateLazy/findMatch);
// level is the user-facing level number.
type compressionLevel struct {
	good, lazy, nice, chain, fastSkipHashing, level int
}
|
---|
61 |
|
---|
// Compression levels have been rebalanced from zlib deflate defaults
// to give a bigger spread in speed and compression.
// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/
var levels = []compressionLevel{
	{}, // 0
	// Level 1-6 uses specialized algorithm - values not used
	{0, 0, 0, 0, 0, 1},
	{0, 0, 0, 0, 0, 2},
	{0, 0, 0, 0, 0, 3},
	{0, 0, 0, 0, 0, 4},
	{0, 0, 0, 0, 0, 5},
	{0, 0, 0, 0, 0, 6},
	// Levels 7-9 use increasingly more lazy matching
	// and increasingly stringent conditions for "good enough".
	{8, 12, 16, 24, skipNever, 7},
	{16, 30, 40, 64, skipNever, 8},
	{32, 258, 258, 1024, skipNever, 9},
}
|
---|
80 |
|
---|
// advancedState contains state for the advanced levels, with bigger hash tables, etc.
type advancedState struct {
	// deflate state
	length         int // length of the best match found so far
	offset         int // offset of the best match found so far
	maxInsertIndex int // last window position where a hash can be inserted

	// Input hash chains
	// hashHead[hashValue] contains the largest inputIndex with the specified hash value
	// If hashHead[hashValue] is within the current window, then
	// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
	// with the same hash value.
	chainHead  int
	hashHead   [hashSize]uint32
	hashPrev   [windowSize]uint32
	hashOffset int // added to indices stored in the tables so 0 can mean "empty"

	// input window: unprocessed data is window[index:windowEnd]
	index          int
	estBitsPerByte int
	hashMatch      [maxMatchLength + minMatchLength]uint32 // scratch for bulkHash4

	hash uint32 // hash of the 4 bytes at window[index:]
	ii   uint16 // position of last match, intended to overflow to reset.
}
|
---|
106 |
|
---|
// compressor is the shared state for all compression strategies.
// Which of fast/state is populated depends on the level chosen in init.
type compressor struct {
	compressionLevel

	h *huffmanEncoder // literal-frequency coder used to estimate match gain (levels with chain > 100)
	w *huffmanBitWriter

	// compression algorithm
	fill func(*compressor, []byte) int // copy data to window
	step func(*compressor)             // process window

	window     []byte
	windowEnd  int
	blockStart int // window index where current tokens start
	err        error

	// queued output tokens
	tokens tokens
	fast   fastEnc        // used for levels 1-6
	state  *advancedState // used for levels 7-9

	sync          bool // requesting flush
	byteAvailable bool // if true, still need to process window[index-1].
}
|
---|
130 |
|
---|
// fillDeflate copies data from b into the sliding window, shifting the
// window down by windowSize when there is no longer room for the required
// lookahead. Returns the number of bytes consumed from b.
func (d *compressor) fillDeflate(b []byte) int {
	s := d.state
	if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
		// shift the window by windowSize
		copy(d.window[:], d.window[windowSize:2*windowSize])
		s.index -= windowSize
		d.windowEnd -= windowSize
		if d.blockStart >= windowSize {
			d.blockStart -= windowSize
		} else {
			// blockStart points at discarded data; mark it invalid.
			d.blockStart = math.MaxInt32
		}
		// Rebase table entries by raising hashOffset instead of rewriting
		// both tables on every shift; only rewrite when the offset would
		// grow without bound.
		s.hashOffset += windowSize
		if s.hashOffset > maxHashOffset {
			delta := s.hashOffset - 1
			s.hashOffset -= delta
			s.chainHead -= delta
			// Iterate over slices instead of arrays to avoid copying
			// the entire table onto the stack (Issue #18625).
			for i, v := range s.hashPrev[:] {
				if int(v) > delta {
					s.hashPrev[i] = uint32(int(v) - delta)
				} else {
					s.hashPrev[i] = 0
				}
			}
			for i, v := range s.hashHead[:] {
				if int(v) > delta {
					s.hashHead[i] = uint32(int(v) - delta)
				} else {
					s.hashHead[i] = 0
				}
			}
		}
	}
	n := copy(d.window[d.windowEnd:], b)
	d.windowEnd += n
	return n
}
|
---|
170 |
|
---|
171 | func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error {
|
---|
172 | if index > 0 || eof {
|
---|
173 | var window []byte
|
---|
174 | if d.blockStart <= index {
|
---|
175 | window = d.window[d.blockStart:index]
|
---|
176 | }
|
---|
177 | d.blockStart = index
|
---|
178 | //d.w.writeBlock(tok, eof, window)
|
---|
179 | d.w.writeBlockDynamic(tok, eof, window, d.sync)
|
---|
180 | return d.w.err
|
---|
181 | }
|
---|
182 | return nil
|
---|
183 | }
|
---|
184 |
|
---|
185 | // writeBlockSkip writes the current block and uses the number of tokens
|
---|
186 | // to determine if the block should be stored on no matches, or
|
---|
187 | // only huffman encoded.
|
---|
188 | func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error {
|
---|
189 | if index > 0 || eof {
|
---|
190 | if d.blockStart <= index {
|
---|
191 | window := d.window[d.blockStart:index]
|
---|
192 | // If we removed less than a 64th of all literals
|
---|
193 | // we huffman compress the block.
|
---|
194 | if int(tok.n) > len(window)-int(tok.n>>6) {
|
---|
195 | d.w.writeBlockHuff(eof, window, d.sync)
|
---|
196 | } else {
|
---|
197 | // Write a dynamic huffman block.
|
---|
198 | d.w.writeBlockDynamic(tok, eof, window, d.sync)
|
---|
199 | }
|
---|
200 | } else {
|
---|
201 | d.w.writeBlock(tok, eof, nil)
|
---|
202 | }
|
---|
203 | d.blockStart = index
|
---|
204 | return d.w.err
|
---|
205 | }
|
---|
206 | return nil
|
---|
207 | }
|
---|
208 |
|
---|
// fillWindow will fill the current window with the supplied
// dictionary and calculate all hashes.
// This is much faster than doing a full encode.
// Should only be used after a start/reset.
func (d *compressor) fillWindow(b []byte) {
	// Do not fill window if we are in store-only or huffman mode.
	if d.level <= 0 {
		return
	}
	if d.fast != nil {
		// encode the last data, but discard the result
		if len(b) > maxMatchOffset {
			b = b[len(b)-maxMatchOffset:]
		}
		d.fast.Encode(&d.tokens, b)
		d.tokens.Reset()
		return
	}
	s := d.state
	// If we are given too much, cut it.
	if len(b) > windowSize {
		b = b[len(b)-windowSize:]
	}
	// Add all to window.
	n := copy(d.window[d.windowEnd:], b)

	// Calculate 256 hashes at the time (more L1 cache hits)
	loops := (n + 256 - minMatchLength) / 256
	for j := 0; j < loops; j++ {
		startindex := j * 256
		end := startindex + 256 + minMatchLength - 1
		if end > n {
			end = n
		}
		tocheck := d.window[startindex:end]
		dstSize := len(tocheck) - minMatchLength + 1

		if dstSize <= 0 {
			continue
		}

		dst := s.hashMatch[:dstSize]
		bulkHash4(tocheck, dst)
		var newH uint32
		for i, val := range dst {
			di := i + startindex
			newH = val & hashMask
			// Get previous value with the same hash.
			// Our chain should point to the previous value.
			s.hashPrev[di&windowMask] = s.hashHead[newH]
			// Set the head of the hash chain to us.
			s.hashHead[newH] = uint32(di + s.hashOffset)
		}
		s.hash = newH
	}
	// Update window information.
	d.windowEnd += n
	s.index = n
}
|
---|
268 |
|
---|
// Try to find a match starting at index whose length is greater than prevSize.
// We only look at chainCount possibilities before giving up.
// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead
//
// Two search strategies are used: for short chains (d.chain < 100) a pure
// longest-match walk; for long chains, candidates are ranked by an
// estimated bit gain using the literal Huffman coder d.h.
func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, offset int, ok bool) {
	minMatchLook := maxMatchLength
	if lookahead < minMatchLook {
		minMatchLook = lookahead
	}

	win := d.window[0 : pos+minMatchLook]

	// We quit when we get a match that's at least nice long
	nice := len(win) - pos
	if d.nice < nice {
		nice = d.nice
	}

	// If we've got a match that's good enough, only look in 1/4 the chain.
	tries := d.chain
	length = minMatchLength - 1

	wEnd := win[pos+length]
	wPos := win[pos:]
	minIndex := pos - windowSize
	if minIndex < 0 {
		minIndex = 0
	}
	offset = 0

	cGain := 0
	if d.chain < 100 {
		// Short-chain path: keep the longest match seen.
		for i := prevHead; tries > 0; tries-- {
			// Cheap filter: compare the byte just past the current best
			// length before paying for a full matchLen.
			if wEnd == win[i+length] {
				n := matchLen(win[i:i+minMatchLook], wPos)
				if n > length {
					length = n
					offset = pos - i
					ok = true
					if n >= nice {
						// The match is good enough that we don't try to find a better one.
						break
					}
					wEnd = win[pos+n]
				}
			}
			if i <= minIndex {
				// hashPrev[i & windowMask] has already been overwritten, so stop now.
				break
			}
			i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
			if i < minIndex {
				break
			}
		}
		return
	}

	// Some like it higher (CSV), some like it lower (JSON)
	const baseCost = 6
	// Base is 4 bytes at with an additional cost.
	// Matches must be better than this.
	for i := prevHead; tries > 0; tries-- {
		if wEnd == win[i+length] {
			n := matchLen(win[i:i+minMatchLook], wPos)
			if n > length {
				// Calculate gain. Estimate
				newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]])

				//fmt.Println(n, "gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n]))
				if newGain > cGain {
					length = n
					offset = pos - i
					cGain = newGain
					ok = true
					if n >= nice {
						// The match is good enough that we don't try to find a better one.
						break
					}
					wEnd = win[pos+n]
				}
			}
		}
		if i <= minIndex {
			// hashPrev[i & windowMask] has already been overwritten, so stop now.
			break
		}
		i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
		if i < minIndex {
			break
		}
	}
	return
}
|
---|
362 |
|
---|
363 | func (d *compressor) writeStoredBlock(buf []byte) error {
|
---|
364 | if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
|
---|
365 | return d.w.err
|
---|
366 | }
|
---|
367 | d.w.writeBytes(buf)
|
---|
368 | return d.w.err
|
---|
369 | }
|
---|
370 |
|
---|
371 | // hash4 returns a hash representation of the first 4 bytes
|
---|
372 | // of the supplied slice.
|
---|
373 | // The caller must ensure that len(b) >= 4.
|
---|
374 | func hash4(b []byte) uint32 {
|
---|
375 | return hash4u(binary.LittleEndian.Uint32(b), hashBits)
|
---|
376 | }
|
---|
377 |
|
---|
378 | // bulkHash4 will compute hashes using the same
|
---|
379 | // algorithm as hash4
|
---|
380 | func bulkHash4(b []byte, dst []uint32) {
|
---|
381 | if len(b) < 4 {
|
---|
382 | return
|
---|
383 | }
|
---|
384 | hb := binary.LittleEndian.Uint32(b)
|
---|
385 |
|
---|
386 | dst[0] = hash4u(hb, hashBits)
|
---|
387 | end := len(b) - 4 + 1
|
---|
388 | for i := 1; i < end; i++ {
|
---|
389 | hb = (hb >> 8) | uint32(b[i+3])<<24
|
---|
390 | dst[i] = hash4u(hb, hashBits)
|
---|
391 | }
|
---|
392 | }
|
---|
393 |
|
---|
394 | func (d *compressor) initDeflate() {
|
---|
395 | d.window = make([]byte, 2*windowSize)
|
---|
396 | d.byteAvailable = false
|
---|
397 | d.err = nil
|
---|
398 | if d.state == nil {
|
---|
399 | return
|
---|
400 | }
|
---|
401 | s := d.state
|
---|
402 | s.index = 0
|
---|
403 | s.hashOffset = 1
|
---|
404 | s.length = minMatchLength - 1
|
---|
405 | s.offset = 0
|
---|
406 | s.hash = 0
|
---|
407 | s.chainHead = -1
|
---|
408 | }
|
---|
409 |
|
---|
// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
// meaning it always has lazy matching on.
// It consumes window[s.index:d.windowEnd], emitting tokens and flushing
// blocks as the token buffer fills; any write error lands in d.err.
func (d *compressor) deflateLazy() {
	s := d.state
	// Sanity enables additional runtime tests.
	// It's intended to be used during development
	// to supplement the currently ad-hoc unit tests.
	const sanity = debugDeflate

	if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
		return
	}
	if d.windowEnd != s.index && d.chain > 100 {
		// Get literal huffman coder.
		// Used by findMatch to estimate the bit gain of candidate matches.
		if d.h == nil {
			d.h = newHuffmanEncoder(maxFlateBlockTokens)
		}
		var tmp [256]uint16
		for _, v := range d.window[s.index:d.windowEnd] {
			tmp[v]++
		}
		d.h.generate(tmp[:], 15)
	}

	s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
	if s.index < s.maxInsertIndex {
		s.hash = hash4(d.window[s.index:])
	}

	for {
		if sanity && s.index > d.windowEnd {
			panic("index > windowEnd")
		}
		lookahead := d.windowEnd - s.index
		if lookahead < minMatchLength+maxMatchLength {
			if !d.sync {
				return
			}
			if sanity && s.index > d.windowEnd {
				panic("index > windowEnd")
			}
			if lookahead == 0 {
				// Flush current output block if any.
				if d.byteAvailable {
					// There is still one pending token that needs to be flushed
					d.tokens.AddLiteral(d.window[s.index-1])
					d.byteAvailable = false
				}
				if d.tokens.n > 0 {
					if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
						return
					}
					d.tokens.Reset()
				}
				return
			}
		}
		if s.index < s.maxInsertIndex {
			// Update the hash
			s.hash = hash4(d.window[s.index:])
			ch := s.hashHead[s.hash&hashMask]
			s.chainHead = int(ch)
			s.hashPrev[s.index&windowMask] = ch
			s.hashHead[s.hash&hashMask] = uint32(s.index + s.hashOffset)
		}
		prevLength := s.length
		prevOffset := s.offset
		s.length = minMatchLength - 1
		s.offset = 0
		minIndex := s.index - windowSize
		if minIndex < 0 {
			minIndex = 0
		}

		if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
			if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead); ok {
				s.length = newLength
				s.offset = newOffset
			}
		}

		if prevLength >= minMatchLength && s.length <= prevLength {
			// Check for better match at end...
			//
			// checkOff must be >=2 since we otherwise risk checking s.index
			// Offset of 2 seems to yield best results.
			const checkOff = 2
			prevIndex := s.index - 1
			if prevIndex+prevLength+checkOff < s.maxInsertIndex {
				end := lookahead
				if lookahead > maxMatchLength {
					end = maxMatchLength
				}
				end += prevIndex
				idx := prevIndex + prevLength - (4 - checkOff)
				h := hash4(d.window[idx:])
				ch2 := int(s.hashHead[h&hashMask]) - s.hashOffset - prevLength + (4 - checkOff)
				if ch2 > minIndex {
					length := matchLen(d.window[prevIndex:end], d.window[ch2:])
					// It seems like a pure length metric is best.
					if length > prevLength {
						prevLength = length
						prevOffset = prevIndex - ch2
					}
				}
			}
			// There was a match at the previous step, and the current match is
			// not better. Output the previous match.
			d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))

			// Insert in the hash table all strings up to the end of the match.
			// index and index-1 are already inserted. If there is not enough
			// lookahead, the last two strings are not inserted into the hash
			// table.
			newIndex := s.index + prevLength - 1
			// Calculate missing hashes
			end := newIndex
			if end > s.maxInsertIndex {
				end = s.maxInsertIndex
			}
			end += minMatchLength - 1
			startindex := s.index + 1
			if startindex > s.maxInsertIndex {
				startindex = s.maxInsertIndex
			}
			tocheck := d.window[startindex:end]
			dstSize := len(tocheck) - minMatchLength + 1
			if dstSize > 0 {
				dst := s.hashMatch[:dstSize]
				bulkHash4(tocheck, dst)
				var newH uint32
				for i, val := range dst {
					di := i + startindex
					newH = val & hashMask
					// Get previous value with the same hash.
					// Our chain should point to the previous value.
					s.hashPrev[di&windowMask] = s.hashHead[newH]
					// Set the head of the hash chain to us.
					s.hashHead[newH] = uint32(di + s.hashOffset)
				}
				s.hash = newH
			}

			s.index = newIndex
			d.byteAvailable = false
			s.length = minMatchLength - 1
			if d.tokens.n == maxFlateBlockTokens {
				// The block includes the current character
				if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
					return
				}
				d.tokens.Reset()
			}
			s.ii = 0
		} else {
			// Reset, if we got a match this run.
			if s.length >= minMatchLength {
				s.ii = 0
			}
			// We have a byte waiting. Emit it.
			if d.byteAvailable {
				s.ii++
				d.tokens.AddLiteral(d.window[s.index-1])
				if d.tokens.n == maxFlateBlockTokens {
					if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
						return
					}
					d.tokens.Reset()
				}
				s.index++

				// If we have a long run of no matches, skip additional bytes
				// Resets when s.ii overflows after 64KB.
				if n := int(s.ii) - d.chain; n > 0 {
					n = 1 + int(n>>6)
					for j := 0; j < n; j++ {
						if s.index >= d.windowEnd-1 {
							break
						}
						d.tokens.AddLiteral(d.window[s.index-1])
						if d.tokens.n == maxFlateBlockTokens {
							if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
								return
							}
							d.tokens.Reset()
						}
						// Index...
						if s.index < s.maxInsertIndex {
							h := hash4(d.window[s.index:])
							ch := s.hashHead[h]
							s.chainHead = int(ch)
							s.hashPrev[s.index&windowMask] = ch
							s.hashHead[h] = uint32(s.index + s.hashOffset)
						}
						s.index++
					}
					// Flush last byte
					d.tokens.AddLiteral(d.window[s.index-1])
					d.byteAvailable = false
					// s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength
					if d.tokens.n == maxFlateBlockTokens {
						if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
							return
						}
						d.tokens.Reset()
					}
				}
			} else {
				s.index++
				d.byteAvailable = true
			}
		}
	}
}
|
---|
624 |
|
---|
625 | func (d *compressor) store() {
|
---|
626 | if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) {
|
---|
627 | d.err = d.writeStoredBlock(d.window[:d.windowEnd])
|
---|
628 | d.windowEnd = 0
|
---|
629 | }
|
---|
630 | }
|
---|
631 |
|
---|
// fillBlock will fill the buffer with data for huffman-only compression.
// The number of bytes copied is returned.
// (Note: doc previously said "fillWindow"; this is fillBlock.)
func (d *compressor) fillBlock(b []byte) int {
	n := copy(d.window[d.windowEnd:], b)
	d.windowEnd += n
	return n
}
|
---|
639 |
|
---|
640 | // storeHuff will compress and store the currently added data,
|
---|
641 | // if enough has been accumulated or we at the end of the stream.
|
---|
642 | // Any error that occurred will be in d.err
|
---|
643 | func (d *compressor) storeHuff() {
|
---|
644 | if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
|
---|
645 | return
|
---|
646 | }
|
---|
647 | d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
|
---|
648 | d.err = d.w.err
|
---|
649 | d.windowEnd = 0
|
---|
650 | }
|
---|
651 |
|
---|
// storeFast will compress and store the currently added data,
// if enough has been accumulated or we at the end of the stream.
// Any error that occurred will be in d.err
func (d *compressor) storeFast() {
	// We only compress if we have maxStoreBlockSize.
	if d.windowEnd < len(d.window) {
		if !d.sync {
			return
		}
		// Handle extremely small sizes.
		if d.windowEnd < 128 {
			if d.windowEnd == 0 {
				return
			}
			if d.windowEnd <= 32 {
				// Too small for entropy coding to pay off; store raw.
				d.err = d.writeStoredBlock(d.window[:d.windowEnd])
			} else {
				d.w.writeBlockHuff(false, d.window[:d.windowEnd], true)
				d.err = d.w.err
			}
			d.tokens.Reset()
			d.windowEnd = 0
			d.fast.Reset()
			return
		}
	}

	d.fast.Encode(&d.tokens, d.window[:d.windowEnd])
	// If we made zero matches, store the block as is.
	if d.tokens.n == 0 {
		d.err = d.writeStoredBlock(d.window[:d.windowEnd])
		// If we removed less than 1/16th, huffman compress the block.
	} else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) {
		d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
		d.err = d.w.err
	} else {
		d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync)
		d.err = d.w.err
	}
	d.tokens.Reset()
	d.windowEnd = 0
}
|
---|
694 |
|
---|
695 | // write will add input byte to the stream.
|
---|
696 | // Unless an error occurs all bytes will be consumed.
|
---|
697 | func (d *compressor) write(b []byte) (n int, err error) {
|
---|
698 | if d.err != nil {
|
---|
699 | return 0, d.err
|
---|
700 | }
|
---|
701 | n = len(b)
|
---|
702 | for len(b) > 0 {
|
---|
703 | if d.windowEnd == len(d.window) || d.sync {
|
---|
704 | d.step(d)
|
---|
705 | }
|
---|
706 | b = b[d.fill(d, b):]
|
---|
707 | if d.err != nil {
|
---|
708 | return 0, d.err
|
---|
709 | }
|
---|
710 | }
|
---|
711 | return n, d.err
|
---|
712 | }
|
---|
713 |
|
---|
714 | func (d *compressor) syncFlush() error {
|
---|
715 | d.sync = true
|
---|
716 | if d.err != nil {
|
---|
717 | return d.err
|
---|
718 | }
|
---|
719 | d.step(d)
|
---|
720 | if d.err == nil {
|
---|
721 | d.w.writeStoredHeader(0, false)
|
---|
722 | d.w.flush()
|
---|
723 | d.err = d.w.err
|
---|
724 | }
|
---|
725 | d.sync = false
|
---|
726 | return d.err
|
---|
727 | }
|
---|
728 |
|
---|
729 | func (d *compressor) init(w io.Writer, level int) (err error) {
|
---|
730 | d.w = newHuffmanBitWriter(w)
|
---|
731 |
|
---|
732 | switch {
|
---|
733 | case level == NoCompression:
|
---|
734 | d.window = make([]byte, maxStoreBlockSize)
|
---|
735 | d.fill = (*compressor).fillBlock
|
---|
736 | d.step = (*compressor).store
|
---|
737 | case level == ConstantCompression:
|
---|
738 | d.w.logNewTablePenalty = 10
|
---|
739 | d.window = make([]byte, 32<<10)
|
---|
740 | d.fill = (*compressor).fillBlock
|
---|
741 | d.step = (*compressor).storeHuff
|
---|
742 | case level == DefaultCompression:
|
---|
743 | level = 5
|
---|
744 | fallthrough
|
---|
745 | case level >= 1 && level <= 6:
|
---|
746 | d.w.logNewTablePenalty = 7
|
---|
747 | d.fast = newFastEnc(level)
|
---|
748 | d.window = make([]byte, maxStoreBlockSize)
|
---|
749 | d.fill = (*compressor).fillBlock
|
---|
750 | d.step = (*compressor).storeFast
|
---|
751 | case 7 <= level && level <= 9:
|
---|
752 | d.w.logNewTablePenalty = 8
|
---|
753 | d.state = &advancedState{}
|
---|
754 | d.compressionLevel = levels[level]
|
---|
755 | d.initDeflate()
|
---|
756 | d.fill = (*compressor).fillDeflate
|
---|
757 | d.step = (*compressor).deflateLazy
|
---|
758 | default:
|
---|
759 | return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
|
---|
760 | }
|
---|
761 | d.level = level
|
---|
762 | return nil
|
---|
763 | }
|
---|
764 |
|
---|
765 | // reset the state of the compressor.
|
---|
766 | func (d *compressor) reset(w io.Writer) {
|
---|
767 | d.w.reset(w)
|
---|
768 | d.sync = false
|
---|
769 | d.err = nil
|
---|
770 | // We only need to reset a few things for Snappy.
|
---|
771 | if d.fast != nil {
|
---|
772 | d.fast.Reset()
|
---|
773 | d.windowEnd = 0
|
---|
774 | d.tokens.Reset()
|
---|
775 | return
|
---|
776 | }
|
---|
777 | switch d.compressionLevel.chain {
|
---|
778 | case 0:
|
---|
779 | // level was NoCompression or ConstantCompresssion.
|
---|
780 | d.windowEnd = 0
|
---|
781 | default:
|
---|
782 | s := d.state
|
---|
783 | s.chainHead = -1
|
---|
784 | for i := range s.hashHead {
|
---|
785 | s.hashHead[i] = 0
|
---|
786 | }
|
---|
787 | for i := range s.hashPrev {
|
---|
788 | s.hashPrev[i] = 0
|
---|
789 | }
|
---|
790 | s.hashOffset = 1
|
---|
791 | s.index, d.windowEnd = 0, 0
|
---|
792 | d.blockStart, d.byteAvailable = 0, false
|
---|
793 | d.tokens.Reset()
|
---|
794 | s.length = minMatchLength - 1
|
---|
795 | s.offset = 0
|
---|
796 | s.hash = 0
|
---|
797 | s.ii = 0
|
---|
798 | s.maxInsertIndex = 0
|
---|
799 | }
|
---|
800 | }
|
---|
801 |
|
---|
802 | func (d *compressor) close() error {
|
---|
803 | if d.err != nil {
|
---|
804 | return d.err
|
---|
805 | }
|
---|
806 | d.sync = true
|
---|
807 | d.step(d)
|
---|
808 | if d.err != nil {
|
---|
809 | return d.err
|
---|
810 | }
|
---|
811 | if d.w.writeStoredHeader(0, true); d.w.err != nil {
|
---|
812 | return d.w.err
|
---|
813 | }
|
---|
814 | d.w.flush()
|
---|
815 | d.w.reset(nil)
|
---|
816 | return d.w.err
|
---|
817 | }
|
---|
818 |
|
---|
819 | // NewWriter returns a new Writer compressing data at the given level.
|
---|
820 | // Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression);
|
---|
821 | // higher levels typically run slower but compress more.
|
---|
822 | // Level 0 (NoCompression) does not attempt any compression; it only adds the
|
---|
823 | // necessary DEFLATE framing.
|
---|
824 | // Level -1 (DefaultCompression) uses the default compression level.
|
---|
825 | // Level -2 (ConstantCompression) will use Huffman compression only, giving
|
---|
826 | // a very fast compression for all types of input, but sacrificing considerable
|
---|
827 | // compression efficiency.
|
---|
828 | //
|
---|
829 | // If level is in the range [-2, 9] then the error returned will be nil.
|
---|
830 | // Otherwise the error returned will be non-nil.
|
---|
831 | func NewWriter(w io.Writer, level int) (*Writer, error) {
|
---|
832 | var dw Writer
|
---|
833 | if err := dw.d.init(w, level); err != nil {
|
---|
834 | return nil, err
|
---|
835 | }
|
---|
836 | return &dw, nil
|
---|
837 | }
|
---|
838 |
|
---|
839 | // NewWriterDict is like NewWriter but initializes the new
|
---|
840 | // Writer with a preset dictionary. The returned Writer behaves
|
---|
841 | // as if the dictionary had been written to it without producing
|
---|
842 | // any compressed output. The compressed data written to w
|
---|
843 | // can only be decompressed by a Reader initialized with the
|
---|
844 | // same dictionary.
|
---|
845 | func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
|
---|
846 | zw, err := NewWriter(w, level)
|
---|
847 | if err != nil {
|
---|
848 | return nil, err
|
---|
849 | }
|
---|
850 | zw.d.fillWindow(dict)
|
---|
851 | zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
|
---|
852 | return zw, err
|
---|
853 | }
|
---|
854 |
|
---|
// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
	d    compressor
	dict []byte // preset dictionary retained for Reset (set by NewWriterDict/ResetDict)
}
|
---|
861 |
|
---|
862 | // Write writes data to w, which will eventually write the
|
---|
863 | // compressed form of data to its underlying writer.
|
---|
864 | func (w *Writer) Write(data []byte) (n int, err error) {
|
---|
865 | return w.d.write(data)
|
---|
866 | }
|
---|
867 |
|
---|
868 | // Flush flushes any pending data to the underlying writer.
|
---|
869 | // It is useful mainly in compressed network protocols, to ensure that
|
---|
870 | // a remote reader has enough data to reconstruct a packet.
|
---|
871 | // Flush does not return until the data has been written.
|
---|
872 | // Calling Flush when there is no pending data still causes the Writer
|
---|
873 | // to emit a sync marker of at least 4 bytes.
|
---|
874 | // If the underlying writer returns an error, Flush returns that error.
|
---|
875 | //
|
---|
876 | // In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
|
---|
877 | func (w *Writer) Flush() error {
|
---|
878 | // For more about flushing:
|
---|
879 | // http://www.bolet.org/~pornin/deflate-flush.html
|
---|
880 | return w.d.syncFlush()
|
---|
881 | }
|
---|
882 |
|
---|
883 | // Close flushes and closes the writer.
|
---|
884 | func (w *Writer) Close() error {
|
---|
885 | return w.d.close()
|
---|
886 | }
|
---|
887 |
|
---|
888 | // Reset discards the writer's state and makes it equivalent to
|
---|
889 | // the result of NewWriter or NewWriterDict called with dst
|
---|
890 | // and w's level and dictionary.
|
---|
891 | func (w *Writer) Reset(dst io.Writer) {
|
---|
892 | if len(w.dict) > 0 {
|
---|
893 | // w was created with NewWriterDict
|
---|
894 | w.d.reset(dst)
|
---|
895 | if dst != nil {
|
---|
896 | w.d.fillWindow(w.dict)
|
---|
897 | }
|
---|
898 | } else {
|
---|
899 | // w was created with NewWriter
|
---|
900 | w.d.reset(dst)
|
---|
901 | }
|
---|
902 | }
|
---|
903 |
|
---|
904 | // ResetDict discards the writer's state and makes it equivalent to
|
---|
905 | // the result of NewWriter or NewWriterDict called with dst
|
---|
906 | // and w's level, but sets a specific dictionary.
|
---|
907 | func (w *Writer) ResetDict(dst io.Writer, dict []byte) {
|
---|
908 | w.dict = dict
|
---|
909 | w.d.reset(dst)
|
---|
910 | w.d.fillWindow(w.dict)
|
---|
911 | }
|
---|