1 | // Copyright 2009 The Go Authors. All rights reserved.
|
---|
2 | // Use of this source code is governed by a BSD-style
|
---|
3 | // license that can be found in the LICENSE file.
|
---|
4 |
|
---|
5 | package flate
|
---|
6 |
|
---|
7 | import (
|
---|
8 | "bytes"
|
---|
9 | "encoding/binary"
|
---|
10 | "fmt"
|
---|
11 | "io"
|
---|
12 | "math"
|
---|
13 | )
|
---|
14 |
|
---|
15 | const (
|
---|
16 | // bits 0-16 xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits
|
---|
17 | // bits 16-22 offsetcode - 5 bits
|
---|
18 | // bits 22-30 xlength = length - MIN_MATCH_LENGTH - 8 bits
|
---|
19 | // bits 30-32 type 0 = literal 1=EOF 2=Match 3=Unused - 2 bits
|
---|
20 | lengthShift = 22
|
---|
21 | offsetMask = 1<<lengthShift - 1
|
---|
22 | typeMask = 3 << 30
|
---|
23 | literalType = 0 << 30
|
---|
24 | matchType = 1 << 30
|
---|
25 | matchOffsetOnlyMask = 0xffff
|
---|
26 | )
|
---|
27 |
|
---|
28 | // The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
|
---|
29 | // is lengthCodes[length - MIN_MATCH_LENGTH]
|
---|
30 | var lengthCodes = [256]uint8{
|
---|
31 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
|
---|
32 | 9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
|
---|
33 | 13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
|
---|
34 | 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
|
---|
35 | 17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
|
---|
36 | 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
|
---|
37 | 19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
|
---|
38 | 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
---|
39 | 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
---|
40 | 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
|
---|
41 | 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
|
---|
42 | 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
|
---|
43 | 23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
|
---|
44 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
---|
45 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
---|
46 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
---|
47 | 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
---|
48 | 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
---|
49 | 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
---|
50 | 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
|
---|
51 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
---|
52 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
---|
53 | 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
|
---|
54 | 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
---|
55 | 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
---|
56 | 27, 27, 27, 27, 27, 28,
|
---|
57 | }
|
---|
58 |
|
---|
59 | // lengthCodes1 is length codes, but starting at 1.
|
---|
60 | var lengthCodes1 = [256]uint8{
|
---|
61 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 9,
|
---|
62 | 10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
|
---|
63 | 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
|
---|
64 | 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
|
---|
65 | 18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
|
---|
66 | 19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
|
---|
67 | 20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
|
---|
68 | 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
---|
69 | 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
|
---|
70 | 22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
|
---|
71 | 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
|
---|
72 | 23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
|
---|
73 | 24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
|
---|
74 | 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
---|
75 | 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
---|
76 | 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
---|
77 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
---|
78 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
---|
79 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
---|
80 | 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
|
---|
81 | 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
---|
82 | 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
---|
83 | 27, 27, 27, 27, 28, 28, 28, 28, 28, 28,
|
---|
84 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
---|
85 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
---|
86 | 28, 28, 28, 28, 28, 29,
|
---|
87 | }
|
---|
88 |
|
---|
89 | var offsetCodes = [256]uint32{
|
---|
90 | 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
|
---|
91 | 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
|
---|
92 | 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
---|
93 | 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
---|
94 | 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
|
---|
95 | 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
|
---|
96 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
---|
97 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
---|
98 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
---|
99 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
---|
100 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
---|
101 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
---|
102 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
---|
103 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
---|
104 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
---|
105 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
---|
106 | }
|
---|
107 |
|
---|
108 | // offsetCodes14 are offsetCodes, but with 14 added.
|
---|
109 | var offsetCodes14 = [256]uint32{
|
---|
110 | 14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
|
---|
111 | 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
|
---|
112 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
---|
113 | 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
---|
114 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
---|
115 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
---|
116 | 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
---|
117 | 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
---|
118 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
---|
119 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
---|
120 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
---|
121 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
---|
122 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
|
---|
123 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
|
---|
124 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
|
---|
125 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
|
---|
126 | }
|
---|
127 |
|
---|
128 | type token uint32
|
---|
129 |
|
---|
130 | type tokens struct {
|
---|
131 | extraHist [32]uint16 // codes 256->maxnumlit
|
---|
132 | offHist [32]uint16 // offset codes
|
---|
133 | litHist [256]uint16 // codes 0->255
|
---|
134 | nFilled int
|
---|
135 | n uint16 // Must be able to contain maxStoreBlockSize
|
---|
136 | tokens [maxStoreBlockSize + 1]token
|
---|
137 | }
|
---|
138 |
|
---|
139 | func (t *tokens) Reset() {
|
---|
140 | if t.n == 0 {
|
---|
141 | return
|
---|
142 | }
|
---|
143 | t.n = 0
|
---|
144 | t.nFilled = 0
|
---|
145 | for i := range t.litHist[:] {
|
---|
146 | t.litHist[i] = 0
|
---|
147 | }
|
---|
148 | for i := range t.extraHist[:] {
|
---|
149 | t.extraHist[i] = 0
|
---|
150 | }
|
---|
151 | for i := range t.offHist[:] {
|
---|
152 | t.offHist[i] = 0
|
---|
153 | }
|
---|
154 | }
|
---|
155 |
|
---|
156 | func (t *tokens) Fill() {
|
---|
157 | if t.n == 0 {
|
---|
158 | return
|
---|
159 | }
|
---|
160 | for i, v := range t.litHist[:] {
|
---|
161 | if v == 0 {
|
---|
162 | t.litHist[i] = 1
|
---|
163 | t.nFilled++
|
---|
164 | }
|
---|
165 | }
|
---|
166 | for i, v := range t.extraHist[:literalCount-256] {
|
---|
167 | if v == 0 {
|
---|
168 | t.nFilled++
|
---|
169 | t.extraHist[i] = 1
|
---|
170 | }
|
---|
171 | }
|
---|
172 | for i, v := range t.offHist[:offsetCodeCount] {
|
---|
173 | if v == 0 {
|
---|
174 | t.offHist[i] = 1
|
---|
175 | }
|
---|
176 | }
|
---|
177 | }
|
---|
178 |
|
---|
179 | func indexTokens(in []token) tokens {
|
---|
180 | var t tokens
|
---|
181 | t.indexTokens(in)
|
---|
182 | return t
|
---|
183 | }
|
---|
184 |
|
---|
185 | func (t *tokens) indexTokens(in []token) {
|
---|
186 | t.Reset()
|
---|
187 | for _, tok := range in {
|
---|
188 | if tok < matchType {
|
---|
189 | t.AddLiteral(tok.literal())
|
---|
190 | continue
|
---|
191 | }
|
---|
192 | t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask)
|
---|
193 | }
|
---|
194 | }
|
---|
195 |
|
---|
196 | // emitLiteral writes a literal chunk and returns the number of bytes written.
|
---|
197 | func emitLiteral(dst *tokens, lit []byte) {
|
---|
198 | for _, v := range lit {
|
---|
199 | dst.tokens[dst.n] = token(v)
|
---|
200 | dst.litHist[v]++
|
---|
201 | dst.n++
|
---|
202 | }
|
---|
203 | }
|
---|
204 |
|
---|
205 | func (t *tokens) AddLiteral(lit byte) {
|
---|
206 | t.tokens[t.n] = token(lit)
|
---|
207 | t.litHist[lit]++
|
---|
208 | t.n++
|
---|
209 | }
|
---|
210 |
|
---|
211 | // from https://stackoverflow.com/a/28730362
|
---|
212 | func mFastLog2(val float32) float32 {
|
---|
213 | ux := int32(math.Float32bits(val))
|
---|
214 | log2 := (float32)(((ux >> 23) & 255) - 128)
|
---|
215 | ux &= -0x7f800001
|
---|
216 | ux += 127 << 23
|
---|
217 | uval := math.Float32frombits(uint32(ux))
|
---|
218 | log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759
|
---|
219 | return log2
|
---|
220 | }
|
---|
221 |
|
---|
222 | // EstimatedBits will return an minimum size estimated by an *optimal*
|
---|
223 | // compression of the block.
|
---|
224 | // The size of the block
|
---|
225 | func (t *tokens) EstimatedBits() int {
|
---|
226 | shannon := float32(0)
|
---|
227 | bits := int(0)
|
---|
228 | nMatches := 0
|
---|
229 | total := int(t.n) + t.nFilled
|
---|
230 | if total > 0 {
|
---|
231 | invTotal := 1.0 / float32(total)
|
---|
232 | for _, v := range t.litHist[:] {
|
---|
233 | if v > 0 {
|
---|
234 | n := float32(v)
|
---|
235 | shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
|
---|
236 | }
|
---|
237 | }
|
---|
238 | // Just add 15 for EOB
|
---|
239 | shannon += 15
|
---|
240 | for i, v := range t.extraHist[1 : literalCount-256] {
|
---|
241 | if v > 0 {
|
---|
242 | n := float32(v)
|
---|
243 | shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
|
---|
244 | bits += int(lengthExtraBits[i&31]) * int(v)
|
---|
245 | nMatches += int(v)
|
---|
246 | }
|
---|
247 | }
|
---|
248 | }
|
---|
249 | if nMatches > 0 {
|
---|
250 | invTotal := 1.0 / float32(nMatches)
|
---|
251 | for i, v := range t.offHist[:offsetCodeCount] {
|
---|
252 | if v > 0 {
|
---|
253 | n := float32(v)
|
---|
254 | shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
|
---|
255 | bits += int(offsetExtraBits[i&31]) * int(v)
|
---|
256 | }
|
---|
257 | }
|
---|
258 | }
|
---|
259 | return int(shannon) + bits
|
---|
260 | }
|
---|
261 |
|
---|
262 | // AddMatch adds a match to the tokens.
|
---|
263 | // This function is very sensitive to inlining and right on the border.
|
---|
264 | func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
|
---|
265 | if debugDeflate {
|
---|
266 | if xlength >= maxMatchLength+baseMatchLength {
|
---|
267 | panic(fmt.Errorf("invalid length: %v", xlength))
|
---|
268 | }
|
---|
269 | if xoffset >= maxMatchOffset+baseMatchOffset {
|
---|
270 | panic(fmt.Errorf("invalid offset: %v", xoffset))
|
---|
271 | }
|
---|
272 | }
|
---|
273 | oCode := offsetCode(xoffset)
|
---|
274 | xoffset |= oCode << 16
|
---|
275 |
|
---|
276 | t.extraHist[lengthCodes1[uint8(xlength)]]++
|
---|
277 | t.offHist[oCode&31]++
|
---|
278 | t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
|
---|
279 | t.n++
|
---|
280 | }
|
---|
281 |
|
---|
282 | // AddMatchLong adds a match to the tokens, potentially longer than max match length.
|
---|
283 | // Length should NOT have the base subtracted, only offset should.
|
---|
284 | func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
|
---|
285 | if debugDeflate {
|
---|
286 | if xoffset >= maxMatchOffset+baseMatchOffset {
|
---|
287 | panic(fmt.Errorf("invalid offset: %v", xoffset))
|
---|
288 | }
|
---|
289 | }
|
---|
290 | oc := offsetCode(xoffset)
|
---|
291 | xoffset |= oc << 16
|
---|
292 | for xlength > 0 {
|
---|
293 | xl := xlength
|
---|
294 | if xl > 258 {
|
---|
295 | // We need to have at least baseMatchLength left over for next loop.
|
---|
296 | if xl > 258+baseMatchLength {
|
---|
297 | xl = 258
|
---|
298 | } else {
|
---|
299 | xl = 258 - baseMatchLength
|
---|
300 | }
|
---|
301 | }
|
---|
302 | xlength -= xl
|
---|
303 | xl -= baseMatchLength
|
---|
304 | t.extraHist[lengthCodes1[uint8(xl)]]++
|
---|
305 | t.offHist[oc&31]++
|
---|
306 | t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
|
---|
307 | t.n++
|
---|
308 | }
|
---|
309 | }
|
---|
310 |
|
---|
311 | func (t *tokens) AddEOB() {
|
---|
312 | t.tokens[t.n] = token(endBlockMarker)
|
---|
313 | t.extraHist[0]++
|
---|
314 | t.n++
|
---|
315 | }
|
---|
316 |
|
---|
317 | func (t *tokens) Slice() []token {
|
---|
318 | return t.tokens[:t.n]
|
---|
319 | }
|
---|
320 |
|
---|
321 | // VarInt returns the tokens as varint encoded bytes.
|
---|
322 | func (t *tokens) VarInt() []byte {
|
---|
323 | var b = make([]byte, binary.MaxVarintLen32*int(t.n))
|
---|
324 | var off int
|
---|
325 | for _, v := range t.tokens[:t.n] {
|
---|
326 | off += binary.PutUvarint(b[off:], uint64(v))
|
---|
327 | }
|
---|
328 | return b[:off]
|
---|
329 | }
|
---|
330 |
|
---|
331 | // FromVarInt restores t to the varint encoded tokens provided.
|
---|
332 | // Any data in t is removed.
|
---|
333 | func (t *tokens) FromVarInt(b []byte) error {
|
---|
334 | var buf = bytes.NewReader(b)
|
---|
335 | var toks []token
|
---|
336 | for {
|
---|
337 | r, err := binary.ReadUvarint(buf)
|
---|
338 | if err == io.EOF {
|
---|
339 | break
|
---|
340 | }
|
---|
341 | if err != nil {
|
---|
342 | return err
|
---|
343 | }
|
---|
344 | toks = append(toks, token(r))
|
---|
345 | }
|
---|
346 | t.indexTokens(toks)
|
---|
347 | return nil
|
---|
348 | }
|
---|
349 |
|
---|
350 | // Returns the type of a token
|
---|
351 | func (t token) typ() uint32 { return uint32(t) & typeMask }
|
---|
352 |
|
---|
353 | // Returns the literal of a literal token
|
---|
354 | func (t token) literal() uint8 { return uint8(t) }
|
---|
355 |
|
---|
356 | // Returns the extra offset of a match token
|
---|
357 | func (t token) offset() uint32 { return uint32(t) & offsetMask }
|
---|
358 |
|
---|
359 | func (t token) length() uint8 { return uint8(t >> lengthShift) }
|
---|
360 |
|
---|
361 | // Convert length to code.
|
---|
362 | func lengthCode(len uint8) uint8 { return lengthCodes[len] }
|
---|
363 |
|
---|
364 | // Returns the offset code corresponding to a specific offset
|
---|
365 | func offsetCode(off uint32) uint32 {
|
---|
366 | if false {
|
---|
367 | if off < uint32(len(offsetCodes)) {
|
---|
368 | return offsetCodes[off&255]
|
---|
369 | } else if off>>7 < uint32(len(offsetCodes)) {
|
---|
370 | return offsetCodes[(off>>7)&255] + 14
|
---|
371 | } else {
|
---|
372 | return offsetCodes[(off>>14)&255] + 28
|
---|
373 | }
|
---|
374 | }
|
---|
375 | if off < uint32(len(offsetCodes)) {
|
---|
376 | return offsetCodes[uint8(off)]
|
---|
377 | }
|
---|
378 | return offsetCodes14[uint8(off>>7)]
|
---|
379 | }
|
---|