source: code/trunk/vendor/github.com/klauspost/compress/flate/token.go@ 145

Last change on this file since 145 was 145, checked in by Izuru Yakumo, 22 months ago

Updated the Makefile and vendored depedencies

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 10.8 KB
Line 
1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package flate
6
7import (
8 "bytes"
9 "encoding/binary"
10 "fmt"
11 "io"
12 "math"
13)
14
15const (
16 // bits 0-16 xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits
17 // bits 16-22 offsetcode - 5 bits
18 // bits 22-30 xlength = length - MIN_MATCH_LENGTH - 8 bits
19 // bits 30-32 type 0 = literal 1=EOF 2=Match 3=Unused - 2 bits
20 lengthShift = 22
21 offsetMask = 1<<lengthShift - 1
22 typeMask = 3 << 30
23 literalType = 0 << 30
24 matchType = 1 << 30
25 matchOffsetOnlyMask = 0xffff
26)
27
28// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
29// is lengthCodes[length - MIN_MATCH_LENGTH]
30var lengthCodes = [256]uint8{
31 0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
32 9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
33 13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
34 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
35 17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
36 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
37 19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
38 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
39 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
40 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
41 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
42 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
43 23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
44 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
45 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
46 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
47 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
48 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
49 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
50 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
51 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
52 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
53 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
54 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
55 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
56 27, 27, 27, 27, 27, 28,
57}
58
59// lengthCodes1 is length codes, but starting at 1.
60var lengthCodes1 = [256]uint8{
61 1, 2, 3, 4, 5, 6, 7, 8, 9, 9,
62 10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
63 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
64 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
65 18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
66 19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
67 20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
68 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
69 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
70 22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
71 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
72 23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
73 24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
74 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
75 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
76 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
77 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
78 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
79 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
80 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
81 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
82 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
83 27, 27, 27, 27, 28, 28, 28, 28, 28, 28,
84 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
85 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
86 28, 28, 28, 28, 28, 29,
87}
88
89var offsetCodes = [256]uint32{
90 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
91 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
92 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
93 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
94 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
95 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
96 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
97 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
98 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
99 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
100 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
101 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
102 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
103 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
104 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
105 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
106}
107
108// offsetCodes14 are offsetCodes, but with 14 added.
109var offsetCodes14 = [256]uint32{
110 14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
111 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
112 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
113 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
114 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
115 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
116 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
117 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
118 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
119 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
120 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
121 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
122 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
123 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
124 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
125 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
126}
127
128type token uint32
129
130type tokens struct {
131 extraHist [32]uint16 // codes 256->maxnumlit
132 offHist [32]uint16 // offset codes
133 litHist [256]uint16 // codes 0->255
134 nFilled int
135 n uint16 // Must be able to contain maxStoreBlockSize
136 tokens [maxStoreBlockSize + 1]token
137}
138
139func (t *tokens) Reset() {
140 if t.n == 0 {
141 return
142 }
143 t.n = 0
144 t.nFilled = 0
145 for i := range t.litHist[:] {
146 t.litHist[i] = 0
147 }
148 for i := range t.extraHist[:] {
149 t.extraHist[i] = 0
150 }
151 for i := range t.offHist[:] {
152 t.offHist[i] = 0
153 }
154}
155
156func (t *tokens) Fill() {
157 if t.n == 0 {
158 return
159 }
160 for i, v := range t.litHist[:] {
161 if v == 0 {
162 t.litHist[i] = 1
163 t.nFilled++
164 }
165 }
166 for i, v := range t.extraHist[:literalCount-256] {
167 if v == 0 {
168 t.nFilled++
169 t.extraHist[i] = 1
170 }
171 }
172 for i, v := range t.offHist[:offsetCodeCount] {
173 if v == 0 {
174 t.offHist[i] = 1
175 }
176 }
177}
178
179func indexTokens(in []token) tokens {
180 var t tokens
181 t.indexTokens(in)
182 return t
183}
184
185func (t *tokens) indexTokens(in []token) {
186 t.Reset()
187 for _, tok := range in {
188 if tok < matchType {
189 t.AddLiteral(tok.literal())
190 continue
191 }
192 t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask)
193 }
194}
195
196// emitLiteral writes a literal chunk and returns the number of bytes written.
197func emitLiteral(dst *tokens, lit []byte) {
198 for _, v := range lit {
199 dst.tokens[dst.n] = token(v)
200 dst.litHist[v]++
201 dst.n++
202 }
203}
204
205func (t *tokens) AddLiteral(lit byte) {
206 t.tokens[t.n] = token(lit)
207 t.litHist[lit]++
208 t.n++
209}
210
211// from https://stackoverflow.com/a/28730362
212func mFastLog2(val float32) float32 {
213 ux := int32(math.Float32bits(val))
214 log2 := (float32)(((ux >> 23) & 255) - 128)
215 ux &= -0x7f800001
216 ux += 127 << 23
217 uval := math.Float32frombits(uint32(ux))
218 log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759
219 return log2
220}
221
222// EstimatedBits will return an minimum size estimated by an *optimal*
223// compression of the block.
224// The size of the block
225func (t *tokens) EstimatedBits() int {
226 shannon := float32(0)
227 bits := int(0)
228 nMatches := 0
229 total := int(t.n) + t.nFilled
230 if total > 0 {
231 invTotal := 1.0 / float32(total)
232 for _, v := range t.litHist[:] {
233 if v > 0 {
234 n := float32(v)
235 shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
236 }
237 }
238 // Just add 15 for EOB
239 shannon += 15
240 for i, v := range t.extraHist[1 : literalCount-256] {
241 if v > 0 {
242 n := float32(v)
243 shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
244 bits += int(lengthExtraBits[i&31]) * int(v)
245 nMatches += int(v)
246 }
247 }
248 }
249 if nMatches > 0 {
250 invTotal := 1.0 / float32(nMatches)
251 for i, v := range t.offHist[:offsetCodeCount] {
252 if v > 0 {
253 n := float32(v)
254 shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
255 bits += int(offsetExtraBits[i&31]) * int(v)
256 }
257 }
258 }
259 return int(shannon) + bits
260}
261
262// AddMatch adds a match to the tokens.
263// This function is very sensitive to inlining and right on the border.
264func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
265 if debugDeflate {
266 if xlength >= maxMatchLength+baseMatchLength {
267 panic(fmt.Errorf("invalid length: %v", xlength))
268 }
269 if xoffset >= maxMatchOffset+baseMatchOffset {
270 panic(fmt.Errorf("invalid offset: %v", xoffset))
271 }
272 }
273 oCode := offsetCode(xoffset)
274 xoffset |= oCode << 16
275
276 t.extraHist[lengthCodes1[uint8(xlength)]]++
277 t.offHist[oCode&31]++
278 t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
279 t.n++
280}
281
282// AddMatchLong adds a match to the tokens, potentially longer than max match length.
283// Length should NOT have the base subtracted, only offset should.
284func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
285 if debugDeflate {
286 if xoffset >= maxMatchOffset+baseMatchOffset {
287 panic(fmt.Errorf("invalid offset: %v", xoffset))
288 }
289 }
290 oc := offsetCode(xoffset)
291 xoffset |= oc << 16
292 for xlength > 0 {
293 xl := xlength
294 if xl > 258 {
295 // We need to have at least baseMatchLength left over for next loop.
296 if xl > 258+baseMatchLength {
297 xl = 258
298 } else {
299 xl = 258 - baseMatchLength
300 }
301 }
302 xlength -= xl
303 xl -= baseMatchLength
304 t.extraHist[lengthCodes1[uint8(xl)]]++
305 t.offHist[oc&31]++
306 t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
307 t.n++
308 }
309}
310
311func (t *tokens) AddEOB() {
312 t.tokens[t.n] = token(endBlockMarker)
313 t.extraHist[0]++
314 t.n++
315}
316
317func (t *tokens) Slice() []token {
318 return t.tokens[:t.n]
319}
320
321// VarInt returns the tokens as varint encoded bytes.
322func (t *tokens) VarInt() []byte {
323 var b = make([]byte, binary.MaxVarintLen32*int(t.n))
324 var off int
325 for _, v := range t.tokens[:t.n] {
326 off += binary.PutUvarint(b[off:], uint64(v))
327 }
328 return b[:off]
329}
330
331// FromVarInt restores t to the varint encoded tokens provided.
332// Any data in t is removed.
333func (t *tokens) FromVarInt(b []byte) error {
334 var buf = bytes.NewReader(b)
335 var toks []token
336 for {
337 r, err := binary.ReadUvarint(buf)
338 if err == io.EOF {
339 break
340 }
341 if err != nil {
342 return err
343 }
344 toks = append(toks, token(r))
345 }
346 t.indexTokens(toks)
347 return nil
348}
349
350// Returns the type of a token
351func (t token) typ() uint32 { return uint32(t) & typeMask }
352
353// Returns the literal of a literal token
354func (t token) literal() uint8 { return uint8(t) }
355
356// Returns the extra offset of a match token
357func (t token) offset() uint32 { return uint32(t) & offsetMask }
358
359func (t token) length() uint8 { return uint8(t >> lengthShift) }
360
361// Convert length to code.
362func lengthCode(len uint8) uint8 { return lengthCodes[len] }
363
364// Returns the offset code corresponding to a specific offset
365func offsetCode(off uint32) uint32 {
366 if false {
367 if off < uint32(len(offsetCodes)) {
368 return offsetCodes[off&255]
369 } else if off>>7 < uint32(len(offsetCodes)) {
370 return offsetCodes[(off>>7)&255] + 14
371 } else {
372 return offsetCodes[(off>>14)&255] + 28
373 }
374 }
375 if off < uint32(len(offsetCodes)) {
376 return offsetCodes[uint8(off)]
377 }
378 return offsetCodes14[uint8(off>>7)]
379}
Note: See TracBrowser for help on using the repository browser.