source: code/trunk/vendor/github.com/klauspost/compress/flate/token.go@ 822

Last change on this file since 822 was 822, checked in by yakumo.izuru, 22 months ago

Prefer immortal.run over runit and rc.d, use vendored modules
for convenience.

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 10.7 KB
Line 
1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package flate
6
7import (
8 "bytes"
9 "encoding/binary"
10 "fmt"
11 "io"
12 "math"
13)
14
15const (
16 // 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused
17 // 8 bits: xlength = length - MIN_MATCH_LENGTH
18 // 22 bits xoffset = offset - MIN_OFFSET_SIZE, or literal
19 lengthShift = 22
20 offsetMask = 1<<lengthShift - 1
21 typeMask = 3 << 30
22 literalType = 0 << 30
23 matchType = 1 << 30
24)
25
26// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
27// is lengthCodes[length - MIN_MATCH_LENGTH]
28var lengthCodes = [256]uint8{
29 0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
30 9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
31 13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
32 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
33 17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
34 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
35 19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
36 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
37 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
38 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
39 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
40 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
41 23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
42 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
43 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
44 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
45 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
46 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
47 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
48 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
49 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
50 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
51 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
52 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
53 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
54 27, 27, 27, 27, 27, 28,
55}
56
57// lengthCodes1 is length codes, but starting at 1.
58var lengthCodes1 = [256]uint8{
59 1, 2, 3, 4, 5, 6, 7, 8, 9, 9,
60 10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
61 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
62 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
63 18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
64 19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
65 20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
66 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
67 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
68 22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
69 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
70 23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
71 24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
72 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
73 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
74 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
75 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
76 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
77 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
78 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
79 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
80 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
81 27, 27, 27, 27, 28, 28, 28, 28, 28, 28,
82 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
83 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
84 28, 28, 28, 28, 28, 29,
85}
86
87var offsetCodes = [256]uint32{
88 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
89 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
90 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
91 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
92 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
93 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
94 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
95 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
96 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
97 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
98 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
99 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
100 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
101 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
102 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
103 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
104}
105
106// offsetCodes14 are offsetCodes, but with 14 added.
107var offsetCodes14 = [256]uint32{
108 14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
109 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
110 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
111 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
112 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
113 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
114 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
115 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
116 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
117 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
118 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
119 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
120 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
121 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
122 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
123 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
124}
125
126type token uint32
127
128type tokens struct {
129 nLits int
130 extraHist [32]uint16 // codes 256->maxnumlit
131 offHist [32]uint16 // offset codes
132 litHist [256]uint16 // codes 0->255
133 n uint16 // Must be able to contain maxStoreBlockSize
134 tokens [maxStoreBlockSize + 1]token
135}
136
137func (t *tokens) Reset() {
138 if t.n == 0 {
139 return
140 }
141 t.n = 0
142 t.nLits = 0
143 for i := range t.litHist[:] {
144 t.litHist[i] = 0
145 }
146 for i := range t.extraHist[:] {
147 t.extraHist[i] = 0
148 }
149 for i := range t.offHist[:] {
150 t.offHist[i] = 0
151 }
152}
153
154func (t *tokens) Fill() {
155 if t.n == 0 {
156 return
157 }
158 for i, v := range t.litHist[:] {
159 if v == 0 {
160 t.litHist[i] = 1
161 t.nLits++
162 }
163 }
164 for i, v := range t.extraHist[:literalCount-256] {
165 if v == 0 {
166 t.nLits++
167 t.extraHist[i] = 1
168 }
169 }
170 for i, v := range t.offHist[:offsetCodeCount] {
171 if v == 0 {
172 t.offHist[i] = 1
173 }
174 }
175}
176
177func indexTokens(in []token) tokens {
178 var t tokens
179 t.indexTokens(in)
180 return t
181}
182
183func (t *tokens) indexTokens(in []token) {
184 t.Reset()
185 for _, tok := range in {
186 if tok < matchType {
187 t.AddLiteral(tok.literal())
188 continue
189 }
190 t.AddMatch(uint32(tok.length()), tok.offset())
191 }
192}
193
194// emitLiteral writes a literal chunk and returns the number of bytes written.
195func emitLiteral(dst *tokens, lit []byte) {
196 ol := int(dst.n)
197 for i, v := range lit {
198 dst.tokens[(i+ol)&maxStoreBlockSize] = token(v)
199 dst.litHist[v]++
200 }
201 dst.n += uint16(len(lit))
202 dst.nLits += len(lit)
203}
204
205func (t *tokens) AddLiteral(lit byte) {
206 t.tokens[t.n] = token(lit)
207 t.litHist[lit]++
208 t.n++
209 t.nLits++
210}
211
212// from https://stackoverflow.com/a/28730362
213func mFastLog2(val float32) float32 {
214 ux := int32(math.Float32bits(val))
215 log2 := (float32)(((ux >> 23) & 255) - 128)
216 ux &= -0x7f800001
217 ux += 127 << 23
218 uval := math.Float32frombits(uint32(ux))
219 log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759
220 return log2
221}
222
223// EstimatedBits will return an minimum size estimated by an *optimal*
224// compression of the block.
225// The size of the block
226func (t *tokens) EstimatedBits() int {
227 shannon := float32(0)
228 bits := int(0)
229 nMatches := 0
230 if t.nLits > 0 {
231 invTotal := 1.0 / float32(t.nLits)
232 for _, v := range t.litHist[:] {
233 if v > 0 {
234 n := float32(v)
235 shannon += -mFastLog2(n*invTotal) * n
236 }
237 }
238 // Just add 15 for EOB
239 shannon += 15
240 for i, v := range t.extraHist[1 : literalCount-256] {
241 if v > 0 {
242 n := float32(v)
243 shannon += -mFastLog2(n*invTotal) * n
244 bits += int(lengthExtraBits[i&31]) * int(v)
245 nMatches += int(v)
246 }
247 }
248 }
249 if nMatches > 0 {
250 invTotal := 1.0 / float32(nMatches)
251 for i, v := range t.offHist[:offsetCodeCount] {
252 if v > 0 {
253 n := float32(v)
254 shannon += -mFastLog2(n*invTotal) * n
255 bits += int(offsetExtraBits[i&31]) * int(v)
256 }
257 }
258 }
259 return int(shannon) + bits
260}
261
262// AddMatch adds a match to the tokens.
263// This function is very sensitive to inlining and right on the border.
264func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
265 if debugDeflate {
266 if xlength >= maxMatchLength+baseMatchLength {
267 panic(fmt.Errorf("invalid length: %v", xlength))
268 }
269 if xoffset >= maxMatchOffset+baseMatchOffset {
270 panic(fmt.Errorf("invalid offset: %v", xoffset))
271 }
272 }
273 t.nLits++
274 lengthCode := lengthCodes1[uint8(xlength)] & 31
275 t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
276 t.extraHist[lengthCode]++
277 t.offHist[offsetCode(xoffset)&31]++
278 t.n++
279}
280
281// AddMatchLong adds a match to the tokens, potentially longer than max match length.
282// Length should NOT have the base subtracted, only offset should.
283func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
284 if debugDeflate {
285 if xoffset >= maxMatchOffset+baseMatchOffset {
286 panic(fmt.Errorf("invalid offset: %v", xoffset))
287 }
288 }
289 oc := offsetCode(xoffset) & 31
290 for xlength > 0 {
291 xl := xlength
292 if xl > 258 {
293 // We need to have at least baseMatchLength left over for next loop.
294 xl = 258 - baseMatchLength
295 }
296 xlength -= xl
297 xl -= 3
298 t.nLits++
299 lengthCode := lengthCodes1[uint8(xl)] & 31
300 t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
301 t.extraHist[lengthCode]++
302 t.offHist[oc]++
303 t.n++
304 }
305}
306
307func (t *tokens) AddEOB() {
308 t.tokens[t.n] = token(endBlockMarker)
309 t.extraHist[0]++
310 t.n++
311}
312
313func (t *tokens) Slice() []token {
314 return t.tokens[:t.n]
315}
316
317// VarInt returns the tokens as varint encoded bytes.
318func (t *tokens) VarInt() []byte {
319 var b = make([]byte, binary.MaxVarintLen32*int(t.n))
320 var off int
321 for _, v := range t.tokens[:t.n] {
322 off += binary.PutUvarint(b[off:], uint64(v))
323 }
324 return b[:off]
325}
326
327// FromVarInt restores t to the varint encoded tokens provided.
328// Any data in t is removed.
329func (t *tokens) FromVarInt(b []byte) error {
330 var buf = bytes.NewReader(b)
331 var toks []token
332 for {
333 r, err := binary.ReadUvarint(buf)
334 if err == io.EOF {
335 break
336 }
337 if err != nil {
338 return err
339 }
340 toks = append(toks, token(r))
341 }
342 t.indexTokens(toks)
343 return nil
344}
345
346// Returns the type of a token
347func (t token) typ() uint32 { return uint32(t) & typeMask }
348
349// Returns the literal of a literal token
350func (t token) literal() uint8 { return uint8(t) }
351
352// Returns the extra offset of a match token
353func (t token) offset() uint32 { return uint32(t) & offsetMask }
354
355func (t token) length() uint8 { return uint8(t >> lengthShift) }
356
357// The code is never more than 8 bits, but is returned as uint32 for convenience.
358func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) }
359
360// Returns the offset code corresponding to a specific offset
361func offsetCode(off uint32) uint32 {
362 if false {
363 if off < uint32(len(offsetCodes)) {
364 return offsetCodes[off&255]
365 } else if off>>7 < uint32(len(offsetCodes)) {
366 return offsetCodes[(off>>7)&255] + 14
367 } else {
368 return offsetCodes[(off>>14)&255] + 28
369 }
370 }
371 if off < uint32(len(offsetCodes)) {
372 return offsetCodes[uint8(off)]
373 }
374 return offsetCodes14[uint8(off>>7)]
375}
Note: See TracBrowser for help on using the repository browser.