source: code/trunk/vendor/github.com/klauspost/compress/flate/huffman_code.go@ 145

Last change on this file since 145 was 145, checked in by Izuru Yakumo, 22 months ago

Updated the Makefile and vendored depedencies

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 10.3 KB
Line 
1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package flate
6
7import (
8 "math"
9 "math/bits"
10)
11
12const (
13 maxBitsLimit = 16
14 // number of valid literals
15 literalCount = 286
16)
17
18// hcode is a huffman code with a bit code and bit length.
19type hcode struct {
20 code uint16
21 len uint8
22}
23
24type huffmanEncoder struct {
25 codes []hcode
26 bitCount [17]int32
27
28 // Allocate a reusable buffer with the longest possible frequency table.
29 // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
30 // The largest of these is literalCount, so we allocate for that case.
31 freqcache [literalCount + 1]literalNode
32}
33
34type literalNode struct {
35 literal uint16
36 freq uint16
37}
38
39// A levelInfo describes the state of the constructed tree for a given depth.
40type levelInfo struct {
41 // Our level. for better printing
42 level int32
43
44 // The frequency of the last node at this level
45 lastFreq int32
46
47 // The frequency of the next character to add to this level
48 nextCharFreq int32
49
50 // The frequency of the next pair (from level below) to add to this level.
51 // Only valid if the "needed" value of the next lower level is 0.
52 nextPairFreq int32
53
54 // The number of chains remaining to generate for this level before moving
55 // up to the next level
56 needed int32
57}
58
59// set sets the code and length of an hcode.
60func (h *hcode) set(code uint16, length uint8) {
61 h.len = length
62 h.code = code
63}
64
65func reverseBits(number uint16, bitLength byte) uint16 {
66 return bits.Reverse16(number << ((16 - bitLength) & 15))
67}
68
69func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} }
70
71func newHuffmanEncoder(size int) *huffmanEncoder {
72 // Make capacity to next power of two.
73 c := uint(bits.Len32(uint32(size - 1)))
74 return &huffmanEncoder{codes: make([]hcode, size, 1<<c)}
75}
76
77// Generates a HuffmanCode corresponding to the fixed literal table
78func generateFixedLiteralEncoding() *huffmanEncoder {
79 h := newHuffmanEncoder(literalCount)
80 codes := h.codes
81 var ch uint16
82 for ch = 0; ch < literalCount; ch++ {
83 var bits uint16
84 var size uint8
85 switch {
86 case ch < 144:
87 // size 8, 000110000 .. 10111111
88 bits = ch + 48
89 size = 8
90 case ch < 256:
91 // size 9, 110010000 .. 111111111
92 bits = ch + 400 - 144
93 size = 9
94 case ch < 280:
95 // size 7, 0000000 .. 0010111
96 bits = ch - 256
97 size = 7
98 default:
99 // size 8, 11000000 .. 11000111
100 bits = ch + 192 - 280
101 size = 8
102 }
103 codes[ch] = hcode{code: reverseBits(bits, size), len: size}
104 }
105 return h
106}
107
108func generateFixedOffsetEncoding() *huffmanEncoder {
109 h := newHuffmanEncoder(30)
110 codes := h.codes
111 for ch := range codes {
112 codes[ch] = hcode{code: reverseBits(uint16(ch), 5), len: 5}
113 }
114 return h
115}
116
117var fixedLiteralEncoding = generateFixedLiteralEncoding()
118var fixedOffsetEncoding = generateFixedOffsetEncoding()
119
120func (h *huffmanEncoder) bitLength(freq []uint16) int {
121 var total int
122 for i, f := range freq {
123 if f != 0 {
124 total += int(f) * int(h.codes[i].len)
125 }
126 }
127 return total
128}
129
130func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
131 var total int
132 for _, f := range b {
133 total += int(h.codes[f].len)
134 }
135 return total
136}
137
138// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused.
139func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
140 var total int
141 for i, f := range freq {
142 if f != 0 {
143 code := h.codes[i]
144 if code.len == 0 {
145 return math.MaxInt32
146 }
147 total += int(f) * int(code.len)
148 }
149 }
150 return total
151}
152
153// Return the number of literals assigned to each bit size in the Huffman encoding
154//
155// This method is only called when list.length >= 3
156// The cases of 0, 1, and 2 literals are handled by special case code.
157//
158// list An array of the literals with non-zero frequencies
159// and their associated frequencies. The array is in order of increasing
160// frequency, and has as its last element a special element with frequency
161// MaxInt32
162// maxBits The maximum number of bits that should be used to encode any literal.
163// Must be less than 16.
164// return An integer array in which array[i] indicates the number of literals
165// that should be encoded in i bits.
166func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
167 if maxBits >= maxBitsLimit {
168 panic("flate: maxBits too large")
169 }
170 n := int32(len(list))
171 list = list[0 : n+1]
172 list[n] = maxNode()
173
174 // The tree can't have greater depth than n - 1, no matter what. This
175 // saves a little bit of work in some small cases
176 if maxBits > n-1 {
177 maxBits = n - 1
178 }
179
180 // Create information about each of the levels.
181 // A bogus "Level 0" whose sole purpose is so that
182 // level1.prev.needed==0. This makes level1.nextPairFreq
183 // be a legitimate value that never gets chosen.
184 var levels [maxBitsLimit]levelInfo
185 // leafCounts[i] counts the number of literals at the left
186 // of ancestors of the rightmost node at level i.
187 // leafCounts[i][j] is the number of literals at the left
188 // of the level j ancestor.
189 var leafCounts [maxBitsLimit][maxBitsLimit]int32
190
191 // Descending to only have 1 bounds check.
192 l2f := int32(list[2].freq)
193 l1f := int32(list[1].freq)
194 l0f := int32(list[0].freq) + int32(list[1].freq)
195
196 for level := int32(1); level <= maxBits; level++ {
197 // For every level, the first two items are the first two characters.
198 // We initialize the levels as if we had already figured this out.
199 levels[level] = levelInfo{
200 level: level,
201 lastFreq: l1f,
202 nextCharFreq: l2f,
203 nextPairFreq: l0f,
204 }
205 leafCounts[level][level] = 2
206 if level == 1 {
207 levels[level].nextPairFreq = math.MaxInt32
208 }
209 }
210
211 // We need a total of 2*n - 2 items at top level and have already generated 2.
212 levels[maxBits].needed = 2*n - 4
213
214 level := uint32(maxBits)
215 for level < 16 {
216 l := &levels[level]
217 if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
218 // We've run out of both leafs and pairs.
219 // End all calculations for this level.
220 // To make sure we never come back to this level or any lower level,
221 // set nextPairFreq impossibly large.
222 l.needed = 0
223 levels[level+1].nextPairFreq = math.MaxInt32
224 level++
225 continue
226 }
227
228 prevFreq := l.lastFreq
229 if l.nextCharFreq < l.nextPairFreq {
230 // The next item on this row is a leaf node.
231 n := leafCounts[level][level] + 1
232 l.lastFreq = l.nextCharFreq
233 // Lower leafCounts are the same of the previous node.
234 leafCounts[level][level] = n
235 e := list[n]
236 if e.literal < math.MaxUint16 {
237 l.nextCharFreq = int32(e.freq)
238 } else {
239 l.nextCharFreq = math.MaxInt32
240 }
241 } else {
242 // The next item on this row is a pair from the previous row.
243 // nextPairFreq isn't valid until we generate two
244 // more values in the level below
245 l.lastFreq = l.nextPairFreq
246 // Take leaf counts from the lower level, except counts[level] remains the same.
247 if true {
248 save := leafCounts[level][level]
249 leafCounts[level] = leafCounts[level-1]
250 leafCounts[level][level] = save
251 } else {
252 copy(leafCounts[level][:level], leafCounts[level-1][:level])
253 }
254 levels[l.level-1].needed = 2
255 }
256
257 if l.needed--; l.needed == 0 {
258 // We've done everything we need to do for this level.
259 // Continue calculating one level up. Fill in nextPairFreq
260 // of that level with the sum of the two nodes we've just calculated on
261 // this level.
262 if l.level == maxBits {
263 // All done!
264 break
265 }
266 levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
267 level++
268 } else {
269 // If we stole from below, move down temporarily to replenish it.
270 for levels[level-1].needed > 0 {
271 level--
272 }
273 }
274 }
275
276 // Somethings is wrong if at the end, the top level is null or hasn't used
277 // all of the leaves.
278 if leafCounts[maxBits][maxBits] != n {
279 panic("leafCounts[maxBits][maxBits] != n")
280 }
281
282 bitCount := h.bitCount[:maxBits+1]
283 bits := 1
284 counts := &leafCounts[maxBits]
285 for level := maxBits; level > 0; level-- {
286 // chain.leafCount gives the number of literals requiring at least "bits"
287 // bits to encode.
288 bitCount[bits] = counts[level] - counts[level-1]
289 bits++
290 }
291 return bitCount
292}
293
294// Look at the leaves and assign them a bit count and an encoding as specified
295// in RFC 1951 3.2.2
296func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
297 code := uint16(0)
298 for n, bits := range bitCount {
299 code <<= 1
300 if n == 0 || bits == 0 {
301 continue
302 }
303 // The literals list[len(list)-bits] .. list[len(list)-bits]
304 // are encoded using "bits" bits, and get the values
305 // code, code + 1, .... The code values are
306 // assigned in literal order (not frequency order).
307 chunk := list[len(list)-int(bits):]
308
309 sortByLiteral(chunk)
310 for _, node := range chunk {
311 h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint8(n)}
312 code++
313 }
314 list = list[0 : len(list)-int(bits)]
315 }
316}
317
318// Update this Huffman Code object to be the minimum code for the specified frequency count.
319//
320// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
321// maxBits The maximum number of bits to use for any literal.
322func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
323 list := h.freqcache[:len(freq)+1]
324 codes := h.codes[:len(freq)]
325 // Number of non-zero literals
326 count := 0
327 // Set list to be the set of all non-zero literals and their frequencies
328 for i, f := range freq {
329 if f != 0 {
330 list[count] = literalNode{uint16(i), f}
331 count++
332 } else {
333 codes[i].len = 0
334 }
335 }
336 list[count] = literalNode{}
337
338 list = list[:count]
339 if count <= 2 {
340 // Handle the small cases here, because they are awkward for the general case code. With
341 // two or fewer literals, everything has bit length 1.
342 for i, node := range list {
343 // "list" is in order of increasing literal value.
344 h.codes[node.literal].set(uint16(i), 1)
345 }
346 return
347 }
348 sortByFreq(list)
349
350 // Get the number of literals for each bit count
351 bitCount := h.bitCounts(list, maxBits)
352 // And do the assignment
353 h.assignEncodingAndSize(bitCount, list)
354}
355
356// atLeastOne clamps the result between 1 and 15.
357func atLeastOne(v float32) float32 {
358 if v < 1 {
359 return 1
360 }
361 if v > 15 {
362 return 15
363 }
364 return v
365}
366
367// Unassigned values are assigned '1' in the histogram.
368func fillHist(b []uint16) {
369 for i, v := range b {
370 if v == 0 {
371 b[i] = 1
372 }
373 }
374}
375
376func histogram(b []byte, h []uint16, fill bool) {
377 h = h[:256]
378 for _, t := range b {
379 h[t]++
380 }
381 if fill {
382 fillHist(h)
383 }
384}
Note: See TracBrowser for help on using the repository browser.