source: code/trunk/vendor/github.com/klauspost/compress/flate/huffman_code.go@ 822

Last change on this file since 822 was 822, checked in by yakumo.izuru, 22 months ago

Prefer immortal.run over runit and rc.d, use vendored modules
for convenience.

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 10.1 KB
Line 
1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package flate
6
7import (
8 "math"
9 "math/bits"
10)
11
12const (
13 maxBitsLimit = 16
14 // number of valid literals
15 literalCount = 286
16)
17
18// hcode is a huffman code with a bit code and bit length.
19type hcode struct {
20 code, len uint16
21}
22
23type huffmanEncoder struct {
24 codes []hcode
25 freqcache []literalNode
26 bitCount [17]int32
27}
28
29type literalNode struct {
30 literal uint16
31 freq uint16
32}
33
34// A levelInfo describes the state of the constructed tree for a given depth.
35type levelInfo struct {
36 // Our level. for better printing
37 level int32
38
39 // The frequency of the last node at this level
40 lastFreq int32
41
42 // The frequency of the next character to add to this level
43 nextCharFreq int32
44
45 // The frequency of the next pair (from level below) to add to this level.
46 // Only valid if the "needed" value of the next lower level is 0.
47 nextPairFreq int32
48
49 // The number of chains remaining to generate for this level before moving
50 // up to the next level
51 needed int32
52}
53
54// set sets the code and length of an hcode.
55func (h *hcode) set(code uint16, length uint16) {
56 h.len = length
57 h.code = code
58}
59
60func reverseBits(number uint16, bitLength byte) uint16 {
61 return bits.Reverse16(number << ((16 - bitLength) & 15))
62}
63
64func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} }
65
66func newHuffmanEncoder(size int) *huffmanEncoder {
67 // Make capacity to next power of two.
68 c := uint(bits.Len32(uint32(size - 1)))
69 return &huffmanEncoder{codes: make([]hcode, size, 1<<c)}
70}
71
72// Generates a HuffmanCode corresponding to the fixed literal table
73func generateFixedLiteralEncoding() *huffmanEncoder {
74 h := newHuffmanEncoder(literalCount)
75 codes := h.codes
76 var ch uint16
77 for ch = 0; ch < literalCount; ch++ {
78 var bits uint16
79 var size uint16
80 switch {
81 case ch < 144:
82 // size 8, 000110000 .. 10111111
83 bits = ch + 48
84 size = 8
85 case ch < 256:
86 // size 9, 110010000 .. 111111111
87 bits = ch + 400 - 144
88 size = 9
89 case ch < 280:
90 // size 7, 0000000 .. 0010111
91 bits = ch - 256
92 size = 7
93 default:
94 // size 8, 11000000 .. 11000111
95 bits = ch + 192 - 280
96 size = 8
97 }
98 codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size}
99 }
100 return h
101}
102
103func generateFixedOffsetEncoding() *huffmanEncoder {
104 h := newHuffmanEncoder(30)
105 codes := h.codes
106 for ch := range codes {
107 codes[ch] = hcode{code: reverseBits(uint16(ch), 5), len: 5}
108 }
109 return h
110}
111
112var fixedLiteralEncoding = generateFixedLiteralEncoding()
113var fixedOffsetEncoding = generateFixedOffsetEncoding()
114
115func (h *huffmanEncoder) bitLength(freq []uint16) int {
116 var total int
117 for i, f := range freq {
118 if f != 0 {
119 total += int(f) * int(h.codes[i].len)
120 }
121 }
122 return total
123}
124
125// Return the number of literals assigned to each bit size in the Huffman encoding
126//
127// This method is only called when list.length >= 3
128// The cases of 0, 1, and 2 literals are handled by special case code.
129//
130// list An array of the literals with non-zero frequencies
131// and their associated frequencies. The array is in order of increasing
132// frequency, and has as its last element a special element with frequency
133// MaxInt32
134// maxBits The maximum number of bits that should be used to encode any literal.
135// Must be less than 16.
136// return An integer array in which array[i] indicates the number of literals
137// that should be encoded in i bits.
138func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
139 if maxBits >= maxBitsLimit {
140 panic("flate: maxBits too large")
141 }
142 n := int32(len(list))
143 list = list[0 : n+1]
144 list[n] = maxNode()
145
146 // The tree can't have greater depth than n - 1, no matter what. This
147 // saves a little bit of work in some small cases
148 if maxBits > n-1 {
149 maxBits = n - 1
150 }
151
152 // Create information about each of the levels.
153 // A bogus "Level 0" whose sole purpose is so that
154 // level1.prev.needed==0. This makes level1.nextPairFreq
155 // be a legitimate value that never gets chosen.
156 var levels [maxBitsLimit]levelInfo
157 // leafCounts[i] counts the number of literals at the left
158 // of ancestors of the rightmost node at level i.
159 // leafCounts[i][j] is the number of literals at the left
160 // of the level j ancestor.
161 var leafCounts [maxBitsLimit][maxBitsLimit]int32
162
163 for level := int32(1); level <= maxBits; level++ {
164 // For every level, the first two items are the first two characters.
165 // We initialize the levels as if we had already figured this out.
166 levels[level] = levelInfo{
167 level: level,
168 lastFreq: int32(list[1].freq),
169 nextCharFreq: int32(list[2].freq),
170 nextPairFreq: int32(list[0].freq) + int32(list[1].freq),
171 }
172 leafCounts[level][level] = 2
173 if level == 1 {
174 levels[level].nextPairFreq = math.MaxInt32
175 }
176 }
177
178 // We need a total of 2*n - 2 items at top level and have already generated 2.
179 levels[maxBits].needed = 2*n - 4
180
181 level := maxBits
182 for {
183 l := &levels[level]
184 if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
185 // We've run out of both leafs and pairs.
186 // End all calculations for this level.
187 // To make sure we never come back to this level or any lower level,
188 // set nextPairFreq impossibly large.
189 l.needed = 0
190 levels[level+1].nextPairFreq = math.MaxInt32
191 level++
192 continue
193 }
194
195 prevFreq := l.lastFreq
196 if l.nextCharFreq < l.nextPairFreq {
197 // The next item on this row is a leaf node.
198 n := leafCounts[level][level] + 1
199 l.lastFreq = l.nextCharFreq
200 // Lower leafCounts are the same of the previous node.
201 leafCounts[level][level] = n
202 e := list[n]
203 if e.literal < math.MaxUint16 {
204 l.nextCharFreq = int32(e.freq)
205 } else {
206 l.nextCharFreq = math.MaxInt32
207 }
208 } else {
209 // The next item on this row is a pair from the previous row.
210 // nextPairFreq isn't valid until we generate two
211 // more values in the level below
212 l.lastFreq = l.nextPairFreq
213 // Take leaf counts from the lower level, except counts[level] remains the same.
214 copy(leafCounts[level][:level], leafCounts[level-1][:level])
215 levels[l.level-1].needed = 2
216 }
217
218 if l.needed--; l.needed == 0 {
219 // We've done everything we need to do for this level.
220 // Continue calculating one level up. Fill in nextPairFreq
221 // of that level with the sum of the two nodes we've just calculated on
222 // this level.
223 if l.level == maxBits {
224 // All done!
225 break
226 }
227 levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
228 level++
229 } else {
230 // If we stole from below, move down temporarily to replenish it.
231 for levels[level-1].needed > 0 {
232 level--
233 }
234 }
235 }
236
237 // Somethings is wrong if at the end, the top level is null or hasn't used
238 // all of the leaves.
239 if leafCounts[maxBits][maxBits] != n {
240 panic("leafCounts[maxBits][maxBits] != n")
241 }
242
243 bitCount := h.bitCount[:maxBits+1]
244 bits := 1
245 counts := &leafCounts[maxBits]
246 for level := maxBits; level > 0; level-- {
247 // chain.leafCount gives the number of literals requiring at least "bits"
248 // bits to encode.
249 bitCount[bits] = counts[level] - counts[level-1]
250 bits++
251 }
252 return bitCount
253}
254
255// Look at the leaves and assign them a bit count and an encoding as specified
256// in RFC 1951 3.2.2
257func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
258 code := uint16(0)
259 for n, bits := range bitCount {
260 code <<= 1
261 if n == 0 || bits == 0 {
262 continue
263 }
264 // The literals list[len(list)-bits] .. list[len(list)-bits]
265 // are encoded using "bits" bits, and get the values
266 // code, code + 1, .... The code values are
267 // assigned in literal order (not frequency order).
268 chunk := list[len(list)-int(bits):]
269
270 sortByLiteral(chunk)
271 for _, node := range chunk {
272 h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
273 code++
274 }
275 list = list[0 : len(list)-int(bits)]
276 }
277}
278
279// Update this Huffman Code object to be the minimum code for the specified frequency count.
280//
281// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
282// maxBits The maximum number of bits to use for any literal.
283func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
284 if h.freqcache == nil {
285 // Allocate a reusable buffer with the longest possible frequency table.
286 // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
287 // The largest of these is literalCount, so we allocate for that case.
288 h.freqcache = make([]literalNode, literalCount+1)
289 }
290 list := h.freqcache[:len(freq)+1]
291 // Number of non-zero literals
292 count := 0
293 // Set list to be the set of all non-zero literals and their frequencies
294 for i, f := range freq {
295 if f != 0 {
296 list[count] = literalNode{uint16(i), f}
297 count++
298 } else {
299 list[count] = literalNode{}
300 h.codes[i].len = 0
301 }
302 }
303 list[len(freq)] = literalNode{}
304
305 list = list[:count]
306 if count <= 2 {
307 // Handle the small cases here, because they are awkward for the general case code. With
308 // two or fewer literals, everything has bit length 1.
309 for i, node := range list {
310 // "list" is in order of increasing literal value.
311 h.codes[node.literal].set(uint16(i), 1)
312 }
313 return
314 }
315 sortByFreq(list)
316
317 // Get the number of literals for each bit count
318 bitCount := h.bitCounts(list, maxBits)
319 // And do the assignment
320 h.assignEncodingAndSize(bitCount, list)
321}
322
323func atLeastOne(v float32) float32 {
324 if v < 1 {
325 return 1
326 }
327 return v
328}
329
330// histogramSize accumulates a histogram of b in h.
331// An estimated size in bits is returned.
332// Unassigned values are assigned '1' in the histogram.
333// len(h) must be >= 256, and h's elements must be all zeroes.
334func histogramSize(b []byte, h []uint16, fill bool) (int, int) {
335 h = h[:256]
336 for _, t := range b {
337 h[t]++
338 }
339 invTotal := 1.0 / float32(len(b))
340 shannon := float32(0.0)
341 var extra float32
342 if fill {
343 oneBits := atLeastOne(-mFastLog2(invTotal))
344 for i, v := range h[:] {
345 if v > 0 {
346 n := float32(v)
347 shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
348 } else {
349 h[i] = 1
350 extra += oneBits
351 }
352 }
353 } else {
354 for _, v := range h[:] {
355 if v > 0 {
356 n := float32(v)
357 shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
358 }
359 }
360 }
361
362 return int(shannon + 0.99), int(extra + 0.99)
363}
Note: See TracBrowser for help on using the repository browser.