source: code/trunk/vendor/github.com/andybalholm/brotli/bit_cost.go@ 145

Last change on this file since 145 was 145, checked in by Izuru Yakumo, 22 months ago

Updated the Makefile and vendored depedencies

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 10.7 KB
Line 
1package brotli
2
3/* Copyright 2013 Google Inc. All Rights Reserved.
4
5 Distributed under MIT license.
6 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
7*/
8
9/* Functions to estimate the bit cost of Huffman trees. */
10func shannonEntropy(population []uint32, size uint, total *uint) float64 {
11 var sum uint = 0
12 var retval float64 = 0
13 var population_end []uint32 = population[size:]
14 var p uint
15 for -cap(population) < -cap(population_end) {
16 p = uint(population[0])
17 population = population[1:]
18 sum += p
19 retval -= float64(p) * fastLog2(p)
20 }
21
22 if sum != 0 {
23 retval += float64(sum) * fastLog2(sum)
24 }
25 *total = sum
26 return retval
27}
28
29func bitsEntropy(population []uint32, size uint) float64 {
30 var sum uint
31 var retval float64 = shannonEntropy(population, size, &sum)
32 if retval < float64(sum) {
33 /* At least one bit per literal is needed. */
34 retval = float64(sum)
35 }
36
37 return retval
38}
39
40const kOneSymbolHistogramCost float64 = 12
41const kTwoSymbolHistogramCost float64 = 20
42const kThreeSymbolHistogramCost float64 = 28
43const kFourSymbolHistogramCost float64 = 37
44
45func populationCostLiteral(histogram *histogramLiteral) float64 {
46 var data_size uint = histogramDataSizeLiteral()
47 var count int = 0
48 var s [5]uint
49 var bits float64 = 0.0
50 var i uint
51 if histogram.total_count_ == 0 {
52 return kOneSymbolHistogramCost
53 }
54
55 for i = 0; i < data_size; i++ {
56 if histogram.data_[i] > 0 {
57 s[count] = i
58 count++
59 if count > 4 {
60 break
61 }
62 }
63 }
64
65 if count == 1 {
66 return kOneSymbolHistogramCost
67 }
68
69 if count == 2 {
70 return kTwoSymbolHistogramCost + float64(histogram.total_count_)
71 }
72
73 if count == 3 {
74 var histo0 uint32 = histogram.data_[s[0]]
75 var histo1 uint32 = histogram.data_[s[1]]
76 var histo2 uint32 = histogram.data_[s[2]]
77 var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
78 return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
79 }
80
81 if count == 4 {
82 var histo [4]uint32
83 var h23 uint32
84 var histomax uint32
85 for i = 0; i < 4; i++ {
86 histo[i] = histogram.data_[s[i]]
87 }
88
89 /* Sort */
90 for i = 0; i < 4; i++ {
91 var j uint
92 for j = i + 1; j < 4; j++ {
93 if histo[j] > histo[i] {
94 var tmp uint32 = histo[j]
95 histo[j] = histo[i]
96 histo[i] = tmp
97 }
98 }
99 }
100
101 h23 = histo[2] + histo[3]
102 histomax = brotli_max_uint32_t(h23, histo[0])
103 return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
104 }
105 {
106 var max_depth uint = 1
107 var depth_histo = [codeLengthCodes]uint32{0}
108 /* In this loop we compute the entropy of the histogram and simultaneously
109 build a simplified histogram of the code length codes where we use the
110 zero repeat code 17, but we don't use the non-zero repeat code 16. */
111
112 var log2total float64 = fastLog2(histogram.total_count_)
113 for i = 0; i < data_size; {
114 if histogram.data_[i] > 0 {
115 var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
116 /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
117 = log2(total_count) - log2(count(symbol)) */
118
119 var depth uint = uint(log2p + 0.5)
120 /* Approximate the bit depth by round(-log2(P(symbol))) */
121 bits += float64(histogram.data_[i]) * log2p
122
123 if depth > 15 {
124 depth = 15
125 }
126
127 if depth > max_depth {
128 max_depth = depth
129 }
130
131 depth_histo[depth]++
132 i++
133 } else {
134 var reps uint32 = 1
135 /* Compute the run length of zeros and add the appropriate number of 0
136 and 17 code length codes to the code length code histogram. */
137
138 var k uint
139 for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
140 reps++
141 }
142
143 i += uint(reps)
144 if i == data_size {
145 /* Don't add any cost for the last zero run, since these are encoded
146 only implicitly. */
147 break
148 }
149
150 if reps < 3 {
151 depth_histo[0] += reps
152 } else {
153 reps -= 2
154 for reps > 0 {
155 depth_histo[repeatZeroCodeLength]++
156
157 /* Add the 3 extra bits for the 17 code length code. */
158 bits += 3
159
160 reps >>= 3
161 }
162 }
163 }
164 }
165
166 /* Add the estimated encoding cost of the code length code histogram. */
167 bits += float64(18 + 2*max_depth)
168
169 /* Add the entropy of the code length code histogram. */
170 bits += bitsEntropy(depth_histo[:], codeLengthCodes)
171 }
172
173 return bits
174}
175
176func populationCostCommand(histogram *histogramCommand) float64 {
177 var data_size uint = histogramDataSizeCommand()
178 var count int = 0
179 var s [5]uint
180 var bits float64 = 0.0
181 var i uint
182 if histogram.total_count_ == 0 {
183 return kOneSymbolHistogramCost
184 }
185
186 for i = 0; i < data_size; i++ {
187 if histogram.data_[i] > 0 {
188 s[count] = i
189 count++
190 if count > 4 {
191 break
192 }
193 }
194 }
195
196 if count == 1 {
197 return kOneSymbolHistogramCost
198 }
199
200 if count == 2 {
201 return kTwoSymbolHistogramCost + float64(histogram.total_count_)
202 }
203
204 if count == 3 {
205 var histo0 uint32 = histogram.data_[s[0]]
206 var histo1 uint32 = histogram.data_[s[1]]
207 var histo2 uint32 = histogram.data_[s[2]]
208 var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
209 return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
210 }
211
212 if count == 4 {
213 var histo [4]uint32
214 var h23 uint32
215 var histomax uint32
216 for i = 0; i < 4; i++ {
217 histo[i] = histogram.data_[s[i]]
218 }
219
220 /* Sort */
221 for i = 0; i < 4; i++ {
222 var j uint
223 for j = i + 1; j < 4; j++ {
224 if histo[j] > histo[i] {
225 var tmp uint32 = histo[j]
226 histo[j] = histo[i]
227 histo[i] = tmp
228 }
229 }
230 }
231
232 h23 = histo[2] + histo[3]
233 histomax = brotli_max_uint32_t(h23, histo[0])
234 return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
235 }
236 {
237 var max_depth uint = 1
238 var depth_histo = [codeLengthCodes]uint32{0}
239 /* In this loop we compute the entropy of the histogram and simultaneously
240 build a simplified histogram of the code length codes where we use the
241 zero repeat code 17, but we don't use the non-zero repeat code 16. */
242
243 var log2total float64 = fastLog2(histogram.total_count_)
244 for i = 0; i < data_size; {
245 if histogram.data_[i] > 0 {
246 var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
247 /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
248 = log2(total_count) - log2(count(symbol)) */
249
250 var depth uint = uint(log2p + 0.5)
251 /* Approximate the bit depth by round(-log2(P(symbol))) */
252 bits += float64(histogram.data_[i]) * log2p
253
254 if depth > 15 {
255 depth = 15
256 }
257
258 if depth > max_depth {
259 max_depth = depth
260 }
261
262 depth_histo[depth]++
263 i++
264 } else {
265 var reps uint32 = 1
266 /* Compute the run length of zeros and add the appropriate number of 0
267 and 17 code length codes to the code length code histogram. */
268
269 var k uint
270 for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
271 reps++
272 }
273
274 i += uint(reps)
275 if i == data_size {
276 /* Don't add any cost for the last zero run, since these are encoded
277 only implicitly. */
278 break
279 }
280
281 if reps < 3 {
282 depth_histo[0] += reps
283 } else {
284 reps -= 2
285 for reps > 0 {
286 depth_histo[repeatZeroCodeLength]++
287
288 /* Add the 3 extra bits for the 17 code length code. */
289 bits += 3
290
291 reps >>= 3
292 }
293 }
294 }
295 }
296
297 /* Add the estimated encoding cost of the code length code histogram. */
298 bits += float64(18 + 2*max_depth)
299
300 /* Add the entropy of the code length code histogram. */
301 bits += bitsEntropy(depth_histo[:], codeLengthCodes)
302 }
303
304 return bits
305}
306
307func populationCostDistance(histogram *histogramDistance) float64 {
308 var data_size uint = histogramDataSizeDistance()
309 var count int = 0
310 var s [5]uint
311 var bits float64 = 0.0
312 var i uint
313 if histogram.total_count_ == 0 {
314 return kOneSymbolHistogramCost
315 }
316
317 for i = 0; i < data_size; i++ {
318 if histogram.data_[i] > 0 {
319 s[count] = i
320 count++
321 if count > 4 {
322 break
323 }
324 }
325 }
326
327 if count == 1 {
328 return kOneSymbolHistogramCost
329 }
330
331 if count == 2 {
332 return kTwoSymbolHistogramCost + float64(histogram.total_count_)
333 }
334
335 if count == 3 {
336 var histo0 uint32 = histogram.data_[s[0]]
337 var histo1 uint32 = histogram.data_[s[1]]
338 var histo2 uint32 = histogram.data_[s[2]]
339 var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
340 return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
341 }
342
343 if count == 4 {
344 var histo [4]uint32
345 var h23 uint32
346 var histomax uint32
347 for i = 0; i < 4; i++ {
348 histo[i] = histogram.data_[s[i]]
349 }
350
351 /* Sort */
352 for i = 0; i < 4; i++ {
353 var j uint
354 for j = i + 1; j < 4; j++ {
355 if histo[j] > histo[i] {
356 var tmp uint32 = histo[j]
357 histo[j] = histo[i]
358 histo[i] = tmp
359 }
360 }
361 }
362
363 h23 = histo[2] + histo[3]
364 histomax = brotli_max_uint32_t(h23, histo[0])
365 return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
366 }
367 {
368 var max_depth uint = 1
369 var depth_histo = [codeLengthCodes]uint32{0}
370 /* In this loop we compute the entropy of the histogram and simultaneously
371 build a simplified histogram of the code length codes where we use the
372 zero repeat code 17, but we don't use the non-zero repeat code 16. */
373
374 var log2total float64 = fastLog2(histogram.total_count_)
375 for i = 0; i < data_size; {
376 if histogram.data_[i] > 0 {
377 var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
378 /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
379 = log2(total_count) - log2(count(symbol)) */
380
381 var depth uint = uint(log2p + 0.5)
382 /* Approximate the bit depth by round(-log2(P(symbol))) */
383 bits += float64(histogram.data_[i]) * log2p
384
385 if depth > 15 {
386 depth = 15
387 }
388
389 if depth > max_depth {
390 max_depth = depth
391 }
392
393 depth_histo[depth]++
394 i++
395 } else {
396 var reps uint32 = 1
397 /* Compute the run length of zeros and add the appropriate number of 0
398 and 17 code length codes to the code length code histogram. */
399
400 var k uint
401 for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
402 reps++
403 }
404
405 i += uint(reps)
406 if i == data_size {
407 /* Don't add any cost for the last zero run, since these are encoded
408 only implicitly. */
409 break
410 }
411
412 if reps < 3 {
413 depth_histo[0] += reps
414 } else {
415 reps -= 2
416 for reps > 0 {
417 depth_histo[repeatZeroCodeLength]++
418
419 /* Add the 3 extra bits for the 17 code length code. */
420 bits += 3
421
422 reps >>= 3
423 }
424 }
425 }
426 }
427
428 /* Add the estimated encoding cost of the code length code histogram. */
429 bits += float64(18 + 2*max_depth)
430
431 /* Add the entropy of the code length code histogram. */
432 bits += bitsEntropy(depth_histo[:], codeLengthCodes)
433 }
434
435 return bits
436}
Note: See TracBrowser for help on using the repository browser.