1 | package brotli
|
---|
2 |
|
---|
3 | /* Copyright 2013 Google Inc. All Rights Reserved.
|
---|
4 |
|
---|
5 | Distributed under MIT license.
|
---|
6 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
---|
7 | */
|
---|
8 |
|
---|
9 | /* Functions to estimate the bit cost of Huffman trees. */
|
---|
10 | func shannonEntropy(population []uint32, size uint, total *uint) float64 {
|
---|
11 | var sum uint = 0
|
---|
12 | var retval float64 = 0
|
---|
13 | var population_end []uint32 = population[size:]
|
---|
14 | var p uint
|
---|
15 | for -cap(population) < -cap(population_end) {
|
---|
16 | p = uint(population[0])
|
---|
17 | population = population[1:]
|
---|
18 | sum += p
|
---|
19 | retval -= float64(p) * fastLog2(p)
|
---|
20 | }
|
---|
21 |
|
---|
22 | if sum != 0 {
|
---|
23 | retval += float64(sum) * fastLog2(sum)
|
---|
24 | }
|
---|
25 | *total = sum
|
---|
26 | return retval
|
---|
27 | }
|
---|
28 |
|
---|
29 | func bitsEntropy(population []uint32, size uint) float64 {
|
---|
30 | var sum uint
|
---|
31 | var retval float64 = shannonEntropy(population, size, &sum)
|
---|
32 | if retval < float64(sum) {
|
---|
33 | /* At least one bit per literal is needed. */
|
---|
34 | retval = float64(sum)
|
---|
35 | }
|
---|
36 |
|
---|
37 | return retval
|
---|
38 | }
|
---|
39 |
|
---|
40 | const kOneSymbolHistogramCost float64 = 12
|
---|
41 | const kTwoSymbolHistogramCost float64 = 20
|
---|
42 | const kThreeSymbolHistogramCost float64 = 28
|
---|
43 | const kFourSymbolHistogramCost float64 = 37
|
---|
44 |
|
---|
45 | func populationCostLiteral(histogram *histogramLiteral) float64 {
|
---|
46 | var data_size uint = histogramDataSizeLiteral()
|
---|
47 | var count int = 0
|
---|
48 | var s [5]uint
|
---|
49 | var bits float64 = 0.0
|
---|
50 | var i uint
|
---|
51 | if histogram.total_count_ == 0 {
|
---|
52 | return kOneSymbolHistogramCost
|
---|
53 | }
|
---|
54 |
|
---|
55 | for i = 0; i < data_size; i++ {
|
---|
56 | if histogram.data_[i] > 0 {
|
---|
57 | s[count] = i
|
---|
58 | count++
|
---|
59 | if count > 4 {
|
---|
60 | break
|
---|
61 | }
|
---|
62 | }
|
---|
63 | }
|
---|
64 |
|
---|
65 | if count == 1 {
|
---|
66 | return kOneSymbolHistogramCost
|
---|
67 | }
|
---|
68 |
|
---|
69 | if count == 2 {
|
---|
70 | return kTwoSymbolHistogramCost + float64(histogram.total_count_)
|
---|
71 | }
|
---|
72 |
|
---|
73 | if count == 3 {
|
---|
74 | var histo0 uint32 = histogram.data_[s[0]]
|
---|
75 | var histo1 uint32 = histogram.data_[s[1]]
|
---|
76 | var histo2 uint32 = histogram.data_[s[2]]
|
---|
77 | var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
|
---|
78 | return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
|
---|
79 | }
|
---|
80 |
|
---|
81 | if count == 4 {
|
---|
82 | var histo [4]uint32
|
---|
83 | var h23 uint32
|
---|
84 | var histomax uint32
|
---|
85 | for i = 0; i < 4; i++ {
|
---|
86 | histo[i] = histogram.data_[s[i]]
|
---|
87 | }
|
---|
88 |
|
---|
89 | /* Sort */
|
---|
90 | for i = 0; i < 4; i++ {
|
---|
91 | var j uint
|
---|
92 | for j = i + 1; j < 4; j++ {
|
---|
93 | if histo[j] > histo[i] {
|
---|
94 | var tmp uint32 = histo[j]
|
---|
95 | histo[j] = histo[i]
|
---|
96 | histo[i] = tmp
|
---|
97 | }
|
---|
98 | }
|
---|
99 | }
|
---|
100 |
|
---|
101 | h23 = histo[2] + histo[3]
|
---|
102 | histomax = brotli_max_uint32_t(h23, histo[0])
|
---|
103 | return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
|
---|
104 | }
|
---|
105 | {
|
---|
106 | var max_depth uint = 1
|
---|
107 | var depth_histo = [codeLengthCodes]uint32{0}
|
---|
108 | /* In this loop we compute the entropy of the histogram and simultaneously
|
---|
109 | build a simplified histogram of the code length codes where we use the
|
---|
110 | zero repeat code 17, but we don't use the non-zero repeat code 16. */
|
---|
111 |
|
---|
112 | var log2total float64 = fastLog2(histogram.total_count_)
|
---|
113 | for i = 0; i < data_size; {
|
---|
114 | if histogram.data_[i] > 0 {
|
---|
115 | var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
|
---|
116 | /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
|
---|
117 | = log2(total_count) - log2(count(symbol)) */
|
---|
118 |
|
---|
119 | var depth uint = uint(log2p + 0.5)
|
---|
120 | /* Approximate the bit depth by round(-log2(P(symbol))) */
|
---|
121 | bits += float64(histogram.data_[i]) * log2p
|
---|
122 |
|
---|
123 | if depth > 15 {
|
---|
124 | depth = 15
|
---|
125 | }
|
---|
126 |
|
---|
127 | if depth > max_depth {
|
---|
128 | max_depth = depth
|
---|
129 | }
|
---|
130 |
|
---|
131 | depth_histo[depth]++
|
---|
132 | i++
|
---|
133 | } else {
|
---|
134 | var reps uint32 = 1
|
---|
135 | /* Compute the run length of zeros and add the appropriate number of 0
|
---|
136 | and 17 code length codes to the code length code histogram. */
|
---|
137 |
|
---|
138 | var k uint
|
---|
139 | for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
|
---|
140 | reps++
|
---|
141 | }
|
---|
142 |
|
---|
143 | i += uint(reps)
|
---|
144 | if i == data_size {
|
---|
145 | /* Don't add any cost for the last zero run, since these are encoded
|
---|
146 | only implicitly. */
|
---|
147 | break
|
---|
148 | }
|
---|
149 |
|
---|
150 | if reps < 3 {
|
---|
151 | depth_histo[0] += reps
|
---|
152 | } else {
|
---|
153 | reps -= 2
|
---|
154 | for reps > 0 {
|
---|
155 | depth_histo[repeatZeroCodeLength]++
|
---|
156 |
|
---|
157 | /* Add the 3 extra bits for the 17 code length code. */
|
---|
158 | bits += 3
|
---|
159 |
|
---|
160 | reps >>= 3
|
---|
161 | }
|
---|
162 | }
|
---|
163 | }
|
---|
164 | }
|
---|
165 |
|
---|
166 | /* Add the estimated encoding cost of the code length code histogram. */
|
---|
167 | bits += float64(18 + 2*max_depth)
|
---|
168 |
|
---|
169 | /* Add the entropy of the code length code histogram. */
|
---|
170 | bits += bitsEntropy(depth_histo[:], codeLengthCodes)
|
---|
171 | }
|
---|
172 |
|
---|
173 | return bits
|
---|
174 | }
|
---|
175 |
|
---|
176 | func populationCostCommand(histogram *histogramCommand) float64 {
|
---|
177 | var data_size uint = histogramDataSizeCommand()
|
---|
178 | var count int = 0
|
---|
179 | var s [5]uint
|
---|
180 | var bits float64 = 0.0
|
---|
181 | var i uint
|
---|
182 | if histogram.total_count_ == 0 {
|
---|
183 | return kOneSymbolHistogramCost
|
---|
184 | }
|
---|
185 |
|
---|
186 | for i = 0; i < data_size; i++ {
|
---|
187 | if histogram.data_[i] > 0 {
|
---|
188 | s[count] = i
|
---|
189 | count++
|
---|
190 | if count > 4 {
|
---|
191 | break
|
---|
192 | }
|
---|
193 | }
|
---|
194 | }
|
---|
195 |
|
---|
196 | if count == 1 {
|
---|
197 | return kOneSymbolHistogramCost
|
---|
198 | }
|
---|
199 |
|
---|
200 | if count == 2 {
|
---|
201 | return kTwoSymbolHistogramCost + float64(histogram.total_count_)
|
---|
202 | }
|
---|
203 |
|
---|
204 | if count == 3 {
|
---|
205 | var histo0 uint32 = histogram.data_[s[0]]
|
---|
206 | var histo1 uint32 = histogram.data_[s[1]]
|
---|
207 | var histo2 uint32 = histogram.data_[s[2]]
|
---|
208 | var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
|
---|
209 | return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
|
---|
210 | }
|
---|
211 |
|
---|
212 | if count == 4 {
|
---|
213 | var histo [4]uint32
|
---|
214 | var h23 uint32
|
---|
215 | var histomax uint32
|
---|
216 | for i = 0; i < 4; i++ {
|
---|
217 | histo[i] = histogram.data_[s[i]]
|
---|
218 | }
|
---|
219 |
|
---|
220 | /* Sort */
|
---|
221 | for i = 0; i < 4; i++ {
|
---|
222 | var j uint
|
---|
223 | for j = i + 1; j < 4; j++ {
|
---|
224 | if histo[j] > histo[i] {
|
---|
225 | var tmp uint32 = histo[j]
|
---|
226 | histo[j] = histo[i]
|
---|
227 | histo[i] = tmp
|
---|
228 | }
|
---|
229 | }
|
---|
230 | }
|
---|
231 |
|
---|
232 | h23 = histo[2] + histo[3]
|
---|
233 | histomax = brotli_max_uint32_t(h23, histo[0])
|
---|
234 | return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
|
---|
235 | }
|
---|
236 | {
|
---|
237 | var max_depth uint = 1
|
---|
238 | var depth_histo = [codeLengthCodes]uint32{0}
|
---|
239 | /* In this loop we compute the entropy of the histogram and simultaneously
|
---|
240 | build a simplified histogram of the code length codes where we use the
|
---|
241 | zero repeat code 17, but we don't use the non-zero repeat code 16. */
|
---|
242 |
|
---|
243 | var log2total float64 = fastLog2(histogram.total_count_)
|
---|
244 | for i = 0; i < data_size; {
|
---|
245 | if histogram.data_[i] > 0 {
|
---|
246 | var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
|
---|
247 | /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
|
---|
248 | = log2(total_count) - log2(count(symbol)) */
|
---|
249 |
|
---|
250 | var depth uint = uint(log2p + 0.5)
|
---|
251 | /* Approximate the bit depth by round(-log2(P(symbol))) */
|
---|
252 | bits += float64(histogram.data_[i]) * log2p
|
---|
253 |
|
---|
254 | if depth > 15 {
|
---|
255 | depth = 15
|
---|
256 | }
|
---|
257 |
|
---|
258 | if depth > max_depth {
|
---|
259 | max_depth = depth
|
---|
260 | }
|
---|
261 |
|
---|
262 | depth_histo[depth]++
|
---|
263 | i++
|
---|
264 | } else {
|
---|
265 | var reps uint32 = 1
|
---|
266 | /* Compute the run length of zeros and add the appropriate number of 0
|
---|
267 | and 17 code length codes to the code length code histogram. */
|
---|
268 |
|
---|
269 | var k uint
|
---|
270 | for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
|
---|
271 | reps++
|
---|
272 | }
|
---|
273 |
|
---|
274 | i += uint(reps)
|
---|
275 | if i == data_size {
|
---|
276 | /* Don't add any cost for the last zero run, since these are encoded
|
---|
277 | only implicitly. */
|
---|
278 | break
|
---|
279 | }
|
---|
280 |
|
---|
281 | if reps < 3 {
|
---|
282 | depth_histo[0] += reps
|
---|
283 | } else {
|
---|
284 | reps -= 2
|
---|
285 | for reps > 0 {
|
---|
286 | depth_histo[repeatZeroCodeLength]++
|
---|
287 |
|
---|
288 | /* Add the 3 extra bits for the 17 code length code. */
|
---|
289 | bits += 3
|
---|
290 |
|
---|
291 | reps >>= 3
|
---|
292 | }
|
---|
293 | }
|
---|
294 | }
|
---|
295 | }
|
---|
296 |
|
---|
297 | /* Add the estimated encoding cost of the code length code histogram. */
|
---|
298 | bits += float64(18 + 2*max_depth)
|
---|
299 |
|
---|
300 | /* Add the entropy of the code length code histogram. */
|
---|
301 | bits += bitsEntropy(depth_histo[:], codeLengthCodes)
|
---|
302 | }
|
---|
303 |
|
---|
304 | return bits
|
---|
305 | }
|
---|
306 |
|
---|
307 | func populationCostDistance(histogram *histogramDistance) float64 {
|
---|
308 | var data_size uint = histogramDataSizeDistance()
|
---|
309 | var count int = 0
|
---|
310 | var s [5]uint
|
---|
311 | var bits float64 = 0.0
|
---|
312 | var i uint
|
---|
313 | if histogram.total_count_ == 0 {
|
---|
314 | return kOneSymbolHistogramCost
|
---|
315 | }
|
---|
316 |
|
---|
317 | for i = 0; i < data_size; i++ {
|
---|
318 | if histogram.data_[i] > 0 {
|
---|
319 | s[count] = i
|
---|
320 | count++
|
---|
321 | if count > 4 {
|
---|
322 | break
|
---|
323 | }
|
---|
324 | }
|
---|
325 | }
|
---|
326 |
|
---|
327 | if count == 1 {
|
---|
328 | return kOneSymbolHistogramCost
|
---|
329 | }
|
---|
330 |
|
---|
331 | if count == 2 {
|
---|
332 | return kTwoSymbolHistogramCost + float64(histogram.total_count_)
|
---|
333 | }
|
---|
334 |
|
---|
335 | if count == 3 {
|
---|
336 | var histo0 uint32 = histogram.data_[s[0]]
|
---|
337 | var histo1 uint32 = histogram.data_[s[1]]
|
---|
338 | var histo2 uint32 = histogram.data_[s[2]]
|
---|
339 | var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
|
---|
340 | return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
|
---|
341 | }
|
---|
342 |
|
---|
343 | if count == 4 {
|
---|
344 | var histo [4]uint32
|
---|
345 | var h23 uint32
|
---|
346 | var histomax uint32
|
---|
347 | for i = 0; i < 4; i++ {
|
---|
348 | histo[i] = histogram.data_[s[i]]
|
---|
349 | }
|
---|
350 |
|
---|
351 | /* Sort */
|
---|
352 | for i = 0; i < 4; i++ {
|
---|
353 | var j uint
|
---|
354 | for j = i + 1; j < 4; j++ {
|
---|
355 | if histo[j] > histo[i] {
|
---|
356 | var tmp uint32 = histo[j]
|
---|
357 | histo[j] = histo[i]
|
---|
358 | histo[i] = tmp
|
---|
359 | }
|
---|
360 | }
|
---|
361 | }
|
---|
362 |
|
---|
363 | h23 = histo[2] + histo[3]
|
---|
364 | histomax = brotli_max_uint32_t(h23, histo[0])
|
---|
365 | return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
|
---|
366 | }
|
---|
367 | {
|
---|
368 | var max_depth uint = 1
|
---|
369 | var depth_histo = [codeLengthCodes]uint32{0}
|
---|
370 | /* In this loop we compute the entropy of the histogram and simultaneously
|
---|
371 | build a simplified histogram of the code length codes where we use the
|
---|
372 | zero repeat code 17, but we don't use the non-zero repeat code 16. */
|
---|
373 |
|
---|
374 | var log2total float64 = fastLog2(histogram.total_count_)
|
---|
375 | for i = 0; i < data_size; {
|
---|
376 | if histogram.data_[i] > 0 {
|
---|
377 | var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
|
---|
378 | /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
|
---|
379 | = log2(total_count) - log2(count(symbol)) */
|
---|
380 |
|
---|
381 | var depth uint = uint(log2p + 0.5)
|
---|
382 | /* Approximate the bit depth by round(-log2(P(symbol))) */
|
---|
383 | bits += float64(histogram.data_[i]) * log2p
|
---|
384 |
|
---|
385 | if depth > 15 {
|
---|
386 | depth = 15
|
---|
387 | }
|
---|
388 |
|
---|
389 | if depth > max_depth {
|
---|
390 | max_depth = depth
|
---|
391 | }
|
---|
392 |
|
---|
393 | depth_histo[depth]++
|
---|
394 | i++
|
---|
395 | } else {
|
---|
396 | var reps uint32 = 1
|
---|
397 | /* Compute the run length of zeros and add the appropriate number of 0
|
---|
398 | and 17 code length codes to the code length code histogram. */
|
---|
399 |
|
---|
400 | var k uint
|
---|
401 | for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
|
---|
402 | reps++
|
---|
403 | }
|
---|
404 |
|
---|
405 | i += uint(reps)
|
---|
406 | if i == data_size {
|
---|
407 | /* Don't add any cost for the last zero run, since these are encoded
|
---|
408 | only implicitly. */
|
---|
409 | break
|
---|
410 | }
|
---|
411 |
|
---|
412 | if reps < 3 {
|
---|
413 | depth_histo[0] += reps
|
---|
414 | } else {
|
---|
415 | reps -= 2
|
---|
416 | for reps > 0 {
|
---|
417 | depth_histo[repeatZeroCodeLength]++
|
---|
418 |
|
---|
419 | /* Add the 3 extra bits for the 17 code length code. */
|
---|
420 | bits += 3
|
---|
421 |
|
---|
422 | reps >>= 3
|
---|
423 | }
|
---|
424 | }
|
---|
425 | }
|
---|
426 | }
|
---|
427 |
|
---|
428 | /* Add the estimated encoding cost of the code length code histogram. */
|
---|
429 | bits += float64(18 + 2*max_depth)
|
---|
430 |
|
---|
431 | /* Add the entropy of the code length code histogram. */
|
---|
432 | bits += bitsEntropy(depth_histo[:], codeLengthCodes)
|
---|
433 | }
|
---|
434 |
|
---|
435 | return bits
|
---|
436 | }
|
---|