1 | package brotli
|
---|
2 |
|
---|
3 | import "math"
|
---|
4 |
|
---|
5 | /* Copyright 2013 Google Inc. All Rights Reserved.
|
---|
6 |
|
---|
7 | Distributed under MIT license.
|
---|
8 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
---|
9 | */
|
---|
10 |
|
---|
11 | func initialEntropyCodesDistance(data []uint16, length uint, stride uint, num_histograms uint, histograms []histogramDistance) {
|
---|
12 | var seed uint32 = 7
|
---|
13 | var block_length uint = length / num_histograms
|
---|
14 | var i uint
|
---|
15 | clearHistogramsDistance(histograms, num_histograms)
|
---|
16 | for i = 0; i < num_histograms; i++ {
|
---|
17 | var pos uint = length * i / num_histograms
|
---|
18 | if i != 0 {
|
---|
19 | pos += uint(myRand(&seed) % uint32(block_length))
|
---|
20 | }
|
---|
21 |
|
---|
22 | if pos+stride >= length {
|
---|
23 | pos = length - stride - 1
|
---|
24 | }
|
---|
25 |
|
---|
26 | histogramAddVectorDistance(&histograms[i], data[pos:], stride)
|
---|
27 | }
|
---|
28 | }
|
---|
29 |
|
---|
30 | func randomSampleDistance(seed *uint32, data []uint16, length uint, stride uint, sample *histogramDistance) {
|
---|
31 | var pos uint = 0
|
---|
32 | if stride >= length {
|
---|
33 | stride = length
|
---|
34 | } else {
|
---|
35 | pos = uint(myRand(seed) % uint32(length-stride+1))
|
---|
36 | }
|
---|
37 |
|
---|
38 | histogramAddVectorDistance(sample, data[pos:], stride)
|
---|
39 | }
|
---|
40 |
|
---|
41 | func refineEntropyCodesDistance(data []uint16, length uint, stride uint, num_histograms uint, histograms []histogramDistance) {
|
---|
42 | var iters uint = kIterMulForRefining*length/stride + kMinItersForRefining
|
---|
43 | var seed uint32 = 7
|
---|
44 | var iter uint
|
---|
45 | iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms
|
---|
46 | for iter = 0; iter < iters; iter++ {
|
---|
47 | var sample histogramDistance
|
---|
48 | histogramClearDistance(&sample)
|
---|
49 | randomSampleDistance(&seed, data, length, stride, &sample)
|
---|
50 | histogramAddHistogramDistance(&histograms[iter%num_histograms], &sample)
|
---|
51 | }
|
---|
52 | }
|
---|
53 |
|
---|
54 | /* Assigns a block id from the range [0, num_histograms) to each data element
|
---|
55 | in data[0..length) and fills in block_id[0..length) with the assigned values.
|
---|
56 | Returns the number of blocks, i.e. one plus the number of block switches. */
|
---|
57 | func findBlocksDistance(data []uint16, length uint, block_switch_bitcost float64, num_histograms uint, histograms []histogramDistance, insert_cost []float64, cost []float64, switch_signal []byte, block_id []byte) uint {
|
---|
58 | var data_size uint = histogramDataSizeDistance()
|
---|
59 | var bitmaplen uint = (num_histograms + 7) >> 3
|
---|
60 | var num_blocks uint = 1
|
---|
61 | var i uint
|
---|
62 | var j uint
|
---|
63 | assert(num_histograms <= 256)
|
---|
64 | if num_histograms <= 1 {
|
---|
65 | for i = 0; i < length; i++ {
|
---|
66 | block_id[i] = 0
|
---|
67 | }
|
---|
68 |
|
---|
69 | return 1
|
---|
70 | }
|
---|
71 |
|
---|
72 | for i := 0; i < int(data_size*num_histograms); i++ {
|
---|
73 | insert_cost[i] = 0
|
---|
74 | }
|
---|
75 | for i = 0; i < num_histograms; i++ {
|
---|
76 | insert_cost[i] = fastLog2(uint(uint32(histograms[i].total_count_)))
|
---|
77 | }
|
---|
78 |
|
---|
79 | for i = data_size; i != 0; {
|
---|
80 | i--
|
---|
81 | for j = 0; j < num_histograms; j++ {
|
---|
82 | insert_cost[i*num_histograms+j] = insert_cost[j] - bitCost(uint(histograms[j].data_[i]))
|
---|
83 | }
|
---|
84 | }
|
---|
85 |
|
---|
86 | for i := 0; i < int(num_histograms); i++ {
|
---|
87 | cost[i] = 0
|
---|
88 | }
|
---|
89 | for i := 0; i < int(length*bitmaplen); i++ {
|
---|
90 | switch_signal[i] = 0
|
---|
91 | }
|
---|
92 |
|
---|
93 | /* After each iteration of this loop, cost[k] will contain the difference
|
---|
94 | between the minimum cost of arriving at the current byte position using
|
---|
95 | entropy code k, and the minimum cost of arriving at the current byte
|
---|
96 | position. This difference is capped at the block switch cost, and if it
|
---|
97 | reaches block switch cost, it means that when we trace back from the last
|
---|
98 | position, we need to switch here. */
|
---|
99 | for i = 0; i < length; i++ {
|
---|
100 | var byte_ix uint = i
|
---|
101 | var ix uint = byte_ix * bitmaplen
|
---|
102 | var insert_cost_ix uint = uint(data[byte_ix]) * num_histograms
|
---|
103 | var min_cost float64 = 1e99
|
---|
104 | var block_switch_cost float64 = block_switch_bitcost
|
---|
105 | var k uint
|
---|
106 | for k = 0; k < num_histograms; k++ {
|
---|
107 | /* We are coding the symbol in data[byte_ix] with entropy code k. */
|
---|
108 | cost[k] += insert_cost[insert_cost_ix+k]
|
---|
109 |
|
---|
110 | if cost[k] < min_cost {
|
---|
111 | min_cost = cost[k]
|
---|
112 | block_id[byte_ix] = byte(k)
|
---|
113 | }
|
---|
114 | }
|
---|
115 |
|
---|
116 | /* More blocks for the beginning. */
|
---|
117 | if byte_ix < 2000 {
|
---|
118 | block_switch_cost *= 0.77 + 0.07*float64(byte_ix)/2000
|
---|
119 | }
|
---|
120 |
|
---|
121 | for k = 0; k < num_histograms; k++ {
|
---|
122 | cost[k] -= min_cost
|
---|
123 | if cost[k] >= block_switch_cost {
|
---|
124 | var mask byte = byte(1 << (k & 7))
|
---|
125 | cost[k] = block_switch_cost
|
---|
126 | assert(k>>3 < bitmaplen)
|
---|
127 | switch_signal[ix+(k>>3)] |= mask
|
---|
128 | /* Trace back from the last position and switch at the marked places. */
|
---|
129 | }
|
---|
130 | }
|
---|
131 | }
|
---|
132 | {
|
---|
133 | var byte_ix uint = length - 1
|
---|
134 | var ix uint = byte_ix * bitmaplen
|
---|
135 | var cur_id byte = block_id[byte_ix]
|
---|
136 | for byte_ix > 0 {
|
---|
137 | var mask byte = byte(1 << (cur_id & 7))
|
---|
138 | assert(uint(cur_id)>>3 < bitmaplen)
|
---|
139 | byte_ix--
|
---|
140 | ix -= bitmaplen
|
---|
141 | if switch_signal[ix+uint(cur_id>>3)]&mask != 0 {
|
---|
142 | if cur_id != block_id[byte_ix] {
|
---|
143 | cur_id = block_id[byte_ix]
|
---|
144 | num_blocks++
|
---|
145 | }
|
---|
146 | }
|
---|
147 |
|
---|
148 | block_id[byte_ix] = cur_id
|
---|
149 | }
|
---|
150 | }
|
---|
151 |
|
---|
152 | return num_blocks
|
---|
153 | }
|
---|
154 |
|
---|
155 | var remapBlockIdsDistance_kInvalidId uint16 = 256
|
---|
156 |
|
---|
157 | func remapBlockIdsDistance(block_ids []byte, length uint, new_id []uint16, num_histograms uint) uint {
|
---|
158 | var next_id uint16 = 0
|
---|
159 | var i uint
|
---|
160 | for i = 0; i < num_histograms; i++ {
|
---|
161 | new_id[i] = remapBlockIdsDistance_kInvalidId
|
---|
162 | }
|
---|
163 |
|
---|
164 | for i = 0; i < length; i++ {
|
---|
165 | assert(uint(block_ids[i]) < num_histograms)
|
---|
166 | if new_id[block_ids[i]] == remapBlockIdsDistance_kInvalidId {
|
---|
167 | new_id[block_ids[i]] = next_id
|
---|
168 | next_id++
|
---|
169 | }
|
---|
170 | }
|
---|
171 |
|
---|
172 | for i = 0; i < length; i++ {
|
---|
173 | block_ids[i] = byte(new_id[block_ids[i]])
|
---|
174 | assert(uint(block_ids[i]) < num_histograms)
|
---|
175 | }
|
---|
176 |
|
---|
177 | assert(uint(next_id) <= num_histograms)
|
---|
178 | return uint(next_id)
|
---|
179 | }
|
---|
180 |
|
---|
181 | func buildBlockHistogramsDistance(data []uint16, length uint, block_ids []byte, num_histograms uint, histograms []histogramDistance) {
|
---|
182 | var i uint
|
---|
183 | clearHistogramsDistance(histograms, num_histograms)
|
---|
184 | for i = 0; i < length; i++ {
|
---|
185 | histogramAddDistance(&histograms[block_ids[i]], uint(data[i]))
|
---|
186 | }
|
---|
187 | }
|
---|
188 |
|
---|
189 | var clusterBlocksDistance_kInvalidIndex uint32 = math.MaxUint32
|
---|
190 |
|
---|
191 | func clusterBlocksDistance(data []uint16, length uint, num_blocks uint, block_ids []byte, split *blockSplit) {
|
---|
192 | var histogram_symbols []uint32 = make([]uint32, num_blocks)
|
---|
193 | var block_lengths []uint32 = make([]uint32, num_blocks)
|
---|
194 | var expected_num_clusters uint = clustersPerBatch * (num_blocks + histogramsPerBatch - 1) / histogramsPerBatch
|
---|
195 | var all_histograms_size uint = 0
|
---|
196 | var all_histograms_capacity uint = expected_num_clusters
|
---|
197 | var all_histograms []histogramDistance = make([]histogramDistance, all_histograms_capacity)
|
---|
198 | var cluster_size_size uint = 0
|
---|
199 | var cluster_size_capacity uint = expected_num_clusters
|
---|
200 | var cluster_size []uint32 = make([]uint32, cluster_size_capacity)
|
---|
201 | var num_clusters uint = 0
|
---|
202 | var histograms []histogramDistance = make([]histogramDistance, brotli_min_size_t(num_blocks, histogramsPerBatch))
|
---|
203 | var max_num_pairs uint = histogramsPerBatch * histogramsPerBatch / 2
|
---|
204 | var pairs_capacity uint = max_num_pairs + 1
|
---|
205 | var pairs []histogramPair = make([]histogramPair, pairs_capacity)
|
---|
206 | var pos uint = 0
|
---|
207 | var clusters []uint32
|
---|
208 | var num_final_clusters uint
|
---|
209 | var new_index []uint32
|
---|
210 | var i uint
|
---|
211 | var sizes = [histogramsPerBatch]uint32{0}
|
---|
212 | var new_clusters = [histogramsPerBatch]uint32{0}
|
---|
213 | var symbols = [histogramsPerBatch]uint32{0}
|
---|
214 | var remap = [histogramsPerBatch]uint32{0}
|
---|
215 |
|
---|
216 | for i := 0; i < int(num_blocks); i++ {
|
---|
217 | block_lengths[i] = 0
|
---|
218 | }
|
---|
219 | {
|
---|
220 | var block_idx uint = 0
|
---|
221 | for i = 0; i < length; i++ {
|
---|
222 | assert(block_idx < num_blocks)
|
---|
223 | block_lengths[block_idx]++
|
---|
224 | if i+1 == length || block_ids[i] != block_ids[i+1] {
|
---|
225 | block_idx++
|
---|
226 | }
|
---|
227 | }
|
---|
228 |
|
---|
229 | assert(block_idx == num_blocks)
|
---|
230 | }
|
---|
231 |
|
---|
232 | for i = 0; i < num_blocks; i += histogramsPerBatch {
|
---|
233 | var num_to_combine uint = brotli_min_size_t(num_blocks-i, histogramsPerBatch)
|
---|
234 | var num_new_clusters uint
|
---|
235 | var j uint
|
---|
236 | for j = 0; j < num_to_combine; j++ {
|
---|
237 | var k uint
|
---|
238 | histogramClearDistance(&histograms[j])
|
---|
239 | for k = 0; uint32(k) < block_lengths[i+j]; k++ {
|
---|
240 | histogramAddDistance(&histograms[j], uint(data[pos]))
|
---|
241 | pos++
|
---|
242 | }
|
---|
243 |
|
---|
244 | histograms[j].bit_cost_ = populationCostDistance(&histograms[j])
|
---|
245 | new_clusters[j] = uint32(j)
|
---|
246 | symbols[j] = uint32(j)
|
---|
247 | sizes[j] = 1
|
---|
248 | }
|
---|
249 |
|
---|
250 | num_new_clusters = histogramCombineDistance(histograms, sizes[:], symbols[:], new_clusters[:], []histogramPair(pairs), num_to_combine, num_to_combine, histogramsPerBatch, max_num_pairs)
|
---|
251 | if all_histograms_capacity < (all_histograms_size + num_new_clusters) {
|
---|
252 | var _new_size uint
|
---|
253 | if all_histograms_capacity == 0 {
|
---|
254 | _new_size = all_histograms_size + num_new_clusters
|
---|
255 | } else {
|
---|
256 | _new_size = all_histograms_capacity
|
---|
257 | }
|
---|
258 | var new_array []histogramDistance
|
---|
259 | for _new_size < (all_histograms_size + num_new_clusters) {
|
---|
260 | _new_size *= 2
|
---|
261 | }
|
---|
262 | new_array = make([]histogramDistance, _new_size)
|
---|
263 | if all_histograms_capacity != 0 {
|
---|
264 | copy(new_array, all_histograms[:all_histograms_capacity])
|
---|
265 | }
|
---|
266 |
|
---|
267 | all_histograms = new_array
|
---|
268 | all_histograms_capacity = _new_size
|
---|
269 | }
|
---|
270 |
|
---|
271 | brotli_ensure_capacity_uint32_t(&cluster_size, &cluster_size_capacity, cluster_size_size+num_new_clusters)
|
---|
272 | for j = 0; j < num_new_clusters; j++ {
|
---|
273 | all_histograms[all_histograms_size] = histograms[new_clusters[j]]
|
---|
274 | all_histograms_size++
|
---|
275 | cluster_size[cluster_size_size] = sizes[new_clusters[j]]
|
---|
276 | cluster_size_size++
|
---|
277 | remap[new_clusters[j]] = uint32(j)
|
---|
278 | }
|
---|
279 |
|
---|
280 | for j = 0; j < num_to_combine; j++ {
|
---|
281 | histogram_symbols[i+j] = uint32(num_clusters) + remap[symbols[j]]
|
---|
282 | }
|
---|
283 |
|
---|
284 | num_clusters += num_new_clusters
|
---|
285 | assert(num_clusters == cluster_size_size)
|
---|
286 | assert(num_clusters == all_histograms_size)
|
---|
287 | }
|
---|
288 |
|
---|
289 | histograms = nil
|
---|
290 |
|
---|
291 | max_num_pairs = brotli_min_size_t(64*num_clusters, (num_clusters/2)*num_clusters)
|
---|
292 | if pairs_capacity < max_num_pairs+1 {
|
---|
293 | pairs = nil
|
---|
294 | pairs = make([]histogramPair, (max_num_pairs + 1))
|
---|
295 | }
|
---|
296 |
|
---|
297 | clusters = make([]uint32, num_clusters)
|
---|
298 | for i = 0; i < num_clusters; i++ {
|
---|
299 | clusters[i] = uint32(i)
|
---|
300 | }
|
---|
301 |
|
---|
302 | num_final_clusters = histogramCombineDistance(all_histograms, cluster_size, histogram_symbols, clusters, pairs, num_clusters, num_blocks, maxNumberOfBlockTypes, max_num_pairs)
|
---|
303 | pairs = nil
|
---|
304 | cluster_size = nil
|
---|
305 |
|
---|
306 | new_index = make([]uint32, num_clusters)
|
---|
307 | for i = 0; i < num_clusters; i++ {
|
---|
308 | new_index[i] = clusterBlocksDistance_kInvalidIndex
|
---|
309 | }
|
---|
310 | pos = 0
|
---|
311 | {
|
---|
312 | var next_index uint32 = 0
|
---|
313 | for i = 0; i < num_blocks; i++ {
|
---|
314 | var histo histogramDistance
|
---|
315 | var j uint
|
---|
316 | var best_out uint32
|
---|
317 | var best_bits float64
|
---|
318 | histogramClearDistance(&histo)
|
---|
319 | for j = 0; uint32(j) < block_lengths[i]; j++ {
|
---|
320 | histogramAddDistance(&histo, uint(data[pos]))
|
---|
321 | pos++
|
---|
322 | }
|
---|
323 |
|
---|
324 | if i == 0 {
|
---|
325 | best_out = histogram_symbols[0]
|
---|
326 | } else {
|
---|
327 | best_out = histogram_symbols[i-1]
|
---|
328 | }
|
---|
329 | best_bits = histogramBitCostDistanceDistance(&histo, &all_histograms[best_out])
|
---|
330 | for j = 0; j < num_final_clusters; j++ {
|
---|
331 | var cur_bits float64 = histogramBitCostDistanceDistance(&histo, &all_histograms[clusters[j]])
|
---|
332 | if cur_bits < best_bits {
|
---|
333 | best_bits = cur_bits
|
---|
334 | best_out = clusters[j]
|
---|
335 | }
|
---|
336 | }
|
---|
337 |
|
---|
338 | histogram_symbols[i] = best_out
|
---|
339 | if new_index[best_out] == clusterBlocksDistance_kInvalidIndex {
|
---|
340 | new_index[best_out] = next_index
|
---|
341 | next_index++
|
---|
342 | }
|
---|
343 | }
|
---|
344 | }
|
---|
345 |
|
---|
346 | clusters = nil
|
---|
347 | all_histograms = nil
|
---|
348 | brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, num_blocks)
|
---|
349 | brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, num_blocks)
|
---|
350 | {
|
---|
351 | var cur_length uint32 = 0
|
---|
352 | var block_idx uint = 0
|
---|
353 | var max_type byte = 0
|
---|
354 | for i = 0; i < num_blocks; i++ {
|
---|
355 | cur_length += block_lengths[i]
|
---|
356 | if i+1 == num_blocks || histogram_symbols[i] != histogram_symbols[i+1] {
|
---|
357 | var id byte = byte(new_index[histogram_symbols[i]])
|
---|
358 | split.types[block_idx] = id
|
---|
359 | split.lengths[block_idx] = cur_length
|
---|
360 | max_type = brotli_max_uint8_t(max_type, id)
|
---|
361 | cur_length = 0
|
---|
362 | block_idx++
|
---|
363 | }
|
---|
364 | }
|
---|
365 |
|
---|
366 | split.num_blocks = block_idx
|
---|
367 | split.num_types = uint(max_type) + 1
|
---|
368 | }
|
---|
369 |
|
---|
370 | new_index = nil
|
---|
371 | block_lengths = nil
|
---|
372 | histogram_symbols = nil
|
---|
373 | }
|
---|
374 |
|
---|
375 | func splitByteVectorDistance(data []uint16, length uint, literals_per_histogram uint, max_histograms uint, sampling_stride_length uint, block_switch_cost float64, params *encoderParams, split *blockSplit) {
|
---|
376 | var data_size uint = histogramDataSizeDistance()
|
---|
377 | var num_histograms uint = length/literals_per_histogram + 1
|
---|
378 | var histograms []histogramDistance
|
---|
379 | if num_histograms > max_histograms {
|
---|
380 | num_histograms = max_histograms
|
---|
381 | }
|
---|
382 |
|
---|
383 | if length == 0 {
|
---|
384 | split.num_types = 1
|
---|
385 | return
|
---|
386 | } else if length < kMinLengthForBlockSplitting {
|
---|
387 | brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, split.num_blocks+1)
|
---|
388 | brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, split.num_blocks+1)
|
---|
389 | split.num_types = 1
|
---|
390 | split.types[split.num_blocks] = 0
|
---|
391 | split.lengths[split.num_blocks] = uint32(length)
|
---|
392 | split.num_blocks++
|
---|
393 | return
|
---|
394 | }
|
---|
395 |
|
---|
396 | histograms = make([]histogramDistance, num_histograms)
|
---|
397 |
|
---|
398 | /* Find good entropy codes. */
|
---|
399 | initialEntropyCodesDistance(data, length, sampling_stride_length, num_histograms, histograms)
|
---|
400 |
|
---|
401 | refineEntropyCodesDistance(data, length, sampling_stride_length, num_histograms, histograms)
|
---|
402 | {
|
---|
403 | var block_ids []byte = make([]byte, length)
|
---|
404 | var num_blocks uint = 0
|
---|
405 | var bitmaplen uint = (num_histograms + 7) >> 3
|
---|
406 | var insert_cost []float64 = make([]float64, (data_size * num_histograms))
|
---|
407 | var cost []float64 = make([]float64, num_histograms)
|
---|
408 | var switch_signal []byte = make([]byte, (length * bitmaplen))
|
---|
409 | var new_id []uint16 = make([]uint16, num_histograms)
|
---|
410 | var iters uint
|
---|
411 | if params.quality < hqZopflificationQuality {
|
---|
412 | iters = 3
|
---|
413 | } else {
|
---|
414 | iters = 10
|
---|
415 | }
|
---|
416 | /* Find a good path through literals with the good entropy codes. */
|
---|
417 |
|
---|
418 | var i uint
|
---|
419 | for i = 0; i < iters; i++ {
|
---|
420 | num_blocks = findBlocksDistance(data, length, block_switch_cost, num_histograms, histograms, insert_cost, cost, switch_signal, block_ids)
|
---|
421 | num_histograms = remapBlockIdsDistance(block_ids, length, new_id, num_histograms)
|
---|
422 | buildBlockHistogramsDistance(data, length, block_ids, num_histograms, histograms)
|
---|
423 | }
|
---|
424 |
|
---|
425 | insert_cost = nil
|
---|
426 | cost = nil
|
---|
427 | switch_signal = nil
|
---|
428 | new_id = nil
|
---|
429 | histograms = nil
|
---|
430 | clusterBlocksDistance(data, length, num_blocks, block_ids, split)
|
---|
431 | block_ids = nil
|
---|
432 | }
|
---|
433 | }
|
---|