source: code/trunk/vendor/github.com/andybalholm/brotli/block_splitter_distance.go@ 145

Last change on this file since 145 was 145, checked in by Izuru Yakumo, 22 months ago

Updated the Makefile and vendored depedencies

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 13.5 KB
Line 
1package brotli
2
3import "math"
4
5/* Copyright 2013 Google Inc. All Rights Reserved.
6
7 Distributed under MIT license.
8 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
9*/
10
11func initialEntropyCodesDistance(data []uint16, length uint, stride uint, num_histograms uint, histograms []histogramDistance) {
12 var seed uint32 = 7
13 var block_length uint = length / num_histograms
14 var i uint
15 clearHistogramsDistance(histograms, num_histograms)
16 for i = 0; i < num_histograms; i++ {
17 var pos uint = length * i / num_histograms
18 if i != 0 {
19 pos += uint(myRand(&seed) % uint32(block_length))
20 }
21
22 if pos+stride >= length {
23 pos = length - stride - 1
24 }
25
26 histogramAddVectorDistance(&histograms[i], data[pos:], stride)
27 }
28}
29
30func randomSampleDistance(seed *uint32, data []uint16, length uint, stride uint, sample *histogramDistance) {
31 var pos uint = 0
32 if stride >= length {
33 stride = length
34 } else {
35 pos = uint(myRand(seed) % uint32(length-stride+1))
36 }
37
38 histogramAddVectorDistance(sample, data[pos:], stride)
39}
40
41func refineEntropyCodesDistance(data []uint16, length uint, stride uint, num_histograms uint, histograms []histogramDistance) {
42 var iters uint = kIterMulForRefining*length/stride + kMinItersForRefining
43 var seed uint32 = 7
44 var iter uint
45 iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms
46 for iter = 0; iter < iters; iter++ {
47 var sample histogramDistance
48 histogramClearDistance(&sample)
49 randomSampleDistance(&seed, data, length, stride, &sample)
50 histogramAddHistogramDistance(&histograms[iter%num_histograms], &sample)
51 }
52}
53
54/* Assigns a block id from the range [0, num_histograms) to each data element
55 in data[0..length) and fills in block_id[0..length) with the assigned values.
56 Returns the number of blocks, i.e. one plus the number of block switches. */
57func findBlocksDistance(data []uint16, length uint, block_switch_bitcost float64, num_histograms uint, histograms []histogramDistance, insert_cost []float64, cost []float64, switch_signal []byte, block_id []byte) uint {
58 var data_size uint = histogramDataSizeDistance()
59 var bitmaplen uint = (num_histograms + 7) >> 3
60 var num_blocks uint = 1
61 var i uint
62 var j uint
63 assert(num_histograms <= 256)
64 if num_histograms <= 1 {
65 for i = 0; i < length; i++ {
66 block_id[i] = 0
67 }
68
69 return 1
70 }
71
72 for i := 0; i < int(data_size*num_histograms); i++ {
73 insert_cost[i] = 0
74 }
75 for i = 0; i < num_histograms; i++ {
76 insert_cost[i] = fastLog2(uint(uint32(histograms[i].total_count_)))
77 }
78
79 for i = data_size; i != 0; {
80 i--
81 for j = 0; j < num_histograms; j++ {
82 insert_cost[i*num_histograms+j] = insert_cost[j] - bitCost(uint(histograms[j].data_[i]))
83 }
84 }
85
86 for i := 0; i < int(num_histograms); i++ {
87 cost[i] = 0
88 }
89 for i := 0; i < int(length*bitmaplen); i++ {
90 switch_signal[i] = 0
91 }
92
93 /* After each iteration of this loop, cost[k] will contain the difference
94 between the minimum cost of arriving at the current byte position using
95 entropy code k, and the minimum cost of arriving at the current byte
96 position. This difference is capped at the block switch cost, and if it
97 reaches block switch cost, it means that when we trace back from the last
98 position, we need to switch here. */
99 for i = 0; i < length; i++ {
100 var byte_ix uint = i
101 var ix uint = byte_ix * bitmaplen
102 var insert_cost_ix uint = uint(data[byte_ix]) * num_histograms
103 var min_cost float64 = 1e99
104 var block_switch_cost float64 = block_switch_bitcost
105 var k uint
106 for k = 0; k < num_histograms; k++ {
107 /* We are coding the symbol in data[byte_ix] with entropy code k. */
108 cost[k] += insert_cost[insert_cost_ix+k]
109
110 if cost[k] < min_cost {
111 min_cost = cost[k]
112 block_id[byte_ix] = byte(k)
113 }
114 }
115
116 /* More blocks for the beginning. */
117 if byte_ix < 2000 {
118 block_switch_cost *= 0.77 + 0.07*float64(byte_ix)/2000
119 }
120
121 for k = 0; k < num_histograms; k++ {
122 cost[k] -= min_cost
123 if cost[k] >= block_switch_cost {
124 var mask byte = byte(1 << (k & 7))
125 cost[k] = block_switch_cost
126 assert(k>>3 < bitmaplen)
127 switch_signal[ix+(k>>3)] |= mask
128 /* Trace back from the last position and switch at the marked places. */
129 }
130 }
131 }
132 {
133 var byte_ix uint = length - 1
134 var ix uint = byte_ix * bitmaplen
135 var cur_id byte = block_id[byte_ix]
136 for byte_ix > 0 {
137 var mask byte = byte(1 << (cur_id & 7))
138 assert(uint(cur_id)>>3 < bitmaplen)
139 byte_ix--
140 ix -= bitmaplen
141 if switch_signal[ix+uint(cur_id>>3)]&mask != 0 {
142 if cur_id != block_id[byte_ix] {
143 cur_id = block_id[byte_ix]
144 num_blocks++
145 }
146 }
147
148 block_id[byte_ix] = cur_id
149 }
150 }
151
152 return num_blocks
153}
154
155var remapBlockIdsDistance_kInvalidId uint16 = 256
156
157func remapBlockIdsDistance(block_ids []byte, length uint, new_id []uint16, num_histograms uint) uint {
158 var next_id uint16 = 0
159 var i uint
160 for i = 0; i < num_histograms; i++ {
161 new_id[i] = remapBlockIdsDistance_kInvalidId
162 }
163
164 for i = 0; i < length; i++ {
165 assert(uint(block_ids[i]) < num_histograms)
166 if new_id[block_ids[i]] == remapBlockIdsDistance_kInvalidId {
167 new_id[block_ids[i]] = next_id
168 next_id++
169 }
170 }
171
172 for i = 0; i < length; i++ {
173 block_ids[i] = byte(new_id[block_ids[i]])
174 assert(uint(block_ids[i]) < num_histograms)
175 }
176
177 assert(uint(next_id) <= num_histograms)
178 return uint(next_id)
179}
180
181func buildBlockHistogramsDistance(data []uint16, length uint, block_ids []byte, num_histograms uint, histograms []histogramDistance) {
182 var i uint
183 clearHistogramsDistance(histograms, num_histograms)
184 for i = 0; i < length; i++ {
185 histogramAddDistance(&histograms[block_ids[i]], uint(data[i]))
186 }
187}
188
189var clusterBlocksDistance_kInvalidIndex uint32 = math.MaxUint32
190
191func clusterBlocksDistance(data []uint16, length uint, num_blocks uint, block_ids []byte, split *blockSplit) {
192 var histogram_symbols []uint32 = make([]uint32, num_blocks)
193 var block_lengths []uint32 = make([]uint32, num_blocks)
194 var expected_num_clusters uint = clustersPerBatch * (num_blocks + histogramsPerBatch - 1) / histogramsPerBatch
195 var all_histograms_size uint = 0
196 var all_histograms_capacity uint = expected_num_clusters
197 var all_histograms []histogramDistance = make([]histogramDistance, all_histograms_capacity)
198 var cluster_size_size uint = 0
199 var cluster_size_capacity uint = expected_num_clusters
200 var cluster_size []uint32 = make([]uint32, cluster_size_capacity)
201 var num_clusters uint = 0
202 var histograms []histogramDistance = make([]histogramDistance, brotli_min_size_t(num_blocks, histogramsPerBatch))
203 var max_num_pairs uint = histogramsPerBatch * histogramsPerBatch / 2
204 var pairs_capacity uint = max_num_pairs + 1
205 var pairs []histogramPair = make([]histogramPair, pairs_capacity)
206 var pos uint = 0
207 var clusters []uint32
208 var num_final_clusters uint
209 var new_index []uint32
210 var i uint
211 var sizes = [histogramsPerBatch]uint32{0}
212 var new_clusters = [histogramsPerBatch]uint32{0}
213 var symbols = [histogramsPerBatch]uint32{0}
214 var remap = [histogramsPerBatch]uint32{0}
215
216 for i := 0; i < int(num_blocks); i++ {
217 block_lengths[i] = 0
218 }
219 {
220 var block_idx uint = 0
221 for i = 0; i < length; i++ {
222 assert(block_idx < num_blocks)
223 block_lengths[block_idx]++
224 if i+1 == length || block_ids[i] != block_ids[i+1] {
225 block_idx++
226 }
227 }
228
229 assert(block_idx == num_blocks)
230 }
231
232 for i = 0; i < num_blocks; i += histogramsPerBatch {
233 var num_to_combine uint = brotli_min_size_t(num_blocks-i, histogramsPerBatch)
234 var num_new_clusters uint
235 var j uint
236 for j = 0; j < num_to_combine; j++ {
237 var k uint
238 histogramClearDistance(&histograms[j])
239 for k = 0; uint32(k) < block_lengths[i+j]; k++ {
240 histogramAddDistance(&histograms[j], uint(data[pos]))
241 pos++
242 }
243
244 histograms[j].bit_cost_ = populationCostDistance(&histograms[j])
245 new_clusters[j] = uint32(j)
246 symbols[j] = uint32(j)
247 sizes[j] = 1
248 }
249
250 num_new_clusters = histogramCombineDistance(histograms, sizes[:], symbols[:], new_clusters[:], []histogramPair(pairs), num_to_combine, num_to_combine, histogramsPerBatch, max_num_pairs)
251 if all_histograms_capacity < (all_histograms_size + num_new_clusters) {
252 var _new_size uint
253 if all_histograms_capacity == 0 {
254 _new_size = all_histograms_size + num_new_clusters
255 } else {
256 _new_size = all_histograms_capacity
257 }
258 var new_array []histogramDistance
259 for _new_size < (all_histograms_size + num_new_clusters) {
260 _new_size *= 2
261 }
262 new_array = make([]histogramDistance, _new_size)
263 if all_histograms_capacity != 0 {
264 copy(new_array, all_histograms[:all_histograms_capacity])
265 }
266
267 all_histograms = new_array
268 all_histograms_capacity = _new_size
269 }
270
271 brotli_ensure_capacity_uint32_t(&cluster_size, &cluster_size_capacity, cluster_size_size+num_new_clusters)
272 for j = 0; j < num_new_clusters; j++ {
273 all_histograms[all_histograms_size] = histograms[new_clusters[j]]
274 all_histograms_size++
275 cluster_size[cluster_size_size] = sizes[new_clusters[j]]
276 cluster_size_size++
277 remap[new_clusters[j]] = uint32(j)
278 }
279
280 for j = 0; j < num_to_combine; j++ {
281 histogram_symbols[i+j] = uint32(num_clusters) + remap[symbols[j]]
282 }
283
284 num_clusters += num_new_clusters
285 assert(num_clusters == cluster_size_size)
286 assert(num_clusters == all_histograms_size)
287 }
288
289 histograms = nil
290
291 max_num_pairs = brotli_min_size_t(64*num_clusters, (num_clusters/2)*num_clusters)
292 if pairs_capacity < max_num_pairs+1 {
293 pairs = nil
294 pairs = make([]histogramPair, (max_num_pairs + 1))
295 }
296
297 clusters = make([]uint32, num_clusters)
298 for i = 0; i < num_clusters; i++ {
299 clusters[i] = uint32(i)
300 }
301
302 num_final_clusters = histogramCombineDistance(all_histograms, cluster_size, histogram_symbols, clusters, pairs, num_clusters, num_blocks, maxNumberOfBlockTypes, max_num_pairs)
303 pairs = nil
304 cluster_size = nil
305
306 new_index = make([]uint32, num_clusters)
307 for i = 0; i < num_clusters; i++ {
308 new_index[i] = clusterBlocksDistance_kInvalidIndex
309 }
310 pos = 0
311 {
312 var next_index uint32 = 0
313 for i = 0; i < num_blocks; i++ {
314 var histo histogramDistance
315 var j uint
316 var best_out uint32
317 var best_bits float64
318 histogramClearDistance(&histo)
319 for j = 0; uint32(j) < block_lengths[i]; j++ {
320 histogramAddDistance(&histo, uint(data[pos]))
321 pos++
322 }
323
324 if i == 0 {
325 best_out = histogram_symbols[0]
326 } else {
327 best_out = histogram_symbols[i-1]
328 }
329 best_bits = histogramBitCostDistanceDistance(&histo, &all_histograms[best_out])
330 for j = 0; j < num_final_clusters; j++ {
331 var cur_bits float64 = histogramBitCostDistanceDistance(&histo, &all_histograms[clusters[j]])
332 if cur_bits < best_bits {
333 best_bits = cur_bits
334 best_out = clusters[j]
335 }
336 }
337
338 histogram_symbols[i] = best_out
339 if new_index[best_out] == clusterBlocksDistance_kInvalidIndex {
340 new_index[best_out] = next_index
341 next_index++
342 }
343 }
344 }
345
346 clusters = nil
347 all_histograms = nil
348 brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, num_blocks)
349 brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, num_blocks)
350 {
351 var cur_length uint32 = 0
352 var block_idx uint = 0
353 var max_type byte = 0
354 for i = 0; i < num_blocks; i++ {
355 cur_length += block_lengths[i]
356 if i+1 == num_blocks || histogram_symbols[i] != histogram_symbols[i+1] {
357 var id byte = byte(new_index[histogram_symbols[i]])
358 split.types[block_idx] = id
359 split.lengths[block_idx] = cur_length
360 max_type = brotli_max_uint8_t(max_type, id)
361 cur_length = 0
362 block_idx++
363 }
364 }
365
366 split.num_blocks = block_idx
367 split.num_types = uint(max_type) + 1
368 }
369
370 new_index = nil
371 block_lengths = nil
372 histogram_symbols = nil
373}
374
375func splitByteVectorDistance(data []uint16, length uint, literals_per_histogram uint, max_histograms uint, sampling_stride_length uint, block_switch_cost float64, params *encoderParams, split *blockSplit) {
376 var data_size uint = histogramDataSizeDistance()
377 var num_histograms uint = length/literals_per_histogram + 1
378 var histograms []histogramDistance
379 if num_histograms > max_histograms {
380 num_histograms = max_histograms
381 }
382
383 if length == 0 {
384 split.num_types = 1
385 return
386 } else if length < kMinLengthForBlockSplitting {
387 brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, split.num_blocks+1)
388 brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, split.num_blocks+1)
389 split.num_types = 1
390 split.types[split.num_blocks] = 0
391 split.lengths[split.num_blocks] = uint32(length)
392 split.num_blocks++
393 return
394 }
395
396 histograms = make([]histogramDistance, num_histograms)
397
398 /* Find good entropy codes. */
399 initialEntropyCodesDistance(data, length, sampling_stride_length, num_histograms, histograms)
400
401 refineEntropyCodesDistance(data, length, sampling_stride_length, num_histograms, histograms)
402 {
403 var block_ids []byte = make([]byte, length)
404 var num_blocks uint = 0
405 var bitmaplen uint = (num_histograms + 7) >> 3
406 var insert_cost []float64 = make([]float64, (data_size * num_histograms))
407 var cost []float64 = make([]float64, num_histograms)
408 var switch_signal []byte = make([]byte, (length * bitmaplen))
409 var new_id []uint16 = make([]uint16, num_histograms)
410 var iters uint
411 if params.quality < hqZopflificationQuality {
412 iters = 3
413 } else {
414 iters = 10
415 }
416 /* Find a good path through literals with the good entropy codes. */
417
418 var i uint
419 for i = 0; i < iters; i++ {
420 num_blocks = findBlocksDistance(data, length, block_switch_cost, num_histograms, histograms, insert_cost, cost, switch_signal, block_ids)
421 num_histograms = remapBlockIdsDistance(block_ids, length, new_id, num_histograms)
422 buildBlockHistogramsDistance(data, length, block_ids, num_histograms, histograms)
423 }
424
425 insert_cost = nil
426 cost = nil
427 switch_signal = nil
428 new_id = nil
429 histograms = nil
430 clusterBlocksDistance(data, length, num_blocks, block_ids, split)
431 block_ids = nil
432 }
433}
Note: See TracBrowser for help on using the repository browser.