1 | package brotli
|
---|
2 |
|
---|
3 | /* Copyright 2015 Google Inc. All Rights Reserved.
|
---|
4 |
|
---|
5 | Distributed under MIT license.
|
---|
6 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
---|
7 | */
|
---|
8 |
|
---|
9 | /* Greedy block splitter for one block category (literal, command or distance).
|
---|
10 | */
|
---|
11 | type blockSplitterCommand struct {
|
---|
12 | alphabet_size_ uint
|
---|
13 | min_block_size_ uint
|
---|
14 | split_threshold_ float64
|
---|
15 | num_blocks_ uint
|
---|
16 | split_ *blockSplit
|
---|
17 | histograms_ []histogramCommand
|
---|
18 | histograms_size_ *uint
|
---|
19 | target_block_size_ uint
|
---|
20 | block_size_ uint
|
---|
21 | curr_histogram_ix_ uint
|
---|
22 | last_histogram_ix_ [2]uint
|
---|
23 | last_entropy_ [2]float64
|
---|
24 | merge_last_count_ uint
|
---|
25 | }
|
---|
26 |
|
---|
27 | func initBlockSplitterCommand(self *blockSplitterCommand, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramCommand, histograms_size *uint) {
|
---|
28 | var max_num_blocks uint = num_symbols/min_block_size + 1
|
---|
29 | var max_num_types uint = brotli_min_size_t(max_num_blocks, maxNumberOfBlockTypes+1)
|
---|
30 | /* We have to allocate one more histogram than the maximum number of block
|
---|
31 | types for the current histogram when the meta-block is too big. */
|
---|
32 | self.alphabet_size_ = alphabet_size
|
---|
33 |
|
---|
34 | self.min_block_size_ = min_block_size
|
---|
35 | self.split_threshold_ = split_threshold
|
---|
36 | self.num_blocks_ = 0
|
---|
37 | self.split_ = split
|
---|
38 | self.histograms_size_ = histograms_size
|
---|
39 | self.target_block_size_ = min_block_size
|
---|
40 | self.block_size_ = 0
|
---|
41 | self.curr_histogram_ix_ = 0
|
---|
42 | self.merge_last_count_ = 0
|
---|
43 | brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks)
|
---|
44 | brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks)
|
---|
45 | self.split_.num_blocks = max_num_blocks
|
---|
46 | *histograms_size = max_num_types
|
---|
47 | if histograms == nil || cap(*histograms) < int(*histograms_size) {
|
---|
48 | *histograms = make([]histogramCommand, (*histograms_size))
|
---|
49 | } else {
|
---|
50 | *histograms = (*histograms)[:*histograms_size]
|
---|
51 | }
|
---|
52 | self.histograms_ = *histograms
|
---|
53 |
|
---|
54 | /* Clear only current histogram. */
|
---|
55 | histogramClearCommand(&self.histograms_[0])
|
---|
56 |
|
---|
57 | self.last_histogram_ix_[1] = 0
|
---|
58 | self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
|
---|
59 | }
|
---|
60 |
|
---|
61 | /* Does either of three things:
|
---|
62 | (1) emits the current block with a new block type;
|
---|
63 | (2) emits the current block with the type of the second last block;
|
---|
64 | (3) merges the current block with the last block. */
|
---|
65 | func blockSplitterFinishBlockCommand(self *blockSplitterCommand, is_final bool) {
|
---|
66 | var split *blockSplit = self.split_
|
---|
67 | var last_entropy []float64 = self.last_entropy_[:]
|
---|
68 | var histograms []histogramCommand = self.histograms_
|
---|
69 | self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_)
|
---|
70 | if self.num_blocks_ == 0 {
|
---|
71 | /* Create first block. */
|
---|
72 | split.lengths[0] = uint32(self.block_size_)
|
---|
73 |
|
---|
74 | split.types[0] = 0
|
---|
75 | last_entropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_)
|
---|
76 | last_entropy[1] = last_entropy[0]
|
---|
77 | self.num_blocks_++
|
---|
78 | split.num_types++
|
---|
79 | self.curr_histogram_ix_++
|
---|
80 | if self.curr_histogram_ix_ < *self.histograms_size_ {
|
---|
81 | histogramClearCommand(&histograms[self.curr_histogram_ix_])
|
---|
82 | }
|
---|
83 | self.block_size_ = 0
|
---|
84 | } else if self.block_size_ > 0 {
|
---|
85 | var entropy float64 = bitsEntropy(histograms[self.curr_histogram_ix_].data_[:], self.alphabet_size_)
|
---|
86 | var combined_histo [2]histogramCommand
|
---|
87 | var combined_entropy [2]float64
|
---|
88 | var diff [2]float64
|
---|
89 | var j uint
|
---|
90 | for j = 0; j < 2; j++ {
|
---|
91 | var last_histogram_ix uint = self.last_histogram_ix_[j]
|
---|
92 | combined_histo[j] = histograms[self.curr_histogram_ix_]
|
---|
93 | histogramAddHistogramCommand(&combined_histo[j], &histograms[last_histogram_ix])
|
---|
94 | combined_entropy[j] = bitsEntropy(combined_histo[j].data_[0:], self.alphabet_size_)
|
---|
95 | diff[j] = combined_entropy[j] - entropy - last_entropy[j]
|
---|
96 | }
|
---|
97 |
|
---|
98 | if split.num_types < maxNumberOfBlockTypes && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ {
|
---|
99 | /* Create new block. */
|
---|
100 | split.lengths[self.num_blocks_] = uint32(self.block_size_)
|
---|
101 |
|
---|
102 | split.types[self.num_blocks_] = byte(split.num_types)
|
---|
103 | self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
|
---|
104 | self.last_histogram_ix_[0] = uint(byte(split.num_types))
|
---|
105 | last_entropy[1] = last_entropy[0]
|
---|
106 | last_entropy[0] = entropy
|
---|
107 | self.num_blocks_++
|
---|
108 | split.num_types++
|
---|
109 | self.curr_histogram_ix_++
|
---|
110 | if self.curr_histogram_ix_ < *self.histograms_size_ {
|
---|
111 | histogramClearCommand(&histograms[self.curr_histogram_ix_])
|
---|
112 | }
|
---|
113 | self.block_size_ = 0
|
---|
114 | self.merge_last_count_ = 0
|
---|
115 | self.target_block_size_ = self.min_block_size_
|
---|
116 | } else if diff[1] < diff[0]-20.0 {
|
---|
117 | split.lengths[self.num_blocks_] = uint32(self.block_size_)
|
---|
118 | split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
|
---|
119 | /* Combine this block with second last block. */
|
---|
120 |
|
---|
121 | var tmp uint = self.last_histogram_ix_[0]
|
---|
122 | self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
|
---|
123 | self.last_histogram_ix_[1] = tmp
|
---|
124 | histograms[self.last_histogram_ix_[0]] = combined_histo[1]
|
---|
125 | last_entropy[1] = last_entropy[0]
|
---|
126 | last_entropy[0] = combined_entropy[1]
|
---|
127 | self.num_blocks_++
|
---|
128 | self.block_size_ = 0
|
---|
129 | histogramClearCommand(&histograms[self.curr_histogram_ix_])
|
---|
130 | self.merge_last_count_ = 0
|
---|
131 | self.target_block_size_ = self.min_block_size_
|
---|
132 | } else {
|
---|
133 | /* Combine this block with last block. */
|
---|
134 | split.lengths[self.num_blocks_-1] += uint32(self.block_size_)
|
---|
135 |
|
---|
136 | histograms[self.last_histogram_ix_[0]] = combined_histo[0]
|
---|
137 | last_entropy[0] = combined_entropy[0]
|
---|
138 | if split.num_types == 1 {
|
---|
139 | last_entropy[1] = last_entropy[0]
|
---|
140 | }
|
---|
141 |
|
---|
142 | self.block_size_ = 0
|
---|
143 | histogramClearCommand(&histograms[self.curr_histogram_ix_])
|
---|
144 | self.merge_last_count_++
|
---|
145 | if self.merge_last_count_ > 1 {
|
---|
146 | self.target_block_size_ += self.min_block_size_
|
---|
147 | }
|
---|
148 | }
|
---|
149 | }
|
---|
150 |
|
---|
151 | if is_final {
|
---|
152 | *self.histograms_size_ = split.num_types
|
---|
153 | split.num_blocks = self.num_blocks_
|
---|
154 | }
|
---|
155 | }
|
---|
156 |
|
---|
157 | /* Adds the next symbol to the current histogram. When the current histogram
|
---|
158 | reaches the target size, decides on merging the block. */
|
---|
159 | func blockSplitterAddSymbolCommand(self *blockSplitterCommand, symbol uint) {
|
---|
160 | histogramAddCommand(&self.histograms_[self.curr_histogram_ix_], symbol)
|
---|
161 | self.block_size_++
|
---|
162 | if self.block_size_ == self.target_block_size_ {
|
---|
163 | blockSplitterFinishBlockCommand(self, false) /* is_final = */
|
---|
164 | }
|
---|
165 | }
|
---|