1 | package brotli
|
---|
2 |
|
---|
3 | import "encoding/binary"
|
---|
4 |
|
---|
5 | /* Copyright 2016 Google Inc. All Rights Reserved.
|
---|
6 |
|
---|
7 | Distributed under MIT license.
|
---|
8 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
---|
9 | */
|
---|
10 |
|
---|
11 | func (*h10) HashTypeLength() uint {
|
---|
12 | return 4
|
---|
13 | }
|
---|
14 |
|
---|
15 | func (*h10) StoreLookahead() uint {
|
---|
16 | return 128
|
---|
17 | }
|
---|
18 |
|
---|
19 | func hashBytesH10(data []byte) uint32 {
|
---|
20 | var h uint32 = binary.LittleEndian.Uint32(data) * kHashMul32
|
---|
21 |
|
---|
22 | /* The higher bits contain more mixture from the multiplication,
|
---|
23 | so we take our results from there. */
|
---|
24 | return h >> (32 - 17)
|
---|
25 | }
|
---|
26 |
|
---|
27 | /* A (forgetful) hash table where each hash bucket contains a binary tree of
|
---|
28 | sequences whose first 4 bytes share the same hash code.
|
---|
29 | Each sequence is 128 long and is identified by its starting
|
---|
30 | position in the input data. The binary tree is sorted by the lexicographic
|
---|
31 | order of the sequences, and it is also a max-heap with respect to the
|
---|
32 | starting positions. */
|
---|
33 | type h10 struct {
|
---|
34 | hasherCommon
|
---|
35 | window_mask_ uint
|
---|
36 | buckets_ [1 << 17]uint32
|
---|
37 | invalid_pos_ uint32
|
---|
38 | forest []uint32
|
---|
39 | }
|
---|
40 |
|
---|
41 | func (h *h10) Initialize(params *encoderParams) {
|
---|
42 | h.window_mask_ = (1 << params.lgwin) - 1
|
---|
43 | h.invalid_pos_ = uint32(0 - h.window_mask_)
|
---|
44 | var num_nodes uint = uint(1) << params.lgwin
|
---|
45 | h.forest = make([]uint32, 2*num_nodes)
|
---|
46 | }
|
---|
47 |
|
---|
48 | func (h *h10) Prepare(one_shot bool, input_size uint, data []byte) {
|
---|
49 | var invalid_pos uint32 = h.invalid_pos_
|
---|
50 | var i uint32
|
---|
51 | for i = 0; i < 1<<17; i++ {
|
---|
52 | h.buckets_[i] = invalid_pos
|
---|
53 | }
|
---|
54 | }
|
---|
55 |
|
---|
56 | func leftChildIndexH10(self *h10, pos uint) uint {
|
---|
57 | return 2 * (pos & self.window_mask_)
|
---|
58 | }
|
---|
59 |
|
---|
60 | func rightChildIndexH10(self *h10, pos uint) uint {
|
---|
61 | return 2*(pos&self.window_mask_) + 1
|
---|
62 | }
|
---|
63 |
|
---|
64 | /* Stores the hash of the next 4 bytes and in a single tree-traversal, the
|
---|
65 | hash bucket's binary tree is searched for matches and is re-rooted at the
|
---|
66 | current position.
|
---|
67 |
|
---|
68 | If less than 128 data is available, the hash bucket of the
|
---|
69 | current position is searched for matches, but the state of the hash table
|
---|
70 | is not changed, since we can not know the final sorting order of the
|
---|
71 | current (incomplete) sequence.
|
---|
72 |
|
---|
73 | This function must be called with increasing cur_ix positions. */
|
---|
74 | func storeAndFindMatchesH10(self *h10, data []byte, cur_ix uint, ring_buffer_mask uint, max_length uint, max_backward uint, best_len *uint, matches []backwardMatch) []backwardMatch {
|
---|
75 | var cur_ix_masked uint = cur_ix & ring_buffer_mask
|
---|
76 | var max_comp_len uint = brotli_min_size_t(max_length, 128)
|
---|
77 | var should_reroot_tree bool = (max_length >= 128)
|
---|
78 | var key uint32 = hashBytesH10(data[cur_ix_masked:])
|
---|
79 | var forest []uint32 = self.forest
|
---|
80 | var prev_ix uint = uint(self.buckets_[key])
|
---|
81 | var node_left uint = leftChildIndexH10(self, cur_ix)
|
---|
82 | var node_right uint = rightChildIndexH10(self, cur_ix)
|
---|
83 | var best_len_left uint = 0
|
---|
84 | var best_len_right uint = 0
|
---|
85 | var depth_remaining uint
|
---|
86 | /* The forest index of the rightmost node of the left subtree of the new
|
---|
87 | root, updated as we traverse and re-root the tree of the hash bucket. */
|
---|
88 |
|
---|
89 | /* The forest index of the leftmost node of the right subtree of the new
|
---|
90 | root, updated as we traverse and re-root the tree of the hash bucket. */
|
---|
91 |
|
---|
92 | /* The match length of the rightmost node of the left subtree of the new
|
---|
93 | root, updated as we traverse and re-root the tree of the hash bucket. */
|
---|
94 |
|
---|
95 | /* The match length of the leftmost node of the right subtree of the new
|
---|
96 | root, updated as we traverse and re-root the tree of the hash bucket. */
|
---|
97 | if should_reroot_tree {
|
---|
98 | self.buckets_[key] = uint32(cur_ix)
|
---|
99 | }
|
---|
100 |
|
---|
101 | for depth_remaining = 64; ; depth_remaining-- {
|
---|
102 | var backward uint = cur_ix - prev_ix
|
---|
103 | var prev_ix_masked uint = prev_ix & ring_buffer_mask
|
---|
104 | if backward == 0 || backward > max_backward || depth_remaining == 0 {
|
---|
105 | if should_reroot_tree {
|
---|
106 | forest[node_left] = self.invalid_pos_
|
---|
107 | forest[node_right] = self.invalid_pos_
|
---|
108 | }
|
---|
109 |
|
---|
110 | break
|
---|
111 | }
|
---|
112 | {
|
---|
113 | var cur_len uint = brotli_min_size_t(best_len_left, best_len_right)
|
---|
114 | var len uint
|
---|
115 | assert(cur_len <= 128)
|
---|
116 | len = cur_len + findMatchLengthWithLimit(data[cur_ix_masked+cur_len:], data[prev_ix_masked+cur_len:], max_length-cur_len)
|
---|
117 | if matches != nil && len > *best_len {
|
---|
118 | *best_len = uint(len)
|
---|
119 | initBackwardMatch(&matches[0], backward, uint(len))
|
---|
120 | matches = matches[1:]
|
---|
121 | }
|
---|
122 |
|
---|
123 | if len >= max_comp_len {
|
---|
124 | if should_reroot_tree {
|
---|
125 | forest[node_left] = forest[leftChildIndexH10(self, prev_ix)]
|
---|
126 | forest[node_right] = forest[rightChildIndexH10(self, prev_ix)]
|
---|
127 | }
|
---|
128 |
|
---|
129 | break
|
---|
130 | }
|
---|
131 |
|
---|
132 | if data[cur_ix_masked+len] > data[prev_ix_masked+len] {
|
---|
133 | best_len_left = uint(len)
|
---|
134 | if should_reroot_tree {
|
---|
135 | forest[node_left] = uint32(prev_ix)
|
---|
136 | }
|
---|
137 |
|
---|
138 | node_left = rightChildIndexH10(self, prev_ix)
|
---|
139 | prev_ix = uint(forest[node_left])
|
---|
140 | } else {
|
---|
141 | best_len_right = uint(len)
|
---|
142 | if should_reroot_tree {
|
---|
143 | forest[node_right] = uint32(prev_ix)
|
---|
144 | }
|
---|
145 |
|
---|
146 | node_right = leftChildIndexH10(self, prev_ix)
|
---|
147 | prev_ix = uint(forest[node_right])
|
---|
148 | }
|
---|
149 | }
|
---|
150 | }
|
---|
151 |
|
---|
152 | return matches
|
---|
153 | }
|
---|
154 |
|
---|
155 | /* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
|
---|
156 | length of max_length and stores the position cur_ix in the hash table.
|
---|
157 |
|
---|
158 | Sets *num_matches to the number of matches found, and stores the found
|
---|
159 | matches in matches[0] to matches[*num_matches - 1]. The matches will be
|
---|
160 | sorted by strictly increasing length and (non-strictly) increasing
|
---|
161 | distance. */
|
---|
162 | func findAllMatchesH10(handle *h10, dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, cur_ix uint, max_length uint, max_backward uint, gap uint, params *encoderParams, matches []backwardMatch) uint {
|
---|
163 | var orig_matches []backwardMatch = matches
|
---|
164 | var cur_ix_masked uint = cur_ix & ring_buffer_mask
|
---|
165 | var best_len uint = 1
|
---|
166 | var short_match_max_backward uint
|
---|
167 | if params.quality != hqZopflificationQuality {
|
---|
168 | short_match_max_backward = 16
|
---|
169 | } else {
|
---|
170 | short_match_max_backward = 64
|
---|
171 | }
|
---|
172 | var stop uint = cur_ix - short_match_max_backward
|
---|
173 | var dict_matches [maxStaticDictionaryMatchLen + 1]uint32
|
---|
174 | var i uint
|
---|
175 | if cur_ix < short_match_max_backward {
|
---|
176 | stop = 0
|
---|
177 | }
|
---|
178 | for i = cur_ix - 1; i > stop && best_len <= 2; i-- {
|
---|
179 | var prev_ix uint = i
|
---|
180 | var backward uint = cur_ix - prev_ix
|
---|
181 | if backward > max_backward {
|
---|
182 | break
|
---|
183 | }
|
---|
184 |
|
---|
185 | prev_ix &= ring_buffer_mask
|
---|
186 | if data[cur_ix_masked] != data[prev_ix] || data[cur_ix_masked+1] != data[prev_ix+1] {
|
---|
187 | continue
|
---|
188 | }
|
---|
189 | {
|
---|
190 | var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
---|
191 | if len > best_len {
|
---|
192 | best_len = uint(len)
|
---|
193 | initBackwardMatch(&matches[0], backward, uint(len))
|
---|
194 | matches = matches[1:]
|
---|
195 | }
|
---|
196 | }
|
---|
197 | }
|
---|
198 |
|
---|
199 | if best_len < max_length {
|
---|
200 | matches = storeAndFindMatchesH10(handle, data, cur_ix, ring_buffer_mask, max_length, max_backward, &best_len, matches)
|
---|
201 | }
|
---|
202 |
|
---|
203 | for i = 0; i <= maxStaticDictionaryMatchLen; i++ {
|
---|
204 | dict_matches[i] = kInvalidMatch
|
---|
205 | }
|
---|
206 | {
|
---|
207 | var minlen uint = brotli_max_size_t(4, best_len+1)
|
---|
208 | if findAllStaticDictionaryMatches(dictionary, data[cur_ix_masked:], minlen, max_length, dict_matches[0:]) {
|
---|
209 | var maxlen uint = brotli_min_size_t(maxStaticDictionaryMatchLen, max_length)
|
---|
210 | var l uint
|
---|
211 | for l = minlen; l <= maxlen; l++ {
|
---|
212 | var dict_id uint32 = dict_matches[l]
|
---|
213 | if dict_id < kInvalidMatch {
|
---|
214 | var distance uint = max_backward + gap + uint(dict_id>>5) + 1
|
---|
215 | if distance <= params.dist.max_distance {
|
---|
216 | initDictionaryBackwardMatch(&matches[0], distance, l, uint(dict_id&31))
|
---|
217 | matches = matches[1:]
|
---|
218 | }
|
---|
219 | }
|
---|
220 | }
|
---|
221 | }
|
---|
222 | }
|
---|
223 |
|
---|
224 | return uint(-cap(matches) + cap(orig_matches))
|
---|
225 | }
|
---|
226 |
|
---|
227 | /* Stores the hash of the next 4 bytes and re-roots the binary tree at the
|
---|
228 | current sequence, without returning any matches.
|
---|
229 | REQUIRES: ix + 128 <= end-of-current-block */
|
---|
230 | func (h *h10) Store(data []byte, mask uint, ix uint) {
|
---|
231 | var max_backward uint = h.window_mask_ - windowGap + 1
|
---|
232 | /* Maximum distance is window size - 16, see section 9.1. of the spec. */
|
---|
233 | storeAndFindMatchesH10(h, data, ix, mask, 128, max_backward, nil, nil)
|
---|
234 | }
|
---|
235 |
|
---|
236 | func (h *h10) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
|
---|
237 | var i uint = ix_start
|
---|
238 | var j uint = ix_start
|
---|
239 | if ix_start+63 <= ix_end {
|
---|
240 | i = ix_end - 63
|
---|
241 | }
|
---|
242 |
|
---|
243 | if ix_start+512 <= i {
|
---|
244 | for ; j < i; j += 8 {
|
---|
245 | h.Store(data, mask, j)
|
---|
246 | }
|
---|
247 | }
|
---|
248 |
|
---|
249 | for ; i < ix_end; i++ {
|
---|
250 | h.Store(data, mask, i)
|
---|
251 | }
|
---|
252 | }
|
---|
253 |
|
---|
254 | func (h *h10) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
|
---|
255 | if num_bytes >= h.HashTypeLength()-1 && position >= 128 {
|
---|
256 | var i_start uint = position - 128 + 1
|
---|
257 | var i_end uint = brotli_min_size_t(position, i_start+num_bytes)
|
---|
258 | /* Store the last `128 - 1` positions in the hasher.
|
---|
259 | These could not be calculated before, since they require knowledge
|
---|
260 | of both the previous and the current block. */
|
---|
261 |
|
---|
262 | var i uint
|
---|
263 | for i = i_start; i < i_end; i++ {
|
---|
264 | /* Maximum distance is window size - 16, see section 9.1. of the spec.
|
---|
265 | Furthermore, we have to make sure that we don't look further back
|
---|
266 | from the start of the next block than the window size, otherwise we
|
---|
267 | could access already overwritten areas of the ring-buffer. */
|
---|
268 | var max_backward uint = h.window_mask_ - brotli_max_size_t(windowGap-1, position-i)
|
---|
269 |
|
---|
270 | /* We know that i + 128 <= position + num_bytes, i.e. the
|
---|
271 | end of the current block and that we have at least
|
---|
272 | 128 tail in the ring-buffer. */
|
---|
273 | storeAndFindMatchesH10(h, ringbuffer, i, ringbuffer_mask, 128, max_backward, nil, nil)
|
---|
274 | }
|
---|
275 | }
|
---|
276 | }
|
---|
277 |
|
---|
278 | /* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
|
---|
279 | const maxNumMatchesH10 = 128
|
---|
280 |
|
---|
281 | func (*h10) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
|
---|
282 | panic("unimplemented")
|
---|
283 | }
|
---|
284 |
|
---|
285 | func (*h10) PrepareDistanceCache(distance_cache []int) {
|
---|
286 | panic("unimplemented")
|
---|
287 | }
|
---|