[145] | 1 | package brotli
|
---|
| 2 |
|
---|
| 3 | import "encoding/binary"
|
---|
| 4 |
|
---|
| 5 | /* Copyright 2016 Google Inc. All Rights Reserved.
|
---|
| 6 |
|
---|
| 7 | Distributed under MIT license.
|
---|
| 8 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
---|
| 9 | */
|
---|
| 10 |
|
---|
| 11 | func (*h10) HashTypeLength() uint {
|
---|
| 12 | return 4
|
---|
| 13 | }
|
---|
| 14 |
|
---|
| 15 | func (*h10) StoreLookahead() uint {
|
---|
| 16 | return 128
|
---|
| 17 | }
|
---|
| 18 |
|
---|
| 19 | func hashBytesH10(data []byte) uint32 {
|
---|
| 20 | var h uint32 = binary.LittleEndian.Uint32(data) * kHashMul32
|
---|
| 21 |
|
---|
| 22 | /* The higher bits contain more mixture from the multiplication,
|
---|
| 23 | so we take our results from there. */
|
---|
| 24 | return h >> (32 - 17)
|
---|
| 25 | }
|
---|
| 26 |
|
---|
| 27 | /* A (forgetful) hash table where each hash bucket contains a binary tree of
|
---|
| 28 | sequences whose first 4 bytes share the same hash code.
|
---|
| 29 | Each sequence is 128 long and is identified by its starting
|
---|
| 30 | position in the input data. The binary tree is sorted by the lexicographic
|
---|
| 31 | order of the sequences, and it is also a max-heap with respect to the
|
---|
| 32 | starting positions. */
|
---|
| 33 | type h10 struct {
|
---|
| 34 | hasherCommon
|
---|
| 35 | window_mask_ uint
|
---|
| 36 | buckets_ [1 << 17]uint32
|
---|
| 37 | invalid_pos_ uint32
|
---|
| 38 | forest []uint32
|
---|
| 39 | }
|
---|
| 40 |
|
---|
| 41 | func (h *h10) Initialize(params *encoderParams) {
|
---|
| 42 | h.window_mask_ = (1 << params.lgwin) - 1
|
---|
| 43 | h.invalid_pos_ = uint32(0 - h.window_mask_)
|
---|
| 44 | var num_nodes uint = uint(1) << params.lgwin
|
---|
| 45 | h.forest = make([]uint32, 2*num_nodes)
|
---|
| 46 | }
|
---|
| 47 |
|
---|
| 48 | func (h *h10) Prepare(one_shot bool, input_size uint, data []byte) {
|
---|
| 49 | var invalid_pos uint32 = h.invalid_pos_
|
---|
| 50 | var i uint32
|
---|
| 51 | for i = 0; i < 1<<17; i++ {
|
---|
| 52 | h.buckets_[i] = invalid_pos
|
---|
| 53 | }
|
---|
| 54 | }
|
---|
| 55 |
|
---|
| 56 | func leftChildIndexH10(self *h10, pos uint) uint {
|
---|
| 57 | return 2 * (pos & self.window_mask_)
|
---|
| 58 | }
|
---|
| 59 |
|
---|
| 60 | func rightChildIndexH10(self *h10, pos uint) uint {
|
---|
| 61 | return 2*(pos&self.window_mask_) + 1
|
---|
| 62 | }
|
---|
| 63 |
|
---|
| 64 | /* Stores the hash of the next 4 bytes and in a single tree-traversal, the
|
---|
| 65 | hash bucket's binary tree is searched for matches and is re-rooted at the
|
---|
| 66 | current position.
|
---|
| 67 |
|
---|
| 68 | If less than 128 data is available, the hash bucket of the
|
---|
| 69 | current position is searched for matches, but the state of the hash table
|
---|
| 70 | is not changed, since we can not know the final sorting order of the
|
---|
| 71 | current (incomplete) sequence.
|
---|
| 72 |
|
---|
| 73 | This function must be called with increasing cur_ix positions. */
|
---|
| 74 | func storeAndFindMatchesH10(self *h10, data []byte, cur_ix uint, ring_buffer_mask uint, max_length uint, max_backward uint, best_len *uint, matches []backwardMatch) []backwardMatch {
|
---|
| 75 | var cur_ix_masked uint = cur_ix & ring_buffer_mask
|
---|
| 76 | var max_comp_len uint = brotli_min_size_t(max_length, 128)
|
---|
| 77 | var should_reroot_tree bool = (max_length >= 128)
|
---|
| 78 | var key uint32 = hashBytesH10(data[cur_ix_masked:])
|
---|
| 79 | var forest []uint32 = self.forest
|
---|
| 80 | var prev_ix uint = uint(self.buckets_[key])
|
---|
| 81 | var node_left uint = leftChildIndexH10(self, cur_ix)
|
---|
| 82 | var node_right uint = rightChildIndexH10(self, cur_ix)
|
---|
| 83 | var best_len_left uint = 0
|
---|
| 84 | var best_len_right uint = 0
|
---|
| 85 | var depth_remaining uint
|
---|
| 86 | /* The forest index of the rightmost node of the left subtree of the new
|
---|
| 87 | root, updated as we traverse and re-root the tree of the hash bucket. */
|
---|
| 88 |
|
---|
| 89 | /* The forest index of the leftmost node of the right subtree of the new
|
---|
| 90 | root, updated as we traverse and re-root the tree of the hash bucket. */
|
---|
| 91 |
|
---|
| 92 | /* The match length of the rightmost node of the left subtree of the new
|
---|
| 93 | root, updated as we traverse and re-root the tree of the hash bucket. */
|
---|
| 94 |
|
---|
| 95 | /* The match length of the leftmost node of the right subtree of the new
|
---|
| 96 | root, updated as we traverse and re-root the tree of the hash bucket. */
|
---|
| 97 | if should_reroot_tree {
|
---|
| 98 | self.buckets_[key] = uint32(cur_ix)
|
---|
| 99 | }
|
---|
| 100 |
|
---|
| 101 | for depth_remaining = 64; ; depth_remaining-- {
|
---|
| 102 | var backward uint = cur_ix - prev_ix
|
---|
| 103 | var prev_ix_masked uint = prev_ix & ring_buffer_mask
|
---|
| 104 | if backward == 0 || backward > max_backward || depth_remaining == 0 {
|
---|
| 105 | if should_reroot_tree {
|
---|
| 106 | forest[node_left] = self.invalid_pos_
|
---|
| 107 | forest[node_right] = self.invalid_pos_
|
---|
| 108 | }
|
---|
| 109 |
|
---|
| 110 | break
|
---|
| 111 | }
|
---|
| 112 | {
|
---|
| 113 | var cur_len uint = brotli_min_size_t(best_len_left, best_len_right)
|
---|
| 114 | var len uint
|
---|
| 115 | assert(cur_len <= 128)
|
---|
| 116 | len = cur_len + findMatchLengthWithLimit(data[cur_ix_masked+cur_len:], data[prev_ix_masked+cur_len:], max_length-cur_len)
|
---|
| 117 | if matches != nil && len > *best_len {
|
---|
| 118 | *best_len = uint(len)
|
---|
| 119 | initBackwardMatch(&matches[0], backward, uint(len))
|
---|
| 120 | matches = matches[1:]
|
---|
| 121 | }
|
---|
| 122 |
|
---|
| 123 | if len >= max_comp_len {
|
---|
| 124 | if should_reroot_tree {
|
---|
| 125 | forest[node_left] = forest[leftChildIndexH10(self, prev_ix)]
|
---|
| 126 | forest[node_right] = forest[rightChildIndexH10(self, prev_ix)]
|
---|
| 127 | }
|
---|
| 128 |
|
---|
| 129 | break
|
---|
| 130 | }
|
---|
| 131 |
|
---|
| 132 | if data[cur_ix_masked+len] > data[prev_ix_masked+len] {
|
---|
| 133 | best_len_left = uint(len)
|
---|
| 134 | if should_reroot_tree {
|
---|
| 135 | forest[node_left] = uint32(prev_ix)
|
---|
| 136 | }
|
---|
| 137 |
|
---|
| 138 | node_left = rightChildIndexH10(self, prev_ix)
|
---|
| 139 | prev_ix = uint(forest[node_left])
|
---|
| 140 | } else {
|
---|
| 141 | best_len_right = uint(len)
|
---|
| 142 | if should_reroot_tree {
|
---|
| 143 | forest[node_right] = uint32(prev_ix)
|
---|
| 144 | }
|
---|
| 145 |
|
---|
| 146 | node_right = leftChildIndexH10(self, prev_ix)
|
---|
| 147 | prev_ix = uint(forest[node_right])
|
---|
| 148 | }
|
---|
| 149 | }
|
---|
| 150 | }
|
---|
| 151 |
|
---|
| 152 | return matches
|
---|
| 153 | }
|
---|
| 154 |
|
---|
| 155 | /* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
|
---|
| 156 | length of max_length and stores the position cur_ix in the hash table.
|
---|
| 157 |
|
---|
| 158 | Sets *num_matches to the number of matches found, and stores the found
|
---|
| 159 | matches in matches[0] to matches[*num_matches - 1]. The matches will be
|
---|
| 160 | sorted by strictly increasing length and (non-strictly) increasing
|
---|
| 161 | distance. */
|
---|
| 162 | func findAllMatchesH10(handle *h10, dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, cur_ix uint, max_length uint, max_backward uint, gap uint, params *encoderParams, matches []backwardMatch) uint {
|
---|
| 163 | var orig_matches []backwardMatch = matches
|
---|
| 164 | var cur_ix_masked uint = cur_ix & ring_buffer_mask
|
---|
| 165 | var best_len uint = 1
|
---|
| 166 | var short_match_max_backward uint
|
---|
| 167 | if params.quality != hqZopflificationQuality {
|
---|
| 168 | short_match_max_backward = 16
|
---|
| 169 | } else {
|
---|
| 170 | short_match_max_backward = 64
|
---|
| 171 | }
|
---|
| 172 | var stop uint = cur_ix - short_match_max_backward
|
---|
| 173 | var dict_matches [maxStaticDictionaryMatchLen + 1]uint32
|
---|
| 174 | var i uint
|
---|
| 175 | if cur_ix < short_match_max_backward {
|
---|
| 176 | stop = 0
|
---|
| 177 | }
|
---|
| 178 | for i = cur_ix - 1; i > stop && best_len <= 2; i-- {
|
---|
| 179 | var prev_ix uint = i
|
---|
| 180 | var backward uint = cur_ix - prev_ix
|
---|
| 181 | if backward > max_backward {
|
---|
| 182 | break
|
---|
| 183 | }
|
---|
| 184 |
|
---|
| 185 | prev_ix &= ring_buffer_mask
|
---|
| 186 | if data[cur_ix_masked] != data[prev_ix] || data[cur_ix_masked+1] != data[prev_ix+1] {
|
---|
| 187 | continue
|
---|
| 188 | }
|
---|
| 189 | {
|
---|
| 190 | var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
---|
| 191 | if len > best_len {
|
---|
| 192 | best_len = uint(len)
|
---|
| 193 | initBackwardMatch(&matches[0], backward, uint(len))
|
---|
| 194 | matches = matches[1:]
|
---|
| 195 | }
|
---|
| 196 | }
|
---|
| 197 | }
|
---|
| 198 |
|
---|
| 199 | if best_len < max_length {
|
---|
| 200 | matches = storeAndFindMatchesH10(handle, data, cur_ix, ring_buffer_mask, max_length, max_backward, &best_len, matches)
|
---|
| 201 | }
|
---|
| 202 |
|
---|
| 203 | for i = 0; i <= maxStaticDictionaryMatchLen; i++ {
|
---|
| 204 | dict_matches[i] = kInvalidMatch
|
---|
| 205 | }
|
---|
| 206 | {
|
---|
| 207 | var minlen uint = brotli_max_size_t(4, best_len+1)
|
---|
| 208 | if findAllStaticDictionaryMatches(dictionary, data[cur_ix_masked:], minlen, max_length, dict_matches[0:]) {
|
---|
| 209 | var maxlen uint = brotli_min_size_t(maxStaticDictionaryMatchLen, max_length)
|
---|
| 210 | var l uint
|
---|
| 211 | for l = minlen; l <= maxlen; l++ {
|
---|
| 212 | var dict_id uint32 = dict_matches[l]
|
---|
| 213 | if dict_id < kInvalidMatch {
|
---|
| 214 | var distance uint = max_backward + gap + uint(dict_id>>5) + 1
|
---|
| 215 | if distance <= params.dist.max_distance {
|
---|
| 216 | initDictionaryBackwardMatch(&matches[0], distance, l, uint(dict_id&31))
|
---|
| 217 | matches = matches[1:]
|
---|
| 218 | }
|
---|
| 219 | }
|
---|
| 220 | }
|
---|
| 221 | }
|
---|
| 222 | }
|
---|
| 223 |
|
---|
| 224 | return uint(-cap(matches) + cap(orig_matches))
|
---|
| 225 | }
|
---|
| 226 |
|
---|
| 227 | /* Stores the hash of the next 4 bytes and re-roots the binary tree at the
|
---|
| 228 | current sequence, without returning any matches.
|
---|
| 229 | REQUIRES: ix + 128 <= end-of-current-block */
|
---|
| 230 | func (h *h10) Store(data []byte, mask uint, ix uint) {
|
---|
| 231 | var max_backward uint = h.window_mask_ - windowGap + 1
|
---|
| 232 | /* Maximum distance is window size - 16, see section 9.1. of the spec. */
|
---|
| 233 | storeAndFindMatchesH10(h, data, ix, mask, 128, max_backward, nil, nil)
|
---|
| 234 | }
|
---|
| 235 |
|
---|
| 236 | func (h *h10) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
|
---|
| 237 | var i uint = ix_start
|
---|
| 238 | var j uint = ix_start
|
---|
| 239 | if ix_start+63 <= ix_end {
|
---|
| 240 | i = ix_end - 63
|
---|
| 241 | }
|
---|
| 242 |
|
---|
| 243 | if ix_start+512 <= i {
|
---|
| 244 | for ; j < i; j += 8 {
|
---|
| 245 | h.Store(data, mask, j)
|
---|
| 246 | }
|
---|
| 247 | }
|
---|
| 248 |
|
---|
| 249 | for ; i < ix_end; i++ {
|
---|
| 250 | h.Store(data, mask, i)
|
---|
| 251 | }
|
---|
| 252 | }
|
---|
| 253 |
|
---|
| 254 | func (h *h10) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
|
---|
| 255 | if num_bytes >= h.HashTypeLength()-1 && position >= 128 {
|
---|
| 256 | var i_start uint = position - 128 + 1
|
---|
| 257 | var i_end uint = brotli_min_size_t(position, i_start+num_bytes)
|
---|
| 258 | /* Store the last `128 - 1` positions in the hasher.
|
---|
| 259 | These could not be calculated before, since they require knowledge
|
---|
| 260 | of both the previous and the current block. */
|
---|
| 261 |
|
---|
| 262 | var i uint
|
---|
| 263 | for i = i_start; i < i_end; i++ {
|
---|
| 264 | /* Maximum distance is window size - 16, see section 9.1. of the spec.
|
---|
| 265 | Furthermore, we have to make sure that we don't look further back
|
---|
| 266 | from the start of the next block than the window size, otherwise we
|
---|
| 267 | could access already overwritten areas of the ring-buffer. */
|
---|
| 268 | var max_backward uint = h.window_mask_ - brotli_max_size_t(windowGap-1, position-i)
|
---|
| 269 |
|
---|
| 270 | /* We know that i + 128 <= position + num_bytes, i.e. the
|
---|
| 271 | end of the current block and that we have at least
|
---|
| 272 | 128 tail in the ring-buffer. */
|
---|
| 273 | storeAndFindMatchesH10(h, ringbuffer, i, ringbuffer_mask, 128, max_backward, nil, nil)
|
---|
| 274 | }
|
---|
| 275 | }
|
---|
| 276 | }
|
---|
| 277 |
|
---|
| 278 | /* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
|
---|
| 279 | const maxNumMatchesH10 = 128
|
---|
| 280 |
|
---|
| 281 | func (*h10) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
|
---|
| 282 | panic("unimplemented")
|
---|
| 283 | }
|
---|
| 284 |
|
---|
| 285 | func (*h10) PrepareDistanceCache(distance_cache []int) {
|
---|
| 286 | panic("unimplemented")
|
---|
| 287 | }
|
---|