[145] | 1 | package brotli
|
---|
| 2 |
|
---|
| 3 | import "encoding/binary"
|
---|
| 4 |
|
---|
| 5 | /* Copyright 2010 Google Inc. All Rights Reserved.
|
---|
| 6 |
|
---|
| 7 | Distributed under MIT license.
|
---|
| 8 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
---|
| 9 | */
|
---|
| 10 |
|
---|
| 11 | /* A (forgetful) hash table to the data seen by the compressor, to
|
---|
| 12 | help create backward references to previous data.
|
---|
| 13 |
|
---|
| 14 | This is a hash map of fixed size (bucket_size_) to a ring buffer of
|
---|
| 15 | fixed size (block_size_). The ring buffer contains the last block_size_
|
---|
| 16 | index positions of the given hash key in the compressed data. */
|
---|
| 17 | func (*h6) HashTypeLength() uint {
|
---|
| 18 | return 8
|
---|
| 19 | }
|
---|
| 20 |
|
---|
| 21 | func (*h6) StoreLookahead() uint {
|
---|
| 22 | return 8
|
---|
| 23 | }
|
---|
| 24 |
|
---|
| 25 | /* HashBytes is the function that chooses the bucket to place the address in. */
|
---|
| 26 | func hashBytesH6(data []byte, mask uint64, shift int) uint32 {
|
---|
| 27 | var h uint64 = (binary.LittleEndian.Uint64(data) & mask) * kHashMul64Long
|
---|
| 28 |
|
---|
| 29 | /* The higher bits contain more mixture from the multiplication,
|
---|
| 30 | so we take our results from there. */
|
---|
| 31 | return uint32(h >> uint(shift))
|
---|
| 32 | }
|
---|
| 33 |
|
---|
| 34 | type h6 struct {
|
---|
| 35 | hasherCommon
|
---|
| 36 | bucket_size_ uint
|
---|
| 37 | block_size_ uint
|
---|
| 38 | hash_shift_ int
|
---|
| 39 | hash_mask_ uint64
|
---|
| 40 | block_mask_ uint32
|
---|
| 41 | num []uint16
|
---|
| 42 | buckets []uint32
|
---|
| 43 | }
|
---|
| 44 |
|
---|
| 45 | func (h *h6) Initialize(params *encoderParams) {
|
---|
| 46 | h.hash_shift_ = 64 - h.params.bucket_bits
|
---|
| 47 | h.hash_mask_ = (^(uint64(0))) >> uint(64-8*h.params.hash_len)
|
---|
| 48 | h.bucket_size_ = uint(1) << uint(h.params.bucket_bits)
|
---|
| 49 | h.block_size_ = uint(1) << uint(h.params.block_bits)
|
---|
| 50 | h.block_mask_ = uint32(h.block_size_ - 1)
|
---|
| 51 | h.num = make([]uint16, h.bucket_size_)
|
---|
| 52 | h.buckets = make([]uint32, h.block_size_*h.bucket_size_)
|
---|
| 53 | }
|
---|
| 54 |
|
---|
| 55 | func (h *h6) Prepare(one_shot bool, input_size uint, data []byte) {
|
---|
| 56 | var num []uint16 = h.num
|
---|
| 57 | var partial_prepare_threshold uint = h.bucket_size_ >> 6
|
---|
| 58 | /* Partial preparation is 100 times slower (per socket). */
|
---|
| 59 | if one_shot && input_size <= partial_prepare_threshold {
|
---|
| 60 | var i uint
|
---|
| 61 | for i = 0; i < input_size; i++ {
|
---|
| 62 | var key uint32 = hashBytesH6(data[i:], h.hash_mask_, h.hash_shift_)
|
---|
| 63 | num[key] = 0
|
---|
| 64 | }
|
---|
| 65 | } else {
|
---|
| 66 | for i := 0; i < int(h.bucket_size_); i++ {
|
---|
| 67 | num[i] = 0
|
---|
| 68 | }
|
---|
| 69 | }
|
---|
| 70 | }
|
---|
| 71 |
|
---|
| 72 | /* Look at 4 bytes at &data[ix & mask].
|
---|
| 73 | Compute a hash from these, and store the value of ix at that position. */
|
---|
| 74 | func (h *h6) Store(data []byte, mask uint, ix uint) {
|
---|
| 75 | var num []uint16 = h.num
|
---|
| 76 | var key uint32 = hashBytesH6(data[ix&mask:], h.hash_mask_, h.hash_shift_)
|
---|
| 77 | var minor_ix uint = uint(num[key]) & uint(h.block_mask_)
|
---|
| 78 | var offset uint = minor_ix + uint(key<<uint(h.params.block_bits))
|
---|
| 79 | h.buckets[offset] = uint32(ix)
|
---|
| 80 | num[key]++
|
---|
| 81 | }
|
---|
| 82 |
|
---|
| 83 | func (h *h6) StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) {
|
---|
| 84 | var i uint
|
---|
| 85 | for i = ix_start; i < ix_end; i++ {
|
---|
| 86 | h.Store(data, mask, i)
|
---|
| 87 | }
|
---|
| 88 | }
|
---|
| 89 |
|
---|
| 90 | func (h *h6) StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) {
|
---|
| 91 | if num_bytes >= h.HashTypeLength()-1 && position >= 3 {
|
---|
| 92 | /* Prepare the hashes for three last bytes of the last write.
|
---|
| 93 | These could not be calculated before, since they require knowledge
|
---|
| 94 | of both the previous and the current block. */
|
---|
| 95 | h.Store(ringbuffer, ringbuffer_mask, position-3)
|
---|
| 96 | h.Store(ringbuffer, ringbuffer_mask, position-2)
|
---|
| 97 | h.Store(ringbuffer, ringbuffer_mask, position-1)
|
---|
| 98 | }
|
---|
| 99 | }
|
---|
| 100 |
|
---|
| 101 | func (h *h6) PrepareDistanceCache(distance_cache []int) {
|
---|
| 102 | prepareDistanceCache(distance_cache, h.params.num_last_distances_to_check)
|
---|
| 103 | }
|
---|
| 104 |
|
---|
| 105 | /* Find a longest backward match of &data[cur_ix] up to the length of
|
---|
| 106 | max_length and stores the position cur_ix in the hash table.
|
---|
| 107 |
|
---|
| 108 | REQUIRES: PrepareDistanceCacheH6 must be invoked for current distance cache
|
---|
| 109 | values; if this method is invoked repeatedly with the same distance
|
---|
| 110 | cache values, it is enough to invoke PrepareDistanceCacheH6 once.
|
---|
| 111 |
|
---|
| 112 | Does not look for matches longer than max_length.
|
---|
| 113 | Does not look for matches further away than max_backward.
|
---|
| 114 | Writes the best match into |out|.
|
---|
| 115 | |out|->score is updated only if a better match is found. */
|
---|
| 116 | func (h *h6) FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) {
|
---|
| 117 | var num []uint16 = h.num
|
---|
| 118 | var buckets []uint32 = h.buckets
|
---|
| 119 | var cur_ix_masked uint = cur_ix & ring_buffer_mask
|
---|
| 120 | var min_score uint = out.score
|
---|
| 121 | var best_score uint = out.score
|
---|
| 122 | var best_len uint = out.len
|
---|
| 123 | var i uint
|
---|
| 124 | var bucket []uint32
|
---|
| 125 | /* Don't accept a short copy from far away. */
|
---|
| 126 | out.len = 0
|
---|
| 127 |
|
---|
| 128 | out.len_code_delta = 0
|
---|
| 129 |
|
---|
| 130 | /* Try last distance first. */
|
---|
| 131 | for i = 0; i < uint(h.params.num_last_distances_to_check); i++ {
|
---|
| 132 | var backward uint = uint(distance_cache[i])
|
---|
| 133 | var prev_ix uint = uint(cur_ix - backward)
|
---|
| 134 | if prev_ix >= cur_ix {
|
---|
| 135 | continue
|
---|
| 136 | }
|
---|
| 137 |
|
---|
| 138 | if backward > max_backward {
|
---|
| 139 | continue
|
---|
| 140 | }
|
---|
| 141 |
|
---|
| 142 | prev_ix &= ring_buffer_mask
|
---|
| 143 |
|
---|
| 144 | if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
|
---|
| 145 | continue
|
---|
| 146 | }
|
---|
| 147 | {
|
---|
| 148 | var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
---|
| 149 | if len >= 3 || (len == 2 && i < 2) {
|
---|
| 150 | /* Comparing for >= 2 does not change the semantics, but just saves for
|
---|
| 151 | a few unnecessary binary logarithms in backward reference score,
|
---|
| 152 | since we are not interested in such short matches. */
|
---|
| 153 | var score uint = backwardReferenceScoreUsingLastDistance(uint(len))
|
---|
| 154 | if best_score < score {
|
---|
| 155 | if i != 0 {
|
---|
| 156 | score -= backwardReferencePenaltyUsingLastDistance(i)
|
---|
| 157 | }
|
---|
| 158 | if best_score < score {
|
---|
| 159 | best_score = score
|
---|
| 160 | best_len = uint(len)
|
---|
| 161 | out.len = best_len
|
---|
| 162 | out.distance = backward
|
---|
| 163 | out.score = best_score
|
---|
| 164 | }
|
---|
| 165 | }
|
---|
| 166 | }
|
---|
| 167 | }
|
---|
| 168 | }
|
---|
| 169 | {
|
---|
| 170 | var key uint32 = hashBytesH6(data[cur_ix_masked:], h.hash_mask_, h.hash_shift_)
|
---|
| 171 | bucket = buckets[key<<uint(h.params.block_bits):]
|
---|
| 172 | var down uint
|
---|
| 173 | if uint(num[key]) > h.block_size_ {
|
---|
| 174 | down = uint(num[key]) - h.block_size_
|
---|
| 175 | } else {
|
---|
| 176 | down = 0
|
---|
| 177 | }
|
---|
| 178 | for i = uint(num[key]); i > down; {
|
---|
| 179 | var prev_ix uint
|
---|
| 180 | i--
|
---|
| 181 | prev_ix = uint(bucket[uint32(i)&h.block_mask_])
|
---|
| 182 | var backward uint = cur_ix - prev_ix
|
---|
| 183 | if backward > max_backward {
|
---|
| 184 | break
|
---|
| 185 | }
|
---|
| 186 |
|
---|
| 187 | prev_ix &= ring_buffer_mask
|
---|
| 188 | if cur_ix_masked+best_len > ring_buffer_mask || prev_ix+best_len > ring_buffer_mask || data[cur_ix_masked+best_len] != data[prev_ix+best_len] {
|
---|
| 189 | continue
|
---|
| 190 | }
|
---|
| 191 | {
|
---|
| 192 | var len uint = findMatchLengthWithLimit(data[prev_ix:], data[cur_ix_masked:], max_length)
|
---|
| 193 | if len >= 4 {
|
---|
| 194 | /* Comparing for >= 3 does not change the semantics, but just saves
|
---|
| 195 | for a few unnecessary binary logarithms in backward reference
|
---|
| 196 | score, since we are not interested in such short matches. */
|
---|
| 197 | var score uint = backwardReferenceScore(uint(len), backward)
|
---|
| 198 | if best_score < score {
|
---|
| 199 | best_score = score
|
---|
| 200 | best_len = uint(len)
|
---|
| 201 | out.len = best_len
|
---|
| 202 | out.distance = backward
|
---|
| 203 | out.score = best_score
|
---|
| 204 | }
|
---|
| 205 | }
|
---|
| 206 | }
|
---|
| 207 | }
|
---|
| 208 |
|
---|
| 209 | bucket[uint32(num[key])&h.block_mask_] = uint32(cur_ix)
|
---|
| 210 | num[key]++
|
---|
| 211 | }
|
---|
| 212 |
|
---|
| 213 | if min_score == out.score {
|
---|
| 214 | searchInStaticDictionary(dictionary, h, data[cur_ix_masked:], max_length, max_backward+gap, max_distance, out, false)
|
---|
| 215 | }
|
---|
| 216 | }
|
---|