[145] | 1 | package brotli
|
---|
| 2 |
|
---|
| 3 | import (
|
---|
| 4 | "encoding/binary"
|
---|
| 5 | "fmt"
|
---|
| 6 | )
|
---|
| 7 |
|
---|
| 8 | type hasherCommon struct {
|
---|
| 9 | params hasherParams
|
---|
| 10 | is_prepared_ bool
|
---|
| 11 | dict_num_lookups uint
|
---|
| 12 | dict_num_matches uint
|
---|
| 13 | }
|
---|
| 14 |
|
---|
| 15 | func (h *hasherCommon) Common() *hasherCommon {
|
---|
| 16 | return h
|
---|
| 17 | }
|
---|
| 18 |
|
---|
| 19 | type hasherHandle interface {
|
---|
| 20 | Common() *hasherCommon
|
---|
| 21 | Initialize(params *encoderParams)
|
---|
| 22 | Prepare(one_shot bool, input_size uint, data []byte)
|
---|
| 23 | StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint)
|
---|
| 24 | HashTypeLength() uint
|
---|
| 25 | StoreLookahead() uint
|
---|
| 26 | PrepareDistanceCache(distance_cache []int)
|
---|
| 27 | FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult)
|
---|
| 28 | StoreRange(data []byte, mask uint, ix_start uint, ix_end uint)
|
---|
| 29 | Store(data []byte, mask uint, ix uint)
|
---|
| 30 | }
|
---|
| 31 |
|
---|
| 32 | const kCutoffTransformsCount uint32 = 10
|
---|
| 33 |
|
---|
| 34 | /* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */
|
---|
| 35 | /* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
|
---|
| 36 | const kCutoffTransforms uint64 = 0x071B520ADA2D3200
|
---|
| 37 |
|
---|
| 38 | type hasherSearchResult struct {
|
---|
| 39 | len uint
|
---|
| 40 | distance uint
|
---|
| 41 | score uint
|
---|
| 42 | len_code_delta int
|
---|
| 43 | }
|
---|
| 44 |
|
---|
| 45 | /* kHashMul32 multiplier has these properties:
|
---|
| 46 | * The multiplier must be odd. Otherwise we may lose the highest bit.
|
---|
| 47 | * No long streaks of ones or zeros.
|
---|
| 48 | * There is no effort to ensure that it is a prime, the oddity is enough
|
---|
| 49 | for this use.
|
---|
| 50 | * The number has been tuned heuristically against compression benchmarks. */
|
---|
| 51 | const kHashMul32 uint32 = 0x1E35A7BD
|
---|
| 52 |
|
---|
| 53 | const kHashMul64 uint64 = 0x1E35A7BD1E35A7BD
|
---|
| 54 |
|
---|
| 55 | const kHashMul64Long uint64 = 0x1FE35A7BD3579BD3
|
---|
| 56 |
|
---|
| 57 | func hash14(data []byte) uint32 {
|
---|
| 58 | var h uint32 = binary.LittleEndian.Uint32(data) * kHashMul32
|
---|
| 59 |
|
---|
| 60 | /* The higher bits contain more mixture from the multiplication,
|
---|
| 61 | so we take our results from there. */
|
---|
| 62 | return h >> (32 - 14)
|
---|
| 63 | }
|
---|
| 64 |
|
---|
| 65 | func prepareDistanceCache(distance_cache []int, num_distances int) {
|
---|
| 66 | if num_distances > 4 {
|
---|
| 67 | var last_distance int = distance_cache[0]
|
---|
| 68 | distance_cache[4] = last_distance - 1
|
---|
| 69 | distance_cache[5] = last_distance + 1
|
---|
| 70 | distance_cache[6] = last_distance - 2
|
---|
| 71 | distance_cache[7] = last_distance + 2
|
---|
| 72 | distance_cache[8] = last_distance - 3
|
---|
| 73 | distance_cache[9] = last_distance + 3
|
---|
| 74 | if num_distances > 10 {
|
---|
| 75 | var next_last_distance int = distance_cache[1]
|
---|
| 76 | distance_cache[10] = next_last_distance - 1
|
---|
| 77 | distance_cache[11] = next_last_distance + 1
|
---|
| 78 | distance_cache[12] = next_last_distance - 2
|
---|
| 79 | distance_cache[13] = next_last_distance + 2
|
---|
| 80 | distance_cache[14] = next_last_distance - 3
|
---|
| 81 | distance_cache[15] = next_last_distance + 3
|
---|
| 82 | }
|
---|
| 83 | }
|
---|
| 84 | }
|
---|
| 85 |
|
---|
| 86 | const literalByteScore = 135
|
---|
| 87 |
|
---|
| 88 | const distanceBitPenalty = 30
|
---|
| 89 |
|
---|
| 90 | /* Score must be positive after applying maximal penalty. */
|
---|
| 91 | const scoreBase = (distanceBitPenalty * 8 * 8)
|
---|
| 92 |
|
---|
| 93 | /* Usually, we always choose the longest backward reference. This function
|
---|
| 94 | allows for the exception of that rule.
|
---|
| 95 |
|
---|
| 96 | If we choose a backward reference that is further away, it will
|
---|
| 97 | usually be coded with more bits. We approximate this by assuming
|
---|
| 98 | log2(distance). If the distance can be expressed in terms of the
|
---|
| 99 | last four distances, we use some heuristic constants to estimate
|
---|
| 100 | the bits cost. For the first up to four literals we use the bit
|
---|
| 101 | cost of the literals from the literal cost model, after that we
|
---|
| 102 | use the average bit cost of the cost model.
|
---|
| 103 |
|
---|
| 104 | This function is used to sometimes discard a longer backward reference
|
---|
| 105 | when it is not much longer and the bit cost for encoding it is more
|
---|
| 106 | than the saved literals.
|
---|
| 107 |
|
---|
| 108 | backward_reference_offset MUST be positive. */
|
---|
| 109 | func backwardReferenceScore(copy_length uint, backward_reference_offset uint) uint {
|
---|
| 110 | return scoreBase + literalByteScore*uint(copy_length) - distanceBitPenalty*uint(log2FloorNonZero(backward_reference_offset))
|
---|
| 111 | }
|
---|
| 112 |
|
---|
| 113 | func backwardReferenceScoreUsingLastDistance(copy_length uint) uint {
|
---|
| 114 | return literalByteScore*uint(copy_length) + scoreBase + 15
|
---|
| 115 | }
|
---|
| 116 |
|
---|
| 117 | func backwardReferencePenaltyUsingLastDistance(distance_short_code uint) uint {
|
---|
| 118 | return uint(39) + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE)
|
---|
| 119 | }
|
---|
| 120 |
|
---|
| 121 | func testStaticDictionaryItem(dictionary *encoderDictionary, item uint, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult) bool {
|
---|
| 122 | var len uint
|
---|
| 123 | var word_idx uint
|
---|
| 124 | var offset uint
|
---|
| 125 | var matchlen uint
|
---|
| 126 | var backward uint
|
---|
| 127 | var score uint
|
---|
| 128 | len = item & 0x1F
|
---|
| 129 | word_idx = item >> 5
|
---|
| 130 | offset = uint(dictionary.words.offsets_by_length[len]) + len*word_idx
|
---|
| 131 | if len > max_length {
|
---|
| 132 | return false
|
---|
| 133 | }
|
---|
| 134 |
|
---|
| 135 | matchlen = findMatchLengthWithLimit(data, dictionary.words.data[offset:], uint(len))
|
---|
| 136 | if matchlen+uint(dictionary.cutoffTransformsCount) <= len || matchlen == 0 {
|
---|
| 137 | return false
|
---|
| 138 | }
|
---|
| 139 | {
|
---|
| 140 | var cut uint = len - matchlen
|
---|
| 141 | var transform_id uint = (cut << 2) + uint((dictionary.cutoffTransforms>>(cut*6))&0x3F)
|
---|
| 142 | backward = max_backward + 1 + word_idx + (transform_id << dictionary.words.size_bits_by_length[len])
|
---|
| 143 | }
|
---|
| 144 |
|
---|
| 145 | if backward > max_distance {
|
---|
| 146 | return false
|
---|
| 147 | }
|
---|
| 148 |
|
---|
| 149 | score = backwardReferenceScore(matchlen, backward)
|
---|
| 150 | if score < out.score {
|
---|
| 151 | return false
|
---|
| 152 | }
|
---|
| 153 |
|
---|
| 154 | out.len = matchlen
|
---|
| 155 | out.len_code_delta = int(len) - int(matchlen)
|
---|
| 156 | out.distance = backward
|
---|
| 157 | out.score = score
|
---|
| 158 | return true
|
---|
| 159 | }
|
---|
| 160 |
|
---|
| 161 | func searchInStaticDictionary(dictionary *encoderDictionary, handle hasherHandle, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult, shallow bool) {
|
---|
| 162 | var key uint
|
---|
| 163 | var i uint
|
---|
| 164 | var self *hasherCommon = handle.Common()
|
---|
| 165 | if self.dict_num_matches < self.dict_num_lookups>>7 {
|
---|
| 166 | return
|
---|
| 167 | }
|
---|
| 168 |
|
---|
| 169 | key = uint(hash14(data) << 1)
|
---|
| 170 | for i = 0; ; (func() { i++; key++ })() {
|
---|
| 171 | var tmp uint
|
---|
| 172 | if shallow {
|
---|
| 173 | tmp = 1
|
---|
| 174 | } else {
|
---|
| 175 | tmp = 2
|
---|
| 176 | }
|
---|
| 177 | if i >= tmp {
|
---|
| 178 | break
|
---|
| 179 | }
|
---|
| 180 | var item uint = uint(dictionary.hash_table[key])
|
---|
| 181 | self.dict_num_lookups++
|
---|
| 182 | if item != 0 {
|
---|
| 183 | var item_matches bool = testStaticDictionaryItem(dictionary, item, data, max_length, max_backward, max_distance, out)
|
---|
| 184 | if item_matches {
|
---|
| 185 | self.dict_num_matches++
|
---|
| 186 | }
|
---|
| 187 | }
|
---|
| 188 | }
|
---|
| 189 | }
|
---|
| 190 |
|
---|
| 191 | type backwardMatch struct {
|
---|
| 192 | distance uint32
|
---|
| 193 | length_and_code uint32
|
---|
| 194 | }
|
---|
| 195 |
|
---|
| 196 | func initBackwardMatch(self *backwardMatch, dist uint, len uint) {
|
---|
| 197 | self.distance = uint32(dist)
|
---|
| 198 | self.length_and_code = uint32(len << 5)
|
---|
| 199 | }
|
---|
| 200 |
|
---|
| 201 | func initDictionaryBackwardMatch(self *backwardMatch, dist uint, len uint, len_code uint) {
|
---|
| 202 | self.distance = uint32(dist)
|
---|
| 203 | var tmp uint
|
---|
| 204 | if len == len_code {
|
---|
| 205 | tmp = 0
|
---|
| 206 | } else {
|
---|
| 207 | tmp = len_code
|
---|
| 208 | }
|
---|
| 209 | self.length_and_code = uint32(len<<5 | tmp)
|
---|
| 210 | }
|
---|
| 211 |
|
---|
| 212 | func backwardMatchLength(self *backwardMatch) uint {
|
---|
| 213 | return uint(self.length_and_code >> 5)
|
---|
| 214 | }
|
---|
| 215 |
|
---|
| 216 | func backwardMatchLengthCode(self *backwardMatch) uint {
|
---|
| 217 | var code uint = uint(self.length_and_code) & 31
|
---|
| 218 | if code != 0 {
|
---|
| 219 | return code
|
---|
| 220 | } else {
|
---|
| 221 | return backwardMatchLength(self)
|
---|
| 222 | }
|
---|
| 223 | }
|
---|
| 224 |
|
---|
| 225 | func hasherReset(handle hasherHandle) {
|
---|
| 226 | if handle == nil {
|
---|
| 227 | return
|
---|
| 228 | }
|
---|
| 229 | handle.Common().is_prepared_ = false
|
---|
| 230 | }
|
---|
| 231 |
|
---|
| 232 | func newHasher(typ int) hasherHandle {
|
---|
| 233 | switch typ {
|
---|
| 234 | case 2:
|
---|
| 235 | return &hashLongestMatchQuickly{
|
---|
| 236 | bucketBits: 16,
|
---|
| 237 | bucketSweep: 1,
|
---|
| 238 | hashLen: 5,
|
---|
| 239 | useDictionary: true,
|
---|
| 240 | }
|
---|
| 241 | case 3:
|
---|
| 242 | return &hashLongestMatchQuickly{
|
---|
| 243 | bucketBits: 16,
|
---|
| 244 | bucketSweep: 2,
|
---|
| 245 | hashLen: 5,
|
---|
| 246 | useDictionary: false,
|
---|
| 247 | }
|
---|
| 248 | case 4:
|
---|
| 249 | return &hashLongestMatchQuickly{
|
---|
| 250 | bucketBits: 17,
|
---|
| 251 | bucketSweep: 4,
|
---|
| 252 | hashLen: 5,
|
---|
| 253 | useDictionary: true,
|
---|
| 254 | }
|
---|
| 255 | case 5:
|
---|
| 256 | return new(h5)
|
---|
| 257 | case 6:
|
---|
| 258 | return new(h6)
|
---|
| 259 | case 10:
|
---|
| 260 | return new(h10)
|
---|
| 261 | case 35:
|
---|
| 262 | return &hashComposite{
|
---|
| 263 | ha: newHasher(3),
|
---|
| 264 | hb: &hashRolling{jump: 4},
|
---|
| 265 | }
|
---|
| 266 | case 40:
|
---|
| 267 | return &hashForgetfulChain{
|
---|
| 268 | bucketBits: 15,
|
---|
| 269 | numBanks: 1,
|
---|
| 270 | bankBits: 16,
|
---|
| 271 | numLastDistancesToCheck: 4,
|
---|
| 272 | }
|
---|
| 273 | case 41:
|
---|
| 274 | return &hashForgetfulChain{
|
---|
| 275 | bucketBits: 15,
|
---|
| 276 | numBanks: 1,
|
---|
| 277 | bankBits: 16,
|
---|
| 278 | numLastDistancesToCheck: 10,
|
---|
| 279 | }
|
---|
| 280 | case 42:
|
---|
| 281 | return &hashForgetfulChain{
|
---|
| 282 | bucketBits: 15,
|
---|
| 283 | numBanks: 512,
|
---|
| 284 | bankBits: 9,
|
---|
| 285 | numLastDistancesToCheck: 16,
|
---|
| 286 | }
|
---|
| 287 | case 54:
|
---|
| 288 | return &hashLongestMatchQuickly{
|
---|
| 289 | bucketBits: 20,
|
---|
| 290 | bucketSweep: 4,
|
---|
| 291 | hashLen: 7,
|
---|
| 292 | useDictionary: false,
|
---|
| 293 | }
|
---|
| 294 | case 55:
|
---|
| 295 | return &hashComposite{
|
---|
| 296 | ha: newHasher(54),
|
---|
| 297 | hb: &hashRolling{jump: 4},
|
---|
| 298 | }
|
---|
| 299 | case 65:
|
---|
| 300 | return &hashComposite{
|
---|
| 301 | ha: newHasher(6),
|
---|
| 302 | hb: &hashRolling{jump: 1},
|
---|
| 303 | }
|
---|
| 304 | }
|
---|
| 305 |
|
---|
| 306 | panic(fmt.Sprintf("unknown hasher type: %d", typ))
|
---|
| 307 | }
|
---|
| 308 |
|
---|
| 309 | func hasherSetup(handle *hasherHandle, params *encoderParams, data []byte, position uint, input_size uint, is_last bool) {
|
---|
| 310 | var self hasherHandle = nil
|
---|
| 311 | var common *hasherCommon = nil
|
---|
| 312 | var one_shot bool = (position == 0 && is_last)
|
---|
| 313 | if *handle == nil {
|
---|
| 314 | chooseHasher(params, ¶ms.hasher)
|
---|
| 315 | self = newHasher(params.hasher.type_)
|
---|
| 316 |
|
---|
| 317 | *handle = self
|
---|
| 318 | common = self.Common()
|
---|
| 319 | common.params = params.hasher
|
---|
| 320 | self.Initialize(params)
|
---|
| 321 | }
|
---|
| 322 |
|
---|
| 323 | self = *handle
|
---|
| 324 | common = self.Common()
|
---|
| 325 | if !common.is_prepared_ {
|
---|
| 326 | self.Prepare(one_shot, input_size, data)
|
---|
| 327 |
|
---|
| 328 | if position == 0 {
|
---|
| 329 | common.dict_num_lookups = 0
|
---|
| 330 | common.dict_num_matches = 0
|
---|
| 331 | }
|
---|
| 332 |
|
---|
| 333 | common.is_prepared_ = true
|
---|
| 334 | }
|
---|
| 335 | }
|
---|
| 336 |
|
---|
| 337 | func initOrStitchToPreviousBlock(handle *hasherHandle, data []byte, mask uint, params *encoderParams, position uint, input_size uint, is_last bool) {
|
---|
| 338 | var self hasherHandle
|
---|
| 339 | hasherSetup(handle, params, data, position, input_size, is_last)
|
---|
| 340 | self = *handle
|
---|
| 341 | self.StitchToPreviousBlock(input_size, position, data, mask)
|
---|
| 342 | }
|
---|