1 | package brotli
|
---|
2 |
|
---|
3 | import (
|
---|
4 | "encoding/binary"
|
---|
5 | "fmt"
|
---|
6 | )
|
---|
7 |
|
---|
8 | type hasherCommon struct {
|
---|
9 | params hasherParams
|
---|
10 | is_prepared_ bool
|
---|
11 | dict_num_lookups uint
|
---|
12 | dict_num_matches uint
|
---|
13 | }
|
---|
14 |
|
---|
15 | func (h *hasherCommon) Common() *hasherCommon {
|
---|
16 | return h
|
---|
17 | }
|
---|
18 |
|
---|
19 | type hasherHandle interface {
|
---|
20 | Common() *hasherCommon
|
---|
21 | Initialize(params *encoderParams)
|
---|
22 | Prepare(one_shot bool, input_size uint, data []byte)
|
---|
23 | StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint)
|
---|
24 | HashTypeLength() uint
|
---|
25 | StoreLookahead() uint
|
---|
26 | PrepareDistanceCache(distance_cache []int)
|
---|
27 | FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult)
|
---|
28 | StoreRange(data []byte, mask uint, ix_start uint, ix_end uint)
|
---|
29 | Store(data []byte, mask uint, ix uint)
|
---|
30 | }
|
---|
31 |
|
---|
32 | const kCutoffTransformsCount uint32 = 10
|
---|
33 |
|
---|
34 | /* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */
|
---|
35 | /* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
|
---|
36 | const kCutoffTransforms uint64 = 0x071B520ADA2D3200
|
---|
37 |
|
---|
38 | type hasherSearchResult struct {
|
---|
39 | len uint
|
---|
40 | distance uint
|
---|
41 | score uint
|
---|
42 | len_code_delta int
|
---|
43 | }
|
---|
44 |
|
---|
45 | /* kHashMul32 multiplier has these properties:
|
---|
46 | * The multiplier must be odd. Otherwise we may lose the highest bit.
|
---|
47 | * No long streaks of ones or zeros.
|
---|
48 | * There is no effort to ensure that it is a prime, the oddity is enough
|
---|
49 | for this use.
|
---|
50 | * The number has been tuned heuristically against compression benchmarks. */
|
---|
51 | const kHashMul32 uint32 = 0x1E35A7BD
|
---|
52 |
|
---|
53 | const kHashMul64 uint64 = 0x1E35A7BD1E35A7BD
|
---|
54 |
|
---|
55 | const kHashMul64Long uint64 = 0x1FE35A7BD3579BD3
|
---|
56 |
|
---|
57 | func hash14(data []byte) uint32 {
|
---|
58 | var h uint32 = binary.LittleEndian.Uint32(data) * kHashMul32
|
---|
59 |
|
---|
60 | /* The higher bits contain more mixture from the multiplication,
|
---|
61 | so we take our results from there. */
|
---|
62 | return h >> (32 - 14)
|
---|
63 | }
|
---|
64 |
|
---|
65 | func prepareDistanceCache(distance_cache []int, num_distances int) {
|
---|
66 | if num_distances > 4 {
|
---|
67 | var last_distance int = distance_cache[0]
|
---|
68 | distance_cache[4] = last_distance - 1
|
---|
69 | distance_cache[5] = last_distance + 1
|
---|
70 | distance_cache[6] = last_distance - 2
|
---|
71 | distance_cache[7] = last_distance + 2
|
---|
72 | distance_cache[8] = last_distance - 3
|
---|
73 | distance_cache[9] = last_distance + 3
|
---|
74 | if num_distances > 10 {
|
---|
75 | var next_last_distance int = distance_cache[1]
|
---|
76 | distance_cache[10] = next_last_distance - 1
|
---|
77 | distance_cache[11] = next_last_distance + 1
|
---|
78 | distance_cache[12] = next_last_distance - 2
|
---|
79 | distance_cache[13] = next_last_distance + 2
|
---|
80 | distance_cache[14] = next_last_distance - 3
|
---|
81 | distance_cache[15] = next_last_distance + 3
|
---|
82 | }
|
---|
83 | }
|
---|
84 | }
|
---|
85 |
|
---|
86 | const literalByteScore = 135
|
---|
87 |
|
---|
88 | const distanceBitPenalty = 30
|
---|
89 |
|
---|
90 | /* Score must be positive after applying maximal penalty. */
|
---|
91 | const scoreBase = (distanceBitPenalty * 8 * 8)
|
---|
92 |
|
---|
93 | /* Usually, we always choose the longest backward reference. This function
|
---|
94 | allows for the exception of that rule.
|
---|
95 |
|
---|
96 | If we choose a backward reference that is further away, it will
|
---|
97 | usually be coded with more bits. We approximate this by assuming
|
---|
98 | log2(distance). If the distance can be expressed in terms of the
|
---|
99 | last four distances, we use some heuristic constants to estimate
|
---|
100 | the bits cost. For the first up to four literals we use the bit
|
---|
101 | cost of the literals from the literal cost model, after that we
|
---|
102 | use the average bit cost of the cost model.
|
---|
103 |
|
---|
104 | This function is used to sometimes discard a longer backward reference
|
---|
105 | when it is not much longer and the bit cost for encoding it is more
|
---|
106 | than the saved literals.
|
---|
107 |
|
---|
108 | backward_reference_offset MUST be positive. */
|
---|
109 | func backwardReferenceScore(copy_length uint, backward_reference_offset uint) uint {
|
---|
110 | return scoreBase + literalByteScore*uint(copy_length) - distanceBitPenalty*uint(log2FloorNonZero(backward_reference_offset))
|
---|
111 | }
|
---|
112 |
|
---|
113 | func backwardReferenceScoreUsingLastDistance(copy_length uint) uint {
|
---|
114 | return literalByteScore*uint(copy_length) + scoreBase + 15
|
---|
115 | }
|
---|
116 |
|
---|
117 | func backwardReferencePenaltyUsingLastDistance(distance_short_code uint) uint {
|
---|
118 | return uint(39) + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE)
|
---|
119 | }
|
---|
120 |
|
---|
121 | func testStaticDictionaryItem(dictionary *encoderDictionary, item uint, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult) bool {
|
---|
122 | var len uint
|
---|
123 | var word_idx uint
|
---|
124 | var offset uint
|
---|
125 | var matchlen uint
|
---|
126 | var backward uint
|
---|
127 | var score uint
|
---|
128 | len = item & 0x1F
|
---|
129 | word_idx = item >> 5
|
---|
130 | offset = uint(dictionary.words.offsets_by_length[len]) + len*word_idx
|
---|
131 | if len > max_length {
|
---|
132 | return false
|
---|
133 | }
|
---|
134 |
|
---|
135 | matchlen = findMatchLengthWithLimit(data, dictionary.words.data[offset:], uint(len))
|
---|
136 | if matchlen+uint(dictionary.cutoffTransformsCount) <= len || matchlen == 0 {
|
---|
137 | return false
|
---|
138 | }
|
---|
139 | {
|
---|
140 | var cut uint = len - matchlen
|
---|
141 | var transform_id uint = (cut << 2) + uint((dictionary.cutoffTransforms>>(cut*6))&0x3F)
|
---|
142 | backward = max_backward + 1 + word_idx + (transform_id << dictionary.words.size_bits_by_length[len])
|
---|
143 | }
|
---|
144 |
|
---|
145 | if backward > max_distance {
|
---|
146 | return false
|
---|
147 | }
|
---|
148 |
|
---|
149 | score = backwardReferenceScore(matchlen, backward)
|
---|
150 | if score < out.score {
|
---|
151 | return false
|
---|
152 | }
|
---|
153 |
|
---|
154 | out.len = matchlen
|
---|
155 | out.len_code_delta = int(len) - int(matchlen)
|
---|
156 | out.distance = backward
|
---|
157 | out.score = score
|
---|
158 | return true
|
---|
159 | }
|
---|
160 |
|
---|
161 | func searchInStaticDictionary(dictionary *encoderDictionary, handle hasherHandle, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult, shallow bool) {
|
---|
162 | var key uint
|
---|
163 | var i uint
|
---|
164 | var self *hasherCommon = handle.Common()
|
---|
165 | if self.dict_num_matches < self.dict_num_lookups>>7 {
|
---|
166 | return
|
---|
167 | }
|
---|
168 |
|
---|
169 | key = uint(hash14(data) << 1)
|
---|
170 | for i = 0; ; (func() { i++; key++ })() {
|
---|
171 | var tmp uint
|
---|
172 | if shallow {
|
---|
173 | tmp = 1
|
---|
174 | } else {
|
---|
175 | tmp = 2
|
---|
176 | }
|
---|
177 | if i >= tmp {
|
---|
178 | break
|
---|
179 | }
|
---|
180 | var item uint = uint(dictionary.hash_table[key])
|
---|
181 | self.dict_num_lookups++
|
---|
182 | if item != 0 {
|
---|
183 | var item_matches bool = testStaticDictionaryItem(dictionary, item, data, max_length, max_backward, max_distance, out)
|
---|
184 | if item_matches {
|
---|
185 | self.dict_num_matches++
|
---|
186 | }
|
---|
187 | }
|
---|
188 | }
|
---|
189 | }
|
---|
190 |
|
---|
191 | type backwardMatch struct {
|
---|
192 | distance uint32
|
---|
193 | length_and_code uint32
|
---|
194 | }
|
---|
195 |
|
---|
196 | func initBackwardMatch(self *backwardMatch, dist uint, len uint) {
|
---|
197 | self.distance = uint32(dist)
|
---|
198 | self.length_and_code = uint32(len << 5)
|
---|
199 | }
|
---|
200 |
|
---|
201 | func initDictionaryBackwardMatch(self *backwardMatch, dist uint, len uint, len_code uint) {
|
---|
202 | self.distance = uint32(dist)
|
---|
203 | var tmp uint
|
---|
204 | if len == len_code {
|
---|
205 | tmp = 0
|
---|
206 | } else {
|
---|
207 | tmp = len_code
|
---|
208 | }
|
---|
209 | self.length_and_code = uint32(len<<5 | tmp)
|
---|
210 | }
|
---|
211 |
|
---|
212 | func backwardMatchLength(self *backwardMatch) uint {
|
---|
213 | return uint(self.length_and_code >> 5)
|
---|
214 | }
|
---|
215 |
|
---|
216 | func backwardMatchLengthCode(self *backwardMatch) uint {
|
---|
217 | var code uint = uint(self.length_and_code) & 31
|
---|
218 | if code != 0 {
|
---|
219 | return code
|
---|
220 | } else {
|
---|
221 | return backwardMatchLength(self)
|
---|
222 | }
|
---|
223 | }
|
---|
224 |
|
---|
225 | func hasherReset(handle hasherHandle) {
|
---|
226 | if handle == nil {
|
---|
227 | return
|
---|
228 | }
|
---|
229 | handle.Common().is_prepared_ = false
|
---|
230 | }
|
---|
231 |
|
---|
232 | func newHasher(typ int) hasherHandle {
|
---|
233 | switch typ {
|
---|
234 | case 2:
|
---|
235 | return &hashLongestMatchQuickly{
|
---|
236 | bucketBits: 16,
|
---|
237 | bucketSweep: 1,
|
---|
238 | hashLen: 5,
|
---|
239 | useDictionary: true,
|
---|
240 | }
|
---|
241 | case 3:
|
---|
242 | return &hashLongestMatchQuickly{
|
---|
243 | bucketBits: 16,
|
---|
244 | bucketSweep: 2,
|
---|
245 | hashLen: 5,
|
---|
246 | useDictionary: false,
|
---|
247 | }
|
---|
248 | case 4:
|
---|
249 | return &hashLongestMatchQuickly{
|
---|
250 | bucketBits: 17,
|
---|
251 | bucketSweep: 4,
|
---|
252 | hashLen: 5,
|
---|
253 | useDictionary: true,
|
---|
254 | }
|
---|
255 | case 5:
|
---|
256 | return new(h5)
|
---|
257 | case 6:
|
---|
258 | return new(h6)
|
---|
259 | case 10:
|
---|
260 | return new(h10)
|
---|
261 | case 35:
|
---|
262 | return &hashComposite{
|
---|
263 | ha: newHasher(3),
|
---|
264 | hb: &hashRolling{jump: 4},
|
---|
265 | }
|
---|
266 | case 40:
|
---|
267 | return &hashForgetfulChain{
|
---|
268 | bucketBits: 15,
|
---|
269 | numBanks: 1,
|
---|
270 | bankBits: 16,
|
---|
271 | numLastDistancesToCheck: 4,
|
---|
272 | }
|
---|
273 | case 41:
|
---|
274 | return &hashForgetfulChain{
|
---|
275 | bucketBits: 15,
|
---|
276 | numBanks: 1,
|
---|
277 | bankBits: 16,
|
---|
278 | numLastDistancesToCheck: 10,
|
---|
279 | }
|
---|
280 | case 42:
|
---|
281 | return &hashForgetfulChain{
|
---|
282 | bucketBits: 15,
|
---|
283 | numBanks: 512,
|
---|
284 | bankBits: 9,
|
---|
285 | numLastDistancesToCheck: 16,
|
---|
286 | }
|
---|
287 | case 54:
|
---|
288 | return &hashLongestMatchQuickly{
|
---|
289 | bucketBits: 20,
|
---|
290 | bucketSweep: 4,
|
---|
291 | hashLen: 7,
|
---|
292 | useDictionary: false,
|
---|
293 | }
|
---|
294 | case 55:
|
---|
295 | return &hashComposite{
|
---|
296 | ha: newHasher(54),
|
---|
297 | hb: &hashRolling{jump: 4},
|
---|
298 | }
|
---|
299 | case 65:
|
---|
300 | return &hashComposite{
|
---|
301 | ha: newHasher(6),
|
---|
302 | hb: &hashRolling{jump: 1},
|
---|
303 | }
|
---|
304 | }
|
---|
305 |
|
---|
306 | panic(fmt.Sprintf("unknown hasher type: %d", typ))
|
---|
307 | }
|
---|
308 |
|
---|
309 | func hasherSetup(handle *hasherHandle, params *encoderParams, data []byte, position uint, input_size uint, is_last bool) {
|
---|
310 | var self hasherHandle = nil
|
---|
311 | var common *hasherCommon = nil
|
---|
312 | var one_shot bool = (position == 0 && is_last)
|
---|
313 | if *handle == nil {
|
---|
314 | chooseHasher(params, ¶ms.hasher)
|
---|
315 | self = newHasher(params.hasher.type_)
|
---|
316 |
|
---|
317 | *handle = self
|
---|
318 | common = self.Common()
|
---|
319 | common.params = params.hasher
|
---|
320 | self.Initialize(params)
|
---|
321 | }
|
---|
322 |
|
---|
323 | self = *handle
|
---|
324 | common = self.Common()
|
---|
325 | if !common.is_prepared_ {
|
---|
326 | self.Prepare(one_shot, input_size, data)
|
---|
327 |
|
---|
328 | if position == 0 {
|
---|
329 | common.dict_num_lookups = 0
|
---|
330 | common.dict_num_matches = 0
|
---|
331 | }
|
---|
332 |
|
---|
333 | common.is_prepared_ = true
|
---|
334 | }
|
---|
335 | }
|
---|
336 |
|
---|
337 | func initOrStitchToPreviousBlock(handle *hasherHandle, data []byte, mask uint, params *encoderParams, position uint, input_size uint, is_last bool) {
|
---|
338 | var self hasherHandle
|
---|
339 | hasherSetup(handle, params, data, position, input_size, is_last)
|
---|
340 | self = *handle
|
---|
341 | self.StitchToPreviousBlock(input_size, position, data, mask)
|
---|
342 | }
|
---|