source: code/trunk/vendor/github.com/andybalholm/brotli/hash.go@ 145

Last change on this file since 145 was 145, checked in by Izuru Yakumo, 22 months ago

Updated the Makefile and vendored depedencies

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 9.2 KB
Line 
1package brotli
2
3import (
4 "encoding/binary"
5 "fmt"
6)
7
8type hasherCommon struct {
9 params hasherParams
10 is_prepared_ bool
11 dict_num_lookups uint
12 dict_num_matches uint
13}
14
15func (h *hasherCommon) Common() *hasherCommon {
16 return h
17}
18
19type hasherHandle interface {
20 Common() *hasherCommon
21 Initialize(params *encoderParams)
22 Prepare(one_shot bool, input_size uint, data []byte)
23 StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint)
24 HashTypeLength() uint
25 StoreLookahead() uint
26 PrepareDistanceCache(distance_cache []int)
27 FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult)
28 StoreRange(data []byte, mask uint, ix_start uint, ix_end uint)
29 Store(data []byte, mask uint, ix uint)
30}
31
32const kCutoffTransformsCount uint32 = 10
33
34/* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */
35/* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
36const kCutoffTransforms uint64 = 0x071B520ADA2D3200
37
38type hasherSearchResult struct {
39 len uint
40 distance uint
41 score uint
42 len_code_delta int
43}
44
45/* kHashMul32 multiplier has these properties:
46 * The multiplier must be odd. Otherwise we may lose the highest bit.
47 * No long streaks of ones or zeros.
48 * There is no effort to ensure that it is a prime, the oddity is enough
49 for this use.
50 * The number has been tuned heuristically against compression benchmarks. */
51const kHashMul32 uint32 = 0x1E35A7BD
52
53const kHashMul64 uint64 = 0x1E35A7BD1E35A7BD
54
55const kHashMul64Long uint64 = 0x1FE35A7BD3579BD3
56
57func hash14(data []byte) uint32 {
58 var h uint32 = binary.LittleEndian.Uint32(data) * kHashMul32
59
60 /* The higher bits contain more mixture from the multiplication,
61 so we take our results from there. */
62 return h >> (32 - 14)
63}
64
65func prepareDistanceCache(distance_cache []int, num_distances int) {
66 if num_distances > 4 {
67 var last_distance int = distance_cache[0]
68 distance_cache[4] = last_distance - 1
69 distance_cache[5] = last_distance + 1
70 distance_cache[6] = last_distance - 2
71 distance_cache[7] = last_distance + 2
72 distance_cache[8] = last_distance - 3
73 distance_cache[9] = last_distance + 3
74 if num_distances > 10 {
75 var next_last_distance int = distance_cache[1]
76 distance_cache[10] = next_last_distance - 1
77 distance_cache[11] = next_last_distance + 1
78 distance_cache[12] = next_last_distance - 2
79 distance_cache[13] = next_last_distance + 2
80 distance_cache[14] = next_last_distance - 3
81 distance_cache[15] = next_last_distance + 3
82 }
83 }
84}
85
86const literalByteScore = 135
87
88const distanceBitPenalty = 30
89
90/* Score must be positive after applying maximal penalty. */
91const scoreBase = (distanceBitPenalty * 8 * 8)
92
93/* Usually, we always choose the longest backward reference. This function
94 allows for the exception of that rule.
95
96 If we choose a backward reference that is further away, it will
97 usually be coded with more bits. We approximate this by assuming
98 log2(distance). If the distance can be expressed in terms of the
99 last four distances, we use some heuristic constants to estimate
100 the bits cost. For the first up to four literals we use the bit
101 cost of the literals from the literal cost model, after that we
102 use the average bit cost of the cost model.
103
104 This function is used to sometimes discard a longer backward reference
105 when it is not much longer and the bit cost for encoding it is more
106 than the saved literals.
107
108 backward_reference_offset MUST be positive. */
109func backwardReferenceScore(copy_length uint, backward_reference_offset uint) uint {
110 return scoreBase + literalByteScore*uint(copy_length) - distanceBitPenalty*uint(log2FloorNonZero(backward_reference_offset))
111}
112
113func backwardReferenceScoreUsingLastDistance(copy_length uint) uint {
114 return literalByteScore*uint(copy_length) + scoreBase + 15
115}
116
117func backwardReferencePenaltyUsingLastDistance(distance_short_code uint) uint {
118 return uint(39) + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE)
119}
120
121func testStaticDictionaryItem(dictionary *encoderDictionary, item uint, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult) bool {
122 var len uint
123 var word_idx uint
124 var offset uint
125 var matchlen uint
126 var backward uint
127 var score uint
128 len = item & 0x1F
129 word_idx = item >> 5
130 offset = uint(dictionary.words.offsets_by_length[len]) + len*word_idx
131 if len > max_length {
132 return false
133 }
134
135 matchlen = findMatchLengthWithLimit(data, dictionary.words.data[offset:], uint(len))
136 if matchlen+uint(dictionary.cutoffTransformsCount) <= len || matchlen == 0 {
137 return false
138 }
139 {
140 var cut uint = len - matchlen
141 var transform_id uint = (cut << 2) + uint((dictionary.cutoffTransforms>>(cut*6))&0x3F)
142 backward = max_backward + 1 + word_idx + (transform_id << dictionary.words.size_bits_by_length[len])
143 }
144
145 if backward > max_distance {
146 return false
147 }
148
149 score = backwardReferenceScore(matchlen, backward)
150 if score < out.score {
151 return false
152 }
153
154 out.len = matchlen
155 out.len_code_delta = int(len) - int(matchlen)
156 out.distance = backward
157 out.score = score
158 return true
159}
160
161func searchInStaticDictionary(dictionary *encoderDictionary, handle hasherHandle, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult, shallow bool) {
162 var key uint
163 var i uint
164 var self *hasherCommon = handle.Common()
165 if self.dict_num_matches < self.dict_num_lookups>>7 {
166 return
167 }
168
169 key = uint(hash14(data) << 1)
170 for i = 0; ; (func() { i++; key++ })() {
171 var tmp uint
172 if shallow {
173 tmp = 1
174 } else {
175 tmp = 2
176 }
177 if i >= tmp {
178 break
179 }
180 var item uint = uint(dictionary.hash_table[key])
181 self.dict_num_lookups++
182 if item != 0 {
183 var item_matches bool = testStaticDictionaryItem(dictionary, item, data, max_length, max_backward, max_distance, out)
184 if item_matches {
185 self.dict_num_matches++
186 }
187 }
188 }
189}
190
191type backwardMatch struct {
192 distance uint32
193 length_and_code uint32
194}
195
196func initBackwardMatch(self *backwardMatch, dist uint, len uint) {
197 self.distance = uint32(dist)
198 self.length_and_code = uint32(len << 5)
199}
200
201func initDictionaryBackwardMatch(self *backwardMatch, dist uint, len uint, len_code uint) {
202 self.distance = uint32(dist)
203 var tmp uint
204 if len == len_code {
205 tmp = 0
206 } else {
207 tmp = len_code
208 }
209 self.length_and_code = uint32(len<<5 | tmp)
210}
211
212func backwardMatchLength(self *backwardMatch) uint {
213 return uint(self.length_and_code >> 5)
214}
215
216func backwardMatchLengthCode(self *backwardMatch) uint {
217 var code uint = uint(self.length_and_code) & 31
218 if code != 0 {
219 return code
220 } else {
221 return backwardMatchLength(self)
222 }
223}
224
225func hasherReset(handle hasherHandle) {
226 if handle == nil {
227 return
228 }
229 handle.Common().is_prepared_ = false
230}
231
232func newHasher(typ int) hasherHandle {
233 switch typ {
234 case 2:
235 return &hashLongestMatchQuickly{
236 bucketBits: 16,
237 bucketSweep: 1,
238 hashLen: 5,
239 useDictionary: true,
240 }
241 case 3:
242 return &hashLongestMatchQuickly{
243 bucketBits: 16,
244 bucketSweep: 2,
245 hashLen: 5,
246 useDictionary: false,
247 }
248 case 4:
249 return &hashLongestMatchQuickly{
250 bucketBits: 17,
251 bucketSweep: 4,
252 hashLen: 5,
253 useDictionary: true,
254 }
255 case 5:
256 return new(h5)
257 case 6:
258 return new(h6)
259 case 10:
260 return new(h10)
261 case 35:
262 return &hashComposite{
263 ha: newHasher(3),
264 hb: &hashRolling{jump: 4},
265 }
266 case 40:
267 return &hashForgetfulChain{
268 bucketBits: 15,
269 numBanks: 1,
270 bankBits: 16,
271 numLastDistancesToCheck: 4,
272 }
273 case 41:
274 return &hashForgetfulChain{
275 bucketBits: 15,
276 numBanks: 1,
277 bankBits: 16,
278 numLastDistancesToCheck: 10,
279 }
280 case 42:
281 return &hashForgetfulChain{
282 bucketBits: 15,
283 numBanks: 512,
284 bankBits: 9,
285 numLastDistancesToCheck: 16,
286 }
287 case 54:
288 return &hashLongestMatchQuickly{
289 bucketBits: 20,
290 bucketSweep: 4,
291 hashLen: 7,
292 useDictionary: false,
293 }
294 case 55:
295 return &hashComposite{
296 ha: newHasher(54),
297 hb: &hashRolling{jump: 4},
298 }
299 case 65:
300 return &hashComposite{
301 ha: newHasher(6),
302 hb: &hashRolling{jump: 1},
303 }
304 }
305
306 panic(fmt.Sprintf("unknown hasher type: %d", typ))
307}
308
309func hasherSetup(handle *hasherHandle, params *encoderParams, data []byte, position uint, input_size uint, is_last bool) {
310 var self hasherHandle = nil
311 var common *hasherCommon = nil
312 var one_shot bool = (position == 0 && is_last)
313 if *handle == nil {
314 chooseHasher(params, &params.hasher)
315 self = newHasher(params.hasher.type_)
316
317 *handle = self
318 common = self.Common()
319 common.params = params.hasher
320 self.Initialize(params)
321 }
322
323 self = *handle
324 common = self.Common()
325 if !common.is_prepared_ {
326 self.Prepare(one_shot, input_size, data)
327
328 if position == 0 {
329 common.dict_num_lookups = 0
330 common.dict_num_matches = 0
331 }
332
333 common.is_prepared_ = true
334 }
335}
336
337func initOrStitchToPreviousBlock(handle *hasherHandle, data []byte, mask uint, params *encoderParams, position uint, input_size uint, is_last bool) {
338 var self hasherHandle
339 hasherSetup(handle, params, data, position, input_size, is_last)
340 self = *handle
341 self.StitchToPreviousBlock(input_size, position, data, mask)
342}
Note: See TracBrowser for help on using the repository browser.