1 | package brotli
|
---|
2 |
|
---|
3 | import "encoding/binary"
|
---|
4 |
|
---|
5 | /* Copyright 2013 Google Inc. All Rights Reserved.
|
---|
6 |
|
---|
7 | Distributed under MIT license.
|
---|
8 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
---|
9 | */
|
---|
10 |
|
---|
11 | /* Class to model the static dictionary. */
|
---|
12 |
|
---|
13 | const maxStaticDictionaryMatchLen = 37
|
---|
14 |
|
---|
15 | const kInvalidMatch uint32 = 0xFFFFFFF
|
---|
16 |
|
---|
17 | /* Copyright 2013 Google Inc. All Rights Reserved.
|
---|
18 |
|
---|
19 | Distributed under MIT license.
|
---|
20 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
---|
21 | */
|
---|
22 | func hash(data []byte) uint32 {
|
---|
23 | var h uint32 = binary.LittleEndian.Uint32(data) * kDictHashMul32
|
---|
24 |
|
---|
25 | /* The higher bits contain more mixture from the multiplication,
|
---|
26 | so we take our results from there. */
|
---|
27 | return h >> uint(32-kDictNumBits)
|
---|
28 | }
|
---|
29 |
|
---|
30 | func addMatch(distance uint, len uint, len_code uint, matches []uint32) {
|
---|
31 | var match uint32 = uint32((distance << 5) + len_code)
|
---|
32 | matches[len] = brotli_min_uint32_t(matches[len], match)
|
---|
33 | }
|
---|
34 |
|
---|
35 | func dictMatchLength(dict *dictionary, data []byte, id uint, len uint, maxlen uint) uint {
|
---|
36 | var offset uint = uint(dict.offsets_by_length[len]) + len*id
|
---|
37 | return findMatchLengthWithLimit(dict.data[offset:], data, brotli_min_size_t(uint(len), maxlen))
|
---|
38 | }
|
---|
39 |
|
---|
40 | func isMatch(d *dictionary, w dictWord, data []byte, max_length uint) bool {
|
---|
41 | if uint(w.len) > max_length {
|
---|
42 | return false
|
---|
43 | } else {
|
---|
44 | var offset uint = uint(d.offsets_by_length[w.len]) + uint(w.len)*uint(w.idx)
|
---|
45 | var dict []byte = d.data[offset:]
|
---|
46 | if w.transform == 0 {
|
---|
47 | /* Match against base dictionary word. */
|
---|
48 | return findMatchLengthWithLimit(dict, data, uint(w.len)) == uint(w.len)
|
---|
49 | } else if w.transform == 10 {
|
---|
50 | /* Match against uppercase first transform.
|
---|
51 | Note that there are only ASCII uppercase words in the lookup table. */
|
---|
52 | return dict[0] >= 'a' && dict[0] <= 'z' && (dict[0]^32) == data[0] && findMatchLengthWithLimit(dict[1:], data[1:], uint(w.len)-1) == uint(w.len-1)
|
---|
53 | } else {
|
---|
54 | /* Match against uppercase all transform.
|
---|
55 | Note that there are only ASCII uppercase words in the lookup table. */
|
---|
56 | var i uint
|
---|
57 | for i = 0; i < uint(w.len); i++ {
|
---|
58 | if dict[i] >= 'a' && dict[i] <= 'z' {
|
---|
59 | if (dict[i] ^ 32) != data[i] {
|
---|
60 | return false
|
---|
61 | }
|
---|
62 | } else {
|
---|
63 | if dict[i] != data[i] {
|
---|
64 | return false
|
---|
65 | }
|
---|
66 | }
|
---|
67 | }
|
---|
68 |
|
---|
69 | return true
|
---|
70 | }
|
---|
71 | }
|
---|
72 | }
|
---|
73 |
|
---|
74 | func findAllStaticDictionaryMatches(dict *encoderDictionary, data []byte, min_length uint, max_length uint, matches []uint32) bool {
|
---|
75 | var has_found_match bool = false
|
---|
76 | {
|
---|
77 | var offset uint = uint(dict.buckets[hash(data)])
|
---|
78 | var end bool = offset == 0
|
---|
79 | for !end {
|
---|
80 | w := dict.dict_words[offset]
|
---|
81 | offset++
|
---|
82 | var l uint = uint(w.len) & 0x1F
|
---|
83 | var n uint = uint(1) << dict.words.size_bits_by_length[l]
|
---|
84 | var id uint = uint(w.idx)
|
---|
85 | end = !(w.len&0x80 == 0)
|
---|
86 | w.len = byte(l)
|
---|
87 | if w.transform == 0 {
|
---|
88 | var matchlen uint = dictMatchLength(dict.words, data, id, l, max_length)
|
---|
89 | var s []byte
|
---|
90 | var minlen uint
|
---|
91 | var maxlen uint
|
---|
92 | var len uint
|
---|
93 |
|
---|
94 | /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
|
---|
95 | if matchlen == l {
|
---|
96 | addMatch(id, l, l, matches)
|
---|
97 | has_found_match = true
|
---|
98 | }
|
---|
99 |
|
---|
100 | /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
|
---|
101 | "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
|
---|
102 | if matchlen >= l-1 {
|
---|
103 | addMatch(id+12*n, l-1, l, matches)
|
---|
104 | if l+2 < max_length && data[l-1] == 'i' && data[l] == 'n' && data[l+1] == 'g' && data[l+2] == ' ' {
|
---|
105 | addMatch(id+49*n, l+3, l, matches)
|
---|
106 | }
|
---|
107 |
|
---|
108 | has_found_match = true
|
---|
109 | }
|
---|
110 |
|
---|
111 | /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
|
---|
112 | minlen = min_length
|
---|
113 |
|
---|
114 | if l > 9 {
|
---|
115 | minlen = brotli_max_size_t(minlen, l-9)
|
---|
116 | }
|
---|
117 | maxlen = brotli_min_size_t(matchlen, l-2)
|
---|
118 | for len = minlen; len <= maxlen; len++ {
|
---|
119 | var cut uint = l - len
|
---|
120 | var transform_id uint = (cut << 2) + uint((dict.cutoffTransforms>>(cut*6))&0x3F)
|
---|
121 | addMatch(id+transform_id*n, uint(len), l, matches)
|
---|
122 | has_found_match = true
|
---|
123 | }
|
---|
124 |
|
---|
125 | if matchlen < l || l+6 >= max_length {
|
---|
126 | continue
|
---|
127 | }
|
---|
128 |
|
---|
129 | s = data[l:]
|
---|
130 |
|
---|
131 | /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
|
---|
132 | if s[0] == ' ' {
|
---|
133 | addMatch(id+n, l+1, l, matches)
|
---|
134 | if s[1] == 'a' {
|
---|
135 | if s[2] == ' ' {
|
---|
136 | addMatch(id+28*n, l+3, l, matches)
|
---|
137 | } else if s[2] == 's' {
|
---|
138 | if s[3] == ' ' {
|
---|
139 | addMatch(id+46*n, l+4, l, matches)
|
---|
140 | }
|
---|
141 | } else if s[2] == 't' {
|
---|
142 | if s[3] == ' ' {
|
---|
143 | addMatch(id+60*n, l+4, l, matches)
|
---|
144 | }
|
---|
145 | } else if s[2] == 'n' {
|
---|
146 | if s[3] == 'd' && s[4] == ' ' {
|
---|
147 | addMatch(id+10*n, l+5, l, matches)
|
---|
148 | }
|
---|
149 | }
|
---|
150 | } else if s[1] == 'b' {
|
---|
151 | if s[2] == 'y' && s[3] == ' ' {
|
---|
152 | addMatch(id+38*n, l+4, l, matches)
|
---|
153 | }
|
---|
154 | } else if s[1] == 'i' {
|
---|
155 | if s[2] == 'n' {
|
---|
156 | if s[3] == ' ' {
|
---|
157 | addMatch(id+16*n, l+4, l, matches)
|
---|
158 | }
|
---|
159 | } else if s[2] == 's' {
|
---|
160 | if s[3] == ' ' {
|
---|
161 | addMatch(id+47*n, l+4, l, matches)
|
---|
162 | }
|
---|
163 | }
|
---|
164 | } else if s[1] == 'f' {
|
---|
165 | if s[2] == 'o' {
|
---|
166 | if s[3] == 'r' && s[4] == ' ' {
|
---|
167 | addMatch(id+25*n, l+5, l, matches)
|
---|
168 | }
|
---|
169 | } else if s[2] == 'r' {
|
---|
170 | if s[3] == 'o' && s[4] == 'm' && s[5] == ' ' {
|
---|
171 | addMatch(id+37*n, l+6, l, matches)
|
---|
172 | }
|
---|
173 | }
|
---|
174 | } else if s[1] == 'o' {
|
---|
175 | if s[2] == 'f' {
|
---|
176 | if s[3] == ' ' {
|
---|
177 | addMatch(id+8*n, l+4, l, matches)
|
---|
178 | }
|
---|
179 | } else if s[2] == 'n' {
|
---|
180 | if s[3] == ' ' {
|
---|
181 | addMatch(id+45*n, l+4, l, matches)
|
---|
182 | }
|
---|
183 | }
|
---|
184 | } else if s[1] == 'n' {
|
---|
185 | if s[2] == 'o' && s[3] == 't' && s[4] == ' ' {
|
---|
186 | addMatch(id+80*n, l+5, l, matches)
|
---|
187 | }
|
---|
188 | } else if s[1] == 't' {
|
---|
189 | if s[2] == 'h' {
|
---|
190 | if s[3] == 'e' {
|
---|
191 | if s[4] == ' ' {
|
---|
192 | addMatch(id+5*n, l+5, l, matches)
|
---|
193 | }
|
---|
194 | } else if s[3] == 'a' {
|
---|
195 | if s[4] == 't' && s[5] == ' ' {
|
---|
196 | addMatch(id+29*n, l+6, l, matches)
|
---|
197 | }
|
---|
198 | }
|
---|
199 | } else if s[2] == 'o' {
|
---|
200 | if s[3] == ' ' {
|
---|
201 | addMatch(id+17*n, l+4, l, matches)
|
---|
202 | }
|
---|
203 | }
|
---|
204 | } else if s[1] == 'w' {
|
---|
205 | if s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ' {
|
---|
206 | addMatch(id+35*n, l+6, l, matches)
|
---|
207 | }
|
---|
208 | }
|
---|
209 | } else if s[0] == '"' {
|
---|
210 | addMatch(id+19*n, l+1, l, matches)
|
---|
211 | if s[1] == '>' {
|
---|
212 | addMatch(id+21*n, l+2, l, matches)
|
---|
213 | }
|
---|
214 | } else if s[0] == '.' {
|
---|
215 | addMatch(id+20*n, l+1, l, matches)
|
---|
216 | if s[1] == ' ' {
|
---|
217 | addMatch(id+31*n, l+2, l, matches)
|
---|
218 | if s[2] == 'T' && s[3] == 'h' {
|
---|
219 | if s[4] == 'e' {
|
---|
220 | if s[5] == ' ' {
|
---|
221 | addMatch(id+43*n, l+6, l, matches)
|
---|
222 | }
|
---|
223 | } else if s[4] == 'i' {
|
---|
224 | if s[5] == 's' && s[6] == ' ' {
|
---|
225 | addMatch(id+75*n, l+7, l, matches)
|
---|
226 | }
|
---|
227 | }
|
---|
228 | }
|
---|
229 | }
|
---|
230 | } else if s[0] == ',' {
|
---|
231 | addMatch(id+76*n, l+1, l, matches)
|
---|
232 | if s[1] == ' ' {
|
---|
233 | addMatch(id+14*n, l+2, l, matches)
|
---|
234 | }
|
---|
235 | } else if s[0] == '\n' {
|
---|
236 | addMatch(id+22*n, l+1, l, matches)
|
---|
237 | if s[1] == '\t' {
|
---|
238 | addMatch(id+50*n, l+2, l, matches)
|
---|
239 | }
|
---|
240 | } else if s[0] == ']' {
|
---|
241 | addMatch(id+24*n, l+1, l, matches)
|
---|
242 | } else if s[0] == '\'' {
|
---|
243 | addMatch(id+36*n, l+1, l, matches)
|
---|
244 | } else if s[0] == ':' {
|
---|
245 | addMatch(id+51*n, l+1, l, matches)
|
---|
246 | } else if s[0] == '(' {
|
---|
247 | addMatch(id+57*n, l+1, l, matches)
|
---|
248 | } else if s[0] == '=' {
|
---|
249 | if s[1] == '"' {
|
---|
250 | addMatch(id+70*n, l+2, l, matches)
|
---|
251 | } else if s[1] == '\'' {
|
---|
252 | addMatch(id+86*n, l+2, l, matches)
|
---|
253 | }
|
---|
254 | } else if s[0] == 'a' {
|
---|
255 | if s[1] == 'l' && s[2] == ' ' {
|
---|
256 | addMatch(id+84*n, l+3, l, matches)
|
---|
257 | }
|
---|
258 | } else if s[0] == 'e' {
|
---|
259 | if s[1] == 'd' {
|
---|
260 | if s[2] == ' ' {
|
---|
261 | addMatch(id+53*n, l+3, l, matches)
|
---|
262 | }
|
---|
263 | } else if s[1] == 'r' {
|
---|
264 | if s[2] == ' ' {
|
---|
265 | addMatch(id+82*n, l+3, l, matches)
|
---|
266 | }
|
---|
267 | } else if s[1] == 's' {
|
---|
268 | if s[2] == 't' && s[3] == ' ' {
|
---|
269 | addMatch(id+95*n, l+4, l, matches)
|
---|
270 | }
|
---|
271 | }
|
---|
272 | } else if s[0] == 'f' {
|
---|
273 | if s[1] == 'u' && s[2] == 'l' && s[3] == ' ' {
|
---|
274 | addMatch(id+90*n, l+4, l, matches)
|
---|
275 | }
|
---|
276 | } else if s[0] == 'i' {
|
---|
277 | if s[1] == 'v' {
|
---|
278 | if s[2] == 'e' && s[3] == ' ' {
|
---|
279 | addMatch(id+92*n, l+4, l, matches)
|
---|
280 | }
|
---|
281 | } else if s[1] == 'z' {
|
---|
282 | if s[2] == 'e' && s[3] == ' ' {
|
---|
283 | addMatch(id+100*n, l+4, l, matches)
|
---|
284 | }
|
---|
285 | }
|
---|
286 | } else if s[0] == 'l' {
|
---|
287 | if s[1] == 'e' {
|
---|
288 | if s[2] == 's' && s[3] == 's' && s[4] == ' ' {
|
---|
289 | addMatch(id+93*n, l+5, l, matches)
|
---|
290 | }
|
---|
291 | } else if s[1] == 'y' {
|
---|
292 | if s[2] == ' ' {
|
---|
293 | addMatch(id+61*n, l+3, l, matches)
|
---|
294 | }
|
---|
295 | }
|
---|
296 | } else if s[0] == 'o' {
|
---|
297 | if s[1] == 'u' && s[2] == 's' && s[3] == ' ' {
|
---|
298 | addMatch(id+106*n, l+4, l, matches)
|
---|
299 | }
|
---|
300 | }
|
---|
301 | } else {
|
---|
302 | var is_all_caps bool = (w.transform != transformUppercaseFirst)
|
---|
303 | /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
|
---|
304 | is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
|
---|
305 | transform. */
|
---|
306 |
|
---|
307 | var s []byte
|
---|
308 | if !isMatch(dict.words, w, data, max_length) {
|
---|
309 | continue
|
---|
310 | }
|
---|
311 |
|
---|
312 | /* Transform "" + kUppercase{First,All} + "" */
|
---|
313 | var tmp int
|
---|
314 | if is_all_caps {
|
---|
315 | tmp = 44
|
---|
316 | } else {
|
---|
317 | tmp = 9
|
---|
318 | }
|
---|
319 | addMatch(id+uint(tmp)*n, l, l, matches)
|
---|
320 |
|
---|
321 | has_found_match = true
|
---|
322 | if l+1 >= max_length {
|
---|
323 | continue
|
---|
324 | }
|
---|
325 |
|
---|
326 | /* Transforms "" + kUppercase{First,All} + <suffix> */
|
---|
327 | s = data[l:]
|
---|
328 |
|
---|
329 | if s[0] == ' ' {
|
---|
330 | var tmp int
|
---|
331 | if is_all_caps {
|
---|
332 | tmp = 68
|
---|
333 | } else {
|
---|
334 | tmp = 4
|
---|
335 | }
|
---|
336 | addMatch(id+uint(tmp)*n, l+1, l, matches)
|
---|
337 | } else if s[0] == '"' {
|
---|
338 | var tmp int
|
---|
339 | if is_all_caps {
|
---|
340 | tmp = 87
|
---|
341 | } else {
|
---|
342 | tmp = 66
|
---|
343 | }
|
---|
344 | addMatch(id+uint(tmp)*n, l+1, l, matches)
|
---|
345 | if s[1] == '>' {
|
---|
346 | var tmp int
|
---|
347 | if is_all_caps {
|
---|
348 | tmp = 97
|
---|
349 | } else {
|
---|
350 | tmp = 69
|
---|
351 | }
|
---|
352 | addMatch(id+uint(tmp)*n, l+2, l, matches)
|
---|
353 | }
|
---|
354 | } else if s[0] == '.' {
|
---|
355 | var tmp int
|
---|
356 | if is_all_caps {
|
---|
357 | tmp = 101
|
---|
358 | } else {
|
---|
359 | tmp = 79
|
---|
360 | }
|
---|
361 | addMatch(id+uint(tmp)*n, l+1, l, matches)
|
---|
362 | if s[1] == ' ' {
|
---|
363 | var tmp int
|
---|
364 | if is_all_caps {
|
---|
365 | tmp = 114
|
---|
366 | } else {
|
---|
367 | tmp = 88
|
---|
368 | }
|
---|
369 | addMatch(id+uint(tmp)*n, l+2, l, matches)
|
---|
370 | }
|
---|
371 | } else if s[0] == ',' {
|
---|
372 | var tmp int
|
---|
373 | if is_all_caps {
|
---|
374 | tmp = 112
|
---|
375 | } else {
|
---|
376 | tmp = 99
|
---|
377 | }
|
---|
378 | addMatch(id+uint(tmp)*n, l+1, l, matches)
|
---|
379 | if s[1] == ' ' {
|
---|
380 | var tmp int
|
---|
381 | if is_all_caps {
|
---|
382 | tmp = 107
|
---|
383 | } else {
|
---|
384 | tmp = 58
|
---|
385 | }
|
---|
386 | addMatch(id+uint(tmp)*n, l+2, l, matches)
|
---|
387 | }
|
---|
388 | } else if s[0] == '\'' {
|
---|
389 | var tmp int
|
---|
390 | if is_all_caps {
|
---|
391 | tmp = 94
|
---|
392 | } else {
|
---|
393 | tmp = 74
|
---|
394 | }
|
---|
395 | addMatch(id+uint(tmp)*n, l+1, l, matches)
|
---|
396 | } else if s[0] == '(' {
|
---|
397 | var tmp int
|
---|
398 | if is_all_caps {
|
---|
399 | tmp = 113
|
---|
400 | } else {
|
---|
401 | tmp = 78
|
---|
402 | }
|
---|
403 | addMatch(id+uint(tmp)*n, l+1, l, matches)
|
---|
404 | } else if s[0] == '=' {
|
---|
405 | if s[1] == '"' {
|
---|
406 | var tmp int
|
---|
407 | if is_all_caps {
|
---|
408 | tmp = 105
|
---|
409 | } else {
|
---|
410 | tmp = 104
|
---|
411 | }
|
---|
412 | addMatch(id+uint(tmp)*n, l+2, l, matches)
|
---|
413 | } else if s[1] == '\'' {
|
---|
414 | var tmp int
|
---|
415 | if is_all_caps {
|
---|
416 | tmp = 116
|
---|
417 | } else {
|
---|
418 | tmp = 108
|
---|
419 | }
|
---|
420 | addMatch(id+uint(tmp)*n, l+2, l, matches)
|
---|
421 | }
|
---|
422 | }
|
---|
423 | }
|
---|
424 | }
|
---|
425 | }
|
---|
426 |
|
---|
427 | /* Transforms with prefixes " " and "." */
|
---|
428 | if max_length >= 5 && (data[0] == ' ' || data[0] == '.') {
|
---|
429 | var is_space bool = (data[0] == ' ')
|
---|
430 | var offset uint = uint(dict.buckets[hash(data[1:])])
|
---|
431 | var end bool = offset == 0
|
---|
432 | for !end {
|
---|
433 | w := dict.dict_words[offset]
|
---|
434 | offset++
|
---|
435 | var l uint = uint(w.len) & 0x1F
|
---|
436 | var n uint = uint(1) << dict.words.size_bits_by_length[l]
|
---|
437 | var id uint = uint(w.idx)
|
---|
438 | end = !(w.len&0x80 == 0)
|
---|
439 | w.len = byte(l)
|
---|
440 | if w.transform == 0 {
|
---|
441 | var s []byte
|
---|
442 | if !isMatch(dict.words, w, data[1:], max_length-1) {
|
---|
443 | continue
|
---|
444 | }
|
---|
445 |
|
---|
446 | /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
|
---|
447 | "." + BROTLI_TRANSFORM_IDENTITY + "" */
|
---|
448 | var tmp int
|
---|
449 | if is_space {
|
---|
450 | tmp = 6
|
---|
451 | } else {
|
---|
452 | tmp = 32
|
---|
453 | }
|
---|
454 | addMatch(id+uint(tmp)*n, l+1, l, matches)
|
---|
455 |
|
---|
456 | has_found_match = true
|
---|
457 | if l+2 >= max_length {
|
---|
458 | continue
|
---|
459 | }
|
---|
460 |
|
---|
461 | /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
|
---|
462 | "." + BROTLI_TRANSFORM_IDENTITY + <suffix>
|
---|
463 | */
|
---|
464 | s = data[l+1:]
|
---|
465 |
|
---|
466 | if s[0] == ' ' {
|
---|
467 | var tmp int
|
---|
468 | if is_space {
|
---|
469 | tmp = 2
|
---|
470 | } else {
|
---|
471 | tmp = 77
|
---|
472 | }
|
---|
473 | addMatch(id+uint(tmp)*n, l+2, l, matches)
|
---|
474 | } else if s[0] == '(' {
|
---|
475 | var tmp int
|
---|
476 | if is_space {
|
---|
477 | tmp = 89
|
---|
478 | } else {
|
---|
479 | tmp = 67
|
---|
480 | }
|
---|
481 | addMatch(id+uint(tmp)*n, l+2, l, matches)
|
---|
482 | } else if is_space {
|
---|
483 | if s[0] == ',' {
|
---|
484 | addMatch(id+103*n, l+2, l, matches)
|
---|
485 | if s[1] == ' ' {
|
---|
486 | addMatch(id+33*n, l+3, l, matches)
|
---|
487 | }
|
---|
488 | } else if s[0] == '.' {
|
---|
489 | addMatch(id+71*n, l+2, l, matches)
|
---|
490 | if s[1] == ' ' {
|
---|
491 | addMatch(id+52*n, l+3, l, matches)
|
---|
492 | }
|
---|
493 | } else if s[0] == '=' {
|
---|
494 | if s[1] == '"' {
|
---|
495 | addMatch(id+81*n, l+3, l, matches)
|
---|
496 | } else if s[1] == '\'' {
|
---|
497 | addMatch(id+98*n, l+3, l, matches)
|
---|
498 | }
|
---|
499 | }
|
---|
500 | }
|
---|
501 | } else if is_space {
|
---|
502 | var is_all_caps bool = (w.transform != transformUppercaseFirst)
|
---|
503 | /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
|
---|
504 | is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
|
---|
505 | transform. */
|
---|
506 |
|
---|
507 | var s []byte
|
---|
508 | if !isMatch(dict.words, w, data[1:], max_length-1) {
|
---|
509 | continue
|
---|
510 | }
|
---|
511 |
|
---|
512 | /* Transforms " " + kUppercase{First,All} + "" */
|
---|
513 | var tmp int
|
---|
514 | if is_all_caps {
|
---|
515 | tmp = 85
|
---|
516 | } else {
|
---|
517 | tmp = 30
|
---|
518 | }
|
---|
519 | addMatch(id+uint(tmp)*n, l+1, l, matches)
|
---|
520 |
|
---|
521 | has_found_match = true
|
---|
522 | if l+2 >= max_length {
|
---|
523 | continue
|
---|
524 | }
|
---|
525 |
|
---|
526 | /* Transforms " " + kUppercase{First,All} + <suffix> */
|
---|
527 | s = data[l+1:]
|
---|
528 |
|
---|
529 | if s[0] == ' ' {
|
---|
530 | var tmp int
|
---|
531 | if is_all_caps {
|
---|
532 | tmp = 83
|
---|
533 | } else {
|
---|
534 | tmp = 15
|
---|
535 | }
|
---|
536 | addMatch(id+uint(tmp)*n, l+2, l, matches)
|
---|
537 | } else if s[0] == ',' {
|
---|
538 | if !is_all_caps {
|
---|
539 | addMatch(id+109*n, l+2, l, matches)
|
---|
540 | }
|
---|
541 |
|
---|
542 | if s[1] == ' ' {
|
---|
543 | var tmp int
|
---|
544 | if is_all_caps {
|
---|
545 | tmp = 111
|
---|
546 | } else {
|
---|
547 | tmp = 65
|
---|
548 | }
|
---|
549 | addMatch(id+uint(tmp)*n, l+3, l, matches)
|
---|
550 | }
|
---|
551 | } else if s[0] == '.' {
|
---|
552 | var tmp int
|
---|
553 | if is_all_caps {
|
---|
554 | tmp = 115
|
---|
555 | } else {
|
---|
556 | tmp = 96
|
---|
557 | }
|
---|
558 | addMatch(id+uint(tmp)*n, l+2, l, matches)
|
---|
559 | if s[1] == ' ' {
|
---|
560 | var tmp int
|
---|
561 | if is_all_caps {
|
---|
562 | tmp = 117
|
---|
563 | } else {
|
---|
564 | tmp = 91
|
---|
565 | }
|
---|
566 | addMatch(id+uint(tmp)*n, l+3, l, matches)
|
---|
567 | }
|
---|
568 | } else if s[0] == '=' {
|
---|
569 | if s[1] == '"' {
|
---|
570 | var tmp int
|
---|
571 | if is_all_caps {
|
---|
572 | tmp = 110
|
---|
573 | } else {
|
---|
574 | tmp = 118
|
---|
575 | }
|
---|
576 | addMatch(id+uint(tmp)*n, l+3, l, matches)
|
---|
577 | } else if s[1] == '\'' {
|
---|
578 | var tmp int
|
---|
579 | if is_all_caps {
|
---|
580 | tmp = 119
|
---|
581 | } else {
|
---|
582 | tmp = 120
|
---|
583 | }
|
---|
584 | addMatch(id+uint(tmp)*n, l+3, l, matches)
|
---|
585 | }
|
---|
586 | }
|
---|
587 | }
|
---|
588 | }
|
---|
589 | }
|
---|
590 |
|
---|
591 | if max_length >= 6 {
|
---|
592 | /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
|
---|
593 | if (data[1] == ' ' && (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || (data[0] == 0xC2 && data[1] == 0xA0) {
|
---|
594 | var offset uint = uint(dict.buckets[hash(data[2:])])
|
---|
595 | var end bool = offset == 0
|
---|
596 | for !end {
|
---|
597 | w := dict.dict_words[offset]
|
---|
598 | offset++
|
---|
599 | var l uint = uint(w.len) & 0x1F
|
---|
600 | var n uint = uint(1) << dict.words.size_bits_by_length[l]
|
---|
601 | var id uint = uint(w.idx)
|
---|
602 | end = !(w.len&0x80 == 0)
|
---|
603 | w.len = byte(l)
|
---|
604 | if w.transform == 0 && isMatch(dict.words, w, data[2:], max_length-2) {
|
---|
605 | if data[0] == 0xC2 {
|
---|
606 | addMatch(id+102*n, l+2, l, matches)
|
---|
607 | has_found_match = true
|
---|
608 | } else if l+2 < max_length && data[l+2] == ' ' {
|
---|
609 | var t uint = 13
|
---|
610 | if data[0] == 'e' {
|
---|
611 | t = 18
|
---|
612 | } else if data[0] == 's' {
|
---|
613 | t = 7
|
---|
614 | }
|
---|
615 | addMatch(id+t*n, l+3, l, matches)
|
---|
616 | has_found_match = true
|
---|
617 | }
|
---|
618 | }
|
---|
619 | }
|
---|
620 | }
|
---|
621 | }
|
---|
622 |
|
---|
623 | if max_length >= 9 {
|
---|
624 | /* Transforms with prefixes " the " and ".com/" */
|
---|
625 | if (data[0] == ' ' && data[1] == 't' && data[2] == 'h' && data[3] == 'e' && data[4] == ' ') || (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && data[3] == 'm' && data[4] == '/') {
|
---|
626 | var offset uint = uint(dict.buckets[hash(data[5:])])
|
---|
627 | var end bool = offset == 0
|
---|
628 | for !end {
|
---|
629 | w := dict.dict_words[offset]
|
---|
630 | offset++
|
---|
631 | var l uint = uint(w.len) & 0x1F
|
---|
632 | var n uint = uint(1) << dict.words.size_bits_by_length[l]
|
---|
633 | var id uint = uint(w.idx)
|
---|
634 | end = !(w.len&0x80 == 0)
|
---|
635 | w.len = byte(l)
|
---|
636 | if w.transform == 0 && isMatch(dict.words, w, data[5:], max_length-5) {
|
---|
637 | var tmp int
|
---|
638 | if data[0] == ' ' {
|
---|
639 | tmp = 41
|
---|
640 | } else {
|
---|
641 | tmp = 72
|
---|
642 | }
|
---|
643 | addMatch(id+uint(tmp)*n, l+5, l, matches)
|
---|
644 | has_found_match = true
|
---|
645 | if l+5 < max_length {
|
---|
646 | var s []byte = data[l+5:]
|
---|
647 | if data[0] == ' ' {
|
---|
648 | if l+8 < max_length && s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ' {
|
---|
649 | addMatch(id+62*n, l+9, l, matches)
|
---|
650 | if l+12 < max_length && s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ' {
|
---|
651 | addMatch(id+73*n, l+13, l, matches)
|
---|
652 | }
|
---|
653 | }
|
---|
654 | }
|
---|
655 | }
|
---|
656 | }
|
---|
657 | }
|
---|
658 | }
|
---|
659 | }
|
---|
660 |
|
---|
661 | return has_found_match
|
---|
662 | }
|
---|