source: code/trunk/vendor/github.com/andybalholm/brotli/static_dict.go@ 145

Last change on this file since 145 was 145, checked in by Izuru Yakumo, 22 months ago

Updated the Makefile and vendored depedencies

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 16.7 KB
Line 
1package brotli
2
3import "encoding/binary"
4
5/* Copyright 2013 Google Inc. All Rights Reserved.
6
7 Distributed under MIT license.
8 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
9*/
10
11/* Class to model the static dictionary. */
12
13const maxStaticDictionaryMatchLen = 37
14
15const kInvalidMatch uint32 = 0xFFFFFFF
16
17/* Copyright 2013 Google Inc. All Rights Reserved.
18
19 Distributed under MIT license.
20 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
21*/
22func hash(data []byte) uint32 {
23 var h uint32 = binary.LittleEndian.Uint32(data) * kDictHashMul32
24
25 /* The higher bits contain more mixture from the multiplication,
26 so we take our results from there. */
27 return h >> uint(32-kDictNumBits)
28}
29
30func addMatch(distance uint, len uint, len_code uint, matches []uint32) {
31 var match uint32 = uint32((distance << 5) + len_code)
32 matches[len] = brotli_min_uint32_t(matches[len], match)
33}
34
35func dictMatchLength(dict *dictionary, data []byte, id uint, len uint, maxlen uint) uint {
36 var offset uint = uint(dict.offsets_by_length[len]) + len*id
37 return findMatchLengthWithLimit(dict.data[offset:], data, brotli_min_size_t(uint(len), maxlen))
38}
39
40func isMatch(d *dictionary, w dictWord, data []byte, max_length uint) bool {
41 if uint(w.len) > max_length {
42 return false
43 } else {
44 var offset uint = uint(d.offsets_by_length[w.len]) + uint(w.len)*uint(w.idx)
45 var dict []byte = d.data[offset:]
46 if w.transform == 0 {
47 /* Match against base dictionary word. */
48 return findMatchLengthWithLimit(dict, data, uint(w.len)) == uint(w.len)
49 } else if w.transform == 10 {
50 /* Match against uppercase first transform.
51 Note that there are only ASCII uppercase words in the lookup table. */
52 return dict[0] >= 'a' && dict[0] <= 'z' && (dict[0]^32) == data[0] && findMatchLengthWithLimit(dict[1:], data[1:], uint(w.len)-1) == uint(w.len-1)
53 } else {
54 /* Match against uppercase all transform.
55 Note that there are only ASCII uppercase words in the lookup table. */
56 var i uint
57 for i = 0; i < uint(w.len); i++ {
58 if dict[i] >= 'a' && dict[i] <= 'z' {
59 if (dict[i] ^ 32) != data[i] {
60 return false
61 }
62 } else {
63 if dict[i] != data[i] {
64 return false
65 }
66 }
67 }
68
69 return true
70 }
71 }
72}
73
74func findAllStaticDictionaryMatches(dict *encoderDictionary, data []byte, min_length uint, max_length uint, matches []uint32) bool {
75 var has_found_match bool = false
76 {
77 var offset uint = uint(dict.buckets[hash(data)])
78 var end bool = offset == 0
79 for !end {
80 w := dict.dict_words[offset]
81 offset++
82 var l uint = uint(w.len) & 0x1F
83 var n uint = uint(1) << dict.words.size_bits_by_length[l]
84 var id uint = uint(w.idx)
85 end = !(w.len&0x80 == 0)
86 w.len = byte(l)
87 if w.transform == 0 {
88 var matchlen uint = dictMatchLength(dict.words, data, id, l, max_length)
89 var s []byte
90 var minlen uint
91 var maxlen uint
92 var len uint
93
94 /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
95 if matchlen == l {
96 addMatch(id, l, l, matches)
97 has_found_match = true
98 }
99
100 /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
101 "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
102 if matchlen >= l-1 {
103 addMatch(id+12*n, l-1, l, matches)
104 if l+2 < max_length && data[l-1] == 'i' && data[l] == 'n' && data[l+1] == 'g' && data[l+2] == ' ' {
105 addMatch(id+49*n, l+3, l, matches)
106 }
107
108 has_found_match = true
109 }
110
111 /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
112 minlen = min_length
113
114 if l > 9 {
115 minlen = brotli_max_size_t(minlen, l-9)
116 }
117 maxlen = brotli_min_size_t(matchlen, l-2)
118 for len = minlen; len <= maxlen; len++ {
119 var cut uint = l - len
120 var transform_id uint = (cut << 2) + uint((dict.cutoffTransforms>>(cut*6))&0x3F)
121 addMatch(id+transform_id*n, uint(len), l, matches)
122 has_found_match = true
123 }
124
125 if matchlen < l || l+6 >= max_length {
126 continue
127 }
128
129 s = data[l:]
130
131 /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
132 if s[0] == ' ' {
133 addMatch(id+n, l+1, l, matches)
134 if s[1] == 'a' {
135 if s[2] == ' ' {
136 addMatch(id+28*n, l+3, l, matches)
137 } else if s[2] == 's' {
138 if s[3] == ' ' {
139 addMatch(id+46*n, l+4, l, matches)
140 }
141 } else if s[2] == 't' {
142 if s[3] == ' ' {
143 addMatch(id+60*n, l+4, l, matches)
144 }
145 } else if s[2] == 'n' {
146 if s[3] == 'd' && s[4] == ' ' {
147 addMatch(id+10*n, l+5, l, matches)
148 }
149 }
150 } else if s[1] == 'b' {
151 if s[2] == 'y' && s[3] == ' ' {
152 addMatch(id+38*n, l+4, l, matches)
153 }
154 } else if s[1] == 'i' {
155 if s[2] == 'n' {
156 if s[3] == ' ' {
157 addMatch(id+16*n, l+4, l, matches)
158 }
159 } else if s[2] == 's' {
160 if s[3] == ' ' {
161 addMatch(id+47*n, l+4, l, matches)
162 }
163 }
164 } else if s[1] == 'f' {
165 if s[2] == 'o' {
166 if s[3] == 'r' && s[4] == ' ' {
167 addMatch(id+25*n, l+5, l, matches)
168 }
169 } else if s[2] == 'r' {
170 if s[3] == 'o' && s[4] == 'm' && s[5] == ' ' {
171 addMatch(id+37*n, l+6, l, matches)
172 }
173 }
174 } else if s[1] == 'o' {
175 if s[2] == 'f' {
176 if s[3] == ' ' {
177 addMatch(id+8*n, l+4, l, matches)
178 }
179 } else if s[2] == 'n' {
180 if s[3] == ' ' {
181 addMatch(id+45*n, l+4, l, matches)
182 }
183 }
184 } else if s[1] == 'n' {
185 if s[2] == 'o' && s[3] == 't' && s[4] == ' ' {
186 addMatch(id+80*n, l+5, l, matches)
187 }
188 } else if s[1] == 't' {
189 if s[2] == 'h' {
190 if s[3] == 'e' {
191 if s[4] == ' ' {
192 addMatch(id+5*n, l+5, l, matches)
193 }
194 } else if s[3] == 'a' {
195 if s[4] == 't' && s[5] == ' ' {
196 addMatch(id+29*n, l+6, l, matches)
197 }
198 }
199 } else if s[2] == 'o' {
200 if s[3] == ' ' {
201 addMatch(id+17*n, l+4, l, matches)
202 }
203 }
204 } else if s[1] == 'w' {
205 if s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ' {
206 addMatch(id+35*n, l+6, l, matches)
207 }
208 }
209 } else if s[0] == '"' {
210 addMatch(id+19*n, l+1, l, matches)
211 if s[1] == '>' {
212 addMatch(id+21*n, l+2, l, matches)
213 }
214 } else if s[0] == '.' {
215 addMatch(id+20*n, l+1, l, matches)
216 if s[1] == ' ' {
217 addMatch(id+31*n, l+2, l, matches)
218 if s[2] == 'T' && s[3] == 'h' {
219 if s[4] == 'e' {
220 if s[5] == ' ' {
221 addMatch(id+43*n, l+6, l, matches)
222 }
223 } else if s[4] == 'i' {
224 if s[5] == 's' && s[6] == ' ' {
225 addMatch(id+75*n, l+7, l, matches)
226 }
227 }
228 }
229 }
230 } else if s[0] == ',' {
231 addMatch(id+76*n, l+1, l, matches)
232 if s[1] == ' ' {
233 addMatch(id+14*n, l+2, l, matches)
234 }
235 } else if s[0] == '\n' {
236 addMatch(id+22*n, l+1, l, matches)
237 if s[1] == '\t' {
238 addMatch(id+50*n, l+2, l, matches)
239 }
240 } else if s[0] == ']' {
241 addMatch(id+24*n, l+1, l, matches)
242 } else if s[0] == '\'' {
243 addMatch(id+36*n, l+1, l, matches)
244 } else if s[0] == ':' {
245 addMatch(id+51*n, l+1, l, matches)
246 } else if s[0] == '(' {
247 addMatch(id+57*n, l+1, l, matches)
248 } else if s[0] == '=' {
249 if s[1] == '"' {
250 addMatch(id+70*n, l+2, l, matches)
251 } else if s[1] == '\'' {
252 addMatch(id+86*n, l+2, l, matches)
253 }
254 } else if s[0] == 'a' {
255 if s[1] == 'l' && s[2] == ' ' {
256 addMatch(id+84*n, l+3, l, matches)
257 }
258 } else if s[0] == 'e' {
259 if s[1] == 'd' {
260 if s[2] == ' ' {
261 addMatch(id+53*n, l+3, l, matches)
262 }
263 } else if s[1] == 'r' {
264 if s[2] == ' ' {
265 addMatch(id+82*n, l+3, l, matches)
266 }
267 } else if s[1] == 's' {
268 if s[2] == 't' && s[3] == ' ' {
269 addMatch(id+95*n, l+4, l, matches)
270 }
271 }
272 } else if s[0] == 'f' {
273 if s[1] == 'u' && s[2] == 'l' && s[3] == ' ' {
274 addMatch(id+90*n, l+4, l, matches)
275 }
276 } else if s[0] == 'i' {
277 if s[1] == 'v' {
278 if s[2] == 'e' && s[3] == ' ' {
279 addMatch(id+92*n, l+4, l, matches)
280 }
281 } else if s[1] == 'z' {
282 if s[2] == 'e' && s[3] == ' ' {
283 addMatch(id+100*n, l+4, l, matches)
284 }
285 }
286 } else if s[0] == 'l' {
287 if s[1] == 'e' {
288 if s[2] == 's' && s[3] == 's' && s[4] == ' ' {
289 addMatch(id+93*n, l+5, l, matches)
290 }
291 } else if s[1] == 'y' {
292 if s[2] == ' ' {
293 addMatch(id+61*n, l+3, l, matches)
294 }
295 }
296 } else if s[0] == 'o' {
297 if s[1] == 'u' && s[2] == 's' && s[3] == ' ' {
298 addMatch(id+106*n, l+4, l, matches)
299 }
300 }
301 } else {
302 var is_all_caps bool = (w.transform != transformUppercaseFirst)
303 /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
304 is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
305 transform. */
306
307 var s []byte
308 if !isMatch(dict.words, w, data, max_length) {
309 continue
310 }
311
312 /* Transform "" + kUppercase{First,All} + "" */
313 var tmp int
314 if is_all_caps {
315 tmp = 44
316 } else {
317 tmp = 9
318 }
319 addMatch(id+uint(tmp)*n, l, l, matches)
320
321 has_found_match = true
322 if l+1 >= max_length {
323 continue
324 }
325
326 /* Transforms "" + kUppercase{First,All} + <suffix> */
327 s = data[l:]
328
329 if s[0] == ' ' {
330 var tmp int
331 if is_all_caps {
332 tmp = 68
333 } else {
334 tmp = 4
335 }
336 addMatch(id+uint(tmp)*n, l+1, l, matches)
337 } else if s[0] == '"' {
338 var tmp int
339 if is_all_caps {
340 tmp = 87
341 } else {
342 tmp = 66
343 }
344 addMatch(id+uint(tmp)*n, l+1, l, matches)
345 if s[1] == '>' {
346 var tmp int
347 if is_all_caps {
348 tmp = 97
349 } else {
350 tmp = 69
351 }
352 addMatch(id+uint(tmp)*n, l+2, l, matches)
353 }
354 } else if s[0] == '.' {
355 var tmp int
356 if is_all_caps {
357 tmp = 101
358 } else {
359 tmp = 79
360 }
361 addMatch(id+uint(tmp)*n, l+1, l, matches)
362 if s[1] == ' ' {
363 var tmp int
364 if is_all_caps {
365 tmp = 114
366 } else {
367 tmp = 88
368 }
369 addMatch(id+uint(tmp)*n, l+2, l, matches)
370 }
371 } else if s[0] == ',' {
372 var tmp int
373 if is_all_caps {
374 tmp = 112
375 } else {
376 tmp = 99
377 }
378 addMatch(id+uint(tmp)*n, l+1, l, matches)
379 if s[1] == ' ' {
380 var tmp int
381 if is_all_caps {
382 tmp = 107
383 } else {
384 tmp = 58
385 }
386 addMatch(id+uint(tmp)*n, l+2, l, matches)
387 }
388 } else if s[0] == '\'' {
389 var tmp int
390 if is_all_caps {
391 tmp = 94
392 } else {
393 tmp = 74
394 }
395 addMatch(id+uint(tmp)*n, l+1, l, matches)
396 } else if s[0] == '(' {
397 var tmp int
398 if is_all_caps {
399 tmp = 113
400 } else {
401 tmp = 78
402 }
403 addMatch(id+uint(tmp)*n, l+1, l, matches)
404 } else if s[0] == '=' {
405 if s[1] == '"' {
406 var tmp int
407 if is_all_caps {
408 tmp = 105
409 } else {
410 tmp = 104
411 }
412 addMatch(id+uint(tmp)*n, l+2, l, matches)
413 } else if s[1] == '\'' {
414 var tmp int
415 if is_all_caps {
416 tmp = 116
417 } else {
418 tmp = 108
419 }
420 addMatch(id+uint(tmp)*n, l+2, l, matches)
421 }
422 }
423 }
424 }
425 }
426
427 /* Transforms with prefixes " " and "." */
428 if max_length >= 5 && (data[0] == ' ' || data[0] == '.') {
429 var is_space bool = (data[0] == ' ')
430 var offset uint = uint(dict.buckets[hash(data[1:])])
431 var end bool = offset == 0
432 for !end {
433 w := dict.dict_words[offset]
434 offset++
435 var l uint = uint(w.len) & 0x1F
436 var n uint = uint(1) << dict.words.size_bits_by_length[l]
437 var id uint = uint(w.idx)
438 end = !(w.len&0x80 == 0)
439 w.len = byte(l)
440 if w.transform == 0 {
441 var s []byte
442 if !isMatch(dict.words, w, data[1:], max_length-1) {
443 continue
444 }
445
446 /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
447 "." + BROTLI_TRANSFORM_IDENTITY + "" */
448 var tmp int
449 if is_space {
450 tmp = 6
451 } else {
452 tmp = 32
453 }
454 addMatch(id+uint(tmp)*n, l+1, l, matches)
455
456 has_found_match = true
457 if l+2 >= max_length {
458 continue
459 }
460
461 /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
462 "." + BROTLI_TRANSFORM_IDENTITY + <suffix>
463 */
464 s = data[l+1:]
465
466 if s[0] == ' ' {
467 var tmp int
468 if is_space {
469 tmp = 2
470 } else {
471 tmp = 77
472 }
473 addMatch(id+uint(tmp)*n, l+2, l, matches)
474 } else if s[0] == '(' {
475 var tmp int
476 if is_space {
477 tmp = 89
478 } else {
479 tmp = 67
480 }
481 addMatch(id+uint(tmp)*n, l+2, l, matches)
482 } else if is_space {
483 if s[0] == ',' {
484 addMatch(id+103*n, l+2, l, matches)
485 if s[1] == ' ' {
486 addMatch(id+33*n, l+3, l, matches)
487 }
488 } else if s[0] == '.' {
489 addMatch(id+71*n, l+2, l, matches)
490 if s[1] == ' ' {
491 addMatch(id+52*n, l+3, l, matches)
492 }
493 } else if s[0] == '=' {
494 if s[1] == '"' {
495 addMatch(id+81*n, l+3, l, matches)
496 } else if s[1] == '\'' {
497 addMatch(id+98*n, l+3, l, matches)
498 }
499 }
500 }
501 } else if is_space {
502 var is_all_caps bool = (w.transform != transformUppercaseFirst)
503 /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
504 is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
505 transform. */
506
507 var s []byte
508 if !isMatch(dict.words, w, data[1:], max_length-1) {
509 continue
510 }
511
512 /* Transforms " " + kUppercase{First,All} + "" */
513 var tmp int
514 if is_all_caps {
515 tmp = 85
516 } else {
517 tmp = 30
518 }
519 addMatch(id+uint(tmp)*n, l+1, l, matches)
520
521 has_found_match = true
522 if l+2 >= max_length {
523 continue
524 }
525
526 /* Transforms " " + kUppercase{First,All} + <suffix> */
527 s = data[l+1:]
528
529 if s[0] == ' ' {
530 var tmp int
531 if is_all_caps {
532 tmp = 83
533 } else {
534 tmp = 15
535 }
536 addMatch(id+uint(tmp)*n, l+2, l, matches)
537 } else if s[0] == ',' {
538 if !is_all_caps {
539 addMatch(id+109*n, l+2, l, matches)
540 }
541
542 if s[1] == ' ' {
543 var tmp int
544 if is_all_caps {
545 tmp = 111
546 } else {
547 tmp = 65
548 }
549 addMatch(id+uint(tmp)*n, l+3, l, matches)
550 }
551 } else if s[0] == '.' {
552 var tmp int
553 if is_all_caps {
554 tmp = 115
555 } else {
556 tmp = 96
557 }
558 addMatch(id+uint(tmp)*n, l+2, l, matches)
559 if s[1] == ' ' {
560 var tmp int
561 if is_all_caps {
562 tmp = 117
563 } else {
564 tmp = 91
565 }
566 addMatch(id+uint(tmp)*n, l+3, l, matches)
567 }
568 } else if s[0] == '=' {
569 if s[1] == '"' {
570 var tmp int
571 if is_all_caps {
572 tmp = 110
573 } else {
574 tmp = 118
575 }
576 addMatch(id+uint(tmp)*n, l+3, l, matches)
577 } else if s[1] == '\'' {
578 var tmp int
579 if is_all_caps {
580 tmp = 119
581 } else {
582 tmp = 120
583 }
584 addMatch(id+uint(tmp)*n, l+3, l, matches)
585 }
586 }
587 }
588 }
589 }
590
591 if max_length >= 6 {
592 /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
593 if (data[1] == ' ' && (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || (data[0] == 0xC2 && data[1] == 0xA0) {
594 var offset uint = uint(dict.buckets[hash(data[2:])])
595 var end bool = offset == 0
596 for !end {
597 w := dict.dict_words[offset]
598 offset++
599 var l uint = uint(w.len) & 0x1F
600 var n uint = uint(1) << dict.words.size_bits_by_length[l]
601 var id uint = uint(w.idx)
602 end = !(w.len&0x80 == 0)
603 w.len = byte(l)
604 if w.transform == 0 && isMatch(dict.words, w, data[2:], max_length-2) {
605 if data[0] == 0xC2 {
606 addMatch(id+102*n, l+2, l, matches)
607 has_found_match = true
608 } else if l+2 < max_length && data[l+2] == ' ' {
609 var t uint = 13
610 if data[0] == 'e' {
611 t = 18
612 } else if data[0] == 's' {
613 t = 7
614 }
615 addMatch(id+t*n, l+3, l, matches)
616 has_found_match = true
617 }
618 }
619 }
620 }
621 }
622
623 if max_length >= 9 {
624 /* Transforms with prefixes " the " and ".com/" */
625 if (data[0] == ' ' && data[1] == 't' && data[2] == 'h' && data[3] == 'e' && data[4] == ' ') || (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && data[3] == 'm' && data[4] == '/') {
626 var offset uint = uint(dict.buckets[hash(data[5:])])
627 var end bool = offset == 0
628 for !end {
629 w := dict.dict_words[offset]
630 offset++
631 var l uint = uint(w.len) & 0x1F
632 var n uint = uint(1) << dict.words.size_bits_by_length[l]
633 var id uint = uint(w.idx)
634 end = !(w.len&0x80 == 0)
635 w.len = byte(l)
636 if w.transform == 0 && isMatch(dict.words, w, data[5:], max_length-5) {
637 var tmp int
638 if data[0] == ' ' {
639 tmp = 41
640 } else {
641 tmp = 72
642 }
643 addMatch(id+uint(tmp)*n, l+5, l, matches)
644 has_found_match = true
645 if l+5 < max_length {
646 var s []byte = data[l+5:]
647 if data[0] == ' ' {
648 if l+8 < max_length && s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ' {
649 addMatch(id+62*n, l+9, l, matches)
650 if l+12 < max_length && s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ' {
651 addMatch(id+73*n, l+13, l, matches)
652 }
653 }
654 }
655 }
656 }
657 }
658 }
659 }
660
661 return has_found_match
662}
Note: See TracBrowser for help on using the repository browser.