Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: code/trunk/vendor/github.com/andybalholm/brotli/static_dict.go@ 145

Last change on this file since 145 was 145, checked in by Izuru Yakumo, 22 months ago

Updated the Makefile and vendored depedencies

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 16.7 KB

Line
1	package brotli
2
3	import "encoding/binary"
4
5	/* Copyright 2013 Google Inc. All Rights Reserved.
6
7	Distributed under MIT license.
8	See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
9	*/
10
11	/* Class to model the static dictionary. */
12
13	const maxStaticDictionaryMatchLen = 37
14
15	const kInvalidMatch uint32 = 0xFFFFFFF
16
17	/* Copyright 2013 Google Inc. All Rights Reserved.
18
19	Distributed under MIT license.
20	See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
21	*/
22	func hash(data []byte) uint32 {
23	var h uint32 = binary.LittleEndian.Uint32(data) * kDictHashMul32
24
25	/* The higher bits contain more mixture from the multiplication,
26	so we take our results from there. */
27	return h >> uint(32-kDictNumBits)
28	}
29
30	func addMatch(distance uint, len uint, len_code uint, matches []uint32) {
31	var match uint32 = uint32((distance << 5) + len_code)
32	matches[len] = brotli_min_uint32_t(matches[len], match)
33	}
34
35	func dictMatchLength(dict *dictionary, data []byte, id uint, len uint, maxlen uint) uint {
36	var offset uint = uint(dict.offsets_by_length[len]) + len*id
37	return findMatchLengthWithLimit(dict.data[offset:], data, brotli_min_size_t(uint(len), maxlen))
38	}
39
40	func isMatch(d *dictionary, w dictWord, data []byte, max_length uint) bool {
41	if uint(w.len) > max_length {
42	return false
43	} else {
44	var offset uint = uint(d.offsets_by_length[w.len]) + uint(w.len)*uint(w.idx)
45	var dict []byte = d.data[offset:]
46	if w.transform == 0 {
47	/* Match against base dictionary word. */
48	return findMatchLengthWithLimit(dict, data, uint(w.len)) == uint(w.len)
49	} else if w.transform == 10 {
50	/* Match against uppercase first transform.
51	Note that there are only ASCII uppercase words in the lookup table. */
52	return dict[0] >= 'a' && dict[0] <= 'z' && (dict[0]^32) == data[0] && findMatchLengthWithLimit(dict[1:], data[1:], uint(w.len)-1) == uint(w.len-1)
53	} else {
54	/* Match against uppercase all transform.
55	Note that there are only ASCII uppercase words in the lookup table. */
56	var i uint
57	for i = 0; i < uint(w.len); i++ {
58	if dict[i] >= 'a' && dict[i] <= 'z' {
59	if (dict[i] ^ 32) != data[i] {
60	return false
61	}
62	} else {
63	if dict[i] != data[i] {
64	return false
65	}
66	}
67	}
68
69	return true
70	}
71	}
72	}
73
74	func findAllStaticDictionaryMatches(dict *encoderDictionary, data []byte, min_length uint, max_length uint, matches []uint32) bool {
75	var has_found_match bool = false
76	{
77	var offset uint = uint(dict.buckets[hash(data)])
78	var end bool = offset == 0
79	for !end {
80	w := dict.dict_words[offset]
81	offset++
82	var l uint = uint(w.len) & 0x1F
83	var n uint = uint(1) << dict.words.size_bits_by_length[l]
84	var id uint = uint(w.idx)
85	end = !(w.len&0x80 == 0)
86	w.len = byte(l)
87	if w.transform == 0 {
88	var matchlen uint = dictMatchLength(dict.words, data, id, l, max_length)
89	var s []byte
90	var minlen uint
91	var maxlen uint
92	var len uint
93
94	/* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
95	if matchlen == l {
96	addMatch(id, l, l, matches)
97	has_found_match = true
98	}
99
100	/* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
101	"" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
102	if matchlen >= l-1 {
103	addMatch(id+12*n, l-1, l, matches)
104	if l+2 < max_length && data[l-1] == 'i' && data[l] == 'n' && data[l+1] == 'g' && data[l+2] == ' ' {
105	addMatch(id+49*n, l+3, l, matches)
106	}
107
108	has_found_match = true
109	}
110
111	/* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
112	minlen = min_length
113
114	if l > 9 {
115	minlen = brotli_max_size_t(minlen, l-9)
116	}
117	maxlen = brotli_min_size_t(matchlen, l-2)
118	for len = minlen; len <= maxlen; len++ {
119	var cut uint = l - len
120	var transform_id uint = (cut << 2) + uint((dict.cutoffTransforms>>(cut*6))&0x3F)
121	addMatch(id+transform_id*n, uint(len), l, matches)
122	has_found_match = true
123	}
124
125	if matchlen < l \|\| l+6 >= max_length {
126	continue
127	}
128
129	s = data[l:]
130
131	/* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
132	if s[0] == ' ' {
133	addMatch(id+n, l+1, l, matches)
134	if s[1] == 'a' {
135	if s[2] == ' ' {
136	addMatch(id+28*n, l+3, l, matches)
137	} else if s[2] == 's' {
138	if s[3] == ' ' {
139	addMatch(id+46*n, l+4, l, matches)
140	}
141	} else if s[2] == 't' {
142	if s[3] == ' ' {
143	addMatch(id+60*n, l+4, l, matches)
144	}
145	} else if s[2] == 'n' {
146	if s[3] == 'd' && s[4] == ' ' {
147	addMatch(id+10*n, l+5, l, matches)
148	}
149	}
150	} else if s[1] == 'b' {
151	if s[2] == 'y' && s[3] == ' ' {
152	addMatch(id+38*n, l+4, l, matches)
153	}
154	} else if s[1] == 'i' {
155	if s[2] == 'n' {
156	if s[3] == ' ' {
157	addMatch(id+16*n, l+4, l, matches)
158	}
159	} else if s[2] == 's' {
160	if s[3] == ' ' {
161	addMatch(id+47*n, l+4, l, matches)
162	}
163	}
164	} else if s[1] == 'f' {
165	if s[2] == 'o' {
166	if s[3] == 'r' && s[4] == ' ' {
167	addMatch(id+25*n, l+5, l, matches)
168	}
169	} else if s[2] == 'r' {
170	if s[3] == 'o' && s[4] == 'm' && s[5] == ' ' {
171	addMatch(id+37*n, l+6, l, matches)
172	}
173	}
174	} else if s[1] == 'o' {
175	if s[2] == 'f' {
176	if s[3] == ' ' {
177	addMatch(id+8*n, l+4, l, matches)
178	}
179	} else if s[2] == 'n' {
180	if s[3] == ' ' {
181	addMatch(id+45*n, l+4, l, matches)
182	}
183	}
184	} else if s[1] == 'n' {
185	if s[2] == 'o' && s[3] == 't' && s[4] == ' ' {
186	addMatch(id+80*n, l+5, l, matches)
187	}
188	} else if s[1] == 't' {
189	if s[2] == 'h' {
190	if s[3] == 'e' {
191	if s[4] == ' ' {
192	addMatch(id+5*n, l+5, l, matches)
193	}
194	} else if s[3] == 'a' {
195	if s[4] == 't' && s[5] == ' ' {
196	addMatch(id+29*n, l+6, l, matches)
197	}
198	}
199	} else if s[2] == 'o' {
200	if s[3] == ' ' {
201	addMatch(id+17*n, l+4, l, matches)
202	}
203	}
204	} else if s[1] == 'w' {
205	if s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ' {
206	addMatch(id+35*n, l+6, l, matches)
207	}
208	}
209	} else if s[0] == '"' {
210	addMatch(id+19*n, l+1, l, matches)
211	if s[1] == '>' {
212	addMatch(id+21*n, l+2, l, matches)
213	}
214	} else if s[0] == '.' {
215	addMatch(id+20*n, l+1, l, matches)
216	if s[1] == ' ' {
217	addMatch(id+31*n, l+2, l, matches)
218	if s[2] == 'T' && s[3] == 'h' {
219	if s[4] == 'e' {
220	if s[5] == ' ' {
221	addMatch(id+43*n, l+6, l, matches)
222	}
223	} else if s[4] == 'i' {
224	if s[5] == 's' && s[6] == ' ' {
225	addMatch(id+75*n, l+7, l, matches)
226	}
227	}
228	}
229	}
230	} else if s[0] == ',' {
231	addMatch(id+76*n, l+1, l, matches)
232	if s[1] == ' ' {
233	addMatch(id+14*n, l+2, l, matches)
234	}
235	} else if s[0] == '\n' {
236	addMatch(id+22*n, l+1, l, matches)
237	if s[1] == '\t' {
238	addMatch(id+50*n, l+2, l, matches)
239	}
240	} else if s[0] == ']' {
241	addMatch(id+24*n, l+1, l, matches)
242	} else if s[0] == '\'' {
243	addMatch(id+36*n, l+1, l, matches)
244	} else if s[0] == ':' {
245	addMatch(id+51*n, l+1, l, matches)
246	} else if s[0] == '(' {
247	addMatch(id+57*n, l+1, l, matches)
248	} else if s[0] == '=' {
249	if s[1] == '"' {
250	addMatch(id+70*n, l+2, l, matches)
251	} else if s[1] == '\'' {
252	addMatch(id+86*n, l+2, l, matches)
253	}
254	} else if s[0] == 'a' {
255	if s[1] == 'l' && s[2] == ' ' {
256	addMatch(id+84*n, l+3, l, matches)
257	}
258	} else if s[0] == 'e' {
259	if s[1] == 'd' {
260	if s[2] == ' ' {
261	addMatch(id+53*n, l+3, l, matches)
262	}
263	} else if s[1] == 'r' {
264	if s[2] == ' ' {
265	addMatch(id+82*n, l+3, l, matches)
266	}
267	} else if s[1] == 's' {
268	if s[2] == 't' && s[3] == ' ' {
269	addMatch(id+95*n, l+4, l, matches)
270	}
271	}
272	} else if s[0] == 'f' {
273	if s[1] == 'u' && s[2] == 'l' && s[3] == ' ' {
274	addMatch(id+90*n, l+4, l, matches)
275	}
276	} else if s[0] == 'i' {
277	if s[1] == 'v' {
278	if s[2] == 'e' && s[3] == ' ' {
279	addMatch(id+92*n, l+4, l, matches)
280	}
281	} else if s[1] == 'z' {
282	if s[2] == 'e' && s[3] == ' ' {
283	addMatch(id+100*n, l+4, l, matches)
284	}
285	}
286	} else if s[0] == 'l' {
287	if s[1] == 'e' {
288	if s[2] == 's' && s[3] == 's' && s[4] == ' ' {
289	addMatch(id+93*n, l+5, l, matches)
290	}
291	} else if s[1] == 'y' {
292	if s[2] == ' ' {
293	addMatch(id+61*n, l+3, l, matches)
294	}
295	}
296	} else if s[0] == 'o' {
297	if s[1] == 'u' && s[2] == 's' && s[3] == ' ' {
298	addMatch(id+106*n, l+4, l, matches)
299	}
300	}
301	} else {
302	var is_all_caps bool = (w.transform != transformUppercaseFirst)
303	/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
304	is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
305	transform. */
306
307	var s []byte
308	if !isMatch(dict.words, w, data, max_length) {
309	continue
310	}
311
312	/* Transform "" + kUppercase{First,All} + "" */
313	var tmp int
314	if is_all_caps {
315	tmp = 44
316	} else {
317	tmp = 9
318	}
319	addMatch(id+uint(tmp)*n, l, l, matches)
320
321	has_found_match = true
322	if l+1 >= max_length {
323	continue
324	}
325
326	/* Transforms "" + kUppercase{First,All} + <suffix> */
327	s = data[l:]
328
329	if s[0] == ' ' {
330	var tmp int
331	if is_all_caps {
332	tmp = 68
333	} else {
334	tmp = 4
335	}
336	addMatch(id+uint(tmp)*n, l+1, l, matches)
337	} else if s[0] == '"' {
338	var tmp int
339	if is_all_caps {
340	tmp = 87
341	} else {
342	tmp = 66
343	}
344	addMatch(id+uint(tmp)*n, l+1, l, matches)
345	if s[1] == '>' {
346	var tmp int
347	if is_all_caps {
348	tmp = 97
349	} else {
350	tmp = 69
351	}
352	addMatch(id+uint(tmp)*n, l+2, l, matches)
353	}
354	} else if s[0] == '.' {
355	var tmp int
356	if is_all_caps {
357	tmp = 101
358	} else {
359	tmp = 79
360	}
361	addMatch(id+uint(tmp)*n, l+1, l, matches)
362	if s[1] == ' ' {
363	var tmp int
364	if is_all_caps {
365	tmp = 114
366	} else {
367	tmp = 88
368	}
369	addMatch(id+uint(tmp)*n, l+2, l, matches)
370	}
371	} else if s[0] == ',' {
372	var tmp int
373	if is_all_caps {
374	tmp = 112
375	} else {
376	tmp = 99
377	}
378	addMatch(id+uint(tmp)*n, l+1, l, matches)
379	if s[1] == ' ' {
380	var tmp int
381	if is_all_caps {
382	tmp = 107
383	} else {
384	tmp = 58
385	}
386	addMatch(id+uint(tmp)*n, l+2, l, matches)
387	}
388	} else if s[0] == '\'' {
389	var tmp int
390	if is_all_caps {
391	tmp = 94
392	} else {
393	tmp = 74
394	}
395	addMatch(id+uint(tmp)*n, l+1, l, matches)
396	} else if s[0] == '(' {
397	var tmp int
398	if is_all_caps {
399	tmp = 113
400	} else {
401	tmp = 78
402	}
403	addMatch(id+uint(tmp)*n, l+1, l, matches)
404	} else if s[0] == '=' {
405	if s[1] == '"' {
406	var tmp int
407	if is_all_caps {
408	tmp = 105
409	} else {
410	tmp = 104
411	}
412	addMatch(id+uint(tmp)*n, l+2, l, matches)
413	} else if s[1] == '\'' {
414	var tmp int
415	if is_all_caps {
416	tmp = 116
417	} else {
418	tmp = 108
419	}
420	addMatch(id+uint(tmp)*n, l+2, l, matches)
421	}
422	}
423	}
424	}
425	}
426
427	/* Transforms with prefixes " " and "." */
428	if max_length >= 5 && (data[0] == ' ' \|\| data[0] == '.') {
429	var is_space bool = (data[0] == ' ')
430	var offset uint = uint(dict.buckets[hash(data[1:])])
431	var end bool = offset == 0
432	for !end {
433	w := dict.dict_words[offset]
434	offset++
435	var l uint = uint(w.len) & 0x1F
436	var n uint = uint(1) << dict.words.size_bits_by_length[l]
437	var id uint = uint(w.idx)
438	end = !(w.len&0x80 == 0)
439	w.len = byte(l)
440	if w.transform == 0 {
441	var s []byte
442	if !isMatch(dict.words, w, data[1:], max_length-1) {
443	continue
444	}
445
446	/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
447	"." + BROTLI_TRANSFORM_IDENTITY + "" */
448	var tmp int
449	if is_space {
450	tmp = 6
451	} else {
452	tmp = 32
453	}
454	addMatch(id+uint(tmp)*n, l+1, l, matches)
455
456	has_found_match = true
457	if l+2 >= max_length {
458	continue
459	}
460
461	/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
462	"." + BROTLI_TRANSFORM_IDENTITY + <suffix>
463	*/
464	s = data[l+1:]
465
466	if s[0] == ' ' {
467	var tmp int
468	if is_space {
469	tmp = 2
470	} else {
471	tmp = 77
472	}
473	addMatch(id+uint(tmp)*n, l+2, l, matches)
474	} else if s[0] == '(' {
475	var tmp int
476	if is_space {
477	tmp = 89
478	} else {
479	tmp = 67
480	}
481	addMatch(id+uint(tmp)*n, l+2, l, matches)
482	} else if is_space {
483	if s[0] == ',' {
484	addMatch(id+103*n, l+2, l, matches)
485	if s[1] == ' ' {
486	addMatch(id+33*n, l+3, l, matches)
487	}
488	} else if s[0] == '.' {
489	addMatch(id+71*n, l+2, l, matches)
490	if s[1] == ' ' {
491	addMatch(id+52*n, l+3, l, matches)
492	}
493	} else if s[0] == '=' {
494	if s[1] == '"' {
495	addMatch(id+81*n, l+3, l, matches)
496	} else if s[1] == '\'' {
497	addMatch(id+98*n, l+3, l, matches)
498	}
499	}
500	}
501	} else if is_space {
502	var is_all_caps bool = (w.transform != transformUppercaseFirst)
503	/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
504	is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
505	transform. */
506
507	var s []byte
508	if !isMatch(dict.words, w, data[1:], max_length-1) {
509	continue
510	}
511
512	/* Transforms " " + kUppercase{First,All} + "" */
513	var tmp int
514	if is_all_caps {
515	tmp = 85
516	} else {
517	tmp = 30
518	}
519	addMatch(id+uint(tmp)*n, l+1, l, matches)
520
521	has_found_match = true
522	if l+2 >= max_length {
523	continue
524	}
525
526	/* Transforms " " + kUppercase{First,All} + <suffix> */
527	s = data[l+1:]
528
529	if s[0] == ' ' {
530	var tmp int
531	if is_all_caps {
532	tmp = 83
533	} else {
534	tmp = 15
535	}
536	addMatch(id+uint(tmp)*n, l+2, l, matches)
537	} else if s[0] == ',' {
538	if !is_all_caps {
539	addMatch(id+109*n, l+2, l, matches)
540	}
541
542	if s[1] == ' ' {
543	var tmp int
544	if is_all_caps {
545	tmp = 111
546	} else {
547	tmp = 65
548	}
549	addMatch(id+uint(tmp)*n, l+3, l, matches)
550	}
551	} else if s[0] == '.' {
552	var tmp int
553	if is_all_caps {
554	tmp = 115
555	} else {
556	tmp = 96
557	}
558	addMatch(id+uint(tmp)*n, l+2, l, matches)
559	if s[1] == ' ' {
560	var tmp int
561	if is_all_caps {
562	tmp = 117
563	} else {
564	tmp = 91
565	}
566	addMatch(id+uint(tmp)*n, l+3, l, matches)
567	}
568	} else if s[0] == '=' {
569	if s[1] == '"' {
570	var tmp int
571	if is_all_caps {
572	tmp = 110
573	} else {
574	tmp = 118
575	}
576	addMatch(id+uint(tmp)*n, l+3, l, matches)
577	} else if s[1] == '\'' {
578	var tmp int
579	if is_all_caps {
580	tmp = 119
581	} else {
582	tmp = 120
583	}
584	addMatch(id+uint(tmp)*n, l+3, l, matches)
585	}
586	}
587	}
588	}
589	}
590
591	if max_length >= 6 {
592	/* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
593	if (data[1] == ' ' && (data[0] == 'e' \|\| data[0] == 's' \|\| data[0] == ',')) \|\| (data[0] == 0xC2 && data[1] == 0xA0) {
594	var offset uint = uint(dict.buckets[hash(data[2:])])
595	var end bool = offset == 0
596	for !end {
597	w := dict.dict_words[offset]
598	offset++
599	var l uint = uint(w.len) & 0x1F
600	var n uint = uint(1) << dict.words.size_bits_by_length[l]
601	var id uint = uint(w.idx)
602	end = !(w.len&0x80 == 0)
603	w.len = byte(l)
604	if w.transform == 0 && isMatch(dict.words, w, data[2:], max_length-2) {
605	if data[0] == 0xC2 {
606	addMatch(id+102*n, l+2, l, matches)
607	has_found_match = true
608	} else if l+2 < max_length && data[l+2] == ' ' {
609	var t uint = 13
610	if data[0] == 'e' {
611	t = 18
612	} else if data[0] == 's' {
613	t = 7
614	}
615	addMatch(id+t*n, l+3, l, matches)
616	has_found_match = true
617	}
618	}
619	}
620	}
621	}
622
623	if max_length >= 9 {
624	/* Transforms with prefixes " the " and ".com/" */
625	if (data[0] == ' ' && data[1] == 't' && data[2] == 'h' && data[3] == 'e' && data[4] == ' ') \|\| (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && data[3] == 'm' && data[4] == '/') {
626	var offset uint = uint(dict.buckets[hash(data[5:])])
627	var end bool = offset == 0
628	for !end {
629	w := dict.dict_words[offset]
630	offset++
631	var l uint = uint(w.len) & 0x1F
632	var n uint = uint(1) << dict.words.size_bits_by_length[l]
633	var id uint = uint(w.idx)
634	end = !(w.len&0x80 == 0)
635	w.len = byte(l)
636	if w.transform == 0 && isMatch(dict.words, w, data[5:], max_length-5) {
637	var tmp int
638	if data[0] == ' ' {
639	tmp = 41
640	} else {
641	tmp = 72
642	}
643	addMatch(id+uint(tmp)*n, l+5, l, matches)
644	has_found_match = true
645	if l+5 < max_length {
646	var s []byte = data[l+5:]
647	if data[0] == ' ' {
648	if l+8 < max_length && s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ' {
649	addMatch(id+62*n, l+9, l, matches)
650	if l+12 < max_length && s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ' {
651	addMatch(id+73*n, l+13, l, matches)
652	}
653	}
654	}
655	}
656	}
657	}
658	}
659	}
660
661	return has_found_match
662	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: