1 | package brotli
|
---|
2 |
|
---|
3 | import "encoding/binary"
|
---|
4 |
|
---|
5 | /* Copyright 2015 Google Inc. All Rights Reserved.
|
---|
6 |
|
---|
7 | Distributed under MIT license.
|
---|
8 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
---|
9 | */
|
---|
10 |
|
---|
11 | /* Function for fast encoding of an input fragment, independently from the input
|
---|
12 | history. This function uses one-pass processing: when we find a backward
|
---|
13 | match, we immediately emit the corresponding command and literal codes to
|
---|
14 | the bit stream.
|
---|
15 |
|
---|
16 | Adapted from the CompressFragment() function in
|
---|
17 | https://github.com/google/snappy/blob/master/snappy.cc */
|
---|
18 |
|
---|
/* Largest backward-reference distance this fast one-pass encoder will emit:
   (1 << 18) - 16. */
const maxDistance_compress_fragment = 262128
21 | func hash5(p []byte, shift uint) uint32 {
|
---|
22 | var h uint64 = (binary.LittleEndian.Uint64(p) << 24) * uint64(kHashMul32)
|
---|
23 | return uint32(h >> shift)
|
---|
24 | }
|
---|
25 |
|
---|
26 | func hashBytesAtOffset5(v uint64, offset int, shift uint) uint32 {
|
---|
27 | assert(offset >= 0)
|
---|
28 | assert(offset <= 3)
|
---|
29 | {
|
---|
30 | var h uint64 = ((v >> uint(8*offset)) << 24) * uint64(kHashMul32)
|
---|
31 | return uint32(h >> shift)
|
---|
32 | }
|
---|
33 | }
|
---|
34 |
|
---|
// isMatch5 reports whether the first 5 bytes of p1 and p2 are equal:
// a 4-byte word compare followed by a single byte compare.
func isMatch5(p1 []byte, p2 []byte) bool {
	if binary.LittleEndian.Uint32(p1) != binary.LittleEndian.Uint32(p2) {
		return false
	}
	return p1[4] == p2[4]
}
/* Builds a literal prefix code into "depths" and "bits" based on the statistics
   of the "input" string and stores it into the bit stream.
   Note that the prefix code here is built from the pre-LZ77 input, therefore
   we can only approximate the statistics of the actual literal stream.
   Moreover, for long inputs we build a histogram from a sample of the input
   and thus have to assign a non-zero depth for each literal.
   Returns estimated compression ratio millibytes/char for encoding given input
   with generated code. */
func buildAndStoreLiteralPrefixCode(input []byte, input_size uint, depths []byte, bits []uint16, storage_ix *uint, storage []byte) uint {
	var histogram = [256]uint32{0}
	var histogram_total uint
	var i uint
	if input_size < 1<<15 {
		/* Small input: count every byte exactly. */
		for i = 0; i < input_size; i++ {
			histogram[input[i]]++
		}

		histogram_total = input_size
		for i = 0; i < 256; i++ {
			/* We weigh the first 11 samples with weight 3 to account for the
			   balancing effect of the LZ77 phase on the histogram. */
			var adjust uint32 = 2 * brotli_min_uint32_t(histogram[i], 11)
			histogram[i] += adjust
			histogram_total += uint(adjust)
		}
	} else {
		/* Large input: sample every 29th byte to bound the histogram cost. */
		const kSampleRate uint = 29
		for i = 0; i < input_size; i += kSampleRate {
			histogram[input[i]]++
		}

		histogram_total = (input_size + kSampleRate - 1) / kSampleRate
		for i = 0; i < 256; i++ {
			/* We add 1 to each population count to avoid 0 bit depths (since this is
			   only a sample and we don't know if the symbol appears or not), and we
			   weigh the first 11 samples with weight 3 to account for the balancing
			   effect of the LZ77 phase on the histogram (more frequent symbols are
			   more likely to be in backward references instead as literals). */
			var adjust uint32 = 1 + 2*brotli_min_uint32_t(histogram[i], 11)
			histogram[i] += adjust
			histogram_total += uint(adjust)
		}
	}

	buildAndStoreHuffmanTreeFast(histogram[:], histogram_total, /* max_bits = */
		8, depths, bits, storage_ix, storage)
	{
		/* Sum of count * depth over all used symbols, scaled below. */
		var literal_ratio uint = 0
		for i = 0; i < 256; i++ {
			if histogram[i] != 0 {
				literal_ratio += uint(histogram[i] * uint32(depths[i]))
			}
		}

		/* Estimated encoding ratio, millibytes per symbol. */
		return (literal_ratio * 125) / histogram_total
	}
}
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
   "bits" based on "histogram" and stores it into the bit stream.
   depth[:64]/bits[:64] hold the command code, depth[64:]/bits[64:] the
   distance code. */
func buildAndStoreCommandPrefixCode1(histogram []uint32, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
	/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
	var tree [129]huffmanTree
	var cmd_depth = [numCommandSymbols]byte{0}

	var cmd_bits [64]uint16

	createHuffmanTree(histogram, 64, 15, tree[:], depth)
	createHuffmanTree(histogram[64:], 64, 14, tree[:], depth[64:])

	/* We have to jump through a few hoops here in order to compute
	   the command bits because the symbols are in a different order than in
	   the full alphabet. This looks complicated, but having the symbols
	   in this order in the command bits saves a few branches in the Emit*
	   functions. */
	copy(cmd_depth[:], depth[:24])

	/* Permute depth groups 24..63 into the order the bit assignment needs. */
	copy(cmd_depth[24:][:], depth[40:][:8])
	copy(cmd_depth[32:][:], depth[24:][:8])
	copy(cmd_depth[40:][:], depth[48:][:8])
	copy(cmd_depth[48:][:], depth[32:][:8])
	copy(cmd_depth[56:][:], depth[56:][:8])
	convertBitDepthsToSymbols(cmd_depth[:], 64, cmd_bits[:])
	/* Undo the permutation while storing the bit patterns back into "bits". */
	copy(bits, cmd_bits[:24])
	copy(bits[24:], cmd_bits[32:][:8])
	copy(bits[32:], cmd_bits[48:][:8])
	copy(bits[40:], cmd_bits[24:][:8])
	copy(bits[48:], cmd_bits[40:][:8])
	copy(bits[56:], cmd_bits[56:][:8])
	convertBitDepthsToSymbols(depth[64:], 64, bits[64:])
	{
		/* Create the bit length array for the full command alphabet. */
		var i uint
		/* NOTE(review): this "i :=" shadows the outer i; harmless since the
		   outer i is reassigned before its next use, but easy to misread. */
		for i := 0; i < int(64); i++ {
			cmd_depth[i] = 0
		} /* only 64 first values were used */
		copy(cmd_depth[:], depth[:8])
		copy(cmd_depth[64:][:], depth[8:][:8])
		copy(cmd_depth[128:][:], depth[16:][:8])
		copy(cmd_depth[192:][:], depth[24:][:8])
		copy(cmd_depth[384:][:], depth[32:][:8])
		/* Scatter the remaining depth groups at stride 8. */
		for i = 0; i < 8; i++ {
			cmd_depth[128+8*i] = depth[40+i]
			cmd_depth[256+8*i] = depth[48+i]
			cmd_depth[448+8*i] = depth[56+i]
		}

		storeHuffmanTree(cmd_depth[:], numCommandSymbols, tree[:], storage_ix, storage)
	}

	storeHuffmanTree(depth[64:], 64, tree[:], storage_ix, storage)
}
/* REQUIRES: insertlen < 6210 */
/* Writes the insert-length command code (and extra bits) for "insertlen" to
   the bit stream and updates the command histogram. */
func emitInsertLen1(insertlen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
	if insertlen < 6 {
		/* Codes 40..45: insert lengths 0..5, no extra bits. */
		var code uint = insertlen + 40
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		histo[code]++
	} else if insertlen < 130 {
		/* Two codes per extra-bit count; "prefix" selects the upper half. */
		var tail uint = insertlen - 2
		var nbits uint32 = log2FloorNonZero(tail) - 1
		var prefix uint = tail >> nbits
		var inscode uint = uint((nbits << 1) + uint32(prefix) + 42)
		writeBits(uint(depth[inscode]), uint64(bits[inscode]), storage_ix, storage)
		writeBits(uint(nbits), uint64(tail)-(uint64(prefix)<<nbits), storage_ix, storage)
		histo[inscode]++
	} else if insertlen < 2114 {
		/* One code per extra-bit count. */
		var tail uint = insertlen - 66
		var nbits uint32 = log2FloorNonZero(tail)
		var code uint = uint(nbits + 50)
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		writeBits(uint(nbits), uint64(tail)-(uint64(uint(1))<<nbits), storage_ix, storage)
		histo[code]++
	} else {
		/* Code 61 with 12 extra bits covers 2114..6209. */
		writeBits(uint(depth[61]), uint64(bits[61]), storage_ix, storage)
		writeBits(12, uint64(insertlen)-2114, storage_ix, storage)
		histo[61]++
	}
}
182 | func emitLongInsertLen(insertlen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
|
---|
183 | if insertlen < 22594 {
|
---|
184 | writeBits(uint(depth[62]), uint64(bits[62]), storage_ix, storage)
|
---|
185 | writeBits(14, uint64(insertlen)-6210, storage_ix, storage)
|
---|
186 | histo[62]++
|
---|
187 | } else {
|
---|
188 | writeBits(uint(depth[63]), uint64(bits[63]), storage_ix, storage)
|
---|
189 | writeBits(24, uint64(insertlen)-22594, storage_ix, storage)
|
---|
190 | histo[63]++
|
---|
191 | }
|
---|
192 | }
|
---|
193 |
|
---|
/* Writes the copy-length command code (and extra bits) for a copy whose
   distance is coded separately; updates the command histogram. */
func emitCopyLen1(copylen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
	if copylen < 10 {
		/* Codes copylen+14, no extra bits. */
		writeBits(uint(depth[copylen+14]), uint64(bits[copylen+14]), storage_ix, storage)
		histo[copylen+14]++
	} else if copylen < 134 {
		/* Two codes per extra-bit count; "prefix" selects the upper half. */
		var tail uint = copylen - 6
		var nbits uint32 = log2FloorNonZero(tail) - 1
		var prefix uint = tail >> nbits
		var code uint = uint((nbits << 1) + uint32(prefix) + 20)
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		writeBits(uint(nbits), uint64(tail)-(uint64(prefix)<<nbits), storage_ix, storage)
		histo[code]++
	} else if copylen < 2118 {
		/* One code per extra-bit count. */
		var tail uint = copylen - 70
		var nbits uint32 = log2FloorNonZero(tail)
		var code uint = uint(nbits + 28)
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		writeBits(uint(nbits), uint64(tail)-(uint64(uint(1))<<nbits), storage_ix, storage)
		histo[code]++
	} else {
		/* Code 39 with 24 extra bits covers all longer copies. */
		writeBits(uint(depth[39]), uint64(bits[39]), storage_ix, storage)
		writeBits(24, uint64(copylen)-2118, storage_ix, storage)
		histo[39]++
	}
}
/* Writes the copy-length code for a command that reuses the previous distance.
   For copylen >= 12 the explicit "last distance" symbol 64 is written after
   the copy-length code; for copylen < 12 no symbol 64 is emitted (the short
   codes presumably imply the reuse — confirm against the format spec). */
func emitCopyLenLastDistance1(copylen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
	if copylen < 12 {
		/* Codes copylen-4, no extra bits, no distance symbol. */
		writeBits(uint(depth[copylen-4]), uint64(bits[copylen-4]), storage_ix, storage)
		histo[copylen-4]++
	} else if copylen < 72 {
		/* Two codes per extra-bit count; "prefix" selects the upper half. */
		var tail uint = copylen - 8
		var nbits uint32 = log2FloorNonZero(tail) - 1
		var prefix uint = tail >> nbits
		var code uint = uint((nbits << 1) + uint32(prefix) + 4)
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		writeBits(uint(nbits), uint64(tail)-(uint64(prefix)<<nbits), storage_ix, storage)
		histo[code]++
	} else if copylen < 136 {
		/* Fixed 5 extra bits, then the explicit last-distance symbol 64. */
		var tail uint = copylen - 8
		var code uint = (tail >> 5) + 30
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		writeBits(5, uint64(tail)&31, storage_ix, storage)
		writeBits(uint(depth[64]), uint64(bits[64]), storage_ix, storage)
		histo[code]++
		histo[64]++
	} else if copylen < 2120 {
		/* One code per extra-bit count, plus symbol 64. */
		var tail uint = copylen - 72
		var nbits uint32 = log2FloorNonZero(tail)
		var code uint = uint(nbits + 28)
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		writeBits(uint(nbits), uint64(tail)-(uint64(uint(1))<<nbits), storage_ix, storage)
		writeBits(uint(depth[64]), uint64(bits[64]), storage_ix, storage)
		histo[code]++
		histo[64]++
	} else {
		/* Code 39 with 24 extra bits, plus symbol 64. */
		writeBits(uint(depth[39]), uint64(bits[39]), storage_ix, storage)
		writeBits(24, uint64(copylen)-2120, storage_ix, storage)
		writeBits(uint(depth[64]), uint64(bits[64]), storage_ix, storage)
		histo[39]++
		histo[64]++
	}
}
/* Writes the distance code (and extra bits) for "distance" to the bit stream
   and updates the command histogram (distance symbols live at histo[80:]). */
func emitDistance1(distance uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
	var d uint = distance + 3
	var nbits uint32 = log2FloorNonZero(d) - 1
	/* The bit just below the leading one selects between the two codes that
	   share the same extra-bit count. */
	var prefix uint = (d >> nbits) & 1
	var offset uint = (2 + prefix) << nbits
	var distcode uint = uint(2*(nbits-1) + uint32(prefix) + 80)
	writeBits(uint(depth[distcode]), uint64(bits[distcode]), storage_ix, storage)
	writeBits(uint(nbits), uint64(d)-uint64(offset), storage_ix, storage)
	histo[distcode]++
}
269 | func emitLiterals(input []byte, len uint, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
|
---|
270 | var j uint
|
---|
271 | for j = 0; j < len; j++ {
|
---|
272 | var lit byte = input[j]
|
---|
273 | writeBits(uint(depth[lit]), uint64(bits[lit]), storage_ix, storage)
|
---|
274 | }
|
---|
275 | }
|
---|
276 |
|
---|
/* REQUIRES: len <= 1 << 24. */
/* Writes a meta-block header for a block of "len" bytes: ISLAST=0, MNIBBLES,
   MLEN-1, then the ISUNCOMPRESSED bit. */
func storeMetaBlockHeader1(len uint, is_uncompressed bool, storage_ix *uint, storage []byte) {
	var nibbles uint = 6

	/* ISLAST */
	writeBits(1, 0, storage_ix, storage)

	/* Smallest nibble count that can hold len - 1. */
	if len <= 1<<16 {
		nibbles = 4
	} else if len <= 1<<20 {
		nibbles = 5
	}

	writeBits(2, uint64(nibbles)-4, storage_ix, storage)
	writeBits(nibbles*4, uint64(len)-1, storage_ix, storage)

	/* ISUNCOMPRESSED */
	writeSingleBit(is_uncompressed, storage_ix, storage)
}
// updateBits overwrites n_bits bits of array, starting at bit position pos,
// with the low bits of "bits"; all other bits of the touched bytes are kept.
// Works one byte at a time, so the span may straddle byte boundaries.
func updateBits(n_bits uint, bits uint32, pos uint, array []byte) {
	for n_bits > 0 {
		byte_pos := pos >> 3
		low := pos & 7 // bits below the window in this byte stay as-is
		chunk := n_bits
		if avail := 8 - low; avail < chunk {
			chunk = avail
		}
		high := low + chunk
		var keep_mask uint32 = (^((1 << high) - 1)) | ((1 << low) - 1)
		kept := uint32(array[byte_pos]) & keep_mask
		fresh := bits & ((1 << chunk) - 1)
		array[byte_pos] = byte(fresh<<low | kept)
		n_bits -= chunk
		bits >>= chunk
		pos += chunk
	}
}
// rewindBitPosition1 moves the bit-stream write position back to
// new_storage_ix and clears the bits at or above that position in the
// current byte, so subsequent writeBits calls OR into clean storage.
func rewindBitPosition1(new_storage_ix uint, storage_ix *uint, storage []byte) {
	used := new_storage_ix & 7 // bits already valid in the final byte
	storage[new_storage_ix>>3] &= byte((1 << used) - 1)
	*storage_ix = new_storage_ix
}
/* Sampling stride used by shouldMergeBlock when estimating the literal
   histogram of a candidate block. */
var shouldMergeBlock_kSampleRate uint = 43
/* Decides whether the next "len" bytes should be merged into the current
   meta-block instead of starting a new one, by comparing the estimated cost
   of coding a sample of them with the current literal depths against a
   fixed budget. */
func shouldMergeBlock(data []byte, len uint, depths []byte) bool {
	var histo = [256]uint{0}
	var i uint
	/* Sample every 43rd byte (shouldMergeBlock_kSampleRate). */
	for i = 0; i < len; i += shouldMergeBlock_kSampleRate {
		histo[data[i]]++
	}
	{
		var total uint = (len + shouldMergeBlock_kSampleRate - 1) / shouldMergeBlock_kSampleRate

		/* Budget: entropy-like term over the sample plus a constant, reduced
		   by the cost of each sampled literal under the current depths.
		   Merge if the budget is not exhausted. */
		var r float64 = (fastLog2(total)+0.5)*float64(total) + 200
		for i = 0; i < 256; i++ {
			r -= float64(histo[i]) * (float64(depths[i]) + fastLog2(histo[i]))
		}

		return r >= 0.0
	}
}
/* Decides whether a long insert run should trigger an uncompressed
   meta-block: only when little has been compressed so far relative to the
   insert length and the estimated literal ratio is poor. */
func shouldUseUncompressedMode(metablock_start []byte, next_emit []byte, insertlen uint, literal_ratio uint) bool {
	/* Both slices share one backing array, so the cap() difference is the
	   number of bytes between the two positions (pointer subtraction). */
	var compressed uint = uint(-cap(next_emit) + cap(metablock_start))
	if compressed*50 > insertlen {
		return false
	} else {
		/* literal_ratio is in millibytes/char; > 980 means near-incompressible. */
		return literal_ratio > 980
	}
}
/* Replaces everything emitted for the current meta-block with an
   uncompressed meta-block containing [begin, end). */
func emitUncompressedMetaBlock1(begin []byte, end []byte, storage_ix_start uint, storage_ix *uint, storage []byte) {
	/* Slices share one backing array; cap() difference == byte length. */
	var len uint = uint(-cap(end) + cap(begin))
	/* Throw away the partially emitted compressed form. */
	rewindBitPosition1(storage_ix_start, storage_ix, storage)
	storeMetaBlockHeader1(uint(len), true, storage_ix, storage)
	/* Uncompressed payload is byte-aligned. */
	*storage_ix = (*storage_ix + 7) &^ 7
	copy(storage[*storage_ix>>3:], begin[:len])
	*storage_ix += uint(len << 3)
	/* Clear the byte the next writeBits call will OR into. */
	storage[*storage_ix>>3] = 0
}
/* Seed for the per-block command/distance histogram (16 entries per row).
   NOTE(review): entries set to 1 appear to be the symbols the Emit* helpers
   can produce, so each gets a nonzero count before code construction —
   confirm against the upstream C sources. */
var kCmdHistoSeed = [128]uint32{
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
}
/* Size of the first meta-block of a fragment; later blocks are taken in
   kMergeBlockSize chunks and may be merged into the current meta-block. */
var compressFragmentFastImpl_kFirstBlockSize uint = 3 << 15
var compressFragmentFastImpl_kMergeBlockSize uint = 1 << 16
/* One-pass fragment compressor: scans "in[:input_size]" with a hash table of
   1<<table_bits entries (shift == 64 - table_bits), emitting insert/copy
   commands to the bit stream as soon as matches are found. The fragment is
   processed in blocks; each block gets its own meta-block header and literal
   prefix code, may be extended in place (shouldMergeBlock + updateBits on the
   MLEN field), or replaced wholesale by an uncompressed meta-block. */
func compressFragmentFastImpl(in []byte, input_size uint, is_last bool, table []int, table_bits uint, cmd_depth []byte, cmd_bits []uint16, cmd_code_numbits *uint, cmd_code []byte, storage_ix *uint, storage []byte) {
	var cmd_histo [128]uint32
	var ip_end int

	/* "next_emit" is a pointer to the first byte that is not covered by a
	   previous copy. Bytes between "next_emit" and the start of the next copy or
	   the end of the input will be emitted as literal bytes. */
	var next_emit int = 0

	/* Save the start of the first block for position and distance computations. */
	var base_ip int = 0
	var input int = 0
	const kInputMarginBytes uint = windowGap
	const kMinMatchLen uint = 5
	var metablock_start int = input
	var block_size uint = brotli_min_size_t(input_size, compressFragmentFastImpl_kFirstBlockSize)
	var total_block_size uint = block_size

	/* Save the bit position of the MLEN field of the meta-block header, so that
	   we can update it later if we decide to extend this meta-block. */
	var mlen_storage_ix uint = *storage_ix + 3
	var lit_depth [256]byte
	var lit_bits [256]uint16
	var literal_ratio uint
	var ip int
	var last_distance int
	var shift uint = 64 - table_bits

	storeMetaBlockHeader1(block_size, false, storage_ix, storage)

	/* No block splits, no contexts. */
	writeBits(13, 0, storage_ix, storage)

	literal_ratio = buildAndStoreLiteralPrefixCode(in[input:], block_size, lit_depth[:], lit_bits[:], storage_ix, storage)
	{
		/* Store the pre-compressed command and distance prefix codes. */
		var i uint
		for i = 0; i+7 < *cmd_code_numbits; i += 8 {
			writeBits(8, uint64(cmd_code[i>>3]), storage_ix, storage)
		}
	}

	writeBits(*cmd_code_numbits&7, uint64(cmd_code[*cmd_code_numbits>>3]), storage_ix, storage)

	/* Initialize the command and distance histograms. We will gather
	   statistics of command and distance codes during the processing
	   of this block and use it to update the command and distance
	   prefix codes for the next block. */
emit_commands:
	copy(cmd_histo[:], kCmdHistoSeed[:])

	/* "ip" is the input pointer. */
	ip = input

	last_distance = -1
	ip_end = int(uint(input) + block_size)

	if block_size >= kInputMarginBytes {
		/* For the last block, we need to keep a 16 bytes margin so that we can be
		   sure that all distances are at most window size - 16.
		   For all other blocks, we only need to keep a margin of 5 bytes so that
		   we don't go over the block size with a copy. */
		var len_limit uint = brotli_min_size_t(block_size-kMinMatchLen, input_size-kInputMarginBytes)
		var ip_limit int = int(uint(input) + len_limit)

		var next_hash uint32
		ip++
		for next_hash = hash5(in[ip:], shift); ; {
			var skip uint32 = 32
			var next_ip int = ip

			/* Step 1: Scan forward in the input looking for a 5-byte-long match.
			   If we get close to exhausting the input then goto emit_remainder.

			   Heuristic match skipping: If 32 bytes are scanned with no matches
			   found, start looking only at every other byte. If 32 more bytes are
			   scanned, look at every third byte, etc.. When a match is found,
			   immediately go back to looking at every byte. This is a small loss
			   (~5% performance, ~0.1% density) for compressible data due to more
			   bookkeeping, but for non-compressible data (such as JPEG) it's a huge
			   win since the compressor quickly "realizes" the data is incompressible
			   and doesn't bother looking for matches everywhere.

			   The "skip" variable keeps track of how many bytes there are since the
			   last match; dividing it by 32 (i.e. right-shifting by five) gives the
			   number of bytes to move ahead for each iteration. */
			var candidate int
			assert(next_emit < ip)

		trawl:
			for {
				var hash uint32 = next_hash
				var bytes_between_hash_lookups uint32 = skip >> 5
				skip++
				assert(hash == hash5(in[next_ip:], shift))
				ip = next_ip
				next_ip = int(uint32(ip) + bytes_between_hash_lookups)
				if next_ip > ip_limit {
					goto emit_remainder
				}

				next_hash = hash5(in[next_ip:], shift)

				/* Try the previous distance first: reusing it lets the command
				   use the cheap "last distance" code 64. */
				candidate = ip - last_distance
				if isMatch5(in[ip:], in[candidate:]) {
					if candidate < ip {
						table[hash] = int(ip - base_ip)
						break
					}
				}

				candidate = base_ip + table[hash]
				assert(candidate >= base_ip)
				assert(candidate < ip)

				table[hash] = int(ip - base_ip)
				if isMatch5(in[ip:], in[candidate:]) {
					break
				}
			}

			/* Check copy distance. If candidate is not feasible, continue search.
			   Checking is done outside of hot loop to reduce overhead. */
			if ip-candidate > maxDistance_compress_fragment {
				goto trawl
			}

			/* Step 2: Emit the found match together with the literal bytes from
			   "next_emit" to the bit stream, and then see if we can find a next match
			   immediately afterwards. Repeat until we find no match for the input
			   without emitting some literal bytes. */
			{
				var base int = ip
				/* > 0 */
				var matched uint = 5 + findMatchLengthWithLimit(in[candidate+5:], in[ip+5:], uint(ip_end-ip)-5)
				var distance int = int(base - candidate)

				/* We have a 5-byte match at ip, and we need to emit bytes in
				   [next_emit, ip). */
				var insert uint = uint(base - next_emit)
				ip += int(matched)
				if insert < 6210 {
					emitInsertLen1(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
				} else if shouldUseUncompressedMode(in[metablock_start:], in[next_emit:], insert, literal_ratio) {
					/* Long incompressible run: restart this meta-block as an
					   uncompressed one and continue right after it. */
					emitUncompressedMetaBlock1(in[metablock_start:], in[base:], mlen_storage_ix-3, storage_ix, storage)
					input_size -= uint(base - input)
					input = base
					next_emit = input
					goto next_block
				} else {
					emitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
				}

				emitLiterals(in[next_emit:], insert, lit_depth[:], lit_bits[:], storage_ix, storage)
				if distance == last_distance {
					/* Symbol 64: "same distance as the previous command". */
					writeBits(uint(cmd_depth[64]), uint64(cmd_bits[64]), storage_ix, storage)
					cmd_histo[64]++
				} else {
					emitDistance1(uint(distance), cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
					last_distance = distance
				}

				emitCopyLenLastDistance1(matched, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)

				next_emit = ip
				if ip >= ip_limit {
					goto emit_remainder
				}

				/* We could immediately start working at ip now, but to improve
				   compression we first update "table" with the hashes of some positions
				   within the last copy. */
				{
					var input_bytes uint64 = binary.LittleEndian.Uint64(in[ip-3:])
					var prev_hash uint32 = hashBytesAtOffset5(input_bytes, 0, shift)
					var cur_hash uint32 = hashBytesAtOffset5(input_bytes, 3, shift)
					table[prev_hash] = int(ip - base_ip - 3)
					prev_hash = hashBytesAtOffset5(input_bytes, 1, shift)
					table[prev_hash] = int(ip - base_ip - 2)
					prev_hash = hashBytesAtOffset5(input_bytes, 2, shift)
					table[prev_hash] = int(ip - base_ip - 1)

					candidate = base_ip + table[cur_hash]
					table[cur_hash] = int(ip - base_ip)
				}
			}

			for isMatch5(in[ip:], in[candidate:]) {
				/* We have a 5-byte match at ip, and no need to emit any literal bytes
				   prior to ip. */
				var base int = ip

				var matched uint = 5 + findMatchLengthWithLimit(in[candidate+5:], in[ip+5:], uint(ip_end-ip)-5)
				if ip-candidate > maxDistance_compress_fragment {
					break
				}
				ip += int(matched)
				last_distance = int(base - candidate) /* > 0 */
				emitCopyLen1(matched, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
				emitDistance1(uint(last_distance), cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)

				next_emit = ip
				if ip >= ip_limit {
					goto emit_remainder
				}

				/* We could immediately start working at ip now, but to improve
				   compression we first update "table" with the hashes of some positions
				   within the last copy. */
				{
					var input_bytes uint64 = binary.LittleEndian.Uint64(in[ip-3:])
					var prev_hash uint32 = hashBytesAtOffset5(input_bytes, 0, shift)
					var cur_hash uint32 = hashBytesAtOffset5(input_bytes, 3, shift)
					table[prev_hash] = int(ip - base_ip - 3)
					prev_hash = hashBytesAtOffset5(input_bytes, 1, shift)
					table[prev_hash] = int(ip - base_ip - 2)
					prev_hash = hashBytesAtOffset5(input_bytes, 2, shift)
					table[prev_hash] = int(ip - base_ip - 1)

					candidate = base_ip + table[cur_hash]
					table[cur_hash] = int(ip - base_ip)
				}
			}

			ip++
			next_hash = hash5(in[ip:], shift)
		}
	}

emit_remainder:
	assert(next_emit <= ip_end)
	input += int(block_size)
	input_size -= block_size
	block_size = brotli_min_size_t(input_size, compressFragmentFastImpl_kMergeBlockSize)

	/* Decide if we want to continue this meta-block instead of emitting the
	   last insert-only command. */
	if input_size > 0 && total_block_size+block_size <= 1<<20 && shouldMergeBlock(in[input:], block_size, lit_depth[:]) {
		assert(total_block_size > 1<<16)

		/* Update the size of the current meta-block and continue emitting commands.
		   We can do this because the current size and the new size both have 5
		   nibbles. */
		total_block_size += block_size

		updateBits(20, uint32(total_block_size-1), mlen_storage_ix, storage)
		goto emit_commands
	}

	/* Emit the remaining bytes as literals. */
	if next_emit < ip_end {
		var insert uint = uint(ip_end - next_emit)
		if insert < 6210 {
			emitInsertLen1(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
			emitLiterals(in[next_emit:], insert, lit_depth[:], lit_bits[:], storage_ix, storage)
		} else if shouldUseUncompressedMode(in[metablock_start:], in[next_emit:], insert, literal_ratio) {
			emitUncompressedMetaBlock1(in[metablock_start:], in[ip_end:], mlen_storage_ix-3, storage_ix, storage)
		} else {
			emitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
			emitLiterals(in[next_emit:], insert, lit_depth[:], lit_bits[:], storage_ix, storage)
		}
	}

	next_emit = ip_end

	/* If we have more data, write a new meta-block header and prefix codes and
	   then continue emitting commands. */
next_block:
	if input_size > 0 {
		metablock_start = input
		block_size = brotli_min_size_t(input_size, compressFragmentFastImpl_kFirstBlockSize)
		total_block_size = block_size

		/* Save the bit position of the MLEN field of the meta-block header, so that
		   we can update it later if we decide to extend this meta-block. */
		mlen_storage_ix = *storage_ix + 3

		storeMetaBlockHeader1(block_size, false, storage_ix, storage)

		/* No block splits, no contexts. */
		writeBits(13, 0, storage_ix, storage)

		literal_ratio = buildAndStoreLiteralPrefixCode(in[input:], block_size, lit_depth[:], lit_bits[:], storage_ix, storage)
		buildAndStoreCommandPrefixCode1(cmd_histo[:], cmd_depth, cmd_bits, storage_ix, storage)
		goto emit_commands
	}

	if !is_last {
		/* If this is not the last block, update the command and distance prefix
		   codes for the next block and store the compressed forms. */
		cmd_code[0] = 0

		*cmd_code_numbits = 0
		buildAndStoreCommandPrefixCode1(cmd_histo[:], cmd_depth, cmd_bits, cmd_code_numbits, cmd_code)
	}
}
/* Compresses "input" string to the "*storage" buffer as one or more complete
   meta-blocks, and updates the "*storage_ix" bit position.

   If "is_last" is true, emits an additional empty last meta-block.

   "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
   (see comment in encode.h) used for the encoding of this input fragment.
   If "is_last" is false, they are updated to reflect the statistics
   of this input fragment, to be used for the encoding of the next fragment.

   "*cmd_code_numbits" is the number of bits of the compressed representation
   of the command and distance prefix codes, and "cmd_code" is an array of
   at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
   command and distance prefix codes. If "is_last" is false, these are also
   updated to represent the updated "cmd_depth" and "cmd_bits".

   REQUIRES: "input_size" is greater than zero, or "is_last" is true.
   REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
   REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
   REQUIRES: "table_size" is an odd (9, 11, 13, 15) power of two.
   OUTPUT: maximal copy distance <= |input_size|
   OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
810 | func compressFragmentFast(input []byte, input_size uint, is_last bool, table []int, table_size uint, cmd_depth []byte, cmd_bits []uint16, cmd_code_numbits *uint, cmd_code []byte, storage_ix *uint, storage []byte) {
|
---|
811 | var initial_storage_ix uint = *storage_ix
|
---|
812 | var table_bits uint = uint(log2FloorNonZero(table_size))
|
---|
813 |
|
---|
814 | if input_size == 0 {
|
---|
815 | assert(is_last)
|
---|
816 | writeBits(1, 1, storage_ix, storage) /* islast */
|
---|
817 | writeBits(1, 1, storage_ix, storage) /* isempty */
|
---|
818 | *storage_ix = (*storage_ix + 7) &^ 7
|
---|
819 | return
|
---|
820 | }
|
---|
821 |
|
---|
822 | compressFragmentFastImpl(input, input_size, is_last, table, table_bits, cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage)
|
---|
823 |
|
---|
824 | /* If output is larger than single uncompressed block, rewrite it. */
|
---|
825 | if *storage_ix-initial_storage_ix > 31+(input_size<<3) {
|
---|
826 | emitUncompressedMetaBlock1(input, input[input_size:], initial_storage_ix, storage_ix, storage)
|
---|
827 | }
|
---|
828 |
|
---|
829 | if is_last {
|
---|
830 | writeBits(1, 1, storage_ix, storage) /* islast */
|
---|
831 | writeBits(1, 1, storage_ix, storage) /* isempty */
|
---|
832 | *storage_ix = (*storage_ix + 7) &^ 7
|
---|
833 | }
|
---|
834 | }
|
---|