1 | // Copyright 2016 The Go Authors. All rights reserved.
|
---|
2 | // Use of this source code is governed by a BSD-style
|
---|
3 | // license that can be found in the LICENSE file.
|
---|
4 |
|
---|
5 | package flate
|
---|
6 |
|
---|
7 | // dictDecoder implements the LZ77 sliding dictionary as used in decompression.
|
---|
8 | // LZ77 decompresses data through sequences of two forms of commands:
|
---|
9 | //
|
---|
10 | // * Literal insertions: Runs of one or more symbols are inserted into the data
|
---|
11 | // stream as is. This is accomplished through the writeByte method for a
|
---|
12 | // single symbol, or combinations of writeSlice/writeMark for multiple symbols.
|
---|
13 | // Any valid stream must start with a literal insertion if no preset dictionary
|
---|
14 | // is used.
|
---|
15 | //
|
---|
16 | // * Backward copies: Runs of one or more symbols are copied from previously
|
---|
17 | // emitted data. Backward copies come as the tuple (dist, length) where dist
|
---|
18 | // determines how far back in the stream to copy from and length determines how
|
---|
19 | // many bytes to copy. Note that it is valid for the length to be greater than
|
---|
20 | // the distance. Since LZ77 uses forward copies, that situation is used to
|
---|
21 | // perform a form of run-length encoding on repeated runs of symbols.
|
---|
22 | // The writeCopy and tryWriteCopy are used to implement this command.
|
---|
23 | //
|
---|
24 | // For performance reasons, this implementation performs little to no sanity
|
---|
25 | // checks about the arguments. As such, the invariants documented for each
|
---|
26 | // method call must be respected.
|
---|
27 | type dictDecoder struct {
|
---|
28 | hist []byte // Sliding window history
|
---|
29 |
|
---|
30 | // Invariant: 0 <= rdPos <= wrPos <= len(hist)
|
---|
31 | wrPos int // Current output position in buffer
|
---|
32 | rdPos int // Have emitted hist[:rdPos] already
|
---|
33 | full bool // Has a full window length been written yet?
|
---|
34 | }
|
---|
35 |
|
---|
36 | // init initializes dictDecoder to have a sliding window dictionary of the given
|
---|
37 | // size. If a preset dict is provided, it will initialize the dictionary with
|
---|
38 | // the contents of dict.
|
---|
39 | func (dd *dictDecoder) init(size int, dict []byte) {
|
---|
40 | *dd = dictDecoder{hist: dd.hist}
|
---|
41 |
|
---|
42 | if cap(dd.hist) < size {
|
---|
43 | dd.hist = make([]byte, size)
|
---|
44 | }
|
---|
45 | dd.hist = dd.hist[:size]
|
---|
46 |
|
---|
47 | if len(dict) > len(dd.hist) {
|
---|
48 | dict = dict[len(dict)-len(dd.hist):]
|
---|
49 | }
|
---|
50 | dd.wrPos = copy(dd.hist, dict)
|
---|
51 | if dd.wrPos == len(dd.hist) {
|
---|
52 | dd.wrPos = 0
|
---|
53 | dd.full = true
|
---|
54 | }
|
---|
55 | dd.rdPos = dd.wrPos
|
---|
56 | }
|
---|
57 |
|
---|
58 | // histSize reports the total amount of historical data in the dictionary.
|
---|
59 | func (dd *dictDecoder) histSize() int {
|
---|
60 | if dd.full {
|
---|
61 | return len(dd.hist)
|
---|
62 | }
|
---|
63 | return dd.wrPos
|
---|
64 | }
|
---|
65 |
|
---|
66 | // availRead reports the number of bytes that can be flushed by readFlush.
|
---|
67 | func (dd *dictDecoder) availRead() int {
|
---|
68 | return dd.wrPos - dd.rdPos
|
---|
69 | }
|
---|
70 |
|
---|
71 | // availWrite reports the available amount of output buffer space.
|
---|
72 | func (dd *dictDecoder) availWrite() int {
|
---|
73 | return len(dd.hist) - dd.wrPos
|
---|
74 | }
|
---|
75 |
|
---|
76 | // writeSlice returns a slice of the available buffer to write data to.
|
---|
77 | //
|
---|
78 | // This invariant will be kept: len(s) <= availWrite()
|
---|
79 | func (dd *dictDecoder) writeSlice() []byte {
|
---|
80 | return dd.hist[dd.wrPos:]
|
---|
81 | }
|
---|
82 |
|
---|
83 | // writeMark advances the writer pointer by cnt.
|
---|
84 | //
|
---|
85 | // This invariant must be kept: 0 <= cnt <= availWrite()
|
---|
86 | func (dd *dictDecoder) writeMark(cnt int) {
|
---|
87 | dd.wrPos += cnt
|
---|
88 | }
|
---|
89 |
|
---|
90 | // writeByte writes a single byte to the dictionary.
|
---|
91 | //
|
---|
92 | // This invariant must be kept: 0 < availWrite()
|
---|
93 | func (dd *dictDecoder) writeByte(c byte) {
|
---|
94 | dd.hist[dd.wrPos] = c
|
---|
95 | dd.wrPos++
|
---|
96 | }
|
---|
97 |
|
---|
98 | // writeCopy copies a string at a given (dist, length) to the output.
|
---|
99 | // This returns the number of bytes copied and may be less than the requested
|
---|
100 | // length if the available space in the output buffer is too small.
|
---|
101 | //
|
---|
102 | // This invariant must be kept: 0 < dist <= histSize()
|
---|
103 | func (dd *dictDecoder) writeCopy(dist, length int) int {
|
---|
104 | dstBase := dd.wrPos
|
---|
105 | dstPos := dstBase
|
---|
106 | srcPos := dstPos - dist
|
---|
107 | endPos := dstPos + length
|
---|
108 | if endPos > len(dd.hist) {
|
---|
109 | endPos = len(dd.hist)
|
---|
110 | }
|
---|
111 |
|
---|
112 | // Copy non-overlapping section after destination position.
|
---|
113 | //
|
---|
114 | // This section is non-overlapping in that the copy length for this section
|
---|
115 | // is always less than or equal to the backwards distance. This can occur
|
---|
116 | // if a distance refers to data that wraps-around in the buffer.
|
---|
117 | // Thus, a backwards copy is performed here; that is, the exact bytes in
|
---|
118 | // the source prior to the copy is placed in the destination.
|
---|
119 | if srcPos < 0 {
|
---|
120 | srcPos += len(dd.hist)
|
---|
121 | dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
|
---|
122 | srcPos = 0
|
---|
123 | }
|
---|
124 |
|
---|
125 | // Copy possibly overlapping section before destination position.
|
---|
126 | //
|
---|
127 | // This section can overlap if the copy length for this section is larger
|
---|
128 | // than the backwards distance. This is allowed by LZ77 so that repeated
|
---|
129 | // strings can be succinctly represented using (dist, length) pairs.
|
---|
130 | // Thus, a forwards copy is performed here; that is, the bytes copied is
|
---|
131 | // possibly dependent on the resulting bytes in the destination as the copy
|
---|
132 | // progresses along. This is functionally equivalent to the following:
|
---|
133 | //
|
---|
134 | // for i := 0; i < endPos-dstPos; i++ {
|
---|
135 | // dd.hist[dstPos+i] = dd.hist[srcPos+i]
|
---|
136 | // }
|
---|
137 | // dstPos = endPos
|
---|
138 | //
|
---|
139 | for dstPos < endPos {
|
---|
140 | dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
|
---|
141 | }
|
---|
142 |
|
---|
143 | dd.wrPos = dstPos
|
---|
144 | return dstPos - dstBase
|
---|
145 | }
|
---|
146 |
|
---|
147 | // tryWriteCopy tries to copy a string at a given (distance, length) to the
|
---|
148 | // output. This specialized version is optimized for short distances.
|
---|
149 | //
|
---|
150 | // This method is designed to be inlined for performance reasons.
|
---|
151 | //
|
---|
152 | // This invariant must be kept: 0 < dist <= histSize()
|
---|
153 | func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
|
---|
154 | dstPos := dd.wrPos
|
---|
155 | endPos := dstPos + length
|
---|
156 | if dstPos < dist || endPos > len(dd.hist) {
|
---|
157 | return 0
|
---|
158 | }
|
---|
159 | dstBase := dstPos
|
---|
160 | srcPos := dstPos - dist
|
---|
161 |
|
---|
162 | // Copy possibly overlapping section before destination position.
|
---|
163 | loop:
|
---|
164 | dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
|
---|
165 | if dstPos < endPos {
|
---|
166 | goto loop // Avoid for-loop so that this function can be inlined
|
---|
167 | }
|
---|
168 |
|
---|
169 | dd.wrPos = dstPos
|
---|
170 | return dstPos - dstBase
|
---|
171 | }
|
---|
172 |
|
---|
173 | // readFlush returns a slice of the historical buffer that is ready to be
|
---|
174 | // emitted to the user. The data returned by readFlush must be fully consumed
|
---|
175 | // before calling any other dictDecoder methods.
|
---|
176 | func (dd *dictDecoder) readFlush() []byte {
|
---|
177 | toRead := dd.hist[dd.rdPos:dd.wrPos]
|
---|
178 | dd.rdPos = dd.wrPos
|
---|
179 | if dd.wrPos == len(dd.hist) {
|
---|
180 | dd.wrPos, dd.rdPos = 0, 0
|
---|
181 | dd.full = true
|
---|
182 | }
|
---|
183 | return toRead
|
---|
184 | }
|
---|