Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: code/trunk/vendor/gopkg.in/yaml.v3/readerc.go@ 75

Last change on this file since 75 was 75, checked in by Izuru Yakumo, 18 months ago

The Empress (III)

Change how versions are handled in version.go (to ease `go install`)
Upgrade yaml.v2 to yaml.v3

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 13.8 KB

Line
1	//
2	// Copyright (c) 2011-2019 Canonical Ltd
3	// Copyright (c) 2006-2010 Kirill Simonov
4	//
5	// Permission is hereby granted, free of charge, to any person obtaining a copy of
6	// this software and associated documentation files (the "Software"), to deal in
7	// the Software without restriction, including without limitation the rights to
8	// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9	// of the Software, and to permit persons to whom the Software is furnished to do
10	// so, subject to the following conditions:
11	//
12	// The above copyright notice and this permission notice shall be included in all
13	// copies or substantial portions of the Software.
14	//
15	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21	// SOFTWARE.
22
23	package yaml
24
25	import (
26	"io"
27	)
28
29	// Set the reader error and return 0.
30	func yaml_parser_set_reader_error(parser *yaml_parser_t, problem string, offset int, value int) bool {
31	parser.error = yaml_READER_ERROR
32	parser.problem = problem
33	parser.problem_offset = offset
34	parser.problem_value = value
35	return false
36	}
37
// Byte order marks that may prefix the raw input stream.
const (
	// bom_UTF8 is the three-byte UTF-8 byte order mark.
	bom_UTF8 = "\xef\xbb\xbf"
	// bom_UTF16LE is the two-byte little-endian UTF-16 byte order mark.
	bom_UTF16LE = "\xff\xfe"
	// bom_UTF16BE is the two-byte big-endian UTF-16 byte order mark.
	bom_UTF16BE = "\xfe\xff"
)
44
45	// Determine the input stream encoding by checking the BOM symbol. If no BOM is
46	// found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure.
47	func yaml_parser_determine_encoding(parser *yaml_parser_t) bool {
48	// Ensure that we had enough bytes in the raw buffer.
49	for !parser.eof && len(parser.raw_buffer)-parser.raw_buffer_pos < 3 {
50	if !yaml_parser_update_raw_buffer(parser) {
51	return false
52	}
53	}
54
55	// Determine the encoding.
56	buf := parser.raw_buffer
57	pos := parser.raw_buffer_pos
58	avail := len(buf) - pos
59	if avail >= 2 && buf[pos] == bom_UTF16LE[0] && buf[pos+1] == bom_UTF16LE[1] {
60	parser.encoding = yaml_UTF16LE_ENCODING
61	parser.raw_buffer_pos += 2
62	parser.offset += 2
63	} else if avail >= 2 && buf[pos] == bom_UTF16BE[0] && buf[pos+1] == bom_UTF16BE[1] {
64	parser.encoding = yaml_UTF16BE_ENCODING
65	parser.raw_buffer_pos += 2
66	parser.offset += 2
67	} else if avail >= 3 && buf[pos] == bom_UTF8[0] && buf[pos+1] == bom_UTF8[1] && buf[pos+2] == bom_UTF8[2] {
68	parser.encoding = yaml_UTF8_ENCODING
69	parser.raw_buffer_pos += 3
70	parser.offset += 3
71	} else {
72	parser.encoding = yaml_UTF8_ENCODING
73	}
74	return true
75	}
76
77	// Update the raw buffer.
78	func yaml_parser_update_raw_buffer(parser *yaml_parser_t) bool {
79	size_read := 0
80
81	// Return if the raw buffer is full.
82	if parser.raw_buffer_pos == 0 && len(parser.raw_buffer) == cap(parser.raw_buffer) {
83	return true
84	}
85
86	// Return on EOF.
87	if parser.eof {
88	return true
89	}
90
91	// Move the remaining bytes in the raw buffer to the beginning.
92	if parser.raw_buffer_pos > 0 && parser.raw_buffer_pos < len(parser.raw_buffer) {
93	copy(parser.raw_buffer, parser.raw_buffer[parser.raw_buffer_pos:])
94	}
95	parser.raw_buffer = parser.raw_buffer[:len(parser.raw_buffer)-parser.raw_buffer_pos]
96	parser.raw_buffer_pos = 0
97
98	// Call the read handler to fill the buffer.
99	size_read, err := parser.read_handler(parser, parser.raw_buffer[len(parser.raw_buffer):cap(parser.raw_buffer)])
100	parser.raw_buffer = parser.raw_buffer[:len(parser.raw_buffer)+size_read]
101	if err == io.EOF {
102	parser.eof = true
103	} else if err != nil {
104	return yaml_parser_set_reader_error(parser, "input error: "+err.Error(), parser.offset, -1)
105	}
106	return true
107	}
108
109	// Ensure that the buffer contains at least `length` characters.
110	// Return true on success, false on failure.
111	//
112	// The length is supposed to be significantly less that the buffer size.
113	func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool {
114	if parser.read_handler == nil {
115	panic("read handler must be set")
116	}
117
118	// [Go] This function was changed to guarantee the requested length size at EOF.
119	// The fact we need to do this is pretty awful, but the description above implies
120	// for that to be the case, and there are tests
121
122	// If the EOF flag is set and the raw buffer is empty, do nothing.
123	if parser.eof && parser.raw_buffer_pos == len(parser.raw_buffer) {
124	// [Go] ACTUALLY! Read the documentation of this function above.
125	// This is just broken. To return true, we need to have the
126	// given length in the buffer. Not doing that means every single
127	// check that calls this function to make sure the buffer has a
128	// given length is Go) panicking; or C) accessing invalid memory.
129	//return true
130	}
131
132	// Return if the buffer contains enough characters.
133	if parser.unread >= length {
134	return true
135	}
136
137	// Determine the input encoding if it is not known yet.
138	if parser.encoding == yaml_ANY_ENCODING {
139	if !yaml_parser_determine_encoding(parser) {
140	return false
141	}
142	}
143
144	// Move the unread characters to the beginning of the buffer.
145	buffer_len := len(parser.buffer)
146	if parser.buffer_pos > 0 && parser.buffer_pos < buffer_len {
147	copy(parser.buffer, parser.buffer[parser.buffer_pos:])
148	buffer_len -= parser.buffer_pos
149	parser.buffer_pos = 0
150	} else if parser.buffer_pos == buffer_len {
151	buffer_len = 0
152	parser.buffer_pos = 0
153	}
154
155	// Open the whole buffer for writing, and cut it before returning.
156	parser.buffer = parser.buffer[:cap(parser.buffer)]
157
158	// Fill the buffer until it has enough characters.
159	first := true
160	for parser.unread < length {
161
162	// Fill the raw buffer if necessary.
163	if !first \|\| parser.raw_buffer_pos == len(parser.raw_buffer) {
164	if !yaml_parser_update_raw_buffer(parser) {
165	parser.buffer = parser.buffer[:buffer_len]
166	return false
167	}
168	}
169	first = false
170
171	// Decode the raw buffer.
172	inner:
173	for parser.raw_buffer_pos != len(parser.raw_buffer) {
174	var value rune
175	var width int
176
177	raw_unread := len(parser.raw_buffer) - parser.raw_buffer_pos
178
179	// Decode the next character.
180	switch parser.encoding {
181	case yaml_UTF8_ENCODING:
182	// Decode a UTF-8 character. Check RFC 3629
183	// (http://www.ietf.org/rfc/rfc3629.txt) for more details.
184	//
185	// The following table (taken from the RFC) is used for
186	// decoding.
187	//
188	// Char. number range \| UTF-8 octet sequence
189	// (hexadecimal) \| (binary)
190	// --------------------+------------------------------------
191	// 0000 0000-0000 007F \| 0xxxxxxx
192	// 0000 0080-0000 07FF \| 110xxxxx 10xxxxxx
193	// 0000 0800-0000 FFFF \| 1110xxxx 10xxxxxx 10xxxxxx
194	// 0001 0000-0010 FFFF \| 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
195	//
196	// Additionally, the characters in the range 0xD800-0xDFFF
197	// are prohibited as they are reserved for use with UTF-16
198	// surrogate pairs.
199
200	// Determine the length of the UTF-8 sequence.
201	octet := parser.raw_buffer[parser.raw_buffer_pos]
202	switch {
203	case octet&0x80 == 0x00:
204	width = 1
205	case octet&0xE0 == 0xC0:
206	width = 2
207	case octet&0xF0 == 0xE0:
208	width = 3
209	case octet&0xF8 == 0xF0:
210	width = 4
211	default:
212	// The leading octet is invalid.
213	return yaml_parser_set_reader_error(parser,
214	"invalid leading UTF-8 octet",
215	parser.offset, int(octet))
216	}
217
218	// Check if the raw buffer contains an incomplete character.
219	if width > raw_unread {
220	if parser.eof {
221	return yaml_parser_set_reader_error(parser,
222	"incomplete UTF-8 octet sequence",
223	parser.offset, -1)
224	}
225	break inner
226	}
227
228	// Decode the leading octet.
229	switch {
230	case octet&0x80 == 0x00:
231	value = rune(octet & 0x7F)
232	case octet&0xE0 == 0xC0:
233	value = rune(octet & 0x1F)
234	case octet&0xF0 == 0xE0:
235	value = rune(octet & 0x0F)
236	case octet&0xF8 == 0xF0:
237	value = rune(octet & 0x07)
238	default:
239	value = 0
240	}
241
242	// Check and decode the trailing octets.
243	for k := 1; k < width; k++ {
244	octet = parser.raw_buffer[parser.raw_buffer_pos+k]
245
246	// Check if the octet is valid.
247	if (octet & 0xC0) != 0x80 {
248	return yaml_parser_set_reader_error(parser,
249	"invalid trailing UTF-8 octet",
250	parser.offset+k, int(octet))
251	}
252
253	// Decode the octet.
254	value = (value << 6) + rune(octet&0x3F)
255	}
256
257	// Check the length of the sequence against the value.
258	switch {
259	case width == 1:
260	case width == 2 && value >= 0x80:
261	case width == 3 && value >= 0x800:
262	case width == 4 && value >= 0x10000:
263	default:
264	return yaml_parser_set_reader_error(parser,
265	"invalid length of a UTF-8 sequence",
266	parser.offset, -1)
267	}
268
269	// Check the range of the value.
270	if value >= 0xD800 && value <= 0xDFFF \|\| value > 0x10FFFF {
271	return yaml_parser_set_reader_error(parser,
272	"invalid Unicode character",
273	parser.offset, int(value))
274	}
275
276	case yaml_UTF16LE_ENCODING, yaml_UTF16BE_ENCODING:
277	var low, high int
278	if parser.encoding == yaml_UTF16LE_ENCODING {
279	low, high = 0, 1
280	} else {
281	low, high = 1, 0
282	}
283
284	// The UTF-16 encoding is not as simple as one might
285	// naively think. Check RFC 2781
286	// (http://www.ietf.org/rfc/rfc2781.txt).
287	//
288	// Normally, two subsequent bytes describe a Unicode
289	// character. However a special technique (called a
290	// surrogate pair) is used for specifying character
291	// values larger than 0xFFFF.
292	//
293	// A surrogate pair consists of two pseudo-characters:
294	// high surrogate area (0xD800-0xDBFF)
295	// low surrogate area (0xDC00-0xDFFF)
296	//
297	// The following formulas are used for decoding
298	// and encoding characters using surrogate pairs:
299	//
300	// U = U' + 0x10000 (0x01 00 00 <= U <= 0x10 FF FF)
301	// U' = yyyyyyyyyyxxxxxxxxxx (0 <= U' <= 0x0F FF FF)
302	// W1 = 110110yyyyyyyyyy
303	// W2 = 110111xxxxxxxxxx
304	//
305	// where U is the character value, W1 is the high surrogate
306	// area, W2 is the low surrogate area.
307
308	// Check for incomplete UTF-16 character.
309	if raw_unread < 2 {
310	if parser.eof {
311	return yaml_parser_set_reader_error(parser,
312	"incomplete UTF-16 character",
313	parser.offset, -1)
314	}
315	break inner
316	}
317
318	// Get the character.
319	value = rune(parser.raw_buffer[parser.raw_buffer_pos+low]) +
320	(rune(parser.raw_buffer[parser.raw_buffer_pos+high]) << 8)
321
322	// Check for unexpected low surrogate area.
323	if value&0xFC00 == 0xDC00 {
324	return yaml_parser_set_reader_error(parser,
325	"unexpected low surrogate area",
326	parser.offset, int(value))
327	}
328
329	// Check for a high surrogate area.
330	if value&0xFC00 == 0xD800 {
331	width = 4
332
333	// Check for incomplete surrogate pair.
334	if raw_unread < 4 {
335	if parser.eof {
336	return yaml_parser_set_reader_error(parser,
337	"incomplete UTF-16 surrogate pair",
338	parser.offset, -1)
339	}
340	break inner
341	}
342
343	// Get the next character.
344	value2 := rune(parser.raw_buffer[parser.raw_buffer_pos+low+2]) +
345	(rune(parser.raw_buffer[parser.raw_buffer_pos+high+2]) << 8)
346
347	// Check for a low surrogate area.
348	if value2&0xFC00 != 0xDC00 {
349	return yaml_parser_set_reader_error(parser,
350	"expected low surrogate area",
351	parser.offset+2, int(value2))
352	}
353
354	// Generate the value of the surrogate pair.
355	value = 0x10000 + ((value & 0x3FF) << 10) + (value2 & 0x3FF)
356	} else {
357	width = 2
358	}
359
360	default:
361	panic("impossible")
362	}
363
364	// Check if the character is in the allowed range:
365	// #x9 \| #xA \| #xD \| [#x20-#x7E] (8 bit)
366	// \| #x85 \| [#xA0-#xD7FF] \| [#xE000-#xFFFD] (16 bit)
367	// \| [#x10000-#x10FFFF] (32 bit)
368	switch {
369	case value == 0x09:
370	case value == 0x0A:
371	case value == 0x0D:
372	case value >= 0x20 && value <= 0x7E:
373	case value == 0x85:
374	case value >= 0xA0 && value <= 0xD7FF:
375	case value >= 0xE000 && value <= 0xFFFD:
376	case value >= 0x10000 && value <= 0x10FFFF:
377	default:
378	return yaml_parser_set_reader_error(parser,
379	"control characters are not allowed",
380	parser.offset, int(value))
381	}
382
383	// Move the raw pointers.
384	parser.raw_buffer_pos += width
385	parser.offset += width
386
387	// Finally put the character into the buffer.
388	if value <= 0x7F {
389	// 0000 0000-0000 007F . 0xxxxxxx
390	parser.buffer[buffer_len+0] = byte(value)
391	buffer_len += 1
392	} else if value <= 0x7FF {
393	// 0000 0080-0000 07FF . 110xxxxx 10xxxxxx
394	parser.buffer[buffer_len+0] = byte(0xC0 + (value >> 6))
395	parser.buffer[buffer_len+1] = byte(0x80 + (value & 0x3F))
396	buffer_len += 2
397	} else if value <= 0xFFFF {
398	// 0000 0800-0000 FFFF . 1110xxxx 10xxxxxx 10xxxxxx
399	parser.buffer[buffer_len+0] = byte(0xE0 + (value >> 12))
400	parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 6) & 0x3F))
401	parser.buffer[buffer_len+2] = byte(0x80 + (value & 0x3F))
402	buffer_len += 3
403	} else {
404	// 0001 0000-0010 FFFF . 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
405	parser.buffer[buffer_len+0] = byte(0xF0 + (value >> 18))
406	parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 12) & 0x3F))
407	parser.buffer[buffer_len+2] = byte(0x80 + ((value >> 6) & 0x3F))
408	parser.buffer[buffer_len+3] = byte(0x80 + (value & 0x3F))
409	buffer_len += 4
410	}
411
412	parser.unread++
413	}
414
415	// On EOF, put NUL into the buffer and return.
416	if parser.eof {
417	parser.buffer[buffer_len] = 0
418	buffer_len++
419	parser.unread++
420	break
421	}
422	}
423	// [Go] Read the documentation of this function above. To return true,
424	// we need to have the given length in the buffer. Not doing that means
425	// every single check that calls this function to make sure the buffer
426	// has a given length is Go) panicking; or C) accessing invalid memory.
427	// This happens here due to the EOF above breaking early.
428	for buffer_len < length {
429	parser.buffer[buffer_len] = 0
430	buffer_len++
431	}
432	parser.buffer = parser.buffer[:buffer_len]
433	return true
434	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: