Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: code/trunk/vendor/github.com/russross/blackfriday/v2/block.go@ 67

Last change on this file since 67 was 67, checked in by Izuru Yakumo, 23 months ago

Use vendored modules

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 33.7 KB

Line
1	//
2	// Blackfriday Markdown Processor
3	// Available at http://github.com/russross/blackfriday
4	//
5	// Copyright © 2011 Russ Ross <russ@russross.com>.
6	// Distributed under the Simplified BSD License.
7	// See README.md for details.
8	//
9
10	//
11	// Functions to parse block-level elements.
12	//
13
14	package blackfriday
15
16	import (
17	"bytes"
18	"html"
19	"regexp"
20	"strings"
21	"unicode"
22	)
23
const (
	// charEntity matches an HTML character reference: hexadecimal numeric
	// (&#x...;), decimal numeric (&#...;) or named (&name;).
	charEntity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
	// escapable is a character class listing the punctuation characters that
	// may follow a backslash in a backslash escape.
	escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
)

var (
	// reBackslashOrAmp is a cheap pre-check: it matches any input containing a
	// backslash or an ampersand. The backslash must be doubled inside the
	// class; "[\\&]" would compile to the class [\&], i.e. only '&'.
	reBackslashOrAmp = regexp.MustCompile("[\\\\&]")
	// reEntityOrEscapedChar matches, case-insensitively, either a backslash
	// followed by an escapable character or a character entity.
	reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity)
)
33
34	// Parse block-level data.
35	// Note: this function and many that it calls assume that
36	// the input buffer ends with a newline.
37	func (p *Markdown) block(data []byte) {
38	// this is called recursively: enforce a maximum depth
39	if p.nesting >= p.maxNesting {
40	return
41	}
42	p.nesting++
43
44	// parse out one block-level construct at a time
45	for len(data) > 0 {
46	// prefixed heading:
47	//
48	// # Heading 1
49	// ## Heading 2
50	// ...
51	// ###### Heading 6
52	if p.isPrefixHeading(data) {
53	data = data[p.prefixHeading(data):]
54	continue
55	}
56
57	// block of preformatted HTML:
58	//
59	// <div>
60	// ...
61	// </div>
62	if data[0] == '<' {
63	if i := p.html(data, true); i > 0 {
64	data = data[i:]
65	continue
66	}
67	}
68
69	// title block
70	//
71	// % stuff
72	// % more stuff
73	// % even more stuff
74	if p.extensions&Titleblock != 0 {
75	if data[0] == '%' {
76	if i := p.titleBlock(data, true); i > 0 {
77	data = data[i:]
78	continue
79	}
80	}
81	}
82
83	// blank lines. note: returns the # of bytes to skip
84	if i := p.isEmpty(data); i > 0 {
85	data = data[i:]
86	continue
87	}
88
89	// indented code block:
90	//
91	// func max(a, b int) int {
92	// if a > b {
93	// return a
94	// }
95	// return b
96	// }
97	if p.codePrefix(data) > 0 {
98	data = data[p.code(data):]
99	continue
100	}
101
102	// fenced code block:
103	//
104	// ``` go
105	// func fact(n int) int {
106	// if n <= 1 {
107	// return n
108	// }
109	// return n * fact(n-1)
110	// }
111	// ```
112	if p.extensions&FencedCode != 0 {
113	if i := p.fencedCodeBlock(data, true); i > 0 {
114	data = data[i:]
115	continue
116	}
117	}
118
119	// horizontal rule:
120	//
121	// ------
122	// or
123	// ******
124	// or
125	// ______
126	if p.isHRule(data) {
127	p.addBlock(HorizontalRule, nil)
128	var i int
129	for i = 0; i < len(data) && data[i] != '\n'; i++ {
130	}
131	data = data[i:]
132	continue
133	}
134
135	// block quote:
136	//
137	// > A big quote I found somewhere
138	// > on the web
139	if p.quotePrefix(data) > 0 {
140	data = data[p.quote(data):]
141	continue
142	}
143
144	// table:
145	//
146	// Name \| Age \| Phone
147	// ------\|-----\|---------
148	// Bob \| 31 \| 555-1234
149	// Alice \| 27 \| 555-4321
150	if p.extensions&Tables != 0 {
151	if i := p.table(data); i > 0 {
152	data = data[i:]
153	continue
154	}
155	}
156
157	// an itemized/unordered list:
158	//
159	// * Item 1
160	// * Item 2
161	//
162	// also works with + or -
163	if p.uliPrefix(data) > 0 {
164	data = data[p.list(data, 0):]
165	continue
166	}
167
168	// a numbered/ordered list:
169	//
170	// 1. Item 1
171	// 2. Item 2
172	if p.oliPrefix(data) > 0 {
173	data = data[p.list(data, ListTypeOrdered):]
174	continue
175	}
176
177	// definition lists:
178	//
179	// Term 1
180	// : Definition a
181	// : Definition b
182	//
183	// Term 2
184	// : Definition c
185	if p.extensions&DefinitionLists != 0 {
186	if p.dliPrefix(data) > 0 {
187	data = data[p.list(data, ListTypeDefinition):]
188	continue
189	}
190	}
191
192	// anything else must look like a normal paragraph
193	// note: this finds underlined headings, too
194	data = data[p.paragraph(data):]
195	}
196
197	p.nesting--
198	}
199
200	func (p Markdown) addBlock(typ NodeType, content []byte) Node {
201	p.closeUnmatchedBlocks()
202	container := p.addChild(typ, 0)
203	container.content = content
204	return container
205	}
206
207	func (p *Markdown) isPrefixHeading(data []byte) bool {
208	if data[0] != '#' {
209	return false
210	}
211
212	if p.extensions&SpaceHeadings != 0 {
213	level := 0
214	for level < 6 && level < len(data) && data[level] == '#' {
215	level++
216	}
217	if level == len(data) \|\| data[level] != ' ' {
218	return false
219	}
220	}
221	return true
222	}
223
224	func (p *Markdown) prefixHeading(data []byte) int {
225	level := 0
226	for level < 6 && level < len(data) && data[level] == '#' {
227	level++
228	}
229	i := skipChar(data, level, ' ')
230	end := skipUntilChar(data, i, '\n')
231	skip := end
232	id := ""
233	if p.extensions&HeadingIDs != 0 {
234	j, k := 0, 0
235	// find start/end of heading id
236	for j = i; j < end-1 && (data[j] != '{' \|\| data[j+1] != '#'); j++ {
237	}
238	for k = j + 1; k < end && data[k] != '}'; k++ {
239	}
240	// extract heading id iff found
241	if j < end && k < end {
242	id = string(data[j+2 : k])
243	end = j
244	skip = k + 1
245	for end > 0 && data[end-1] == ' ' {
246	end--
247	}
248	}
249	}
250	for end > 0 && data[end-1] == '#' {
251	if isBackslashEscaped(data, end-1) {
252	break
253	}
254	end--
255	}
256	for end > 0 && data[end-1] == ' ' {
257	end--
258	}
259	if end > i {
260	if id == "" && p.extensions&AutoHeadingIDs != 0 {
261	id = SanitizedAnchorName(string(data[i:end]))
262	}
263	block := p.addBlock(Heading, data[i:end])
264	block.HeadingID = id
265	block.Level = level
266	}
267	return skip
268	}
269
270	func (p *Markdown) isUnderlinedHeading(data []byte) int {
271	// test of level 1 heading
272	if data[0] == '=' {
273	i := skipChar(data, 1, '=')
274	i = skipChar(data, i, ' ')
275	if i < len(data) && data[i] == '\n' {
276	return 1
277	}
278	return 0
279	}
280
281	// test of level 2 heading
282	if data[0] == '-' {
283	i := skipChar(data, 1, '-')
284	i = skipChar(data, i, ' ')
285	if i < len(data) && data[i] == '\n' {
286	return 2
287	}
288	return 0
289	}
290
291	return 0
292	}
293
294	func (p *Markdown) titleBlock(data []byte, doRender bool) int {
295	if data[0] != '%' {
296	return 0
297	}
298	splitData := bytes.Split(data, []byte("\n"))
299	var i int
300	for idx, b := range splitData {
301	if !bytes.HasPrefix(b, []byte("%")) {
302	i = idx // - 1
303	break
304	}
305	}
306
307	data = bytes.Join(splitData[0:i], []byte("\n"))
308	consumed := len(data)
309	data = bytes.TrimPrefix(data, []byte("% "))
310	data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
311	block := p.addBlock(Heading, data)
312	block.Level = 1
313	block.IsTitleblock = true
314
315	return consumed
316	}
317
318	func (p *Markdown) html(data []byte, doRender bool) int {
319	var i, j int
320
321	// identify the opening tag
322	if data[0] != '<' {
323	return 0
324	}
325	curtag, tagfound := p.htmlFindTag(data[1:])
326
327	// handle special cases
328	if !tagfound {
329	// check for an HTML comment
330	if size := p.htmlComment(data, doRender); size > 0 {
331	return size
332	}
333
334	// check for an <hr> tag
335	if size := p.htmlHr(data, doRender); size > 0 {
336	return size
337	}
338
339	// no special case recognized
340	return 0
341	}
342
343	// look for an unindented matching closing tag
344	// followed by a blank line
345	found := false
346	/*
347	closetag := []byte("\n</" + curtag + ">")
348	j = len(curtag) + 1
349	for !found {
350	// scan for a closing tag at the beginning of a line
351	if skip := bytes.Index(data[j:], closetag); skip >= 0 {
352	j += skip + len(closetag)
353	} else {
354	break
355	}
356
357	// see if it is the only thing on the line
358	if skip := p.isEmpty(data[j:]); skip > 0 {
359	// see if it is followed by a blank line/eof
360	j += skip
361	if j >= len(data) {
362	found = true
363	i = j
364	} else {
365	if skip := p.isEmpty(data[j:]); skip > 0 {
366	j += skip
367	found = true
368	i = j
369	}
370	}
371	}
372	}
373	*/
374
375	// if not found, try a second pass looking for indented match
376	// but not if tag is "ins" or "del" (following original Markdown.pl)
377	if !found && curtag != "ins" && curtag != "del" {
378	i = 1
379	for i < len(data) {
380	i++
381	for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
382	i++
383	}
384
385	if i+2+len(curtag) >= len(data) {
386	break
387	}
388
389	j = p.htmlFindEnd(curtag, data[i-1:])
390
391	if j > 0 {
392	i += j - 1
393	found = true
394	break
395	}
396	}
397	}
398
399	if !found {
400	return 0
401	}
402
403	// the end of the block has been found
404	if doRender {
405	// trim newlines
406	end := i
407	for end > 0 && data[end-1] == '\n' {
408	end--
409	}
410	finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
411	}
412
413	return i
414	}
415
416	func finalizeHTMLBlock(block *Node) {
417	block.Literal = block.content
418	block.content = nil
419	}
420
421	// HTML comment, lax form
422	func (p *Markdown) htmlComment(data []byte, doRender bool) int {
423	i := p.inlineHTMLComment(data)
424	// needs to end with a blank line
425	if j := p.isEmpty(data[i:]); j > 0 {
426	size := i + j
427	if doRender {
428	// trim trailing newlines
429	end := size
430	for end > 0 && data[end-1] == '\n' {
431	end--
432	}
433	block := p.addBlock(HTMLBlock, data[:end])
434	finalizeHTMLBlock(block)
435	}
436	return size
437	}
438	return 0
439	}
440
441	// HR, which is the only self-closing block tag considered
442	func (p *Markdown) htmlHr(data []byte, doRender bool) int {
443	if len(data) < 4 {
444	return 0
445	}
446	if data[0] != '<' \|\| (data[1] != 'h' && data[1] != 'H') \|\| (data[2] != 'r' && data[2] != 'R') {
447	return 0
448	}
449	if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
450	// not an <hr> tag after all; at least not a valid one
451	return 0
452	}
453	i := 3
454	for i < len(data) && data[i] != '>' && data[i] != '\n' {
455	i++
456	}
457	if i < len(data) && data[i] == '>' {
458	i++
459	if j := p.isEmpty(data[i:]); j > 0 {
460	size := i + j
461	if doRender {
462	// trim newlines
463	end := size
464	for end > 0 && data[end-1] == '\n' {
465	end--
466	}
467	finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
468	}
469	return size
470	}
471	}
472	return 0
473	}
474
475	func (p *Markdown) htmlFindTag(data []byte) (string, bool) {
476	i := 0
477	for i < len(data) && isalnum(data[i]) {
478	i++
479	}
480	key := string(data[:i])
481	if _, ok := blockTags[key]; ok {
482	return key, true
483	}
484	return "", false
485	}
486
487	func (p *Markdown) htmlFindEnd(tag string, data []byte) int {
488	// assume data[0] == '<' && data[1] == '/' already tested
489	if tag == "hr" {
490	return 2
491	}
492	// check if tag is a match
493	closetag := []byte("</" + tag + ">")
494	if !bytes.HasPrefix(data, closetag) {
495	return 0
496	}
497	i := len(closetag)
498
499	// check that the rest of the line is blank
500	skip := 0
501	if skip = p.isEmpty(data[i:]); skip == 0 {
502	return 0
503	}
504	i += skip
505	skip = 0
506
507	if i >= len(data) {
508	return i
509	}
510
511	if p.extensions&LaxHTMLBlocks != 0 {
512	return i
513	}
514	if skip = p.isEmpty(data[i:]); skip == 0 {
515	// following line must be blank
516	return 0
517	}
518
519	return i + skip
520	}
521
522	func (*Markdown) isEmpty(data []byte) int {
523	// it is okay to call isEmpty on an empty buffer
524	if len(data) == 0 {
525	return 0
526	}
527
528	var i int
529	for i = 0; i < len(data) && data[i] != '\n'; i++ {
530	if data[i] != ' ' && data[i] != '\t' {
531	return 0
532	}
533	}
534	if i < len(data) && data[i] == '\n' {
535	i++
536	}
537	return i
538	}
539
540	func (*Markdown) isHRule(data []byte) bool {
541	i := 0
542
543	// skip up to three spaces
544	for i < 3 && data[i] == ' ' {
545	i++
546	}
547
548	// look at the hrule char
549	if data[i] != '*' && data[i] != '-' && data[i] != '_' {
550	return false
551	}
552	c := data[i]
553
554	// the whole line must be the char or whitespace
555	n := 0
556	for i < len(data) && data[i] != '\n' {
557	switch {
558	case data[i] == c:
559	n++
560	case data[i] != ' ':
561	return false
562	}
563	i++
564	}
565
566	return n >= 3
567	}
568
569	// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
570	// and returns the end index if so, or 0 otherwise. It also returns the marker found.
571	// If info is not nil, it gets set to the syntax specified in the fence line.
572	func isFenceLine(data []byte, info *string, oldmarker string) (end int, marker string) {
573	i, size := 0, 0
574
575	// skip up to three spaces
576	for i < len(data) && i < 3 && data[i] == ' ' {
577	i++
578	}
579
580	// check for the marker characters: ~ or `
581	if i >= len(data) {
582	return 0, ""
583	}
584	if data[i] != '~' && data[i] != '`' {
585	return 0, ""
586	}
587
588	c := data[i]
589
590	// the whole line must be the same char or whitespace
591	for i < len(data) && data[i] == c {
592	size++
593	i++
594	}
595
596	// the marker char must occur at least 3 times
597	if size < 3 {
598	return 0, ""
599	}
600	marker = string(data[i-size : i])
601
602	// if this is the end marker, it must match the beginning marker
603	if oldmarker != "" && marker != oldmarker {
604	return 0, ""
605	}
606
607	// TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
608	// into one, always get the info string, and discard it if the caller doesn't care.
609	if info != nil {
610	infoLength := 0
611	i = skipChar(data, i, ' ')
612
613	if i >= len(data) {
614	if i == len(data) {
615	return i, marker
616	}
617	return 0, ""
618	}
619
620	infoStart := i
621
622	if data[i] == '{' {
623	i++
624	infoStart++
625
626	for i < len(data) && data[i] != '}' && data[i] != '\n' {
627	infoLength++
628	i++
629	}
630
631	if i >= len(data) \|\| data[i] != '}' {
632	return 0, ""
633	}
634
635	// strip all whitespace at the beginning and the end
636	// of the {} block
637	for infoLength > 0 && isspace(data[infoStart]) {
638	infoStart++
639	infoLength--
640	}
641
642	for infoLength > 0 && isspace(data[infoStart+infoLength-1]) {
643	infoLength--
644	}
645	i++
646	i = skipChar(data, i, ' ')
647	} else {
648	for i < len(data) && !isverticalspace(data[i]) {
649	infoLength++
650	i++
651	}
652	}
653
654	*info = strings.TrimSpace(string(data[infoStart : infoStart+infoLength]))
655	}
656
657	if i == len(data) {
658	return i, marker
659	}
660	if i > len(data) \|\| data[i] != '\n' {
661	return 0, ""
662	}
663	return i + 1, marker // Take newline into account.
664	}
665
666	// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
667	// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
668	// If doRender is true, a final newline is mandatory to recognize the fenced code block.
669	func (p *Markdown) fencedCodeBlock(data []byte, doRender bool) int {
670	var info string
671	beg, marker := isFenceLine(data, &info, "")
672	if beg == 0 \|\| beg >= len(data) {
673	return 0
674	}
675	fenceLength := beg - 1
676
677	var work bytes.Buffer
678	work.Write([]byte(info))
679	work.WriteByte('\n')
680
681	for {
682	// safe to assume beg < len(data)
683
684	// check for the end of the code block
685	fenceEnd, _ := isFenceLine(data[beg:], nil, marker)
686	if fenceEnd != 0 {
687	beg += fenceEnd
688	break
689	}
690
691	// copy the current line
692	end := skipUntilChar(data, beg, '\n') + 1
693
694	// did we reach the end of the buffer without a closing marker?
695	if end >= len(data) {
696	return 0
697	}
698
699	// verbatim copy to the working buffer
700	if doRender {
701	work.Write(data[beg:end])
702	}
703	beg = end
704	}
705
706	if doRender {
707	block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
708	block.IsFenced = true
709	block.FenceLength = fenceLength
710	finalizeCodeBlock(block)
711	}
712
713	return beg
714	}
715
// unescapeChar converts one escape sequence to the text it stands for.
// A backslash escape yields the single byte after the backslash; anything
// else is treated as an HTML entity and decoded with html.UnescapeString.
func unescapeChar(str []byte) []byte {
	if str[0] != '\\' {
		return []byte(html.UnescapeString(string(str)))
	}
	return []byte{str[1]}
}
722
723	func unescapeString(str []byte) []byte {
724	if reBackslashOrAmp.Match(str) {
725	return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
726	}
727	return str
728	}
729
730	func finalizeCodeBlock(block *Node) {
731	if block.IsFenced {
732	newlinePos := bytes.IndexByte(block.content, '\n')
733	firstLine := block.content[:newlinePos]
734	rest := block.content[newlinePos+1:]
735	block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
736	block.Literal = rest
737	} else {
738	block.Literal = block.content
739	}
740	block.content = nil
741	}
742
743	func (p *Markdown) table(data []byte) int {
744	table := p.addBlock(Table, nil)
745	i, columns := p.tableHeader(data)
746	if i == 0 {
747	p.tip = table.Parent
748	table.Unlink()
749	return 0
750	}
751
752	p.addBlock(TableBody, nil)
753
754	for i < len(data) {
755	pipes, rowStart := 0, i
756	for ; i < len(data) && data[i] != '\n'; i++ {
757	if data[i] == '\|' {
758	pipes++
759	}
760	}
761
762	if pipes == 0 {
763	i = rowStart
764	break
765	}
766
767	// include the newline in data sent to tableRow
768	if i < len(data) && data[i] == '\n' {
769	i++
770	}
771	p.tableRow(data[rowStart:i], columns, false)
772	}
773
774	return i
775	}
776
// isBackslashEscaped reports whether the byte at index i of data is preceded
// by an odd number of consecutive backslashes, i.e. whether it is escaped.
func isBackslashEscaped(data []byte, i int) bool {
	count := 0
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		count++
	}
	return count%2 == 1
}
785
786	func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
787	i := 0
788	colCount := 1
789	for i = 0; i < len(data) && data[i] != '\n'; i++ {
790	if data[i] == '\|' && !isBackslashEscaped(data, i) {
791	colCount++
792	}
793	}
794
795	// doesn't look like a table header
796	if colCount == 1 {
797	return
798	}
799
800	// include the newline in the data sent to tableRow
801	j := i
802	if j < len(data) && data[j] == '\n' {
803	j++
804	}
805	header := data[:j]
806
807	// column count ignores pipes at beginning or end of line
808	if data[0] == '\|' {
809	colCount--
810	}
811	if i > 2 && data[i-1] == '\|' && !isBackslashEscaped(data, i-1) {
812	colCount--
813	}
814
815	columns = make([]CellAlignFlags, colCount)
816
817	// move on to the header underline
818	i++
819	if i >= len(data) {
820	return
821	}
822
823	if data[i] == '\|' && !isBackslashEscaped(data, i) {
824	i++
825	}
826	i = skipChar(data, i, ' ')
827
828	// each column header is of form: / :?-+:? \|/ with # dashes + # colons >= 3
829	// and trailing \| optional on last column
830	col := 0
831	for i < len(data) && data[i] != '\n' {
832	dashes := 0
833
834	if data[i] == ':' {
835	i++
836	columns[col] \|= TableAlignmentLeft
837	dashes++
838	}
839	for i < len(data) && data[i] == '-' {
840	i++
841	dashes++
842	}
843	if i < len(data) && data[i] == ':' {
844	i++
845	columns[col] \|= TableAlignmentRight
846	dashes++
847	}
848	for i < len(data) && data[i] == ' ' {
849	i++
850	}
851	if i == len(data) {
852	return
853	}
854	// end of column test is messy
855	switch {
856	case dashes < 3:
857	// not a valid column
858	return
859
860	case data[i] == '\|' && !isBackslashEscaped(data, i):
861	// marker found, now skip past trailing whitespace
862	col++
863	i++
864	for i < len(data) && data[i] == ' ' {
865	i++
866	}
867
868	// trailing junk found after last column
869	if col >= colCount && i < len(data) && data[i] != '\n' {
870	return
871	}
872
873	case (data[i] != '\|' \|\| isBackslashEscaped(data, i)) && col+1 < colCount:
874	// something else found where marker was required
875	return
876
877	case data[i] == '\n':
878	// marker is optional for the last column
879	col++
880
881	default:
882	// trailing junk found after last column
883	return
884	}
885	}
886	if col != colCount {
887	return
888	}
889
890	p.addBlock(TableHead, nil)
891	p.tableRow(header, columns, true)
892	size = i
893	if size < len(data) && data[size] == '\n' {
894	size++
895	}
896	return
897	}
898
899	func (p *Markdown) tableRow(data []byte, columns []CellAlignFlags, header bool) {
900	p.addBlock(TableRow, nil)
901	i, col := 0, 0
902
903	if data[i] == '\|' && !isBackslashEscaped(data, i) {
904	i++
905	}
906
907	for col = 0; col < len(columns) && i < len(data); col++ {
908	for i < len(data) && data[i] == ' ' {
909	i++
910	}
911
912	cellStart := i
913
914	for i < len(data) && (data[i] != '\|' \|\| isBackslashEscaped(data, i)) && data[i] != '\n' {
915	i++
916	}
917
918	cellEnd := i
919
920	// skip the end-of-cell marker, possibly taking us past end of buffer
921	i++
922
923	for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' {
924	cellEnd--
925	}
926
927	cell := p.addBlock(TableCell, data[cellStart:cellEnd])
928	cell.IsHeader = header
929	cell.Align = columns[col]
930	}
931
932	// pad it out with empty columns to get the right number
933	for ; col < len(columns); col++ {
934	cell := p.addBlock(TableCell, nil)
935	cell.IsHeader = header
936	cell.Align = columns[col]
937	}
938
939	// silently ignore rows with too many cells
940	}
941
942	// returns blockquote prefix length
943	func (p *Markdown) quotePrefix(data []byte) int {
944	i := 0
945	for i < 3 && i < len(data) && data[i] == ' ' {
946	i++
947	}
948	if i < len(data) && data[i] == '>' {
949	if i+1 < len(data) && data[i+1] == ' ' {
950	return i + 2
951	}
952	return i + 1
953	}
954	return 0
955	}
956
957	// blockquote ends with at least one blank line
958	// followed by something without a blockquote prefix
959	func (p *Markdown) terminateBlockquote(data []byte, beg, end int) bool {
960	if p.isEmpty(data[beg:]) <= 0 {
961	return false
962	}
963	if end >= len(data) {
964	return true
965	}
966	return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
967	}
968
969	// parse a blockquote fragment
970	func (p *Markdown) quote(data []byte) int {
971	block := p.addBlock(BlockQuote, nil)
972	var raw bytes.Buffer
973	beg, end := 0, 0
974	for beg < len(data) {
975	end = beg
976	// Step over whole lines, collecting them. While doing that, check for
977	// fenced code and if one's found, incorporate it altogether,
978	// irregardless of any contents inside it
979	for end < len(data) && data[end] != '\n' {
980	if p.extensions&FencedCode != 0 {
981	if i := p.fencedCodeBlock(data[end:], false); i > 0 {
982	// -1 to compensate for the extra end++ after the loop:
983	end += i - 1
984	break
985	}
986	}
987	end++
988	}
989	if end < len(data) && data[end] == '\n' {
990	end++
991	}
992	if pre := p.quotePrefix(data[beg:]); pre > 0 {
993	// skip the prefix
994	beg += pre
995	} else if p.terminateBlockquote(data, beg, end) {
996	break
997	}
998	// this line is part of the blockquote
999	raw.Write(data[beg:end])
1000	beg = end
1001	}
1002	p.block(raw.Bytes())
1003	p.finalize(block)
1004	return end
1005	}
1006
1007	// returns prefix length for block code
1008	func (p *Markdown) codePrefix(data []byte) int {
1009	if len(data) >= 1 && data[0] == '\t' {
1010	return 1
1011	}
1012	if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
1013	return 4
1014	}
1015	return 0
1016	}
1017
1018	func (p *Markdown) code(data []byte) int {
1019	var work bytes.Buffer
1020
1021	i := 0
1022	for i < len(data) {
1023	beg := i
1024	for i < len(data) && data[i] != '\n' {
1025	i++
1026	}
1027	if i < len(data) && data[i] == '\n' {
1028	i++
1029	}
1030
1031	blankline := p.isEmpty(data[beg:i]) > 0
1032	if pre := p.codePrefix(data[beg:i]); pre > 0 {
1033	beg += pre
1034	} else if !blankline {
1035	// non-empty, non-prefixed line breaks the pre
1036	i = beg
1037	break
1038	}
1039
1040	// verbatim copy to the working buffer
1041	if blankline {
1042	work.WriteByte('\n')
1043	} else {
1044	work.Write(data[beg:i])
1045	}
1046	}
1047
1048	// trim all the \n off the end of work
1049	workbytes := work.Bytes()
1050	eol := len(workbytes)
1051	for eol > 0 && workbytes[eol-1] == '\n' {
1052	eol--
1053	}
1054	if eol != len(workbytes) {
1055	work.Truncate(eol)
1056	}
1057
1058	work.WriteByte('\n')
1059
1060	block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
1061	block.IsFenced = false
1062	finalizeCodeBlock(block)
1063
1064	return i
1065	}
1066
1067	// returns unordered list item prefix
1068	func (p *Markdown) uliPrefix(data []byte) int {
1069	i := 0
1070	// start with up to 3 spaces
1071	for i < len(data) && i < 3 && data[i] == ' ' {
1072	i++
1073	}
1074	if i >= len(data)-1 {
1075	return 0
1076	}
1077	// need one of {'*', '+', '-'} followed by a space or a tab
1078	if (data[i] != '*' && data[i] != '+' && data[i] != '-') \|\|
1079	(data[i+1] != ' ' && data[i+1] != '\t') {
1080	return 0
1081	}
1082	return i + 2
1083	}
1084
1085	// returns ordered list item prefix
1086	func (p *Markdown) oliPrefix(data []byte) int {
1087	i := 0
1088
1089	// start with up to 3 spaces
1090	for i < 3 && i < len(data) && data[i] == ' ' {
1091	i++
1092	}
1093
1094	// count the digits
1095	start := i
1096	for i < len(data) && data[i] >= '0' && data[i] <= '9' {
1097	i++
1098	}
1099	if start == i \|\| i >= len(data)-1 {
1100	return 0
1101	}
1102
1103	// we need >= 1 digits followed by a dot and a space or a tab
1104	if data[i] != '.' \|\| !(data[i+1] == ' ' \|\| data[i+1] == '\t') {
1105	return 0
1106	}
1107	return i + 2
1108	}
1109
1110	// returns definition list item prefix
1111	func (p *Markdown) dliPrefix(data []byte) int {
1112	if len(data) < 2 {
1113	return 0
1114	}
1115	i := 0
1116	// need a ':' followed by a space or a tab
1117	if data[i] != ':' \|\| !(data[i+1] == ' ' \|\| data[i+1] == '\t') {
1118	return 0
1119	}
1120	for i < len(data) && data[i] == ' ' {
1121	i++
1122	}
1123	return i + 2
1124	}
1125
1126	// parse ordered or unordered list block
1127	func (p *Markdown) list(data []byte, flags ListType) int {
1128	i := 0
1129	flags \|= ListItemBeginningOfList
1130	block := p.addBlock(List, nil)
1131	block.ListFlags = flags
1132	block.Tight = true
1133
1134	for i < len(data) {
1135	skip := p.listItem(data[i:], &flags)
1136	if flags&ListItemContainsBlock != 0 {
1137	block.ListData.Tight = false
1138	}
1139	i += skip
1140	if skip == 0 \|\| flags&ListItemEndOfList != 0 {
1141	break
1142	}
1143	flags &= ^ListItemBeginningOfList
1144	}
1145
1146	above := block.Parent
1147	finalizeList(block)
1148	p.tip = above
1149	return i
1150	}
1151
1152	// Returns true if the list item is not the same type as its parent list
1153	func (p Markdown) listTypeChanged(data []byte, flags ListType) bool {
1154	if p.dliPrefix(data) > 0 && *flags&ListTypeDefinition == 0 {
1155	return true
1156	} else if p.oliPrefix(data) > 0 && *flags&ListTypeOrdered == 0 {
1157	return true
1158	} else if p.uliPrefix(data) > 0 && (flags&ListTypeOrdered != 0 \|\| flags&ListTypeDefinition != 0) {
1159	return true
1160	}
1161	return false
1162	}
1163
1164	// Returns true if block ends with a blank line, descending if needed
1165	// into lists and sublists.
1166	func endsWithBlankLine(block *Node) bool {
1167	// TODO: figure this out. Always false now.
1168	for block != nil {
1169	//if block.lastLineBlank {
1170	//return true
1171	//}
1172	t := block.Type
1173	if t == List \|\| t == Item {
1174	block = block.LastChild
1175	} else {
1176	break
1177	}
1178	}
1179	return false
1180	}
1181
1182	func finalizeList(block *Node) {
1183	block.open = false
1184	item := block.FirstChild
1185	for item != nil {
1186	// check for non-final list item ending with blank line:
1187	if endsWithBlankLine(item) && item.Next != nil {
1188	block.ListData.Tight = false
1189	break
1190	}
1191	// recurse into children of list item, to see if there are spaces
1192	// between any of them:
1193	subItem := item.FirstChild
1194	for subItem != nil {
1195	if endsWithBlankLine(subItem) && (item.Next != nil \|\| subItem.Next != nil) {
1196	block.ListData.Tight = false
1197	break
1198	}
1199	subItem = subItem.Next
1200	}
1201	item = item.Next
1202	}
1203	}
1204
1205	// Parse a single list item.
1206	// Assumes initial prefix is already removed if this is a sublist.
1207	func (p Markdown) listItem(data []byte, flags ListType) int {
1208	// keep track of the indentation of the first line
1209	itemIndent := 0
1210	if data[0] == '\t' {
1211	itemIndent += 4
1212	} else {
1213	for itemIndent < 3 && data[itemIndent] == ' ' {
1214	itemIndent++
1215	}
1216	}
1217
1218	var bulletChar byte = '*'
1219	i := p.uliPrefix(data)
1220	if i == 0 {
1221	i = p.oliPrefix(data)
1222	} else {
1223	bulletChar = data[i-2]
1224	}
1225	if i == 0 {
1226	i = p.dliPrefix(data)
1227	// reset definition term flag
1228	if i > 0 {
1229	*flags &= ^ListTypeTerm
1230	}
1231	}
1232	if i == 0 {
1233	// if in definition list, set term flag and continue
1234	if *flags&ListTypeDefinition != 0 {
1235	*flags \|= ListTypeTerm
1236	} else {
1237	return 0
1238	}
1239	}
1240
1241	// skip leading whitespace on first line
1242	for i < len(data) && data[i] == ' ' {
1243	i++
1244	}
1245
1246	// find the end of the line
1247	line := i
1248	for i > 0 && i < len(data) && data[i-1] != '\n' {
1249	i++
1250	}
1251
1252	// get working buffer
1253	var raw bytes.Buffer
1254
1255	// put the first line into the working buffer
1256	raw.Write(data[line:i])
1257	line = i
1258
1259	// process the following lines
1260	containsBlankLine := false
1261	sublist := 0
1262	codeBlockMarker := ""
1263
1264	gatherlines:
1265	for line < len(data) {
1266	i++
1267
1268	// find the end of this line
1269	for i < len(data) && data[i-1] != '\n' {
1270	i++
1271	}
1272
1273	// if it is an empty line, guess that it is part of this item
1274	// and move on to the next line
1275	if p.isEmpty(data[line:i]) > 0 {
1276	containsBlankLine = true
1277	line = i
1278	continue
1279	}
1280
1281	// calculate the indentation
1282	indent := 0
1283	indentIndex := 0
1284	if data[line] == '\t' {
1285	indentIndex++
1286	indent += 4
1287	} else {
1288	for indent < 4 && line+indent < i && data[line+indent] == ' ' {
1289	indent++
1290	indentIndex++
1291	}
1292	}
1293
1294	chunk := data[line+indentIndex : i]
1295
1296	if p.extensions&FencedCode != 0 {
1297	// determine if in or out of codeblock
1298	// if in codeblock, ignore normal list processing
1299	_, marker := isFenceLine(chunk, nil, codeBlockMarker)
1300	if marker != "" {
1301	if codeBlockMarker == "" {
1302	// start of codeblock
1303	codeBlockMarker = marker
1304	} else {
1305	// end of codeblock.
1306	codeBlockMarker = ""
1307	}
1308	}
1309	// we are in a codeblock, write line, and continue
1310	if codeBlockMarker != "" \|\| marker != "" {
1311	raw.Write(data[line+indentIndex : i])
1312	line = i
1313	continue gatherlines
1314	}
1315	}
1316
1317	// evaluate how this line fits in
1318	switch {
1319	// is this a nested list item?
1320	case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) \|\|
1321	p.oliPrefix(chunk) > 0 \|\|
1322	p.dliPrefix(chunk) > 0:
1323
1324	// to be a nested list, it must be indented more
1325	// if not, it is either a different kind of list
1326	// or the next item in the same list
1327	if indent <= itemIndent {
1328	if p.listTypeChanged(chunk, flags) {
1329	*flags \|= ListItemEndOfList
1330	} else if containsBlankLine {
1331	*flags \|= ListItemContainsBlock
1332	}
1333
1334	break gatherlines
1335	}
1336
1337	if containsBlankLine {
1338	*flags \|= ListItemContainsBlock
1339	}
1340
1341	// is this the first item in the nested list?
1342	if sublist == 0 {
1343	sublist = raw.Len()
1344	}
1345
1346	// is this a nested prefix heading?
1347	case p.isPrefixHeading(chunk):
1348	// if the heading is not indented, it is not nested in the list
1349	// and thus ends the list
1350	if containsBlankLine && indent < 4 {
1351	*flags \|= ListItemEndOfList
1352	break gatherlines
1353	}
1354	*flags \|= ListItemContainsBlock
1355
1356	// anything following an empty line is only part
1357	// of this item if it is indented 4 spaces
1358	// (regardless of the indentation of the beginning of the item)
1359	case containsBlankLine && indent < 4:
1360	if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
1361	// is the next item still a part of this list?
1362	next := i
1363	for next < len(data) && data[next] != '\n' {
1364	next++
1365	}
1366	for next < len(data)-1 && data[next] == '\n' {
1367	next++
1368	}
1369	if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
1370	*flags \|= ListItemEndOfList
1371	}
1372	} else {
1373	*flags \|= ListItemEndOfList
1374	}
1375	break gatherlines
1376
1377	// a blank line means this should be parsed as a block
1378	case containsBlankLine:
1379	raw.WriteByte('\n')
1380	*flags \|= ListItemContainsBlock
1381	}
1382
1383	// if this line was preceded by one or more blanks,
1384	// re-introduce the blank into the buffer
1385	if containsBlankLine {
1386	containsBlankLine = false
1387	raw.WriteByte('\n')
1388	}
1389
1390	// add the line into the working buffer without prefix
1391	raw.Write(data[line+indentIndex : i])
1392
1393	line = i
1394	}
1395
1396	rawBytes := raw.Bytes()
1397
1398	block := p.addBlock(Item, nil)
1399	block.ListFlags = *flags
1400	block.Tight = false
1401	block.BulletChar = bulletChar
1402	block.Delimiter = '.' // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark
1403
1404	// render the contents of the list item
1405	if flags&ListItemContainsBlock != 0 && flags&ListTypeTerm == 0 {
1406	// intermediate render of block item, except for definition term
1407	if sublist > 0 {
1408	p.block(rawBytes[:sublist])
1409	p.block(rawBytes[sublist:])
1410	} else {
1411	p.block(rawBytes)
1412	}
1413	} else {
1414	// intermediate render of inline item
1415	if sublist > 0 {
1416	child := p.addChild(Paragraph, 0)
1417	child.content = rawBytes[:sublist]
1418	p.block(rawBytes[sublist:])
1419	} else {
1420	child := p.addChild(Paragraph, 0)
1421	child.content = rawBytes
1422	}
1423	}
1424	return line
1425	}
1426
1427	// render a single paragraph that has already been parsed out
1428	func (p *Markdown) renderParagraph(data []byte) {
1429	if len(data) == 0 {
1430	return
1431	}
1432
1433	// trim leading spaces
1434	beg := 0
1435	for data[beg] == ' ' {
1436	beg++
1437	}
1438
1439	end := len(data)
1440	// trim trailing newline
1441	if data[len(data)-1] == '\n' {
1442	end--
1443	}
1444
1445	// trim trailing spaces
1446	for end > beg && data[end-1] == ' ' {
1447	end--
1448	}
1449
1450	p.addBlock(Paragraph, data[beg:end])
1451	}
1452
1453	func (p *Markdown) paragraph(data []byte) int {
1454	// prev: index of 1st char of previous line
1455	// line: index of 1st char of current line
1456	// i: index of cursor/end of current line
1457	var prev, line, i int
1458	tabSize := TabSizeDefault
1459	if p.extensions&TabSizeEight != 0 {
1460	tabSize = TabSizeDouble
1461	}
1462	// keep going until we find something to mark the end of the paragraph
1463	for i < len(data) {
1464	// mark the beginning of the current line
1465	prev = line
1466	current := data[i:]
1467	line = i
1468
1469	// did we find a reference or a footnote? If so, end a paragraph
1470	// preceding it and report that we have consumed up to the end of that
1471	// reference:
1472	if refEnd := isReference(p, current, tabSize); refEnd > 0 {
1473	p.renderParagraph(data[:i])
1474	return i + refEnd
1475	}
1476
1477	// did we find a blank line marking the end of the paragraph?
1478	if n := p.isEmpty(current); n > 0 {
1479	// did this blank line followed by a definition list item?
1480	if p.extensions&DefinitionLists != 0 {
1481	if i < len(data)-1 && data[i+1] == ':' {
1482	return p.list(data[prev:], ListTypeDefinition)
1483	}
1484	}
1485
1486	p.renderParagraph(data[:i])
1487	return i + n
1488	}
1489
1490	// an underline under some text marks a heading, so our paragraph ended on prev line
1491	if i > 0 {
1492	if level := p.isUnderlinedHeading(current); level > 0 {
1493	// render the paragraph
1494	p.renderParagraph(data[:prev])
1495
1496	// ignore leading and trailing whitespace
1497	eol := i - 1
1498	for prev < eol && data[prev] == ' ' {
1499	prev++
1500	}
1501	for eol > prev && data[eol-1] == ' ' {
1502	eol--
1503	}
1504
1505	id := ""
1506	if p.extensions&AutoHeadingIDs != 0 {
1507	id = SanitizedAnchorName(string(data[prev:eol]))
1508	}
1509
1510	block := p.addBlock(Heading, data[prev:eol])
1511	block.Level = level
1512	block.HeadingID = id
1513
1514	// find the end of the underline
1515	for i < len(data) && data[i] != '\n' {
1516	i++
1517	}
1518	return i
1519	}
1520	}
1521
1522	// if the next line starts a block of HTML, then the paragraph ends here
1523	if p.extensions&LaxHTMLBlocks != 0 {
1524	if data[i] == '<' && p.html(current, false) > 0 {
1525	// rewind to before the HTML block
1526	p.renderParagraph(data[:i])
1527	return i
1528	}
1529	}
1530
1531	// if there's a prefixed heading or a horizontal rule after this, paragraph is over
1532	if p.isPrefixHeading(current) \|\| p.isHRule(current) {
1533	p.renderParagraph(data[:i])
1534	return i
1535	}
1536
1537	// if there's a fenced code block, paragraph is over
1538	if p.extensions&FencedCode != 0 {
1539	if p.fencedCodeBlock(current, false) > 0 {
1540	p.renderParagraph(data[:i])
1541	return i
1542	}
1543	}
1544
1545	// if there's a definition list item, prev line is a definition term
1546	if p.extensions&DefinitionLists != 0 {
1547	if p.dliPrefix(current) != 0 {
1548	ret := p.list(data[prev:], ListTypeDefinition)
1549	return ret
1550	}
1551	}
1552
1553	// if there's a list after this, paragraph is over
1554	if p.extensions&NoEmptyLineBeforeBlock != 0 {
1555	if p.uliPrefix(current) != 0 \|\|
1556	p.oliPrefix(current) != 0 \|\|
1557	p.quotePrefix(current) != 0 \|\|
1558	p.codePrefix(current) != 0 {
1559	p.renderParagraph(data[:i])
1560	return i
1561	}
1562	}
1563
1564	// otherwise, scan to the beginning of the next line
1565	nl := bytes.IndexByte(data[i:], '\n')
1566	if nl >= 0 {
1567	i += nl + 1
1568	} else {
1569	i += len(data[i:])
1570	}
1571	}
1572
1573	p.renderParagraph(data[:i])
1574	return i
1575	}
1576
// skipChar returns the index of the first byte at or after start that is not
// char. If every byte from start onwards equals char, or start is already at
// or beyond the end of data, the returned index is not a valid position in
// data (it is len(data) or start respectively).
func skipChar(data []byte, start int, char byte) int {
	for ; start < len(data); start++ {
		if data[start] != char {
			break
		}
	}
	return start
}
1584
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. If char does not occur there, it returns len(text), or
// start itself when start is already at or beyond the end of text.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for pos < len(text) {
		if text[pos] == char {
			return pos
		}
		pos++
	}
	return pos
}
1592
// SanitizedAnchorName returns a sanitized anchor name for the given text.
//
// It implements the algorithm specified in the package comment: letters and
// numbers are kept in lower case, each run of any other characters becomes a
// single '-', and no dash is produced at the start or the end of the name.
func SanitizedAnchorName(text string) string {
	var (
		name        []rune
		dashPending bool
	)
	for _, r := range text {
		if !unicode.IsLetter(r) && !unicode.IsNumber(r) {
			// remember the gap; the dash is only written once another
			// letter or number actually follows it
			dashPending = true
			continue
		}
		if dashPending && len(name) > 0 {
			name = append(name, '-')
		}
		dashPending = false
		name = append(name, unicode.ToLower(r))
	}
	return string(name)
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: