Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: code/trunk/vendor/github.com/russross/blackfriday/v2/inline.go@ 67

Last change on this file since 67 was 67, checked in by Izuru Yakumo, 23 months ago

Use vendored modules

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 25.2 KB

Line
1	//
2	// Blackfriday Markdown Processor
3	// Available at http://github.com/russross/blackfriday
4	//
5	// Copyright © 2011 Russ Ross <russ@russross.com>.
6	// Distributed under the Simplified BSD License.
7	// See README.md for details.
8	//
9
10	//
11	// Functions to parse inline elements.
12	//
13
14	package blackfriday
15
16	import (
17	"bytes"
18	"regexp"
19	"strconv"
20	)
21
var (
	// urlRe is the regular-expression source for a URL: either an
	// http/https/ftp scheme followed by "://", or a leading "/", followed by
	// one or more URL characters.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`

	// anchorRe matches, at the start of the input, a complete anchor element
	// whose href and whose text are both URLs, with an optional title
	// attribute: <a href="URL" title="...">URL</a>.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)

	// https://www.w3.org/TR/html5/syntax.html#character-references
	// highest unicode code point in 17 planes (2^20): 1,114,112d =
	// 7 dec digits or 6 hex digits
	// named entity references can be 2-31 characters with stuff like &lt;
	// at one end and &CounterClockwiseContourIntegral; at the other. There
	// are also sometimes numbers at the end, although this isn't inherent
	// in the specification; there are never numbers anywhere else in
	// current character references, though; see &frac34; and &blk12;, etc.
	// https://www.w3.org/TR/html5/syntax.html#named-character-references
	//
	// entity := "&" (named group | number ref) ";"
	// named group := [a-zA-Z]{2,31}[0-9]{0,2}
	// number ref := "#" (dec ref | hex ref)
	// dec ref := [0-9]{1,7}
	// hex ref := ("x" | "X") [0-9a-fA-F]{1,6}
	htmlEntityRe = regexp.MustCompile(`&([a-zA-Z]{2,31}[0-9]{0,2}|#([0-9]{1,7}|[xX][0-9a-fA-F]{1,6}));`)
)
43
44	// Functions to parse text within a block
45	// Each function returns the number of chars taken care of
46	// data is the complete block being rendered
47	// offset is the number of valid chars before the current cursor
48
49	func (p Markdown) inline(currBlock Node, data []byte) {
50	// handlers might call us recursively: enforce a maximum depth
51	if p.nesting >= p.maxNesting \|\| len(data) == 0 {
52	return
53	}
54	p.nesting++
55	beg, end := 0, 0
56	for end < len(data) {
57	handler := p.inlineCallback[data[end]]
58	if handler != nil {
59	if consumed, node := handler(p, data, end); consumed == 0 {
60	// No action from the callback.
61	end++
62	} else {
63	// Copy inactive chars into the output.
64	currBlock.AppendChild(text(data[beg:end]))
65	if node != nil {
66	currBlock.AppendChild(node)
67	}
68	// Skip past whatever the callback used.
69	beg = end + consumed
70	end = beg
71	}
72	} else {
73	end++
74	}
75	}
76	if beg < len(data) {
77	if data[end-1] == '\n' {
78	end--
79	}
80	currBlock.AppendChild(text(data[beg:end]))
81	}
82	p.nesting--
83	}
84
85	// single and double emphasis parsing
86	func emphasis(p Markdown, data []byte, offset int) (int, Node) {
87	data = data[offset:]
88	c := data[0]
89
90	if len(data) > 2 && data[1] != c {
91	// whitespace cannot follow an opening emphasis;
92	// strikethrough only takes two characters '~~'
93	if c == '~' \|\| isspace(data[1]) {
94	return 0, nil
95	}
96	ret, node := helperEmphasis(p, data[1:], c)
97	if ret == 0 {
98	return 0, nil
99	}
100
101	return ret + 1, node
102	}
103
104	if len(data) > 3 && data[1] == c && data[2] != c {
105	if isspace(data[2]) {
106	return 0, nil
107	}
108	ret, node := helperDoubleEmphasis(p, data[2:], c)
109	if ret == 0 {
110	return 0, nil
111	}
112
113	return ret + 2, node
114	}
115
116	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
117	if c == '~' \|\| isspace(data[3]) {
118	return 0, nil
119	}
120	ret, node := helperTripleEmphasis(p, data, 3, c)
121	if ret == 0 {
122	return 0, nil
123	}
124
125	return ret + 3, node
126	}
127
128	return 0, nil
129	}
130
131	func codeSpan(p Markdown, data []byte, offset int) (int, Node) {
132	data = data[offset:]
133
134	nb := 0
135
136	// count the number of backticks in the delimiter
137	for nb < len(data) && data[nb] == '`' {
138	nb++
139	}
140
141	// find the next delimiter
142	i, end := 0, 0
143	for end = nb; end < len(data) && i < nb; end++ {
144	if data[end] == '`' {
145	i++
146	} else {
147	i = 0
148	}
149	}
150
151	// no matching delimiter?
152	if i < nb && end >= len(data) {
153	return 0, nil
154	}
155
156	// trim outside whitespace
157	fBegin := nb
158	for fBegin < end && data[fBegin] == ' ' {
159	fBegin++
160	}
161
162	fEnd := end - nb
163	for fEnd > fBegin && data[fEnd-1] == ' ' {
164	fEnd--
165	}
166
167	// render the code span
168	if fBegin != fEnd {
169	code := NewNode(Code)
170	code.Literal = data[fBegin:fEnd]
171	return end, code
172	}
173
174	return end, nil
175	}
176
177	// newline preceded by two spaces becomes <br>
178	func maybeLineBreak(p Markdown, data []byte, offset int) (int, Node) {
179	origOffset := offset
180	for offset < len(data) && data[offset] == ' ' {
181	offset++
182	}
183
184	if offset < len(data) && data[offset] == '\n' {
185	if offset-origOffset >= 2 {
186	return offset - origOffset + 1, NewNode(Hardbreak)
187	}
188	return offset - origOffset, nil
189	}
190	return 0, nil
191	}
192
193	// newline without two spaces works when HardLineBreak is enabled
194	func lineBreak(p Markdown, data []byte, offset int) (int, Node) {
195	if p.extensions&HardLineBreak != 0 {
196	return 1, NewNode(Hardbreak)
197	}
198	return 0, nil
199	}
200
// linkType identifies which bracket construct is being parsed.
type linkType int

// The kinds of bracket construct, numbered from zero in this order.
const (
	// linkNormal is a regular link: [text].
	linkNormal linkType = iota
	// linkImg is an image: ![alt].
	linkImg
	// linkDeferredFootnote is a footnote reference: [^refId].
	linkDeferredFootnote
	// linkInlineFootnote is an inline footnote: ^[text].
	linkInlineFootnote
)
209
210	func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
211	if t == linkDeferredFootnote {
212	return false
213	}
214	return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
215	}
216
217	func maybeImage(p Markdown, data []byte, offset int) (int, Node) {
218	if offset < len(data)-1 && data[offset+1] == '[' {
219	return link(p, data, offset)
220	}
221	return 0, nil
222	}
223
224	func maybeInlineFootnote(p Markdown, data []byte, offset int) (int, Node) {
225	if offset < len(data)-1 && data[offset+1] == '[' {
226	return link(p, data, offset)
227	}
228	return 0, nil
229	}
230
231	// '[': parse a link or an image or a footnote
232	func link(p Markdown, data []byte, offset int) (int, Node) {
233	// no links allowed inside regular links, footnote, and deferred footnotes
234	if p.insideLink && (offset > 0 && data[offset-1] == '[' \|\| len(data)-1 > offset && data[offset+1] == '^') {
235	return 0, nil
236	}
237
238	var t linkType
239	switch {
240	// special case: ![^text] == deferred footnote (that follows something with
241	// an exclamation point)
242	case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
243	t = linkDeferredFootnote
244	// ![alt] == image
245	case offset >= 0 && data[offset] == '!':
246	t = linkImg
247	offset++
248	// ^[text] == inline footnote
249	// [^refId] == deferred footnote
250	case p.extensions&Footnotes != 0:
251	if offset >= 0 && data[offset] == '^' {
252	t = linkInlineFootnote
253	offset++
254	} else if len(data)-1 > offset && data[offset+1] == '^' {
255	t = linkDeferredFootnote
256	}
257	// [text] == regular link
258	default:
259	t = linkNormal
260	}
261
262	data = data[offset:]
263
264	var (
265	i = 1
266	noteID int
267	title, link, altContent []byte
268	textHasNl = false
269	)
270
271	if t == linkDeferredFootnote {
272	i++
273	}
274
275	// look for the matching closing bracket
276	for level := 1; level > 0 && i < len(data); i++ {
277	switch {
278	case data[i] == '\n':
279	textHasNl = true
280
281	case isBackslashEscaped(data, i):
282	continue
283
284	case data[i] == '[':
285	level++
286
287	case data[i] == ']':
288	level--
289	if level <= 0 {
290	i-- // compensate for extra i++ in for loop
291	}
292	}
293	}
294
295	if i >= len(data) {
296	return 0, nil
297	}
298
299	txtE := i
300	i++
301	var footnoteNode *Node
302
303	// skip any amount of whitespace or newline
304	// (this is much more lax than original markdown syntax)
305	for i < len(data) && isspace(data[i]) {
306	i++
307	}
308
309	// inline style link
310	switch {
311	case i < len(data) && data[i] == '(':
312	// skip initial whitespace
313	i++
314
315	for i < len(data) && isspace(data[i]) {
316	i++
317	}
318
319	linkB := i
320
321	// look for link end: ' " )
322	findlinkend:
323	for i < len(data) {
324	switch {
325	case data[i] == '\\':
326	i += 2
327
328	case data[i] == ')' \|\| data[i] == '\'' \|\| data[i] == '"':
329	break findlinkend
330
331	default:
332	i++
333	}
334	}
335
336	if i >= len(data) {
337	return 0, nil
338	}
339	linkE := i
340
341	// look for title end if present
342	titleB, titleE := 0, 0
343	if data[i] == '\'' \|\| data[i] == '"' {
344	i++
345	titleB = i
346
347	findtitleend:
348	for i < len(data) {
349	switch {
350	case data[i] == '\\':
351	i += 2
352
353	case data[i] == ')':
354	break findtitleend
355
356	default:
357	i++
358	}
359	}
360
361	if i >= len(data) {
362	return 0, nil
363	}
364
365	// skip whitespace after title
366	titleE = i - 1
367	for titleE > titleB && isspace(data[titleE]) {
368	titleE--
369	}
370
371	// check for closing quote presence
372	if data[titleE] != '\'' && data[titleE] != '"' {
373	titleB, titleE = 0, 0
374	linkE = i
375	}
376	}
377
378	// remove whitespace at the end of the link
379	for linkE > linkB && isspace(data[linkE-1]) {
380	linkE--
381	}
382
383	// remove optional angle brackets around the link
384	if data[linkB] == '<' {
385	linkB++
386	}
387	if data[linkE-1] == '>' {
388	linkE--
389	}
390
391	// build escaped link and title
392	if linkE > linkB {
393	link = data[linkB:linkE]
394	}
395
396	if titleE > titleB {
397	title = data[titleB:titleE]
398	}
399
400	i++
401
402	// reference style link
403	case isReferenceStyleLink(data, i, t):
404	var id []byte
405	altContentConsidered := false
406
407	// look for the id
408	i++
409	linkB := i
410	for i < len(data) && data[i] != ']' {
411	i++
412	}
413	if i >= len(data) {
414	return 0, nil
415	}
416	linkE := i
417
418	// find the reference
419	if linkB == linkE {
420	if textHasNl {
421	var b bytes.Buffer
422
423	for j := 1; j < txtE; j++ {
424	switch {
425	case data[j] != '\n':
426	b.WriteByte(data[j])
427	case data[j-1] != ' ':
428	b.WriteByte(' ')
429	}
430	}
431
432	id = b.Bytes()
433	} else {
434	id = data[1:txtE]
435	altContentConsidered = true
436	}
437	} else {
438	id = data[linkB:linkE]
439	}
440
441	// find the reference with matching id
442	lr, ok := p.getRef(string(id))
443	if !ok {
444	return 0, nil
445	}
446
447	// keep link and title from reference
448	link = lr.link
449	title = lr.title
450	if altContentConsidered {
451	altContent = lr.text
452	}
453	i++
454
455	// shortcut reference style link or reference or inline footnote
456	default:
457	var id []byte
458
459	// craft the id
460	if textHasNl {
461	var b bytes.Buffer
462
463	for j := 1; j < txtE; j++ {
464	switch {
465	case data[j] != '\n':
466	b.WriteByte(data[j])
467	case data[j-1] != ' ':
468	b.WriteByte(' ')
469	}
470	}
471
472	id = b.Bytes()
473	} else {
474	if t == linkDeferredFootnote {
475	id = data[2:txtE] // get rid of the ^
476	} else {
477	id = data[1:txtE]
478	}
479	}
480
481	footnoteNode = NewNode(Item)
482	if t == linkInlineFootnote {
483	// create a new reference
484	noteID = len(p.notes) + 1
485
486	var fragment []byte
487	if len(id) > 0 {
488	if len(id) < 16 {
489	fragment = make([]byte, len(id))
490	} else {
491	fragment = make([]byte, 16)
492	}
493	copy(fragment, slugify(id))
494	} else {
495	fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
496	}
497
498	ref := &reference{
499	noteID: noteID,
500	hasBlock: false,
501	link: fragment,
502	title: id,
503	footnote: footnoteNode,
504	}
505
506	p.notes = append(p.notes, ref)
507
508	link = ref.link
509	title = ref.title
510	} else {
511	// find the reference with matching id
512	lr, ok := p.getRef(string(id))
513	if !ok {
514	return 0, nil
515	}
516
517	if t == linkDeferredFootnote {
518	lr.noteID = len(p.notes) + 1
519	lr.footnote = footnoteNode
520	p.notes = append(p.notes, lr)
521	}
522
523	// keep link and title from reference
524	link = lr.link
525	// if inline footnote, title == footnote contents
526	title = lr.title
527	noteID = lr.noteID
528	}
529
530	// rewind the whitespace
531	i = txtE + 1
532	}
533
534	var uLink []byte
535	if t == linkNormal \|\| t == linkImg {
536	if len(link) > 0 {
537	var uLinkBuf bytes.Buffer
538	unescapeText(&uLinkBuf, link)
539	uLink = uLinkBuf.Bytes()
540	}
541
542	// links need something to click on and somewhere to go
543	if len(uLink) == 0 \|\| (t == linkNormal && txtE <= 1) {
544	return 0, nil
545	}
546	}
547
548	// call the relevant rendering function
549	var linkNode *Node
550	switch t {
551	case linkNormal:
552	linkNode = NewNode(Link)
553	linkNode.Destination = normalizeURI(uLink)
554	linkNode.Title = title
555	if len(altContent) > 0 {
556	linkNode.AppendChild(text(altContent))
557	} else {
558	// links cannot contain other links, so turn off link parsing
559	// temporarily and recurse
560	insideLink := p.insideLink
561	p.insideLink = true
562	p.inline(linkNode, data[1:txtE])
563	p.insideLink = insideLink
564	}
565
566	case linkImg:
567	linkNode = NewNode(Image)
568	linkNode.Destination = uLink
569	linkNode.Title = title
570	linkNode.AppendChild(text(data[1:txtE]))
571	i++
572
573	case linkInlineFootnote, linkDeferredFootnote:
574	linkNode = NewNode(Link)
575	linkNode.Destination = link
576	linkNode.Title = title
577	linkNode.NoteID = noteID
578	linkNode.Footnote = footnoteNode
579	if t == linkInlineFootnote {
580	i++
581	}
582
583	default:
584	return 0, nil
585	}
586
587	return i, linkNode
588	}
589
590	func (p *Markdown) inlineHTMLComment(data []byte) int {
591	if len(data) < 5 {
592	return 0
593	}
594	if data[0] != '<' \|\| data[1] != '!' \|\| data[2] != '-' \|\| data[3] != '-' {
595	return 0
596	}
597	i := 5
598	// scan for an end-of-comment marker, across lines if necessary
599	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
600	i++
601	}
602	// no end-of-comment marker
603	if i >= len(data) {
604	return 0
605	}
606	return i + 1
607	}
608
// stripMailto returns link without a leading "mailto://" or "mailto:"
// prefix. Only one prefix is removed, and the longer form is tried first.
// The result shares its backing array with link.
func stripMailto(link []byte) []byte {
	switch {
	case bytes.HasPrefix(link, []byte("mailto://")):
		return link[9:]
	case bytes.HasPrefix(link, []byte("mailto:")):
		return link[7:]
	}
	return link
}
618
// autolinkType specifies a kind of autolink that gets detected.
type autolinkType int

// These are the possible flag values for the autolink renderer.
const (
	// notAutolink is the zero value: the text is not an autolink.
	notAutolink autolinkType = iota
	// normalAutolink marks an autolink that is not an e-mail address.
	normalAutolink
	// emailAutolink marks an e-mail address autolink.
	emailAutolink
)
628
629	// '<' when tags or autolinks are allowed
630	func leftAngle(p Markdown, data []byte, offset int) (int, Node) {
631	data = data[offset:]
632	altype, end := tagLength(data)
633	if size := p.inlineHTMLComment(data); size > 0 {
634	end = size
635	}
636	if end > 2 {
637	if altype != notAutolink {
638	var uLink bytes.Buffer
639	unescapeText(&uLink, data[1:end+1-2])
640	if uLink.Len() > 0 {
641	link := uLink.Bytes()
642	node := NewNode(Link)
643	node.Destination = link
644	if altype == emailAutolink {
645	node.Destination = append([]byte("mailto:"), link...)
646	}
647	node.AppendChild(text(stripMailto(link)))
648	return end, node
649	}
650	} else {
651	htmlTag := NewNode(HTMLSpan)
652	htmlTag.Literal = data[:end]
653	return end, htmlTag
654	}
655	}
656
657	return end, nil
658	}
659
660	// '\\' backslash escape
661	var escapeChars = []byte("\\`*_{}[]()#+-.!:\|&<>~")
662
663	func escape(p Markdown, data []byte, offset int) (int, Node) {
664	data = data[offset:]
665
666	if len(data) > 1 {
667	if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' {
668	return 2, NewNode(Hardbreak)
669	}
670	if bytes.IndexByte(escapeChars, data[1]) < 0 {
671	return 0, nil
672	}
673
674	return 2, text(data[1:2])
675	}
676
677	return 2, nil
678	}
679
// unescapeText writes src to ob with backslash escapes resolved: every
// backslash is dropped and the byte following it is written verbatim. A
// backslash that is the last byte of src is dropped.
func unescapeText(ob *bytes.Buffer, src []byte) {
	for pos := 0; pos < len(src); {
		// length of the literal run up to the next backslash (or the end)
		run := bytes.IndexByte(src[pos:], '\\')
		if run < 0 {
			run = len(src) - pos
		}
		if run > 0 {
			ob.Write(src[pos : pos+run])
		}
		pos += run

		// nothing follows the backslash, or there is no backslash at all
		if pos+1 >= len(src) {
			break
		}

		ob.WriteByte(src[pos+1])
		pos += 2
	}
}
700
701	// '&' escaped when it doesn't belong to an entity
702	// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
703	func entity(p Markdown, data []byte, offset int) (int, Node) {
704	data = data[offset:]
705
706	end := 1
707
708	if end < len(data) && data[end] == '#' {
709	end++
710	}
711
712	for end < len(data) && isalnum(data[end]) {
713	end++
714	}
715
716	if end < len(data) && data[end] == ';' {
717	end++ // real entity
718	} else {
719	return 0, nil // lone '&'
720	}
721
722	ent := data[:end]
723	// undo & escaping or it will be converted to &amp; by another
724	// escaper in the renderer
725	if bytes.Equal(ent, []byte("&")) {
726	ent = []byte{'&'}
727	}
728
729	return end, text(ent)
730	}
731
732	func linkEndsWithEntity(data []byte, linkEnd int) bool {
733	entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
734	return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
735	}
736
// hasPrefixCaseInsensitive is a custom implementation of
// strings.HasPrefix(strings.ToLower(s), prefix)
// we rolled our own because ToLower pulls in a huge machinery of lowercasing
// anything from Unicode and that's very slow. Since this func will only be
// used on ASCII protocol prefixes, we can take shortcuts.
//
// A byte of s matches the corresponding byte of prefix when the two are
// equal, or when the byte of s is an ASCII uppercase letter whose lowercase
// form equals the prefix byte. Bytes outside 'A'-'Z' are never case-shifted,
// so control characters cannot stand in for punctuation such as ':' or '/'.
func hasPrefixCaseInsensitive(s, prefix []byte) bool {
	if len(s) < len(prefix) {
		return false
	}
	const delta = 'a' - 'A'
	for i, b := range prefix {
		c := s[i]
		if b == c {
			continue
		}
		if c < 'A' || c > 'Z' || b != c+delta {
			return false
		}
	}
	return true
}
754
// protocolPrefixes lists, in lowercase, the prefixes that may start an
// automatically detected link.
var protocolPrefixes = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("file://"),
	[]byte("mailto:"),
}

// shortestPrefix is len("ftp://"), the length of the shortest entry in
// protocolPrefixes.
const shortestPrefix = 6
764
765	func maybeAutoLink(p Markdown, data []byte, offset int) (int, Node) {
766	// quick check to rule out most false hits
767	if p.insideLink \|\| len(data) < offset+shortestPrefix {
768	return 0, nil
769	}
770	for _, prefix := range protocolPrefixes {
771	endOfHead := offset + 8 // 8 is the len() of the longest prefix
772	if endOfHead > len(data) {
773	endOfHead = len(data)
774	}
775	if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
776	return autoLink(p, data, offset)
777	}
778	}
779	return 0, nil
780	}
781
782	func autoLink(p Markdown, data []byte, offset int) (int, Node) {
783	// Now a more expensive check to see if we're not inside an anchor element
784	anchorStart := offset
785	offsetFromAnchor := 0
786	for anchorStart > 0 && data[anchorStart] != '<' {
787	anchorStart--
788	offsetFromAnchor++
789	}
790
791	anchorStr := anchorRe.Find(data[anchorStart:])
792	if anchorStr != nil {
793	anchorClose := NewNode(HTMLSpan)
794	anchorClose.Literal = anchorStr[offsetFromAnchor:]
795	return len(anchorStr) - offsetFromAnchor, anchorClose
796	}
797
798	// scan backward for a word boundary
799	rewind := 0
800	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
801	rewind++
802	}
803	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
804	return 0, nil
805	}
806
807	origData := data
808	data = data[offset-rewind:]
809
810	if !isSafeLink(data) {
811	return 0, nil
812	}
813
814	linkEnd := 0
815	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
816	linkEnd++
817	}
818
819	// Skip punctuation at the end of the link
820	if (data[linkEnd-1] == '.' \|\| data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
821	linkEnd--
822	}
823
824	// But don't skip semicolon if it's a part of escaped entity:
825	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
826	linkEnd--
827	}
828
829	// See if the link finishes with a punctuation sign that can be closed.
830	var copen byte
831	switch data[linkEnd-1] {
832	case '"':
833	copen = '"'
834	case '\'':
835	copen = '\''
836	case ')':
837	copen = '('
838	case ']':
839	copen = '['
840	case '}':
841	copen = '{'
842	default:
843	copen = 0
844	}
845
846	if copen != 0 {
847	bufEnd := offset - rewind + linkEnd - 2
848
849	openDelim := 1
850
851	/* Try to close the final punctuation sign in this same line;
852	* if we managed to close it outside of the URL, that means that it's
853	* not part of the URL. If it closes inside the URL, that means it
854	* is part of the URL.
855	*
856	* Examples:
857	*
858	* foo http://www.pokemon.com/Pikachu_(Electric) bar
859	* => http://www.pokemon.com/Pikachu_(Electric)
860	*
861	* foo (http://www.pokemon.com/Pikachu_(Electric)) bar
862	* => http://www.pokemon.com/Pikachu_(Electric)
863	*
864	* foo http://www.pokemon.com/Pikachu_(Electric)) bar
865	* => http://www.pokemon.com/Pikachu_(Electric))
866	*
867	* (foo http://www.pokemon.com/Pikachu_(Electric)) bar
868	* => foo http://www.pokemon.com/Pikachu_(Electric)
869	*/
870
871	for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
872	if origData[bufEnd] == data[linkEnd-1] {
873	openDelim++
874	}
875
876	if origData[bufEnd] == copen {
877	openDelim--
878	}
879
880	bufEnd--
881	}
882
883	if openDelim == 0 {
884	linkEnd--
885	}
886	}
887
888	var uLink bytes.Buffer
889	unescapeText(&uLink, data[:linkEnd])
890
891	if uLink.Len() > 0 {
892	node := NewNode(Link)
893	node.Destination = uLink.Bytes()
894	node.AppendChild(text(uLink.Bytes()))
895	return linkEnd, node
896	}
897
898	return linkEnd, nil
899	}
900
901	func isEndOfLink(char byte) bool {
902	return isspace(char) \|\| char == '<'
903	}
904
905	var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
906	var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
907
908	func isSafeLink(link []byte) bool {
909	for _, path := range validPaths {
910	if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
911	if len(link) == len(path) {
912	return true
913	} else if isalnum(link[len(path)]) {
914	return true
915	}
916	}
917	}
918
919	for _, prefix := range validUris {
920	// TODO: handle unicode here
921	// case-insensitive prefix test
922	if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
923	return true
924	}
925	}
926
927	return false
928	}
929
930	// return the length of the given tag, or 0 is it's not valid
931	func tagLength(data []byte) (autolink autolinkType, end int) {
932	var i, j int
933
934	// a valid tag can't be shorter than 3 chars
935	if len(data) < 3 {
936	return notAutolink, 0
937	}
938
939	// begins with a '<' optionally followed by '/', followed by letter or number
940	if data[0] != '<' {
941	return notAutolink, 0
942	}
943	if data[1] == '/' {
944	i = 2
945	} else {
946	i = 1
947	}
948
949	if !isalnum(data[i]) {
950	return notAutolink, 0
951	}
952
953	// scheme test
954	autolink = notAutolink
955
956	// try to find the beginning of an URI
957	for i < len(data) && (isalnum(data[i]) \|\| data[i] == '.' \|\| data[i] == '+' \|\| data[i] == '-') {
958	i++
959	}
960
961	if i > 1 && i < len(data) && data[i] == '@' {
962	if j = isMailtoAutoLink(data[i:]); j != 0 {
963	return emailAutolink, i + j
964	}
965	}
966
967	if i > 2 && i < len(data) && data[i] == ':' {
968	autolink = normalAutolink
969	i++
970	}
971
972	// complete autolink test: no whitespace or ' or "
973	switch {
974	case i >= len(data):
975	autolink = notAutolink
976	case autolink != notAutolink:
977	j = i
978
979	for i < len(data) {
980	if data[i] == '\\' {
981	i += 2
982	} else if data[i] == '>' \|\| data[i] == '\'' \|\| data[i] == '"' \|\| isspace(data[i]) {
983	break
984	} else {
985	i++
986	}
987
988	}
989
990	if i >= len(data) {
991	return autolink, 0
992	}
993	if i > j && data[i] == '>' {
994	return autolink, i + 1
995	}
996
997	// one of the forbidden chars has been found
998	autolink = notAutolink
999	}
1000	i += bytes.IndexByte(data[i:], '>')
1001	if i < 0 {
1002	return autolink, 0
1003	}
1004	return autolink, i + 1
1005	}
1006
1007	// look for the address part of a mail autolink and '>'
1008	// this is less strict than the original markdown e-mail address matching
1009	func isMailtoAutoLink(data []byte) int {
1010	nb := 0
1011
1012	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
1013	for i := 0; i < len(data); i++ {
1014	if isalnum(data[i]) {
1015	continue
1016	}
1017
1018	switch data[i] {
1019	case '@':
1020	nb++
1021
1022	case '-', '.', '_':
1023	break
1024
1025	case '>':
1026	if nb == 1 {
1027	return i + 1
1028	}
1029	return 0
1030	default:
1031	return 0
1032	}
1033	}
1034
1035	return 0
1036	}
1037
// helperFindEmphChar looks for the next occurrence of the emphasis byte c in
// data, skipping other constructs: backslash-escaped bytes, backtick code
// spans, and bracketed links.
//
// It returns the index of the byte found, or 0 when there is none. When a
// code span or link turns out to be unterminated, the index of the first c
// seen inside it is returned instead (0 if there was none).
func helperFindEmphChar(data []byte, c byte) int {
	i := 0

	for i < len(data) {
		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		// do not count escaped chars
		if i != 0 && data[i-1] == '\\' {
			i++
			continue
		}
		if data[i] == c {
			return i
		}

		if data[i] == '`' {
			// skip a code span
			tmpI := 0
			i++
			for i < len(data) && data[i] != '`' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		} else if data[i] == '[' {
			// skip a link
			tmpI := 0
			i++
			for i < len(data) && data[i] != ']' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			i++
			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			if data[i] != '[' && data[i] != '(' { // not a link
				if tmpI > 0 {
					return tmpI
				}
				continue
			}
			// scan the link target up to the next occurrence of the byte
			// that opened it
			cc := data[i]
			i++
			for i < len(data) && data[i] != cc {
				if tmpI == 0 && data[i] == c {
					return i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		}
	}
	return 0
}
1111
1112	func helperEmphasis(p Markdown, data []byte, c byte) (int, Node) {
1113	i := 0
1114
1115	// skip one symbol if coming from emph3
1116	if len(data) > 1 && data[0] == c && data[1] == c {
1117	i = 1
1118	}
1119
1120	for i < len(data) {
1121	length := helperFindEmphChar(data[i:], c)
1122	if length == 0 {
1123	return 0, nil
1124	}
1125	i += length
1126	if i >= len(data) {
1127	return 0, nil
1128	}
1129
1130	if i+1 < len(data) && data[i+1] == c {
1131	i++
1132	continue
1133	}
1134
1135	if data[i] == c && !isspace(data[i-1]) {
1136
1137	if p.extensions&NoIntraEmphasis != 0 {
1138	if !(i+1 == len(data) \|\| isspace(data[i+1]) \|\| ispunct(data[i+1])) {
1139	continue
1140	}
1141	}
1142
1143	emph := NewNode(Emph)
1144	p.inline(emph, data[:i])
1145	return i + 1, emph
1146	}
1147	}
1148
1149	return 0, nil
1150	}
1151
1152	func helperDoubleEmphasis(p Markdown, data []byte, c byte) (int, Node) {
1153	i := 0
1154
1155	for i < len(data) {
1156	length := helperFindEmphChar(data[i:], c)
1157	if length == 0 {
1158	return 0, nil
1159	}
1160	i += length
1161
1162	if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1163	nodeType := Strong
1164	if c == '~' {
1165	nodeType = Del
1166	}
1167	node := NewNode(nodeType)
1168	p.inline(node, data[:i])
1169	return i + 2, node
1170	}
1171	i++
1172	}
1173	return 0, nil
1174	}
1175
1176	func helperTripleEmphasis(p Markdown, data []byte, offset int, c byte) (int, Node) {
1177	i := 0
1178	origData := data
1179	data = data[offset:]
1180
1181	for i < len(data) {
1182	length := helperFindEmphChar(data[i:], c)
1183	if length == 0 {
1184	return 0, nil
1185	}
1186	i += length
1187
1188	// skip whitespace preceded symbols
1189	if data[i] != c \|\| isspace(data[i-1]) {
1190	continue
1191	}
1192
1193	switch {
1194	case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
1195	// triple symbol found
1196	strong := NewNode(Strong)
1197	em := NewNode(Emph)
1198	strong.AppendChild(em)
1199	p.inline(em, data[:i])
1200	return i + 3, strong
1201	case (i+1 < len(data) && data[i+1] == c):
1202	// double symbol found, hand over to emph1
1203	length, node := helperEmphasis(p, origData[offset-2:], c)
1204	if length == 0 {
1205	return 0, nil
1206	}
1207	return length - 2, node
1208	default:
1209	// single symbol found, hand over to emph2
1210	length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
1211	if length == 0 {
1212	return 0, nil
1213	}
1214	return length - 1, node
1215	}
1216	}
1217	return 0, nil
1218	}
1219
1220	func text(s []byte) *Node {
1221	node := NewNode(Text)
1222	node.Literal = s
1223	return node
1224	}
1225
// normalizeURI currently returns s unchanged.
// TODO: implement
func normalizeURI(s []byte) []byte {
	return s
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: