1 | //
|
---|
2 | // Blackfriday Markdown Processor
|
---|
3 | // Available at http://github.com/russross/blackfriday
|
---|
4 | //
|
---|
5 | // Copyright © 2011 Russ Ross <russ@russross.com>.
|
---|
6 | // Distributed under the Simplified BSD License.
|
---|
7 | // See README.md for details.
|
---|
8 | //
|
---|
9 |
|
---|
10 | //
|
---|
11 | // Functions to parse block-level elements.
|
---|
12 | //
|
---|
13 |
|
---|
14 | package blackfriday
|
---|
15 |
|
---|
16 | import (
|
---|
17 | "bytes"
|
---|
18 | "html"
|
---|
19 | "regexp"
|
---|
20 | "strings"
|
---|
21 | "unicode"
|
---|
22 | )
|
---|
23 |
|
---|
// Regular-expression fragments used when unescaping text.
const (
	// charEntity matches a hexadecimal, decimal or named character reference.
	charEntity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
	// escapable is the class of punctuation that may follow a backslash.
	escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
)

var (
	// reBackslashOrAmp is the cheap pre-check used by unescapeString: it
	// reports whether the input contains a backslash or an ampersand at all.
	//
	// The backslash has to be doubled inside the character class; the former
	// pattern "[\\&]" compiled to the regex [\&], which matches '&' only, so
	// inputs containing nothing but backslash escapes were never unescaped.
	reBackslashOrAmp = regexp.MustCompile(`[\\&]`)
	// reEntityOrEscapedChar matches one backslash-escaped punctuation
	// character or one character reference (case-insensitively).
	reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity)
)
33 |
|
---|
34 | // Parse block-level data.
|
---|
35 | // Note: this function and many that it calls assume that
|
---|
36 | // the input buffer ends with a newline.
|
---|
37 | func (p *Markdown) block(data []byte) {
|
---|
38 | // this is called recursively: enforce a maximum depth
|
---|
39 | if p.nesting >= p.maxNesting {
|
---|
40 | return
|
---|
41 | }
|
---|
42 | p.nesting++
|
---|
43 |
|
---|
44 | // parse out one block-level construct at a time
|
---|
45 | for len(data) > 0 {
|
---|
46 | // prefixed heading:
|
---|
47 | //
|
---|
48 | // # Heading 1
|
---|
49 | // ## Heading 2
|
---|
50 | // ...
|
---|
51 | // ###### Heading 6
|
---|
52 | if p.isPrefixHeading(data) {
|
---|
53 | data = data[p.prefixHeading(data):]
|
---|
54 | continue
|
---|
55 | }
|
---|
56 |
|
---|
57 | // block of preformatted HTML:
|
---|
58 | //
|
---|
59 | // <div>
|
---|
60 | // ...
|
---|
61 | // </div>
|
---|
62 | if data[0] == '<' {
|
---|
63 | if i := p.html(data, true); i > 0 {
|
---|
64 | data = data[i:]
|
---|
65 | continue
|
---|
66 | }
|
---|
67 | }
|
---|
68 |
|
---|
69 | // title block
|
---|
70 | //
|
---|
71 | // % stuff
|
---|
72 | // % more stuff
|
---|
73 | // % even more stuff
|
---|
74 | if p.extensions&Titleblock != 0 {
|
---|
75 | if data[0] == '%' {
|
---|
76 | if i := p.titleBlock(data, true); i > 0 {
|
---|
77 | data = data[i:]
|
---|
78 | continue
|
---|
79 | }
|
---|
80 | }
|
---|
81 | }
|
---|
82 |
|
---|
83 | // blank lines. note: returns the # of bytes to skip
|
---|
84 | if i := p.isEmpty(data); i > 0 {
|
---|
85 | data = data[i:]
|
---|
86 | continue
|
---|
87 | }
|
---|
88 |
|
---|
89 | // indented code block:
|
---|
90 | //
|
---|
91 | // func max(a, b int) int {
|
---|
92 | // if a > b {
|
---|
93 | // return a
|
---|
94 | // }
|
---|
95 | // return b
|
---|
96 | // }
|
---|
97 | if p.codePrefix(data) > 0 {
|
---|
98 | data = data[p.code(data):]
|
---|
99 | continue
|
---|
100 | }
|
---|
101 |
|
---|
102 | // fenced code block:
|
---|
103 | //
|
---|
104 | // ``` go
|
---|
105 | // func fact(n int) int {
|
---|
106 | // if n <= 1 {
|
---|
107 | // return n
|
---|
108 | // }
|
---|
109 | // return n * fact(n-1)
|
---|
110 | // }
|
---|
111 | // ```
|
---|
112 | if p.extensions&FencedCode != 0 {
|
---|
113 | if i := p.fencedCodeBlock(data, true); i > 0 {
|
---|
114 | data = data[i:]
|
---|
115 | continue
|
---|
116 | }
|
---|
117 | }
|
---|
118 |
|
---|
119 | // horizontal rule:
|
---|
120 | //
|
---|
121 | // ------
|
---|
122 | // or
|
---|
123 | // ******
|
---|
124 | // or
|
---|
125 | // ______
|
---|
126 | if p.isHRule(data) {
|
---|
127 | p.addBlock(HorizontalRule, nil)
|
---|
128 | var i int
|
---|
129 | for i = 0; i < len(data) && data[i] != '\n'; i++ {
|
---|
130 | }
|
---|
131 | data = data[i:]
|
---|
132 | continue
|
---|
133 | }
|
---|
134 |
|
---|
135 | // block quote:
|
---|
136 | //
|
---|
137 | // > A big quote I found somewhere
|
---|
138 | // > on the web
|
---|
139 | if p.quotePrefix(data) > 0 {
|
---|
140 | data = data[p.quote(data):]
|
---|
141 | continue
|
---|
142 | }
|
---|
143 |
|
---|
144 | // table:
|
---|
145 | //
|
---|
146 | // Name | Age | Phone
|
---|
147 | // ------|-----|---------
|
---|
148 | // Bob | 31 | 555-1234
|
---|
149 | // Alice | 27 | 555-4321
|
---|
150 | if p.extensions&Tables != 0 {
|
---|
151 | if i := p.table(data); i > 0 {
|
---|
152 | data = data[i:]
|
---|
153 | continue
|
---|
154 | }
|
---|
155 | }
|
---|
156 |
|
---|
157 | // an itemized/unordered list:
|
---|
158 | //
|
---|
159 | // * Item 1
|
---|
160 | // * Item 2
|
---|
161 | //
|
---|
162 | // also works with + or -
|
---|
163 | if p.uliPrefix(data) > 0 {
|
---|
164 | data = data[p.list(data, 0):]
|
---|
165 | continue
|
---|
166 | }
|
---|
167 |
|
---|
168 | // a numbered/ordered list:
|
---|
169 | //
|
---|
170 | // 1. Item 1
|
---|
171 | // 2. Item 2
|
---|
172 | if p.oliPrefix(data) > 0 {
|
---|
173 | data = data[p.list(data, ListTypeOrdered):]
|
---|
174 | continue
|
---|
175 | }
|
---|
176 |
|
---|
177 | // definition lists:
|
---|
178 | //
|
---|
179 | // Term 1
|
---|
180 | // : Definition a
|
---|
181 | // : Definition b
|
---|
182 | //
|
---|
183 | // Term 2
|
---|
184 | // : Definition c
|
---|
185 | if p.extensions&DefinitionLists != 0 {
|
---|
186 | if p.dliPrefix(data) > 0 {
|
---|
187 | data = data[p.list(data, ListTypeDefinition):]
|
---|
188 | continue
|
---|
189 | }
|
---|
190 | }
|
---|
191 |
|
---|
192 | // anything else must look like a normal paragraph
|
---|
193 | // note: this finds underlined headings, too
|
---|
194 | data = data[p.paragraph(data):]
|
---|
195 | }
|
---|
196 |
|
---|
197 | p.nesting--
|
---|
198 | }
|
---|
199 |
|
---|
200 | func (p *Markdown) addBlock(typ NodeType, content []byte) *Node {
|
---|
201 | p.closeUnmatchedBlocks()
|
---|
202 | container := p.addChild(typ, 0)
|
---|
203 | container.content = content
|
---|
204 | return container
|
---|
205 | }
|
---|
206 |
|
---|
207 | func (p *Markdown) isPrefixHeading(data []byte) bool {
|
---|
208 | if data[0] != '#' {
|
---|
209 | return false
|
---|
210 | }
|
---|
211 |
|
---|
212 | if p.extensions&SpaceHeadings != 0 {
|
---|
213 | level := 0
|
---|
214 | for level < 6 && level < len(data) && data[level] == '#' {
|
---|
215 | level++
|
---|
216 | }
|
---|
217 | if level == len(data) || data[level] != ' ' {
|
---|
218 | return false
|
---|
219 | }
|
---|
220 | }
|
---|
221 | return true
|
---|
222 | }
|
---|
223 |
|
---|
224 | func (p *Markdown) prefixHeading(data []byte) int {
|
---|
225 | level := 0
|
---|
226 | for level < 6 && level < len(data) && data[level] == '#' {
|
---|
227 | level++
|
---|
228 | }
|
---|
229 | i := skipChar(data, level, ' ')
|
---|
230 | end := skipUntilChar(data, i, '\n')
|
---|
231 | skip := end
|
---|
232 | id := ""
|
---|
233 | if p.extensions&HeadingIDs != 0 {
|
---|
234 | j, k := 0, 0
|
---|
235 | // find start/end of heading id
|
---|
236 | for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
|
---|
237 | }
|
---|
238 | for k = j + 1; k < end && data[k] != '}'; k++ {
|
---|
239 | }
|
---|
240 | // extract heading id iff found
|
---|
241 | if j < end && k < end {
|
---|
242 | id = string(data[j+2 : k])
|
---|
243 | end = j
|
---|
244 | skip = k + 1
|
---|
245 | for end > 0 && data[end-1] == ' ' {
|
---|
246 | end--
|
---|
247 | }
|
---|
248 | }
|
---|
249 | }
|
---|
250 | for end > 0 && data[end-1] == '#' {
|
---|
251 | if isBackslashEscaped(data, end-1) {
|
---|
252 | break
|
---|
253 | }
|
---|
254 | end--
|
---|
255 | }
|
---|
256 | for end > 0 && data[end-1] == ' ' {
|
---|
257 | end--
|
---|
258 | }
|
---|
259 | if end > i {
|
---|
260 | if id == "" && p.extensions&AutoHeadingIDs != 0 {
|
---|
261 | id = SanitizedAnchorName(string(data[i:end]))
|
---|
262 | }
|
---|
263 | block := p.addBlock(Heading, data[i:end])
|
---|
264 | block.HeadingID = id
|
---|
265 | block.Level = level
|
---|
266 | }
|
---|
267 | return skip
|
---|
268 | }
|
---|
269 |
|
---|
270 | func (p *Markdown) isUnderlinedHeading(data []byte) int {
|
---|
271 | // test of level 1 heading
|
---|
272 | if data[0] == '=' {
|
---|
273 | i := skipChar(data, 1, '=')
|
---|
274 | i = skipChar(data, i, ' ')
|
---|
275 | if i < len(data) && data[i] == '\n' {
|
---|
276 | return 1
|
---|
277 | }
|
---|
278 | return 0
|
---|
279 | }
|
---|
280 |
|
---|
281 | // test of level 2 heading
|
---|
282 | if data[0] == '-' {
|
---|
283 | i := skipChar(data, 1, '-')
|
---|
284 | i = skipChar(data, i, ' ')
|
---|
285 | if i < len(data) && data[i] == '\n' {
|
---|
286 | return 2
|
---|
287 | }
|
---|
288 | return 0
|
---|
289 | }
|
---|
290 |
|
---|
291 | return 0
|
---|
292 | }
|
---|
293 |
|
---|
294 | func (p *Markdown) titleBlock(data []byte, doRender bool) int {
|
---|
295 | if data[0] != '%' {
|
---|
296 | return 0
|
---|
297 | }
|
---|
298 | splitData := bytes.Split(data, []byte("\n"))
|
---|
299 | var i int
|
---|
300 | for idx, b := range splitData {
|
---|
301 | if !bytes.HasPrefix(b, []byte("%")) {
|
---|
302 | i = idx // - 1
|
---|
303 | break
|
---|
304 | }
|
---|
305 | }
|
---|
306 |
|
---|
307 | data = bytes.Join(splitData[0:i], []byte("\n"))
|
---|
308 | consumed := len(data)
|
---|
309 | data = bytes.TrimPrefix(data, []byte("% "))
|
---|
310 | data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
|
---|
311 | block := p.addBlock(Heading, data)
|
---|
312 | block.Level = 1
|
---|
313 | block.IsTitleblock = true
|
---|
314 |
|
---|
315 | return consumed
|
---|
316 | }
|
---|
317 |
|
---|
318 | func (p *Markdown) html(data []byte, doRender bool) int {
|
---|
319 | var i, j int
|
---|
320 |
|
---|
321 | // identify the opening tag
|
---|
322 | if data[0] != '<' {
|
---|
323 | return 0
|
---|
324 | }
|
---|
325 | curtag, tagfound := p.htmlFindTag(data[1:])
|
---|
326 |
|
---|
327 | // handle special cases
|
---|
328 | if !tagfound {
|
---|
329 | // check for an HTML comment
|
---|
330 | if size := p.htmlComment(data, doRender); size > 0 {
|
---|
331 | return size
|
---|
332 | }
|
---|
333 |
|
---|
334 | // check for an <hr> tag
|
---|
335 | if size := p.htmlHr(data, doRender); size > 0 {
|
---|
336 | return size
|
---|
337 | }
|
---|
338 |
|
---|
339 | // no special case recognized
|
---|
340 | return 0
|
---|
341 | }
|
---|
342 |
|
---|
343 | // look for an unindented matching closing tag
|
---|
344 | // followed by a blank line
|
---|
345 | found := false
|
---|
346 | /*
|
---|
347 | closetag := []byte("\n</" + curtag + ">")
|
---|
348 | j = len(curtag) + 1
|
---|
349 | for !found {
|
---|
350 | // scan for a closing tag at the beginning of a line
|
---|
351 | if skip := bytes.Index(data[j:], closetag); skip >= 0 {
|
---|
352 | j += skip + len(closetag)
|
---|
353 | } else {
|
---|
354 | break
|
---|
355 | }
|
---|
356 |
|
---|
357 | // see if it is the only thing on the line
|
---|
358 | if skip := p.isEmpty(data[j:]); skip > 0 {
|
---|
359 | // see if it is followed by a blank line/eof
|
---|
360 | j += skip
|
---|
361 | if j >= len(data) {
|
---|
362 | found = true
|
---|
363 | i = j
|
---|
364 | } else {
|
---|
365 | if skip := p.isEmpty(data[j:]); skip > 0 {
|
---|
366 | j += skip
|
---|
367 | found = true
|
---|
368 | i = j
|
---|
369 | }
|
---|
370 | }
|
---|
371 | }
|
---|
372 | }
|
---|
373 | */
|
---|
374 |
|
---|
375 | // if not found, try a second pass looking for indented match
|
---|
376 | // but not if tag is "ins" or "del" (following original Markdown.pl)
|
---|
377 | if !found && curtag != "ins" && curtag != "del" {
|
---|
378 | i = 1
|
---|
379 | for i < len(data) {
|
---|
380 | i++
|
---|
381 | for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
|
---|
382 | i++
|
---|
383 | }
|
---|
384 |
|
---|
385 | if i+2+len(curtag) >= len(data) {
|
---|
386 | break
|
---|
387 | }
|
---|
388 |
|
---|
389 | j = p.htmlFindEnd(curtag, data[i-1:])
|
---|
390 |
|
---|
391 | if j > 0 {
|
---|
392 | i += j - 1
|
---|
393 | found = true
|
---|
394 | break
|
---|
395 | }
|
---|
396 | }
|
---|
397 | }
|
---|
398 |
|
---|
399 | if !found {
|
---|
400 | return 0
|
---|
401 | }
|
---|
402 |
|
---|
403 | // the end of the block has been found
|
---|
404 | if doRender {
|
---|
405 | // trim newlines
|
---|
406 | end := i
|
---|
407 | for end > 0 && data[end-1] == '\n' {
|
---|
408 | end--
|
---|
409 | }
|
---|
410 | finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
|
---|
411 | }
|
---|
412 |
|
---|
413 | return i
|
---|
414 | }
|
---|
415 |
|
---|
416 | func finalizeHTMLBlock(block *Node) {
|
---|
417 | block.Literal = block.content
|
---|
418 | block.content = nil
|
---|
419 | }
|
---|
420 |
|
---|
421 | // HTML comment, lax form
|
---|
422 | func (p *Markdown) htmlComment(data []byte, doRender bool) int {
|
---|
423 | i := p.inlineHTMLComment(data)
|
---|
424 | // needs to end with a blank line
|
---|
425 | if j := p.isEmpty(data[i:]); j > 0 {
|
---|
426 | size := i + j
|
---|
427 | if doRender {
|
---|
428 | // trim trailing newlines
|
---|
429 | end := size
|
---|
430 | for end > 0 && data[end-1] == '\n' {
|
---|
431 | end--
|
---|
432 | }
|
---|
433 | block := p.addBlock(HTMLBlock, data[:end])
|
---|
434 | finalizeHTMLBlock(block)
|
---|
435 | }
|
---|
436 | return size
|
---|
437 | }
|
---|
438 | return 0
|
---|
439 | }
|
---|
440 |
|
---|
441 | // HR, which is the only self-closing block tag considered
|
---|
442 | func (p *Markdown) htmlHr(data []byte, doRender bool) int {
|
---|
443 | if len(data) < 4 {
|
---|
444 | return 0
|
---|
445 | }
|
---|
446 | if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
|
---|
447 | return 0
|
---|
448 | }
|
---|
449 | if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
|
---|
450 | // not an <hr> tag after all; at least not a valid one
|
---|
451 | return 0
|
---|
452 | }
|
---|
453 | i := 3
|
---|
454 | for i < len(data) && data[i] != '>' && data[i] != '\n' {
|
---|
455 | i++
|
---|
456 | }
|
---|
457 | if i < len(data) && data[i] == '>' {
|
---|
458 | i++
|
---|
459 | if j := p.isEmpty(data[i:]); j > 0 {
|
---|
460 | size := i + j
|
---|
461 | if doRender {
|
---|
462 | // trim newlines
|
---|
463 | end := size
|
---|
464 | for end > 0 && data[end-1] == '\n' {
|
---|
465 | end--
|
---|
466 | }
|
---|
467 | finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
|
---|
468 | }
|
---|
469 | return size
|
---|
470 | }
|
---|
471 | }
|
---|
472 | return 0
|
---|
473 | }
|
---|
474 |
|
---|
475 | func (p *Markdown) htmlFindTag(data []byte) (string, bool) {
|
---|
476 | i := 0
|
---|
477 | for i < len(data) && isalnum(data[i]) {
|
---|
478 | i++
|
---|
479 | }
|
---|
480 | key := string(data[:i])
|
---|
481 | if _, ok := blockTags[key]; ok {
|
---|
482 | return key, true
|
---|
483 | }
|
---|
484 | return "", false
|
---|
485 | }
|
---|
486 |
|
---|
487 | func (p *Markdown) htmlFindEnd(tag string, data []byte) int {
|
---|
488 | // assume data[0] == '<' && data[1] == '/' already tested
|
---|
489 | if tag == "hr" {
|
---|
490 | return 2
|
---|
491 | }
|
---|
492 | // check if tag is a match
|
---|
493 | closetag := []byte("</" + tag + ">")
|
---|
494 | if !bytes.HasPrefix(data, closetag) {
|
---|
495 | return 0
|
---|
496 | }
|
---|
497 | i := len(closetag)
|
---|
498 |
|
---|
499 | // check that the rest of the line is blank
|
---|
500 | skip := 0
|
---|
501 | if skip = p.isEmpty(data[i:]); skip == 0 {
|
---|
502 | return 0
|
---|
503 | }
|
---|
504 | i += skip
|
---|
505 | skip = 0
|
---|
506 |
|
---|
507 | if i >= len(data) {
|
---|
508 | return i
|
---|
509 | }
|
---|
510 |
|
---|
511 | if p.extensions&LaxHTMLBlocks != 0 {
|
---|
512 | return i
|
---|
513 | }
|
---|
514 | if skip = p.isEmpty(data[i:]); skip == 0 {
|
---|
515 | // following line must be blank
|
---|
516 | return 0
|
---|
517 | }
|
---|
518 |
|
---|
519 | return i + skip
|
---|
520 | }
|
---|
521 |
|
---|
522 | func (*Markdown) isEmpty(data []byte) int {
|
---|
523 | // it is okay to call isEmpty on an empty buffer
|
---|
524 | if len(data) == 0 {
|
---|
525 | return 0
|
---|
526 | }
|
---|
527 |
|
---|
528 | var i int
|
---|
529 | for i = 0; i < len(data) && data[i] != '\n'; i++ {
|
---|
530 | if data[i] != ' ' && data[i] != '\t' {
|
---|
531 | return 0
|
---|
532 | }
|
---|
533 | }
|
---|
534 | if i < len(data) && data[i] == '\n' {
|
---|
535 | i++
|
---|
536 | }
|
---|
537 | return i
|
---|
538 | }
|
---|
539 |
|
---|
540 | func (*Markdown) isHRule(data []byte) bool {
|
---|
541 | i := 0
|
---|
542 |
|
---|
543 | // skip up to three spaces
|
---|
544 | for i < 3 && data[i] == ' ' {
|
---|
545 | i++
|
---|
546 | }
|
---|
547 |
|
---|
548 | // look at the hrule char
|
---|
549 | if data[i] != '*' && data[i] != '-' && data[i] != '_' {
|
---|
550 | return false
|
---|
551 | }
|
---|
552 | c := data[i]
|
---|
553 |
|
---|
554 | // the whole line must be the char or whitespace
|
---|
555 | n := 0
|
---|
556 | for i < len(data) && data[i] != '\n' {
|
---|
557 | switch {
|
---|
558 | case data[i] == c:
|
---|
559 | n++
|
---|
560 | case data[i] != ' ':
|
---|
561 | return false
|
---|
562 | }
|
---|
563 | i++
|
---|
564 | }
|
---|
565 |
|
---|
566 | return n >= 3
|
---|
567 | }
|
---|
568 |
|
---|
569 | // isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
|
---|
570 | // and returns the end index if so, or 0 otherwise. It also returns the marker found.
|
---|
571 | // If info is not nil, it gets set to the syntax specified in the fence line.
|
---|
572 | func isFenceLine(data []byte, info *string, oldmarker string) (end int, marker string) {
|
---|
573 | i, size := 0, 0
|
---|
574 |
|
---|
575 | // skip up to three spaces
|
---|
576 | for i < len(data) && i < 3 && data[i] == ' ' {
|
---|
577 | i++
|
---|
578 | }
|
---|
579 |
|
---|
580 | // check for the marker characters: ~ or `
|
---|
581 | if i >= len(data) {
|
---|
582 | return 0, ""
|
---|
583 | }
|
---|
584 | if data[i] != '~' && data[i] != '`' {
|
---|
585 | return 0, ""
|
---|
586 | }
|
---|
587 |
|
---|
588 | c := data[i]
|
---|
589 |
|
---|
590 | // the whole line must be the same char or whitespace
|
---|
591 | for i < len(data) && data[i] == c {
|
---|
592 | size++
|
---|
593 | i++
|
---|
594 | }
|
---|
595 |
|
---|
596 | // the marker char must occur at least 3 times
|
---|
597 | if size < 3 {
|
---|
598 | return 0, ""
|
---|
599 | }
|
---|
600 | marker = string(data[i-size : i])
|
---|
601 |
|
---|
602 | // if this is the end marker, it must match the beginning marker
|
---|
603 | if oldmarker != "" && marker != oldmarker {
|
---|
604 | return 0, ""
|
---|
605 | }
|
---|
606 |
|
---|
607 | // TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
|
---|
608 | // into one, always get the info string, and discard it if the caller doesn't care.
|
---|
609 | if info != nil {
|
---|
610 | infoLength := 0
|
---|
611 | i = skipChar(data, i, ' ')
|
---|
612 |
|
---|
613 | if i >= len(data) {
|
---|
614 | if i == len(data) {
|
---|
615 | return i, marker
|
---|
616 | }
|
---|
617 | return 0, ""
|
---|
618 | }
|
---|
619 |
|
---|
620 | infoStart := i
|
---|
621 |
|
---|
622 | if data[i] == '{' {
|
---|
623 | i++
|
---|
624 | infoStart++
|
---|
625 |
|
---|
626 | for i < len(data) && data[i] != '}' && data[i] != '\n' {
|
---|
627 | infoLength++
|
---|
628 | i++
|
---|
629 | }
|
---|
630 |
|
---|
631 | if i >= len(data) || data[i] != '}' {
|
---|
632 | return 0, ""
|
---|
633 | }
|
---|
634 |
|
---|
635 | // strip all whitespace at the beginning and the end
|
---|
636 | // of the {} block
|
---|
637 | for infoLength > 0 && isspace(data[infoStart]) {
|
---|
638 | infoStart++
|
---|
639 | infoLength--
|
---|
640 | }
|
---|
641 |
|
---|
642 | for infoLength > 0 && isspace(data[infoStart+infoLength-1]) {
|
---|
643 | infoLength--
|
---|
644 | }
|
---|
645 | i++
|
---|
646 | i = skipChar(data, i, ' ')
|
---|
647 | } else {
|
---|
648 | for i < len(data) && !isverticalspace(data[i]) {
|
---|
649 | infoLength++
|
---|
650 | i++
|
---|
651 | }
|
---|
652 | }
|
---|
653 |
|
---|
654 | *info = strings.TrimSpace(string(data[infoStart : infoStart+infoLength]))
|
---|
655 | }
|
---|
656 |
|
---|
657 | if i == len(data) {
|
---|
658 | return i, marker
|
---|
659 | }
|
---|
660 | if i > len(data) || data[i] != '\n' {
|
---|
661 | return 0, ""
|
---|
662 | }
|
---|
663 | return i + 1, marker // Take newline into account.
|
---|
664 | }
|
---|
665 |
|
---|
666 | // fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
|
---|
667 | // or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
|
---|
668 | // If doRender is true, a final newline is mandatory to recognize the fenced code block.
|
---|
669 | func (p *Markdown) fencedCodeBlock(data []byte, doRender bool) int {
|
---|
670 | var info string
|
---|
671 | beg, marker := isFenceLine(data, &info, "")
|
---|
672 | if beg == 0 || beg >= len(data) {
|
---|
673 | return 0
|
---|
674 | }
|
---|
675 | fenceLength := beg - 1
|
---|
676 |
|
---|
677 | var work bytes.Buffer
|
---|
678 | work.Write([]byte(info))
|
---|
679 | work.WriteByte('\n')
|
---|
680 |
|
---|
681 | for {
|
---|
682 | // safe to assume beg < len(data)
|
---|
683 |
|
---|
684 | // check for the end of the code block
|
---|
685 | fenceEnd, _ := isFenceLine(data[beg:], nil, marker)
|
---|
686 | if fenceEnd != 0 {
|
---|
687 | beg += fenceEnd
|
---|
688 | break
|
---|
689 | }
|
---|
690 |
|
---|
691 | // copy the current line
|
---|
692 | end := skipUntilChar(data, beg, '\n') + 1
|
---|
693 |
|
---|
694 | // did we reach the end of the buffer without a closing marker?
|
---|
695 | if end >= len(data) {
|
---|
696 | return 0
|
---|
697 | }
|
---|
698 |
|
---|
699 | // verbatim copy to the working buffer
|
---|
700 | if doRender {
|
---|
701 | work.Write(data[beg:end])
|
---|
702 | }
|
---|
703 | beg = end
|
---|
704 | }
|
---|
705 |
|
---|
706 | if doRender {
|
---|
707 | block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
|
---|
708 | block.IsFenced = true
|
---|
709 | block.FenceLength = fenceLength
|
---|
710 | finalizeCodeBlock(block)
|
---|
711 | }
|
---|
712 |
|
---|
713 | return beg
|
---|
714 | }
|
---|
715 |
|
---|
// unescapeChar converts a single escape sequence to the text it stands for.
// A sequence starting with a backslash yields the byte following the
// backslash; anything else is passed through html.UnescapeString.
// str must hold at least two bytes when it starts with a backslash.
func unescapeChar(str []byte) []byte {
	if str[0] != '\\' {
		return []byte(html.UnescapeString(string(str)))
	}
	return []byte{str[1]}
}
722 |
|
---|
723 | func unescapeString(str []byte) []byte {
|
---|
724 | if reBackslashOrAmp.Match(str) {
|
---|
725 | return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
|
---|
726 | }
|
---|
727 | return str
|
---|
728 | }
|
---|
729 |
|
---|
730 | func finalizeCodeBlock(block *Node) {
|
---|
731 | if block.IsFenced {
|
---|
732 | newlinePos := bytes.IndexByte(block.content, '\n')
|
---|
733 | firstLine := block.content[:newlinePos]
|
---|
734 | rest := block.content[newlinePos+1:]
|
---|
735 | block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
|
---|
736 | block.Literal = rest
|
---|
737 | } else {
|
---|
738 | block.Literal = block.content
|
---|
739 | }
|
---|
740 | block.content = nil
|
---|
741 | }
|
---|
742 |
|
---|
743 | func (p *Markdown) table(data []byte) int {
|
---|
744 | table := p.addBlock(Table, nil)
|
---|
745 | i, columns := p.tableHeader(data)
|
---|
746 | if i == 0 {
|
---|
747 | p.tip = table.Parent
|
---|
748 | table.Unlink()
|
---|
749 | return 0
|
---|
750 | }
|
---|
751 |
|
---|
752 | p.addBlock(TableBody, nil)
|
---|
753 |
|
---|
754 | for i < len(data) {
|
---|
755 | pipes, rowStart := 0, i
|
---|
756 | for ; i < len(data) && data[i] != '\n'; i++ {
|
---|
757 | if data[i] == '|' {
|
---|
758 | pipes++
|
---|
759 | }
|
---|
760 | }
|
---|
761 |
|
---|
762 | if pipes == 0 {
|
---|
763 | i = rowStart
|
---|
764 | break
|
---|
765 | }
|
---|
766 |
|
---|
767 | // include the newline in data sent to tableRow
|
---|
768 | if i < len(data) && data[i] == '\n' {
|
---|
769 | i++
|
---|
770 | }
|
---|
771 | p.tableRow(data[rowStart:i], columns, false)
|
---|
772 | }
|
---|
773 |
|
---|
774 | return i
|
---|
775 | }
|
---|
776 |
|
---|
// isBackslashEscaped reports whether the byte at position i is preceded by
// an odd number of consecutive backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	count := 0
	// walk backwards over the run of backslashes directly before i
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		count++
	}
	return count%2 == 1
}
785 |
|
---|
786 | func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
|
---|
787 | i := 0
|
---|
788 | colCount := 1
|
---|
789 | for i = 0; i < len(data) && data[i] != '\n'; i++ {
|
---|
790 | if data[i] == '|' && !isBackslashEscaped(data, i) {
|
---|
791 | colCount++
|
---|
792 | }
|
---|
793 | }
|
---|
794 |
|
---|
795 | // doesn't look like a table header
|
---|
796 | if colCount == 1 {
|
---|
797 | return
|
---|
798 | }
|
---|
799 |
|
---|
800 | // include the newline in the data sent to tableRow
|
---|
801 | j := i
|
---|
802 | if j < len(data) && data[j] == '\n' {
|
---|
803 | j++
|
---|
804 | }
|
---|
805 | header := data[:j]
|
---|
806 |
|
---|
807 | // column count ignores pipes at beginning or end of line
|
---|
808 | if data[0] == '|' {
|
---|
809 | colCount--
|
---|
810 | }
|
---|
811 | if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
|
---|
812 | colCount--
|
---|
813 | }
|
---|
814 |
|
---|
815 | columns = make([]CellAlignFlags, colCount)
|
---|
816 |
|
---|
817 | // move on to the header underline
|
---|
818 | i++
|
---|
819 | if i >= len(data) {
|
---|
820 | return
|
---|
821 | }
|
---|
822 |
|
---|
823 | if data[i] == '|' && !isBackslashEscaped(data, i) {
|
---|
824 | i++
|
---|
825 | }
|
---|
826 | i = skipChar(data, i, ' ')
|
---|
827 |
|
---|
828 | // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
|
---|
829 | // and trailing | optional on last column
|
---|
830 | col := 0
|
---|
831 | for i < len(data) && data[i] != '\n' {
|
---|
832 | dashes := 0
|
---|
833 |
|
---|
834 | if data[i] == ':' {
|
---|
835 | i++
|
---|
836 | columns[col] |= TableAlignmentLeft
|
---|
837 | dashes++
|
---|
838 | }
|
---|
839 | for i < len(data) && data[i] == '-' {
|
---|
840 | i++
|
---|
841 | dashes++
|
---|
842 | }
|
---|
843 | if i < len(data) && data[i] == ':' {
|
---|
844 | i++
|
---|
845 | columns[col] |= TableAlignmentRight
|
---|
846 | dashes++
|
---|
847 | }
|
---|
848 | for i < len(data) && data[i] == ' ' {
|
---|
849 | i++
|
---|
850 | }
|
---|
851 | if i == len(data) {
|
---|
852 | return
|
---|
853 | }
|
---|
854 | // end of column test is messy
|
---|
855 | switch {
|
---|
856 | case dashes < 3:
|
---|
857 | // not a valid column
|
---|
858 | return
|
---|
859 |
|
---|
860 | case data[i] == '|' && !isBackslashEscaped(data, i):
|
---|
861 | // marker found, now skip past trailing whitespace
|
---|
862 | col++
|
---|
863 | i++
|
---|
864 | for i < len(data) && data[i] == ' ' {
|
---|
865 | i++
|
---|
866 | }
|
---|
867 |
|
---|
868 | // trailing junk found after last column
|
---|
869 | if col >= colCount && i < len(data) && data[i] != '\n' {
|
---|
870 | return
|
---|
871 | }
|
---|
872 |
|
---|
873 | case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
|
---|
874 | // something else found where marker was required
|
---|
875 | return
|
---|
876 |
|
---|
877 | case data[i] == '\n':
|
---|
878 | // marker is optional for the last column
|
---|
879 | col++
|
---|
880 |
|
---|
881 | default:
|
---|
882 | // trailing junk found after last column
|
---|
883 | return
|
---|
884 | }
|
---|
885 | }
|
---|
886 | if col != colCount {
|
---|
887 | return
|
---|
888 | }
|
---|
889 |
|
---|
890 | p.addBlock(TableHead, nil)
|
---|
891 | p.tableRow(header, columns, true)
|
---|
892 | size = i
|
---|
893 | if size < len(data) && data[size] == '\n' {
|
---|
894 | size++
|
---|
895 | }
|
---|
896 | return
|
---|
897 | }
|
---|
898 |
|
---|
899 | func (p *Markdown) tableRow(data []byte, columns []CellAlignFlags, header bool) {
|
---|
900 | p.addBlock(TableRow, nil)
|
---|
901 | i, col := 0, 0
|
---|
902 |
|
---|
903 | if data[i] == '|' && !isBackslashEscaped(data, i) {
|
---|
904 | i++
|
---|
905 | }
|
---|
906 |
|
---|
907 | for col = 0; col < len(columns) && i < len(data); col++ {
|
---|
908 | for i < len(data) && data[i] == ' ' {
|
---|
909 | i++
|
---|
910 | }
|
---|
911 |
|
---|
912 | cellStart := i
|
---|
913 |
|
---|
914 | for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
|
---|
915 | i++
|
---|
916 | }
|
---|
917 |
|
---|
918 | cellEnd := i
|
---|
919 |
|
---|
920 | // skip the end-of-cell marker, possibly taking us past end of buffer
|
---|
921 | i++
|
---|
922 |
|
---|
923 | for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' {
|
---|
924 | cellEnd--
|
---|
925 | }
|
---|
926 |
|
---|
927 | cell := p.addBlock(TableCell, data[cellStart:cellEnd])
|
---|
928 | cell.IsHeader = header
|
---|
929 | cell.Align = columns[col]
|
---|
930 | }
|
---|
931 |
|
---|
932 | // pad it out with empty columns to get the right number
|
---|
933 | for ; col < len(columns); col++ {
|
---|
934 | cell := p.addBlock(TableCell, nil)
|
---|
935 | cell.IsHeader = header
|
---|
936 | cell.Align = columns[col]
|
---|
937 | }
|
---|
938 |
|
---|
939 | // silently ignore rows with too many cells
|
---|
940 | }
|
---|
941 |
|
---|
942 | // returns blockquote prefix length
|
---|
943 | func (p *Markdown) quotePrefix(data []byte) int {
|
---|
944 | i := 0
|
---|
945 | for i < 3 && i < len(data) && data[i] == ' ' {
|
---|
946 | i++
|
---|
947 | }
|
---|
948 | if i < len(data) && data[i] == '>' {
|
---|
949 | if i+1 < len(data) && data[i+1] == ' ' {
|
---|
950 | return i + 2
|
---|
951 | }
|
---|
952 | return i + 1
|
---|
953 | }
|
---|
954 | return 0
|
---|
955 | }
|
---|
956 |
|
---|
957 | // blockquote ends with at least one blank line
|
---|
958 | // followed by something without a blockquote prefix
|
---|
959 | func (p *Markdown) terminateBlockquote(data []byte, beg, end int) bool {
|
---|
960 | if p.isEmpty(data[beg:]) <= 0 {
|
---|
961 | return false
|
---|
962 | }
|
---|
963 | if end >= len(data) {
|
---|
964 | return true
|
---|
965 | }
|
---|
966 | return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
|
---|
967 | }
|
---|
968 |
|
---|
969 | // parse a blockquote fragment
|
---|
970 | func (p *Markdown) quote(data []byte) int {
|
---|
971 | block := p.addBlock(BlockQuote, nil)
|
---|
972 | var raw bytes.Buffer
|
---|
973 | beg, end := 0, 0
|
---|
974 | for beg < len(data) {
|
---|
975 | end = beg
|
---|
976 | // Step over whole lines, collecting them. While doing that, check for
|
---|
977 | // fenced code and if one's found, incorporate it altogether,
|
---|
978 | // irregardless of any contents inside it
|
---|
979 | for end < len(data) && data[end] != '\n' {
|
---|
980 | if p.extensions&FencedCode != 0 {
|
---|
981 | if i := p.fencedCodeBlock(data[end:], false); i > 0 {
|
---|
982 | // -1 to compensate for the extra end++ after the loop:
|
---|
983 | end += i - 1
|
---|
984 | break
|
---|
985 | }
|
---|
986 | }
|
---|
987 | end++
|
---|
988 | }
|
---|
989 | if end < len(data) && data[end] == '\n' {
|
---|
990 | end++
|
---|
991 | }
|
---|
992 | if pre := p.quotePrefix(data[beg:]); pre > 0 {
|
---|
993 | // skip the prefix
|
---|
994 | beg += pre
|
---|
995 | } else if p.terminateBlockquote(data, beg, end) {
|
---|
996 | break
|
---|
997 | }
|
---|
998 | // this line is part of the blockquote
|
---|
999 | raw.Write(data[beg:end])
|
---|
1000 | beg = end
|
---|
1001 | }
|
---|
1002 | p.block(raw.Bytes())
|
---|
1003 | p.finalize(block)
|
---|
1004 | return end
|
---|
1005 | }
|
---|
1006 |
|
---|
1007 | // returns prefix length for block code
|
---|
1008 | func (p *Markdown) codePrefix(data []byte) int {
|
---|
1009 | if len(data) >= 1 && data[0] == '\t' {
|
---|
1010 | return 1
|
---|
1011 | }
|
---|
1012 | if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
|
---|
1013 | return 4
|
---|
1014 | }
|
---|
1015 | return 0
|
---|
1016 | }
|
---|
1017 |
|
---|
1018 | func (p *Markdown) code(data []byte) int {
|
---|
1019 | var work bytes.Buffer
|
---|
1020 |
|
---|
1021 | i := 0
|
---|
1022 | for i < len(data) {
|
---|
1023 | beg := i
|
---|
1024 | for i < len(data) && data[i] != '\n' {
|
---|
1025 | i++
|
---|
1026 | }
|
---|
1027 | if i < len(data) && data[i] == '\n' {
|
---|
1028 | i++
|
---|
1029 | }
|
---|
1030 |
|
---|
1031 | blankline := p.isEmpty(data[beg:i]) > 0
|
---|
1032 | if pre := p.codePrefix(data[beg:i]); pre > 0 {
|
---|
1033 | beg += pre
|
---|
1034 | } else if !blankline {
|
---|
1035 | // non-empty, non-prefixed line breaks the pre
|
---|
1036 | i = beg
|
---|
1037 | break
|
---|
1038 | }
|
---|
1039 |
|
---|
1040 | // verbatim copy to the working buffer
|
---|
1041 | if blankline {
|
---|
1042 | work.WriteByte('\n')
|
---|
1043 | } else {
|
---|
1044 | work.Write(data[beg:i])
|
---|
1045 | }
|
---|
1046 | }
|
---|
1047 |
|
---|
1048 | // trim all the \n off the end of work
|
---|
1049 | workbytes := work.Bytes()
|
---|
1050 | eol := len(workbytes)
|
---|
1051 | for eol > 0 && workbytes[eol-1] == '\n' {
|
---|
1052 | eol--
|
---|
1053 | }
|
---|
1054 | if eol != len(workbytes) {
|
---|
1055 | work.Truncate(eol)
|
---|
1056 | }
|
---|
1057 |
|
---|
1058 | work.WriteByte('\n')
|
---|
1059 |
|
---|
1060 | block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
|
---|
1061 | block.IsFenced = false
|
---|
1062 | finalizeCodeBlock(block)
|
---|
1063 |
|
---|
1064 | return i
|
---|
1065 | }
|
---|
1066 |
|
---|
1067 | // returns unordered list item prefix
|
---|
1068 | func (p *Markdown) uliPrefix(data []byte) int {
|
---|
1069 | i := 0
|
---|
1070 | // start with up to 3 spaces
|
---|
1071 | for i < len(data) && i < 3 && data[i] == ' ' {
|
---|
1072 | i++
|
---|
1073 | }
|
---|
1074 | if i >= len(data)-1 {
|
---|
1075 | return 0
|
---|
1076 | }
|
---|
1077 | // need one of {'*', '+', '-'} followed by a space or a tab
|
---|
1078 | if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
|
---|
1079 | (data[i+1] != ' ' && data[i+1] != '\t') {
|
---|
1080 | return 0
|
---|
1081 | }
|
---|
1082 | return i + 2
|
---|
1083 | }
|
---|
1084 |
|
---|
1085 | // returns ordered list item prefix
|
---|
1086 | func (p *Markdown) oliPrefix(data []byte) int {
|
---|
1087 | i := 0
|
---|
1088 |
|
---|
1089 | // start with up to 3 spaces
|
---|
1090 | for i < 3 && i < len(data) && data[i] == ' ' {
|
---|
1091 | i++
|
---|
1092 | }
|
---|
1093 |
|
---|
1094 | // count the digits
|
---|
1095 | start := i
|
---|
1096 | for i < len(data) && data[i] >= '0' && data[i] <= '9' {
|
---|
1097 | i++
|
---|
1098 | }
|
---|
1099 | if start == i || i >= len(data)-1 {
|
---|
1100 | return 0
|
---|
1101 | }
|
---|
1102 |
|
---|
1103 | // we need >= 1 digits followed by a dot and a space or a tab
|
---|
1104 | if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') {
|
---|
1105 | return 0
|
---|
1106 | }
|
---|
1107 | return i + 2
|
---|
1108 | }
|
---|
1109 |
|
---|
1110 | // returns definition list item prefix
|
---|
1111 | func (p *Markdown) dliPrefix(data []byte) int {
|
---|
1112 | if len(data) < 2 {
|
---|
1113 | return 0
|
---|
1114 | }
|
---|
1115 | i := 0
|
---|
1116 | // need a ':' followed by a space or a tab
|
---|
1117 | if data[i] != ':' || !(data[i+1] == ' ' || data[i+1] == '\t') {
|
---|
1118 | return 0
|
---|
1119 | }
|
---|
1120 | for i < len(data) && data[i] == ' ' {
|
---|
1121 | i++
|
---|
1122 | }
|
---|
1123 | return i + 2
|
---|
1124 | }
|
---|
1125 |
|
---|
1126 | // parse ordered or unordered list block
|
---|
1127 | func (p *Markdown) list(data []byte, flags ListType) int {
|
---|
1128 | i := 0
|
---|
1129 | flags |= ListItemBeginningOfList
|
---|
1130 | block := p.addBlock(List, nil)
|
---|
1131 | block.ListFlags = flags
|
---|
1132 | block.Tight = true
|
---|
1133 |
|
---|
1134 | for i < len(data) {
|
---|
1135 | skip := p.listItem(data[i:], &flags)
|
---|
1136 | if flags&ListItemContainsBlock != 0 {
|
---|
1137 | block.ListData.Tight = false
|
---|
1138 | }
|
---|
1139 | i += skip
|
---|
1140 | if skip == 0 || flags&ListItemEndOfList != 0 {
|
---|
1141 | break
|
---|
1142 | }
|
---|
1143 | flags &= ^ListItemBeginningOfList
|
---|
1144 | }
|
---|
1145 |
|
---|
1146 | above := block.Parent
|
---|
1147 | finalizeList(block)
|
---|
1148 | p.tip = above
|
---|
1149 | return i
|
---|
1150 | }
|
---|
1151 |
|
---|
1152 | // Returns true if the list item is not the same type as its parent list
|
---|
1153 | func (p *Markdown) listTypeChanged(data []byte, flags *ListType) bool {
|
---|
1154 | if p.dliPrefix(data) > 0 && *flags&ListTypeDefinition == 0 {
|
---|
1155 | return true
|
---|
1156 | } else if p.oliPrefix(data) > 0 && *flags&ListTypeOrdered == 0 {
|
---|
1157 | return true
|
---|
1158 | } else if p.uliPrefix(data) > 0 && (*flags&ListTypeOrdered != 0 || *flags&ListTypeDefinition != 0) {
|
---|
1159 | return true
|
---|
1160 | }
|
---|
1161 | return false
|
---|
1162 | }
|
---|
1163 |
|
---|
1164 | // Returns true if block ends with a blank line, descending if needed
|
---|
1165 | // into lists and sublists.
|
---|
1166 | func endsWithBlankLine(block *Node) bool {
|
---|
1167 | // TODO: figure this out. Always false now.
|
---|
1168 | for block != nil {
|
---|
1169 | //if block.lastLineBlank {
|
---|
1170 | //return true
|
---|
1171 | //}
|
---|
1172 | t := block.Type
|
---|
1173 | if t == List || t == Item {
|
---|
1174 | block = block.LastChild
|
---|
1175 | } else {
|
---|
1176 | break
|
---|
1177 | }
|
---|
1178 | }
|
---|
1179 | return false
|
---|
1180 | }
|
---|
1181 |
|
---|
1182 | func finalizeList(block *Node) {
|
---|
1183 | block.open = false
|
---|
1184 | item := block.FirstChild
|
---|
1185 | for item != nil {
|
---|
1186 | // check for non-final list item ending with blank line:
|
---|
1187 | if endsWithBlankLine(item) && item.Next != nil {
|
---|
1188 | block.ListData.Tight = false
|
---|
1189 | break
|
---|
1190 | }
|
---|
1191 | // recurse into children of list item, to see if there are spaces
|
---|
1192 | // between any of them:
|
---|
1193 | subItem := item.FirstChild
|
---|
1194 | for subItem != nil {
|
---|
1195 | if endsWithBlankLine(subItem) && (item.Next != nil || subItem.Next != nil) {
|
---|
1196 | block.ListData.Tight = false
|
---|
1197 | break
|
---|
1198 | }
|
---|
1199 | subItem = subItem.Next
|
---|
1200 | }
|
---|
1201 | item = item.Next
|
---|
1202 | }
|
---|
1203 | }
|
---|
1204 |
|
---|
1205 | // Parse a single list item.
|
---|
1206 | // Assumes initial prefix is already removed if this is a sublist.
|
---|
1207 | func (p *Markdown) listItem(data []byte, flags *ListType) int {
|
---|
1208 | // keep track of the indentation of the first line
|
---|
1209 | itemIndent := 0
|
---|
1210 | if data[0] == '\t' {
|
---|
1211 | itemIndent += 4
|
---|
1212 | } else {
|
---|
1213 | for itemIndent < 3 && data[itemIndent] == ' ' {
|
---|
1214 | itemIndent++
|
---|
1215 | }
|
---|
1216 | }
|
---|
1217 |
|
---|
1218 | var bulletChar byte = '*'
|
---|
1219 | i := p.uliPrefix(data)
|
---|
1220 | if i == 0 {
|
---|
1221 | i = p.oliPrefix(data)
|
---|
1222 | } else {
|
---|
1223 | bulletChar = data[i-2]
|
---|
1224 | }
|
---|
1225 | if i == 0 {
|
---|
1226 | i = p.dliPrefix(data)
|
---|
1227 | // reset definition term flag
|
---|
1228 | if i > 0 {
|
---|
1229 | *flags &= ^ListTypeTerm
|
---|
1230 | }
|
---|
1231 | }
|
---|
1232 | if i == 0 {
|
---|
1233 | // if in definition list, set term flag and continue
|
---|
1234 | if *flags&ListTypeDefinition != 0 {
|
---|
1235 | *flags |= ListTypeTerm
|
---|
1236 | } else {
|
---|
1237 | return 0
|
---|
1238 | }
|
---|
1239 | }
|
---|
1240 |
|
---|
1241 | // skip leading whitespace on first line
|
---|
1242 | for i < len(data) && data[i] == ' ' {
|
---|
1243 | i++
|
---|
1244 | }
|
---|
1245 |
|
---|
1246 | // find the end of the line
|
---|
1247 | line := i
|
---|
1248 | for i > 0 && i < len(data) && data[i-1] != '\n' {
|
---|
1249 | i++
|
---|
1250 | }
|
---|
1251 |
|
---|
1252 | // get working buffer
|
---|
1253 | var raw bytes.Buffer
|
---|
1254 |
|
---|
1255 | // put the first line into the working buffer
|
---|
1256 | raw.Write(data[line:i])
|
---|
1257 | line = i
|
---|
1258 |
|
---|
1259 | // process the following lines
|
---|
1260 | containsBlankLine := false
|
---|
1261 | sublist := 0
|
---|
1262 | codeBlockMarker := ""
|
---|
1263 |
|
---|
1264 | gatherlines:
|
---|
1265 | for line < len(data) {
|
---|
1266 | i++
|
---|
1267 |
|
---|
1268 | // find the end of this line
|
---|
1269 | for i < len(data) && data[i-1] != '\n' {
|
---|
1270 | i++
|
---|
1271 | }
|
---|
1272 |
|
---|
1273 | // if it is an empty line, guess that it is part of this item
|
---|
1274 | // and move on to the next line
|
---|
1275 | if p.isEmpty(data[line:i]) > 0 {
|
---|
1276 | containsBlankLine = true
|
---|
1277 | line = i
|
---|
1278 | continue
|
---|
1279 | }
|
---|
1280 |
|
---|
1281 | // calculate the indentation
|
---|
1282 | indent := 0
|
---|
1283 | indentIndex := 0
|
---|
1284 | if data[line] == '\t' {
|
---|
1285 | indentIndex++
|
---|
1286 | indent += 4
|
---|
1287 | } else {
|
---|
1288 | for indent < 4 && line+indent < i && data[line+indent] == ' ' {
|
---|
1289 | indent++
|
---|
1290 | indentIndex++
|
---|
1291 | }
|
---|
1292 | }
|
---|
1293 |
|
---|
1294 | chunk := data[line+indentIndex : i]
|
---|
1295 |
|
---|
1296 | if p.extensions&FencedCode != 0 {
|
---|
1297 | // determine if in or out of codeblock
|
---|
1298 | // if in codeblock, ignore normal list processing
|
---|
1299 | _, marker := isFenceLine(chunk, nil, codeBlockMarker)
|
---|
1300 | if marker != "" {
|
---|
1301 | if codeBlockMarker == "" {
|
---|
1302 | // start of codeblock
|
---|
1303 | codeBlockMarker = marker
|
---|
1304 | } else {
|
---|
1305 | // end of codeblock.
|
---|
1306 | codeBlockMarker = ""
|
---|
1307 | }
|
---|
1308 | }
|
---|
1309 | // we are in a codeblock, write line, and continue
|
---|
1310 | if codeBlockMarker != "" || marker != "" {
|
---|
1311 | raw.Write(data[line+indentIndex : i])
|
---|
1312 | line = i
|
---|
1313 | continue gatherlines
|
---|
1314 | }
|
---|
1315 | }
|
---|
1316 |
|
---|
1317 | // evaluate how this line fits in
|
---|
1318 | switch {
|
---|
1319 | // is this a nested list item?
|
---|
1320 | case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
|
---|
1321 | p.oliPrefix(chunk) > 0 ||
|
---|
1322 | p.dliPrefix(chunk) > 0:
|
---|
1323 |
|
---|
1324 | // to be a nested list, it must be indented more
|
---|
1325 | // if not, it is either a different kind of list
|
---|
1326 | // or the next item in the same list
|
---|
1327 | if indent <= itemIndent {
|
---|
1328 | if p.listTypeChanged(chunk, flags) {
|
---|
1329 | *flags |= ListItemEndOfList
|
---|
1330 | } else if containsBlankLine {
|
---|
1331 | *flags |= ListItemContainsBlock
|
---|
1332 | }
|
---|
1333 |
|
---|
1334 | break gatherlines
|
---|
1335 | }
|
---|
1336 |
|
---|
1337 | if containsBlankLine {
|
---|
1338 | *flags |= ListItemContainsBlock
|
---|
1339 | }
|
---|
1340 |
|
---|
1341 | // is this the first item in the nested list?
|
---|
1342 | if sublist == 0 {
|
---|
1343 | sublist = raw.Len()
|
---|
1344 | }
|
---|
1345 |
|
---|
1346 | // is this a nested prefix heading?
|
---|
1347 | case p.isPrefixHeading(chunk):
|
---|
1348 | // if the heading is not indented, it is not nested in the list
|
---|
1349 | // and thus ends the list
|
---|
1350 | if containsBlankLine && indent < 4 {
|
---|
1351 | *flags |= ListItemEndOfList
|
---|
1352 | break gatherlines
|
---|
1353 | }
|
---|
1354 | *flags |= ListItemContainsBlock
|
---|
1355 |
|
---|
1356 | // anything following an empty line is only part
|
---|
1357 | // of this item if it is indented 4 spaces
|
---|
1358 | // (regardless of the indentation of the beginning of the item)
|
---|
1359 | case containsBlankLine && indent < 4:
|
---|
1360 | if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
|
---|
1361 | // is the next item still a part of this list?
|
---|
1362 | next := i
|
---|
1363 | for next < len(data) && data[next] != '\n' {
|
---|
1364 | next++
|
---|
1365 | }
|
---|
1366 | for next < len(data)-1 && data[next] == '\n' {
|
---|
1367 | next++
|
---|
1368 | }
|
---|
1369 | if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
|
---|
1370 | *flags |= ListItemEndOfList
|
---|
1371 | }
|
---|
1372 | } else {
|
---|
1373 | *flags |= ListItemEndOfList
|
---|
1374 | }
|
---|
1375 | break gatherlines
|
---|
1376 |
|
---|
1377 | // a blank line means this should be parsed as a block
|
---|
1378 | case containsBlankLine:
|
---|
1379 | raw.WriteByte('\n')
|
---|
1380 | *flags |= ListItemContainsBlock
|
---|
1381 | }
|
---|
1382 |
|
---|
1383 | // if this line was preceded by one or more blanks,
|
---|
1384 | // re-introduce the blank into the buffer
|
---|
1385 | if containsBlankLine {
|
---|
1386 | containsBlankLine = false
|
---|
1387 | raw.WriteByte('\n')
|
---|
1388 | }
|
---|
1389 |
|
---|
1390 | // add the line into the working buffer without prefix
|
---|
1391 | raw.Write(data[line+indentIndex : i])
|
---|
1392 |
|
---|
1393 | line = i
|
---|
1394 | }
|
---|
1395 |
|
---|
1396 | rawBytes := raw.Bytes()
|
---|
1397 |
|
---|
1398 | block := p.addBlock(Item, nil)
|
---|
1399 | block.ListFlags = *flags
|
---|
1400 | block.Tight = false
|
---|
1401 | block.BulletChar = bulletChar
|
---|
1402 | block.Delimiter = '.' // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark
|
---|
1403 |
|
---|
1404 | // render the contents of the list item
|
---|
1405 | if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
|
---|
1406 | // intermediate render of block item, except for definition term
|
---|
1407 | if sublist > 0 {
|
---|
1408 | p.block(rawBytes[:sublist])
|
---|
1409 | p.block(rawBytes[sublist:])
|
---|
1410 | } else {
|
---|
1411 | p.block(rawBytes)
|
---|
1412 | }
|
---|
1413 | } else {
|
---|
1414 | // intermediate render of inline item
|
---|
1415 | if sublist > 0 {
|
---|
1416 | child := p.addChild(Paragraph, 0)
|
---|
1417 | child.content = rawBytes[:sublist]
|
---|
1418 | p.block(rawBytes[sublist:])
|
---|
1419 | } else {
|
---|
1420 | child := p.addChild(Paragraph, 0)
|
---|
1421 | child.content = rawBytes
|
---|
1422 | }
|
---|
1423 | }
|
---|
1424 | return line
|
---|
1425 | }
|
---|
1426 |
|
---|
1427 | // render a single paragraph that has already been parsed out
|
---|
1428 | func (p *Markdown) renderParagraph(data []byte) {
|
---|
1429 | if len(data) == 0 {
|
---|
1430 | return
|
---|
1431 | }
|
---|
1432 |
|
---|
1433 | // trim leading spaces
|
---|
1434 | beg := 0
|
---|
1435 | for data[beg] == ' ' {
|
---|
1436 | beg++
|
---|
1437 | }
|
---|
1438 |
|
---|
1439 | end := len(data)
|
---|
1440 | // trim trailing newline
|
---|
1441 | if data[len(data)-1] == '\n' {
|
---|
1442 | end--
|
---|
1443 | }
|
---|
1444 |
|
---|
1445 | // trim trailing spaces
|
---|
1446 | for end > beg && data[end-1] == ' ' {
|
---|
1447 | end--
|
---|
1448 | }
|
---|
1449 |
|
---|
1450 | p.addBlock(Paragraph, data[beg:end])
|
---|
1451 | }
|
---|
1452 |
|
---|
1453 | func (p *Markdown) paragraph(data []byte) int {
|
---|
1454 | // prev: index of 1st char of previous line
|
---|
1455 | // line: index of 1st char of current line
|
---|
1456 | // i: index of cursor/end of current line
|
---|
1457 | var prev, line, i int
|
---|
1458 | tabSize := TabSizeDefault
|
---|
1459 | if p.extensions&TabSizeEight != 0 {
|
---|
1460 | tabSize = TabSizeDouble
|
---|
1461 | }
|
---|
1462 | // keep going until we find something to mark the end of the paragraph
|
---|
1463 | for i < len(data) {
|
---|
1464 | // mark the beginning of the current line
|
---|
1465 | prev = line
|
---|
1466 | current := data[i:]
|
---|
1467 | line = i
|
---|
1468 |
|
---|
1469 | // did we find a reference or a footnote? If so, end a paragraph
|
---|
1470 | // preceding it and report that we have consumed up to the end of that
|
---|
1471 | // reference:
|
---|
1472 | if refEnd := isReference(p, current, tabSize); refEnd > 0 {
|
---|
1473 | p.renderParagraph(data[:i])
|
---|
1474 | return i + refEnd
|
---|
1475 | }
|
---|
1476 |
|
---|
1477 | // did we find a blank line marking the end of the paragraph?
|
---|
1478 | if n := p.isEmpty(current); n > 0 {
|
---|
1479 | // did this blank line followed by a definition list item?
|
---|
1480 | if p.extensions&DefinitionLists != 0 {
|
---|
1481 | if i < len(data)-1 && data[i+1] == ':' {
|
---|
1482 | return p.list(data[prev:], ListTypeDefinition)
|
---|
1483 | }
|
---|
1484 | }
|
---|
1485 |
|
---|
1486 | p.renderParagraph(data[:i])
|
---|
1487 | return i + n
|
---|
1488 | }
|
---|
1489 |
|
---|
1490 | // an underline under some text marks a heading, so our paragraph ended on prev line
|
---|
1491 | if i > 0 {
|
---|
1492 | if level := p.isUnderlinedHeading(current); level > 0 {
|
---|
1493 | // render the paragraph
|
---|
1494 | p.renderParagraph(data[:prev])
|
---|
1495 |
|
---|
1496 | // ignore leading and trailing whitespace
|
---|
1497 | eol := i - 1
|
---|
1498 | for prev < eol && data[prev] == ' ' {
|
---|
1499 | prev++
|
---|
1500 | }
|
---|
1501 | for eol > prev && data[eol-1] == ' ' {
|
---|
1502 | eol--
|
---|
1503 | }
|
---|
1504 |
|
---|
1505 | id := ""
|
---|
1506 | if p.extensions&AutoHeadingIDs != 0 {
|
---|
1507 | id = SanitizedAnchorName(string(data[prev:eol]))
|
---|
1508 | }
|
---|
1509 |
|
---|
1510 | block := p.addBlock(Heading, data[prev:eol])
|
---|
1511 | block.Level = level
|
---|
1512 | block.HeadingID = id
|
---|
1513 |
|
---|
1514 | // find the end of the underline
|
---|
1515 | for i < len(data) && data[i] != '\n' {
|
---|
1516 | i++
|
---|
1517 | }
|
---|
1518 | return i
|
---|
1519 | }
|
---|
1520 | }
|
---|
1521 |
|
---|
1522 | // if the next line starts a block of HTML, then the paragraph ends here
|
---|
1523 | if p.extensions&LaxHTMLBlocks != 0 {
|
---|
1524 | if data[i] == '<' && p.html(current, false) > 0 {
|
---|
1525 | // rewind to before the HTML block
|
---|
1526 | p.renderParagraph(data[:i])
|
---|
1527 | return i
|
---|
1528 | }
|
---|
1529 | }
|
---|
1530 |
|
---|
1531 | // if there's a prefixed heading or a horizontal rule after this, paragraph is over
|
---|
1532 | if p.isPrefixHeading(current) || p.isHRule(current) {
|
---|
1533 | p.renderParagraph(data[:i])
|
---|
1534 | return i
|
---|
1535 | }
|
---|
1536 |
|
---|
1537 | // if there's a fenced code block, paragraph is over
|
---|
1538 | if p.extensions&FencedCode != 0 {
|
---|
1539 | if p.fencedCodeBlock(current, false) > 0 {
|
---|
1540 | p.renderParagraph(data[:i])
|
---|
1541 | return i
|
---|
1542 | }
|
---|
1543 | }
|
---|
1544 |
|
---|
1545 | // if there's a definition list item, prev line is a definition term
|
---|
1546 | if p.extensions&DefinitionLists != 0 {
|
---|
1547 | if p.dliPrefix(current) != 0 {
|
---|
1548 | ret := p.list(data[prev:], ListTypeDefinition)
|
---|
1549 | return ret
|
---|
1550 | }
|
---|
1551 | }
|
---|
1552 |
|
---|
1553 | // if there's a list after this, paragraph is over
|
---|
1554 | if p.extensions&NoEmptyLineBeforeBlock != 0 {
|
---|
1555 | if p.uliPrefix(current) != 0 ||
|
---|
1556 | p.oliPrefix(current) != 0 ||
|
---|
1557 | p.quotePrefix(current) != 0 ||
|
---|
1558 | p.codePrefix(current) != 0 {
|
---|
1559 | p.renderParagraph(data[:i])
|
---|
1560 | return i
|
---|
1561 | }
|
---|
1562 | }
|
---|
1563 |
|
---|
1564 | // otherwise, scan to the beginning of the next line
|
---|
1565 | nl := bytes.IndexByte(data[i:], '\n')
|
---|
1566 | if nl >= 0 {
|
---|
1567 | i += nl + 1
|
---|
1568 | } else {
|
---|
1569 | i += len(data[i:])
|
---|
1570 | }
|
---|
1571 | }
|
---|
1572 |
|
---|
1573 | p.renderParagraph(data[:i])
|
---|
1574 | return i
|
---|
1575 | }
|
---|
1576 |
|
---|
// skipChar returns the index of the first byte at or after start in data that
// is not equal to char. If every byte from start on equals char, or start is
// not below len(data), the larger of start and len(data) is returned.
func skipChar(data []byte, start int, char byte) int {
	pos := start
	for ; pos < len(data); pos++ {
		if data[pos] != char {
			break
		}
	}
	return pos
}
1584 |
|
---|
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. If char does not occur there, or start is not below
// len(text), the larger of start and len(text) is returned.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for pos < len(text) {
		if text[pos] == char {
			break
		}
		pos++
	}
	return pos
}
1592 |
|
---|
// SanitizedAnchorName returns a sanitized anchor name for the given text.
//
// It implements the algorithm specified in the package comment: letters and
// numbers are kept in lower case, and every run of other characters between
// two kept characters becomes a single '-'. Such runs at the start or at the
// end of text produce nothing.
func SanitizedAnchorName(text string) string {
	var anchor strings.Builder
	pendingDash := false
	for _, r := range text {
		if !unicode.IsLetter(r) && !unicode.IsNumber(r) {
			// emit the dash lazily, only once another kept character follows
			pendingDash = true
			continue
		}
		if pendingDash && anchor.Len() > 0 {
			anchor.WriteByte('-')
		}
		pendingDash = false
		anchor.WriteRune(unicode.ToLower(r))
	}
	return anchor.String()
}