1 | // Blackfriday Markdown Processor
|
---|
2 | // Available at http://github.com/russross/blackfriday
|
---|
3 | //
|
---|
4 | // Copyright © 2011 Russ Ross <russ@russross.com>.
|
---|
5 | // Distributed under the Simplified BSD License.
|
---|
6 | // See README.md for details.
|
---|
7 |
|
---|
8 | package blackfriday
|
---|
9 |
|
---|
10 | import (
|
---|
11 | "bytes"
|
---|
12 | "fmt"
|
---|
13 | "io"
|
---|
14 | "strings"
|
---|
15 | "unicode/utf8"
|
---|
16 | )
|
---|
17 |
|
---|
18 | //
|
---|
19 | // Markdown parsing and processing
|
---|
20 | //
|
---|
21 |
|
---|
// Version is the version string of the package. It appears in the rendered
// document when the CompletePage flag is on.
const Version = "2.0"
|
---|
25 |
|
---|
// Extensions is a bitwise OR'ed collection of enabled Blackfriday
// extensions.
type Extensions int
|
---|
29 |
|
---|
// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
//
// NoExtensions is the zero value. The iota-based flags start with
// NoIntraEmphasis, which is the second entry of the group and is therefore
// 1 << 1; every following name takes the next higher bit. Do not reorder the
// entries: their values depend on their position.
const (
	NoExtensions           Extensions = 0
	NoIntraEmphasis        Extensions = 1 << iota // Ignore emphasis markers inside words
	Tables                                        // Render tables
	FencedCode                                    // Render fenced code blocks
	Autolink                                      // Detect embedded URLs that are not explicitly marked
	Strikethrough                                 // Strikethrough text using ~~test~~
	LaxHTMLBlocks                                 // Loosen up HTML block parsing rules
	SpaceHeadings                                 // Be strict about prefix heading rules
	HardLineBreak                                 // Translate newlines into line breaks
	TabSizeEight                                  // Expand tabs to eight spaces instead of four
	Footnotes                                     // Pandoc-style footnotes
	NoEmptyLineBeforeBlock                        // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	HeadingIDs                                    // Specify heading IDs with {#id}
	Titleblock                                    // Titleblock ala pandoc
	AutoHeadingIDs                                // Create the heading ID from the text
	BackslashLineBreak                            // Translate trailing backslashes into line breaks
	DefinitionLists                               // Render definition lists

	// CommonHTMLFlags is the set of HTML renderer flags that Run passes to
	// its default renderer.
	CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes

	// CommonExtensions is the set of parser extensions that Run enables by
	// default.
	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
		Autolink | Strikethrough | SpaceHeadings | HeadingIDs |
		BackslashLineBreak | DefinitionLists
)
|
---|
58 |
|
---|
// ListType contains bitwise OR'ed flags for list and list item objects.
type ListType int
|
---|
61 |
|
---|
// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
//
// All six names share a single iota sequence, so each one is a distinct bit
// (1<<0 through 1<<5) even though the group is visually split in two.
const (
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	ListItemContainsBlock
	ListItemBeginningOfList // TODO: figure out if this is of any use now
	ListItemEndOfList
)
|
---|
74 |
|
---|
// CellAlignFlags holds the type of alignment used in a table cell.
type CellAlignFlags int
|
---|
77 |
|
---|
// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
//
// TableAlignmentCenter is itself defined as the combination of the left and
// right bits, so it has the value 3.
const (
	TableAlignmentLeft CellAlignFlags = 1 << iota
	TableAlignmentRight
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)
|
---|
86 |
|
---|
// The size of a tab stop, in columns.
const (
	// TabSizeDefault is the regular tab width.
	TabSizeDefault = 4
	// TabSizeDouble is twice the regular tab width.
	// NOTE(review): presumably selected by the TabSizeEight extension —
	// confirm at the call site.
	TabSizeDouble = 8
)
|
---|
92 |
|
---|
// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
//
// The map is used as a set: only the keys matter, the values are empty
// structs.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
|
---|
137 |
|
---|
// Renderer is the rendering interface. This is mostly of interest if you are
// implementing a new rendering format.
//
// Only an HTML implementation is provided in this repository, see the README
// for external implementations.
type Renderer interface {
	// RenderNode is the main rendering method. It will be called once for
	// every leaf node and twice for every non-leaf node (first with
	// entering=true, then with entering=false). The method should write its
	// rendition of the node to the supplied writer w.
	//
	// The returned WalkStatus is handed back to the tree walk that drives
	// the rendering.
	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus

	// RenderHeader is a method that allows the renderer to produce some
	// content preceding the main body of the output document. The header is
	// understood in the broad sense here. For example, the default HTML
	// renderer will write not only the HTML document preamble, but also the
	// table of contents if it was requested.
	//
	// The method will be passed an entire document tree, in case a particular
	// implementation needs to inspect it to produce output.
	//
	// The output should be written to the supplied writer w. If your
	// implementation has no header to write, supply an empty implementation.
	RenderHeader(w io.Writer, ast *Node)

	// RenderFooter is a symmetric counterpart of RenderHeader: it is called
	// after the document body has been rendered.
	RenderFooter(w io.Writer, ast *Node)
}
|
---|
166 |
|
---|
// inlineParser is the signature of the callback functions used for inline
// parsing. One such function is registered (in Markdown.inlineCallback) for
// each character that triggers a response when parsing inline data.
//
// NOTE(review): presumably data is the inline text, offset the position of
// the trigger character, and the results are the number of bytes consumed and
// the node produced — confirm against the inline parsing loop.
type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node)
|
---|
170 |
|
---|
// Markdown is a type that holds extensions and the runtime state used by
// Parse, and the renderer. You can not use it directly, construct it with New.
type Markdown struct {
	// renderer produces the output; it is set through WithRenderer or
	// WithNoExtensions.
	renderer Renderer
	// referenceOverride, when non-nil, is consulted by getRef before refs.
	referenceOverride ReferenceOverrideFunc
	// refs holds the link references and footnote definitions found in the
	// document, keyed by lower-cased id.
	refs map[string]*reference
	// inlineCallback maps a trigger byte to its inline parser; entries for
	// bytes without a parser are nil.
	inlineCallback [256]inlineParser
	// extensions is the set of enabled parser extensions.
	extensions Extensions
	// nesting and maxNesting track nesting depth; New sets maxNesting to 16.
	// NOTE(review): the code that enforces the limit is not in this part of
	// the file.
	nesting    int
	maxNesting int
	// insideLink starts out false in New.
	// NOTE(review): presumably set while the content of a link is being
	// parsed — confirm in the inline parser.
	insideLink bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference

	// doc is the root node of the syntax tree.
	doc *Node
	// tip is the block that new children are currently appended to;
	// initially doc.
	tip *Node // = doc
	// oldTip and lastMatchedContainer delimit the chain of blocks that
	// closeUnmatchedBlocks finalizes; both start out as doc.
	oldTip               *Node
	lastMatchedContainer *Node // = doc
	// allClosed is true when closeUnmatchedBlocks has nothing left to do.
	allClosed bool
}
|
---|
194 |
|
---|
195 | func (p *Markdown) getRef(refid string) (ref *reference, found bool) {
|
---|
196 | if p.referenceOverride != nil {
|
---|
197 | r, overridden := p.referenceOverride(refid)
|
---|
198 | if overridden {
|
---|
199 | if r == nil {
|
---|
200 | return nil, false
|
---|
201 | }
|
---|
202 | return &reference{
|
---|
203 | link: []byte(r.Link),
|
---|
204 | title: []byte(r.Title),
|
---|
205 | noteID: 0,
|
---|
206 | hasBlock: false,
|
---|
207 | text: []byte(r.Text)}, true
|
---|
208 | }
|
---|
209 | }
|
---|
210 | // refs are case insensitive
|
---|
211 | ref, found = p.refs[strings.ToLower(refid)]
|
---|
212 | return ref, found
|
---|
213 | }
|
---|
214 |
|
---|
215 | func (p *Markdown) finalize(block *Node) {
|
---|
216 | above := block.Parent
|
---|
217 | block.open = false
|
---|
218 | p.tip = above
|
---|
219 | }
|
---|
220 |
|
---|
221 | func (p *Markdown) addChild(node NodeType, offset uint32) *Node {
|
---|
222 | return p.addExistingChild(NewNode(node), offset)
|
---|
223 | }
|
---|
224 |
|
---|
225 | func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node {
|
---|
226 | for !p.tip.canContain(node.Type) {
|
---|
227 | p.finalize(p.tip)
|
---|
228 | }
|
---|
229 | p.tip.AppendChild(node)
|
---|
230 | p.tip = node
|
---|
231 | return node
|
---|
232 | }
|
---|
233 |
|
---|
234 | func (p *Markdown) closeUnmatchedBlocks() {
|
---|
235 | if !p.allClosed {
|
---|
236 | for p.oldTip != p.lastMatchedContainer {
|
---|
237 | parent := p.oldTip.Parent
|
---|
238 | p.finalize(p.oldTip)
|
---|
239 | p.oldTip = parent
|
---|
240 | }
|
---|
241 | p.allClosed = true
|
---|
242 | }
|
---|
243 | }
|
---|
244 |
|
---|
245 | //
|
---|
246 | //
|
---|
247 | // Public interface
|
---|
248 | //
|
---|
249 | //
|
---|
250 |
|
---|
// Reference represents the details of a link.
// See the documentation of WithRefOverride for more details on the use case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}
|
---|
262 |
|
---|
// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// If overridden is true and ref is nil, the reference is treated as not found.
// See the documentation of WithRefOverride for more details on the use case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
|
---|
268 |
|
---|
269 | // New constructs a Markdown processor. You can use the same With* functions as
|
---|
270 | // for Run() to customize parser's behavior and the renderer.
|
---|
271 | func New(opts ...Option) *Markdown {
|
---|
272 | var p Markdown
|
---|
273 | for _, opt := range opts {
|
---|
274 | opt(&p)
|
---|
275 | }
|
---|
276 | p.refs = make(map[string]*reference)
|
---|
277 | p.maxNesting = 16
|
---|
278 | p.insideLink = false
|
---|
279 | docNode := NewNode(Document)
|
---|
280 | p.doc = docNode
|
---|
281 | p.tip = docNode
|
---|
282 | p.oldTip = docNode
|
---|
283 | p.lastMatchedContainer = docNode
|
---|
284 | p.allClosed = true
|
---|
285 | // register inline parsers
|
---|
286 | p.inlineCallback[' '] = maybeLineBreak
|
---|
287 | p.inlineCallback['*'] = emphasis
|
---|
288 | p.inlineCallback['_'] = emphasis
|
---|
289 | if p.extensions&Strikethrough != 0 {
|
---|
290 | p.inlineCallback['~'] = emphasis
|
---|
291 | }
|
---|
292 | p.inlineCallback['`'] = codeSpan
|
---|
293 | p.inlineCallback['\n'] = lineBreak
|
---|
294 | p.inlineCallback['['] = link
|
---|
295 | p.inlineCallback['<'] = leftAngle
|
---|
296 | p.inlineCallback['\\'] = escape
|
---|
297 | p.inlineCallback['&'] = entity
|
---|
298 | p.inlineCallback['!'] = maybeImage
|
---|
299 | p.inlineCallback['^'] = maybeInlineFootnote
|
---|
300 | if p.extensions&Autolink != 0 {
|
---|
301 | p.inlineCallback['h'] = maybeAutoLink
|
---|
302 | p.inlineCallback['m'] = maybeAutoLink
|
---|
303 | p.inlineCallback['f'] = maybeAutoLink
|
---|
304 | p.inlineCallback['H'] = maybeAutoLink
|
---|
305 | p.inlineCallback['M'] = maybeAutoLink
|
---|
306 | p.inlineCallback['F'] = maybeAutoLink
|
---|
307 | }
|
---|
308 | if p.extensions&Footnotes != 0 {
|
---|
309 | p.notes = make([]*reference, 0)
|
---|
310 | }
|
---|
311 | return &p
|
---|
312 | }
|
---|
313 |
|
---|
// Option customizes the Markdown processor's default behavior. Options are
// passed to New or Run and are applied to the processor in the order given.
type Option func(*Markdown)
|
---|
316 |
|
---|
317 | // WithRenderer allows you to override the default renderer.
|
---|
318 | func WithRenderer(r Renderer) Option {
|
---|
319 | return func(p *Markdown) {
|
---|
320 | p.renderer = r
|
---|
321 | }
|
---|
322 | }
|
---|
323 |
|
---|
324 | // WithExtensions allows you to pick some of the many extensions provided by
|
---|
325 | // Blackfriday. You can bitwise OR them.
|
---|
326 | func WithExtensions(e Extensions) Option {
|
---|
327 | return func(p *Markdown) {
|
---|
328 | p.extensions = e
|
---|
329 | }
|
---|
330 | }
|
---|
331 |
|
---|
332 | // WithNoExtensions turns off all extensions and custom behavior.
|
---|
333 | func WithNoExtensions() Option {
|
---|
334 | return func(p *Markdown) {
|
---|
335 | p.extensions = NoExtensions
|
---|
336 | p.renderer = NewHTMLRenderer(HTMLRendererParameters{
|
---|
337 | Flags: HTMLFlagsNone,
|
---|
338 | })
|
---|
339 | }
|
---|
340 | }
|
---|
341 |
|
---|
342 | // WithRefOverride sets an optional function callback that is called every
|
---|
343 | // time a reference is resolved.
|
---|
344 | //
|
---|
345 | // In Markdown, the link reference syntax can be made to resolve a link to
|
---|
346 | // a reference instead of an inline URL, in one of the following ways:
|
---|
347 | //
|
---|
348 | // * [link text][refid]
|
---|
349 | // * [refid][]
|
---|
350 | //
|
---|
351 | // Usually, the refid is defined at the bottom of the Markdown document. If
|
---|
352 | // this override function is provided, the refid is passed to the override
|
---|
353 | // function first, before consulting the defined refids at the bottom. If
|
---|
354 | // the override function indicates an override did not occur, the refids at
|
---|
355 | // the bottom will be used to fill in the link details.
|
---|
356 | func WithRefOverride(o ReferenceOverrideFunc) Option {
|
---|
357 | return func(p *Markdown) {
|
---|
358 | p.referenceOverride = o
|
---|
359 | }
|
---|
360 | }
|
---|
361 |
|
---|
362 | // Run is the main entry point to Blackfriday. It parses and renders a
|
---|
363 | // block of markdown-encoded text.
|
---|
364 | //
|
---|
365 | // The simplest invocation of Run takes one argument, input:
|
---|
366 | // output := Run(input)
|
---|
367 | // This will parse the input with CommonExtensions enabled and render it with
|
---|
368 | // the default HTMLRenderer (with CommonHTMLFlags).
|
---|
369 | //
|
---|
370 | // Variadic arguments opts can customize the default behavior. Since Markdown
|
---|
371 | // type does not contain exported fields, you can not use it directly. Instead,
|
---|
372 | // use the With* functions. For example, this will call the most basic
|
---|
373 | // functionality, with no extensions:
|
---|
374 | // output := Run(input, WithNoExtensions())
|
---|
375 | //
|
---|
376 | // You can use any number of With* arguments, even contradicting ones. They
|
---|
377 | // will be applied in order of appearance and the latter will override the
|
---|
378 | // former:
|
---|
379 | // output := Run(input, WithNoExtensions(), WithExtensions(exts),
|
---|
380 | // WithRenderer(yourRenderer))
|
---|
381 | func Run(input []byte, opts ...Option) []byte {
|
---|
382 | r := NewHTMLRenderer(HTMLRendererParameters{
|
---|
383 | Flags: CommonHTMLFlags,
|
---|
384 | })
|
---|
385 | optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)}
|
---|
386 | optList = append(optList, opts...)
|
---|
387 | parser := New(optList...)
|
---|
388 | ast := parser.Parse(input)
|
---|
389 | var buf bytes.Buffer
|
---|
390 | parser.renderer.RenderHeader(&buf, ast)
|
---|
391 | ast.Walk(func(node *Node, entering bool) WalkStatus {
|
---|
392 | return parser.renderer.RenderNode(&buf, node, entering)
|
---|
393 | })
|
---|
394 | parser.renderer.RenderFooter(&buf, ast)
|
---|
395 | return buf.Bytes()
|
---|
396 | }
|
---|
397 |
|
---|
398 | // Parse is an entry point to the parsing part of Blackfriday. It takes an
|
---|
399 | // input markdown document and produces a syntax tree for its contents. This
|
---|
400 | // tree can then be rendered with a default or custom renderer, or
|
---|
401 | // analyzed/transformed by the caller to whatever non-standard needs they have.
|
---|
402 | // The return value is the root node of the syntax tree.
|
---|
403 | func (p *Markdown) Parse(input []byte) *Node {
|
---|
404 | p.block(input)
|
---|
405 | // Walk the tree and finish up some of unfinished blocks
|
---|
406 | for p.tip != nil {
|
---|
407 | p.finalize(p.tip)
|
---|
408 | }
|
---|
409 | // Walk the tree again and process inline markdown in each block
|
---|
410 | p.doc.Walk(func(node *Node, entering bool) WalkStatus {
|
---|
411 | if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell {
|
---|
412 | p.inline(node, node.content)
|
---|
413 | node.content = nil
|
---|
414 | }
|
---|
415 | return GoToNext
|
---|
416 | })
|
---|
417 | p.parseRefsToAST()
|
---|
418 | return p.doc
|
---|
419 | }
|
---|
420 |
|
---|
// parseRefsToAST adds the collected footnotes to the syntax tree as an
// ordered list flagged with IsFootnotesList. It does nothing unless the
// Footnotes extension is enabled and at least one footnote has been recorded
// in p.notes.
func (p *Markdown) parseRefsToAST() {
	if p.extensions&Footnotes == 0 || len(p.notes) == 0 {
		return
	}
	// Start from the document node.
	// NOTE(review): presumably addBlock attaches the new List node below the
	// current tip — confirm in addBlock.
	p.tip = p.doc
	block := p.addBlock(List, nil)
	block.IsFootnotesList = true
	block.ListFlags = ListTypeOrdered
	flags := ListItemBeginningOfList
	// Note: this loop is intentionally explicit, not range-form. This is
	// because the body of the loop will append nested footnotes to p.notes and
	// we need to process those late additions. Range form would only walk over
	// the fixed initial set.
	for i := 0; i < len(p.notes); i++ {
		ref := p.notes[i]
		p.addExistingChild(ref.footnote, 0)
		// Careful: this block shadows the list node declared above; inside
		// the loop it is the item node of the current footnote.
		block := ref.footnote
		block.ListFlags = flags | ListTypeOrdered
		block.RefLink = ref.link
		if ref.hasBlock {
			// NOTE(review): ListItemContainsBlock is set after ListFlags has
			// already been assigned and is cleared again at the end of the
			// iteration, so it never reaches block.ListFlags — confirm
			// whether that is intended.
			flags |= ListItemContainsBlock
			// the footnote text (kept in ref.title) is parsed as blocks
			p.block(ref.title)
		} else {
			// single-line footnote: parse the text as inline content
			p.inline(block, ref.title)
		}
		// only the first item carries ListItemBeginningOfList
		flags &^= ListItemBeginningOfList | ListItemContainsBlock
	}
	// Close the list (the outer block again) and move the tip to its parent.
	above := block.Parent
	finalizeList(block)
	p.tip = above
	// Run the inline pass over the paragraphs and headings inside the list.
	block.Walk(func(node *Node, entering bool) WalkStatus {
		if node.Type == Paragraph || node.Type == Heading {
			p.inline(node, node.content)
			node.content = nil
		}
		return GoToNext
	})
}
|
---|
459 |
|
---|
460 | //
|
---|
461 | // Link references
|
---|
462 | //
|
---|
463 | // This section implements support for references that (usually) appear
|
---|
464 | // as footnotes in a document, and can be referenced anywhere in the document.
|
---|
465 | // The basic format is:
|
---|
466 | //
|
---|
467 | // [1]: http://www.google.com/ "Google"
|
---|
468 | // [2]: http://www.github.com/ "Github"
|
---|
469 | //
|
---|
470 | // Anywhere in the document, the reference can be linked by referring to its
|
---|
471 | // label, i.e., 1 and 2 in this example, as in:
|
---|
472 | //
|
---|
473 | // This library is hosted on [Github][2], a git hosting site.
|
---|
474 | //
|
---|
475 | // Actual footnotes as specified in Pandoc and supported by some other Markdown
|
---|
476 | // libraries such as php-markdown are also taken care of. They look like this:
|
---|
477 | //
|
---|
478 | // This sentence needs a bit of further explanation.[^note]
|
---|
479 | //
|
---|
480 | // [^note]: This is the explanation.
|
---|
481 | //
|
---|
482 | // Footnotes should be placed at the end of the document in an ordered list.
|
---|
483 | // Finally, there are inline footnotes such as:
|
---|
484 | //
|
---|
485 | // Inline footnotes^[Also supported.] provide a quick inline explanation,
|
---|
486 | // but are rendered at the bottom of the document.
|
---|
487 | //
|
---|
488 |
|
---|
// reference holds all information necessary for reference-style links or
// footnotes.
//
// Consider this markdown with reference-style links:
//
//	[link][ref]
//
//	[ref]: /url/ "tooltip title"
//
// It will be ultimately converted to this HTML:
//
//	<p><a href="/url/" title="tooltip title">link</a></p>
//
// And a reference structure will be populated as follows:
//
//	p.refs["ref"] = &reference{
//	    link: "/url/",
//	    title: "tooltip title",
//	}
//
// Alternatively, reference can contain information about a footnote. Consider
// this markdown:
//
//	Text needing a footnote.[^a]
//
//	[^a]: This is the note
//
// A reference structure will be populated as follows:
//
//	p.refs["a"] = &reference{
//	    link: "a",
//	    title: "This is the note",
//	    noteID: <some positive int>,
//	}
//
// TODO: As you can see, it begs for splitting into two dedicated structures
// for refs and for footnotes.
type reference struct {
	// link is the link destination; for a footnote it holds the footnote id
	// instead.
	link []byte
	// title is the link title; for a footnote it holds the footnote text.
	title []byte
	// noteID is 0 if this is not a footnote ref.
	noteID int
	// hasBlock is set for footnotes whose text continues on indented lines.
	hasBlock bool
	// footnote is a link to the Item node within a list of footnotes.
	footnote *Node

	// text only gets populated by the refOverride feature, with
	// Reference.Text.
	text []byte
}
|
---|
535 |
|
---|
536 | func (r *reference) String() string {
|
---|
537 | return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
|
---|
538 | r.link, r.title, r.text, r.noteID, r.hasBlock)
|
---|
539 | }
|
---|
540 |
|
---|
// isReference checks whether or not data starts with a link reference
// definition ("[id]: ...") or, when the Footnotes extension is enabled, with
// a footnote definition ("[^id]: ...").
// If so, it is parsed and stored in p.refs under the lower-cased id; an
// earlier entry with the same id is replaced.
//
// tabSize is only used for footnotes, where it is passed on to scanFootnote
// as the indent size.
//
// It returns the offset at which the caller should resume parsing, as
// reported by scanLinkRef or scanFootnote, or zero if the first line is not
// a reference.
func isReference(p *Markdown, data []byte, tabSize int) int {
	// up to 3 optional leading spaces
	if len(data) < 4 {
		return 0
	}
	i := 0
	for i < 3 && data[i] == ' ' {
		i++
	}

	noteID := 0

	// id part: anything but a newline between brackets
	if data[i] != '[' {
		return 0
	}
	i++
	if p.extensions&Footnotes != 0 {
		if i < len(data) && data[i] == '^' {
			// we can set it to anything here because the proper noteIds will
			// be assigned later during the second pass. It just has to be != 0
			noteID = 1
			i++
		}
	}
	idOffset := i
	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
		i++
	}
	// the id has to be closed on the same line
	if i >= len(data) || data[i] != ']' {
		return 0
	}
	idEnd := i
	// footnotes can have empty ID, like this: [^], but a reference can not be
	// empty like this: []. Break early if it's not a footnote and there's no ID
	if noteID == 0 && idOffset == idEnd {
		return 0
	}
	// spacer: colon (space | tab)* newline? (space | tab)*
	i++
	if i >= len(data) || data[i] != ':' {
		return 0
	}
	i++
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
		i++
		// treat "\r\n" as a single line break
		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
			i++
		}
	}
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	// nothing follows the spacer: not a definition
	if i >= len(data) {
		return 0
	}

	var (
		linkOffset, linkEnd   int
		titleOffset, titleEnd int
		lineEnd               int
		raw                   []byte
		hasBlock              bool
	)

	if p.extensions&Footnotes != 0 && noteID != 0 {
		// for a footnote the "link" range is the range of the footnote text
		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
		lineEnd = linkEnd
	} else {
		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
	}
	// both scanners report failure by leaving the end offset at zero
	if lineEnd == 0 {
		return 0
	}

	// a valid ref has been found

	ref := &reference{
		noteID:   noteID,
		hasBlock: hasBlock,
	}

	if noteID > 0 {
		// reusing the link field for the id since footnotes don't have links
		ref.link = data[idOffset:idEnd]
		// if footnote, it's not really a title, it's the contained text
		ref.title = raw
	} else {
		ref.link = data[linkOffset:linkEnd]
		ref.title = data[titleOffset:titleEnd]
	}

	// id matches are case-insensitive
	id := string(bytes.ToLower(data[idOffset:idEnd]))

	p.refs[id] = ref

	return lineEnd
}
|
---|
648 |
|
---|
649 | func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
|
---|
650 | // link: whitespace-free sequence, optionally between angle brackets
|
---|
651 | if data[i] == '<' {
|
---|
652 | i++
|
---|
653 | }
|
---|
654 | linkOffset = i
|
---|
655 | for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
|
---|
656 | i++
|
---|
657 | }
|
---|
658 | linkEnd = i
|
---|
659 | if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
|
---|
660 | linkOffset++
|
---|
661 | linkEnd--
|
---|
662 | }
|
---|
663 |
|
---|
664 | // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
|
---|
665 | for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
|
---|
666 | i++
|
---|
667 | }
|
---|
668 | if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
|
---|
669 | return
|
---|
670 | }
|
---|
671 |
|
---|
672 | // compute end-of-line
|
---|
673 | if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
|
---|
674 | lineEnd = i
|
---|
675 | }
|
---|
676 | if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
|
---|
677 | lineEnd++
|
---|
678 | }
|
---|
679 |
|
---|
680 | // optional (space|tab)* spacer after a newline
|
---|
681 | if lineEnd > 0 {
|
---|
682 | i = lineEnd + 1
|
---|
683 | for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
|
---|
684 | i++
|
---|
685 | }
|
---|
686 | }
|
---|
687 |
|
---|
688 | // optional title: any non-newline sequence enclosed in '"() alone on its line
|
---|
689 | if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
|
---|
690 | i++
|
---|
691 | titleOffset = i
|
---|
692 |
|
---|
693 | // look for EOL
|
---|
694 | for i < len(data) && data[i] != '\n' && data[i] != '\r' {
|
---|
695 | i++
|
---|
696 | }
|
---|
697 | if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
|
---|
698 | titleEnd = i + 1
|
---|
699 | } else {
|
---|
700 | titleEnd = i
|
---|
701 | }
|
---|
702 |
|
---|
703 | // step back
|
---|
704 | i--
|
---|
705 | for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
|
---|
706 | i--
|
---|
707 | }
|
---|
708 | if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
|
---|
709 | lineEnd = titleEnd
|
---|
710 | titleEnd = i
|
---|
711 | }
|
---|
712 | }
|
---|
713 |
|
---|
714 | return
|
---|
715 | }
|
---|
716 |
|
---|
// scanFootnote collects the text of a footnote definition, starting at index
// i of data (the position right after the "[^id]:" prefix and its spacer).
//
// The first bit of this logic is the same as Parser.listItem, but the rest
// is much simpler. This function simply finds the entire block and shifts it
// over by one tab if it is indeed a block (just returns the line if it's not).
//
// blockStart is the index at which the footnote text begins and blockEnd is
// the end of the section in the input buffer. contents is the extracted text
// that was shifted over one tab; it will need to be rendered at the end of
// the document. hasBlock reports whether at least one indented continuation
// line was found. indentSize is the indent width handed to isIndented.
//
// All results are zero when i is 0 or data is empty.
func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
	if i == 0 || len(data) == 0 {
		return
	}

	// skip leading whitespace on first line
	for i < len(data) && data[i] == ' ' {
		i++
	}

	blockStart = i

	// find the end of the line; the loop stops with i just past the newline
	// (or at the end of the input)
	blockEnd = i
	for i < len(data) && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[blockEnd:i])
	blockEnd = i

	// process the following lines
	containsBlankLine := false

gatherLines:
	for blockEnd < len(data) {
		i++

		// find the end of this line
		for i < len(data) && data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[blockEnd:i]) > 0 {
			containsBlankLine = true
			blockEnd = i
			continue
		}

		// n is the width of the indent in bytes, or 0 if the line is not
		// indented
		n := 0
		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
			// this is the end of the block.
			// we don't want to include this last line in the index.
			break gatherLines
		}

		// if there were blank lines before this one, insert a new one now
		// (a run of blank lines collapses into a single one)
		if containsBlankLine {
			raw.WriteByte('\n')
			containsBlankLine = false
		}

		// get rid of that first tab, write to buffer
		raw.Write(data[blockEnd+n : i])
		hasBlock = true

		blockEnd = i
	}

	// add a final newline when the scanned input does not end with one
	if data[blockEnd-1] != '\n' {
		raw.WriteByte('\n')
	}

	contents = raw.Bytes()

	return
}
|
---|
796 |
|
---|
797 | //
|
---|
798 | //
|
---|
799 | // Miscellaneous helper functions
|
---|
800 | //
|
---|
801 | //
|
---|
802 |
|
---|
// ispunct reports whether c is an ASCII punctuation symbol, i.e. one of
//
//	!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
//
// Those characters form four contiguous ranges of the ASCII table.
// Taken from a private function in regexp in the stdlib.
func ispunct(c byte) bool {
	switch {
	case '!' <= c && c <= '/', // !"#$%&'()*+,-./
		':' <= c && c <= '@', // :;<=>?@
		'[' <= c && c <= '`', // [\]^_`
		'{' <= c && c <= '~': // {|}~
		return true
	}
	return false
}
|
---|
813 |
|
---|
814 | // Test if a character is a whitespace character.
|
---|
815 | func isspace(c byte) bool {
|
---|
816 | return ishorizontalspace(c) || isverticalspace(c)
|
---|
817 | }
|
---|
818 |
|
---|
// Test if a character is a horizontal whitespace character:
// a space or a tab.
func ishorizontalspace(c byte) bool {
	switch c {
	case ' ', '\t':
		return true
	}
	return false
}
|
---|
823 |
|
---|
// Test if a character is a vertical whitespace character:
// line feed, carriage return, form feed or vertical tab.
func isverticalspace(c byte) bool {
	switch c {
	case '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
|
---|
828 |
|
---|
// Test if a character is an ASCII letter (a-z or A-Z).
func isletter(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
|
---|
833 |
|
---|
834 | // Test if a character is a letter or a digit.
|
---|
835 | // TODO: check when this is looking for ASCII alnum and when it should use unicode
|
---|
836 | func isalnum(c byte) bool {
|
---|
837 | return (c >= '0' && c <= '9') || isletter(c)
|
---|
838 | }
|
---|
839 |
|
---|
// Replace tab characters with spaces, aligning to the next tabSize column.
// The expanded line is appended to out. Columns are counted in runes, not
// bytes. Nothing is added after the line's own content: a trailing newline
// appears in the output only if line already ends with one.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// measure the run of tabs at the very beginning of the line
	leading := 0
	for leading < len(line) && line[leading] == '\t' {
		leading++
	}

	// common case: no tab after the leading run (or no tabs at all), so each
	// leading tab is worth exactly tabSize spaces and no runes need decoding
	if bytes.IndexByte(line[leading:], '\t') < 0 {
		for n := leading * tabSize; n > 0; n-- {
			out.WriteByte(' ')
		}
		out.Write(line[leading:])
		return
	}

	// slow case: a tab follows other text, so track the current column
	// (in runes) to know how many spaces each tab is worth
	col := 0
	for pos := 0; pos < len(line); {
		if line[pos] != '\t' {
			// copy a run of non-tab text verbatim, one column per rune
			runStart := pos
			for pos < len(line) && line[pos] != '\t' {
				_, width := utf8.DecodeRune(line[pos:])
				pos += width
				col++
			}
			out.Write(line[runStart:pos])
			continue
		}

		// a tab always yields at least one space, then pads up to the
		// next column that is a multiple of tabSize
		out.WriteByte(' ')
		col++
		for col%tabSize != 0 {
			out.WriteByte(' ')
			col++
		}
		pos++
	}
}
|
---|
897 |
|
---|
// Find if a line counts as indented or not.
// A line is indented when it starts with a single tab or with at least
// indentSize spaces. Returns the number of bytes the indent occupies
// (1 for a tab, indentSize for spaces), or 0 if the line is not indented.
func isIndented(data []byte, indentSize int) int {
	switch {
	case len(data) == 0:
		return 0
	case data[0] == '\t':
		// a leading tab is a full indent on its own
		return 1
	case len(data) < indentSize:
		// too short to hold indentSize spaces
		return 0
	}

	// count leading spaces, stopping once a full indent has been seen
	spaces := 0
	for spaces < indentSize && data[spaces] == ' ' {
		spaces++
	}
	if spaces < indentSize {
		return 0
	}
	return indentSize
}
|
---|
917 |
|
---|
918 | // Create a url-safe slug for fragments
|
---|
919 | func slugify(in []byte) []byte {
|
---|
920 | if len(in) == 0 {
|
---|
921 | return in
|
---|
922 | }
|
---|
923 | out := make([]byte, 0, len(in))
|
---|
924 | sym := false
|
---|
925 |
|
---|
926 | for _, ch := range in {
|
---|
927 | if isalnum(ch) {
|
---|
928 | sym = false
|
---|
929 | out = append(out, ch)
|
---|
930 | } else if sym {
|
---|
931 | continue
|
---|
932 | } else {
|
---|
933 | out = append(out, '-')
|
---|
934 | sym = true
|
---|
935 | }
|
---|
936 | }
|
---|
937 | var a, b int
|
---|
938 | var ch byte
|
---|
939 | for a, ch = range out {
|
---|
940 | if ch != '-' {
|
---|
941 | break
|
---|
942 | }
|
---|
943 | }
|
---|
944 | for b = len(out) - 1; b > 0; b-- {
|
---|
945 | if out[b] != '-' {
|
---|
946 | break
|
---|
947 | }
|
---|
948 | }
|
---|
949 | return out[a : b+1]
|
---|
950 | }
|
---|