1 | package blackfriday
|
---|
2 |
|
---|
3 | import (
|
---|
4 | "bytes"
|
---|
5 | "fmt"
|
---|
6 | )
|
---|
7 |
|
---|
8 | // NodeType specifies a type of a single node of a syntax tree. Usually one
|
---|
9 | // node (and its type) corresponds to a single markdown feature, e.g. emphasis
|
---|
10 | // or code block.
|
---|
11 | type NodeType int
|
---|
12 |
|
---|
13 | // Constants for identifying different types of nodes. See NodeType.
|
---|
14 | const (
|
---|
15 | Document NodeType = iota
|
---|
16 | BlockQuote
|
---|
17 | List
|
---|
18 | Item
|
---|
19 | Paragraph
|
---|
20 | Heading
|
---|
21 | HorizontalRule
|
---|
22 | Emph
|
---|
23 | Strong
|
---|
24 | Del
|
---|
25 | Link
|
---|
26 | Image
|
---|
27 | Text
|
---|
28 | HTMLBlock
|
---|
29 | CodeBlock
|
---|
30 | Softbreak
|
---|
31 | Hardbreak
|
---|
32 | Code
|
---|
33 | HTMLSpan
|
---|
34 | Table
|
---|
35 | TableCell
|
---|
36 | TableHead
|
---|
37 | TableBody
|
---|
38 | TableRow
|
---|
39 | )
|
---|
40 |
|
---|
41 | var nodeTypeNames = []string{
|
---|
42 | Document: "Document",
|
---|
43 | BlockQuote: "BlockQuote",
|
---|
44 | List: "List",
|
---|
45 | Item: "Item",
|
---|
46 | Paragraph: "Paragraph",
|
---|
47 | Heading: "Heading",
|
---|
48 | HorizontalRule: "HorizontalRule",
|
---|
49 | Emph: "Emph",
|
---|
50 | Strong: "Strong",
|
---|
51 | Del: "Del",
|
---|
52 | Link: "Link",
|
---|
53 | Image: "Image",
|
---|
54 | Text: "Text",
|
---|
55 | HTMLBlock: "HTMLBlock",
|
---|
56 | CodeBlock: "CodeBlock",
|
---|
57 | Softbreak: "Softbreak",
|
---|
58 | Hardbreak: "Hardbreak",
|
---|
59 | Code: "Code",
|
---|
60 | HTMLSpan: "HTMLSpan",
|
---|
61 | Table: "Table",
|
---|
62 | TableCell: "TableCell",
|
---|
63 | TableHead: "TableHead",
|
---|
64 | TableBody: "TableBody",
|
---|
65 | TableRow: "TableRow",
|
---|
66 | }
|
---|
67 |
|
---|
68 | func (t NodeType) String() string {
|
---|
69 | return nodeTypeNames[t]
|
---|
70 | }
|
---|
71 |
|
---|
72 | // ListData contains fields relevant to a List and Item node type.
|
---|
73 | type ListData struct {
|
---|
74 | ListFlags ListType
|
---|
75 | Tight bool // Skip <p>s around list item data if true
|
---|
76 | BulletChar byte // '*', '+' or '-' in bullet lists
|
---|
77 | Delimiter byte // '.' or ')' after the number in ordered lists
|
---|
78 | RefLink []byte // If not nil, turns this list item into a footnote item and triggers different rendering
|
---|
79 | IsFootnotesList bool // This is a list of footnotes
|
---|
80 | }
|
---|
81 |
|
---|
82 | // LinkData contains fields relevant to a Link node type.
|
---|
83 | type LinkData struct {
|
---|
84 | Destination []byte // Destination is what goes into a href
|
---|
85 | Title []byte // Title is the tooltip thing that goes in a title attribute
|
---|
86 | NoteID int // NoteID contains a serial number of a footnote, zero if it's not a footnote
|
---|
87 | Footnote *Node // If it's a footnote, this is a direct link to the footnote Node. Otherwise nil.
|
---|
88 | }
|
---|
89 |
|
---|
90 | // CodeBlockData contains fields relevant to a CodeBlock node type.
|
---|
91 | type CodeBlockData struct {
|
---|
92 | IsFenced bool // Specifies whether it's a fenced code block or an indented one
|
---|
93 | Info []byte // This holds the info string
|
---|
94 | FenceChar byte
|
---|
95 | FenceLength int
|
---|
96 | FenceOffset int
|
---|
97 | }
|
---|
98 |
|
---|
99 | // TableCellData contains fields relevant to a TableCell node type.
|
---|
100 | type TableCellData struct {
|
---|
101 | IsHeader bool // This tells if it's under the header row
|
---|
102 | Align CellAlignFlags // This holds the value for align attribute
|
---|
103 | }
|
---|
104 |
|
---|
105 | // HeadingData contains fields relevant to a Heading node type.
|
---|
106 | type HeadingData struct {
|
---|
107 | Level int // This holds the heading level number
|
---|
108 | HeadingID string // This might hold heading ID, if present
|
---|
109 | IsTitleblock bool // Specifies whether it's a title block
|
---|
110 | }
|
---|
111 |
|
---|
112 | // Node is a single element in the abstract syntax tree of the parsed document.
|
---|
113 | // It holds connections to the structurally neighboring nodes and, for certain
|
---|
114 | // types of nodes, additional information that might be needed when rendering.
|
---|
115 | type Node struct {
|
---|
116 | Type NodeType // Determines the type of the node
|
---|
117 | Parent *Node // Points to the parent
|
---|
118 | FirstChild *Node // Points to the first child, if any
|
---|
119 | LastChild *Node // Points to the last child, if any
|
---|
120 | Prev *Node // Previous sibling; nil if it's the first child
|
---|
121 | Next *Node // Next sibling; nil if it's the last child
|
---|
122 |
|
---|
123 | Literal []byte // Text contents of the leaf nodes
|
---|
124 |
|
---|
125 | HeadingData // Populated if Type is Heading
|
---|
126 | ListData // Populated if Type is List
|
---|
127 | CodeBlockData // Populated if Type is CodeBlock
|
---|
128 | LinkData // Populated if Type is Link
|
---|
129 | TableCellData // Populated if Type is TableCell
|
---|
130 |
|
---|
131 | content []byte // Markdown content of the block nodes
|
---|
132 | open bool // Specifies an open block node that has not been finished to process yet
|
---|
133 | }
|
---|
134 |
|
---|
135 | // NewNode allocates a node of a specified type.
|
---|
136 | func NewNode(typ NodeType) *Node {
|
---|
137 | return &Node{
|
---|
138 | Type: typ,
|
---|
139 | open: true,
|
---|
140 | }
|
---|
141 | }
|
---|
142 |
|
---|
143 | func (n *Node) String() string {
|
---|
144 | ellipsis := ""
|
---|
145 | snippet := n.Literal
|
---|
146 | if len(snippet) > 16 {
|
---|
147 | snippet = snippet[:16]
|
---|
148 | ellipsis = "..."
|
---|
149 | }
|
---|
150 | return fmt.Sprintf("%s: '%s%s'", n.Type, snippet, ellipsis)
|
---|
151 | }
|
---|
152 |
|
---|
153 | // Unlink removes node 'n' from the tree.
|
---|
154 | // It panics if the node is nil.
|
---|
155 | func (n *Node) Unlink() {
|
---|
156 | if n.Prev != nil {
|
---|
157 | n.Prev.Next = n.Next
|
---|
158 | } else if n.Parent != nil {
|
---|
159 | n.Parent.FirstChild = n.Next
|
---|
160 | }
|
---|
161 | if n.Next != nil {
|
---|
162 | n.Next.Prev = n.Prev
|
---|
163 | } else if n.Parent != nil {
|
---|
164 | n.Parent.LastChild = n.Prev
|
---|
165 | }
|
---|
166 | n.Parent = nil
|
---|
167 | n.Next = nil
|
---|
168 | n.Prev = nil
|
---|
169 | }
|
---|
170 |
|
---|
171 | // AppendChild adds a node 'child' as a child of 'n'.
|
---|
172 | // It panics if either node is nil.
|
---|
173 | func (n *Node) AppendChild(child *Node) {
|
---|
174 | child.Unlink()
|
---|
175 | child.Parent = n
|
---|
176 | if n.LastChild != nil {
|
---|
177 | n.LastChild.Next = child
|
---|
178 | child.Prev = n.LastChild
|
---|
179 | n.LastChild = child
|
---|
180 | } else {
|
---|
181 | n.FirstChild = child
|
---|
182 | n.LastChild = child
|
---|
183 | }
|
---|
184 | }
|
---|
185 |
|
---|
186 | // InsertBefore inserts 'sibling' immediately before 'n'.
|
---|
187 | // It panics if either node is nil.
|
---|
188 | func (n *Node) InsertBefore(sibling *Node) {
|
---|
189 | sibling.Unlink()
|
---|
190 | sibling.Prev = n.Prev
|
---|
191 | if sibling.Prev != nil {
|
---|
192 | sibling.Prev.Next = sibling
|
---|
193 | }
|
---|
194 | sibling.Next = n
|
---|
195 | n.Prev = sibling
|
---|
196 | sibling.Parent = n.Parent
|
---|
197 | if sibling.Prev == nil {
|
---|
198 | sibling.Parent.FirstChild = sibling
|
---|
199 | }
|
---|
200 | }
|
---|
201 |
|
---|
202 | // IsContainer returns true if 'n' can contain children.
|
---|
203 | func (n *Node) IsContainer() bool {
|
---|
204 | switch n.Type {
|
---|
205 | case Document:
|
---|
206 | fallthrough
|
---|
207 | case BlockQuote:
|
---|
208 | fallthrough
|
---|
209 | case List:
|
---|
210 | fallthrough
|
---|
211 | case Item:
|
---|
212 | fallthrough
|
---|
213 | case Paragraph:
|
---|
214 | fallthrough
|
---|
215 | case Heading:
|
---|
216 | fallthrough
|
---|
217 | case Emph:
|
---|
218 | fallthrough
|
---|
219 | case Strong:
|
---|
220 | fallthrough
|
---|
221 | case Del:
|
---|
222 | fallthrough
|
---|
223 | case Link:
|
---|
224 | fallthrough
|
---|
225 | case Image:
|
---|
226 | fallthrough
|
---|
227 | case Table:
|
---|
228 | fallthrough
|
---|
229 | case TableHead:
|
---|
230 | fallthrough
|
---|
231 | case TableBody:
|
---|
232 | fallthrough
|
---|
233 | case TableRow:
|
---|
234 | fallthrough
|
---|
235 | case TableCell:
|
---|
236 | return true
|
---|
237 | default:
|
---|
238 | return false
|
---|
239 | }
|
---|
240 | }
|
---|
241 |
|
---|
242 | // IsLeaf returns true if 'n' is a leaf node.
|
---|
243 | func (n *Node) IsLeaf() bool {
|
---|
244 | return !n.IsContainer()
|
---|
245 | }
|
---|
246 |
|
---|
247 | func (n *Node) canContain(t NodeType) bool {
|
---|
248 | if n.Type == List {
|
---|
249 | return t == Item
|
---|
250 | }
|
---|
251 | if n.Type == Document || n.Type == BlockQuote || n.Type == Item {
|
---|
252 | return t != Item
|
---|
253 | }
|
---|
254 | if n.Type == Table {
|
---|
255 | return t == TableHead || t == TableBody
|
---|
256 | }
|
---|
257 | if n.Type == TableHead || n.Type == TableBody {
|
---|
258 | return t == TableRow
|
---|
259 | }
|
---|
260 | if n.Type == TableRow {
|
---|
261 | return t == TableCell
|
---|
262 | }
|
---|
263 | return false
|
---|
264 | }
|
---|
265 |
|
---|
266 | // WalkStatus allows NodeVisitor to have some control over the tree traversal.
|
---|
267 | // It is returned from NodeVisitor and different values allow Node.Walk to
|
---|
268 | // decide which node to go to next.
|
---|
269 | type WalkStatus int
|
---|
270 |
|
---|
271 | const (
|
---|
272 | // GoToNext is the default traversal of every node.
|
---|
273 | GoToNext WalkStatus = iota
|
---|
274 | // SkipChildren tells walker to skip all children of current node.
|
---|
275 | SkipChildren
|
---|
276 | // Terminate tells walker to terminate the traversal.
|
---|
277 | Terminate
|
---|
278 | )
|
---|
279 |
|
---|
280 | // NodeVisitor is a callback to be called when traversing the syntax tree.
|
---|
281 | // Called twice for every node: once with entering=true when the branch is
|
---|
282 | // first visited, then with entering=false after all the children are done.
|
---|
283 | type NodeVisitor func(node *Node, entering bool) WalkStatus
|
---|
284 |
|
---|
285 | // Walk is a convenience method that instantiates a walker and starts a
|
---|
286 | // traversal of subtree rooted at n.
|
---|
287 | func (n *Node) Walk(visitor NodeVisitor) {
|
---|
288 | w := newNodeWalker(n)
|
---|
289 | for w.current != nil {
|
---|
290 | status := visitor(w.current, w.entering)
|
---|
291 | switch status {
|
---|
292 | case GoToNext:
|
---|
293 | w.next()
|
---|
294 | case SkipChildren:
|
---|
295 | w.entering = false
|
---|
296 | w.next()
|
---|
297 | case Terminate:
|
---|
298 | return
|
---|
299 | }
|
---|
300 | }
|
---|
301 | }
|
---|
302 |
|
---|
303 | type nodeWalker struct {
|
---|
304 | current *Node
|
---|
305 | root *Node
|
---|
306 | entering bool
|
---|
307 | }
|
---|
308 |
|
---|
309 | func newNodeWalker(root *Node) *nodeWalker {
|
---|
310 | return &nodeWalker{
|
---|
311 | current: root,
|
---|
312 | root: root,
|
---|
313 | entering: true,
|
---|
314 | }
|
---|
315 | }
|
---|
316 |
|
---|
317 | func (nw *nodeWalker) next() {
|
---|
318 | if (!nw.current.IsContainer() || !nw.entering) && nw.current == nw.root {
|
---|
319 | nw.current = nil
|
---|
320 | return
|
---|
321 | }
|
---|
322 | if nw.entering && nw.current.IsContainer() {
|
---|
323 | if nw.current.FirstChild != nil {
|
---|
324 | nw.current = nw.current.FirstChild
|
---|
325 | nw.entering = true
|
---|
326 | } else {
|
---|
327 | nw.entering = false
|
---|
328 | }
|
---|
329 | } else if nw.current.Next == nil {
|
---|
330 | nw.current = nw.current.Parent
|
---|
331 | nw.entering = false
|
---|
332 | } else {
|
---|
333 | nw.current = nw.current.Next
|
---|
334 | nw.entering = true
|
---|
335 | }
|
---|
336 | }
|
---|
337 |
|
---|
338 | func dump(ast *Node) {
|
---|
339 | fmt.Println(dumpString(ast))
|
---|
340 | }
|
---|
341 |
|
---|
342 | func dumpR(ast *Node, depth int) string {
|
---|
343 | if ast == nil {
|
---|
344 | return ""
|
---|
345 | }
|
---|
346 | indent := bytes.Repeat([]byte("\t"), depth)
|
---|
347 | content := ast.Literal
|
---|
348 | if content == nil {
|
---|
349 | content = ast.content
|
---|
350 | }
|
---|
351 | result := fmt.Sprintf("%s%s(%q)\n", indent, ast.Type, content)
|
---|
352 | for n := ast.FirstChild; n != nil; n = n.Next {
|
---|
353 | result += dumpR(n, depth+1)
|
---|
354 | }
|
---|
355 | return result
|
---|
356 | }
|
---|
357 |
|
---|
358 | func dumpString(ast *Node) string {
|
---|
359 | return dumpR(ast, 0)
|
---|
360 | }
|
---|