Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: code/trunk/vendor/github.com/russross/blackfriday/v2/markdown.go@ 67

Last change on this file since 67 was 67, checked in by Izuru Yakumo, 23 months ago

Use vendored modules

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 25.7 KB

Line
1	// Blackfriday Markdown Processor
2	// Available at http://github.com/russross/blackfriday
3	//
4	// Copyright © 2011 Russ Ross <russ@russross.com>.
5	// Distributed under the Simplified BSD License.
6	// See README.md for details.
7
8	package blackfriday
9
10	import (
11	"bytes"
12	"fmt"
13	"io"
14	"strings"
15	"unicode/utf8"
16	)
17
18	//
19	// Markdown parsing and processing
20	//
21
// Version is the version string of the package. It appears in the rendered
// document when the CompletePage flag is on.
const Version = "2.0"
25
26	// Extensions is a bitwise or'ed collection of enabled Blackfriday's
27	// extensions.
28	type Extensions int
29
30	// These are the supported markdown parsing extensions.
31	// OR these values together to select multiple extensions.
32	const (
33	NoExtensions Extensions = 0
34	NoIntraEmphasis Extensions = 1 << iota // Ignore emphasis markers inside words
35	Tables // Render tables
36	FencedCode // Render fenced code blocks
37	Autolink // Detect embedded URLs that are not explicitly marked
38	Strikethrough // Strikethrough text using ~~test~~
39	LaxHTMLBlocks // Loosen up HTML block parsing rules
40	SpaceHeadings // Be strict about prefix heading rules
41	HardLineBreak // Translate newlines into line breaks
42	TabSizeEight // Expand tabs to eight spaces instead of four
43	Footnotes // Pandoc-style footnotes
44	NoEmptyLineBeforeBlock // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
45	HeadingIDs // specify heading IDs with {#id}
46	Titleblock // Titleblock ala pandoc
47	AutoHeadingIDs // Create the heading ID from the text
48	BackslashLineBreak // Translate trailing backslashes into line breaks
49	DefinitionLists // Render definition lists
50
51	CommonHTMLFlags HTMLFlags = UseXHTML \| Smartypants \|
52	SmartypantsFractions \| SmartypantsDashes \| SmartypantsLatexDashes
53
54	CommonExtensions Extensions = NoIntraEmphasis \| Tables \| FencedCode \|
55	Autolink \| Strikethrough \| SpaceHeadings \| HeadingIDs \|
56	BackslashLineBreak \| DefinitionLists
57	)
58
// ListType is a bit set of flags describing list and list item nodes.
type ListType int

// Flag values passed to the ListItem renderer. Several flags may be ORed
// together. They matter mostly to authors of new output formats.
const (
	// ListTypeOrdered marks an ordered list.
	ListTypeOrdered ListType = 1 << iota
	// ListTypeDefinition marks a definition list.
	ListTypeDefinition
	// ListTypeTerm marks a term inside a definition list.
	ListTypeTerm

	// ListItemContainsBlock marks an item that contains block content.
	ListItemContainsBlock
	// ListItemBeginningOfList marks the first item of a list.
	// TODO: figure out if this is of any use now
	ListItemBeginningOfList
	// ListItemEndOfList marks the last item of a list.
	ListItemEndOfList
)
74
// CellAlignFlags describes how the content of a table cell is aligned.
type CellAlignFlags int

// Alignment values passed to the table cell renderer. A cell carries exactly
// one of them; they are not ORed together by callers. They matter mostly to
// authors of new output formats.
const (
	// TableAlignmentLeft aligns the cell content to the left.
	TableAlignmentLeft CellAlignFlags = 1 << iota
	// TableAlignmentRight aligns the cell content to the right.
	TableAlignmentRight
	// TableAlignmentCenter centers the cell content; its value is the union
	// of the left and right bits.
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)
86
// Tab stop widths, in columns.
const (
	// TabSizeDefault is the regular tab stop width.
	TabSizeDefault = 4
	// TabSizeDouble is the doubled tab stop width.
	TabSizeDouble = 8
)
92
// blockTags is the set of tag names recognized as HTML block tags. Any of
// them can be included in markdown text without special escaping. The map
// is used purely for membership tests, hence the empty struct values.
var blockTags = map[string]struct{}{
	// Classic HTML block-level tags.
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// Tags introduced with HTML5.
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
137
138	// Renderer is the rendering interface. This is mostly of interest if you are
139	// implementing a new rendering format.
140	//
141	// Only an HTML implementation is provided in this repository, see the README
142	// for external implementations.
143	type Renderer interface {
144	// RenderNode is the main rendering method. It will be called once for
145	// every leaf node and twice for every non-leaf node (first with
146	// entering=true, then with entering=false). The method should write its
147	// rendition of the node to the supplied writer w.
148	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
149
150	// RenderHeader is a method that allows the renderer to produce some
151	// content preceding the main body of the output document. The header is
152	// understood in the broad sense here. For example, the default HTML
153	// renderer will write not only the HTML document preamble, but also the
154	// table of contents if it was requested.
155	//
156	// The method will be passed an entire document tree, in case a particular
157	// implementation needs to inspect it to produce output.
158	//
159	// The output should be written to the supplied writer w. If your
160	// implementation has no header to write, supply an empty implementation.
161	RenderHeader(w io.Writer, ast *Node)
162
163	// RenderFooter is a symmetric counterpart of RenderHeader.
164	RenderFooter(w io.Writer, ast *Node)
165	}
166
167	// Callback functions for inline parsing. One such function is defined
168	// for each character that triggers a response when parsing inline data.
169	type inlineParser func(p Markdown, data []byte, offset int) (int, Node)
170
171	// Markdown is a type that holds extensions and the runtime state used by
172	// Parse, and the renderer. You can not use it directly, construct it with New.
173	type Markdown struct {
174	renderer Renderer
175	referenceOverride ReferenceOverrideFunc
176	refs map[string]*reference
177	inlineCallback [256]inlineParser
178	extensions Extensions
179	nesting int
180	maxNesting int
181	insideLink bool
182
183	// Footnotes need to be ordered as well as available to quickly check for
184	// presence. If a ref is also a footnote, it's stored both in refs and here
185	// in notes. Slice is nil if footnotes not enabled.
186	notes []*reference
187
188	doc *Node
189	tip *Node // = doc
190	oldTip *Node
191	lastMatchedContainer *Node // = doc
192	allClosed bool
193	}
194
195	func (p Markdown) getRef(refid string) (ref reference, found bool) {
196	if p.referenceOverride != nil {
197	r, overridden := p.referenceOverride(refid)
198	if overridden {
199	if r == nil {
200	return nil, false
201	}
202	return &reference{
203	link: []byte(r.Link),
204	title: []byte(r.Title),
205	noteID: 0,
206	hasBlock: false,
207	text: []byte(r.Text)}, true
208	}
209	}
210	// refs are case insensitive
211	ref, found = p.refs[strings.ToLower(refid)]
212	return ref, found
213	}
214
215	func (p Markdown) finalize(block Node) {
216	above := block.Parent
217	block.open = false
218	p.tip = above
219	}
220
221	func (p Markdown) addChild(node NodeType, offset uint32) Node {
222	return p.addExistingChild(NewNode(node), offset)
223	}
224
225	func (p Markdown) addExistingChild(node Node, offset uint32) *Node {
226	for !p.tip.canContain(node.Type) {
227	p.finalize(p.tip)
228	}
229	p.tip.AppendChild(node)
230	p.tip = node
231	return node
232	}
233
234	func (p *Markdown) closeUnmatchedBlocks() {
235	if !p.allClosed {
236	for p.oldTip != p.lastMatchedContainer {
237	parent := p.oldTip.Parent
238	p.finalize(p.oldTip)
239	p.oldTip = parent
240	}
241	p.allClosed = true
242	}
243	}
244
245	//
246	//
247	// Public interface
248	//
249	//
250
// Reference carries the details of a link as returned by a
// ReferenceOverrideFunc.
// See the documentation in Options for more details on use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string

	// Title is the alternate text describing the link in more detail.
	Title string

	// Text is the optional text that replaces the ref when the syntax used
	// was [refid][].
	Text string
}
262
263	// ReferenceOverrideFunc is expected to be called with a reference string and
264	// return either a valid Reference type that the reference string maps to or
265	// nil. If overridden is false, the default reference logic will be executed.
266	// See the documentation in Options for more details on use-case.
267	type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
268
269	// New constructs a Markdown processor. You can use the same With* functions as
270	// for Run() to customize parser's behavior and the renderer.
271	func New(opts ...Option) *Markdown {
272	var p Markdown
273	for _, opt := range opts {
274	opt(&p)
275	}
276	p.refs = make(map[string]*reference)
277	p.maxNesting = 16
278	p.insideLink = false
279	docNode := NewNode(Document)
280	p.doc = docNode
281	p.tip = docNode
282	p.oldTip = docNode
283	p.lastMatchedContainer = docNode
284	p.allClosed = true
285	// register inline parsers
286	p.inlineCallback[' '] = maybeLineBreak
287	p.inlineCallback['*'] = emphasis
288	p.inlineCallback['_'] = emphasis
289	if p.extensions&Strikethrough != 0 {
290	p.inlineCallback['~'] = emphasis
291	}
292	p.inlineCallback['`'] = codeSpan
293	p.inlineCallback['\n'] = lineBreak
294	p.inlineCallback['['] = link
295	p.inlineCallback['<'] = leftAngle
296	p.inlineCallback['\\'] = escape
297	p.inlineCallback['&'] = entity
298	p.inlineCallback['!'] = maybeImage
299	p.inlineCallback['^'] = maybeInlineFootnote
300	if p.extensions&Autolink != 0 {
301	p.inlineCallback['h'] = maybeAutoLink
302	p.inlineCallback['m'] = maybeAutoLink
303	p.inlineCallback['f'] = maybeAutoLink
304	p.inlineCallback['H'] = maybeAutoLink
305	p.inlineCallback['M'] = maybeAutoLink
306	p.inlineCallback['F'] = maybeAutoLink
307	}
308	if p.extensions&Footnotes != 0 {
309	p.notes = make([]*reference, 0)
310	}
311	return &p
312	}
313
314	// Option customizes the Markdown processor's default behavior.
315	type Option func(*Markdown)
316
317	// WithRenderer allows you to override the default renderer.
318	func WithRenderer(r Renderer) Option {
319	return func(p *Markdown) {
320	p.renderer = r
321	}
322	}
323
324	// WithExtensions allows you to pick some of the many extensions provided by
325	// Blackfriday. You can bitwise OR them.
326	func WithExtensions(e Extensions) Option {
327	return func(p *Markdown) {
328	p.extensions = e
329	}
330	}
331
332	// WithNoExtensions turns off all extensions and custom behavior.
333	func WithNoExtensions() Option {
334	return func(p *Markdown) {
335	p.extensions = NoExtensions
336	p.renderer = NewHTMLRenderer(HTMLRendererParameters{
337	Flags: HTMLFlagsNone,
338	})
339	}
340	}
341
342	// WithRefOverride sets an optional function callback that is called every
343	// time a reference is resolved.
344	//
345	// In Markdown, the link reference syntax can be made to resolve a link to
346	// a reference instead of an inline URL, in one of the following ways:
347	//
348	// * [link text][refid]
349	// * [refid][]
350	//
351	// Usually, the refid is defined at the bottom of the Markdown document. If
352	// this override function is provided, the refid is passed to the override
353	// function first, before consulting the defined refids at the bottom. If
354	// the override function indicates an override did not occur, the refids at
355	// the bottom will be used to fill in the link details.
356	func WithRefOverride(o ReferenceOverrideFunc) Option {
357	return func(p *Markdown) {
358	p.referenceOverride = o
359	}
360	}
361
362	// Run is the main entry point to Blackfriday. It parses and renders a
363	// block of markdown-encoded text.
364	//
365	// The simplest invocation of Run takes one argument, input:
366	// output := Run(input)
367	// This will parse the input with CommonExtensions enabled and render it with
368	// the default HTMLRenderer (with CommonHTMLFlags).
369	//
370	// Variadic arguments opts can customize the default behavior. Since Markdown
371	// type does not contain exported fields, you can not use it directly. Instead,
372	// use the With* functions. For example, this will call the most basic
373	// functionality, with no extensions:
374	// output := Run(input, WithNoExtensions())
375	//
376	// You can use any number of With* arguments, even contradicting ones. They
377	// will be applied in order of appearance and the latter will override the
378	// former:
379	// output := Run(input, WithNoExtensions(), WithExtensions(exts),
380	// WithRenderer(yourRenderer))
381	func Run(input []byte, opts ...Option) []byte {
382	r := NewHTMLRenderer(HTMLRendererParameters{
383	Flags: CommonHTMLFlags,
384	})
385	optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)}
386	optList = append(optList, opts...)
387	parser := New(optList...)
388	ast := parser.Parse(input)
389	var buf bytes.Buffer
390	parser.renderer.RenderHeader(&buf, ast)
391	ast.Walk(func(node *Node, entering bool) WalkStatus {
392	return parser.renderer.RenderNode(&buf, node, entering)
393	})
394	parser.renderer.RenderFooter(&buf, ast)
395	return buf.Bytes()
396	}
397
398	// Parse is an entry point to the parsing part of Blackfriday. It takes an
399	// input markdown document and produces a syntax tree for its contents. This
400	// tree can then be rendered with a default or custom renderer, or
401	// analyzed/transformed by the caller to whatever non-standard needs they have.
402	// The return value is the root node of the syntax tree.
403	func (p Markdown) Parse(input []byte) Node {
404	p.block(input)
405	// Walk the tree and finish up some of unfinished blocks
406	for p.tip != nil {
407	p.finalize(p.tip)
408	}
409	// Walk the tree again and process inline markdown in each block
410	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
411	if node.Type == Paragraph \|\| node.Type == Heading \|\| node.Type == TableCell {
412	p.inline(node, node.content)
413	node.content = nil
414	}
415	return GoToNext
416	})
417	p.parseRefsToAST()
418	return p.doc
419	}
420
421	func (p *Markdown) parseRefsToAST() {
422	if p.extensions&Footnotes == 0 \|\| len(p.notes) == 0 {
423	return
424	}
425	p.tip = p.doc
426	block := p.addBlock(List, nil)
427	block.IsFootnotesList = true
428	block.ListFlags = ListTypeOrdered
429	flags := ListItemBeginningOfList
430	// Note: this loop is intentionally explicit, not range-form. This is
431	// because the body of the loop will append nested footnotes to p.notes and
432	// we need to process those late additions. Range form would only walk over
433	// the fixed initial set.
434	for i := 0; i < len(p.notes); i++ {
435	ref := p.notes[i]
436	p.addExistingChild(ref.footnote, 0)
437	block := ref.footnote
438	block.ListFlags = flags \| ListTypeOrdered
439	block.RefLink = ref.link
440	if ref.hasBlock {
441	flags \|= ListItemContainsBlock
442	p.block(ref.title)
443	} else {
444	p.inline(block, ref.title)
445	}
446	flags &^= ListItemBeginningOfList \| ListItemContainsBlock
447	}
448	above := block.Parent
449	finalizeList(block)
450	p.tip = above
451	block.Walk(func(node *Node, entering bool) WalkStatus {
452	if node.Type == Paragraph \|\| node.Type == Heading {
453	p.inline(node, node.content)
454	node.content = nil
455	}
456	return GoToNext
457	})
458	}
459
460	//
461	// Link references
462	//
463	// This section implements support for references that (usually) appear
464	// as footnotes in a document, and can be referenced anywhere in the document.
465	// The basic format is:
466	//
467	// [1]: http://www.google.com/ "Google"
468	// [2]: http://www.github.com/ "Github"
469	//
470	// Anywhere in the document, the reference can be linked by referring to its
471	// label, i.e., 1 and 2 in this example, as in:
472	//
473	// This library is hosted on [Github][2], a git hosting site.
474	//
475	// Actual footnotes as specified in Pandoc and supported by some other Markdown
476	// libraries such as php-markdown are also taken care of. They look like this:
477	//
478	// This sentence needs a bit of further explanation.[^note]
479	//
480	// [^note]: This is the explanation.
481	//
482	// Footnotes should be placed at the end of the document in an ordered list.
483	// Finally, there are inline footnotes such as:
484	//
485	// Inline footnotes^[Also supported.] provide a quick inline explanation,
486	// but are rendered at the bottom of the document.
487	//
488
489	// reference holds all information necessary for a reference-style links or
490	// footnotes.
491	//
492	// Consider this markdown with reference-style links:
493	//
494	// [link][ref]
495	//
496	// [ref]: /url/ "tooltip title"
497	//
498	// It will be ultimately converted to this HTML:
499	//
500	// <p><a href=\"/url/\" title=\"title\">link</a></p>
501	//
502	// And a reference structure will be populated as follows:
503	//
504	// p.refs["ref"] = &reference{
505	// link: "/url/",
506	// title: "tooltip title",
507	// }
508	//
509	// Alternatively, reference can contain information about a footnote. Consider
510	// this markdown:
511	//
512	// Text needing a footnote.[^a]
513	//
514	// [^a]: This is the note
515	//
516	// A reference structure will be populated as follows:
517	//
518	// p.refs["a"] = &reference{
519	// link: "a",
520	// title: "This is the note",
521	// noteID: <some positive int>,
522	// }
523	//
524	// TODO: As you can see, it begs for splitting into two dedicated structures
525	// for refs and for footnotes.
526	type reference struct {
527	link []byte
528	title []byte
529	noteID int // 0 if not a footnote ref
530	hasBlock bool
531	footnote *Node // a link to the Item node within a list of footnotes
532
533	text []byte // only gets populated by refOverride feature with Reference.Text
534	}
535
536	func (r *reference) String() string {
537	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
538	r.link, r.title, r.text, r.noteID, r.hasBlock)
539	}
540
541	// Check whether or not data starts with a reference link.
542	// If so, it is parsed and stored in the list of references
543	// (in the render struct).
544	// Returns the number of bytes to skip to move past it,
545	// or zero if the first line is not a reference.
546	func isReference(p *Markdown, data []byte, tabSize int) int {
547	// up to 3 optional leading spaces
548	if len(data) < 4 {
549	return 0
550	}
551	i := 0
552	for i < 3 && data[i] == ' ' {
553	i++
554	}
555
556	noteID := 0
557
558	// id part: anything but a newline between brackets
559	if data[i] != '[' {
560	return 0
561	}
562	i++
563	if p.extensions&Footnotes != 0 {
564	if i < len(data) && data[i] == '^' {
565	// we can set it to anything here because the proper noteIds will
566	// be assigned later during the second pass. It just has to be != 0
567	noteID = 1
568	i++
569	}
570	}
571	idOffset := i
572	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
573	i++
574	}
575	if i >= len(data) \|\| data[i] != ']' {
576	return 0
577	}
578	idEnd := i
579	// footnotes can have empty ID, like this: [^], but a reference can not be
580	// empty like this: []. Break early if it's not a footnote and there's no ID
581	if noteID == 0 && idOffset == idEnd {
582	return 0
583	}
584	// spacer: colon (space \| tab)* newline? (space \| tab)*
585	i++
586	if i >= len(data) \|\| data[i] != ':' {
587	return 0
588	}
589	i++
590	for i < len(data) && (data[i] == ' ' \|\| data[i] == '\t') {
591	i++
592	}
593	if i < len(data) && (data[i] == '\n' \|\| data[i] == '\r') {
594	i++
595	if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
596	i++
597	}
598	}
599	for i < len(data) && (data[i] == ' ' \|\| data[i] == '\t') {
600	i++
601	}
602	if i >= len(data) {
603	return 0
604	}
605
606	var (
607	linkOffset, linkEnd int
608	titleOffset, titleEnd int
609	lineEnd int
610	raw []byte
611	hasBlock bool
612	)
613
614	if p.extensions&Footnotes != 0 && noteID != 0 {
615	linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
616	lineEnd = linkEnd
617	} else {
618	linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
619	}
620	if lineEnd == 0 {
621	return 0
622	}
623
624	// a valid ref has been found
625
626	ref := &reference{
627	noteID: noteID,
628	hasBlock: hasBlock,
629	}
630
631	if noteID > 0 {
632	// reusing the link field for the id since footnotes don't have links
633	ref.link = data[idOffset:idEnd]
634	// if footnote, it's not really a title, it's the contained text
635	ref.title = raw
636	} else {
637	ref.link = data[linkOffset:linkEnd]
638	ref.title = data[titleOffset:titleEnd]
639	}
640
641	// id matches are case-insensitive
642	id := string(bytes.ToLower(data[idOffset:idEnd]))
643
644	p.refs[id] = ref
645
646	return lineEnd
647	}
648
649	func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
650	// link: whitespace-free sequence, optionally between angle brackets
651	if data[i] == '<' {
652	i++
653	}
654	linkOffset = i
655	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
656	i++
657	}
658	linkEnd = i
659	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
660	linkOffset++
661	linkEnd--
662	}
663
664	// optional spacer: (space \| tab)* (newline \| '\'' \| '"' \| '(' )
665	for i < len(data) && (data[i] == ' ' \|\| data[i] == '\t') {
666	i++
667	}
668	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
669	return
670	}
671
672	// compute end-of-line
673	if i >= len(data) \|\| data[i] == '\r' \|\| data[i] == '\n' {
674	lineEnd = i
675	}
676	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
677	lineEnd++
678	}
679
680	// optional (space\|tab)* spacer after a newline
681	if lineEnd > 0 {
682	i = lineEnd + 1
683	for i < len(data) && (data[i] == ' ' \|\| data[i] == '\t') {
684	i++
685	}
686	}
687
688	// optional title: any non-newline sequence enclosed in '"() alone on its line
689	if i+1 < len(data) && (data[i] == '\'' \|\| data[i] == '"' \|\| data[i] == '(') {
690	i++
691	titleOffset = i
692
693	// look for EOL
694	for i < len(data) && data[i] != '\n' && data[i] != '\r' {
695	i++
696	}
697	if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
698	titleEnd = i + 1
699	} else {
700	titleEnd = i
701	}
702
703	// step back
704	i--
705	for i > titleOffset && (data[i] == ' ' \|\| data[i] == '\t') {
706	i--
707	}
708	if i > titleOffset && (data[i] == '\'' \|\| data[i] == '"' \|\| data[i] == ')') {
709	lineEnd = titleEnd
710	titleEnd = i
711	}
712	}
713
714	return
715	}
716
717	// The first bit of this logic is the same as Parser.listItem, but the rest
718	// is much simpler. This function simply finds the entire block and shifts it
719	// over by one tab if it is indeed a block (just returns the line if it's not).
720	// blockEnd is the end of the section in the input buffer, and contents is the
721	// extracted text that was shifted over one tab. It will need to be rendered at
722	// the end of the document.
723	func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
724	if i == 0 \|\| len(data) == 0 {
725	return
726	}
727
728	// skip leading whitespace on first line
729	for i < len(data) && data[i] == ' ' {
730	i++
731	}
732
733	blockStart = i
734
735	// find the end of the line
736	blockEnd = i
737	for i < len(data) && data[i-1] != '\n' {
738	i++
739	}
740
741	// get working buffer
742	var raw bytes.Buffer
743
744	// put the first line into the working buffer
745	raw.Write(data[blockEnd:i])
746	blockEnd = i
747
748	// process the following lines
749	containsBlankLine := false
750
751	gatherLines:
752	for blockEnd < len(data) {
753	i++
754
755	// find the end of this line
756	for i < len(data) && data[i-1] != '\n' {
757	i++
758	}
759
760	// if it is an empty line, guess that it is part of this item
761	// and move on to the next line
762	if p.isEmpty(data[blockEnd:i]) > 0 {
763	containsBlankLine = true
764	blockEnd = i
765	continue
766	}
767
768	n := 0
769	if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
770	// this is the end of the block.
771	// we don't want to include this last line in the index.
772	break gatherLines
773	}
774
775	// if there were blank lines before this one, insert a new one now
776	if containsBlankLine {
777	raw.WriteByte('\n')
778	containsBlankLine = false
779	}
780
781	// get rid of that first tab, write to buffer
782	raw.Write(data[blockEnd+n : i])
783	hasBlock = true
784
785	blockEnd = i
786	}
787
788	if data[blockEnd-1] != '\n' {
789	raw.WriteByte('\n')
790	}
791
792	contents = raw.Bytes()
793
794	return
795	}
796
797	//
798	//
799	// Miscellaneous helper functions
800	//
801	//
802
// ispunct reports whether c is an ASCII punctuation symbol.
// The set is taken from a private function in regexp in the stdlib.
//
// The previous revision spelled the pipe inside the string literal as an
// escape sequence, which is not a valid Go escape; the literal below
// contains the plain '|' byte.
func ispunct(c byte) bool {
	const punctuation = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
	return strings.IndexByte(punctuation, c) >= 0
}
813
// isspace reports whether c is a whitespace character, horizontal (space,
// tab) or vertical (newline, carriage return, form feed, vertical tab).
func isspace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
818
// ishorizontalspace reports whether c is a space or a tab.
func ishorizontalspace(c byte) bool {
	switch c {
	case ' ', '\t':
		return true
	}
	return false
}
823
// isverticalspace reports whether c is a vertical whitespace character:
// newline, carriage return, form feed or vertical tab.
func isverticalspace(c byte) bool {
	switch c {
	case '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
828
// isletter reports whether c is an ASCII letter (a-z or A-Z).
func isletter(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
833
834	// Test if a character is a letter or a digit.
835	// TODO: check when this is looking for ASCII alnum and when it should use unicode
836	func isalnum(c byte) bool {
837	return (c >= '0' && c <= '9') \|\| isletter(c)
838	}
839
840	// Replace tab characters with spaces, aligning to the next TAB_SIZE column.
841	// always ends output with a newline
842	func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
843	// first, check for common cases: no tabs, or only tabs at beginning of line
844	i, prefix := 0, 0
845	slowcase := false
846	for i = 0; i < len(line); i++ {
847	if line[i] == '\t' {
848	if prefix == i {
849	prefix++
850	} else {
851	slowcase = true
852	break
853	}
854	}
855	}
856
857	// no need to decode runes if all tabs are at the beginning of the line
858	if !slowcase {
859	for i = 0; i < prefix*tabSize; i++ {
860	out.WriteByte(' ')
861	}
862	out.Write(line[prefix:])
863	return
864	}
865
866	// the slow case: we need to count runes to figure out how
867	// many spaces to insert for each tab
868	column := 0
869	i = 0
870	for i < len(line) {
871	start := i
872	for i < len(line) && line[i] != '\t' {
873	_, size := utf8.DecodeRune(line[i:])
874	i += size
875	column++
876	}
877
878	if i > start {
879	out.Write(line[start:i])
880	}
881
882	if i >= len(line) {
883	break
884	}
885
886	for {
887	out.WriteByte(' ')
888	column++
889	if column%tabSize == 0 {
890	break
891	}
892	}
893
894	i++
895	}
896	}
897
// isIndented reports whether a line counts as indented. It returns the
// number of bytes the indent occupies: 1 for a leading tab, indentSize for
// indentSize leading spaces, and 0 when the line is not indented.
func isIndented(data []byte, indentSize int) int {
	switch {
	case len(data) == 0:
		return 0
	case data[0] == '\t':
		// A single tab is a full indent.
		return 1
	case len(data) < indentSize:
		// Too short to hold indentSize spaces.
		return 0
	}
	for k := indentSize - 1; k >= 0; k-- {
		if data[k] != ' ' {
			return 0
		}
	}
	return indentSize
}
917
918	// Create a url-safe slug for fragments
919	func slugify(in []byte) []byte {
920	if len(in) == 0 {
921	return in
922	}
923	out := make([]byte, 0, len(in))
924	sym := false
925
926	for _, ch := range in {
927	if isalnum(ch) {
928	sym = false
929	out = append(out, ch)
930	} else if sym {
931	continue
932	} else {
933	out = append(out, '-')
934	sym = true
935	}
936	}
937	var a, b int
938	var ch byte
939	for a, ch = range out {
940	if ch != '-' {
941	break
942	}
943	}
944	for b = len(out) - 1; b > 0; b-- {
945	if out[b] != '-' {
946	break
947	}
948	}
949	return out[a : b+1]
950	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: