1 | // Copyright 2011 The Go Authors. All rights reserved.
|
---|
2 | // Use of this source code is governed by a BSD-style
|
---|
3 | // license that can be found in the LICENSE file.
|
---|
4 |
|
---|
5 | package html
|
---|
6 |
|
---|
7 | import (
|
---|
8 | "bufio"
|
---|
9 | "errors"
|
---|
10 | "fmt"
|
---|
11 | "io"
|
---|
12 | "strings"
|
---|
13 | )
|
---|
14 |
|
---|
// writer is the set of write methods the renderer needs from its
// destination: bulk byte writes, single-byte writes and string writes.
// Render uses a destination directly when it already satisfies writer and
// otherwise wraps it in a bufio.Writer, which does.
type writer interface {
	io.Writer
	io.ByteWriter
	io.StringWriter
}
|
---|
20 |
|
---|
21 | // Render renders the parse tree n to the given writer.
|
---|
22 | //
|
---|
23 | // Rendering is done on a 'best effort' basis: calling Parse on the output of
|
---|
24 | // Render will always result in something similar to the original tree, but it
|
---|
25 | // is not necessarily an exact clone unless the original tree was 'well-formed'.
|
---|
26 | // 'Well-formed' is not easily specified; the HTML5 specification is
|
---|
27 | // complicated.
|
---|
28 | //
|
---|
29 | // Calling Parse on arbitrary input typically results in a 'well-formed' parse
|
---|
30 | // tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
|
---|
31 | // For example, in a 'well-formed' parse tree, no <a> element is a child of
|
---|
32 | // another <a> element: parsing "<a><a>" results in two sibling elements.
|
---|
33 | // Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
|
---|
34 | // <table> element: parsing "<p><table><a>" results in a <p> with two sibling
|
---|
35 | // children; the <a> is reparented to the <table>'s parent. However, calling
|
---|
36 | // Parse on "<a><table><a>" does not return an error, but the result has an <a>
|
---|
37 | // element with an <a> child, and is therefore not 'well-formed'.
|
---|
38 | //
|
---|
39 | // Programmatically constructed trees are typically also 'well-formed', but it
|
---|
40 | // is possible to construct a tree that looks innocuous but, when rendered and
|
---|
41 | // re-parsed, results in a different tree. A simple example is that a solitary
|
---|
42 | // text node would become a tree containing <html>, <head> and <body> elements.
|
---|
43 | // Another example is that the programmatic equivalent of "a<head>b</head>c"
|
---|
44 | // becomes "<html><head><head/><body>abc</body></html>".
|
---|
45 | func Render(w io.Writer, n *Node) error {
|
---|
46 | if x, ok := w.(writer); ok {
|
---|
47 | return render(x, n)
|
---|
48 | }
|
---|
49 | buf := bufio.NewWriter(w)
|
---|
50 | if err := render(buf, n); err != nil {
|
---|
51 | return err
|
---|
52 | }
|
---|
53 | return buf.Flush()
|
---|
54 | }
|
---|
55 |
|
---|
56 | // plaintextAbort is returned from render1 when a <plaintext> element
|
---|
57 | // has been rendered. No more end tags should be rendered after that.
|
---|
58 | var plaintextAbort = errors.New("html: internal error (plaintext abort)")
|
---|
59 |
|
---|
60 | func render(w writer, n *Node) error {
|
---|
61 | err := render1(w, n)
|
---|
62 | if err == plaintextAbort {
|
---|
63 | err = nil
|
---|
64 | }
|
---|
65 | return err
|
---|
66 | }
|
---|
67 |
|
---|
68 | func render1(w writer, n *Node) error {
|
---|
69 | // Render non-element nodes; these are the easy cases.
|
---|
70 | switch n.Type {
|
---|
71 | case ErrorNode:
|
---|
72 | return errors.New("html: cannot render an ErrorNode node")
|
---|
73 | case TextNode:
|
---|
74 | return escape(w, n.Data)
|
---|
75 | case DocumentNode:
|
---|
76 | for c := n.FirstChild; c != nil; c = c.NextSibling {
|
---|
77 | if err := render1(w, c); err != nil {
|
---|
78 | return err
|
---|
79 | }
|
---|
80 | }
|
---|
81 | return nil
|
---|
82 | case ElementNode:
|
---|
83 | // No-op.
|
---|
84 | case CommentNode:
|
---|
85 | if _, err := w.WriteString("<!--"); err != nil {
|
---|
86 | return err
|
---|
87 | }
|
---|
88 | if err := escape(w, n.Data); err != nil {
|
---|
89 | return err
|
---|
90 | }
|
---|
91 | if _, err := w.WriteString("-->"); err != nil {
|
---|
92 | return err
|
---|
93 | }
|
---|
94 | return nil
|
---|
95 | case DoctypeNode:
|
---|
96 | if _, err := w.WriteString("<!DOCTYPE "); err != nil {
|
---|
97 | return err
|
---|
98 | }
|
---|
99 | if err := escape(w, n.Data); err != nil {
|
---|
100 | return err
|
---|
101 | }
|
---|
102 | if n.Attr != nil {
|
---|
103 | var p, s string
|
---|
104 | for _, a := range n.Attr {
|
---|
105 | switch a.Key {
|
---|
106 | case "public":
|
---|
107 | p = a.Val
|
---|
108 | case "system":
|
---|
109 | s = a.Val
|
---|
110 | }
|
---|
111 | }
|
---|
112 | if p != "" {
|
---|
113 | if _, err := w.WriteString(" PUBLIC "); err != nil {
|
---|
114 | return err
|
---|
115 | }
|
---|
116 | if err := writeQuoted(w, p); err != nil {
|
---|
117 | return err
|
---|
118 | }
|
---|
119 | if s != "" {
|
---|
120 | if err := w.WriteByte(' '); err != nil {
|
---|
121 | return err
|
---|
122 | }
|
---|
123 | if err := writeQuoted(w, s); err != nil {
|
---|
124 | return err
|
---|
125 | }
|
---|
126 | }
|
---|
127 | } else if s != "" {
|
---|
128 | if _, err := w.WriteString(" SYSTEM "); err != nil {
|
---|
129 | return err
|
---|
130 | }
|
---|
131 | if err := writeQuoted(w, s); err != nil {
|
---|
132 | return err
|
---|
133 | }
|
---|
134 | }
|
---|
135 | }
|
---|
136 | return w.WriteByte('>')
|
---|
137 | case RawNode:
|
---|
138 | _, err := w.WriteString(n.Data)
|
---|
139 | return err
|
---|
140 | default:
|
---|
141 | return errors.New("html: unknown node type")
|
---|
142 | }
|
---|
143 |
|
---|
144 | // Render the <xxx> opening tag.
|
---|
145 | if err := w.WriteByte('<'); err != nil {
|
---|
146 | return err
|
---|
147 | }
|
---|
148 | if _, err := w.WriteString(n.Data); err != nil {
|
---|
149 | return err
|
---|
150 | }
|
---|
151 | for _, a := range n.Attr {
|
---|
152 | if err := w.WriteByte(' '); err != nil {
|
---|
153 | return err
|
---|
154 | }
|
---|
155 | if a.Namespace != "" {
|
---|
156 | if _, err := w.WriteString(a.Namespace); err != nil {
|
---|
157 | return err
|
---|
158 | }
|
---|
159 | if err := w.WriteByte(':'); err != nil {
|
---|
160 | return err
|
---|
161 | }
|
---|
162 | }
|
---|
163 | if _, err := w.WriteString(a.Key); err != nil {
|
---|
164 | return err
|
---|
165 | }
|
---|
166 | if _, err := w.WriteString(`="`); err != nil {
|
---|
167 | return err
|
---|
168 | }
|
---|
169 | if err := escape(w, a.Val); err != nil {
|
---|
170 | return err
|
---|
171 | }
|
---|
172 | if err := w.WriteByte('"'); err != nil {
|
---|
173 | return err
|
---|
174 | }
|
---|
175 | }
|
---|
176 | if voidElements[n.Data] {
|
---|
177 | if n.FirstChild != nil {
|
---|
178 | return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
|
---|
179 | }
|
---|
180 | _, err := w.WriteString("/>")
|
---|
181 | return err
|
---|
182 | }
|
---|
183 | if err := w.WriteByte('>'); err != nil {
|
---|
184 | return err
|
---|
185 | }
|
---|
186 |
|
---|
187 | // Add initial newline where there is danger of a newline beging ignored.
|
---|
188 | if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
|
---|
189 | switch n.Data {
|
---|
190 | case "pre", "listing", "textarea":
|
---|
191 | if err := w.WriteByte('\n'); err != nil {
|
---|
192 | return err
|
---|
193 | }
|
---|
194 | }
|
---|
195 | }
|
---|
196 |
|
---|
197 | // Render any child nodes.
|
---|
198 | switch n.Data {
|
---|
199 | case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
|
---|
200 | for c := n.FirstChild; c != nil; c = c.NextSibling {
|
---|
201 | if c.Type == TextNode {
|
---|
202 | if _, err := w.WriteString(c.Data); err != nil {
|
---|
203 | return err
|
---|
204 | }
|
---|
205 | } else {
|
---|
206 | if err := render1(w, c); err != nil {
|
---|
207 | return err
|
---|
208 | }
|
---|
209 | }
|
---|
210 | }
|
---|
211 | if n.Data == "plaintext" {
|
---|
212 | // Don't render anything else. <plaintext> must be the
|
---|
213 | // last element in the file, with no closing tag.
|
---|
214 | return plaintextAbort
|
---|
215 | }
|
---|
216 | default:
|
---|
217 | for c := n.FirstChild; c != nil; c = c.NextSibling {
|
---|
218 | if err := render1(w, c); err != nil {
|
---|
219 | return err
|
---|
220 | }
|
---|
221 | }
|
---|
222 | }
|
---|
223 |
|
---|
224 | // Render the </xxx> closing tag.
|
---|
225 | if _, err := w.WriteString("</"); err != nil {
|
---|
226 | return err
|
---|
227 | }
|
---|
228 | if _, err := w.WriteString(n.Data); err != nil {
|
---|
229 | return err
|
---|
230 | }
|
---|
231 | return w.WriteByte('>')
|
---|
232 | }
|
---|
233 |
|
---|
234 | // writeQuoted writes s to w surrounded by quotes. Normally it will use double
|
---|
235 | // quotes, but if s contains a double quote, it will use single quotes.
|
---|
236 | // It is used for writing the identifiers in a doctype declaration.
|
---|
237 | // In valid HTML, they can't contain both types of quotes.
|
---|
238 | func writeQuoted(w writer, s string) error {
|
---|
239 | var q byte = '"'
|
---|
240 | if strings.Contains(s, `"`) {
|
---|
241 | q = '\''
|
---|
242 | }
|
---|
243 | if err := w.WriteByte(q); err != nil {
|
---|
244 | return err
|
---|
245 | }
|
---|
246 | if _, err := w.WriteString(s); err != nil {
|
---|
247 | return err
|
---|
248 | }
|
---|
249 | if err := w.WriteByte(q); err != nil {
|
---|
250 | return err
|
---|
251 | }
|
---|
252 | return nil
|
---|
253 | }
|
---|
254 |
|
---|
// voidElements is the set of void elements, as listed in Section 12.1.2,
// "Elements". Void elements are those that can't have any contents.
//
// "keygen" has been removed from the spec, but is kept here for backwards
// compatibility.
var voidElements = map[string]bool{
	"area": true, "base": true, "br": true, "col": true, "embed": true,
	"hr": true, "img": true, "input": true, "keygen": true, "link": true,
	"meta": true, "param": true, "source": true, "track": true, "wbr": true,
}
|
---|