Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: code/trunk/vendor/golang.org/x/net/html/parse.go@ 145

Last change on this file since 145 was 145, checked in by Izuru Yakumo, 22 months ago

Updated the Makefile and vendored dependencies

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 58.7 KB

Line
1	// Copyright 2010 The Go Authors. All rights reserved.
2	// Use of this source code is governed by a BSD-style
3	// license that can be found in the LICENSE file.
4
5	package html
6
7	import (
8	"errors"
9	"fmt"
10	"io"
11	"strings"
12
13	a "golang.org/x/net/html/atom"
14	)
15
16	// A parser implements the HTML5 parsing algorithm:
17	// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
18	type parser struct {
19	// tokenizer provides the tokens for the parser.
20	tokenizer *Tokenizer
21	// tok is the most recently read token.
22	tok Token
23	// Self-closing tags like <hr/> are treated as start tags, except that
24	// hasSelfClosingToken is set while they are being processed.
25	hasSelfClosingToken bool
26	// doc is the document root element.
27	doc *Node
28	// The stack of open elements (section 12.2.4.2) and active formatting
29	// elements (section 12.2.4.3).
30	oe, afe nodeStack
31	// Element pointers (section 12.2.4.4).
32	head, form *Node
33	// Other parsing state flags (section 12.2.4.5).
34	scripting, framesetOK bool
35	// The stack of template insertion modes
36	templateStack insertionModeStack
37	// im is the current insertion mode.
38	im insertionMode
39	// originalIM is the insertion mode to go back to after completing a text
40	// or inTableText insertion mode.
41	originalIM insertionMode
42	// fosterParenting is whether new elements should be inserted according to
43	// the foster parenting rules (section 12.2.6.1).
44	fosterParenting bool
45	// quirks is whether the parser is operating in "quirks mode."
46	quirks bool
47	// fragment is whether the parser is parsing an HTML fragment.
48	fragment bool
49	// context is the context element when parsing an HTML fragment
50	// (section 12.4).
51	context *Node
52	}
53
54	func (p parser) top() Node {
55	if n := p.oe.top(); n != nil {
56	return n
57	}
58	return p.doc
59	}
60
61	// Stop tags for use in popUntil. These come from section 12.2.4.2.
62	var (
63	defaultScopeStopTags = map[string][]a.Atom{
64	"": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
65	"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
66	"svg": {a.Desc, a.ForeignObject, a.Title},
67	}
68	)
69
// scope selects which set of stop tags bounds a search of, or a truncation
// of, the stack of open elements.
type scope int

const (
	// defaultScope stops only at the tags in defaultScopeStopTags.
	defaultScope = scope(iota)
	// listItemScope additionally stops at <ol> and <ul>.
	listItemScope
	// buttonScope additionally stops at <button>.
	buttonScope
	// tableScope stops at <html>, <table> and <template>.
	tableScope
	// tableRowScope is accepted by clearStackToContext only.
	tableRowScope
	// tableBodyScope is accepted by clearStackToContext only.
	tableBodyScope
	// selectScope stops at every HTML element except <optgroup> and <option>.
	selectScope
)
81
82	// popUntil pops the stack of open elements at the highest element whose tag
83	// is in matchTags, provided there is no higher element in the scope's stop
84	// tags (as defined in section 12.2.4.2). It returns whether or not there was
85	// such an element. If there was not, popUntil leaves the stack unchanged.
86	//
87	// For example, the set of stop tags for table scope is: "html", "table". If
88	// the stack was:
89	// ["html", "body", "font", "table", "b", "i", "u"]
90	// then popUntil(tableScope, "font") would return false, but
91	// popUntil(tableScope, "i") would return true and the stack would become:
92	// ["html", "body", "font", "table", "b"]
93	//
94	// If an element's tag is in both the stop tags and matchTags, then the stack
95	// will be popped and the function returns true (provided, of course, there was
96	// no higher element in the stack that was also in the stop tags). For example,
97	// popUntil(tableScope, "table") returns true and leaves:
98	// ["html", "body", "font"]
99	func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
100	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
101	p.oe = p.oe[:i]
102	return true
103	}
104	return false
105	}
106
107	// indexOfElementInScope returns the index in p.oe of the highest element whose
108	// tag is in matchTags that is in scope. If no matching element is in scope, it
109	// returns -1.
110	func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
111	for i := len(p.oe) - 1; i >= 0; i-- {
112	tagAtom := p.oe[i].DataAtom
113	if p.oe[i].Namespace == "" {
114	for _, t := range matchTags {
115	if t == tagAtom {
116	return i
117	}
118	}
119	switch s {
120	case defaultScope:
121	// No-op.
122	case listItemScope:
123	if tagAtom == a.Ol \|\| tagAtom == a.Ul {
124	return -1
125	}
126	case buttonScope:
127	if tagAtom == a.Button {
128	return -1
129	}
130	case tableScope:
131	if tagAtom == a.Html \|\| tagAtom == a.Table \|\| tagAtom == a.Template {
132	return -1
133	}
134	case selectScope:
135	if tagAtom != a.Optgroup && tagAtom != a.Option {
136	return -1
137	}
138	default:
139	panic("unreachable")
140	}
141	}
142	switch s {
143	case defaultScope, listItemScope, buttonScope:
144	for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
145	if t == tagAtom {
146	return -1
147	}
148	}
149	}
150	}
151	return -1
152	}
153
154	// elementInScope is like popUntil, except that it doesn't modify the stack of
155	// open elements.
156	func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
157	return p.indexOfElementInScope(s, matchTags...) != -1
158	}
159
160	// clearStackToContext pops elements off the stack of open elements until a
161	// scope-defined element is found.
162	func (p *parser) clearStackToContext(s scope) {
163	for i := len(p.oe) - 1; i >= 0; i-- {
164	tagAtom := p.oe[i].DataAtom
165	switch s {
166	case tableScope:
167	if tagAtom == a.Html \|\| tagAtom == a.Table \|\| tagAtom == a.Template {
168	p.oe = p.oe[:i+1]
169	return
170	}
171	case tableRowScope:
172	if tagAtom == a.Html \|\| tagAtom == a.Tr \|\| tagAtom == a.Template {
173	p.oe = p.oe[:i+1]
174	return
175	}
176	case tableBodyScope:
177	if tagAtom == a.Html \|\| tagAtom == a.Tbody \|\| tagAtom == a.Tfoot \|\| tagAtom == a.Thead \|\| tagAtom == a.Template {
178	p.oe = p.oe[:i+1]
179	return
180	}
181	default:
182	panic("unreachable")
183	}
184	}
185	}
186
187	// parseGenericRawTextElement implements the generic raw text element parsing
188	// algorithm defined in 12.2.6.2.
189	// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
190	// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
191	// officially, need to make tokenizer consider both states.
192	func (p *parser) parseGenericRawTextElement() {
193	p.addElement()
194	p.originalIM = p.im
195	p.im = textIM
196	}
197
198	// generateImpliedEndTags pops nodes off the stack of open elements as long as
199	// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
200	// If exceptions are specified, nodes with that name will not be popped off.
201	func (p *parser) generateImpliedEndTags(exceptions ...string) {
202	var i int
203	loop:
204	for i = len(p.oe) - 1; i >= 0; i-- {
205	n := p.oe[i]
206	if n.Type != ElementNode {
207	break
208	}
209	switch n.DataAtom {
210	case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
211	for _, except := range exceptions {
212	if n.Data == except {
213	break loop
214	}
215	}
216	continue
217	}
218	break
219	}
220
221	p.oe = p.oe[:i+1]
222	}
223
224	// addChild adds a child node n to the top element, and pushes n onto the stack
225	// of open elements if it is an element node.
226	func (p parser) addChild(n Node) {
227	if p.shouldFosterParent() {
228	p.fosterParent(n)
229	} else {
230	p.top().AppendChild(n)
231	}
232
233	if n.Type == ElementNode {
234	p.oe = append(p.oe, n)
235	}
236	}
237
238	// shouldFosterParent returns whether the next node to be added should be
239	// foster parented.
240	func (p *parser) shouldFosterParent() bool {
241	if p.fosterParenting {
242	switch p.top().DataAtom {
243	case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
244	return true
245	}
246	}
247	return false
248	}
249
250	// fosterParent adds a child node according to the foster parenting rules.
251	// Section 12.2.6.1, "foster parenting".
252	func (p parser) fosterParent(n Node) {
253	var table, parent, prev, template *Node
254	var i int
255	for i = len(p.oe) - 1; i >= 0; i-- {
256	if p.oe[i].DataAtom == a.Table {
257	table = p.oe[i]
258	break
259	}
260	}
261
262	var j int
263	for j = len(p.oe) - 1; j >= 0; j-- {
264	if p.oe[j].DataAtom == a.Template {
265	template = p.oe[j]
266	break
267	}
268	}
269
270	if template != nil && (table == nil \|\| j > i) {
271	template.AppendChild(n)
272	return
273	}
274
275	if table == nil {
276	// The foster parent is the html element.
277	parent = p.oe[0]
278	} else {
279	parent = table.Parent
280	}
281	if parent == nil {
282	parent = p.oe[i-1]
283	}
284
285	if table != nil {
286	prev = table.PrevSibling
287	} else {
288	prev = parent.LastChild
289	}
290	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
291	prev.Data += n.Data
292	return
293	}
294
295	parent.InsertBefore(n, table)
296	}
297
298	// addText adds text to the preceding node if it is a text node, or else it
299	// calls addChild with a new text node.
300	func (p *parser) addText(text string) {
301	if text == "" {
302	return
303	}
304
305	if p.shouldFosterParent() {
306	p.fosterParent(&Node{
307	Type: TextNode,
308	Data: text,
309	})
310	return
311	}
312
313	t := p.top()
314	if n := t.LastChild; n != nil && n.Type == TextNode {
315	n.Data += text
316	return
317	}
318	p.addChild(&Node{
319	Type: TextNode,
320	Data: text,
321	})
322	}
323
324	// addElement adds a child element based on the current token.
325	func (p *parser) addElement() {
326	p.addChild(&Node{
327	Type: ElementNode,
328	DataAtom: p.tok.DataAtom,
329	Data: p.tok.Data,
330	Attr: p.tok.Attr,
331	})
332	}
333
334	// Section 12.2.4.3.
335	func (p *parser) addFormattingElement() {
336	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
337	p.addElement()
338
339	// Implement the Noah's Ark clause, but with three per family instead of two.
340	identicalElements := 0
341	findIdenticalElements:
342	for i := len(p.afe) - 1; i >= 0; i-- {
343	n := p.afe[i]
344	if n.Type == scopeMarkerNode {
345	break
346	}
347	if n.Type != ElementNode {
348	continue
349	}
350	if n.Namespace != "" {
351	continue
352	}
353	if n.DataAtom != tagAtom {
354	continue
355	}
356	if len(n.Attr) != len(attr) {
357	continue
358	}
359	compareAttributes:
360	for _, t0 := range n.Attr {
361	for _, t1 := range attr {
362	if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
363	// Found a match for this attribute, continue with the next attribute.
364	continue compareAttributes
365	}
366	}
367	// If we get here, there is no attribute that matches a.
368	// Therefore the element is not identical to the new one.
369	continue findIdenticalElements
370	}
371
372	identicalElements++
373	if identicalElements >= 3 {
374	p.afe.remove(n)
375	}
376	}
377
378	p.afe = append(p.afe, p.top())
379	}
380
381	// Section 12.2.4.3.
382	func (p *parser) clearActiveFormattingElements() {
383	for {
384	if n := p.afe.pop(); len(p.afe) == 0 \|\| n.Type == scopeMarkerNode {
385	return
386	}
387	}
388	}
389
390	// Section 12.2.4.3.
391	func (p *parser) reconstructActiveFormattingElements() {
392	n := p.afe.top()
393	if n == nil {
394	return
395	}
396	if n.Type == scopeMarkerNode \|\| p.oe.index(n) != -1 {
397	return
398	}
399	i := len(p.afe) - 1
400	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
401	if i == 0 {
402	i = -1
403	break
404	}
405	i--
406	n = p.afe[i]
407	}
408	for {
409	i++
410	clone := p.afe[i].clone()
411	p.addChild(clone)
412	p.afe[i] = clone
413	if i == len(p.afe)-1 {
414	break
415	}
416	}
417	}
418
419	// Section 12.2.5.
420	func (p *parser) acknowledgeSelfClosingTag() {
421	p.hasSelfClosingToken = false
422	}
423
424	// An insertion mode (section 12.2.4.1) is the state transition function from
425	// a particular state in the HTML5 parser's state machine. It updates the
426	// parser's fields depending on parser.tok (where ErrorToken means EOF).
427	// It returns whether the token was consumed.
428	type insertionMode func(*parser) bool
429
430	// setOriginalIM sets the insertion mode to return to after completing a text or
431	// inTableText insertion mode.
432	// Section 12.2.4.1, "using the rules for".
433	func (p *parser) setOriginalIM() {
434	if p.originalIM != nil {
435	panic("html: bad parser state: originalIM was set twice")
436	}
437	p.originalIM = p.im
438	}
439
440	// Section 12.2.4.1, "reset the insertion mode".
441	func (p *parser) resetInsertionMode() {
442	for i := len(p.oe) - 1; i >= 0; i-- {
443	n := p.oe[i]
444	last := i == 0
445	if last && p.context != nil {
446	n = p.context
447	}
448
449	switch n.DataAtom {
450	case a.Select:
451	if !last {
452	for ancestor, first := n, p.oe[0]; ancestor != first; {
453	ancestor = p.oe[p.oe.index(ancestor)-1]
454	switch ancestor.DataAtom {
455	case a.Template:
456	p.im = inSelectIM
457	return
458	case a.Table:
459	p.im = inSelectInTableIM
460	return
461	}
462	}
463	}
464	p.im = inSelectIM
465	case a.Td, a.Th:
466	// TODO: remove this divergence from the HTML5 spec.
467	//
468	// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
469	p.im = inCellIM
470	case a.Tr:
471	p.im = inRowIM
472	case a.Tbody, a.Thead, a.Tfoot:
473	p.im = inTableBodyIM
474	case a.Caption:
475	p.im = inCaptionIM
476	case a.Colgroup:
477	p.im = inColumnGroupIM
478	case a.Table:
479	p.im = inTableIM
480	case a.Template:
481	// TODO: remove this divergence from the HTML5 spec.
482	if n.Namespace != "" {
483	continue
484	}
485	p.im = p.templateStack.top()
486	case a.Head:
487	// TODO: remove this divergence from the HTML5 spec.
488	//
489	// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
490	p.im = inHeadIM
491	case a.Body:
492	p.im = inBodyIM
493	case a.Frameset:
494	p.im = inFramesetIM
495	case a.Html:
496	if p.head == nil {
497	p.im = beforeHeadIM
498	} else {
499	p.im = afterHeadIM
500	}
501	default:
502	if last {
503	p.im = inBodyIM
504	return
505	}
506	continue
507	}
508	return
509	}
510	}
511
// whitespace is the cutset the insertion modes pass to strings.TrimLeft when
// stripping leading whitespace from text tokens: space, tab, carriage return,
// line feed and form feed.
const whitespace = " \t\r\n\f"
513
514	// Section 12.2.6.4.1.
515	func initialIM(p *parser) bool {
516	switch p.tok.Type {
517	case TextToken:
518	p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
519	if len(p.tok.Data) == 0 {
520	// It was all whitespace, so ignore it.
521	return true
522	}
523	case CommentToken:
524	p.doc.AppendChild(&Node{
525	Type: CommentNode,
526	Data: p.tok.Data,
527	})
528	return true
529	case DoctypeToken:
530	n, quirks := parseDoctype(p.tok.Data)
531	p.doc.AppendChild(n)
532	p.quirks = quirks
533	p.im = beforeHTMLIM
534	return true
535	}
536	p.quirks = true
537	p.im = beforeHTMLIM
538	return false
539	}
540
541	// Section 12.2.6.4.2.
542	func beforeHTMLIM(p *parser) bool {
543	switch p.tok.Type {
544	case DoctypeToken:
545	// Ignore the token.
546	return true
547	case TextToken:
548	p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
549	if len(p.tok.Data) == 0 {
550	// It was all whitespace, so ignore it.
551	return true
552	}
553	case StartTagToken:
554	if p.tok.DataAtom == a.Html {
555	p.addElement()
556	p.im = beforeHeadIM
557	return true
558	}
559	case EndTagToken:
560	switch p.tok.DataAtom {
561	case a.Head, a.Body, a.Html, a.Br:
562	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
563	return false
564	default:
565	// Ignore the token.
566	return true
567	}
568	case CommentToken:
569	p.doc.AppendChild(&Node{
570	Type: CommentNode,
571	Data: p.tok.Data,
572	})
573	return true
574	}
575	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
576	return false
577	}
578
579	// Section 12.2.6.4.3.
580	func beforeHeadIM(p *parser) bool {
581	switch p.tok.Type {
582	case TextToken:
583	p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
584	if len(p.tok.Data) == 0 {
585	// It was all whitespace, so ignore it.
586	return true
587	}
588	case StartTagToken:
589	switch p.tok.DataAtom {
590	case a.Head:
591	p.addElement()
592	p.head = p.top()
593	p.im = inHeadIM
594	return true
595	case a.Html:
596	return inBodyIM(p)
597	}
598	case EndTagToken:
599	switch p.tok.DataAtom {
600	case a.Head, a.Body, a.Html, a.Br:
601	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
602	return false
603	default:
604	// Ignore the token.
605	return true
606	}
607	case CommentToken:
608	p.addChild(&Node{
609	Type: CommentNode,
610	Data: p.tok.Data,
611	})
612	return true
613	case DoctypeToken:
614	// Ignore the token.
615	return true
616	}
617
618	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
619	return false
620	}
621
622	// Section 12.2.6.4.4.
623	func inHeadIM(p *parser) bool {
624	switch p.tok.Type {
625	case TextToken:
626	s := strings.TrimLeft(p.tok.Data, whitespace)
627	if len(s) < len(p.tok.Data) {
628	// Add the initial whitespace to the current node.
629	p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
630	if s == "" {
631	return true
632	}
633	p.tok.Data = s
634	}
635	case StartTagToken:
636	switch p.tok.DataAtom {
637	case a.Html:
638	return inBodyIM(p)
639	case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
640	p.addElement()
641	p.oe.pop()
642	p.acknowledgeSelfClosingTag()
643	return true
644	case a.Noscript:
645	if p.scripting {
646	p.parseGenericRawTextElement()
647	return true
648	}
649	p.addElement()
650	p.im = inHeadNoscriptIM
651	// Don't let the tokenizer go into raw text mode when scripting is disabled.
652	p.tokenizer.NextIsNotRawText()
653	return true
654	case a.Script, a.Title:
655	p.addElement()
656	p.setOriginalIM()
657	p.im = textIM
658	return true
659	case a.Noframes, a.Style:
660	p.parseGenericRawTextElement()
661	return true
662	case a.Head:
663	// Ignore the token.
664	return true
665	case a.Template:
666	// TODO: remove this divergence from the HTML5 spec.
667	//
668	// We don't handle all of the corner cases when mixing foreign
669	// content (i.e. <math> or <svg>) with <template>. Without this
670	// early return, we can get into an infinite loop, possibly because
671	// of the "TODO... further divergence" a little below.
672	//
673	// As a workaround, if we are mixing foreign content and templates,
674	// just ignore the rest of the HTML. Foreign content is rare and a
675	// relatively old HTML feature. Templates are also rare and a
676	// relatively new HTML feature. Their combination is very rare.
677	for _, e := range p.oe {
678	if e.Namespace != "" {
679	p.im = ignoreTheRemainingTokens
680	return true
681	}
682	}
683
684	p.addElement()
685	p.afe = append(p.afe, &scopeMarker)
686	p.framesetOK = false
687	p.im = inTemplateIM
688	p.templateStack = append(p.templateStack, inTemplateIM)
689	return true
690	}
691	case EndTagToken:
692	switch p.tok.DataAtom {
693	case a.Head:
694	p.oe.pop()
695	p.im = afterHeadIM
696	return true
697	case a.Body, a.Html, a.Br:
698	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
699	return false
700	case a.Template:
701	if !p.oe.contains(a.Template) {
702	return true
703	}
704	// TODO: remove this further divergence from the HTML5 spec.
705	//
706	// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
707	p.generateImpliedEndTags()
708	for i := len(p.oe) - 1; i >= 0; i-- {
709	if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
710	p.oe = p.oe[:i]
711	break
712	}
713	}
714	p.clearActiveFormattingElements()
715	p.templateStack.pop()
716	p.resetInsertionMode()
717	return true
718	default:
719	// Ignore the token.
720	return true
721	}
722	case CommentToken:
723	p.addChild(&Node{
724	Type: CommentNode,
725	Data: p.tok.Data,
726	})
727	return true
728	case DoctypeToken:
729	// Ignore the token.
730	return true
731	}
732
733	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
734	return false
735	}
736
737	// Section 12.2.6.4.5.
738	func inHeadNoscriptIM(p *parser) bool {
739	switch p.tok.Type {
740	case DoctypeToken:
741	// Ignore the token.
742	return true
743	case StartTagToken:
744	switch p.tok.DataAtom {
745	case a.Html:
746	return inBodyIM(p)
747	case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
748	return inHeadIM(p)
749	case a.Head:
750	// Ignore the token.
751	return true
752	case a.Noscript:
753	// Don't let the tokenizer go into raw text mode even when a <noscript>
754	// tag is in "in head noscript" insertion mode.
755	p.tokenizer.NextIsNotRawText()
756	// Ignore the token.
757	return true
758	}
759	case EndTagToken:
760	switch p.tok.DataAtom {
761	case a.Noscript, a.Br:
762	default:
763	// Ignore the token.
764	return true
765	}
766	case TextToken:
767	s := strings.TrimLeft(p.tok.Data, whitespace)
768	if len(s) == 0 {
769	// It was all whitespace.
770	return inHeadIM(p)
771	}
772	case CommentToken:
773	return inHeadIM(p)
774	}
775	p.oe.pop()
776	if p.top().DataAtom != a.Head {
777	panic("html: the new current node will be a head element.")
778	}
779	p.im = inHeadIM
780	if p.tok.DataAtom == a.Noscript {
781	return true
782	}
783	return false
784	}
785
786	// Section 12.2.6.4.6.
787	func afterHeadIM(p *parser) bool {
788	switch p.tok.Type {
789	case TextToken:
790	s := strings.TrimLeft(p.tok.Data, whitespace)
791	if len(s) < len(p.tok.Data) {
792	// Add the initial whitespace to the current node.
793	p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
794	if s == "" {
795	return true
796	}
797	p.tok.Data = s
798	}
799	case StartTagToken:
800	switch p.tok.DataAtom {
801	case a.Html:
802	return inBodyIM(p)
803	case a.Body:
804	p.addElement()
805	p.framesetOK = false
806	p.im = inBodyIM
807	return true
808	case a.Frameset:
809	p.addElement()
810	p.im = inFramesetIM
811	return true
812	case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
813	p.oe = append(p.oe, p.head)
814	defer p.oe.remove(p.head)
815	return inHeadIM(p)
816	case a.Head:
817	// Ignore the token.
818	return true
819	}
820	case EndTagToken:
821	switch p.tok.DataAtom {
822	case a.Body, a.Html, a.Br:
823	// Drop down to creating an implied <body> tag.
824	case a.Template:
825	return inHeadIM(p)
826	default:
827	// Ignore the token.
828	return true
829	}
830	case CommentToken:
831	p.addChild(&Node{
832	Type: CommentNode,
833	Data: p.tok.Data,
834	})
835	return true
836	case DoctypeToken:
837	// Ignore the token.
838	return true
839	}
840
841	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
842	p.framesetOK = true
843	return false
844	}
845
846	// copyAttributes copies attributes of src not found on dst to dst.
847	func copyAttributes(dst *Node, src Token) {
848	if len(src.Attr) == 0 {
849	return
850	}
851	attr := map[string]string{}
852	for _, t := range dst.Attr {
853	attr[t.Key] = t.Val
854	}
855	for _, t := range src.Attr {
856	if _, ok := attr[t.Key]; !ok {
857	dst.Attr = append(dst.Attr, t)
858	attr[t.Key] = t.Val
859	}
860	}
861	}
862
863	// Section 12.2.6.4.7.
864	func inBodyIM(p *parser) bool {
865	switch p.tok.Type {
866	case TextToken:
867	d := p.tok.Data
868	switch n := p.oe.top(); n.DataAtom {
869	case a.Pre, a.Listing:
870	if n.FirstChild == nil {
871	// Ignore a newline at the start of a <pre> block.
872	if d != "" && d[0] == '\r' {
873	d = d[1:]
874	}
875	if d != "" && d[0] == '\n' {
876	d = d[1:]
877	}
878	}
879	}
880	d = strings.Replace(d, "\x00", "", -1)
881	if d == "" {
882	return true
883	}
884	p.reconstructActiveFormattingElements()
885	p.addText(d)
886	if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
887	// There were non-whitespace characters inserted.
888	p.framesetOK = false
889	}
890	case StartTagToken:
891	switch p.tok.DataAtom {
892	case a.Html:
893	if p.oe.contains(a.Template) {
894	return true
895	}
896	copyAttributes(p.oe[0], p.tok)
897	case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
898	return inHeadIM(p)
899	case a.Body:
900	if p.oe.contains(a.Template) {
901	return true
902	}
903	if len(p.oe) >= 2 {
904	body := p.oe[1]
905	if body.Type == ElementNode && body.DataAtom == a.Body {
906	p.framesetOK = false
907	copyAttributes(body, p.tok)
908	}
909	}
910	case a.Frameset:
911	if !p.framesetOK \|\| len(p.oe) < 2 \|\| p.oe[1].DataAtom != a.Body {
912	// Ignore the token.
913	return true
914	}
915	body := p.oe[1]
916	if body.Parent != nil {
917	body.Parent.RemoveChild(body)
918	}
919	p.oe = p.oe[:1]
920	p.addElement()
921	p.im = inFramesetIM
922	return true
923	case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
924	p.popUntil(buttonScope, a.P)
925	p.addElement()
926	case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
927	p.popUntil(buttonScope, a.P)
928	switch n := p.top(); n.DataAtom {
929	case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
930	p.oe.pop()
931	}
932	p.addElement()
933	case a.Pre, a.Listing:
934	p.popUntil(buttonScope, a.P)
935	p.addElement()
936	// The newline, if any, will be dealt with by the TextToken case.
937	p.framesetOK = false
938	case a.Form:
939	if p.form != nil && !p.oe.contains(a.Template) {
940	// Ignore the token
941	return true
942	}
943	p.popUntil(buttonScope, a.P)
944	p.addElement()
945	if !p.oe.contains(a.Template) {
946	p.form = p.top()
947	}
948	case a.Li:
949	p.framesetOK = false
950	for i := len(p.oe) - 1; i >= 0; i-- {
951	node := p.oe[i]
952	switch node.DataAtom {
953	case a.Li:
954	p.oe = p.oe[:i]
955	case a.Address, a.Div, a.P:
956	continue
957	default:
958	if !isSpecialElement(node) {
959	continue
960	}
961	}
962	break
963	}
964	p.popUntil(buttonScope, a.P)
965	p.addElement()
966	case a.Dd, a.Dt:
967	p.framesetOK = false
968	for i := len(p.oe) - 1; i >= 0; i-- {
969	node := p.oe[i]
970	switch node.DataAtom {
971	case a.Dd, a.Dt:
972	p.oe = p.oe[:i]
973	case a.Address, a.Div, a.P:
974	continue
975	default:
976	if !isSpecialElement(node) {
977	continue
978	}
979	}
980	break
981	}
982	p.popUntil(buttonScope, a.P)
983	p.addElement()
984	case a.Plaintext:
985	p.popUntil(buttonScope, a.P)
986	p.addElement()
987	case a.Button:
988	p.popUntil(defaultScope, a.Button)
989	p.reconstructActiveFormattingElements()
990	p.addElement()
991	p.framesetOK = false
992	case a.A:
993	for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
994	if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
995	p.inBodyEndTagFormatting(a.A, "a")
996	p.oe.remove(n)
997	p.afe.remove(n)
998	break
999	}
1000	}
1001	p.reconstructActiveFormattingElements()
1002	p.addFormattingElement()
1003	case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1004	p.reconstructActiveFormattingElements()
1005	p.addFormattingElement()
1006	case a.Nobr:
1007	p.reconstructActiveFormattingElements()
1008	if p.elementInScope(defaultScope, a.Nobr) {
1009	p.inBodyEndTagFormatting(a.Nobr, "nobr")
1010	p.reconstructActiveFormattingElements()
1011	}
1012	p.addFormattingElement()
1013	case a.Applet, a.Marquee, a.Object:
1014	p.reconstructActiveFormattingElements()
1015	p.addElement()
1016	p.afe = append(p.afe, &scopeMarker)
1017	p.framesetOK = false
1018	case a.Table:
1019	if !p.quirks {
1020	p.popUntil(buttonScope, a.P)
1021	}
1022	p.addElement()
1023	p.framesetOK = false
1024	p.im = inTableIM
1025	return true
1026	case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
1027	p.reconstructActiveFormattingElements()
1028	p.addElement()
1029	p.oe.pop()
1030	p.acknowledgeSelfClosingTag()
1031	if p.tok.DataAtom == a.Input {
1032	for _, t := range p.tok.Attr {
1033	if t.Key == "type" {
1034	if strings.ToLower(t.Val) == "hidden" {
1035	// Skip setting framesetOK = false
1036	return true
1037	}
1038	}
1039	}
1040	}
1041	p.framesetOK = false
1042	case a.Param, a.Source, a.Track:
1043	p.addElement()
1044	p.oe.pop()
1045	p.acknowledgeSelfClosingTag()
1046	case a.Hr:
1047	p.popUntil(buttonScope, a.P)
1048	p.addElement()
1049	p.oe.pop()
1050	p.acknowledgeSelfClosingTag()
1051	p.framesetOK = false
1052	case a.Image:
1053	p.tok.DataAtom = a.Img
1054	p.tok.Data = a.Img.String()
1055	return false
1056	case a.Textarea:
1057	p.addElement()
1058	p.setOriginalIM()
1059	p.framesetOK = false
1060	p.im = textIM
1061	case a.Xmp:
1062	p.popUntil(buttonScope, a.P)
1063	p.reconstructActiveFormattingElements()
1064	p.framesetOK = false
1065	p.parseGenericRawTextElement()
1066	case a.Iframe:
1067	p.framesetOK = false
1068	p.parseGenericRawTextElement()
1069	case a.Noembed:
1070	p.parseGenericRawTextElement()
1071	case a.Noscript:
1072	if p.scripting {
1073	p.parseGenericRawTextElement()
1074	return true
1075	}
1076	p.reconstructActiveFormattingElements()
1077	p.addElement()
1078	// Don't let the tokenizer go into raw text mode when scripting is disabled.
1079	p.tokenizer.NextIsNotRawText()
1080	case a.Select:
1081	p.reconstructActiveFormattingElements()
1082	p.addElement()
1083	p.framesetOK = false
1084	p.im = inSelectIM
1085	return true
1086	case a.Optgroup, a.Option:
1087	if p.top().DataAtom == a.Option {
1088	p.oe.pop()
1089	}
1090	p.reconstructActiveFormattingElements()
1091	p.addElement()
1092	case a.Rb, a.Rtc:
1093	if p.elementInScope(defaultScope, a.Ruby) {
1094	p.generateImpliedEndTags()
1095	}
1096	p.addElement()
1097	case a.Rp, a.Rt:
1098	if p.elementInScope(defaultScope, a.Ruby) {
1099	p.generateImpliedEndTags("rtc")
1100	}
1101	p.addElement()
1102	case a.Math, a.Svg:
1103	p.reconstructActiveFormattingElements()
1104	if p.tok.DataAtom == a.Math {
1105	adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
1106	} else {
1107	adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
1108	}
1109	adjustForeignAttributes(p.tok.Attr)
1110	p.addElement()
1111	p.top().Namespace = p.tok.Data
1112	if p.hasSelfClosingToken {
1113	p.oe.pop()
1114	p.acknowledgeSelfClosingTag()
1115	}
1116	return true
1117	case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1118	// Ignore the token.
1119	default:
1120	p.reconstructActiveFormattingElements()
1121	p.addElement()
1122	}
1123	case EndTagToken:
1124	switch p.tok.DataAtom {
1125	case a.Body:
1126	if p.elementInScope(defaultScope, a.Body) {
1127	p.im = afterBodyIM
1128	}
1129	case a.Html:
1130	if p.elementInScope(defaultScope, a.Body) {
1131	p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
1132	return false
1133	}
1134	return true
1135	case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
1136	p.popUntil(defaultScope, p.tok.DataAtom)
1137	case a.Form:
1138	if p.oe.contains(a.Template) {
1139	i := p.indexOfElementInScope(defaultScope, a.Form)
1140	if i == -1 {
1141	// Ignore the token.
1142	return true
1143	}
1144	p.generateImpliedEndTags()
1145	if p.oe[i].DataAtom != a.Form {
1146	// Ignore the token.
1147	return true
1148	}
1149	p.popUntil(defaultScope, a.Form)
1150	} else {
1151	node := p.form
1152	p.form = nil
1153	i := p.indexOfElementInScope(defaultScope, a.Form)
1154	if node == nil \|\| i == -1 \|\| p.oe[i] != node {
1155	// Ignore the token.
1156	return true
1157	}
1158	p.generateImpliedEndTags()
1159	p.oe.remove(node)
1160	}
1161	case a.P:
1162	if !p.elementInScope(buttonScope, a.P) {
1163	p.parseImpliedToken(StartTagToken, a.P, a.P.String())
1164	}
1165	p.popUntil(buttonScope, a.P)
1166	case a.Li:
1167	p.popUntil(listItemScope, a.Li)
1168	case a.Dd, a.Dt:
1169	p.popUntil(defaultScope, p.tok.DataAtom)
1170	case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
1171	p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
1172	case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1173	p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
1174	case a.Applet, a.Marquee, a.Object:
1175	if p.popUntil(defaultScope, p.tok.DataAtom) {
1176	p.clearActiveFormattingElements()
1177	}
1178	case a.Br:
1179	p.tok.Type = StartTagToken
1180	return false
1181	case a.Template:
1182	return inHeadIM(p)
1183	default:
1184	p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
1185	}
1186	case CommentToken:
1187	p.addChild(&Node{
1188	Type: CommentNode,
1189	Data: p.tok.Data,
1190	})
1191	case ErrorToken:
1192	// TODO: remove this divergence from the HTML5 spec.
1193	if len(p.templateStack) > 0 {
1194	p.im = inTemplateIM
1195	return false
1196	}
1197	for _, e := range p.oe {
1198	switch e.DataAtom {
1199	case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
1200	a.Thead, a.Tr, a.Body, a.Html:
1201	default:
1202	return true
1203	}
1204	}
1205	}
1206
1207	return true
1208	}
1209
// inBodyEndTagFormatting processes an end tag, identified by tagAtom and
// tagName, using the "adoption agency" algorithm. It rewrites the stack of
// open elements (p.oe), the list of active formatting elements (p.afe) and
// the node tree in place and returns nothing. When p.afe holds no matching
// formatting element above the last scope marker, the tag is handed to
// inBodyEndTagOther instead.
1210	func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
1211	// This is the "adoption agency" algorithm, described at
1212	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
1213
1214	// TODO: this is a fairly literal line-by-line translation of that algorithm.
1215	// Once the code successfully parses the comprehensive test suite, we should
1216	// refactor this code to be more idiomatic.
1217
1218	// Steps 1-2. Fast path: the current node has the tag's name and is not an
// active formatting element, so it is simply popped.
1219	if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
1220	p.oe.pop()
1221	return
1222	}
1223
1224	// Steps 3-5. The outer loop runs at most eight times.
1225	for i := 0; i < 8; i++ {
1226	// Step 6. Find the formatting element: the entry nearest the end of p.afe
// with a matching atom, searching no further back than the last scope marker.
1227	var formattingElement *Node
1228	for j := len(p.afe) - 1; j >= 0; j-- {
1229	if p.afe[j].Type == scopeMarkerNode {
1230	break
1231	}
1232	if p.afe[j].DataAtom == tagAtom {
1233	formattingElement = p.afe[j]
1234	break
1235	}
1236	}
1237	if formattingElement == nil {
1238	p.inBodyEndTagOther(tagAtom, tagName)
1239	return
1240	}
1241
1242	// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
// The stale entry is also dropped from the active formatting list.
1243	feIndex := p.oe.index(formattingElement)
1244	if feIndex == -1 {
1245	p.afe.remove(formattingElement)
1246	return
1247	}
1248	// Step 8. Ignore the tag if formatting element is not in the scope.
1249	if !p.elementInScope(defaultScope, tagAtom) {
1250	// Ignore the tag.
1251	return
1252	}
1253
1254	// Step 9. This step is omitted because it's just a parse error but no need to return.
1255
1256	// Steps 10-11. Find the furthest block: the first special element at or
// above the formatting element's position in the stack of open elements.
1257	var furthestBlock *Node
1258	for _, e := range p.oe[feIndex:] {
1259	if isSpecialElement(e) {
1260	furthestBlock = e
1261	break
1262	}
1263	}
1264	if furthestBlock == nil {
// No furthest block: pop up to and including the formatting element and
// remove it from the active formatting list.
1265	e := p.oe.pop()
1266	for e != formattingElement {
1267	e = p.oe.pop()
1268	}
1269	p.afe.remove(e)
1270	return
1271	}
1272
1273	// Steps 12-13. Find the common ancestor and bookmark node.
// The common ancestor is the stack entry immediately below the formatting
// element; bookmark is the formatting element's index in p.afe.
1274	commonAncestor := p.oe[feIndex-1]
1275	bookmark := p.afe.index(formattingElement)
1276
1277	// Step 14. The inner loop. Find the lastNode to reparent.
1278	lastNode := furthestBlock
1279	node := furthestBlock
1280	x := p.oe.index(node)
1281	// Step 14.1.
1282	j := 0
1283	for {
1284	// Step 14.2.
1285	j++
1286	// Step 14.3. Move node one entry down the stack of open elements.
1287	x--
1288	node = p.oe[x]
1289	// Step 14.4. Go to the next step if node is formatting element.
1290	if node == formattingElement {
1291	break
1292	}
1293	// Step 14.5. Remove node from the list of active formatting elements if
1294	// inner loop counter is greater than three and node is in the list of
1295	// active formatting elements.
1296	if ni := p.afe.index(node); j > 3 && ni > -1 {
1297	p.afe.remove(node)
1298	// If any element of the list of active formatting elements is removed,
1299	// we need to take care whether bookmark should be decremented or not.
1300	// This is because the value of bookmark may exceed the size of the
1301	// list by removing elements from the list.
1302	if ni <= bookmark {
1303	bookmark--
1304	}
1305	continue
1306	}
1307	// Step 14.6. Continue the next inner loop if node is not in the list of
1308	// active formatting elements.
1309	if p.afe.index(node) == -1 {
1310	p.oe.remove(node)
1311	continue
1312	}
1313	// Step 14.7. Replace node with a clone in both p.afe and p.oe.
1314	clone := node.clone()
1315	p.afe[p.afe.index(node)] = clone
1316	p.oe[p.oe.index(node)] = clone
1317	node = clone
1318	// Step 14.8. On the first reparenting, move the bookmark to just after node.
1319	if lastNode == furthestBlock {
1320	bookmark = p.afe.index(node) + 1
1321	}
1322	// Step 14.9. Detach lastNode from its parent, if any, and append it to node.
1323	if lastNode.Parent != nil {
1324	lastNode.Parent.RemoveChild(lastNode)
1325	}
1326	node.AppendChild(lastNode)
1327	// Step 14.10.
1328	lastNode = node
1329	}
1330
1331	// Step 15. Reparent lastNode to the common ancestor,
1332	// or for misnested table nodes, to the foster parent.
1333	if lastNode.Parent != nil {
1334	lastNode.Parent.RemoveChild(lastNode)
1335	}
1336	switch commonAncestor.DataAtom {
1337	case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1338	p.fosterParent(lastNode)
1339	default:
1340	commonAncestor.AppendChild(lastNode)
1341	}
1342
1343	// Steps 16-18. Reparent nodes from the furthest block's children
1344	// to a clone of the formatting element.
1345	clone := formattingElement.clone()
1346	reparentChildren(clone, furthestBlock)
1347	furthestBlock.AppendChild(clone)
1348
1349	// Step 19. Fix up the list of active formatting elements.
1350	if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
1351	// Move the bookmark with the rest of the list.
1352	bookmark--
1353	}
1354	p.afe.remove(formattingElement)
1355	p.afe.insert(bookmark, clone)
1356
1357	// Step 20. Fix up the stack of open elements.
// The clone is inserted immediately above the furthest block.
1358	p.oe.remove(formattingElement)
1359	p.oe.insert(p.oe.index(furthestBlock)+1, clone)
1360	}
1361	}
1362
1363	// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1364	// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
1365	// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
1366	func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
1367	for i := len(p.oe) - 1; i >= 0; i-- {
1368	// Two element nodes have the same tag if they have the same Data (a
1369	// string-typed field). As an optimization, for common HTML tags, each
1370	// Data string is assigned a unique, non-zero DataAtom (a uint32-typed
1371	// field), since integer comparison is faster than string comparison.
1372	// Uncommon (custom) tags get a zero DataAtom.
1373	//
1374	// The if condition here is equivalent to (p.oe[i].Data == tagName).
1375	if (p.oe[i].DataAtom == tagAtom) &&
1376	((tagAtom != 0) \|\| (p.oe[i].Data == tagName)) {
1377	p.oe = p.oe[:i]
1378	break
1379	}
1380	if isSpecialElement(p.oe[i]) {
1381	break
1382	}
1383	}
1384	}
1385
1386	// Section 12.2.6.4.8.
1387	func textIM(p *parser) bool {
1388	switch p.tok.Type {
1389	case ErrorToken:
1390	p.oe.pop()
1391	case TextToken:
1392	d := p.tok.Data
1393	if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1394	// Ignore a newline at the start of a <textarea> block.
1395	if d != "" && d[0] == '\r' {
1396	d = d[1:]
1397	}
1398	if d != "" && d[0] == '\n' {
1399	d = d[1:]
1400	}
1401	}
1402	if d == "" {
1403	return true
1404	}
1405	p.addText(d)
1406	return true
1407	case EndTagToken:
1408	p.oe.pop()
1409	}
1410	p.im = p.originalIM
1411	p.originalIM = nil
1412	return p.tok.Type == EndTagToken
1413	}
1414
1415	// Section 12.2.6.4.9.
1416	func inTableIM(p *parser) bool {
1417	switch p.tok.Type {
1418	case TextToken:
1419	p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
1420	switch p.oe.top().DataAtom {
1421	case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1422	if strings.Trim(p.tok.Data, whitespace) == "" {
1423	p.addText(p.tok.Data)
1424	return true
1425	}
1426	}
1427	case StartTagToken:
1428	switch p.tok.DataAtom {
1429	case a.Caption:
1430	p.clearStackToContext(tableScope)
1431	p.afe = append(p.afe, &scopeMarker)
1432	p.addElement()
1433	p.im = inCaptionIM
1434	return true
1435	case a.Colgroup:
1436	p.clearStackToContext(tableScope)
1437	p.addElement()
1438	p.im = inColumnGroupIM
1439	return true
1440	case a.Col:
1441	p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
1442	return false
1443	case a.Tbody, a.Tfoot, a.Thead:
1444	p.clearStackToContext(tableScope)
1445	p.addElement()
1446	p.im = inTableBodyIM
1447	return true
1448	case a.Td, a.Th, a.Tr:
1449	p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
1450	return false
1451	case a.Table:
1452	if p.popUntil(tableScope, a.Table) {
1453	p.resetInsertionMode()
1454	return false
1455	}
1456	// Ignore the token.
1457	return true
1458	case a.Style, a.Script, a.Template:
1459	return inHeadIM(p)
1460	case a.Input:
1461	for _, t := range p.tok.Attr {
1462	if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
1463	p.addElement()
1464	p.oe.pop()
1465	return true
1466	}
1467	}
1468	// Otherwise drop down to the default action.
1469	case a.Form:
1470	if p.oe.contains(a.Template) \|\| p.form != nil {
1471	// Ignore the token.
1472	return true
1473	}
1474	p.addElement()
1475	p.form = p.oe.pop()
1476	case a.Select:
1477	p.reconstructActiveFormattingElements()
1478	switch p.top().DataAtom {
1479	case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1480	p.fosterParenting = true
1481	}
1482	p.addElement()
1483	p.fosterParenting = false
1484	p.framesetOK = false
1485	p.im = inSelectInTableIM
1486	return true
1487	}
1488	case EndTagToken:
1489	switch p.tok.DataAtom {
1490	case a.Table:
1491	if p.popUntil(tableScope, a.Table) {
1492	p.resetInsertionMode()
1493	return true
1494	}
1495	// Ignore the token.
1496	return true
1497	case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1498	// Ignore the token.
1499	return true
1500	case a.Template:
1501	return inHeadIM(p)
1502	}
1503	case CommentToken:
1504	p.addChild(&Node{
1505	Type: CommentNode,
1506	Data: p.tok.Data,
1507	})
1508	return true
1509	case DoctypeToken:
1510	// Ignore the token.
1511	return true
1512	case ErrorToken:
1513	return inBodyIM(p)
1514	}
1515
1516	p.fosterParenting = true
1517	defer func() { p.fosterParenting = false }()
1518
1519	return inBodyIM(p)
1520	}
1521
1522	// Section 12.2.6.4.11.
1523	func inCaptionIM(p *parser) bool {
1524	switch p.tok.Type {
1525	case StartTagToken:
1526	switch p.tok.DataAtom {
1527	case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1528	if !p.popUntil(tableScope, a.Caption) {
1529	// Ignore the token.
1530	return true
1531	}
1532	p.clearActiveFormattingElements()
1533	p.im = inTableIM
1534	return false
1535	case a.Select:
1536	p.reconstructActiveFormattingElements()
1537	p.addElement()
1538	p.framesetOK = false
1539	p.im = inSelectInTableIM
1540	return true
1541	}
1542	case EndTagToken:
1543	switch p.tok.DataAtom {
1544	case a.Caption:
1545	if p.popUntil(tableScope, a.Caption) {
1546	p.clearActiveFormattingElements()
1547	p.im = inTableIM
1548	}
1549	return true
1550	case a.Table:
1551	if !p.popUntil(tableScope, a.Caption) {
1552	// Ignore the token.
1553	return true
1554	}
1555	p.clearActiveFormattingElements()
1556	p.im = inTableIM
1557	return false
1558	case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1559	// Ignore the token.
1560	return true
1561	}
1562	}
1563	return inBodyIM(p)
1564	}
1565
1566	// Section 12.2.6.4.12.
1567	func inColumnGroupIM(p *parser) bool {
1568	switch p.tok.Type {
1569	case TextToken:
1570	s := strings.TrimLeft(p.tok.Data, whitespace)
1571	if len(s) < len(p.tok.Data) {
1572	// Add the initial whitespace to the current node.
1573	p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1574	if s == "" {
1575	return true
1576	}
1577	p.tok.Data = s
1578	}
1579	case CommentToken:
1580	p.addChild(&Node{
1581	Type: CommentNode,
1582	Data: p.tok.Data,
1583	})
1584	return true
1585	case DoctypeToken:
1586	// Ignore the token.
1587	return true
1588	case StartTagToken:
1589	switch p.tok.DataAtom {
1590	case a.Html:
1591	return inBodyIM(p)
1592	case a.Col:
1593	p.addElement()
1594	p.oe.pop()
1595	p.acknowledgeSelfClosingTag()
1596	return true
1597	case a.Template:
1598	return inHeadIM(p)
1599	}
1600	case EndTagToken:
1601	switch p.tok.DataAtom {
1602	case a.Colgroup:
1603	if p.oe.top().DataAtom == a.Colgroup {
1604	p.oe.pop()
1605	p.im = inTableIM
1606	}
1607	return true
1608	case a.Col:
1609	// Ignore the token.
1610	return true
1611	case a.Template:
1612	return inHeadIM(p)
1613	}
1614	case ErrorToken:
1615	return inBodyIM(p)
1616	}
1617	if p.oe.top().DataAtom != a.Colgroup {
1618	return true
1619	}
1620	p.oe.pop()
1621	p.im = inTableIM
1622	return false
1623	}
1624
1625	// Section 12.2.6.4.13.
1626	func inTableBodyIM(p *parser) bool {
1627	switch p.tok.Type {
1628	case StartTagToken:
1629	switch p.tok.DataAtom {
1630	case a.Tr:
1631	p.clearStackToContext(tableBodyScope)
1632	p.addElement()
1633	p.im = inRowIM
1634	return true
1635	case a.Td, a.Th:
1636	p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1637	return false
1638	case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1639	if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1640	p.im = inTableIM
1641	return false
1642	}
1643	// Ignore the token.
1644	return true
1645	}
1646	case EndTagToken:
1647	switch p.tok.DataAtom {
1648	case a.Tbody, a.Tfoot, a.Thead:
1649	if p.elementInScope(tableScope, p.tok.DataAtom) {
1650	p.clearStackToContext(tableBodyScope)
1651	p.oe.pop()
1652	p.im = inTableIM
1653	}
1654	return true
1655	case a.Table:
1656	if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1657	p.im = inTableIM
1658	return false
1659	}
1660	// Ignore the token.
1661	return true
1662	case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1663	// Ignore the token.
1664	return true
1665	}
1666	case CommentToken:
1667	p.addChild(&Node{
1668	Type: CommentNode,
1669	Data: p.tok.Data,
1670	})
1671	return true
1672	}
1673
1674	return inTableIM(p)
1675	}
1676
1677	// Section 12.2.6.4.14.
1678	func inRowIM(p *parser) bool {
1679	switch p.tok.Type {
1680	case StartTagToken:
1681	switch p.tok.DataAtom {
1682	case a.Td, a.Th:
1683	p.clearStackToContext(tableRowScope)
1684	p.addElement()
1685	p.afe = append(p.afe, &scopeMarker)
1686	p.im = inCellIM
1687	return true
1688	case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1689	if p.popUntil(tableScope, a.Tr) {
1690	p.im = inTableBodyIM
1691	return false
1692	}
1693	// Ignore the token.
1694	return true
1695	}
1696	case EndTagToken:
1697	switch p.tok.DataAtom {
1698	case a.Tr:
1699	if p.popUntil(tableScope, a.Tr) {
1700	p.im = inTableBodyIM
1701	return true
1702	}
1703	// Ignore the token.
1704	return true
1705	case a.Table:
1706	if p.popUntil(tableScope, a.Tr) {
1707	p.im = inTableBodyIM
1708	return false
1709	}
1710	// Ignore the token.
1711	return true
1712	case a.Tbody, a.Tfoot, a.Thead:
1713	if p.elementInScope(tableScope, p.tok.DataAtom) {
1714	p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
1715	return false
1716	}
1717	// Ignore the token.
1718	return true
1719	case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1720	// Ignore the token.
1721	return true
1722	}
1723	}
1724
1725	return inTableIM(p)
1726	}
1727
1728	// Section 12.2.6.4.15.
1729	func inCellIM(p *parser) bool {
1730	switch p.tok.Type {
1731	case StartTagToken:
1732	switch p.tok.DataAtom {
1733	case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1734	if p.popUntil(tableScope, a.Td, a.Th) {
1735	// Close the cell and reprocess.
1736	p.clearActiveFormattingElements()
1737	p.im = inRowIM
1738	return false
1739	}
1740	// Ignore the token.
1741	return true
1742	case a.Select:
1743	p.reconstructActiveFormattingElements()
1744	p.addElement()
1745	p.framesetOK = false
1746	p.im = inSelectInTableIM
1747	return true
1748	}
1749	case EndTagToken:
1750	switch p.tok.DataAtom {
1751	case a.Td, a.Th:
1752	if !p.popUntil(tableScope, p.tok.DataAtom) {
1753	// Ignore the token.
1754	return true
1755	}
1756	p.clearActiveFormattingElements()
1757	p.im = inRowIM
1758	return true
1759	case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1760	// Ignore the token.
1761	return true
1762	case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1763	if !p.elementInScope(tableScope, p.tok.DataAtom) {
1764	// Ignore the token.
1765	return true
1766	}
1767	// Close the cell and reprocess.
1768	if p.popUntil(tableScope, a.Td, a.Th) {
1769	p.clearActiveFormattingElements()
1770	}
1771	p.im = inRowIM
1772	return false
1773	}
1774	}
1775	return inBodyIM(p)
1776	}
1777
1778	// Section 12.2.6.4.16.
1779	func inSelectIM(p *parser) bool {
1780	switch p.tok.Type {
1781	case TextToken:
1782	p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1783	case StartTagToken:
1784	switch p.tok.DataAtom {
1785	case a.Html:
1786	return inBodyIM(p)
1787	case a.Option:
1788	if p.top().DataAtom == a.Option {
1789	p.oe.pop()
1790	}
1791	p.addElement()
1792	case a.Optgroup:
1793	if p.top().DataAtom == a.Option {
1794	p.oe.pop()
1795	}
1796	if p.top().DataAtom == a.Optgroup {
1797	p.oe.pop()
1798	}
1799	p.addElement()
1800	case a.Select:
1801	if !p.popUntil(selectScope, a.Select) {
1802	// Ignore the token.
1803	return true
1804	}
1805	p.resetInsertionMode()
1806	case a.Input, a.Keygen, a.Textarea:
1807	if p.elementInScope(selectScope, a.Select) {
1808	p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1809	return false
1810	}
1811	// In order to properly ignore <textarea>, we need to change the tokenizer mode.
1812	p.tokenizer.NextIsNotRawText()
1813	// Ignore the token.
1814	return true
1815	case a.Script, a.Template:
1816	return inHeadIM(p)
1817	case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
1818	// Don't let the tokenizer go into raw text mode when there are raw tags
1819	// to be ignored. These tags should be ignored from the tokenizer
1820	// properly.
1821	p.tokenizer.NextIsNotRawText()
1822	// Ignore the token.
1823	return true
1824	}
1825	case EndTagToken:
1826	switch p.tok.DataAtom {
1827	case a.Option:
1828	if p.top().DataAtom == a.Option {
1829	p.oe.pop()
1830	}
1831	case a.Optgroup:
1832	i := len(p.oe) - 1
1833	if p.oe[i].DataAtom == a.Option {
1834	i--
1835	}
1836	if p.oe[i].DataAtom == a.Optgroup {
1837	p.oe = p.oe[:i]
1838	}
1839	case a.Select:
1840	if !p.popUntil(selectScope, a.Select) {
1841	// Ignore the token.
1842	return true
1843	}
1844	p.resetInsertionMode()
1845	case a.Template:
1846	return inHeadIM(p)
1847	}
1848	case CommentToken:
1849	p.addChild(&Node{
1850	Type: CommentNode,
1851	Data: p.tok.Data,
1852	})
1853	case DoctypeToken:
1854	// Ignore the token.
1855	return true
1856	case ErrorToken:
1857	return inBodyIM(p)
1858	}
1859
1860	return true
1861	}
1862
1863	// Section 12.2.6.4.17.
1864	func inSelectInTableIM(p *parser) bool {
1865	switch p.tok.Type {
1866	case StartTagToken, EndTagToken:
1867	switch p.tok.DataAtom {
1868	case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1869	if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
1870	// Ignore the token.
1871	return true
1872	}
1873	// This is like p.popUntil(selectScope, a.Select), but it also
1874	// matches <math select>, not just <select>. Matching the MathML
1875	// tag is arguably incorrect (conceptually), but it mimics what
1876	// Chromium does.
1877	for i := len(p.oe) - 1; i >= 0; i-- {
1878	if n := p.oe[i]; n.DataAtom == a.Select {
1879	p.oe = p.oe[:i]
1880	break
1881	}
1882	}
1883	p.resetInsertionMode()
1884	return false
1885	}
1886	}
1887	return inSelectIM(p)
1888	}
1889
1890	// Section 12.2.6.4.18.
1891	func inTemplateIM(p *parser) bool {
1892	switch p.tok.Type {
1893	case TextToken, CommentToken, DoctypeToken:
1894	return inBodyIM(p)
1895	case StartTagToken:
1896	switch p.tok.DataAtom {
1897	case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
1898	return inHeadIM(p)
1899	case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1900	p.templateStack.pop()
1901	p.templateStack = append(p.templateStack, inTableIM)
1902	p.im = inTableIM
1903	return false
1904	case a.Col:
1905	p.templateStack.pop()
1906	p.templateStack = append(p.templateStack, inColumnGroupIM)
1907	p.im = inColumnGroupIM
1908	return false
1909	case a.Tr:
1910	p.templateStack.pop()
1911	p.templateStack = append(p.templateStack, inTableBodyIM)
1912	p.im = inTableBodyIM
1913	return false
1914	case a.Td, a.Th:
1915	p.templateStack.pop()
1916	p.templateStack = append(p.templateStack, inRowIM)
1917	p.im = inRowIM
1918	return false
1919	default:
1920	p.templateStack.pop()
1921	p.templateStack = append(p.templateStack, inBodyIM)
1922	p.im = inBodyIM
1923	return false
1924	}
1925	case EndTagToken:
1926	switch p.tok.DataAtom {
1927	case a.Template:
1928	return inHeadIM(p)
1929	default:
1930	// Ignore the token.
1931	return true
1932	}
1933	case ErrorToken:
1934	if !p.oe.contains(a.Template) {
1935	// Ignore the token.
1936	return true
1937	}
1938	// TODO: remove this divergence from the HTML5 spec.
1939	//
1940	// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
1941	p.generateImpliedEndTags()
1942	for i := len(p.oe) - 1; i >= 0; i-- {
1943	if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
1944	p.oe = p.oe[:i]
1945	break
1946	}
1947	}
1948	p.clearActiveFormattingElements()
1949	p.templateStack.pop()
1950	p.resetInsertionMode()
1951	return false
1952	}
1953	return false
1954	}
1955
1956	// Section 12.2.6.4.19.
1957	func afterBodyIM(p *parser) bool {
1958	switch p.tok.Type {
1959	case ErrorToken:
1960	// Stop parsing.
1961	return true
1962	case TextToken:
1963	s := strings.TrimLeft(p.tok.Data, whitespace)
1964	if len(s) == 0 {
1965	// It was all whitespace.
1966	return inBodyIM(p)
1967	}
1968	case StartTagToken:
1969	if p.tok.DataAtom == a.Html {
1970	return inBodyIM(p)
1971	}
1972	case EndTagToken:
1973	if p.tok.DataAtom == a.Html {
1974	if !p.fragment {
1975	p.im = afterAfterBodyIM
1976	}
1977	return true
1978	}
1979	case CommentToken:
1980	// The comment is attached to the <html> element.
1981	if len(p.oe) < 1 \|\| p.oe[0].DataAtom != a.Html {
1982	panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1983	}
1984	p.oe[0].AppendChild(&Node{
1985	Type: CommentNode,
1986	Data: p.tok.Data,
1987	})
1988	return true
1989	}
1990	p.im = inBodyIM
1991	return false
1992	}
1993
1994	// Section 12.2.6.4.20.
1995	func inFramesetIM(p *parser) bool {
1996	switch p.tok.Type {
1997	case CommentToken:
1998	p.addChild(&Node{
1999	Type: CommentNode,
2000	Data: p.tok.Data,
2001	})
2002	case TextToken:
2003	// Ignore all text but whitespace.
2004	s := strings.Map(func(c rune) rune {
2005	switch c {
2006	case ' ', '\t', '\n', '\f', '\r':
2007	return c
2008	}
2009	return -1
2010	}, p.tok.Data)
2011	if s != "" {
2012	p.addText(s)
2013	}
2014	case StartTagToken:
2015	switch p.tok.DataAtom {
2016	case a.Html:
2017	return inBodyIM(p)
2018	case a.Frameset:
2019	p.addElement()
2020	case a.Frame:
2021	p.addElement()
2022	p.oe.pop()
2023	p.acknowledgeSelfClosingTag()
2024	case a.Noframes:
2025	return inHeadIM(p)
2026	}
2027	case EndTagToken:
2028	switch p.tok.DataAtom {
2029	case a.Frameset:
2030	if p.oe.top().DataAtom != a.Html {
2031	p.oe.pop()
2032	if p.oe.top().DataAtom != a.Frameset {
2033	p.im = afterFramesetIM
2034	return true
2035	}
2036	}
2037	}
2038	default:
2039	// Ignore the token.
2040	}
2041	return true
2042	}
2043
2044	// Section 12.2.6.4.21.
2045	func afterFramesetIM(p *parser) bool {
2046	switch p.tok.Type {
2047	case CommentToken:
2048	p.addChild(&Node{
2049	Type: CommentNode,
2050	Data: p.tok.Data,
2051	})
2052	case TextToken:
2053	// Ignore all text but whitespace.
2054	s := strings.Map(func(c rune) rune {
2055	switch c {
2056	case ' ', '\t', '\n', '\f', '\r':
2057	return c
2058	}
2059	return -1
2060	}, p.tok.Data)
2061	if s != "" {
2062	p.addText(s)
2063	}
2064	case StartTagToken:
2065	switch p.tok.DataAtom {
2066	case a.Html:
2067	return inBodyIM(p)
2068	case a.Noframes:
2069	return inHeadIM(p)
2070	}
2071	case EndTagToken:
2072	switch p.tok.DataAtom {
2073	case a.Html:
2074	p.im = afterAfterFramesetIM
2075	return true
2076	}
2077	default:
2078	// Ignore the token.
2079	}
2080	return true
2081	}
2082
2083	// Section 12.2.6.4.22.
2084	func afterAfterBodyIM(p *parser) bool {
2085	switch p.tok.Type {
2086	case ErrorToken:
2087	// Stop parsing.
2088	return true
2089	case TextToken:
2090	s := strings.TrimLeft(p.tok.Data, whitespace)
2091	if len(s) == 0 {
2092	// It was all whitespace.
2093	return inBodyIM(p)
2094	}
2095	case StartTagToken:
2096	if p.tok.DataAtom == a.Html {
2097	return inBodyIM(p)
2098	}
2099	case CommentToken:
2100	p.doc.AppendChild(&Node{
2101	Type: CommentNode,
2102	Data: p.tok.Data,
2103	})
2104	return true
2105	case DoctypeToken:
2106	return inBodyIM(p)
2107	}
2108	p.im = inBodyIM
2109	return false
2110	}
2111
2112	// Section 12.2.6.4.23.
2113	func afterAfterFramesetIM(p *parser) bool {
2114	switch p.tok.Type {
2115	case CommentToken:
2116	p.doc.AppendChild(&Node{
2117	Type: CommentNode,
2118	Data: p.tok.Data,
2119	})
2120	case TextToken:
2121	// Ignore all text but whitespace.
2122	s := strings.Map(func(c rune) rune {
2123	switch c {
2124	case ' ', '\t', '\n', '\f', '\r':
2125	return c
2126	}
2127	return -1
2128	}, p.tok.Data)
2129	if s != "" {
2130	p.tok.Data = s
2131	return inBodyIM(p)
2132	}
2133	case StartTagToken:
2134	switch p.tok.DataAtom {
2135	case a.Html:
2136	return inBodyIM(p)
2137	case a.Noframes:
2138	return inHeadIM(p)
2139	}
2140	case DoctypeToken:
2141	return inBodyIM(p)
2142	default:
2143	// Ignore the token.
2144	}
2145	return true
2146	}
2147
2148	func ignoreTheRemainingTokens(p *parser) bool {
2149	return true
2150	}
2151
2152	const whitespaceOrNUL = whitespace + "\x00"
2153
2154	// Section 12.2.6.5
2155	func parseForeignContent(p *parser) bool {
2156	switch p.tok.Type {
2157	case TextToken:
2158	if p.framesetOK {
2159	p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
2160	}
2161	p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
2162	p.addText(p.tok.Data)
2163	case CommentToken:
2164	p.addChild(&Node{
2165	Type: CommentNode,
2166	Data: p.tok.Data,
2167	})
2168	case StartTagToken:
2169	if !p.fragment {
2170	b := breakout[p.tok.Data]
2171	if p.tok.DataAtom == a.Font {
2172	loop:
2173	for _, attr := range p.tok.Attr {
2174	switch attr.Key {
2175	case "color", "face", "size":
2176	b = true
2177	break loop
2178	}
2179	}
2180	}
2181	if b {
2182	for i := len(p.oe) - 1; i >= 0; i-- {
2183	n := p.oe[i]
2184	if n.Namespace == "" \|\| htmlIntegrationPoint(n) \|\| mathMLTextIntegrationPoint(n) {
2185	p.oe = p.oe[:i+1]
2186	break
2187	}
2188	}
2189	return false
2190	}
2191	}
2192	current := p.adjustedCurrentNode()
2193	switch current.Namespace {
2194	case "math":
2195	adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
2196	case "svg":
2197	// Adjust SVG tag names. The tokenizer lower-cases tag names, but
2198	// SVG wants e.g. "foreignObject" with a capital second "O".
2199	if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
2200	p.tok.DataAtom = a.Lookup([]byte(x))
2201	p.tok.Data = x
2202	}
2203	adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
2204	default:
2205	panic("html: bad parser state: unexpected namespace")
2206	}
2207	adjustForeignAttributes(p.tok.Attr)
2208	namespace := current.Namespace
2209	p.addElement()
2210	p.top().Namespace = namespace
2211	if namespace != "" {
2212	// Don't let the tokenizer go into raw text mode in foreign content
2213	// (e.g. in an SVG <title> tag).
2214	p.tokenizer.NextIsNotRawText()
2215	}
2216	if p.hasSelfClosingToken {
2217	p.oe.pop()
2218	p.acknowledgeSelfClosingTag()
2219	}
2220	case EndTagToken:
2221	for i := len(p.oe) - 1; i >= 0; i-- {
2222	if p.oe[i].Namespace == "" {
2223	return p.im(p)
2224	}
2225	if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
2226	p.oe = p.oe[:i]
2227	break
2228	}
2229	}
2230	return true
2231	default:
2232	// Ignore the token.
2233	}
2234	return true
2235	}
2236
2237	// Section 12.2.4.2.
2238	func (p parser) adjustedCurrentNode() Node {
2239	if len(p.oe) == 1 && p.fragment && p.context != nil {
2240	return p.context
2241	}
2242	return p.oe.top()
2243	}
2244
2245	// Section 12.2.6.
2246	func (p *parser) inForeignContent() bool {
2247	if len(p.oe) == 0 {
2248	return false
2249	}
2250	n := p.adjustedCurrentNode()
2251	if n.Namespace == "" {
2252	return false
2253	}
2254	if mathMLTextIntegrationPoint(n) {
2255	if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
2256	return false
2257	}
2258	if p.tok.Type == TextToken {
2259	return false
2260	}
2261	}
2262	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
2263	return false
2264	}
2265	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken \|\| p.tok.Type == TextToken) {
2266	return false
2267	}
2268	if p.tok.Type == ErrorToken {
2269	return false
2270	}
2271	return true
2272	}
2273
2274	// parseImpliedToken parses a token as though it had appeared in the parser's
2275	// input.
2276	func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
2277	realToken, selfClosing := p.tok, p.hasSelfClosingToken
2278	p.tok = Token{
2279	Type: t,
2280	DataAtom: dataAtom,
2281	Data: data,
2282	}
2283	p.hasSelfClosingToken = false
2284	p.parseCurrentToken()
2285	p.tok, p.hasSelfClosingToken = realToken, selfClosing
2286	}
2287
2288	// parseCurrentToken runs the current token through the parsing routines
2289	// until it is consumed.
2290	func (p *parser) parseCurrentToken() {
2291	if p.tok.Type == SelfClosingTagToken {
2292	p.hasSelfClosingToken = true
2293	p.tok.Type = StartTagToken
2294	}
2295
2296	consumed := false
2297	for !consumed {
2298	if p.inForeignContent() {
2299	consumed = parseForeignContent(p)
2300	} else {
2301	consumed = p.im(p)
2302	}
2303	}
2304
2305	if p.hasSelfClosingToken {
2306	// This is a parse error, but ignore it.
2307	p.hasSelfClosingToken = false
2308	}
2309	}
2310
2311	func (p *parser) parse() error {
2312	// Iterate until EOF. Any other error will cause an early return.
2313	var err error
2314	for err != io.EOF {
2315	// CDATA sections are allowed only in foreign content.
2316	n := p.oe.top()
2317	p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2318	// Read and parse the next token.
2319	p.tokenizer.Next()
2320	p.tok = p.tokenizer.Token()
2321	if p.tok.Type == ErrorToken {
2322	err = p.tokenizer.Err()
2323	if err != nil && err != io.EOF {
2324	return err
2325	}
2326	}
2327	p.parseCurrentToken()
2328	}
2329	return nil
2330	}
2331
2332	// Parse returns the parse tree for the HTML from the given Reader.
2333	//
2334	// It implements the HTML5 parsing algorithm
2335	// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
2336	// which is very complicated. The resultant tree can contain implicitly created
2337	// nodes that have no explicit <tag> listed in r's data, and nodes' parents can
2338	// differ from the nesting implied by a naive processing of start and end
2339	// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
2340	// with no corresponding node in the resulting tree.
2341	//
2342	// The input is assumed to be UTF-8 encoded.
2343	func Parse(r io.Reader) (*Node, error) {
2344	return ParseWithOptions(r)
2345	}
2346
2347	// ParseFragment parses a fragment of HTML and returns the nodes that were
2348	// found. If the fragment is the InnerHTML for an existing element, pass that
2349	// element in context.
2350	//
2351	// It has the same intricacies as Parse.
2352	func ParseFragment(r io.Reader, context Node) ([]Node, error) {
2353	return ParseFragmentWithOptions(r, context)
2354	}
2355
2356	// ParseOption configures a parser.
2357	type ParseOption func(p *parser)
2358
2359	// ParseOptionEnableScripting configures the scripting flag.
2360	// https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
2361	//
2362	// By default, scripting is enabled.
2363	func ParseOptionEnableScripting(enable bool) ParseOption {
2364	return func(p *parser) {
2365	p.scripting = enable
2366	}
2367	}
2368
2369	// ParseWithOptions is like Parse, with options.
2370	func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
2371	p := &parser{
2372	tokenizer: NewTokenizer(r),
2373	doc: &Node{
2374	Type: DocumentNode,
2375	},
2376	scripting: true,
2377	framesetOK: true,
2378	im: initialIM,
2379	}
2380
2381	for _, f := range opts {
2382	f(p)
2383	}
2384
2385	if err := p.parse(); err != nil {
2386	return nil, err
2387	}
2388	return p.doc, nil
2389	}
2390
2391	// ParseFragmentWithOptions is like ParseFragment, with options.
2392	func ParseFragmentWithOptions(r io.Reader, context Node, opts ...ParseOption) ([]Node, error) {
2393	contextTag := ""
2394	if context != nil {
2395	if context.Type != ElementNode {
2396	return nil, errors.New("html: ParseFragment of non-element Node")
2397	}
2398	// The next check isn't just context.DataAtom.String() == context.Data because
2399	// it is valid to pass an element whose tag isn't a known atom. For example,
2400	// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
2401	if context.DataAtom != a.Lookup([]byte(context.Data)) {
2402	return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2403	}
2404	contextTag = context.DataAtom.String()
2405	}
2406	p := &parser{
2407	doc: &Node{
2408	Type: DocumentNode,
2409	},
2410	scripting: true,
2411	fragment: true,
2412	context: context,
2413	}
2414	if context != nil && context.Namespace != "" {
2415	p.tokenizer = NewTokenizer(r)
2416	} else {
2417	p.tokenizer = NewTokenizerFragment(r, contextTag)
2418	}
2419
2420	for _, f := range opts {
2421	f(p)
2422	}
2423
2424	root := &Node{
2425	Type: ElementNode,
2426	DataAtom: a.Html,
2427	Data: a.Html.String(),
2428	}
2429	p.doc.AppendChild(root)
2430	p.oe = nodeStack{root}
2431	if context != nil && context.DataAtom == a.Template {
2432	p.templateStack = append(p.templateStack, inTemplateIM)
2433	}
2434	p.resetInsertionMode()
2435
2436	for n := context; n != nil; n = n.Parent {
2437	if n.Type == ElementNode && n.DataAtom == a.Form {
2438	p.form = n
2439	break
2440	}
2441	}
2442
2443	if err := p.parse(); err != nil {
2444	return nil, err
2445	}
2446
2447	parent := p.doc
2448	if context != nil {
2449	parent = root
2450	}
2451
2452	var result []*Node
2453	for c := parent.FirstChild; c != nil; {
2454	next := c.NextSibling
2455	parent.RemoveChild(c)
2456	result = append(result, c)
2457	c = next
2458	}
2459	return result, nil
2460	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: