Skip to content

Commit 37e1c6a

Browse files
nigeltao authored and FiloSottile committed
html: ignore templates nested within foreign content
Fixes #46288
Fixes CVE-2021-33194

Change-Id: I2fe39702de8e9aab29965c1526e377a6f9cdf056
Reviewed-on: https://go-review.googlesource.com/c/net/+/311090
Reviewed-by: Filippo Valsorda <filippo@golang.org>
Run-TryBot: Filippo Valsorda <filippo@golang.org>
Trust: Roland Shoemaker <roland@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
1 parent 4163338 commit 37e1c6a

File tree

2 files changed

+45
-1
lines changed

2 files changed

+45
-1
lines changed

html/parse.go

+23-1
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,24 @@ func inHeadIM(p *parser) bool {
663663
// Ignore the token.
664664
return true
665665
case a.Template:
666+
// TODO: remove this divergence from the HTML5 spec.
667+
//
668+
// We don't handle all of the corner cases when mixing foreign
669+
// content (i.e. <math> or <svg>) with <template>. Without this
670+
// early return, we can get into an infinite loop, possibly because
671+
// of the "TODO... further divergence" a little below.
672+
//
673+
// As a workaround, if we are mixing foreign content and templates,
674+
// just ignore the rest of the HTML. Foreign content is rare and a
675+
// relatively old HTML feature. Templates are also rare and a
676+
// relatively new HTML feature. Their combination is very rare.
677+
for _, e := range p.oe {
678+
if e.Namespace != "" {
679+
p.im = ignoreTheRemainingTokens
680+
return true
681+
}
682+
}
683+
666684
p.addElement()
667685
p.afe = append(p.afe, &scopeMarker)
668686
p.framesetOK = false
@@ -683,7 +701,7 @@ func inHeadIM(p *parser) bool {
683701
if !p.oe.contains(a.Template) {
684702
return true
685703
}
686-
// TODO: remove this divergence from the HTML5 spec.
704+
// TODO: remove this further divergence from the HTML5 spec.
687705
//
688706
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
689707
p.generateImpliedEndTags()
@@ -2127,6 +2145,10 @@ func afterAfterFramesetIM(p *parser) bool {
21272145
return true
21282146
}
21292147

2148+
// ignoreTheRemainingTokens is an insertion mode function that performs no
// work on the parser and unconditionally returns true (presumably reporting
// the token as consumed; confirm against the parser's main loop).
//
// inHeadIM installs it as p.im in its a.Template case when any element on
// the stack of open elements (p.oe) has a non-empty Namespace, as a
// workaround for an infinite loop when foreign content and <template> are
// mixed.
func ignoreTheRemainingTokens(p *parser) bool {
2149+
return true
2150+
}
2151+
21302152
const whitespaceOrNUL = whitespace + "\x00"
21312153

21322154
// Section 12.2.6.5

html/parse_test.go

+22
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,9 @@ func TestParser(t *testing.T) {
267267
if err != nil {
268268
t.Fatal(err)
269269
}
270+
if parseTestBlacklist[ta.text] {
271+
continue
272+
}
270273

271274
err = testParseCase(ta.text, ta.want, ta.context, ParseOptionEnableScripting(ta.scripting))
272275

@@ -379,6 +382,14 @@ func testParseCase(text, want, context string, opts ...ParseOption) (err error)
379382
return nil
380383
}
381384

385+
// parseTestBlacklist holds test inputs, keyed by their exact text, that
386+
// TestParser skips because the parse test would otherwise fail on them.
387+
var parseTestBlacklist = map[string]bool{
388+
// Both inputs mix foreign content (<math> or <svg>) with <template>.
// See the a.Template TODO in inHeadIM.
389+
`<math><template><mo><template>`: true,
390+
`<template><svg><foo><template><foreignObject><div></template><div>`: true,
391+
}
392+
382393
// Some test inputs result in parse trees that are not 'well-formed' despite
383394
// following the HTML5 recovery algorithms. Rendering and re-parsing such a
384395
// tree will not result in an exact clone of that tree. We blacklist such
@@ -454,6 +465,17 @@ func TestParseFragmentWithNilContext(t *testing.T) {
454465
ParseFragment(strings.NewReader("<p>hello</p>"), nil)
455466
}
456467

468+
// TestParseFragmentForeignContentTemplates calls ParseFragment, with a nil
// context, on inputs that nest <template> inside <math>. It is a regression
// test for an infinite loop: ParseFragment's return values are discarded and
// nothing is asserted, so the test passes as long as every call returns.
func TestParseFragmentForeignContentTemplates(t *testing.T) {
469+
srcs := []string{
470+
"<math><html><template><mn><template></template></template>",
471+
"<math><math><head><mi><template>",
472+
}
473+
for _, src := range srcs {
474+
// The next line shouldn't infinite-loop.
475+
ParseFragment(strings.NewReader(src), nil)
476+
}
477+
}
478+
457479
func BenchmarkParser(b *testing.B) {
458480
buf, err := ioutil.ReadFile("testdata/go1.html")
459481
if err != nil {

0 commit comments

Comments
 (0)