Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions html5lib/html5parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,10 +364,16 @@ def resetInsertionMode(self):
assert self.innerHTML
last = True
nodeName = self.innerHTML
# Check for conditions that should only happen in the innerHTML
# case
if nodeName in ("select", "colgroup", "head", "html"):
assert self.innerHTML
# "select" may appear in the stack during normal parsing (e.g.
# inside foreign content); in that case fall through to the
# newModes lookup so we end up in "inSelect". The remaining
# names ("colgroup", "head", "html") should only be reachable
# when the fragment-parsing algorithm is in use. Malformed
# markup can land us here in a full parse, so gracefully skip
# those nodes rather than crashing with an AssertionError.
if nodeName in ("colgroup", "head", "html"):
if not self.innerHTML:
continue

if not last and node.namespace != self.tree.defaultNamespace:
continue
Expand Down Expand Up @@ -1696,7 +1702,11 @@ def processEOF(self):
if self.tree.openElements[-1].name != "html":
self.parser.parseError("eof-in-table")
else:
assert self.parser.innerHTML
# The current node is <html>. That is expected when parsing a fragment
# (innerHTML), but malformed markup (e.g. <table><svg><html>) can reach
# here in a full parse too; in that case report "eof-in-table" instead
# of asserting. Either way, parsing stops here.
if not self.parser.innerHTML:
self.parser.parseError("eof-in-table")
# Stop parsing

def processSpaceCharacters(self, token):
Expand Down
12 changes: 12 additions & 0 deletions html5lib/tests/test_parser2.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,15 @@ def test_self_closing_col():
parser = HTMLParser()
parser.parseFragment('<table><colgroup><col /></colgroup></table>')
assert not parser.errors


def test_malformed_select_in_foreign_content_does_not_crash():
    """Parse nested, oddly-cased <select> tags inside <math> without crashing.

    The parse must return a result (not None) and the malformed markup
    must be recorded in the parser's error list.
    """
    markup = b'-<math><sElect><mi><sElect><sElect>'
    html_parser = HTMLParser()
    document = html_parser.parse(markup)
    assert document is not None
    assert html_parser.errors


def test_malformed_table_with_html_in_foreign_content_does_not_crash():
    """Parse <table><svg><html> preceded by a non-ASCII byte pair without crashing.

    The parse must return a result (not None) and the malformed markup
    must be recorded in the parser's error list.
    """
    markup = b'\xc3\xb1<table><svg><html>'
    html_parser = HTMLParser()
    document = html_parser.parse(markup)
    assert document is not None
    assert html_parser.errors