100 lines
3.5 KiB
Diff
100 lines
3.5 KiB
Diff
From 86368e9cf70a0ad23cccd5ee32de847149af0c6f Mon Sep 17 00:00:00 2001
|
|
From: Stefan Behnel <stefan_ml@behnel.de>
|
|
Date: Fri, 1 Jul 2022 21:06:10 +0200
|
|
Subject: [PATCH] Fix a crash when incorrect parser input occurs together with
|
|
usages of iterwalk() on trees generated by the same parser.
|
|
|
|
CVE: CVE-2022-2309
|
|
|
|
Upstream-Status: Backport
|
|
[https://github.com/lxml/lxml/commit/86368e9cf70a0ad23cccd5ee32de847149af0c6f]
|
|
|
|
Signed-off-by: Yue Tao <yue.tao@windriver.com>
|
|
|
|
---
|
|
src/lxml/apihelpers.pxi | 7 ++++---
|
|
src/lxml/iterparse.pxi | 11 ++++++-----
|
|
src/lxml/tests/test_etree.py | 20 ++++++++++++++++++++
|
|
3 files changed, 30 insertions(+), 8 deletions(-)
|
|
|
|
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
|
|
index c1662762..9fae9fb1 100644
|
|
--- a/src/lxml/apihelpers.pxi
|
|
+++ b/src/lxml/apihelpers.pxi
|
|
@@ -246,9 +246,10 @@ cdef dict _build_nsmap(xmlNode* c_node):
|
|
while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE:
|
|
c_ns = c_node.nsDef
|
|
while c_ns is not NULL:
|
|
- prefix = funicodeOrNone(c_ns.prefix)
|
|
- if prefix not in nsmap:
|
|
- nsmap[prefix] = funicodeOrNone(c_ns.href)
|
|
+ if c_ns.prefix or c_ns.href:
|
|
+ prefix = funicodeOrNone(c_ns.prefix)
|
|
+ if prefix not in nsmap:
|
|
+ nsmap[prefix] = funicodeOrNone(c_ns.href)
|
|
c_ns = c_ns.next
|
|
c_node = c_node.parent
|
|
return nsmap
|
|
diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi
|
|
index 138c23a6..a7299da6 100644
|
|
--- a/src/lxml/iterparse.pxi
|
|
+++ b/src/lxml/iterparse.pxi
|
|
@@ -420,7 +420,7 @@ cdef int _countNsDefs(xmlNode* c_node):
|
|
count = 0
|
|
c_ns = c_node.nsDef
|
|
while c_ns is not NULL:
|
|
- count += 1
|
|
+ count += (c_ns.href is not NULL)
|
|
c_ns = c_ns.next
|
|
return count
|
|
|
|
@@ -431,9 +431,10 @@ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1:
|
|
count = 0
|
|
c_ns = c_node.nsDef
|
|
while c_ns is not NULL:
|
|
- ns_tuple = (funicode(c_ns.prefix) if c_ns.prefix is not NULL else '',
|
|
- funicode(c_ns.href))
|
|
- event_list.append( (u"start-ns", ns_tuple) )
|
|
- count += 1
|
|
+ if c_ns.href:
|
|
+ ns_tuple = (funicodeOrEmpty(c_ns.prefix),
|
|
+ funicode(c_ns.href))
|
|
+ event_list.append( (u"start-ns", ns_tuple) )
|
|
+ count += 1
|
|
c_ns = c_ns.next
|
|
return count
|
|
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
|
|
index e5f08469..285313f6 100644
|
|
--- a/src/lxml/tests/test_etree.py
|
|
+++ b/src/lxml/tests/test_etree.py
|
|
@@ -1460,6 +1460,26 @@ class ETreeOnlyTestCase(HelperTestCase):
|
|
[1,2,1,4],
|
|
counts)
|
|
|
|
+ def test_walk_after_parse_failure(self):
|
|
+ # This used to be an issue because libxml2 can leak empty namespaces
|
|
+ # between failed parser runs. iterwalk() failed to handle such a tree.
|
|
+ try:
|
|
+ etree.XML('''<anot xmlns="1">''')
|
|
+ except etree.XMLSyntaxError:
|
|
+ pass
|
|
+ else:
|
|
+ assert False, "invalid input did not fail to parse"
|
|
+
|
|
+ et = etree.XML('''<root> </root>''')
|
|
+ try:
|
|
+ ns = next(etree.iterwalk(et, events=('start-ns',)))
|
|
+ except StopIteration:
|
|
+ # This would be the expected result, because there was no namespace
|
|
+ pass
|
|
+ else:
|
|
+ # This is a bug in libxml2
|
|
+ assert not ns, repr(ns)
|
|
+
|
|
def test_itertext_comment_pi(self):
|
|
# https://bugs.launchpad.net/lxml/+bug/1844674
|
|
XML = self.etree.XML
|
|
--
|
|
2.17.1
|
|
|