diff --git a/CVE-2022-2309.patch b/CVE-2022-2309.patch new file mode 100644 index 0000000..0f718e3 --- /dev/null +++ b/CVE-2022-2309.patch @@ -0,0 +1,186 @@ +From 85b664e506d73780232c256b6dfbaf1f266cdabd Mon Sep 17 00:00:00 2001 +From: Stefan Behnel +Date: Fri, 1 Jul 2022 21:06:10 +0200 +Subject: [PATCH 1/3] Fix a crash when incorrect parser input occurs together + with usages of iterwalk() on trees generated by the same parser. + +--- + src/lxml/apihelpers.pxi | 7 ++++--- + src/lxml/iterparse.pxi | 11 ++++++----- + src/lxml/tests/test_etree.py | 20 ++++++++++++++++++++ + 3 files changed, 30 insertions(+), 8 deletions(-) + +diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi +index 5eb3416..88a031d 100644 +--- a/src/lxml/apihelpers.pxi ++++ b/src/lxml/apihelpers.pxi +@@ -246,9 +246,10 @@ cdef dict _build_nsmap(xmlNode* c_node): + while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE: + c_ns = c_node.nsDef + while c_ns is not NULL: +- prefix = funicodeOrNone(c_ns.prefix) +- if prefix not in nsmap: +- nsmap[prefix] = funicodeOrNone(c_ns.href) ++ if c_ns.prefix or c_ns.href: ++ prefix = funicodeOrNone(c_ns.prefix) ++ if prefix not in nsmap: ++ nsmap[prefix] = funicodeOrNone(c_ns.href) + c_ns = c_ns.next + c_node = c_node.parent + return nsmap +diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi +index 4c20506..3da7485 100644 +--- a/src/lxml/iterparse.pxi ++++ b/src/lxml/iterparse.pxi +@@ -419,7 +419,7 @@ cdef int _countNsDefs(xmlNode* c_node): + count = 0 + c_ns = c_node.nsDef + while c_ns is not NULL: +- count += 1 ++ count += (c_ns.href is not NULL) + c_ns = c_ns.next + return count + +@@ -430,9 +430,10 @@ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1: + count = 0 + c_ns = c_node.nsDef + while c_ns is not NULL: +- ns_tuple = (funicode(c_ns.prefix) if c_ns.prefix is not NULL else '', +- funicode(c_ns.href)) +- event_list.append( (u"start-ns", ns_tuple) ) +- count += 1 ++ if c_ns.href: ++ ns_tuple = (funicodeOrEmpty(c_ns.prefix), ++ funicode(c_ns.href)) ++ event_list.append( (u"start-ns", ns_tuple) ) ++ count += 1 + c_ns = c_ns.next + return count +diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py +index ef5c54b..7b85596 100644 +--- a/src/lxml/tests/test_etree.py ++++ b/src/lxml/tests/test_etree.py +@@ -1459,6 +1459,26 @@ class ETreeOnlyTestCase(HelperTestCase): + [1,2,1,4], + counts) + ++ def test_walk_after_parse_failure(self): ++ # This used to be an issue because libxml2 can leak empty namespaces ++ # between failed parser runs. iterwalk() failed to handle such a tree. ++ try: ++ etree.XML('''''') ++ except etree.XMLSyntaxError: ++ pass ++ else: ++ assert False, "invalid input did not fail to parse" ++ ++ et = etree.XML(''' ''') ++ try: ++ ns = next(etree.iterwalk(et, events=('start-ns',))) ++ except StopIteration: ++ # This would be the expected result, because there was no namespace ++ pass ++ else: ++ # This is a bug in libxml2 ++ assert not ns, repr(ns) ++ + def test_itertext_comment_pi(self): + # https://bugs.launchpad.net/lxml/+bug/1844674 + XML = self.etree.XML +-- +2.37.2 + + +From c26503461af4c250fc6e771887fae7f9dd208e9b Mon Sep 17 00:00:00 2001 +From: Stefan Behnel +Date: Fri, 1 Jul 2022 21:19:44 +0200 +Subject: [PATCH 2/3] Prevent parse failure in new test from leaking into later + test runs. + +--- + src/lxml/tests/test_etree.py | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py +index 7b85596..8171e03 100644 +--- a/src/lxml/tests/test_etree.py ++++ b/src/lxml/tests/test_etree.py +@@ -1462,14 +1462,16 @@ class ETreeOnlyTestCase(HelperTestCase): + def test_walk_after_parse_failure(self): + # This used to be an issue because libxml2 can leak empty namespaces + # between failed parser runs. iterwalk() failed to handle such a tree. ++ parser = etree.XMLParser() ++ + try: +- etree.XML('''''') ++ etree.XML('''''', parser=parser) + except etree.XMLSyntaxError: + pass + else: + assert False, "invalid input did not fail to parse" + +- et = etree.XML(''' ''') ++ et = etree.XML(''' ''', parser=parser) + try: + ns = next(etree.iterwalk(et, events=('start-ns',))) + except StopIteration: +-- +2.37.2 + + +From 2e37fbe5c54a188394aa066c3074ab974f6b9f61 Mon Sep 17 00:00:00 2001 +From: Stefan Behnel +Date: Tue, 19 Jul 2022 08:25:20 +0200 +Subject: [PATCH 3/3] Work around libxml2 bug in affected versions that failed + to reset the namespace count in the parser context. + +See https://gitlab.gnome.org/GNOME/libxml2/-/issues/378 +--- + src/lxml/includes/xmlparser.pxd | 1 + + src/lxml/parser.pxi | 3 +++ + src/lxml/tests/test_etree.py | 3 +-- + 3 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/src/lxml/includes/xmlparser.pxd b/src/lxml/includes/xmlparser.pxd +index a196e34..45acfc8 100644 +--- a/src/lxml/includes/xmlparser.pxd ++++ b/src/lxml/includes/xmlparser.pxd +@@ -144,6 +144,7 @@ cdef extern from "libxml/parser.h": + void* userData + int* spaceTab + int spaceMax ++ int nsNr + bint html + bint progressive + int inSubset +diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi +index 35b5145..3187a38 100644 +--- a/src/lxml/parser.pxi ++++ b/src/lxml/parser.pxi +@@ -569,6 +569,9 @@ cdef class _ParserContext(_ResolverContext): + self._c_ctxt.disableSAX = 0 # work around bug in libxml2 + else: + xmlparser.xmlClearParserCtxt(self._c_ctxt) ++ # work around bug in libxml2 [2.9.10 .. 2.9.14]: ++ # https://gitlab.gnome.org/GNOME/libxml2/-/issues/378 ++ self._c_ctxt.nsNr = 0 + + cdef int prepare(self, bint set_document_loader=True) except -1: + cdef int result +diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py +index 8171e03..d767cfb 100644 +--- a/src/lxml/tests/test_etree.py ++++ b/src/lxml/tests/test_etree.py +@@ -1478,8 +1478,7 @@ class ETreeOnlyTestCase(HelperTestCase): + # This would be the expected result, because there was no namespace + pass + else: +- # This is a bug in libxml2 +- assert not ns, repr(ns) ++ assert False, "Found unexpected namespace '%s'" % ns + + def test_itertext_comment_pi(self): + # https://bugs.launchpad.net/lxml/+bug/1844674 +-- +2.37.2 + diff --git a/python-lxml.spec b/python-lxml.spec index d614049..ee8af63 100644 --- a/python-lxml.spec +++ b/python-lxml.spec @@ -2,7 +2,7 @@ Name: python-%{modname} Version: 4.7.1 -Release: 2%{?dist} +Release: 3%{?dist} Summary: XML processing library combining libxml2/libxslt with the ElementTree API # The lxml project is licensed under BSD @@ -13,6 +13,13 @@ License: BSD and MIT and zlib URL: https://github.com/lxml/lxml Source0: %{pypi_source %{modname}} +# Security fix for CVE-2022-2309 +# Resolved upstream: +# https://github.com/lxml/lxml/commit/86368e9cf70a0ad23cccd5ee32de847149af0c6f +# https://github.com/lxml/lxml/commit/d01872ccdf7e1e5e825b6c6292b43e7d27ae5fc4 +# https://github.com/lxml/lxml/commit/c742576c105f40fc8b754fcae56fee4aa35840a3 +Patch: CVE-2022-2309.patch + BuildRequires: gcc BuildRequires: libxml2-devel BuildRequires: libxslt-devel @@ -65,6 +72,10 @@ cp -a build/lib.%{python3_platform}-%{python3_version}/* src/ %{python3_sitearch}/%{modname}-*.egg-info/ %changelog +* Thu Sep 15 2022 Charalampos Stratakis - 4.7.1-3 +- Security fix for CVE-2022-2309 +- Resolves: rhbz#2107571 + * Fri Jan 21 2022 Fedora Release Engineering - 4.7.1-2 - Rebuilt for https://fedoraproject.org/wiki/Fedora_36_Mass_Rebuild