Compare commits

..

1 Commits
rawhide ... f32

Author SHA1 Message Date
Mikolaj Izdebski 1244f63618 Fix mXSS vulnerability due to the use of improper parser
Resolves: CVE-2020-27783
2020-12-18 16:20:25 +01:00
10 changed files with 106 additions and 203 deletions

View File

@ -1 +0,0 @@
1

5
.gitignore vendored
View File

@ -53,8 +53,3 @@ lxml-2.2.7.tar.gz.asc
/lxml-4.2.5.tgz
/lxml-4.4.0.tgz
/lxml-4.4.1.tgz
/lxml-4.5.1.tgz
/lxml-4.6.2.tar.gz
/lxml-4.6.3.tar.gz
/lxml-4.7.1.tar.gz
/lxml-4.9.1.tar.gz

View File

@ -0,0 +1,74 @@
From b49ffd817ecce80a5d0d6a541c58b92ebb51656b Mon Sep 17 00:00:00 2001
From: Mikolaj Izdebski <mizdebsk@redhat.com>
Date: Fri, 18 Dec 2020 16:13:04 +0100
Subject: [PATCH] Fix CVE-2020-27783: mXSS due to the use of improper parser
Backported from upstream commits 89e7aad6e7ff9ecd88678ff25f885988b184b26e
and a105ab8dc262ec6735977c25c13f0bdfcdec72a7
---
src/lxml/html/clean.py | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
index aa9fc57f..15298b5d 100644
--- a/src/lxml/html/clean.py
+++ b/src/lxml/html/clean.py
@@ -61,12 +61,15 @@ __all__ = ['clean_html', 'clean', 'Cleaner', 'autolink', 'autolink_html',
# This is an IE-specific construct you can have in a stylesheet to
# run some Javascript:
-_css_javascript_re = re.compile(
- r'expression\s*\(.*?\)', re.S|re.I)
+_replace_css_javascript = re.compile(
+ r'expression\s*\(.*?\)', re.S|re.I).sub
# Do I have to worry about @\nimport?
-_css_import_re = re.compile(
- r'@\s*import', re.I)
+_replace_css_import = re.compile(
+ r'@\s*import', re.I).sub
+
+_looks_like_tag_content = re.compile(
+ r'</?[a-zA-Z]+|\son[a-zA-Z]+\s*=', re.ASCII).search
# All kinds of schemes besides just javascript: that can cause
# execution:
@@ -292,8 +295,8 @@ class Cleaner(object):
if not self.inline_style:
for el in _find_styled_elements(doc):
old = el.get('style')
- new = _css_javascript_re.sub('', old)
- new = _css_import_re.sub('', new)
+ new = _replace_css_javascript('', old)
+ new = _replace_css_import('', new)
if self._has_sneaky_javascript(new):
# Something tricky is going on...
del el.attrib['style']
@@ -305,9 +308,9 @@ class Cleaner(object):
el.drop_tree()
continue
old = el.text or ''
- new = _css_javascript_re.sub('', old)
+ new = _replace_css_javascript('', old)
# The imported CSS can do anything; we just can't allow:
- new = _css_import_re.sub('', old)
+ new = _replace_css_import('', new)
if self._has_sneaky_javascript(new):
# Something tricky is going on...
el.text = '/* deleted */'
@@ -509,6 +512,12 @@ class Cleaner(object):
return True
if 'expression(' in style:
return True
+ if '</noscript' in style:
+ # e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
+ return True
+ if _looks_like_tag_content(style):
+ # e.g. '<math><style><img src=x onerror=alert(1)></style></math>'
+ return True
return False
def clean_html(self, html):
--
2.26.2

1
ci.fmf
View File

@ -1 +0,0 @@
resultsdb-testcase: separate

View File

@ -1,53 +0,0 @@
From c742576c105f40fc8b754fcae56fee4aa35840a3 Mon Sep 17 00:00:00 2001
From: Stefan Behnel <stefan_ml@behnel.de>
Date: Tue, 19 Jul 2022 08:25:20 +0200
Subject: [PATCH] Work around libxml2 bug in affected versions that failed to
reset the namespace count in the parser context.
See https://gitlab.gnome.org/GNOME/libxml2/-/issues/378
---
src/lxml/includes/xmlparser.pxd | 1 +
src/lxml/parser.pxi | 3 +++
src/lxml/tests/test_etree.py | 3 +--
3 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/lxml/includes/xmlparser.pxd b/src/lxml/includes/xmlparser.pxd
index a196e34bd..45acfc846 100644
--- a/src/lxml/includes/xmlparser.pxd
+++ b/src/lxml/includes/xmlparser.pxd
@@ -144,6 +144,7 @@ cdef extern from "libxml/parser.h":
void* userData
int* spaceTab
int spaceMax
+ int nsNr
bint html
bint progressive
int inSubset
diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi
index f5baf29b9..f0c8c6b64 100644
--- a/src/lxml/parser.pxi
+++ b/src/lxml/parser.pxi
@@ -569,6 +569,9 @@ cdef class _ParserContext(_ResolverContext):
self._c_ctxt.disableSAX = 0 # work around bug in libxml2
else:
xmlparser.xmlClearParserCtxt(self._c_ctxt)
+ # work around bug in libxml2 [2.9.10 .. 2.9.14]:
+ # https://gitlab.gnome.org/GNOME/libxml2/-/issues/378
+ self._c_ctxt.nsNr = 0
cdef int prepare(self, bint set_document_loader=True) except -1:
cdef int result
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index 8bf82c084..0339796d6 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -1491,8 +1491,7 @@ def test_walk_after_parse_failure(self):
# This would be the expected result, because there was no namespace
pass
else:
- # This is a bug in libxml2
- assert not ns, repr(ns)
+ assert False, "Found unexpected namespace '%s'" % ns
def test_itertext_comment_pi(self):
# https://bugs.launchpad.net/lxml/+bug/1844674

View File

@ -1,8 +0,0 @@
--- !Policy
product_versions:
- fedora-*
decision_contexts:
- bodhi_update_push_testing
- bodhi_update_push_stable
rules:
- !PassingTestCaseRule {test_case_name: fedora-ci.koji-build./plans/smoke.functional}

View File

@ -1,7 +0,0 @@
import lxml.etree as et
s = '<foo><bar baz="xyzzy">a<![CDATA[b]]>c</bar></foo>'
x = et.fromstring(s)
t = x.find('bar').text
print(t)
if t != 'abc':
raise Exception()

View File

@ -1,12 +0,0 @@
summary: Basic smoke test
discover:
how: shell
tests:
- name: /smoke/import-python-module
test: |
python3 -c 'import importlib as il; print(il.import_module("lxml"))'
- name: /smoke/etree-fromstring
test: |
python3 plans/etree-fromstring.py
execute:
how: tmt

View File

@ -1,150 +1,66 @@
Name: python-lxml
Version: 4.9.1
Release: 1%{?dist}
%global modname lxml
Name: python-%{modname}
Version: 4.4.1
Release: 5%{?dist}
Summary: XML processing library combining libxml2/libxslt with the ElementTree API
# The lxml project is licensed under BSD-3-Clause
# Some code is derived from ElementTree and cElementTree
# thus using the MIT-CMU elementtree license
# .xsl schematron files are under the MIT license
License: BSD-3-Clause AND MIT-CMU AND MIT
License: BSD
URL: https://github.com/lxml/lxml
Source: %{pypi_source lxml}
# Work around libxml2 bug in affected versions that failed to reset the
# namespace count in the parser context.
# Resolved upstream: https://github.com/lxml/lxml/commit/c742576c105f40fc8b754fcae56fee4aa35840a3
Patch: fix-namespace-count.patch
Source0: https://lxml.de/files/%{modname}-%{version}.tgz
Patch0001: 0001-Fix-CVE-2020-27783-mXSS-due-to-the-use-of-improper-p.patch
BuildRequires: gcc
BuildRequires: libxml2-devel
BuildRequires: libxslt-devel
BuildRequires: python3-devel
# It is a good idea to BuildRequire the runtime requirements of the [extras] we build.
# That way, we ensure all the [extras] we build are installable,
# and we possibly run tests that would otherwise be skipped.
# However, some of the extras here create a dependency loop.
# - [cssselect] Requires cssselect BuildRequires lxml
# - [html5] Requires html5lib BuildRequires lxml
# - [htmlsoup] Requires beautifulsoup4 Requires lxml
# Hence we provide a bcond to disable this buildtime requirement.
%bcond buildrequire_extras 1
%global _description \
lxml is a Pythonic, mature binding for the libxml2 and libxslt libraries. It\
provides safe and convenient access to these libraries using the ElementTree API. It\
extends the ElementTree API significantly to offer support for XPath, RelaxNG,\
XML Schema, XSLT, C14N and much more.
XML Schema, XSLT, C14N and much more. To contact the project, go to the project\
home page <https://lxml.de/> or see our bug tracker at https://launchpad.net/lxml. In case you want to use the current ...
%description %{_description}
%package -n python3-lxml
%package -n python3-%{modname}
Summary: %{summary}
Suggests: python3-lxml+cssselect
Suggests: python3-lxml+html5
Suggests: python3-lxml+htmlsoup
BuildRequires: python3-devel
BuildRequires: python3-setuptools
BuildRequires: python3-Cython
Suggests: python%{python3_version}dist(cssselect) >= 0.7
Suggests: python%{python3_version}dist(html5lib)
Suggests: python%{python3_version}dist(beautifulsoup4)
%{?python_provide:%python_provide python3-%{modname}}
%description -n python3-lxml %{_description}
%description -n python3-%{modname} %{_description}
Python 3 version.
%pyproject_extras_subpkg -n python3-lxml cssselect html5 htmlsoup
%prep
%autosetup -n lxml-%{version} -p1
%generate_buildrequires
%pyproject_buildrequires -x source%{?with_buildrequire_extras:,cssselect,html5,htmlsoup}
%autosetup -n %{modname}-%{version} -p1
# Remove pregenerated Cython C sources
# We need to do this after %%pyproject_buildrequires because setup.py errors
# without Cython and without the .c files.
find -type f -name '*.c' -print -delete >&2
find -type f -name '*.c' -print -delete
%build
export WITH_CYTHON=true
%pyproject_wheel
env WITH_CYTHON=true %py3_build
%install
%pyproject_install
%pyproject_save_files lxml
%py3_install
%check
# The tests assume inplace build, so we copy the built library to source-dir.
# If not done that, Python can either import the tests or the extension modules, but not both.
cp -a build/lib.%{python3_platform}-*/* src/
# The options are: verbose, unit, functional
%{python3} test.py -vuf
%{__python3} setup.py test
%files -n python3-lxml -f %{pyproject_files}
%license doc/licenses/BSD.txt doc/licenses/elementtree.txt
%files -n python3-%{modname}
%license doc/licenses/ZopePublicLicense.txt LICENSES.txt
%doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt
%{python3_sitearch}/%{modname}/
%{python3_sitearch}/%{modname}-*.egg-info/
%changelog
* Wed Sep 14 2022 Charalampos Stratakis <cstratak@redhat.com> - 4.9.1-1
- Update to 4.9.1
- Fix for CVE-2022-2309
- Resolves: rhbz#2107571, rhbz#2110131
* Wed Aug 31 2022 Miro Hrončok <mhroncok@redhat.com> - 4.7.1-6
- Use SPDX license identifiers
- The schematron files are not Zlib licensed, but MIT
- Package the lxml[cssselect], lxml[html5] and lxml[htmlsoup] extras
* Fri Jul 22 2022 Fedora Release Engineering <releng@fedoraproject.org> - 4.7.1-5
- Rebuilt for https://fedoraproject.org/wiki/Fedora_37_Mass_Rebuild
* Wed Jun 22 2022 Charalampos Stratakis <cstratak@redhat.com> - 4.7.1-4
- Fix FTBFS with setuptools >= 62.1
- Resolves: rhbz#2097102
* Mon Jun 13 2022 Python Maint <python-maint@redhat.com> - 4.7.1-3
- Rebuilt for Python 3.11
* Fri Jan 21 2022 Fedora Release Engineering <releng@fedoraproject.org> - 4.7.1-2
- Rebuilt for https://fedoraproject.org/wiki/Fedora_36_Mass_Rebuild
* Thu Jan 06 2022 Charalampos Stratakis <cstratak@redhat.com> - 4.7.1-1
- Update to 4.7.1
- Fixes CVE-2021-43818
- Resolves: rhbz#2031686, rhbz#2032572
* Fri Nov 26 2021 Miro Hrončok <mhroncok@redhat.com> - 4.6.3-5
- Run the tests during build
- Resolves: rhbz#2026941
* Fri Jul 23 2021 Fedora Release Engineering <releng@fedoraproject.org> - 4.6.3-4
- Rebuilt for https://fedoraproject.org/wiki/Fedora_35_Mass_Rebuild
* Thu Jun 03 2021 Charalampos Stratakis <cstratak@redhat.com> - 4.6.3-3
- Update the license information
* Wed Jun 02 2021 Python Maint <python-maint@redhat.com> - 4.6.3-2
- Rebuilt for Python 3.10
* Thu May 20 2021 Charalampos Stratakis <cstratak@redhat.com> - 4.6.3-1
- Update to 4.6.3
- Fixes CVE-2021-28957
- Fixes: rhbz#1941773
- Fixes: rhbz#1941535
* Wed Jan 27 2021 Fedora Release Engineering <releng@fedoraproject.org> - 4.6.2-2
- Rebuilt for https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild
* Tue Dec 01 2020 Miro Hrončok <mhroncok@redhat.com> - 4.6.2-1
- Update to 4.6.2
- Fixes CVE-2020-27783 and another vulnerability in the HTML Cleaner
- Fixes: rhbz#1855415
- Fixes: rhbz#1901634
* Wed Jul 29 2020 Fedora Release Engineering <releng@fedoraproject.org> - 4.5.1-2
- Rebuilt for https://fedoraproject.org/wiki/Fedora_33_Mass_Rebuild
* Mon Jun 01 2020 Igor Raits <ignatenkobrain@fedoraproject.org> - 4.5.1-1
- Update to 4.5.1
* Fri May 22 2020 Miro Hrončok <mhroncok@redhat.com> - 4.4.1-5
- Rebuilt for Python 3.9
* Fri Dec 18 2020 Mikolaj Izdebski <mizdebsk@redhat.com> - 4.4.1-5
- Fix mXSS vulnerability due to the use of improper parser
- Resolves: CVE-2020-27783
* Thu Jan 30 2020 Fedora Release Engineering <releng@fedoraproject.org> - 4.4.1-4
- Rebuilt for https://fedoraproject.org/wiki/Fedora_32_Mass_Rebuild

View File

@ -1 +1 @@
SHA512 (lxml-4.9.1.tar.gz) = d7ec55c7db2c63a716ca5f4d833706d90fc76c944885e010fcdb96786bcfe796994e438450cf4e8e6e75d702e21fb16971f28f854d7a1f76c34e4ae315414d84
SHA512 (lxml-4.4.1.tgz) = 3f11469290868f5bd30631020ac170c40da7348853609edf6fc6b00437b053fd774e0dfc6e711703ac5d05398dfa1f31e59a185935c3dc8ef0e1914a518bd049