Fix mXSS vulnerability due to the use of improper parser

Resolves: CVE-2020-27783
This commit is contained in:
Mikolaj Izdebski 2020-12-18 16:18:40 +01:00
parent e6e6df4b40
commit 1244f63618
2 changed files with 81 additions and 2 deletions

View File

@@ -0,0 +1,74 @@
From b49ffd817ecce80a5d0d6a541c58b92ebb51656b Mon Sep 17 00:00:00 2001
From: Mikolaj Izdebski <mizdebsk@redhat.com>
Date: Fri, 18 Dec 2020 16:13:04 +0100
Subject: [PATCH] Fix CVE-2020-27783: mXSS due to the use of improper parser
Backported from upstream commits 89e7aad6e7ff9ecd88678ff25f885988b184b26e
and a105ab8dc262ec6735977c25c13f0bdfcdec72a7
---
src/lxml/html/clean.py | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
index aa9fc57f..15298b5d 100644
--- a/src/lxml/html/clean.py
+++ b/src/lxml/html/clean.py
@@ -61,12 +61,15 @@ __all__ = ['clean_html', 'clean', 'Cleaner', 'autolink', 'autolink_html',
# This is an IE-specific construct you can have in a stylesheet to
# run some Javascript:
-_css_javascript_re = re.compile(
- r'expression\s*\(.*?\)', re.S|re.I)
+_replace_css_javascript = re.compile(
+ r'expression\s*\(.*?\)', re.S|re.I).sub
# Do I have to worry about @\nimport?
-_css_import_re = re.compile(
- r'@\s*import', re.I)
+_replace_css_import = re.compile(
+ r'@\s*import', re.I).sub
+
+_looks_like_tag_content = re.compile(
+ r'</?[a-zA-Z]+|\son[a-zA-Z]+\s*=', re.ASCII).search
# All kinds of schemes besides just javascript: that can cause
# execution:
@@ -292,8 +295,8 @@ class Cleaner(object):
if not self.inline_style:
for el in _find_styled_elements(doc):
old = el.get('style')
- new = _css_javascript_re.sub('', old)
- new = _css_import_re.sub('', new)
+ new = _replace_css_javascript('', old)
+ new = _replace_css_import('', new)
if self._has_sneaky_javascript(new):
# Something tricky is going on...
del el.attrib['style']
@@ -305,9 +308,9 @@ class Cleaner(object):
el.drop_tree()
continue
old = el.text or ''
- new = _css_javascript_re.sub('', old)
+ new = _replace_css_javascript('', old)
# The imported CSS can do anything; we just can't allow:
- new = _css_import_re.sub('', old)
+ new = _replace_css_import('', new)
if self._has_sneaky_javascript(new):
# Something tricky is going on...
el.text = '/* deleted */'
@@ -509,6 +512,12 @@ class Cleaner(object):
return True
if 'expression(' in style:
return True
+ if '</noscript' in style:
+ # e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
+ return True
+ if _looks_like_tag_content(style):
+ # e.g. '<math><style><img src=x onerror=alert(1)></style></math>'
+ return True
return False
def clean_html(self, html):
--
2.26.2

View File

@@ -2,12 +2,13 @@
Name: python-%{modname}
Version: 4.4.1
-Release: 4%{?dist}
+Release: 5%{?dist}
Summary: XML processing library combining libxml2/libxslt with the ElementTree API
License: BSD
URL: https://github.com/lxml/lxml
Source0: https://lxml.de/files/%{modname}-%{version}.tgz
+Patch0001: 0001-Fix-CVE-2020-27783-mXSS-due-to-the-use-of-improper-p.patch
BuildRequires: gcc
BuildRequires: libxml2-devel
@@ -37,7 +38,7 @@ Suggests: python%{python3_version}dist(beautifulsoup4)
Python 3 version.
%prep
-%autosetup -n %{modname}-%{version}
+%autosetup -n %{modname}-%{version} -p1
# Remove pregenerated Cython C sources
find -type f -name '*.c' -print -delete
@@ -57,6 +58,10 @@ env WITH_CYTHON=true %py3_build
%{python3_sitearch}/%{modname}-*.egg-info/
%changelog
+* Fri Dec 18 2020 Mikolaj Izdebski <mizdebsk@redhat.com> - 4.4.1-5
+- Fix mXSS vulnerability due to the use of improper parser
+- Resolves: CVE-2020-27783
* Thu Jan 30 2020 Fedora Release Engineering <releng@fedoraproject.org> - 4.4.1-4
- Rebuilt for https://fedoraproject.org/wiki/Fedora_32_Mass_Rebuild