Backport fix for XXE vulnerability.
This commit is contained in:
parent
2e4d5fd2e8
commit
7855855ac3
|
@ -1,7 +1,7 @@
|
|||
From 1ac056bdd30475566dcf630a55aae7cdd9eb81eb Mon Sep 17 00:00:00 2001
|
||||
From: Elliott Sales de Andrade <quantum.analyst@gmail.com>
|
||||
Date: Wed, 19 Aug 2020 22:44:11 -0400
|
||||
Subject: [PATCH 1/4] Relax some test requirements.
|
||||
Subject: [PATCH 1/5] Relax some test requirements.
|
||||
|
||||
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
|
||||
---
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
From e0de53084fe005fafad423a66b261cc45cfaaeb0 Mon Sep 17 00:00:00 2001
|
||||
From: "James R. Barlow" <james@purplerock.ca>
|
||||
Date: Mon, 4 Jan 2021 20:21:51 -0800
|
||||
Subject: [PATCH 2/4] Fix externalize_inline_images for qpdf 10.1.0
|
||||
Subject: [PATCH 2/5] Fix externalize_inline_images for qpdf 10.1.0
|
||||
|
||||
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
|
||||
---
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
From d9554019340177a2fb0d033fc6ffcf45f025cb2f Mon Sep 17 00:00:00 2001
|
||||
From: "James R. Barlow" <james@purplerock.ca>
|
||||
Date: Wed, 6 Jan 2021 00:22:16 -0800
|
||||
Subject: [PATCH 3/4] libqpdf 10.1.0 raises different exception
|
||||
Subject: [PATCH 3/5] libqpdf 10.1.0 raises different exception
|
||||
|
||||
The different errors are acceptable to us; actually they are more
|
||||
correct than the original behavior.
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
From 4521a84f5892a38e380ed53f9b4cbcfd7647db45 Mon Sep 17 00:00:00 2001
|
||||
From: "James R. Barlow" <james@purplerock.ca>
|
||||
Date: Wed, 6 Jan 2021 03:39:50 -0800
|
||||
Subject: [PATCH 4/4] Fix test_tokenfilter_is_abstract
|
||||
Subject: [PATCH 4/5] Fix test_tokenfilter_is_abstract
|
||||
|
||||
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
|
||||
---
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
From a1babed37c2ffc20104d5cbd840816d890f5bcfc Mon Sep 17 00:00:00 2001
|
||||
From: "James R. Barlow" <james@purplerock.ca>
|
||||
Date: Sat, 27 Mar 2021 00:43:21 -0700
|
||||
Subject: [PATCH 5/5] Fix XXE vulnerability in XMP metadata parsing
|
||||
|
||||
For details:
|
||||
https://portswigger.net/web-security/xxe
|
||||
|
||||
Reported by: Eric Therond (eric.therond@sonarsource.com) of Sonarsource (https://www.sonarsource.com/)
|
||||
|
||||
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
|
||||
---
|
||||
src/pikepdf/_xml.py | 30 ++++++++++++++++++++++++++++++
|
||||
src/pikepdf/models/metadata.py | 10 +++++-----
|
||||
tests/test_metadata.py | 24 ++++++++++++++++++++++++
|
||||
3 files changed, 59 insertions(+), 5 deletions(-)
|
||||
create mode 100644 src/pikepdf/_xml.py
|
||||
|
||||
diff --git a/src/pikepdf/_xml.py b/src/pikepdf/_xml.py
|
||||
new file mode 100644
|
||||
index 0000000..f0e1c38
|
||||
--- /dev/null
|
||||
+++ b/src/pikepdf/_xml.py
|
||||
@@ -0,0 +1,30 @@
|
||||
+# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
+# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
+#
|
||||
+# Copyright (C) 2021, James R. Barlow (https://github.com/jbarlow83/)
|
||||
+
|
||||
+
|
||||
+from typing import IO, Any, AnyStr, Union
|
||||
+
|
||||
+from lxml.etree import XMLParser as _UnsafeXMLParser
|
||||
+from lxml.etree import parse as _parse
|
||||
+
|
||||
+
|
||||
+class _XMLParser(_UnsafeXMLParser):
|
||||
+ def __init__(self, *args, **kwargs):
|
||||
+ # Prevent XXE attacks
|
||||
+ # https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2755
|
||||
+ kwargs['resolve_entities'] = False
|
||||
+ kwargs['no_network'] = True
|
||||
+ super().__init__(*args, **kwargs)
|
||||
+
|
||||
+
|
||||
+def parse_xml(source: Union[AnyStr, IO[Any]], recover: bool = False):
|
||||
+ """Wrapper around lxml's parse to provide protection against XXE attacks."""
|
||||
+
|
||||
+ parser = _XMLParser(recover=recover, remove_pis=False)
|
||||
+ return _parse(source, parser=parser)
|
||||
+
|
||||
+
|
||||
+__all__ = ['parse_xml']
|
||||
diff --git a/src/pikepdf/models/metadata.py b/src/pikepdf/models/metadata.py
|
||||
index f4f3860..8b44b60 100644
|
||||
--- a/src/pikepdf/models/metadata.py
|
||||
+++ b/src/pikepdf/models/metadata.py
|
||||
@@ -15,10 +15,11 @@ from io import BytesIO
|
||||
from warnings import warn
|
||||
|
||||
from lxml import etree
|
||||
-from lxml.etree import QName, XMLParser, XMLSyntaxError, parse
|
||||
+from lxml.etree import QName, XMLSyntaxError
|
||||
|
||||
from .. import Name, Stream, String
|
||||
from .. import __version__ as pikepdf_version
|
||||
+from .. import _xml
|
||||
|
||||
XMP_NS_DC = "http://purl.org/dc/elements/1.1/"
|
||||
XMP_NS_PDF = "http://ns.adobe.com/pdf/1.3/"
|
||||
@@ -350,14 +351,13 @@ class PdfMetadata(MutableMapping):
|
||||
data = XMP_EMPTY # on some platforms lxml chokes on empty documents
|
||||
|
||||
def basic_parser(xml):
|
||||
- return parse(BytesIO(xml))
|
||||
+ return _xml.parse_xml(BytesIO(xml))
|
||||
|
||||
def strip_illegal_bytes_parser(xml):
|
||||
- return parse(BytesIO(re_xml_illegal_bytes.sub(b'', xml)))
|
||||
+ return _xml.parse_xml(BytesIO(re_xml_illegal_bytes.sub(b'', xml)))
|
||||
|
||||
def recovery_parser(xml):
|
||||
- parser = XMLParser(recover=True)
|
||||
- return parse(BytesIO(xml), parser)
|
||||
+ return _xml.parse_xml(BytesIO(xml), recover=True)
|
||||
|
||||
def replace_with_empty_xmp(_xml=None):
|
||||
log.warning("Error occurred parsing XMP, replacing with empty XMP.")
|
||||
diff --git a/tests/test_metadata.py b/tests/test_metadata.py
|
||||
index cf14b21..d968385 100644
|
||||
--- a/tests/test_metadata.py
|
||||
+++ b/tests/test_metadata.py
|
||||
@@ -596,3 +596,27 @@ def test_issue_135_title_rdf_bag(trivial):
|
||||
xmp['dc:title'] = {'Title 1', 'Title 2'}
|
||||
with trivial.open_metadata(update_docinfo=False) as xmp:
|
||||
assert b'Title 1; Title 2</rdf:li></rdf:Alt></dc:title>' in xmp._get_xml_bytes()
|
||||
+
|
||||
+
|
||||
+def test_xxe(trivial, outdir):
|
||||
+ secret = outdir / 'secret.txt'
|
||||
+ secret.write_text("This is a secret")
|
||||
+ trivial.Root.Metadata = Stream(
|
||||
+ trivial,
|
||||
+ b"""\
|
||||
+<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?>
|
||||
+<!DOCTYPE rdf:RDF [<!ENTITY xxe SYSTEM "file://%s">]>
|
||||
+<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='Image'>
|
||||
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
||||
+<note>
|
||||
+<to>&xxe;</to>
|
||||
+<from>xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx</from>
|
||||
+</note>
|
||||
+</rdf:RDF>
|
||||
+</x:xmpmeta>
|
||||
+<?xpacket end='w'?>
|
||||
+ """
|
||||
+ % os.fsencode(secret),
|
||||
+ )
|
||||
+ with trivial.open_metadata() as m:
|
||||
+ assert 'This is a secret' not in str(m)
|
||||
--
|
||||
2.29.2
|
||||
|
|
@ -13,6 +13,8 @@ Patch0001: 0001-Relax-some-test-requirements.patch
|
|||
Patch0002: 0002-Fix-externalize_inline_images-for-qpdf-10.1.0.patch
|
||||
Patch0003: 0003-libqpdf-10.1.0-raises-different-exception.patch
|
||||
Patch0004: 0004-Fix-test_tokenfilter_is_abstract.patch
|
||||
# Backport XXE security fix.
|
||||
Patch0005: 0005-Fix-XXE-vulnerability-in-XMP-metadata-parsing.patch
|
||||
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: qpdf-devel >= 8.4.2
|
||||
|
@ -107,6 +109,7 @@ rm -rf html/.{doctrees,buildinfo}
|
|||
%changelog
|
||||
* Thu Apr 01 2021 Elliott Sales de Andrade <quantum.analyst@gmail.com> - 1.19.4-2
|
||||
- Backport fix for qpdf 10.1.0
|
||||
- Backport fix for XXE vulnerability (#1945365)
|
||||
|
||||
* Wed Dec 23 2020 Elliott Sales de Andrade <quantum.analyst@gmail.com> - 1.19.4-1
|
||||
- Update to latest version
|
||||
|
|
Loading…
Reference in New Issue