Make the Python 3 API actually usable (#1693751)
All string data from rpm is now returned as surrogate-escaped utf-8 string objects. 99.999% of that data IS utf-8 encoded to begin with, so returning bytes is plain broken. As a temporary crutch, monkey-patch a .decode() method onto the returned strings to give users time to migrate from the long-standing broken behavior.
This commit is contained in:
parent
57b4e922b2
commit
d4f7fd8c04
656
0001-In-Python-3-return-all-our-string-data-as-surrogate-.patch
Normal file
656
0001-In-Python-3-return-all-our-string-data-as-surrogate-.patch
Normal file
@ -0,0 +1,656 @@
|
||||
From 84920f898315d09a57a3f1067433eaeb7de5e830 Mon Sep 17 00:00:00 2001
|
||||
Message-Id: <84920f898315d09a57a3f1067433eaeb7de5e830.1554884444.git.pmatilai@redhat.com>
|
||||
From: Panu Matilainen <pmatilai@redhat.com>
|
||||
Date: Fri, 22 Feb 2019 19:44:16 +0200
|
||||
Subject: [PATCH] In Python 3, return all our string data as surrogate-escaped
|
||||
utf-8 strings
|
||||
|
||||
In the almost ten years of rpm sort of supporting Python 3 bindings, quite
|
||||
obviously nobody has actually tried to use them. There's a major mismatch
|
||||
between what the header API outputs (bytes) and what all the other APIs
|
||||
accept (strings), resulting in hysterical TypeErrors all over the place,
|
||||
including but not limited to labelCompare() (RhBug:1631292). Also a huge
|
||||
number of other places have been returning strings and silently assuming
|
||||
utf-8 through use of Py_BuildValue("s", ...), which will just irrevocably
|
||||
fail when non-utf8 data is encountered.
|
||||
|
||||
The politically Python 3-correct solution would be declaring all our data
|
||||
as bytes with unspecified encoding - that's exactly what it historically is.
|
||||
However doing so would by definition break every single rpm script people
|
||||
have developed on Python 2. And when 99% of the rpm content in the world
|
||||
actually is utf-8 encoded even if it doesn't say so (and in recent times
|
||||
packages even advertise themselves as utf-8 encoded), the bytes-only route
|
||||
seems a wee bit too draconian, even to this grumpy old fella.
|
||||
|
||||
Instead, route all our string returns through a single helper macro
|
||||
which on Python 2 just does what we always did, but in Python 3 converts
|
||||
the data to surrogate-escaped utf-8 strings. This makes stuff "just work"
|
||||
out of the box pretty much everywhere even with Python 3 (including
|
||||
our own test-suite!), while still allowing the non-utf8 case to be handled.
|
||||
Handling the non-utf8 case is a bit uglier but still possible,
|
||||
which is exactly how you want corner-cases to be. There might be some
|
||||
uses for retrieving raw byte data from the header, but worrying about
|
||||
such an API is a case for some other rainy day, for now we mostly only
|
||||
care that stuff works again.
|
||||
|
||||
Also add test-cases for mixed data source labelCompare() and
|
||||
non-utf8 insert to + retrieve from header.
|
||||
---
|
||||
python/header-py.c | 2 +-
|
||||
python/rpmds-py.c | 8 ++++----
|
||||
python/rpmfd-py.c | 6 +++---
|
||||
python/rpmfi-py.c | 24 ++++++++++++------------
|
||||
python/rpmfiles-py.c | 26 +++++++++++++-------------
|
||||
python/rpmkeyring-py.c | 2 +-
|
||||
python/rpmmacro-py.c | 2 +-
|
||||
python/rpmmodule.c | 2 +-
|
||||
python/rpmps-py.c | 8 ++++----
|
||||
python/rpmstrpool-py.c | 2 +-
|
||||
python/rpmsystem-py.h | 7 +++++++
|
||||
python/rpmtd-py.c | 2 +-
|
||||
python/rpmte-py.c | 16 ++++++++--------
|
||||
python/rpmts-py.c | 11 ++++++-----
|
||||
python/spec-py.c | 8 ++++----
|
||||
tests/local.at | 1 +
|
||||
tests/rpmpython.at | 34 ++++++++++++++++++++++++++++++++++
|
||||
17 files changed, 102 insertions(+), 59 deletions(-)
|
||||
|
||||
diff --git a/python/header-py.c b/python/header-py.c
|
||||
index c9d54e869..93c241cb7 100644
|
||||
--- a/python/header-py.c
|
||||
+++ b/python/header-py.c
|
||||
@@ -231,7 +231,7 @@ static PyObject * hdrFormat(hdrObject * s, PyObject * args, PyObject * kwds)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
- result = Py_BuildValue("s", r);
|
||||
+ result = utf8FromString(r);
|
||||
free(r);
|
||||
|
||||
return result;
|
||||
diff --git a/python/rpmds-py.c b/python/rpmds-py.c
|
||||
index 39b26628e..ecc9af9d5 100644
|
||||
--- a/python/rpmds-py.c
|
||||
+++ b/python/rpmds-py.c
|
||||
@@ -31,19 +31,19 @@ rpmds_Ix(rpmdsObject * s)
|
||||
static PyObject *
|
||||
rpmds_DNEVR(rpmdsObject * s)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmdsDNEVR(s->ds));
|
||||
+ return utf8FromString(rpmdsDNEVR(s->ds));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
rpmds_N(rpmdsObject * s)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmdsN(s->ds));
|
||||
+ return utf8FromString(rpmdsN(s->ds));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
rpmds_EVR(rpmdsObject * s)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmdsEVR(s->ds));
|
||||
+ return utf8FromString(rpmdsEVR(s->ds));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
@@ -261,7 +261,7 @@ rpmds_subscript(rpmdsObject * s, PyObject * key)
|
||||
|
||||
ix = (int) PyInt_AsLong(key);
|
||||
rpmdsSetIx(s->ds, ix);
|
||||
- return Py_BuildValue("s", rpmdsDNEVR(s->ds));
|
||||
+ return utf8FromString(rpmdsDNEVR(s->ds));
|
||||
}
|
||||
|
||||
static PyMappingMethods rpmds_as_mapping = {
|
||||
diff --git a/python/rpmfd-py.c b/python/rpmfd-py.c
|
||||
index 85fb0cd24..4b05cce5f 100644
|
||||
--- a/python/rpmfd-py.c
|
||||
+++ b/python/rpmfd-py.c
|
||||
@@ -327,17 +327,17 @@ static PyObject *rpmfd_get_closed(rpmfdObject *s)
|
||||
static PyObject *rpmfd_get_name(rpmfdObject *s)
|
||||
{
|
||||
/* XXX: rpm returns non-paths with [mumble], python files use <mumble> */
|
||||
- return Py_BuildValue("s", Fdescr(s->fd));
|
||||
+ return utf8FromString(Fdescr(s->fd));
|
||||
}
|
||||
|
||||
static PyObject *rpmfd_get_mode(rpmfdObject *s)
|
||||
{
|
||||
- return Py_BuildValue("s", s->mode);
|
||||
+ return utf8FromString(s->mode);
|
||||
}
|
||||
|
||||
static PyObject *rpmfd_get_flags(rpmfdObject *s)
|
||||
{
|
||||
- return Py_BuildValue("s", s->flags);
|
||||
+ return utf8FromString(s->flags);
|
||||
}
|
||||
|
||||
static PyGetSetDef rpmfd_getseters[] = {
|
||||
diff --git a/python/rpmfi-py.c b/python/rpmfi-py.c
|
||||
index 8d2f926d0..db405c231 100644
|
||||
--- a/python/rpmfi-py.c
|
||||
+++ b/python/rpmfi-py.c
|
||||
@@ -41,19 +41,19 @@ rpmfi_DX(rpmfiObject * s, PyObject * unused)
|
||||
static PyObject *
|
||||
rpmfi_BN(rpmfiObject * s, PyObject * unused)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfiBN(s->fi));
|
||||
+ return utf8FromString(rpmfiBN(s->fi));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
rpmfi_DN(rpmfiObject * s, PyObject * unused)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfiDN(s->fi));
|
||||
+ return utf8FromString(rpmfiDN(s->fi));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
rpmfi_FN(rpmfiObject * s, PyObject * unused)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfiFN(s->fi));
|
||||
+ return utf8FromString(rpmfiFN(s->fi));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
@@ -98,7 +98,7 @@ rpmfi_Digest(rpmfiObject * s, PyObject * unused)
|
||||
{
|
||||
char *digest = rpmfiFDigestHex(s->fi, NULL);
|
||||
if (digest) {
|
||||
- PyObject *dig = Py_BuildValue("s", digest);
|
||||
+ PyObject *dig = utf8FromString(digest);
|
||||
free(digest);
|
||||
return dig;
|
||||
} else {
|
||||
@@ -109,7 +109,7 @@ rpmfi_Digest(rpmfiObject * s, PyObject * unused)
|
||||
static PyObject *
|
||||
rpmfi_FLink(rpmfiObject * s, PyObject * unused)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfiFLink(s->fi));
|
||||
+ return utf8FromString(rpmfiFLink(s->fi));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
@@ -133,13 +133,13 @@ rpmfi_FMtime(rpmfiObject * s, PyObject * unused)
|
||||
static PyObject *
|
||||
rpmfi_FUser(rpmfiObject * s, PyObject * unused)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfiFUser(s->fi));
|
||||
+ return utf8FromString(rpmfiFUser(s->fi));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
rpmfi_FGroup(rpmfiObject * s, PyObject * unused)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfiFGroup(s->fi));
|
||||
+ return utf8FromString(rpmfiFGroup(s->fi));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
@@ -155,7 +155,7 @@ rpmfi_FClass(rpmfiObject * s, PyObject * unused)
|
||||
|
||||
if ((FClass = rpmfiFClass(s->fi)) == NULL)
|
||||
FClass = "";
|
||||
- return Py_BuildValue("s", FClass);
|
||||
+ return utf8FromString(FClass);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
@@ -208,7 +208,7 @@ rpmfi_iternext(rpmfiObject * s)
|
||||
Py_INCREF(Py_None);
|
||||
PyTuple_SET_ITEM(result, 0, Py_None);
|
||||
} else
|
||||
- PyTuple_SET_ITEM(result, 0, Py_BuildValue("s", FN));
|
||||
+ PyTuple_SET_ITEM(result, 0, utf8FromString(FN));
|
||||
PyTuple_SET_ITEM(result, 1, PyLong_FromLongLong(FSize));
|
||||
PyTuple_SET_ITEM(result, 2, PyInt_FromLong(FMode));
|
||||
PyTuple_SET_ITEM(result, 3, PyInt_FromLong(FMtime));
|
||||
@@ -222,12 +222,12 @@ rpmfi_iternext(rpmfiObject * s)
|
||||
Py_INCREF(Py_None);
|
||||
PyTuple_SET_ITEM(result, 10, Py_None);
|
||||
} else
|
||||
- PyTuple_SET_ITEM(result, 10, Py_BuildValue("s", FUser));
|
||||
+ PyTuple_SET_ITEM(result, 10, utf8FromString(FUser));
|
||||
if (FGroup == NULL) {
|
||||
Py_INCREF(Py_None);
|
||||
PyTuple_SET_ITEM(result, 11, Py_None);
|
||||
} else
|
||||
- PyTuple_SET_ITEM(result, 11, Py_BuildValue("s", FGroup));
|
||||
+ PyTuple_SET_ITEM(result, 11, utf8FromString(FGroup));
|
||||
PyTuple_SET_ITEM(result, 12, rpmfi_Digest(s, NULL));
|
||||
|
||||
} else
|
||||
@@ -313,7 +313,7 @@ rpmfi_subscript(rpmfiObject * s, PyObject * key)
|
||||
|
||||
ix = (int) PyInt_AsLong(key);
|
||||
rpmfiSetFX(s->fi, ix);
|
||||
- return Py_BuildValue("s", rpmfiFN(s->fi));
|
||||
+ return utf8FromString(rpmfiFN(s->fi));
|
||||
}
|
||||
|
||||
static PyMappingMethods rpmfi_as_mapping = {
|
||||
diff --git a/python/rpmfiles-py.c b/python/rpmfiles-py.c
|
||||
index bc07dbeaf..557246cae 100644
|
||||
--- a/python/rpmfiles-py.c
|
||||
+++ b/python/rpmfiles-py.c
|
||||
@@ -41,37 +41,37 @@ static PyObject *rpmfile_dx(rpmfileObject *s)
|
||||
static PyObject *rpmfile_name(rpmfileObject *s)
|
||||
{
|
||||
char * fn = rpmfilesFN(s->files, s->ix);
|
||||
- PyObject *o = Py_BuildValue("s", fn);
|
||||
+ PyObject *o = utf8FromString(fn);
|
||||
free(fn);
|
||||
return o;
|
||||
}
|
||||
|
||||
static PyObject *rpmfile_basename(rpmfileObject *s)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfilesBN(s->files, s->ix));
|
||||
+ return utf8FromString(rpmfilesBN(s->files, s->ix));
|
||||
}
|
||||
|
||||
static PyObject *rpmfile_dirname(rpmfileObject *s)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfilesDN(s->files, rpmfilesDI(s->files, s->ix)));
|
||||
+ return utf8FromString(rpmfilesDN(s->files, rpmfilesDI(s->files, s->ix)));
|
||||
}
|
||||
|
||||
static PyObject *rpmfile_orig_name(rpmfileObject *s)
|
||||
{
|
||||
char * fn = rpmfilesOFN(s->files, s->ix);
|
||||
- PyObject *o = Py_BuildValue("s", fn);
|
||||
+ PyObject *o = utf8FromString(fn);
|
||||
free(fn);
|
||||
return o;
|
||||
}
|
||||
|
||||
static PyObject *rpmfile_orig_basename(rpmfileObject *s)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfilesOBN(s->files, s->ix));
|
||||
+ return utf8FromString(rpmfilesOBN(s->files, s->ix));
|
||||
}
|
||||
|
||||
static PyObject *rpmfile_orig_dirname(rpmfileObject *s)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfilesODN(s->files, rpmfilesODI(s->files, s->ix)));
|
||||
+ return utf8FromString(rpmfilesODN(s->files, rpmfilesODI(s->files, s->ix)));
|
||||
}
|
||||
static PyObject *rpmfile_mode(rpmfileObject *s)
|
||||
{
|
||||
@@ -105,17 +105,17 @@ static PyObject *rpmfile_nlink(rpmfileObject *s)
|
||||
|
||||
static PyObject *rpmfile_linkto(rpmfileObject *s)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfilesFLink(s->files, s->ix));
|
||||
+ return utf8FromString(rpmfilesFLink(s->files, s->ix));
|
||||
}
|
||||
|
||||
static PyObject *rpmfile_user(rpmfileObject *s)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfilesFUser(s->files, s->ix));
|
||||
+ return utf8FromString(rpmfilesFUser(s->files, s->ix));
|
||||
}
|
||||
|
||||
static PyObject *rpmfile_group(rpmfileObject *s)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfilesFGroup(s->files, s->ix));
|
||||
+ return utf8FromString(rpmfilesFGroup(s->files, s->ix));
|
||||
}
|
||||
|
||||
static PyObject *rpmfile_fflags(rpmfileObject *s)
|
||||
@@ -145,7 +145,7 @@ static PyObject *rpmfile_digest(rpmfileObject *s)
|
||||
NULL, &diglen);
|
||||
if (digest) {
|
||||
char * hex = pgpHexStr(digest, diglen);
|
||||
- PyObject *o = Py_BuildValue("s", hex);
|
||||
+ PyObject *o = utf8FromString(hex);
|
||||
free(hex);
|
||||
return o;
|
||||
}
|
||||
@@ -154,17 +154,17 @@ static PyObject *rpmfile_digest(rpmfileObject *s)
|
||||
|
||||
static PyObject *rpmfile_class(rpmfileObject *s)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfilesFClass(s->files, s->ix));
|
||||
+ return utf8FromString(rpmfilesFClass(s->files, s->ix));
|
||||
}
|
||||
|
||||
static PyObject *rpmfile_caps(rpmfileObject *s)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfilesFCaps(s->files, s->ix));
|
||||
+ return utf8FromString(rpmfilesFCaps(s->files, s->ix));
|
||||
}
|
||||
|
||||
static PyObject *rpmfile_langs(rpmfileObject *s)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmfilesFLangs(s->files, s->ix));
|
||||
+ return utf8FromString(rpmfilesFLangs(s->files, s->ix));
|
||||
}
|
||||
|
||||
static PyObject *rpmfile_links(rpmfileObject *s)
|
||||
diff --git a/python/rpmkeyring-py.c b/python/rpmkeyring-py.c
|
||||
index d5f131e42..8968e0513 100644
|
||||
--- a/python/rpmkeyring-py.c
|
||||
+++ b/python/rpmkeyring-py.c
|
||||
@@ -38,7 +38,7 @@ static PyObject *rpmPubkey_new(PyTypeObject *subtype,
|
||||
static PyObject * rpmPubkey_Base64(rpmPubkeyObject *s)
|
||||
{
|
||||
char *b64 = rpmPubkeyBase64(s->pubkey);
|
||||
- PyObject *res = Py_BuildValue("s", b64);
|
||||
+ PyObject *res = utf8FromString(b64);
|
||||
free(b64);
|
||||
return res;
|
||||
}
|
||||
diff --git a/python/rpmmacro-py.c b/python/rpmmacro-py.c
|
||||
index 3cb1a51f5..d8a365547 100644
|
||||
--- a/python/rpmmacro-py.c
|
||||
+++ b/python/rpmmacro-py.c
|
||||
@@ -52,7 +52,7 @@ rpmmacro_ExpandMacro(PyObject * self, PyObject * args, PyObject * kwds)
|
||||
if (rpmExpandMacros(NULL, macro, &str, 0) < 0)
|
||||
PyErr_SetString(pyrpmError, "error expanding macro");
|
||||
else
|
||||
- res = Py_BuildValue("s", str);
|
||||
+ res = utf8FromString(str);
|
||||
free(str);
|
||||
}
|
||||
return res;
|
||||
diff --git a/python/rpmmodule.c b/python/rpmmodule.c
|
||||
index 3faad23c7..05032edc7 100644
|
||||
--- a/python/rpmmodule.c
|
||||
+++ b/python/rpmmodule.c
|
||||
@@ -237,7 +237,7 @@ static void addRpmTags(PyObject *module)
|
||||
|
||||
PyModule_AddIntConstant(module, tagname, tagval);
|
||||
pyval = PyInt_FromLong(tagval);
|
||||
- pyname = Py_BuildValue("s", shortname);
|
||||
+ pyname = utf8FromString(shortname);
|
||||
PyDict_SetItem(dict, pyval, pyname);
|
||||
Py_DECREF(pyval);
|
||||
Py_DECREF(pyname);
|
||||
diff --git a/python/rpmps-py.c b/python/rpmps-py.c
|
||||
index bdc899a60..902b2ae63 100644
|
||||
--- a/python/rpmps-py.c
|
||||
+++ b/python/rpmps-py.c
|
||||
@@ -18,12 +18,12 @@ static PyObject *rpmprob_get_type(rpmProblemObject *s, void *closure)
|
||||
|
||||
static PyObject *rpmprob_get_pkgnevr(rpmProblemObject *s, void *closure)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmProblemGetPkgNEVR(s->prob));
|
||||
+ return utf8FromString(rpmProblemGetPkgNEVR(s->prob));
|
||||
}
|
||||
|
||||
static PyObject *rpmprob_get_altnevr(rpmProblemObject *s, void *closure)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmProblemGetAltNEVR(s->prob));
|
||||
+ return utf8FromString(rpmProblemGetAltNEVR(s->prob));
|
||||
}
|
||||
|
||||
static PyObject *rpmprob_get_key(rpmProblemObject *s, void *closure)
|
||||
@@ -38,7 +38,7 @@ static PyObject *rpmprob_get_key(rpmProblemObject *s, void *closure)
|
||||
|
||||
static PyObject *rpmprob_get_str(rpmProblemObject *s, void *closure)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmProblemGetStr(s->prob));
|
||||
+ return utf8FromString(rpmProblemGetStr(s->prob));
|
||||
}
|
||||
|
||||
static PyObject *rpmprob_get_num(rpmProblemObject *s, void *closure)
|
||||
@@ -59,7 +59,7 @@ static PyGetSetDef rpmprob_getseters[] = {
|
||||
static PyObject *rpmprob_str(rpmProblemObject *s)
|
||||
{
|
||||
char *str = rpmProblemString(s->prob);
|
||||
- PyObject *res = Py_BuildValue("s", str);
|
||||
+ PyObject *res = utf8FromString(str);
|
||||
free(str);
|
||||
return res;
|
||||
}
|
||||
diff --git a/python/rpmstrpool-py.c b/python/rpmstrpool-py.c
|
||||
index 356bd1de5..a56e2b540 100644
|
||||
--- a/python/rpmstrpool-py.c
|
||||
+++ b/python/rpmstrpool-py.c
|
||||
@@ -44,7 +44,7 @@ static PyObject *strpool_id2str(rpmstrPoolObject *s, PyObject *item)
|
||||
const char *str = rpmstrPoolStr(s->pool, id);
|
||||
|
||||
if (str)
|
||||
- ret = PyBytes_FromString(str);
|
||||
+ ret = utf8FromString(str);
|
||||
else
|
||||
PyErr_SetObject(PyExc_KeyError, item);
|
||||
}
|
||||
diff --git a/python/rpmsystem-py.h b/python/rpmsystem-py.h
|
||||
index 955d60cd3..87c750571 100644
|
||||
--- a/python/rpmsystem-py.h
|
||||
+++ b/python/rpmsystem-py.h
|
||||
@@ -19,4 +19,11 @@
|
||||
#define PyInt_AsSsize_t PyLong_AsSsize_t
|
||||
#endif
|
||||
|
||||
+/* In Python 3, we return all strings as surrogate-escaped utf-8 */
|
||||
+#if PY_MAJOR_VERSION >= 3
|
||||
+#define utf8FromString(_s) PyUnicode_DecodeUTF8(_s, strlen(_s), "surrogateescape")
|
||||
+#else
|
||||
+#define utf8FromString(_s) PyBytes_FromString(_s)
|
||||
+#endif
|
||||
+
|
||||
#endif /* H_SYSTEM_PYTHON */
|
||||
diff --git a/python/rpmtd-py.c b/python/rpmtd-py.c
|
||||
index 247c7502a..23ca10517 100644
|
||||
--- a/python/rpmtd-py.c
|
||||
+++ b/python/rpmtd-py.c
|
||||
@@ -17,7 +17,7 @@ PyObject * rpmtd_ItemAsPyobj(rpmtd td, rpmTagClass tclass)
|
||||
|
||||
switch (tclass) {
|
||||
case RPM_STRING_CLASS:
|
||||
- res = PyBytes_FromString(rpmtdGetString(td));
|
||||
+ res = utf8FromString(rpmtdGetString(td));
|
||||
break;
|
||||
case RPM_NUMERIC_CLASS:
|
||||
res = PyLong_FromLongLong(rpmtdGetNumber(td));
|
||||
diff --git a/python/rpmte-py.c b/python/rpmte-py.c
|
||||
index 99ff2f496..2b3745754 100644
|
||||
--- a/python/rpmte-py.c
|
||||
+++ b/python/rpmte-py.c
|
||||
@@ -54,49 +54,49 @@ rpmte_TEType(rpmteObject * s, PyObject * unused)
|
||||
static PyObject *
|
||||
rpmte_N(rpmteObject * s, PyObject * unused)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmteN(s->te));
|
||||
+ return utf8FromString(rpmteN(s->te));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
rpmte_E(rpmteObject * s, PyObject * unused)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmteE(s->te));
|
||||
+ return utf8FromString(rpmteE(s->te));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
rpmte_V(rpmteObject * s, PyObject * unused)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmteV(s->te));
|
||||
+ return utf8FromString(rpmteV(s->te));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
rpmte_R(rpmteObject * s, PyObject * unused)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmteR(s->te));
|
||||
+ return utf8FromString(rpmteR(s->te));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
rpmte_A(rpmteObject * s, PyObject * unused)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmteA(s->te));
|
||||
+ return utf8FromString(rpmteA(s->te));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
rpmte_O(rpmteObject * s, PyObject * unused)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmteO(s->te));
|
||||
+ return utf8FromString(rpmteO(s->te));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
rpmte_NEVR(rpmteObject * s, PyObject * unused)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmteNEVR(s->te));
|
||||
+ return utf8FromString(rpmteNEVR(s->te));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
rpmte_NEVRA(rpmteObject * s, PyObject * unused)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmteNEVRA(s->te));
|
||||
+ return utf8FromString(rpmteNEVRA(s->te));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
diff --git a/python/rpmts-py.c b/python/rpmts-py.c
|
||||
index 1ddfc9a1e..96e3bb28e 100644
|
||||
--- a/python/rpmts-py.c
|
||||
+++ b/python/rpmts-py.c
|
||||
@@ -230,8 +230,9 @@ rpmts_SolveCallback(rpmts ts, rpmds ds, const void * data)
|
||||
|
||||
PyEval_RestoreThread(cbInfo->_save);
|
||||
|
||||
- args = Py_BuildValue("(Oissi)", cbInfo->tso,
|
||||
- rpmdsTagN(ds), rpmdsN(ds), rpmdsEVR(ds), rpmdsFlags(ds));
|
||||
+ args = Py_BuildValue("(OiNNi)", cbInfo->tso,
|
||||
+ rpmdsTagN(ds), utf8FromString(rpmdsN(ds)),
|
||||
+ utf8FromString(rpmdsEVR(ds)), rpmdsFlags(ds));
|
||||
result = PyEval_CallObject(cbInfo->cb, args);
|
||||
Py_DECREF(args);
|
||||
|
||||
@@ -409,7 +410,7 @@ rpmts_HdrCheck(rpmtsObject * s, PyObject *obj)
|
||||
rpmrc = headerCheck(s->ts, uh, uc, &msg);
|
||||
Py_END_ALLOW_THREADS;
|
||||
|
||||
- return Py_BuildValue("(is)", rpmrc, msg);
|
||||
+ return Py_BuildValue("(iN)", rpmrc, utf8FromString(msg));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
@@ -500,7 +501,7 @@ rpmtsCallback(const void * hd, const rpmCallbackType what,
|
||||
/* Synthesize a python object for callback (if necessary). */
|
||||
if (pkgObj == NULL) {
|
||||
if (h) {
|
||||
- pkgObj = Py_BuildValue("s", headerGetString(h, RPMTAG_NAME));
|
||||
+ pkgObj = utf8FromString(headerGetString(h, RPMTAG_NAME));
|
||||
} else {
|
||||
pkgObj = Py_None;
|
||||
Py_INCREF(pkgObj);
|
||||
@@ -845,7 +846,7 @@ static PyObject *rpmts_get_tid(rpmtsObject *s, void *closure)
|
||||
|
||||
static PyObject *rpmts_get_rootDir(rpmtsObject *s, void *closure)
|
||||
{
|
||||
- return Py_BuildValue("s", rpmtsRootDir(s->ts));
|
||||
+ return utf8FromString(rpmtsRootDir(s->ts));
|
||||
}
|
||||
|
||||
static int rpmts_set_scriptFd(rpmtsObject *s, PyObject *value, void *closure)
|
||||
diff --git a/python/spec-py.c b/python/spec-py.c
|
||||
index 4efdbf4bf..70b796531 100644
|
||||
--- a/python/spec-py.c
|
||||
+++ b/python/spec-py.c
|
||||
@@ -57,7 +57,7 @@ static PyObject *pkgGetSection(rpmSpecPkg pkg, int section)
|
||||
{
|
||||
char *sect = rpmSpecPkgGetSection(pkg, section);
|
||||
if (sect != NULL) {
|
||||
- PyObject *ps = PyBytes_FromString(sect);
|
||||
+ PyObject *ps = utf8FromString(sect);
|
||||
free(sect);
|
||||
if (ps != NULL)
|
||||
return ps;
|
||||
@@ -158,7 +158,7 @@ static PyObject * getSection(rpmSpec spec, int section)
|
||||
{
|
||||
const char *sect = rpmSpecGetSection(spec, section);
|
||||
if (sect) {
|
||||
- return Py_BuildValue("s", sect);
|
||||
+ return utf8FromString(sect);
|
||||
}
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
@@ -208,8 +208,8 @@ static PyObject * spec_get_sources(specObject *s, void *closure)
|
||||
|
||||
rpmSpecSrcIter iter = rpmSpecSrcIterInit(s->spec);
|
||||
while ((source = rpmSpecSrcIterNext(iter)) != NULL) {
|
||||
- PyObject *srcUrl = Py_BuildValue("(sii)",
|
||||
- rpmSpecSrcFilename(source, 1),
|
||||
+ PyObject *srcUrl = Py_BuildValue("(Nii)",
|
||||
+ utf8FromString(rpmSpecSrcFilename(source, 1)),
|
||||
rpmSpecSrcNum(source),
|
||||
rpmSpecSrcFlags(source));
|
||||
if (!srcUrl) {
|
||||
diff --git a/tests/local.at b/tests/local.at
|
||||
index 02ead66c9..42eef1c75 100644
|
||||
--- a/tests/local.at
|
||||
+++ b/tests/local.at
|
||||
@@ -10,6 +10,7 @@ rm -rf "${abs_builddir}"/testing`rpm --eval '%_dbpath'`/*
|
||||
|
||||
m4_define([RPMPY_RUN],[[
|
||||
cat << EOF > test.py
|
||||
+# coding=utf-8
|
||||
import rpm, sys
|
||||
dbpath=rpm.expandMacro('%_dbpath')
|
||||
rpm.addMacro('_dbpath', '${abs_builddir}/testing%s' % dbpath)
|
||||
diff --git a/tests/rpmpython.at b/tests/rpmpython.at
|
||||
index ff77f868c..58f3e84a6 100644
|
||||
--- a/tests/rpmpython.at
|
||||
+++ b/tests/rpmpython.at
|
||||
@@ -106,6 +106,25 @@ None
|
||||
'rpm.hdr' object has no attribute '__foo__']
|
||||
)
|
||||
|
||||
+RPMPY_TEST([non-utf8 data in header],[
|
||||
+str = u'älämölö'
|
||||
+enc = 'iso-8859-1'
|
||||
+b = str.encode(enc)
|
||||
+h = rpm.hdr()
|
||||
+h['group'] = b
|
||||
+d = h['group']
|
||||
+try:
|
||||
+ # python 3
|
||||
+ t = bytes(d, 'utf-8', 'surrogateescape')
|
||||
+except TypeError:
|
||||
+ # python 2
|
||||
+ t = bytes(d)
|
||||
+res = t.decode(enc)
|
||||
+myprint(str == res)
|
||||
+],
|
||||
+[True]
|
||||
+)
|
||||
+
|
||||
RPMPY_TEST([invalid header data],[
|
||||
h1 = rpm.hdr()
|
||||
h1['basenames'] = ['bing', 'bang', 'bong']
|
||||
@@ -125,6 +144,21 @@ for h in [h1, h2]:
|
||||
/opt/bing,/opt/bang,/flopt/bong]
|
||||
)
|
||||
|
||||
+RPMPY_TEST([labelCompare],[
|
||||
+v = '1.0'
|
||||
+r = '1'
|
||||
+e = 3
|
||||
+h = rpm.hdr()
|
||||
+h['name'] = 'testpkg'
|
||||
+h['version'] = v
|
||||
+h['release'] = r
|
||||
+h['epoch'] = e
|
||||
+myprint(rpm.labelCompare((str(h['epoch']), h['version'], h['release']),
|
||||
+ (str(e), v, r)))
|
||||
+],
|
||||
+[0]
|
||||
+)
|
||||
+
|
||||
RPMPY_TEST([vfyflags API],[
|
||||
ts = rpm.ts()
|
||||
dlv = ts.getVfyFlags()
|
||||
--
|
||||
2.20.1
|
||||
|
@ -0,0 +1,90 @@
|
||||
From 13b0ebee7cdb1e4d200b3c40d0ec9440f198a1d4 Mon Sep 17 00:00:00 2001
|
||||
Message-Id: <13b0ebee7cdb1e4d200b3c40d0ec9440f198a1d4.1554886141.git.pmatilai@redhat.com>
|
||||
From: Panu Matilainen <pmatilai@redhat.com>
|
||||
Date: Wed, 10 Apr 2019 11:24:44 +0300
|
||||
Subject: [PATCH] Monkey-patch .decode() method to our strings as a temporary
|
||||
compat crutch
|
||||
|
||||
As a temporary crutch to support faster deployment of the sane
|
||||
string behavior on python3, monkey-patch a decode method into all
|
||||
strings we return. This seems to be enough to fix practically all
|
||||
API users who have already adapted to the long-standing broken API
|
||||
on Python 3. API users compatible with both Python 2 and 3 never needed
|
||||
this anyway. Issue a warning with pointer to the relevant bug when the
|
||||
fake decode() method is used to alert users to the issue.
|
||||
|
||||
This is certainly an evil thing to do and will be removed as soon as
|
||||
the critical users have been fixed to work with the new, corrected
|
||||
behavior.
|
||||
---
|
||||
python/rpm/__init__.py | 3 +++
|
||||
python/rpmmodule.c | 1 +
|
||||
python/rpmsystem-py.h | 22 ++++++++++++++++++++--
|
||||
3 files changed, 24 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/python/rpm/__init__.py b/python/rpm/__init__.py
|
||||
index 54728bbd4..6d69eda7b 100644
|
||||
--- a/python/rpm/__init__.py
|
||||
+++ b/python/rpm/__init__.py
|
||||
@@ -61,6 +61,9 @@ except ImportError:
|
||||
# backwards compatibility + give the same class both ways
|
||||
ts = TransactionSet
|
||||
|
||||
+def _fakedecode(self, encoding='utf-8', errors='strict'):
|
||||
+ warnings.warn("decode() called on unicode string, see https://bugzilla.redhat.com/show_bug.cgi?id=1693751", UnicodeWarning, stacklevel=2)
|
||||
+ return self
|
||||
|
||||
def headerLoad(*args, **kwds):
|
||||
"""DEPRECATED! Use rpm.hdr() instead."""
|
||||
diff --git a/python/rpmmodule.c b/python/rpmmodule.c
|
||||
index 05032edc7..2a76cfbd0 100644
|
||||
--- a/python/rpmmodule.c
|
||||
+++ b/python/rpmmodule.c
|
||||
@@ -28,6 +28,7 @@
|
||||
*/
|
||||
|
||||
PyObject * pyrpmError;
|
||||
+PyObject * fakedecode = NULL;
|
||||
|
||||
static PyObject * archScore(PyObject * self, PyObject * arg)
|
||||
{
|
||||
diff --git a/python/rpmsystem-py.h b/python/rpmsystem-py.h
|
||||
index 25938464a..803da0fc1 100644
|
||||
--- a/python/rpmsystem-py.h
|
||||
+++ b/python/rpmsystem-py.h
|
||||
@@ -19,12 +19,30 @@
|
||||
#define PyInt_AsSsize_t PyLong_AsSsize_t
|
||||
#endif
|
||||
|
||||
+PyObject * fakedecode;
|
||||
+
|
||||
static inline PyObject * utf8FromString(const char *s)
|
||||
{
|
||||
/* In Python 3, we return all strings as surrogate-escaped utf-8 */
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
- if (s != NULL)
|
||||
- return PyUnicode_DecodeUTF8(s, strlen(s), "surrogateescape");
|
||||
+ if (s != NULL) {
|
||||
+ PyObject *o = PyUnicode_DecodeUTF8(s, strlen(s), "surrogateescape");
|
||||
+ /* fish the fake decode function from python side if not done yet */
|
||||
+ if (fakedecode == NULL) {
|
||||
+ PyObject *n = PyUnicode_FromString("rpm");
|
||||
+ PyObject *m = PyImport_Import(n);
|
||||
+ PyObject *md = PyModule_GetDict(m);
|
||||
+ fakedecode = PyDict_GetItemString(md, "_fakedecode");
|
||||
+ Py_DECREF(m);
|
||||
+ Py_DECREF(n);
|
||||
+ }
|
||||
+ if (fakedecode && o) {
|
||||
+ Py_INCREF(fakedecode);
|
||||
+ /* monkey-patch it into the string object as "decode" */
|
||||
+ PyDict_SetItemString(Py_TYPE(o)->tp_dict, "decode", fakedecode);
|
||||
+ }
|
||||
+ return o;
|
||||
+ }
|
||||
#else
|
||||
if (s != NULL)
|
||||
return PyBytes_FromString(s);
|
||||
--
|
||||
2.20.1
|
||||
|
41
0001-Return-NULL-string-as-None-from-utf8FromString.patch
Normal file
41
0001-Return-NULL-string-as-None-from-utf8FromString.patch
Normal file
@ -0,0 +1,41 @@
|
||||
From aea53a4aead8bd71f519df35fcffd9eec76fbc01 Mon Sep 17 00:00:00 2001
|
||||
Message-Id: <aea53a4aead8bd71f519df35fcffd9eec76fbc01.1554884465.git.pmatilai@redhat.com>
|
||||
From: Panu Matilainen <pmatilai@redhat.com>
|
||||
Date: Tue, 26 Feb 2019 11:27:51 +0200
|
||||
Subject: [PATCH] Return NULL string as None from utf8FromString()
|
||||
|
||||
Commit 84920f898315d09a57a3f1067433eaeb7de5e830 regressed dnf install
|
||||
to segfault at the end due to some NULL string passed to strlen().
|
||||
Check for NULL and return it as None, make it an inline function
|
||||
to make this saner.
|
||||
---
|
||||
python/rpmsystem-py.h | 10 ++++++++--
|
||||
1 file changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/python/rpmsystem-py.h b/python/rpmsystem-py.h
|
||||
index 87c750571..25938464a 100644
|
||||
--- a/python/rpmsystem-py.h
|
||||
+++ b/python/rpmsystem-py.h
|
||||
@@ -19,11 +19,17 @@
|
||||
#define PyInt_AsSsize_t PyLong_AsSsize_t
|
||||
#endif
|
||||
|
||||
+static inline PyObject * utf8FromString(const char *s)
|
||||
+{
|
||||
/* In Python 3, we return all strings as surrogate-escaped utf-8 */
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
-#define utf8FromString(_s) PyUnicode_DecodeUTF8(_s, strlen(_s), "surrogateescape")
|
||||
+ if (s != NULL)
|
||||
+ return PyUnicode_DecodeUTF8(s, strlen(s), "surrogateescape");
|
||||
#else
|
||||
-#define utf8FromString(_s) PyBytes_FromString(_s)
|
||||
+ if (s != NULL)
|
||||
+ return PyBytes_FromString(s);
|
||||
#endif
|
||||
+ Py_RETURN_NONE;
|
||||
+}
|
||||
|
||||
#endif /* H_SYSTEM_PYTHON */
|
||||
--
|
||||
2.20.1
|
||||
|
16
rpm.spec
16
rpm.spec
@ -23,7 +23,7 @@
|
||||
|
||||
%global rpmver 4.14.2.1
|
||||
#global snapver rc2
|
||||
%global rel 5
|
||||
%global rel 6
|
||||
|
||||
%global srcver %{version}%{?snapver:-%{snapver}}
|
||||
%global srcdir %{?snapver:testing}%{!?snapver:%{name}-%(echo %{version} | cut -d'.' -f1-2).x}
|
||||
@ -36,7 +36,7 @@
|
||||
Summary: The RPM package management system
|
||||
Name: rpm
|
||||
Version: %{rpmver}
|
||||
Release: %{?snapver:0.%{snapver}.}%{rel}%{?dist}.1
|
||||
Release: %{?snapver:0.%{snapver}.}%{rel}%{?dist}
|
||||
Url: http://www.rpm.org/
|
||||
Source0: http://ftp.rpm.org/releases/%{srcdir}/%{name}-%{srcver}.tar.bz2
|
||||
%if %{with int_bdb}
|
||||
@ -64,6 +64,12 @@ Patch105: rpm-4.14.2-RPMTAG_MODULARITYLABEL.patch
|
||||
Patch106: 0001-find-debuginfo.sh-Handle-position-independent-execut.patch
|
||||
Patch107: 0001-Add-flag-to-use-strip-g-instead-of-full-strip-on-DSO.patch
|
||||
|
||||
# Python 3 string API sanity
|
||||
Patch150: 0001-In-Python-3-return-all-our-string-data-as-surrogate-.patch
|
||||
Patch151: 0001-Return-NULL-string-as-None-from-utf8FromString.patch
|
||||
# Temporary compat crutch, not upstream
|
||||
Patch152: 0001-Monkey-patch-.decode-method-to-our-strings-as-a-temp.patch
|
||||
|
||||
# These are not yet upstream
|
||||
Patch906: rpm-4.7.1-geode-i686.patch
|
||||
# Probably to be upstreamed in slightly different form
|
||||
@ -570,6 +576,12 @@ make check || (cat tests/rpmtests.log; exit 1)
|
||||
%doc doc/librpm/html/*
|
||||
|
||||
%changelog
|
||||
* Wed Apr 10 2019 Panu Matilainen <pmatilai@redhat.com> - 4.14.2.1-6
|
||||
- Unbreak Python 3 API by returning string data as surrogate-escaped utf-8
|
||||
string objects instead of bytes (#1693751)
|
||||
- As a temporary crutch, monkey-patch a .decode() method to returned strings
|
||||
to give users time to migrate from the long-standing broken behavior
|
||||
|
||||
* Wed Apr 10 2019 Panu Matilainen <pmatilai@redhat.com> - 4.14.2.1-5
|
||||
- Generate minidebug for PIE executables on file >= 5.33 too
|
||||
- Backport find-debuginfo --g-libs option for glibc's benefit (#1661512)
|
||||
|
Loading…
Reference in New Issue
Block a user