Give up on cherry-picking and just merge the changes over via meld

2010-08-23 01:10:27 -04:00 · 2010-08-23 01:10:27 -04:00 · e9848abbe0
commit e9848abbe0
parent 05a8f80b75
2 changed files with 30 additions and 21 deletions
--- a/python3-r80382-r80385-lone-surrogate-and-utf8-error-handler.patch
+++ b/python3-r80382-r80385-lone-surrogate-and-utf8-error-handler.patch
@ -1,8 +1,8 @@
-Index: Objects/unicodeobject.c
+Index: Python-3.1.2/Objects/unicodeobject.c
 ===================================================================
--- Objects/unicodeobject.c	(revision 80382)
-+++ Objects/unicodeobject.c	(revision 80383)
-@@ -159,6 +159,12 @@
+--- Python-3.1.2.orig/Objects/unicodeobject.c
+++ Python-3.1.2/Objects/unicodeobject.c
+@@ -159,6 +159,12 @@ static PyObject *unicode_encode_call_err
        const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
        Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos);
 
@ -15,7 +15,17 @@ Index: Objects/unicodeobject.c
 /* Same for linebreaks */
 static unsigned char ascii_linebreak[] = {
     0, 0, 0, 0, 0, 0, 0, 0,
-@@ -2461,61 +2467,88 @@
+@@ -2453,67 +2459,98 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s
+     for (i = 0; i < size;) {
+         Py_UCS4 ch = s[i++];
+ 
+-        if (ch < 0x80)
+        if (ch < 0x80) {
+             /* Encode ASCII */
+             *p++ = (char) ch;
+ 
+-        else if (ch < 0x0800) {
+        } else if (ch < 0x0800) {
             /* Encode Latin-1 */
             *p++ = (char)(0xc0 | (ch >> 6));
             *p++ = (char)(0x80 | (ch & 0x3f));
@ -50,6 +60,7 @@ Index: Objects/unicodeobject.c
 +                *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
 +                *p++ = (char)(0x80 | (ch & 0x3f));
 +
+            } else {
 #endif
 -                if (ch >= 0xd800 && ch <= 0xdfff) {
 -                    Py_ssize_t newpos;
@ -60,7 +71,12 @@ Index: Objects/unicodeobject.c
 -                        (errors, &errorHandler, "utf-8", "surrogates not allowed", 
 -                         s, size, &exc, i-1, i, &newpos);
 -                    if (!rep)
-+            } else {
+-                        goto error;
+-                    /* Implementation limitations: only support error handler that return
+-                       bytes, and only support up to four replacement bytes. */
+-                    if (!PyBytes_Check(rep)) {
+-                        PyErr_SetString(PyExc_TypeError, "error handler should have returned bytes");
+-                        Py_DECREF(rep);
 +                Py_ssize_t newpos;
 +                PyObject *rep;
 +                Py_ssize_t repsize, k;
@ -87,12 +103,6 @@ Index: Objects/unicodeobject.c
 +                        /* integer overflow */
 +                        PyErr_NoMemory();
                         goto error;
-                    /* Implementation limitations: only support error handler that return
-                       bytes, and only support up to four replacement bytes. */
-                    if (!PyBytes_Check(rep)) {
-                        PyErr_SetString(PyExc_TypeError, "error handler should have returned bytes");
-                        Py_DECREF(rep);
-                        goto error;
                     }
 -                    if (PyBytes_Size(rep) > 4) {
 -                        PyErr_SetString(PyExc_TypeError, "error handler returned too many bytes");
@ -139,10 +149,10 @@ Index: Objects/unicodeobject.c
 -                *p++ = (char)(0x80 | (ch & 0x3f));
 -                continue;
 +                Py_DECREF(rep);
+#ifndef Py_UNICODE_WIDE
             }
-#ifndef Py_UNICODE_WIDE
 -          encodeUCS4:
-#endif
+#endif
 +        } else if (ch < 0x10000) {
 +            *p++ = (char)(0xe0 | (ch >> 12));
 +            *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
@ -151,11 +161,11 @@ Index: Objects/unicodeobject.c
             /* Encode UCS4 Unicode ordinals */
             *p++ = (char)(0xf0 | (ch >> 18));
             *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
-Index: Lib/test/test_codecs.py
+Index: Python-3.1.2/Lib/test/test_codecs.py
 ===================================================================
--- Lib/test/test_codecs.py	(revision 80382)
-+++ Lib/test/test_codecs.py	(revision 80383)
-@@ -571,6 +571,16 @@
+--- Python-3.1.2.orig/Lib/test/test_codecs.py
+++ Python-3.1.2/Lib/test/test_codecs.py
+@@ -571,6 +571,16 @@ class UTF8Test(ReadTest):
     def test_lone_surrogates(self):
         self.assertRaises(UnicodeEncodeError, "\ud800".encode, "utf-8")
         self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "utf-8")
@ -172,4 +182,3 @@ Index: Lib/test/test_codecs.py
 
     def test_surrogatepass_handler(self):
         self.assertEquals("abc\ud800def".encode("utf-8", "surrogatepass"),
-
--- a/python3.spec
+++ b/python3.spec
@ -152,7 +152,7 @@ Patch110: python-3.1.2-fix-expat-issue9054.patch
 # Fix encoding to utf8 when lone surrogates are present and error handler is
 # set to ignore, replace, or others that return a unicode str.
 # http://bugs.python.org/issue8092
-Patch111: python3-r80382-lone-surrogate-and-utf8-error-handler.patch
+Patch111: python3-r80382-r80385-lone-surrogate-and-utf8-error-handler.patch

 BuildRoot: %{_tmppath}/%{name}-%{version}-root
 BuildRequires: readline-devel, openssl-devel, gmp-devel
@ -278,7 +278,7 @@ rm -r Modules/zlib || exit 1

 %patch110 -p0 -b .fix-expat-issue9054

-%patch111 -p0 -b .surrogate-utf8
+%patch111 -p1 -b .surrogate-utf8

 # Currently (2010-01-15), http://docs.python.org/library is for 2.6, and there
 # are many differences between 2.6 and the Python 3 library.