Update PEP 538 to the latest upstream implementation

This commit is contained in:
Charalampos Stratakis 2017-05-06 23:27:11 +02:00
parent bbbc440d2c
commit 31fe33b583
2 changed files with 222 additions and 111 deletions

View File

@ -1,34 +1,39 @@
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index c0e64d6..0bb28da 100644
index 195f63f..0d0a127 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -711,6 +711,35 @@ conflict.
@@ -713,6 +713,40 @@ conflict.
.. versionadded:: 3.6
+
+.. envvar:: PYTHONCOERCECLOCALE
+
+ If set to a non-empty string, causes the main Python command line application
+ If set to the value ``0``, causes the main Python command line application
+ to skip coercing the legacy ASCII-based C locale to a more capable UTF-8
+ based alternative. Note that this setting is checked even when the
+ :option:`-E` or :option:`-I` options are used, as it is handled prior to
+ the processing of command line options.
+
+ If this variable is *not* set, and the current locale reported for the
+ ``LC_CTYPE`` category is the default ``C`` locale, then the Python CLI will
+ attempt to configure one of the following locales for the given locale
+ categories before loading the interpreter runtime:
+ If this variable is *not* set, or is set to a value other than ``0``, and
+ the current locale reported for the ``LC_CTYPE`` category is the default
+ ``C`` locale, then the Python CLI will attempt to configure one of the
+ following locales for the given locale categories before loading the
+ interpreter runtime:
+
+ * ``C.UTF-8` (``LC_ALL``)
+ * ``C.utf8` (``LC_ALL``)
+ * ``UTF-8` (``LC_CTYPE``)
+ * ``C.UTF-8`` (``LC_ALL``)
+ * ``C.utf8`` (``LC_ALL``)
+ * ``UTF-8`` (``LC_CTYPE``)
+
+ If setting one of these locale categories succeeds, then the matching
+ environment variables will be set (both ``LC_ALL` and ``LANG`` for the
+ ``LC_ALL`` category, and ``LC_CTYPE`` for the ``LC_CTYPE`` category),
+ and (if not already set to a non-empty string) :envvar:`PYTHONIOENCODING`
+ will be set to ``utf-8:surrogateescape``.
+ environment variables will be set (both ``LC_ALL`` and ``LANG`` for the
+ ``LC_ALL`` category, and ``LC_CTYPE`` for the ``LC_CTYPE`` category) in
+ the current process environment before the Python runtime is initialized.
+
+ Configuring one of these locales (either explicitly or via the above
+ implicit locale coercion) will automatically set the error handler for
+ :data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This
+ behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual.
+
+ Availability: \*nix
+
@ -39,7 +44,7 @@ index c0e64d6..0bb28da 100644
~~~~~~~~~~~~~~~~~~~~
diff --git a/Lib/test/support/script_helper.py b/Lib/test/support/script_helper.py
index 80889b1..1a1a862 100644
index ca5f9c2..7aa460b 100644
--- a/Lib/test/support/script_helper.py
+++ b/Lib/test/support/script_helper.py
@@ -51,8 +51,35 @@ def interpreter_requires_environment():
@ -80,7 +85,7 @@ index 80889b1..1a1a862 100644
# Executing the interpreter in a subprocess
@@ -99,30 +126,7 @@ def run_python_until_end(*args, **env_vars):
@@ -110,30 +137,7 @@ def run_python_until_end(*args, **env_vars):
def _assert_python(expected_success, *args, **env_vars):
res, cmd_line = run_python_until_end(*args, **env_vars)
if (res.rc and expected_success) or (not res.rc and not expected_success):
@ -113,10 +118,28 @@ index 80889b1..1a1a862 100644
def assert_python_ok(*args, **env_vars):
diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py
index 2a53f3d..ece84af 100644
index 2a53f3d..391ca15 100644
--- a/Lib/test/test_capi.py
+++ b/Lib/test/test_capi.py
@@ -386,7 +386,7 @@ class EmbeddingTests(unittest.TestCase):
@@ -369,14 +369,15 @@ class EmbeddingTests(unittest.TestCase):
def tearDown(self):
os.chdir(self.oldcwd)
- def run_embedded_interpreter(self, *args):
+ def run_embedded_interpreter(self, *args, env=None):
"""Runs a test in the embedded interpreter"""
cmd = [self.test_exe]
cmd.extend(args)
p = subprocess.Popen(cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
- universal_newlines=True)
+ universal_newlines=True,
+ env=env)
(out, err) = p.communicate()
self.assertEqual(p.returncode, 0,
"bad returncode %d, stderr is %r" %
@@ -386,7 +387,7 @@ class EmbeddingTests(unittest.TestCase):
def test_subinterps(self):
# This is just a "don't crash" test
out, err = self.run_embedded_interpreter("repeated_init_and_subinterpreters")
@ -125,11 +148,14 @@ index 2a53f3d..ece84af 100644
print()
print(out)
print(err)
@@ -404,14 +404,15 @@ class EmbeddingTests(unittest.TestCase):
@@ -403,13 +404,14 @@ class EmbeddingTests(unittest.TestCase):
def test_forced_io_encoding(self):
# Checks forced configuration of embedded interpreter IO streams
out, err = self.run_embedded_interpreter("forced_io_encoding")
- out, err = self.run_embedded_interpreter("forced_io_encoding")
- if support.verbose:
+ env = {"PYTHONIOENCODING": "UTF-8:surrogateescape"}
+ out, err = self.run_embedded_interpreter("forced_io_encoding", env=env)
+ if support.verbose > 1:
print()
print(out)
@ -140,12 +166,9 @@ index 2a53f3d..ece84af 100644
+ expected_stdin_encoding = "UTF-8"
expected_pipe_encoding = self._get_default_pipe_encoding()
expected_output = '\n'.join([
+ "Setting PYTHONIOENCODING=UTF-8:surrogateescape",
"--- Use defaults ---",
"Expected encoding: default",
"Expected errors: default",
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index b71bb9f..56867fc 100644
index ae2bcd4..0a302ff 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -9,8 +9,9 @@ import sys
@ -181,7 +204,7 @@ index df9ebd4..63145e4 100644
'import sys',
'def dump(name):',
diff --git a/Programs/_testembed.c b/Programs/_testembed.c
index a68d4fa..1494452 100644
index a68d4fa..e28de1c 100644
--- a/Programs/_testembed.c
+++ b/Programs/_testembed.c
@@ -1,4 +1,5 @@
@ -191,17 +214,7 @@ index a68d4fa..1494452 100644
#include <stdio.h>
/*********************************************************
@@ -106,6 +107,9 @@ static void check_stdio_details(const char *encoding, const char * errors)
static int test_forced_io_encoding(void)
{
+ /* Ensure consistent "defaults" */
+ printf("Setting PYTHONIOENCODING=UTF-8:surrogateescape\n");
+ setenv("PYTHONIOENCODING", "UTF-8:surrogateescape", 1);
/* Check various combinations */
printf("--- Use defaults ---\n");
check_stdio_details(NULL, NULL);
@@ -126,6 +130,20 @@ static int test_forced_io_encoding(void)
@@ -126,6 +127,20 @@ static int test_forced_io_encoding(void)
return 0;
}
@ -222,7 +235,7 @@ index a68d4fa..1494452 100644
/* *********************************************************
* List of test cases and the function that implements it.
*
@@ -147,6 +165,7 @@ struct TestCase
@@ -147,6 +162,7 @@ struct TestCase
static struct TestCase TestCases[] = {
{ "forced_io_encoding", test_forced_io_encoding },
{ "repeated_init_and_subinterpreters", test_repeated_init_and_subinterpreters },
@ -231,14 +244,14 @@ index a68d4fa..1494452 100644
};
diff --git a/Programs/python.c b/Programs/python.c
index a7afbc7..b5edebb 100644
index a7afbc7..03f8295 100644
--- a/Programs/python.c
+++ b/Programs/python.c
@@ -15,6 +15,110 @@ wmain(int argc, wchar_t **argv)
@@ -15,6 +15,21 @@ wmain(int argc, wchar_t **argv)
}
#else
+/* Helpers to better handle the legacy C locale
+/* Access private pylifecycle helper API to better handle the legacy C locale
+ *
+ * The legacy C locale assumes ASCII as the default text encoding, which
+ * causes problems not only for the CPython runtime, but also other
@ -250,11 +263,100 @@ index a7afbc7..b5edebb 100644
+ * See the documentation of the PYTHONCOERCECLOCALE setting for more details.
+ *
+ */
+extern int _Py_LegacyLocaleDetected(void);
+extern void _Py_CoerceLegacyLocale(void);
+
+#ifdef PY_COERCE_C_LOCALE
+static const char *_C_LOCALE_COERCION_WARNING =
+ "Python detected LC_CTYPE=C: %.20s coerced to %.20s (set another locale "
+ "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behaviour).\n";
int
main(int argc, char **argv)
{
@@ -25,7 +40,11 @@ main(int argc, char **argv)
char *oldloc;
/* Force malloc() allocator to bootstrap Python */
+#ifdef Py_DEBUG
+ (void)_PyMem_SetupAllocators("malloc_debug");
+# else
(void)_PyMem_SetupAllocators("malloc");
+# endif
argv_copy = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1));
argv_copy2 = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1));
@@ -49,7 +68,21 @@ main(int argc, char **argv)
return 1;
}
+#ifdef __ANDROID__
+ /* Passing "" to setlocale() on Android requests the C locale rather
+ * than checking environment variables, so request C.UTF-8 explicitly
+ */
+ setlocale(LC_ALL, "C.UTF-8");
+#else
+ /* Reconfigure the locale to the default for this process */
setlocale(LC_ALL, "");
+#endif
+
+ if (_Py_LegacyLocaleDetected()) {
+ _Py_CoerceLegacyLocale();
+ }
+
+ /* Convert from char to wchar_t based on the locale settings */
for (i = 0; i < argc; i++) {
argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
if (!argv_copy[i]) {
@@ -70,7 +103,11 @@ main(int argc, char **argv)
/* Force again malloc() allocator to release memory blocks allocated
before Py_Main() */
+#ifdef Py_DEBUG
+ (void)_PyMem_SetupAllocators("malloc_debug");
+# else
(void)_PyMem_SetupAllocators("malloc");
+# endif
for (i = 0; i < argc; i++) {
PyMem_RawFree(argv_copy2[i]);
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index a4f7f82..261ed34 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -167,6 +167,7 @@ Py_SetStandardStreamEncoding(const char *encoding, const char *errors)
return 0;
}
+
/* Global initializations. Can be undone by Py_FinalizeEx(). Don't
call this twice without an intervening Py_FinalizeEx() call. When
initializations fail, a fatal error is issued and the function does
@@ -301,6 +302,173 @@ import_init(PyInterpreterState *interp, PyObject *sysmod)
}
+/* Helper functions to better handle the legacy C locale
+ *
+ * The legacy C locale assumes ASCII as the default text encoding, which
+ * causes problems not only for the CPython runtime, but also other
+ * components like GNU readline.
+ *
+ * Accordingly, when the CLI detects it, it attempts to coerce it to a
+ * more capable UTF-8 based alternative as follows:
+ *
+ * if (_Py_LegacyLocaleDetected()) {
+ * _Py_CoerceLegacyLocale();
+ * }
+ *
+ * See the documentation of the PYTHONCOERCECLOCALE setting for more details.
+ *
+ * Locale coercion also impacts the default error handler for the standard
+ * streams: while the usual default is "strict", the default for the legacy
+ * C locale and for any of the coercion target locales is "surrogateescape".
+ */
+
+int
+_Py_LegacyLocaleDetected(void)
+{
+ const char *ctype_loc = setlocale(LC_CTYPE, NULL);
+ return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0;
+}
+
+typedef struct _CandidateLocale {
+ const char *locale_name;
@ -268,7 +370,35 @@ index a7afbc7..b5edebb 100644
+ { NULL, 0 }
+};
+
+void
+static char *
+get_default_standard_stream_error_handler(void)
+{
+ const char *ctype_loc = setlocale(LC_CTYPE, NULL);
+ if (ctype_loc != NULL) {
+ /* "surrogateescape" is the default in the legacy C locale */
+ if (strcmp(ctype_loc, "C") == 0) {
+ return "surrogateescape";
+ }
+
+ /* "surrogateescape" is the default in locale coercion target locales */
+ const _LocaleCoercionTarget *target = NULL;
+ for (target = _TARGET_LOCALES; target->locale_name; target++) {
+ if (strcmp(ctype_loc, target->locale_name) == 0) {
+ return "surrogateescape";
+ }
+ }
+ }
+
+ /* Otherwise return NULL to request the typical default error handler */
+ return NULL;
+}
+
+#ifdef PY_COERCE_C_LOCALE
+static const char *_C_LOCALE_COERCION_WARNING =
+ "Python detected LC_CTYPE=C: %.20s coerced to %.20s (set another locale "
+ "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
+
+static void
+_coerce_default_locale_settings(const _LocaleCoercionTarget *target)
+{
+ const char *newloc = target->locale_name;
@ -303,28 +433,36 @@ index a7afbc7..b5edebb 100644
+ return;
+ }
+
+ /* Set PYTHONIOENCODING if not already set */
+ if (setenv("PYTHONIOENCODING", "utf-8:surrogateescape", 0)) {
+ fprintf(stderr,
+ "Error setting PYTHONIOENCODING during C locale coercion\n");
+ }
+
+ /* Reconfigure with the overridden environment variables */
+ setlocale(LC_ALL, "");
+}
+
+void
+_handle_legacy_c_locale(void)
+static int
+c_locale_coercion_is_expected(void)
+{
+ /* This may be called prior to Py_Initialize, so we don't call any other
+ * Python APIs, and we ignore the -E and -I flags
+ */
+ const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
+ /* We ignore the Python -E and -I flags here, as we need to sort out
+ if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
+ return 1;
+ }
+ return 0;
+}
+#endif
+
+void
+_Py_CoerceLegacyLocale(void)
+{
+#ifdef PY_COERCE_C_LOCALE
+ /* We ignore the Python -E and -I flags here, as the CLI needs to sort out
+ * the locale settings *before* we try to do anything with the command
+ * line arguments. For cross-platform debugging purposes, we also need
+ * to give end users a way to force even scripts that are otherwise
+ * isolated from their environment to use the legacy ASCII-centric C
+ * locale.
+ */
+ if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
+ if (c_locale_coercion_is_expected()) {
+ /* PYTHONCOERCECLOCALE is not set, or is not set to exactly "0" */
+ const _LocaleCoercionTarget *target = NULL;
+ for (target = _TARGET_LOCALES; target->locale_name; target++) {
@ -336,50 +474,12 @@ index a7afbc7..b5edebb 100644
+ return;
+ }
+ }
+
+ }
+ /* No C locale warning here, as Py_Initialize will emit one later */
+#endif
+}
+#endif
+
int
main(int argc, char **argv)
{
@@ -49,7 +153,26 @@ main(int argc, char **argv)
return 1;
}
+#ifdef __ANDROID__
+ /* Passing "" to setlocale() on Android requests the C locale rather
+ * than checking environment variables, so request C.UTF-8 explicitly
+ */
+ setlocale(LC_ALL, "C.UTF-8");
+#else
+ /* Reconfigure the locale to the default for this process */
setlocale(LC_ALL, "");
+#endif
+
+#ifdef PY_COERCE_C_LOCALE
+ /* When the LC_CTYPE category still claims to be using the C locale,
+ assume configuration error and try for a UTF-8 based locale instead */
+ const char *ctype_loc = setlocale(LC_CTYPE, NULL);
+ if (ctype_loc != NULL && strcmp(ctype_loc, "C") == 0) {
+ _handle_legacy_c_locale();
+ }
+#endif
+
+ /* Convert from char to wchar_t based on the locale settings */
for (i = 0; i < argc; i++) {
argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
if (!argv_copy[i]) {
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index a4f7f82..dd58dc9 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -301,6 +301,31 @@ import_init(PyInterpreterState *interp, PyObject *sysmod)
}
+#ifdef PY_WARN_ON_C_LOCALE
+static const char *_C_LOCALE_WARNING =
+ "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
@ -390,15 +490,8 @@ index a4f7f82..dd58dc9 100644
+static void
+_emit_stderr_warning_for_c_locale(void)
+{
+ const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
+ /* We don't emit a warning if locale coercion has been explicitly disabled.
+ *
+ * For consistency with the corresponding check in Programs/python.c
+ * we ignore the Python -E and -I flags here.
+ */
+ if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
+ const char *ctype_loc = setlocale(LC_CTYPE, NULL);
+ if (ctype_loc != NULL && strcmp(ctype_loc, "C") == 0) {
+ if (c_locale_coercion_is_expected()) {
+ if (_Py_LegacyLocaleDetected()) {
+ fprintf(stderr, "%s", _C_LOCALE_WARNING);
+ }
+ }
@ -408,7 +501,7 @@ index a4f7f82..dd58dc9 100644
void
_Py_InitializeEx_Private(int install_sigs, int install_importlib)
{
@@ -315,11 +340,19 @@ _Py_InitializeEx_Private(int install_sigs, int install_importlib)
@@ -315,11 +483,19 @@ _Py_InitializeEx_Private(int install_sigs, int install_importlib)
initialized = 1;
_Py_Finalizing = NULL;
@ -429,8 +522,23 @@ index a4f7f82..dd58dc9 100644
#endif
if ((p = Py_GETENV("PYTHONDEBUG")) && *p != '\0')
@@ -1242,12 +1418,8 @@ initstdio(void)
}
}
if (!errors && !(pythonioencoding && *pythonioencoding)) {
- /* When the LC_CTYPE locale is the POSIX locale ("C locale"),
- stdin and stdout use the surrogateescape error handler by
- default, instead of the strict error handler. */
- char *loc = setlocale(LC_CTYPE, NULL);
- if (loc != NULL && strcmp(loc, "C") == 0)
- errors = "surrogateescape";
+ /* Choose the default error handler based on the current locale */
+ errors = get_default_standard_stream_error_handler();
}
}
diff --git a/configure b/configure
index 6bcddb7..13052d6 100755
index 2915246..39e5a27 100755
--- a/configure
+++ b/configure
@@ -834,6 +834,8 @@ with_thread
@ -455,7 +563,7 @@ index 6bcddb7..13052d6 100755
--with-valgrind Enable Valgrind support
--with(out)-dtrace disable/enable DTrace support
--with-fpectl enable SIGFPE catching
@@ -11016,6 +11024,52 @@ fi
@@ -11010,6 +11018,52 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_pymalloc" >&5
$as_echo "$with_pymalloc" >&6; }
@ -509,10 +617,10 @@ index 6bcddb7..13052d6 100755
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-valgrind" >&5
$as_echo_n "checking for --with-valgrind... " >&6; }
diff --git a/configure.ac b/configure.ac
index e222c21..a1653e7 100644
index 67dfba3..b9c9f04 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3287,6 +3287,40 @@ then
@@ -3279,6 +3279,40 @@ then
fi
AC_MSG_RESULT($with_pymalloc)
@ -554,10 +662,10 @@ index e222c21..a1653e7 100644
AC_MSG_CHECKING([for --with-valgrind])
AC_ARG_WITH([valgrind],
diff --git a/pyconfig.h.in b/pyconfig.h.in
index e7a836c..11e0798 100644
index b10c57f..0a6f3e2 100644
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -1241,9 +1241,15 @@
@@ -1244,9 +1244,15 @@
/* Define as the preferred size in bits of long digits */
#undef PYLONG_BITS_IN_DIGIT

View File

@ -123,7 +123,7 @@
Summary: Version 3 of the Python programming language aka Python 3000
Name: python3
Version: %{pybasever}.1
Release: 4%{?dist}
Release: 5%{?dist}
License: Python
Group: Development/Languages
@ -1659,6 +1659,9 @@ fi
# ======================================================
%changelog
* Fri May 05 2017 Charalampos Stratakis <cstratak@redhat.com> - 3.6.1-5
- Update PEP 538 to the latest upstream implementation
* Tue Apr 18 2017 Charalampos Stratakis <cstratak@redhat.com> - 3.6.1-4
- Enable link time optimizations
- Move windows executables to the devel subpackage (rhbz#1426257)