diff --git a/00262-pep538_coerce_legacy_c_locale.patch b/00262-pep538_coerce_legacy_c_locale.patch index 7802567..856ef77 100644 --- a/00262-pep538_coerce_legacy_c_locale.patch +++ b/00262-pep538_coerce_legacy_c_locale.patch @@ -1,8 +1,8 @@ diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst -index 08dc311..c6ec147 100644 +index 195f63f..8ecd70f 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst -@@ -713,6 +713,40 @@ conflict. +@@ -713,6 +713,45 @@ conflict. .. versionadded:: 3.6 @@ -35,6 +35,11 @@ index 08dc311..c6ec147 100644 + :data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This + behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual. + ++ For debugging purposes, setting ``PYTHONCOERCECLOCALE=warn`` will cause ++ Python to emit warning messages on ``stderr`` if either the locale coercion ++ activates, or else if a locale that *would* have triggered coercion is ++ still active when the Python runtime is initialized. ++ + Availability: \*nix + + .. 
versionadded:: 3.7 @@ -44,10 +49,10 @@ index 08dc311..c6ec147 100644 ~~~~~~~~~~~~~~~~~~~~ diff --git a/Lib/test/support/script_helper.py b/Lib/test/support/script_helper.py -index 1e74647..b3ac848 100644 +index ca5f9c2..7aa460b 100644 --- a/Lib/test/support/script_helper.py +++ b/Lib/test/support/script_helper.py -@@ -48,8 +48,35 @@ def interpreter_requires_environment(): +@@ -51,8 +51,35 @@ def interpreter_requires_environment(): return __cached_interp_requires_environment @@ -85,7 +90,7 @@ index 1e74647..b3ac848 100644 # Executing the interpreter in a subprocess -@@ -107,30 +134,7 @@ def run_python_until_end(*args, **env_vars): +@@ -110,30 +137,7 @@ def run_python_until_end(*args, **env_vars): def _assert_python(expected_success, *args, **env_vars): res, cmd_line = run_python_until_end(*args, **env_vars) if (res.rc and expected_success) or (not res.rc and not expected_success): @@ -119,10 +124,10 @@ index 1e74647..b3ac848 100644 def assert_python_ok(*args, **env_vars): diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py new file mode 100644 -index 0000000..ad6ecac +index 0000000..a4b4626 --- /dev/null +++ b/Lib/test/test_c_locale_coercion.py -@@ -0,0 +1,269 @@ +@@ -0,0 +1,353 @@ +# Tests the attempted automatic coercion of the C locale to a UTF-8 locale + +import unittest @@ -139,14 +144,31 @@ index 0000000..ad6ecac + interpreter_requires_environment, +) + ++# Set our expectation for the default encoding used in the C locale ++# for the filesystem encoding and the standard streams ++C_LOCALE_STREAM_ENCODING = "ascii" ++if sys.platform == "darwin": ++ C_LOCALE_FS_ENCODING = "utf-8" ++else: ++ C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING ++ ++# Note that the above is probably still wrong in some cases, such as: ++# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set ++# * AIX and any other platforms that use latin-1 in the C locale ++# ++# Options for dealing with this: ++# * Don't set PYTHON_COERCE_C_LOCALE on such platforms 
(e.g. Windows doesn't) ++# * Fix the test expectations to match the actual platform behaviour ++ +# In order to get the warning messages to match up as expected, the candidate +# order here must much the target locale order in Python/pylifecycle.c -+_C_UTF8_LOCALES = ( -+ # Entries: (Target locale, expected env var updates) -+ ("C.UTF-8", "LC_CTYPE & LANG"), -+ ("C.utf8", "LC_CTYPE & LANG"), -+ ("UTF-8", "LC_CTYPE"), -+) ++_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8") ++ ++# XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to ++# problems encountered on *BSD systems with those test cases ++# For additional details see: ++# nl_langinfo CODESET error: https://bugs.python.org/issue30647 ++# locale handling differences: https://bugs.python.org/issue30672 + +# There's no reliable cross-platform way of checking locale alias +# lists, so the only way of knowing which of these locales will work @@ -158,28 +180,39 @@ index 0000000..ad6ecac + result, py_cmd = run_python_until_end("-c", cmd, __isolated=True) + return result.rc == 0 + -+_EncodingDetails = namedtuple("EncodingDetails", -+ "fsencoding stdin_info stdout_info stderr_info") ++_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all" ++_EncodingDetails = namedtuple("EncodingDetails", _fields) + +class EncodingDetails(_EncodingDetails): ++ # XXX (ncoghlan): Using JSON for child state reporting may be less fragile + CHILD_PROCESS_SCRIPT = ";".join([ -+ "import sys", ++ "import sys, os", + "print(sys.getfilesystemencoding())", + "print(sys.stdin.encoding + ':' + sys.stdin.errors)", + "print(sys.stdout.encoding + ':' + sys.stdout.errors)", + "print(sys.stderr.encoding + ':' + sys.stderr.errors)", ++ "print(os.environ.get('LANG', 'not set'))", ++ "print(os.environ.get('LC_CTYPE', 'not set'))", ++ "print(os.environ.get('LC_ALL', 'not set'))", + ]) + + @classmethod -+ def get_expected_details(cls, expected_fsencoding): ++ def get_expected_details(cls, coercion_expected, 
fs_encoding, stream_encoding, env_vars): + """Returns expected child process details for a given encoding""" -+ _stream = expected_fsencoding + ":{}" ++ _stream = stream_encoding + ":{}" + # stdin and stdout should use surrogateescape either because the + # coercion triggered, or because the C locale was detected + stream_info = 2*[_stream.format("surrogateescape")] + # stderr should always use backslashreplace + stream_info.append(_stream.format("backslashreplace")) -+ return dict(cls(expected_fsencoding, *stream_info)._asdict()) ++ expected_lang = env_vars.get("LANG", "not set").lower() ++ if coercion_expected: ++ expected_lc_ctype = CLI_COERCION_TARGET.lower() ++ else: ++ expected_lc_ctype = env_vars.get("LC_CTYPE", "not set").lower() ++ expected_lc_all = env_vars.get("LC_ALL", "not set").lower() ++ env_info = expected_lang, expected_lc_ctype, expected_lc_all ++ return dict(cls(fs_encoding, *stream_info, *env_info)._asdict()) + + @staticmethod + def _handle_output_variations(data): @@ -215,99 +248,99 @@ index 0000000..ad6ecac + result.fail(py_cmd) + # All subprocess outputs in this test case should be pure ASCII + adjusted_output = cls._handle_output_variations(result.out) -+ stdout_lines = adjusted_output.decode("ascii").rstrip().splitlines() ++ stdout_lines = adjusted_output.decode("ascii").splitlines() + child_encoding_details = dict(cls(*stdout_lines)._asdict()) + stderr_lines = result.err.decode("ascii").rstrip().splitlines() + return child_encoding_details, stderr_lines + + -+class _ChildProcessEncodingTestCase(unittest.TestCase): -+ # Base class to check for expected encoding details in a child process -+ -+ def _check_child_encoding_details(self, -+ env_vars, -+ expected_fsencoding, -+ expected_warning): -+ """Check the C locale handling for the given process environment -+ -+ Parameters: -+ expected_fsencoding: the encoding the child is expected to report -+ allow_c_locale: setting to use for PYTHONALLOWCLOCALE -+ None: don't set the variable at all -+ 
str: the value set in the child's environment -+ """ -+ result = EncodingDetails.get_child_details(env_vars) -+ encoding_details, stderr_lines = result -+ self.assertEqual(encoding_details, -+ EncodingDetails.get_expected_details( -+ expected_fsencoding)) -+ self.assertEqual(stderr_lines, expected_warning) -+ +# Details of the shared library warning emitted at runtime -+LIBRARY_C_LOCALE_WARNING = ( ++LEGACY_LOCALE_WARNING = ( + "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII " + "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, " + "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible " + "locales is recommended." +) + -+@unittest.skipUnless(sysconfig.get_config_var("PY_WARN_ON_C_LOCALE"), -+ "C locale runtime warning disabled at build time") -+class LocaleWarningTests(_ChildProcessEncodingTestCase): -+ # Test warning emitted when running in the C locale -+ -+ def test_library_c_locale_warning(self): -+ self.maxDiff = None -+ for locale_to_set in ("C", "POSIX", "invalid.ascii"): -+ var_dict = { -+ "LC_ALL": locale_to_set -+ } -+ with self.subTest(forced_locale=locale_to_set): -+ self._check_child_encoding_details(var_dict, -+ "ascii", -+ [LIBRARY_C_LOCALE_WARNING]) -+ +# Details of the CLI locale coercion warning emitted at runtime +CLI_COERCION_WARNING_FMT = ( -+ "Python detected LC_CTYPE=C: {} coerced to {} (set another locale " ++ "Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale " + "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior)." 
+) + -+class _LocaleCoercionTargetsTestCase(_ChildProcessEncodingTestCase): -+ # Base class for test cases that rely on coercion targets being defined + -+ available_targets = [] -+ targets_required = True ++AVAILABLE_TARGETS = None ++CLI_COERCION_TARGET = None ++CLI_COERCION_WARNING = None + -+ @classmethod -+ def setUpClass(cls): -+ first_target_locale = first_env_updates = None -+ available_targets = cls.available_targets -+ # Find the target locales available in the current system -+ for target_locale, env_updates in _C_UTF8_LOCALES: -+ if _set_locale_in_subprocess(target_locale): -+ available_targets.append(target_locale) -+ if first_target_locale is None: -+ first_target_locale = target_locale -+ first_env_updates = env_updates -+ if cls.targets_required and not available_targets: -+ raise unittest.SkipTest("No C-with-UTF-8 locale available") -+ # Expect coercion to use the first available locale -+ cls.EXPECTED_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format( -+ first_env_updates, first_target_locale ++def setUpModule(): ++ global AVAILABLE_TARGETS ++ global CLI_COERCION_TARGET ++ global CLI_COERCION_WARNING ++ ++ if AVAILABLE_TARGETS is not None: ++ # initialization already done ++ return ++ AVAILABLE_TARGETS = [] ++ ++ # Find the target locales available in the current system ++ for target_locale in _C_UTF8_LOCALES: ++ if _set_locale_in_subprocess(target_locale): ++ AVAILABLE_TARGETS.append(target_locale) ++ ++ if AVAILABLE_TARGETS: ++ # Coercion is expected to use the first available target locale ++ CLI_COERCION_TARGET = AVAILABLE_TARGETS[0] ++ CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET) ++ ++ ++class _LocaleHandlingTestCase(unittest.TestCase): ++ # Base class to check expected locale handling behaviour ++ ++ def _check_child_encoding_details(self, ++ env_vars, ++ expected_fs_encoding, ++ expected_stream_encoding, ++ expected_warnings, ++ coercion_expected): ++ """Check the C locale handling for the given process 
environment ++ ++ Parameters: ++ expected_fs_encoding: expected sys.getfilesystemencoding() result ++ expected_stream_encoding: expected encoding for standard streams ++ expected_warnings: stderr output to expect (if any) ++ """ ++ result = EncodingDetails.get_child_details(env_vars) ++ encoding_details, stderr_lines = result ++ expected_details = EncodingDetails.get_expected_details( ++ coercion_expected, ++ expected_fs_encoding, ++ expected_stream_encoding, ++ env_vars + ) ++ self.assertEqual(encoding_details, expected_details) ++ if expected_warnings is None: ++ expected_warnings = [] ++ self.assertEqual(stderr_lines, expected_warnings) + + -+class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase): ++class LocaleConfigurationTests(_LocaleHandlingTestCase): + # Test explicit external configuration via the process environment + ++ def setUpClass(): ++ # This relies on setUpModule() having been run, so it can't be ++ # handled via the @unittest.skipUnless decorator ++ if not AVAILABLE_TARGETS: ++ raise unittest.SkipTest("No C-with-UTF-8 locale available") ++ + def test_external_target_locale_configuration(self): ++ + # Explicitly setting a target locale should give the same behaviour as + # is seen when implicitly coercing to that target locale + self.maxDiff = None + -+ expected_warning = [] -+ expected_fsencoding = "utf-8" ++ expected_fs_encoding = "utf-8" ++ expected_stream_encoding = "utf-8" + + base_var_dict = { + "LANG": "", @@ -315,47 +348,71 @@ index 0000000..ad6ecac + "LC_ALL": "", + } + for env_var in ("LANG", "LC_CTYPE"): -+ for locale_to_set in self.available_targets: ++ for locale_to_set in AVAILABLE_TARGETS: ++ # XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as ++ # expected, so skip that combination for now ++ # See https://bugs.python.org/issue30672 for discussion ++ if env_var == "LANG" and locale_to_set == "UTF-8": ++ continue ++ + with self.subTest(env_var=env_var, + configured_locale=locale_to_set): + var_dict = base_var_dict.copy() + 
var_dict[env_var] = locale_to_set + self._check_child_encoding_details(var_dict, -+ expected_fsencoding, -+ expected_warning) ++ expected_fs_encoding, ++ expected_stream_encoding, ++ expected_warnings=None, ++ coercion_expected=False) + + + +@test.support.cpython_only +@unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"), + "C locale coercion disabled at build time") -+class LocaleCoercionTests(_LocaleCoercionTargetsTestCase): ++class LocaleCoercionTests(_LocaleHandlingTestCase): + # Test implicit reconfiguration of the environment during CLI startup + -+ def _check_c_locale_coercion(self, expected_fsencoding, coerce_c_locale): ++ def _check_c_locale_coercion(self, ++ fs_encoding, stream_encoding, ++ coerce_c_locale, ++ expected_warnings=None, ++ coercion_expected=True, ++ **extra_vars): + """Check the C locale handling for various configurations + + Parameters: -+ expected_fsencoding: the encoding the child is expected to report -+ allow_c_locale: setting to use for PYTHONALLOWCLOCALE ++ fs_encoding: expected sys.getfilesystemencoding() result ++ stream_encoding: expected encoding for standard streams ++ coerce_c_locale: setting to use for PYTHONCOERCECLOCALE + None: don't set the variable at all + str: the value set in the child's environment ++ expected_warnings: expected warning lines on stderr ++ extra_vars: additional environment variables to set in subprocess + """ -+ -+ # Check for expected warning on stderr if C locale is coerced + self.maxDiff = None + -+ expected_warning = [] -+ if coerce_c_locale != "0": -+ expected_warning.append(self.EXPECTED_COERCION_WARNING) ++ if not AVAILABLE_TARGETS: ++ # Locale coercion is disabled when there aren't any target locales ++ fs_encoding = C_LOCALE_FS_ENCODING ++ stream_encoding = C_LOCALE_STREAM_ENCODING ++ coercion_expected = False ++ if expected_warnings: ++ expected_warnings = [LEGACY_LOCALE_WARNING] + + base_var_dict = { + "LANG": "", + "LC_CTYPE": "", + "LC_ALL": "", + } ++ 
base_var_dict.update(extra_vars) + for env_var in ("LANG", "LC_CTYPE"): + for locale_to_set in ("", "C", "POSIX", "invalid.ascii"): ++ # XXX (ncoghlan): *BSD platforms don't behave as expected in the ++ # POSIX locale, so we skip that for now ++ # See https://bugs.python.org/issue30672 for discussion ++ if locale_to_set == "POSIX": ++ continue + with self.subTest(env_var=env_var, + nominal_locale=locale_to_set, + PYTHONCOERCECLOCALE=coerce_c_locale): @@ -363,40 +420,72 @@ index 0000000..ad6ecac + var_dict[env_var] = locale_to_set + if coerce_c_locale is not None: + var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale ++ # Check behaviour on successful coercion + self._check_child_encoding_details(var_dict, -+ expected_fsencoding, -+ expected_warning) ++ fs_encoding, ++ stream_encoding, ++ expected_warnings, ++ coercion_expected) + + def test_test_PYTHONCOERCECLOCALE_not_set(self): + # This should coerce to the first available target locale by default -+ self._check_c_locale_coercion("utf-8", coerce_c_locale=None) ++ self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None) + + def test_PYTHONCOERCECLOCALE_not_zero(self): -+ # *Any* string other that "0" is considered "set" for our purposes ++ # *Any* string other than "0" is considered "set" for our purposes + # and hence should result in the locale coercion being enabled + for setting in ("", "1", "true", "false"): -+ self._check_c_locale_coercion("utf-8", coerce_c_locale=setting) ++ self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting) ++ ++ def test_PYTHONCOERCECLOCALE_set_to_warn(self): ++ # PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales ++ self._check_c_locale_coercion("utf-8", "utf-8", ++ coerce_c_locale="warn", ++ expected_warnings=[CLI_COERCION_WARNING]) ++ + + def test_PYTHONCOERCECLOCALE_set_to_zero(self): + # The setting "0" should result in the locale coercion being disabled -+ self._check_c_locale_coercion("ascii", coerce_c_locale="0") ++ 
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, ++ C_LOCALE_STREAM_ENCODING, ++ coerce_c_locale="0", ++ coercion_expected=False) ++ # Setting LC_ALL=C shouldn't make any difference to the behaviour ++ self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, ++ C_LOCALE_STREAM_ENCODING, ++ coerce_c_locale="0", ++ LC_ALL="C", ++ coercion_expected=False) + ++ def test_LC_ALL_set_to_C(self): ++ # Setting LC_ALL should render the locale coercion ineffective ++ self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, ++ C_LOCALE_STREAM_ENCODING, ++ coerce_c_locale=None, ++ LC_ALL="C", ++ coercion_expected=False) ++ # And result in a warning about a lack of locale compatibility ++ self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, ++ C_LOCALE_STREAM_ENCODING, ++ coerce_c_locale="warn", ++ LC_ALL="C", ++ expected_warnings=[LEGACY_LOCALE_WARNING], ++ coercion_expected=False) + +def test_main(): + test.support.run_unittest( + LocaleConfigurationTests, -+ LocaleCoercionTests, -+ LocaleWarningTests ++ LocaleCoercionTests + ) + test.support.reap_children() + +if __name__ == "__main__": + test_main() diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py -index eb3e2c5..f677d88 100644 +index 6c3625d..009f542 100644 --- a/Lib/test/test_capi.py +++ b/Lib/test/test_capi.py -@@ -369,14 +369,15 @@ def setUp(self): +@@ -369,14 +369,21 @@ class EmbeddingTests(unittest.TestCase): def tearDown(self): os.chdir(self.oldcwd) @@ -405,6 +494,12 @@ index eb3e2c5..f677d88 100644 """Runs a test in the embedded interpreter""" cmd = [self.test_exe] cmd.extend(args) ++ if env is not None and sys.platform == 'win32': ++ # Windows requires at least the SYSTEMROOT environment variable to ++ # start Python. 
++ env = env.copy() ++ env['SYSTEMROOT'] = os.environ['SYSTEMROOT'] ++ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -414,22 +509,31 @@ index eb3e2c5..f677d88 100644 (out, err) = p.communicate() self.assertEqual(p.returncode, 0, "bad returncode %d, stderr is %r" % -@@ -386,7 +387,7 @@ def run_embedded_interpreter(self, *args): +@@ -386,31 +393,21 @@ class EmbeddingTests(unittest.TestCase): def test_subinterps(self): # This is just a "don't crash" test - out, err = self.run_embedded_interpreter("repeated_init_and_subinterpreters") + out, err = self.run_embedded_interpreter() - if support.verbose: + if support.verbose > 1: print() print(out) print(err) -@@ -403,13 +404,14 @@ def _get_default_pipe_encoding(): +- @staticmethod +- def _get_default_pipe_encoding(): +- rp, wp = os.pipe() +- try: +- with os.fdopen(wp, 'w') as w: +- default_pipe_encoding = w.encoding +- finally: +- os.close(rp) +- return default_pipe_encoding +- def test_forced_io_encoding(self): # Checks forced configuration of embedded interpreter IO streams - out, err = self.run_embedded_interpreter("forced_io_encoding") - if support.verbose: -+ env = {"PYTHONIOENCODING": "UTF-8:surrogateescape"} ++ env = dict(os.environ, PYTHONIOENCODING="utf-8:surrogateescape") + out, err = self.run_embedded_interpreter("forced_io_encoding", env=env) + if support.verbose > 1: print() @@ -437,16 +541,28 @@ index eb3e2c5..f677d88 100644 print(err) - expected_errors = sys.__stdout__.errors - expected_stdin_encoding = sys.__stdin__.encoding +- expected_pipe_encoding = self._get_default_pipe_encoding() ++ expected_stream_encoding = "utf-8" + expected_errors = "surrogateescape" -+ expected_stdin_encoding = "UTF-8" - expected_pipe_encoding = self._get_default_pipe_encoding() expected_output = '\n'.join([ "--- Use defaults ---", + "Expected encoding: default", +@@ -437,8 +434,8 @@ class EmbeddingTests(unittest.TestCase): + "stdout: latin-1:replace", + "stderr: latin-1:backslashreplace"]) + 
expected_output = expected_output.format( +- in_encoding=expected_stdin_encoding, +- out_encoding=expected_pipe_encoding, ++ in_encoding=expected_stream_encoding, ++ out_encoding=expected_stream_encoding, + errors=expected_errors) + # This is useful if we ever trip over odd platform behaviour + self.maxDiff = None diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py -index 958d282..c4c6850 100644 +index ae2bcd4..0a302ff 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py -@@ -8,8 +8,9 @@ +@@ -9,8 +9,9 @@ import sys import subprocess import tempfile from test.support import script_helper, is_android @@ -458,7 +574,7 @@ index 958d282..c4c6850 100644 # XXX (ncoghlan): Move to script_helper and make consistent with run_python -@@ -150,6 +151,7 @@ def test_undecodable_code(self): +@@ -151,6 +152,7 @@ class CmdLineTest(unittest.TestCase): env = os.environ.copy() # Use C locale to get ascii for the locale encoding env['LC_ALL'] = 'C' @@ -467,10 +583,10 @@ index 958d282..c4c6850 100644 b'import locale; ' b'print(ascii("' + undecodable + b'"), ' diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py -index ed78e2a..3844812 100644 +index df9ebd4..63145e4 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py -@@ -682,6 +682,7 @@ def c_locale_get_error_handler(self, isolated=False, encoding=None): +@@ -680,6 +680,7 @@ class SysModuleTest(unittest.TestCase): # Force the POSIX locale env = os.environ.copy() env["LC_ALL"] = "C" @@ -478,8 +594,25 @@ index ed78e2a..3844812 100644 code = '\n'.join(( 'import sys', 'def dump(name):', +diff --git a/Modules/main.c b/Modules/main.c +index dd50211..f20cf24 100644 +--- a/Modules/main.c ++++ b/Modules/main.c +@@ -105,7 +105,11 @@ static const char usage_6[] = + " predictable seed.\n" + "PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n" + " on Python memory allocators. 
Use PYTHONMALLOC=debug to install debug\n" +-" hooks.\n"; ++" hooks.\n" ++ ++"PYTHONCOERCECLOCALE: if this variable is set to 0, it disables the locale\n" ++" coercion behavior. Use PYTHONCOERCECLOCALE=warn to request display of\n" ++" locale coercion and locale compatibility warnings on stderr.\n"; + + static int + usage(int exitcode, const wchar_t* program) diff --git a/Programs/_testembed.c b/Programs/_testembed.c -index a68d4fa..280bf50 100644 +index 3968399..1bd2bbf 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -1,4 +1,5 @@ @@ -562,7 +695,7 @@ index a7afbc7..03f8295 100644 for (i = 0; i < argc; i++) { PyMem_RawFree(argv_copy2[i]); diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c -index c0f41b3..278a5af 100644 +index a4f7f82..743d9b6 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -167,6 +167,7 @@ Py_SetStandardStreamEncoding(const char *encoding, const char *errors) @@ -573,7 +706,7 @@ index c0f41b3..278a5af 100644 /* Global initializations. Can be undone by Py_FinalizeEx(). Don't call this twice without an intervening Py_FinalizeEx() call. When initializations fail, a fatal error is issued and the function does -@@ -302,6 +303,167 @@ import_init(PyInterpreterState *interp, PyObject *sysmod) +@@ -301,6 +302,181 @@ import_init(PyInterpreterState *interp, PyObject *sysmod) } @@ -600,22 +733,64 @@ index c0f41b3..278a5af 100644 +int +_Py_LegacyLocaleDetected(void) +{ ++#ifndef MS_WINDOWS ++ /* On non-Windows systems, the C locale is considered a legacy locale */ ++ /* XXX (ncoghlan): some platforms (notably Mac OS X) don't appear to treat ++ * the POSIX locale as a simple alias for the C locale, so ++ * we may also want to check for that explicitly. 
++ */ + const char *ctype_loc = setlocale(LC_CTYPE, NULL); + return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0; ++#else ++ /* Windows uses code pages instead of locales, so no locale is legacy */ ++ return 0; ++#endif ++} ++ ++ ++static const char *_C_LOCALE_WARNING = ++ "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII " ++ "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, " ++ "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible " ++ "locales is recommended.\n"; ++ ++static int ++_legacy_locale_warnings_enabled(void) ++{ ++ const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE"); ++ return (coerce_c_locale != NULL && ++ strncmp(coerce_c_locale, "warn", 5) == 0); ++} ++ ++static void ++_emit_stderr_warning_for_legacy_locale(void) ++{ ++ if (_legacy_locale_warnings_enabled()) { ++ if (_Py_LegacyLocaleDetected()) { ++ fprintf(stderr, "%s", _C_LOCALE_WARNING); ++ } ++ } +} + +typedef struct _CandidateLocale { + const char *locale_name; /* The locale to try as a coercion target */ -+ int set_LANG; /* Whether to set LANG in addition to LC_CTYPE */ +} _LocaleCoercionTarget; + +static _LocaleCoercionTarget _TARGET_LOCALES[] = { -+ { "C.UTF-8", 1 }, -+ { "C.utf8", 1}, -+ { "UTF-8", 0 }, -+ { NULL, 0 } ++ {"C.UTF-8"}, ++ {"C.utf8"}, ++ /* {"UTF-8"}, */ ++ {NULL} +}; + ++/* XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to ++ * problems encountered on *BSD systems with those test cases ++ * For additional details see: ++ * nl_langinfo CODESET error: https://bugs.python.org/issue30647 ++ * locale handling differences: https://bugs.python.org/issue30672 ++ */ ++ ++ +static char * +get_default_standard_stream_error_handler(void) +{ @@ -626,6 +801,7 @@ index c0f41b3..278a5af 100644 + return "surrogateescape"; + } + ++#ifdef PY_COERCE_C_LOCALE + /* "surrogateescape" is the default in locale coercion target locales */ + const _LocaleCoercionTarget *target = NULL; + for (target = 
_TARGET_LOCALES; target->locale_name; target++) { @@ -633,6 +809,7 @@ index c0f41b3..278a5af 100644 + return "surrogateescape"; + } + } ++#endif + } + + /* Otherwise return NULL to request the typical default error handler */ @@ -641,52 +818,34 @@ index c0f41b3..278a5af 100644 + +#ifdef PY_COERCE_C_LOCALE +static const char *_C_LOCALE_COERCION_WARNING = -+ "Python detected LC_CTYPE=C: %.20s coerced to %.20s (set another locale " ++ "Python detected LC_CTYPE=C: LC_CTYPE coerced to %.20s (set another locale " + "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n"; + +static void +_coerce_default_locale_settings(const _LocaleCoercionTarget *target) +{ -+ const char *env_vars_updated = "LC_CTYPE"; ++ + const char *newloc = target->locale_name; + + /* Reset locale back to currently configured defaults */ + setlocale(LC_ALL, ""); + -+ /* Set the relevant locale environment variables */ ++ /* Set the relevant locale environment variable */ + if (setenv("LC_CTYPE", newloc, 1)) { + fprintf(stderr, + "Error setting LC_CTYPE, skipping C locale coercion\n"); + return; + } -+ if (target->set_LANG) { -+ if (setenv("LANG", newloc, 1) == 0) { -+ env_vars_updated = "LC_CTYPE & LANG"; -+ } else { -+ fprintf(stderr, -+ "Error setting LANG during C locale coercion\n"); -+ } ++ if (_legacy_locale_warnings_enabled()) { ++ fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc); + } -+ fprintf(stderr, _C_LOCALE_COERCION_WARNING, env_vars_updated, newloc); + + /* Reconfigure with the overridden environment variables */ + setlocale(LC_ALL, ""); +} -+ -+static int -+c_locale_coercion_is_expected(void) -+{ -+ /* This may be called prior to Py_Initialize, so we don't call any other -+ * Python APIs, and we ignore the -E and -I flags -+ */ -+ const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE"); -+ if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) { -+ return 1; -+ } -+ return 0; -+} +#endif + ++ +void +_Py_CoerceLegacyLocale(void) +{ @@ -697,9 +856,15 
@@ index c0f41b3..278a5af 100644 + * to give end users a way to force even scripts that are otherwise + * isolated from their environment to use the legacy ASCII-centric C + * locale. -+ */ -+ if (c_locale_coercion_is_expected()) { -+ /* PYTHONCOERCECLOCALE is not set, or is not set to exactly "0" */ ++ * ++ * Ignoring -E and -I is safe from a security perspective, as we only use ++ * the setting to turn *off* the implicit locale coercion, and anyone with ++ * access to the process environment already has the ability to set ++ * `LC_ALL=C` to override the C level locale settings anyway. ++ */ ++ const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE"); ++ if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) { ++ /* PYTHONCOERCECLOCALE is not set, or is set to something other than "0" */ + const char *locale_override = getenv("LC_ALL"); + if (locale_override == NULL || *locale_override == '\0') { + /* LC_ALL is also not set (or is set to an empty string) */ @@ -719,29 +884,11 @@ index c0f41b3..278a5af 100644 +#endif +} + -+ -+#ifdef PY_WARN_ON_C_LOCALE -+static const char *_C_LOCALE_WARNING = -+ "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII " -+ "encoding), which may cause Unicode compatibility problems. 
Using C.UTF-8, " -+ "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible " -+ "locales is recommended.\n"; -+ -+static void -+_emit_stderr_warning_for_c_locale(void) -+{ -+ if (c_locale_coercion_is_expected()) { -+ if (_Py_LegacyLocaleDetected()) { -+ fprintf(stderr, "%s", _C_LOCALE_WARNING); -+ } -+ } -+} -+#endif + void _Py_InitializeEx_Private(int install_sigs, int install_importlib) { -@@ -316,11 +478,19 @@ _Py_InitializeEx_Private(int install_sigs, int install_importlib) +@@ -315,11 +491,19 @@ _Py_InitializeEx_Private(int install_sigs, int install_importlib) initialized = 1; _Py_Finalizing = NULL; @@ -752,17 +899,17 @@ index c0f41b3..278a5af 100644 + */ + setlocale(LC_CTYPE, "C.UTF-8"); +#else ++#ifndef MS_WINDOWS /* Set up the LC_CTYPE locale, so we can obtain the locale's charset without having to switch locales. */ setlocale(LC_CTYPE, ""); -+#ifdef PY_WARN_ON_C_LOCALE -+ _emit_stderr_warning_for_c_locale(); ++ _emit_stderr_warning_for_legacy_locale(); +#endif #endif if ((p = Py_GETENV("PYTHONDEBUG")) && *p != '\0') -@@ -1252,12 +1422,8 @@ initstdio(void) +@@ -1242,12 +1426,8 @@ initstdio(void) } } if (!errors && !(pythonioencoding && *pythonioencoding)) { @@ -778,7 +925,7 @@ index c0f41b3..278a5af 100644 } diff --git a/configure b/configure -index c9340c6..8b1bd5b 100755 +index 2915246..39e5a27 100755 --- a/configure +++ b/configure @@ -834,6 +834,8 @@ with_thread @@ -790,7 +937,7 @@ index c9340c6..8b1bd5b 100755 with_valgrind with_dtrace with_fpectl -@@ -1538,6 +1540,12 @@ Optional Packages: +@@ -1527,6 +1529,12 @@ Optional Packages: deprecated; use --with(out)-threads --with(out)-doc-strings disable/enable documentation strings --with(out)-pymalloc disable/enable specialized mallocs @@ -803,7 +950,7 @@ index c9340c6..8b1bd5b 100755 --with-valgrind Enable Valgrind support --with(out)-dtrace disable/enable DTrace support --with-fpectl enable SIGFPE catching -@@ -11030,6 +11038,52 @@ fi +@@ -11010,6 +11018,52 @@ fi { $as_echo 
"$as_me:${as_lineno-$LINENO}: result: $with_pymalloc" >&5 $as_echo "$with_pymalloc" >&6; } @@ -857,10 +1004,10 @@ index c9340c6..8b1bd5b 100755 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-valgrind" >&5 $as_echo_n "checking for --with-valgrind... " >&6; } diff --git a/configure.ac b/configure.ac -index e065ce5..c455ebd 100644 +index 67dfba3..b9c9f04 100644 --- a/configure.ac +++ b/configure.ac -@@ -3304,6 +3304,40 @@ then +@@ -3279,6 +3279,40 @@ then fi AC_MSG_RESULT($with_pymalloc) @@ -902,10 +1049,10 @@ index e065ce5..c455ebd 100644 AC_MSG_CHECKING([for --with-valgrind]) AC_ARG_WITH([valgrind], diff --git a/pyconfig.h.in b/pyconfig.h.in -index 0a3d59e..fa2792b 100644 +index b10c57f..0a6f3e2 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in -@@ -1247,9 +1247,15 @@ +@@ -1244,9 +1244,15 @@ /* Define as the preferred size in bits of long digits */ #undef PYLONG_BITS_IN_DIGIT diff --git a/python3.spec b/python3.spec index 9ac8875..d6687c7 100644 --- a/python3.spec +++ b/python3.spec @@ -133,7 +133,7 @@ Summary: Version 3 of the Python programming language aka Python 3000 Name: python3 Version: %{pybasever}.1 -Release: 9%{?dist} +Release: 10%{?dist} License: Python Group: Development/Languages @@ -434,11 +434,6 @@ Patch252: 00252-add-executable-option.patch # Reported upstream: http://bugs.python.org/issue29324 Patch258: 00258-fix-test_aead_aes_gcm.patch -# 00261 # -# Use proper command line parsing in _testembed -# Fixed upstream: http://bugs.python.org/issue24932 -Patch261: 00261-use-proper-command-line-parsing-in-_testembed.patch - # 00262 # # Backport of PEP 538: Coercing the legacy C locale to a UTF-8 based locale # https://www.python.org/dev/peps/pep-0538/ @@ -733,7 +728,6 @@ sed -r -i s/'_PIP_VERSION = "[0-9.]+"'/'_PIP_VERSION = "%{pip_version}"'/ Lib/en %patch251 -p1 %patch252 -p1 %patch258 -p1 -%patch261 -p1 %patch262 -p1 %ifarch aarch64 @@ -1702,6 +1696,9 @@ fi # ====================================================== %changelog +* Tue Jun 27 2017 
Charalampos Stratakis - 3.6.1-10 +- Update to the latest upstream implementation of PEP 538 + * Mon Jun 26 2017 Michal Cyprian - 3.6.1-9 - Make pip and distutils in user environment install into separate location