python-pandas/pandas-1.3.5-pr-46681.patch

From 5c886169cd2674d7077271602a1a36ae0526d3ff Mon Sep 17 00:00:00 2001
From: "Benjamin A. Beasley" <code@musicinmybrain.net>
Date: Wed, 6 Apr 2022 07:13:22 -0400
Subject: [PATCH] Fix a few test failures on big-endian systems

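These failures all come from tests that hard-code little-endian dtype
strings (e.g. "<U5", "<i8"), where in fact the endianness of the dtype
is that of the host. The expected dtype strings are now built from
sys.byteorder instead.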
---
 pandas/tests/arrays/boolean/test_astype.py    |   5 +-
 .../tests/arrays/boolean/test_construction.py |   5 +-
 pandas/tests/arrays/floating/test_to_numpy.py |   5 +-
 pandas/tests/arrays/integer/test_dtypes.py    |   5 +-
 pandas/tests/frame/methods/test_to_records.py | 137 ++++++++++++++----
 pandas/tests/io/parser/test_c_parser_only.py  |   8 +-
 .../tests/scalar/timedelta/test_arithmetic.py |   4 +-
 pandas/tests/tools/test_to_timedelta.py       |   4 +-
 8 files changed, 134 insertions(+), 39 deletions(-)

diff --git a/pandas/tests/arrays/boolean/test_astype.py b/pandas/tests/arrays/boolean/test_astype.py
index 57cec70262..258d2a99ef 100644
--- a/pandas/tests/arrays/boolean/test_astype.py
+++ b/pandas/tests/arrays/boolean/test_astype.py
@@ -1,3 +1,5 @@
+from sys import byteorder
+
 import numpy as np
 import pytest

@@ -20,7 +22,8 @@ def test_astype():
     tm.assert_numpy_array_equal(result, expected)

     result = arr.astype("str")
-    expected = np.array(["True", "False", "<NA>"], dtype="<U5")
+    endian = {"little": "<", "big": ">"}[byteorder]
+    expected = np.array(["True", "False", "<NA>"], dtype=f"{endian}U5")
     tm.assert_numpy_array_equal(result, expected)

     # no missing values
diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py
index c9e96c4379..8204da66b0 100644
--- a/pandas/tests/arrays/boolean/test_construction.py
+++ b/pandas/tests/arrays/boolean/test_construction.py
@@ -1,3 +1,5 @@
+from sys import byteorder
+
 import numpy as np
 import pytest

@@ -270,7 +272,8 @@ def test_to_numpy(box):

     arr = con([True, False, None], dtype="boolean")
     result = arr.to_numpy(dtype="str")
-    expected = np.array([True, False, pd.NA], dtype="<U5")
+    endian = {"little": "<", "big": ">"}[byteorder]
+    expected = np.array([True, False, pd.NA], dtype=f"{endian}U5")
     tm.assert_numpy_array_equal(result, expected)

     # no missing values -> can convert to bool, otherwise raises
diff --git a/pandas/tests/arrays/floating/test_to_numpy.py b/pandas/tests/arrays/floating/test_to_numpy.py
index 26e5687b1b..e96e27d84c 100644
--- a/pandas/tests/arrays/floating/test_to_numpy.py
+++ b/pandas/tests/arrays/floating/test_to_numpy.py
@@ -1,3 +1,5 @@
+from sys import byteorder
+
 import numpy as np
 import pytest

@@ -115,7 +117,8 @@ def test_to_numpy_string(box, dtype):
     arr = con([0.0, 1.0, None], dtype="Float64")

     result = arr.to_numpy(dtype="str")
-    expected = np.array([0.0, 1.0, pd.NA], dtype="<U32")
+    endian = {"little": "<", "big": ">"}[byteorder]
+    expected = np.array([0.0, 1.0, pd.NA], dtype=f"{endian}U32")
     tm.assert_numpy_array_equal(result, expected)


diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py
index e3f59205aa..88b4a1e935 100644
--- a/pandas/tests/arrays/integer/test_dtypes.py
+++ b/pandas/tests/arrays/integer/test_dtypes.py
@@ -1,3 +1,5 @@
+from sys import byteorder
+
 import numpy as np
 import pytest

@@ -284,7 +286,8 @@ def test_to_numpy_na_raises(dtype):

 def test_astype_str():
     a = pd.array([1, 2, None], dtype="Int64")
-    expected = np.array(["1", "2", "<NA>"], dtype="<U21")
+    endian = {"little": "<", "big": ">"}[byteorder]
+    expected = np.array(["1", "2", "<NA>"], dtype=f"{endian}U21")

     tm.assert_numpy_array_equal(a.astype(str), expected)
     tm.assert_numpy_array_equal(a.astype("str"), expected)
diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py
index 2c96cf291c..2c503571f6 100644
--- a/pandas/tests/frame/methods/test_to_records.py
+++ b/pandas/tests/frame/methods/test_to_records.py
@@ -1,4 +1,5 @@
 from collections import abc
+from sys import byteorder

 import numpy as np
 import pytest
@@ -14,6 +15,9 @@ from pandas import (
 import pandas._testing as tm


+endian = {"little": "<", "big": ">"}[byteorder]
+
+
 class TestDataFrameToRecords:
     def test_to_records_timeseries(self):
         index = date_range("1/1/2000", periods=10)
@@ -143,7 +147,12 @@ class TestDataFrameToRecords:
                 {},
                 np.rec.array(
                     [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
-                    dtype=[("index", "<i8"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", f"{endian}i8"),
+                        ("B", f"{endian}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Should have no effect in this case.
@@ -151,23 +160,38 @@ class TestDataFrameToRecords:
                 {"index": True},
                 np.rec.array(
                     [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
-                    dtype=[("index", "<i8"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", f"{endian}i8"),
+                        ("B", f"{endian}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Column dtype applied across the board. Index unaffected.
             (
-                {"column_dtypes": "<U4"},
+                {"column_dtypes": f"{endian}U4"},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "<U4"), ("B", "<U4"), ("C", "<U4")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", f"{endian}U4"),
+                        ("B", f"{endian}U4"),
+                        ("C", f"{endian}U4"),
+                    ],
                 ),
             ),
             # Index dtype applied across the board. Columns unaffected.
             (
-                {"index_dtypes": "<U1"},
+                {"index_dtypes": f"{endian}U1"},
                 np.rec.array(
                     [("0", 1, 0.2, "a"), ("1", 2, 1.5, "bc")],
-                    dtype=[("index", "<U1"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{endian}U1"),
+                        ("A", f"{endian}i8"),
+                        ("B", f"{endian}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Pass in a type instance.
@@ -175,7 +199,12 @@ class TestDataFrameToRecords:
                 {"column_dtypes": str},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "<U"), ("B", "<U"), ("C", "<U")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", f"{endian}U"),
+                        ("B", f"{endian}U"),
+                        ("C", f"{endian}U"),
+                    ],
                 ),
             ),
             # Pass in a dtype instance.
@@ -183,15 +212,25 @@ class TestDataFrameToRecords:
                 {"column_dtypes": np.dtype("unicode")},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "<U"), ("B", "<U"), ("C", "<U")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", f"{endian}U"),
+                        ("B", f"{endian}U"),
+                        ("C", f"{endian}U"),
+                    ],
                 ),
             ),
             # Pass in a dictionary (name-only).
             (
-                {"column_dtypes": {"A": np.int8, "B": np.float32, "C": "<U2"}},
+                {"column_dtypes": {"A": np.int8, "B": np.float32, "C": f"{endian}U2"}},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "<U2")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", "i1"),
+                        ("B", f"{endian}f4"),
+                        ("C", f"{endian}U2"),
+                    ],
                 ),
             ),
             # Pass in a dictionary (indices-only).
@@ -199,15 +238,20 @@ class TestDataFrameToRecords:
                 {"index_dtypes": {0: "int16"}},
                 np.rec.array(
                     [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
-                    dtype=[("index", "i2"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", "i2"),
+                        ("A", f"{endian}i8"),
+                        ("B", f"{endian}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Ignore index mappings if index is not True.
             (
-                {"index": False, "index_dtypes": "<U2"},
+                {"index": False, "index_dtypes": f"{endian}U2"},
                 np.rec.array(
                     [(1, 0.2, "a"), (2, 1.5, "bc")],
-                    dtype=[("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[("A", f"{endian}i8"), ("B", f"{endian}f8"), ("C", "O")],
                 ),
             ),
             # Non-existent names / indices in mapping should not error.
@@ -215,7 +259,12 @@ class TestDataFrameToRecords:
                 {"index_dtypes": {0: "int16", "not-there": "float32"}},
                 np.rec.array(
                     [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
-                    dtype=[("index", "i2"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", "i2"),
+                        ("A", f"{endian}i8"),
+                        ("B", f"{endian}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Names / indices not in mapping default to array dtype.
@@ -223,7 +272,12 @@ class TestDataFrameToRecords:
                 {"column_dtypes": {"A": np.int8, "B": np.float32}},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", "i1"),
+                        ("B", f"{endian}f4"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Names / indices not in dtype mapping default to array dtype.
@@ -231,18 +285,28 @@ class TestDataFrameToRecords:
                 {"column_dtypes": {"A": np.dtype("int8"), "B": np.dtype("float32")}},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", "i1"),
+                        ("B", f"{endian}f4"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Mixture of everything.
             (
                 {
                     "column_dtypes": {"A": np.int8, "B": np.float32},
-                    "index_dtypes": "<U2",
+                    "index_dtypes": f"{endian}U2",
                 },
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<U2"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{endian}U2"),
+                        ("A", "i1"),
+                        ("B", f"{endian}f4"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Invalid dype values.
@@ -291,7 +355,7 @@ class TestDataFrameToRecords:
                 {"column_dtypes": "float64", "index_dtypes": {0: "int32", 1: "int8"}},
                 np.rec.array(
                     [(1, 2, 3.0), (4, 5, 6.0), (7, 8, 9.0)],
-                    dtype=[("a", "<i4"), ("b", "i1"), ("c", "<f8")],
+                    dtype=[("a", f"{endian}i4"), ("b", "i1"), ("c", f"{endian}f8")],
                 ),
             ),
             # MultiIndex in the columns.
@@ -302,14 +366,17 @@ class TestDataFrameToRecords:
                         [("a", "d"), ("b", "e"), ("c", "f")]
                     ),
                 ),
-                {"column_dtypes": {0: "<U1", 2: "float32"}, "index_dtypes": "float32"},
+                {
+                    "column_dtypes": {0: f"{endian}U1", 2: "float32"},
+                    "index_dtypes": "float32",
+                },
                 np.rec.array(
                     [(0.0, "1", 2, 3.0), (1.0, "4", 5, 6.0), (2.0, "7", 8, 9.0)],
                     dtype=[
-                        ("index", "<f4"),
-                        ("('a', 'd')", "<U1"),
-                        ("('b', 'e')", "<i8"),
-                        ("('c', 'f')", "<f4"),
+                        ("index", f"{endian}f4"),
+                        ("('a', 'd')", f"{endian}U1"),
+                        ("('b', 'e')", f"{endian}i8"),
+                        ("('c', 'f')", f"{endian}f4"),
                     ],
                 ),
             ),
@@ -324,7 +391,10 @@ class TestDataFrameToRecords:
                         [("d", -4), ("d", -5), ("f", -6)], names=list("cd")
                     ),
                 ),
-                {"column_dtypes": "float64", "index_dtypes": {0: "<U2", 1: "int8"}},
+                {
+                    "column_dtypes": "float64",
+                    "index_dtypes": {0: f"{endian}U2", 1: "int8"},
+                },
                 np.rec.array(
                     [
                         ("d", -4, 1.0, 2.0, 3.0),
@@ -332,11 +402,11 @@ class TestDataFrameToRecords:
                         ("f", -6, 7, 8, 9.0),
                     ],
                     dtype=[
-                        ("c", "<U2"),
+                        ("c", f"{endian}U2"),
                         ("d", "i1"),
-                        ("('a', 'd')", "<f8"),
-                        ("('b', 'e')", "<f8"),
-                        ("('c', 'f')", "<f8"),
+                        ("('a', 'd')", f"{endian}f8"),
+                        ("('b', 'e')", f"{endian}f8"),
+                        ("('c', 'f')", f"{endian}f8"),
                     ],
                 ),
             ),
@@ -366,13 +436,18 @@ class TestDataFrameToRecords:

         dtype_mappings = {
             "column_dtypes": DictLike(**{"A": np.int8, "B": np.float32}),
-            "index_dtypes": "<U2",
+            "index_dtypes": f"{endian}U2",
         }

         result = df.to_records(**dtype_mappings)
         expected = np.rec.array(
             [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-            dtype=[("index", "<U2"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+            dtype=[
+                ("index", f"{endian}U2"),
+                ("A", "i1"),
+                ("B", f"{endian}f4"),
+                ("C", "O"),
+            ],
         )
         tm.assert_almost_equal(result, expected)

diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py
index 160e00f5fb..049fa412c2 100644
--- a/pandas/tests/io/parser/test_c_parser_only.py
+++ b/pandas/tests/io/parser/test_c_parser_only.py
@@ -12,6 +12,7 @@ from io import (
 )
 import mmap
 import os
+from sys import byteorder
 import tarfile

 import numpy as np
@@ -28,6 +29,9 @@ from pandas import (
 import pandas._testing as tm


+endian = {"little": "<", "big": ">"}[byteorder]
+
+
 @pytest.mark.parametrize(
     "malformed",
     ["1\r1\r1\r 1\r 1\r", "1\r1\r1\r 1\r 1\r11\r", "1\r1\r1\r 1\r 1\r11\r1\r"],
@@ -141,9 +145,9 @@ nan 2
             "the dtype timedelta64 is not supported for parsing",
             {"dtype": {"A": "timedelta64", "B": "float64"}},
         ),
-        ("the dtype <U8 is not supported for parsing", {"dtype": {"A": "U8"}}),
+        (f"the dtype {endian}U8 is not supported for parsing", {"dtype": {"A": "U8"}}),
     ],
-    ids=["dt64-0", "dt64-1", "td64", "<U8"],
+    ids=["dt64-0", "dt64-1", "td64", f"{endian}U8"],
 )
 def test_unsupported_dtype(c_parser_only, match, kwargs):
     parser = c_parser_only
diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py
index 7dfda0463e..3e4334261a 100644
--- a/pandas/tests/scalar/timedelta/test_arithmetic.py
+++ b/pandas/tests/scalar/timedelta/test_arithmetic.py
@@ -6,6 +6,7 @@ from datetime import (
     timedelta,
 )
 import operator
+from sys import byteorder

 import numpy as np
 import pytest
@@ -293,9 +294,10 @@ class TestTimedeltaAdditionSubtraction:
         expected = pd.to_timedelta(["2 days"]).values
         tm.assert_numpy_array_equal(td * np.array([2]), expected)
         tm.assert_numpy_array_equal(np.array([2]) * td, expected)
+        endian = {"little": "<", "big": ">"}[byteorder]
         msg = (
             "ufunc '?multiply'? cannot use operands with types "
-            r"dtype\('<m8\[ns\]'\) and dtype\('<m8\[ns\]'\)"
+            fr"dtype\('{endian}m8\[ns\]'\) and dtype\('{endian}m8\[ns\]'\)"
         )
         with pytest.raises(TypeError, match=msg):
             td * other
diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py
index 395fdea67f..1141a136d6 100644
--- a/pandas/tests/tools/test_to_timedelta.py
+++ b/pandas/tests/tools/test_to_timedelta.py
@@ -2,6 +2,7 @@ from datetime import (
     time,
     timedelta,
 )
+from sys import byteorder

 import numpy as np
 import pytest
@@ -202,8 +203,9 @@ class TestTimedeltas:
         timedelta_NaT = np.timedelta64("NaT")

         actual = to_timedelta(Series(["00:00:01", np.nan]))
+        endian = {"little": "<", "big": ">"}[byteorder]
         expected = Series(
-            [np.timedelta64(1000000000, "ns"), timedelta_NaT], dtype="<m8[ns]"
+            [np.timedelta64(1000000000, "ns"), timedelta_NaT], dtype=f"{endian}m8[ns]"
         )
         tm.assert_series_equal(actual, expected)

--
2.35.1