python-pandas/pandas-1.3.5-pr-46681.patch

469 lines
18 KiB
Diff

From 5c886169cd2674d7077271602a1a36ae0526d3ff Mon Sep 17 00:00:00 2001
From: "Benjamin A. Beasley" <code@musicinmybrain.net>
Date: Wed, 6 Apr 2022 07:13:22 -0400
Subject: [PATCH] Fix a few test failures on big-endian systems

These are all due to tests expecting little-endian dtypes, where in fact
the endianness of the dtype is that of the host.
---
pandas/tests/arrays/boolean/test_astype.py | 5 +-
.../tests/arrays/boolean/test_construction.py | 5 +-
pandas/tests/arrays/floating/test_to_numpy.py | 5 +-
pandas/tests/arrays/integer/test_dtypes.py | 5 +-
pandas/tests/frame/methods/test_to_records.py | 137 ++++++++++++++----
pandas/tests/io/parser/test_c_parser_only.py | 8 +-
.../tests/scalar/timedelta/test_arithmetic.py | 4 +-
pandas/tests/tools/test_to_timedelta.py | 4 +-
8 files changed, 134 insertions(+), 39 deletions(-)
diff --git a/pandas/tests/arrays/boolean/test_astype.py b/pandas/tests/arrays/boolean/test_astype.py
index 57cec70262..258d2a99ef 100644
--- a/pandas/tests/arrays/boolean/test_astype.py
+++ b/pandas/tests/arrays/boolean/test_astype.py
@@ -1,3 +1,5 @@
+from sys import byteorder
+
import numpy as np
import pytest
@@ -20,7 +22,8 @@ def test_astype():
tm.assert_numpy_array_equal(result, expected)
result = arr.astype("str")
- expected = np.array(["True", "False", "<NA>"], dtype="<U5")
+ endian = {"little": "<", "big": ">"}[byteorder]
+ expected = np.array(["True", "False", "<NA>"], dtype=f"{endian}U5")
tm.assert_numpy_array_equal(result, expected)
# no missing values
diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py
index c9e96c4379..8204da66b0 100644
--- a/pandas/tests/arrays/boolean/test_construction.py
+++ b/pandas/tests/arrays/boolean/test_construction.py
@@ -1,3 +1,5 @@
+from sys import byteorder
+
import numpy as np
import pytest
@@ -270,7 +272,8 @@ def test_to_numpy(box):
arr = con([True, False, None], dtype="boolean")
result = arr.to_numpy(dtype="str")
- expected = np.array([True, False, pd.NA], dtype="<U5")
+ endian = {"little": "<", "big": ">"}[byteorder]
+ expected = np.array([True, False, pd.NA], dtype=f"{endian}U5")
tm.assert_numpy_array_equal(result, expected)
# no missing values -> can convert to bool, otherwise raises
diff --git a/pandas/tests/arrays/floating/test_to_numpy.py b/pandas/tests/arrays/floating/test_to_numpy.py
index 26e5687b1b..e96e27d84c 100644
--- a/pandas/tests/arrays/floating/test_to_numpy.py
+++ b/pandas/tests/arrays/floating/test_to_numpy.py
@@ -1,3 +1,5 @@
+from sys import byteorder
+
import numpy as np
import pytest
@@ -115,7 +117,8 @@ def test_to_numpy_string(box, dtype):
arr = con([0.0, 1.0, None], dtype="Float64")
result = arr.to_numpy(dtype="str")
- expected = np.array([0.0, 1.0, pd.NA], dtype="<U32")
+ endian = {"little": "<", "big": ">"}[byteorder]
+ expected = np.array([0.0, 1.0, pd.NA], dtype=f"{endian}U32")
tm.assert_numpy_array_equal(result, expected)
diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py
index e3f59205aa..88b4a1e935 100644
--- a/pandas/tests/arrays/integer/test_dtypes.py
+++ b/pandas/tests/arrays/integer/test_dtypes.py
@@ -1,3 +1,5 @@
+from sys import byteorder
+
import numpy as np
import pytest
@@ -284,7 +286,8 @@ def test_to_numpy_na_raises(dtype):
def test_astype_str():
a = pd.array([1, 2, None], dtype="Int64")
- expected = np.array(["1", "2", "<NA>"], dtype="<U21")
+ endian = {"little": "<", "big": ">"}[byteorder]
+ expected = np.array(["1", "2", "<NA>"], dtype=f"{endian}U21")
tm.assert_numpy_array_equal(a.astype(str), expected)
tm.assert_numpy_array_equal(a.astype("str"), expected)
diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py
index 2c96cf291c..2c503571f6 100644
--- a/pandas/tests/frame/methods/test_to_records.py
+++ b/pandas/tests/frame/methods/test_to_records.py
@@ -1,4 +1,5 @@
from collections import abc
+from sys import byteorder
import numpy as np
import pytest
@@ -14,6 +15,9 @@ from pandas import (
import pandas._testing as tm
+endian = {"little": "<", "big": ">"}[byteorder]
+
+
class TestDataFrameToRecords:
def test_to_records_timeseries(self):
index = date_range("1/1/2000", periods=10)
@@ -143,7 +147,12 @@ class TestDataFrameToRecords:
{},
np.rec.array(
[(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
- dtype=[("index", "<i8"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+ dtype=[
+ ("index", f"{endian}i8"),
+ ("A", f"{endian}i8"),
+ ("B", f"{endian}f8"),
+ ("C", "O"),
+ ],
),
),
# Should have no effect in this case.
@@ -151,23 +160,38 @@ class TestDataFrameToRecords:
{"index": True},
np.rec.array(
[(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
- dtype=[("index", "<i8"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+ dtype=[
+ ("index", f"{endian}i8"),
+ ("A", f"{endian}i8"),
+ ("B", f"{endian}f8"),
+ ("C", "O"),
+ ],
),
),
# Column dtype applied across the board. Index unaffected.
(
- {"column_dtypes": "<U4"},
+ {"column_dtypes": f"{endian}U4"},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
- dtype=[("index", "<i8"), ("A", "<U4"), ("B", "<U4"), ("C", "<U4")],
+ dtype=[
+ ("index", f"{endian}i8"),
+ ("A", f"{endian}U4"),
+ ("B", f"{endian}U4"),
+ ("C", f"{endian}U4"),
+ ],
),
),
# Index dtype applied across the board. Columns unaffected.
(
- {"index_dtypes": "<U1"},
+ {"index_dtypes": f"{endian}U1"},
np.rec.array(
[("0", 1, 0.2, "a"), ("1", 2, 1.5, "bc")],
- dtype=[("index", "<U1"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+ dtype=[
+ ("index", f"{endian}U1"),
+ ("A", f"{endian}i8"),
+ ("B", f"{endian}f8"),
+ ("C", "O"),
+ ],
),
),
# Pass in a type instance.
@@ -175,7 +199,12 @@ class TestDataFrameToRecords:
{"column_dtypes": str},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
- dtype=[("index", "<i8"), ("A", "<U"), ("B", "<U"), ("C", "<U")],
+ dtype=[
+ ("index", f"{endian}i8"),
+ ("A", f"{endian}U"),
+ ("B", f"{endian}U"),
+ ("C", f"{endian}U"),
+ ],
),
),
# Pass in a dtype instance.
@@ -183,15 +212,25 @@ class TestDataFrameToRecords:
{"column_dtypes": np.dtype("unicode")},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
- dtype=[("index", "<i8"), ("A", "<U"), ("B", "<U"), ("C", "<U")],
+ dtype=[
+ ("index", f"{endian}i8"),
+ ("A", f"{endian}U"),
+ ("B", f"{endian}U"),
+ ("C", f"{endian}U"),
+ ],
),
),
# Pass in a dictionary (name-only).
(
- {"column_dtypes": {"A": np.int8, "B": np.float32, "C": "<U2"}},
+ {"column_dtypes": {"A": np.int8, "B": np.float32, "C": f"{endian}U2"}},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
- dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "<U2")],
+ dtype=[
+ ("index", f"{endian}i8"),
+ ("A", "i1"),
+ ("B", f"{endian}f4"),
+ ("C", f"{endian}U2"),
+ ],
),
),
# Pass in a dictionary (indices-only).
@@ -199,15 +238,20 @@ class TestDataFrameToRecords:
{"index_dtypes": {0: "int16"}},
np.rec.array(
[(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
- dtype=[("index", "i2"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+ dtype=[
+ ("index", "i2"),
+ ("A", f"{endian}i8"),
+ ("B", f"{endian}f8"),
+ ("C", "O"),
+ ],
),
),
# Ignore index mappings if index is not True.
(
- {"index": False, "index_dtypes": "<U2"},
+ {"index": False, "index_dtypes": f"{endian}U2"},
np.rec.array(
[(1, 0.2, "a"), (2, 1.5, "bc")],
- dtype=[("A", "<i8"), ("B", "<f8"), ("C", "O")],
+ dtype=[("A", f"{endian}i8"), ("B", f"{endian}f8"), ("C", "O")],
),
),
# Non-existent names / indices in mapping should not error.
@@ -215,7 +259,12 @@ class TestDataFrameToRecords:
{"index_dtypes": {0: "int16", "not-there": "float32"}},
np.rec.array(
[(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
- dtype=[("index", "i2"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+ dtype=[
+ ("index", "i2"),
+ ("A", f"{endian}i8"),
+ ("B", f"{endian}f8"),
+ ("C", "O"),
+ ],
),
),
# Names / indices not in mapping default to array dtype.
@@ -223,7 +272,12 @@ class TestDataFrameToRecords:
{"column_dtypes": {"A": np.int8, "B": np.float32}},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
- dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+ dtype=[
+ ("index", f"{endian}i8"),
+ ("A", "i1"),
+ ("B", f"{endian}f4"),
+ ("C", "O"),
+ ],
),
),
# Names / indices not in dtype mapping default to array dtype.
@@ -231,18 +285,28 @@ class TestDataFrameToRecords:
{"column_dtypes": {"A": np.dtype("int8"), "B": np.dtype("float32")}},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
- dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+ dtype=[
+ ("index", f"{endian}i8"),
+ ("A", "i1"),
+ ("B", f"{endian}f4"),
+ ("C", "O"),
+ ],
),
),
# Mixture of everything.
(
{
"column_dtypes": {"A": np.int8, "B": np.float32},
- "index_dtypes": "<U2",
+ "index_dtypes": f"{endian}U2",
},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
- dtype=[("index", "<U2"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+ dtype=[
+ ("index", f"{endian}U2"),
+ ("A", "i1"),
+ ("B", f"{endian}f4"),
+ ("C", "O"),
+ ],
),
),
# Invalid dype values.
@@ -291,7 +355,7 @@ class TestDataFrameToRecords:
{"column_dtypes": "float64", "index_dtypes": {0: "int32", 1: "int8"}},
np.rec.array(
[(1, 2, 3.0), (4, 5, 6.0), (7, 8, 9.0)],
- dtype=[("a", "<i4"), ("b", "i1"), ("c", "<f8")],
+ dtype=[("a", f"{endian}i4"), ("b", "i1"), ("c", f"{endian}f8")],
),
),
# MultiIndex in the columns.
@@ -302,14 +366,17 @@ class TestDataFrameToRecords:
[("a", "d"), ("b", "e"), ("c", "f")]
),
),
- {"column_dtypes": {0: "<U1", 2: "float32"}, "index_dtypes": "float32"},
+ {
+ "column_dtypes": {0: f"{endian}U1", 2: "float32"},
+ "index_dtypes": "float32",
+ },
np.rec.array(
[(0.0, "1", 2, 3.0), (1.0, "4", 5, 6.0), (2.0, "7", 8, 9.0)],
dtype=[
- ("index", "<f4"),
- ("('a', 'd')", "<U1"),
- ("('b', 'e')", "<i8"),
- ("('c', 'f')", "<f4"),
+ ("index", f"{endian}f4"),
+ ("('a', 'd')", f"{endian}U1"),
+ ("('b', 'e')", f"{endian}i8"),
+ ("('c', 'f')", f"{endian}f4"),
],
),
),
@@ -324,7 +391,10 @@ class TestDataFrameToRecords:
[("d", -4), ("d", -5), ("f", -6)], names=list("cd")
),
),
- {"column_dtypes": "float64", "index_dtypes": {0: "<U2", 1: "int8"}},
+ {
+ "column_dtypes": "float64",
+ "index_dtypes": {0: f"{endian}U2", 1: "int8"},
+ },
np.rec.array(
[
("d", -4, 1.0, 2.0, 3.0),
@@ -332,11 +402,11 @@ class TestDataFrameToRecords:
("f", -6, 7, 8, 9.0),
],
dtype=[
- ("c", "<U2"),
+ ("c", f"{endian}U2"),
("d", "i1"),
- ("('a', 'd')", "<f8"),
- ("('b', 'e')", "<f8"),
- ("('c', 'f')", "<f8"),
+ ("('a', 'd')", f"{endian}f8"),
+ ("('b', 'e')", f"{endian}f8"),
+ ("('c', 'f')", f"{endian}f8"),
],
),
),
@@ -366,13 +436,18 @@ class TestDataFrameToRecords:
dtype_mappings = {
"column_dtypes": DictLike(**{"A": np.int8, "B": np.float32}),
- "index_dtypes": "<U2",
+ "index_dtypes": f"{endian}U2",
}
result = df.to_records(**dtype_mappings)
expected = np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
- dtype=[("index", "<U2"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+ dtype=[
+ ("index", f"{endian}U2"),
+ ("A", "i1"),
+ ("B", f"{endian}f4"),
+ ("C", "O"),
+ ],
)
tm.assert_almost_equal(result, expected)
diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py
index 160e00f5fb..049fa412c2 100644
--- a/pandas/tests/io/parser/test_c_parser_only.py
+++ b/pandas/tests/io/parser/test_c_parser_only.py
@@ -12,6 +12,7 @@ from io import (
)
import mmap
import os
+from sys import byteorder
import tarfile
import numpy as np
@@ -28,6 +29,9 @@ from pandas import (
import pandas._testing as tm
+endian = {"little": "<", "big": ">"}[byteorder]
+
+
@pytest.mark.parametrize(
"malformed",
["1\r1\r1\r 1\r 1\r", "1\r1\r1\r 1\r 1\r11\r", "1\r1\r1\r 1\r 1\r11\r1\r"],
@@ -141,9 +145,9 @@ nan 2
"the dtype timedelta64 is not supported for parsing",
{"dtype": {"A": "timedelta64", "B": "float64"}},
),
- ("the dtype <U8 is not supported for parsing", {"dtype": {"A": "U8"}}),
+ (f"the dtype {endian}U8 is not supported for parsing", {"dtype": {"A": "U8"}}),
],
- ids=["dt64-0", "dt64-1", "td64", "<U8"],
+ ids=["dt64-0", "dt64-1", "td64", f"{endian}U8"],
)
def test_unsupported_dtype(c_parser_only, match, kwargs):
parser = c_parser_only
diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py
index 7dfda0463e..3e4334261a 100644
--- a/pandas/tests/scalar/timedelta/test_arithmetic.py
+++ b/pandas/tests/scalar/timedelta/test_arithmetic.py
@@ -6,6 +6,7 @@ from datetime import (
timedelta,
)
import operator
+from sys import byteorder
import numpy as np
import pytest
@@ -293,9 +294,10 @@ class TestTimedeltaAdditionSubtraction:
expected = pd.to_timedelta(["2 days"]).values
tm.assert_numpy_array_equal(td * np.array([2]), expected)
tm.assert_numpy_array_equal(np.array([2]) * td, expected)
+ endian = {"little": "<", "big": ">"}[byteorder]
msg = (
"ufunc '?multiply'? cannot use operands with types "
- r"dtype\('<m8\[ns\]'\) and dtype\('<m8\[ns\]'\)"
+ fr"dtype\('{endian}m8\[ns\]'\) and dtype\('{endian}m8\[ns\]'\)"
)
with pytest.raises(TypeError, match=msg):
td * other
diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py
index 395fdea67f..1141a136d6 100644
--- a/pandas/tests/tools/test_to_timedelta.py
+++ b/pandas/tests/tools/test_to_timedelta.py
@@ -2,6 +2,7 @@ from datetime import (
time,
timedelta,
)
+from sys import byteorder
import numpy as np
import pytest
@@ -202,8 +203,9 @@ class TestTimedeltas:
timedelta_NaT = np.timedelta64("NaT")
actual = to_timedelta(Series(["00:00:01", np.nan]))
+ endian = {"little": "<", "big": ">"}[byteorder]
expected = Series(
- [np.timedelta64(1000000000, "ns"), timedelta_NaT], dtype="<m8[ns]"
+ [np.timedelta64(1000000000, "ns"), timedelta_NaT], dtype=f"{endian}m8[ns]"
)
tm.assert_series_equal(actual, expected)
--
2.35.1