From 798549d05964cd2cf1318ac5a367e1925be3ccab Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Thu, 29 Feb 2024 20:31:56 -0500 Subject: [PATCH] Fix tests on other architectures --- ...ncoding_gh601-on-big-endian-machines.patch | 2 +- 0002-Skip-coverage-testing.patch | 2 +- ...ximum-for-sizeof-test-to-pass-32-bit.patch | 22 +++++++++++---- ...timestamp_overflow_pyarrow-condition.patch | 4 +-- ...ng-the-random-seed-with-the-same-byt.patch | 27 +++++++++++++++++++ ...patch => 0006-Allow-older-versioneer.patch | 4 +-- ...07-Ignore-NumPy-warnings-from-Pandas.patch | 4 +-- 0008-TST-Add-missing-skip-for-pyarrow.patch | 25 +++++++++++++++++ force-little-endian-random.patch | 19 ------------- python-dask.spec | 10 ++++--- 10 files changed, 84 insertions(+), 35 deletions(-) create mode 100644 0005-Force-initializing-the-random-seed-with-the-same-byt.patch rename 0005-Allow-older-versioneer.patch => 0006-Allow-older-versioneer.patch (84%) rename 0006-Ignore-NumPy-warnings-from-Pandas.patch => 0007-Ignore-NumPy-warnings-from-Pandas.patch (92%) create mode 100644 0008-TST-Add-missing-skip-for-pyarrow.patch delete mode 100644 force-little-endian-random.patch diff --git a/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch b/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch index 5e87d2e..2996f1e 100644 --- a/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch +++ b/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch @@ -1,7 +1,7 @@ From 252bee6343dd77b89b2f2879204398d9047a9924 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Sun, 7 Mar 2021 04:07:32 -0500 -Subject: [PATCH 1/6] Skip test_encoding_gh601 on big-endian machines. +Subject: [PATCH 1/8] Skip test_encoding_gh601 on big-endian machines. Signed-off-by: Elliott Sales de Andrade --- diff --git a/0002-Skip-coverage-testing.patch b/0002-Skip-coverage-testing.patch index bee4182..ba64a5a 100644 --- a/0002-Skip-coverage-testing.patch +++ b/0002-Skip-coverage-testing.patch @@ -1,7 +1,7 @@ From 8cf6b4224a56b0a05fc5fe3bd75d7979d102c610 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Sun, 7 May 2023 23:13:59 -0400 -Subject: [PATCH 2/6] Skip coverage testing +Subject: [PATCH 2/8] Skip coverage testing Signed-off-by: Elliott Sales de Andrade --- diff --git a/0003-TST-Increase-maximum-for-sizeof-test-to-pass-32-bit.patch b/0003-TST-Increase-maximum-for-sizeof-test-to-pass-32-bit.patch index af09015..f5bf14d 100644 --- a/0003-TST-Increase-maximum-for-sizeof-test-to-pass-32-bit.patch +++ b/0003-TST-Increase-maximum-for-sizeof-test-to-pass-32-bit.patch @@ -1,18 +1,21 @@ -From 84706407d4e633658c77503d46a500d09cda5932 Mon Sep 17 00:00:00 2001 +From e0158048423c562e20479a2f8f8bd749a15b84f6 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Thu, 20 Jul 2023 00:05:48 -0400 -Subject: [PATCH 3/6] TST: Increase maximum for sizeof test to pass 32-bit +Subject: [PATCH 3/8] TST: Increase maximum for sizeof test to pass 32-bit In that case, the result is 1244, which is ~2.6 * `sys.sizeof` (476), slightly over the 2 used in the existing test. +The minimum size of the sparse matrix also appears to be a bit smaller +as well. + Signed-off-by: Elliott Sales de Andrade --- - dask/tests/test_sizeof.py | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) + dask/tests/test_sizeof.py | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dask/tests/test_sizeof.py b/dask/tests/test_sizeof.py -index ef3d6ba3..dd1a0b81 100644 +index ef3d6ba3..40c479b9 100644 --- a/dask/tests/test_sizeof.py +++ b/dask/tests/test_sizeof.py @@ -81,7 +81,7 @@ def test_pandas_multiindex(): @@ -24,6 +27,15 @@ index ef3d6ba3..dd1a0b81 100644 assert isinstance(sizeof(index), int) +@@ -104,7 +104,7 @@ def test_sparse_matrix(): + assert sizeof(sp.tocoo()) >= 240 + assert sizeof(sp.tocsc()) >= 232 + assert sizeof(sp.tocsr()) >= 232 +- assert sizeof(sp.todok()) >= 188 ++ assert sizeof(sp.todok()) >= 184 + assert sizeof(sp.tolil()) >= 204 + + -- 2.43.0 diff --git a/0004-Fix-test_pandas_timestamp_overflow_pyarrow-condition.patch b/0004-Fix-test_pandas_timestamp_overflow_pyarrow-condition.patch index eeed87d..ef78a7f 100644 --- a/0004-Fix-test_pandas_timestamp_overflow_pyarrow-condition.patch +++ b/0004-Fix-test_pandas_timestamp_overflow_pyarrow-condition.patch @@ -1,7 +1,7 @@ -From 4f47b4de8a5b26ebf42670fabee8f6053e87d849 Mon Sep 17 00:00:00 2001 +From 9c00d60eb22e91bd6920b472c458f277f1dec964 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Sat, 19 Aug 2023 16:49:33 -0400 -Subject: [PATCH 4/6] Fix test_pandas_timestamp_overflow_pyarrow condition +Subject: [PATCH 4/8] Fix test_pandas_timestamp_overflow_pyarrow condition The new behavior in pyarrow only occurs with Pandas 2.0 as well. diff --git a/0005-Force-initializing-the-random-seed-with-the-same-byt.patch b/0005-Force-initializing-the-random-seed-with-the-same-byt.patch new file mode 100644 index 0000000..db05dee --- /dev/null +++ b/0005-Force-initializing-the-random-seed-with-the-same-byt.patch @@ -0,0 +1,27 @@ +From 741c375b14edcc5b32d1641b11a39d8ee5679496 Mon Sep 17 00:00:00 2001 +From: Diane Trout +Date: Thu, 29 Feb 2024 19:39:26 -0500 +Subject: [PATCH 5/8] Force initializing the random seed with the same byte + order interpretation as on x86 + +Signed-off-by: Elliott Sales de Andrade +--- + dask/utils.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/dask/utils.py b/dask/utils.py +index 0e974fe4..4dcfca42 100644 +--- a/dask/utils.py ++++ b/dask/utils.py +@@ -557,7 +557,7 @@ def random_state_data(n: int, random_state=None) -> list: + random_state = np.random.RandomState(random_state) + + random_data = random_state.bytes(624 * n * 4) # `n * 624` 32-bit integers +- l = list(np.frombuffer(random_data, dtype=np.uint32).reshape((n, -1))) ++ l = list(np.frombuffer(random_data, dtype=" Date: Sat, 25 Nov 2023 20:22:50 -0500 -Subject: [PATCH 5/6] Allow older versioneer +Subject: [PATCH 6/8] Allow older versioneer Signed-off-by: Elliott Sales de Andrade --- diff --git a/0006-Ignore-NumPy-warnings-from-Pandas.patch b/0007-Ignore-NumPy-warnings-from-Pandas.patch similarity index 92% rename from 0006-Ignore-NumPy-warnings-from-Pandas.patch rename to 0007-Ignore-NumPy-warnings-from-Pandas.patch index ceba2f5..a9762c5 100644 --- a/0006-Ignore-NumPy-warnings-from-Pandas.patch +++ b/0007-Ignore-NumPy-warnings-from-Pandas.patch @@ -1,7 +1,7 @@ -From 6580d3d6314ff062070624241543b671d83a6c07 Mon Sep 17 00:00:00 2001 +From 9a6c7b4dcf99d9638a0a7d8f5253cbf9ee494095 Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Mon, 22 May 2023 12:53:20 -0500 -Subject: [PATCH 6/6] Ignore NumPy warnings from Pandas +Subject: [PATCH 7/8] Ignore NumPy warnings from Pandas Signed-off-by: Elliott Sales de Andrade --- diff --git a/0008-TST-Add-missing-skip-for-pyarrow.patch b/0008-TST-Add-missing-skip-for-pyarrow.patch new file mode 100644 index 0000000..e506e05 --- /dev/null +++ b/0008-TST-Add-missing-skip-for-pyarrow.patch @@ -0,0 +1,25 @@ +From 5077c2dc914761df04fe853e9261cd33a733e3df Mon Sep 17 00:00:00 2001 +From: Elliott Sales de Andrade +Date: Thu, 29 Feb 2024 19:30:14 -0500 +Subject: [PATCH 8/8] TST: Add missing skip for pyarrow + +Signed-off-by: Elliott Sales de Andrade +--- + dask/dataframe/io/tests/test_parquet.py | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/dask/dataframe/io/tests/test_parquet.py b/dask/dataframe/io/tests/test_parquet.py +index e838d20d..05f0ac8e 100644 +--- a/dask/dataframe/io/tests/test_parquet.py ++++ b/dask/dataframe/io/tests/test_parquet.py +@@ -4934,6 +4934,7 @@ def test_read_parquet_lists_not_converting(tmpdir): + assert_eq(df, result) + + ++@PYARROW_MARK + @pytest.mark.skipif(not PANDAS_GE_200, reason="Requires pandas>=2.0") + def test_parquet_string_roundtrip(tmpdir): + pdf = pd.DataFrame({"a": ["a", "b", "c"]}, dtype="string[pyarrow]") +-- +2.43.0 + diff --git a/force-little-endian-random.patch b/force-little-endian-random.patch deleted file mode 100644 index 87c9cc1..0000000 --- a/force-little-endian-random.patch +++ /dev/null @@ -1,19 +0,0 @@ -Author: Diane Trout -Description: Force initializing the random seed with the same - byte order interpretation as on x86. - -Index: dask-2023.8.0+dfsg/dask/utils.py -=================================================================== ---- dask-2023.8.0+dfsg.orig/dask/utils.py -+++ dask-2023.8.0+dfsg/dask/utils.py -@@ -426,7 +426,9 @@ def random_state_data(n: int, random_sta - random_state = np.random.RandomState(random_state) - - random_data = random_state.bytes(624 * n * 4) # `n * 624` 32-bit integers -- l = list(np.frombuffer(random_data, dtype=np.uint32).reshape((n, -1))) -+ dt = np.dtype(np.uint32) -+ dt = dt.newbyteorder("<") -+ l = list(np.frombuffer(random_data, dtype=dt).reshape((n, -1))) - assert len(l) == n - return l - diff --git a/python-dask.spec b/python-dask.spec index 74f85c9..f78a9aa 100644 --- a/python-dask.spec +++ b/python-dask.spec @@ -23,16 +23,20 @@ Patch: 0001-Skip-test_encoding_gh601-on-big-endian-machines.patch # Fedora-specific patch. Patch: 0002-Skip-coverage-testing.patch # Drop after dropping 32-bit support. +# https://github.com/dask/dask/pull/10971 Patch: 0003-TST-Increase-maximum-for-sizeof-test-to-pass-32-bit.patch # https://github.com/dask/dask/pull/10451 Patch: 0004-Fix-test_pandas_timestamp_overflow_pyarrow-condition.patch # https://github.com/dask/dask/issues/10423 -Patch: https://salsa.debian.org/python-team/packages/dask/-/raw/08ffea1b7b53e9c71c9a926d5786288c2e6c1b5b/debian/patches/force-little-endian-random.patch -Patch: 0005-Allow-older-versioneer.patch +# https://github.com/dask/dask/pull/10970 +Patch: 0005-Force-initializing-the-random-seed-with-the-same-byt.patch +Patch: 0006-Allow-older-versioneer.patch # Ignore warnings from Pandas. # Upstream had https://github.com/dask/dask/pull/10307 but reverted it because # a new Pandas was released that fixed the warning, but we don't have it yet. -Patch: 0006-Ignore-NumPy-warnings-from-Pandas.patch +Patch: 0007-Ignore-NumPy-warnings-from-Pandas.patch +# https://github.com/dask/dask/pull/10969 +Patch: 0008-TST-Add-missing-skip-for-pyarrow.patch %description Dask is a flexible parallel computing library for analytics.