Update to latest version.

Elliott Sales de Andrade 2021-07-11 03:55:07 -04:00
parent 2459e4f1fa
commit b2522d5930
7 changed files with 40 additions and 67 deletions

.gitignore

@@ -43,3 +43,4 @@
 /dask-2021.6.0.tar.gz
 /dask-2021.6.1.tar.gz
 /dask-2021.6.2.tar.gz
+/dask-2021.7.0.tar.gz

0001-Skip-test_encoding_gh601-on-big-endian-machines.patch

@@ -1,7 +1,7 @@
-From 0d43bed7efaf54dc2c40d853ce19cf62de128370 Mon Sep 17 00:00:00 2001
+From b431fcfcf1d054df501fbeec8154ac15cdb48092 Mon Sep 17 00:00:00 2001
From: Elliott Sales de Andrade <quantum.analyst@gmail.com>
Date: Sun, 7 Mar 2021 04:07:32 -0500
-Subject: [PATCH 1/4] Skip test_encoding_gh601 on big-endian machines.
+Subject: [PATCH 1/3] Skip test_encoding_gh601 on big-endian machines.
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
---
@@ -9,7 +9,7 @@ Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
1 file changed, 2 insertions(+)
diff --git a/dask/dataframe/io/tests/test_csv.py b/dask/dataframe/io/tests/test_csv.py
-index a6c50d12..6905f31d 100644
+index 67ce22c8..247112bc 100644
--- a/dask/dataframe/io/tests/test_csv.py
+++ b/dask/dataframe/io/tests/test_csv.py
@@ -17,10 +17,10 @@ index a6c50d12..6905f31d 100644
@@ -1,6 +1,7 @@
import os
from io import BytesIO
+import sys
from time import sleep
from unittest import mock

import pytest
-@@ -1096,6 +1097,7 @@ xfail_pandas_100 = pytest.mark.xfail(
+@@ -1081,6 +1082,7 @@ xfail_pandas_100 = pytest.mark.xfail(
)
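
The rebase above only shifts hunk offsets; the patch itself still adds a byte-order guard to the test, which is what the new `import sys` is for. A minimal sketch of that guard using the standard pytest idiom (the exact decorator placement and reason string in dask's test_csv.py are assumptions here):

    import sys

    import pytest

    @pytest.mark.skipif(
        sys.byteorder == "big",
        reason="test data assumes little-endian parsing",  # assumed wording
    )
    def test_encoding_gh601():
        ...  # body elided; see dask/dataframe/io/tests/test_csv.py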

0002-Use-packaging-for-version-comparisons.patch

@@ -1,7 +1,7 @@
-From ab18b267a2eeeaa60bda03cd67b21880c91d4766 Mon Sep 17 00:00:00 2001
+From 41bf8806be46f4cbadc6492539dff709791e25de Mon Sep 17 00:00:00 2001
From: Elliott Sales de Andrade <quantum.analyst@gmail.com>
Date: Sun, 13 Jun 2021 05:37:41 -0400
-Subject: [PATCH 2/4] Use packaging for version comparisons.
+Subject: [PATCH 2/3] Use packaging for version comparisons.
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
---
@@ -187,10 +187,10 @@ index df92f3e6..8ff041c9 100644
)
def test_tensordot():
diff --git a/dask/base.py b/dask/base.py
-index 59c4640d..1cd48cac 100644
+index 73a4ff50..79a000a0 100644
--- a/dask/base.py
+++ b/dask/base.py
-@@ -6,13 +6,13 @@ import uuid
+@@ -7,13 +7,13 @@ import uuid
from collections import OrderedDict
from contextlib import contextmanager
from dataclasses import fields, is_dataclass
@@ -205,7 +205,7 @@ index 59c4640d..1cd48cac 100644
from tlz import curry, groupby, identity, merge
from tlz.functoolz import Compose
-@@ -898,7 +898,7 @@ def _normalize_function(func):
+@@ -911,7 +911,7 @@ def _normalize_function(func):
def register_pandas():
import pandas as pd
@@ -373,7 +373,7 @@ index 40740310..69eca3c5 100644
"Due to a bug in pyarrow 0.10.0, the ORC reader is "
"unavailable. Please either downgrade pyarrow to "
diff --git a/dask/dataframe/io/parquet/arrow.py b/dask/dataframe/io/parquet/arrow.py
-index 99227157..83bb4bf8 100644
+index 16d29bef..8e4aa1da 100644
--- a/dask/dataframe/io/parquet/arrow.py
+++ b/dask/dataframe/io/parquet/arrow.py
@@ -2,13 +2,13 @@ import json
@@ -445,7 +445,7 @@ index e11de5e3..00ab6ee0 100644
_ENGINES[engine] = eng = ArrowDatasetEngine
diff --git a/dask/dataframe/io/parquet/fastparquet.py b/dask/dataframe/io/parquet/fastparquet.py
-index 5c817294..ae28c792 100644
+index dfa77a9a..f35323f7 100644
--- a/dask/dataframe/io/parquet/fastparquet.py
+++ b/dask/dataframe/io/parquet/fastparquet.py
@@ -3,11 +3,11 @@ import json
@@ -495,7 +495,7 @@ index 1c1ca00e..580a7ded 100644
"PyArrow 0.10.0 release broke the ORC reader, see "
"https://issues.apache.org/jira/browse/ARROW-3009"
diff --git a/dask/dataframe/io/tests/test_parquet.py b/dask/dataframe/io/tests/test_parquet.py
-index a5a1244a..28df8771 100644
+index f9833237..cfeddb74 100644
--- a/dask/dataframe/io/tests/test_parquet.py
+++ b/dask/dataframe/io/tests/test_parquet.py
@@ -4,11 +4,11 @@ import os
@@ -568,7 +568,7 @@ index a5a1244a..28df8771 100644
and PANDAS_GT_110
and not PANDAS_GT_121
):
-@@ -822,7 +828,7 @@ def test_append_dict_column(tmpdir, engine):
+@@ -817,7 +823,7 @@ def test_append_dict_column(tmpdir, engine):
if engine == "fastparquet":
pytest.xfail("Fastparquet engine is missing dict-column support")
@@ -577,7 +577,7 @@ index a5a1244a..28df8771 100644
pytest.skip("Newer PyArrow version required for dict-column support.")
tmp = str(tmpdir)
-@@ -981,7 +987,7 @@ def test_categories_unnamed_index(tmpdir, engine):
+@@ -988,7 +994,7 @@ def test_categories_unnamed_index(tmpdir, engine):
# Check that we can handle an unnamed categorical index
# https://github.com/dask/dask/issues/6885
@@ -586,7 +586,7 @@ index a5a1244a..28df8771 100644
pytest.skip("PyArrow>=0.15 Required.")
tmpdir = str(tmpdir)
-@@ -1166,7 +1172,7 @@ def test_to_parquet_pyarrow_w_inconsistent_schema_by_partition_succeeds_w_manual
+@@ -1173,7 +1179,7 @@ def test_to_parquet_pyarrow_w_inconsistent_schema_by_partition_succeeds_w_manual
@pytest.mark.parametrize("index", [False, True])
@pytest.mark.parametrize("schema", ["infer", "complex"])
def test_pyarrow_schema_inference(tmpdir, index, engine, schema):
@@ -595,7 +595,7 @@ index a5a1244a..28df8771 100644
pytest.skip("PyArrow>=0.15 Required.")
if schema == "complex":
schema = {"index": pa.string(), "amount": pa.int64()}
-@@ -1359,9 +1365,7 @@ def test_filters_v0(tmpdir, write_engine, read_engine):
+@@ -1366,9 +1372,7 @@ def test_filters_v0(tmpdir, write_engine, read_engine):
# Recent versions of pyarrow support full row-wise filtering
# (fastparquet and older pyarrow versions do not)
@@ -606,7 +606,7 @@ index a5a1244a..28df8771 100644
fn = str(tmpdir)
df = pd.DataFrame({"at": ["ab", "aa", "ba", "da", "bb"]})
-@@ -1462,7 +1466,7 @@ def test_pyarrow_filter_divisions(tmpdir):
+@@ -1469,7 +1473,7 @@ def test_pyarrow_filter_divisions(tmpdir):
str(tmpdir.join("file.1.parquet")), engine="pyarrow", row_group_size=2
)
@@ -615,7 +615,7 @@ index a5a1244a..28df8771 100644
# Only works for ArrowDatasetEngine.
# Legacy code will not apply filters on individual row-groups
# when `split_row_groups=False`.
-@@ -1637,7 +1641,7 @@ def test_parquet_select_cats(tmpdir, engine):
+@@ -1644,7 +1648,7 @@ def test_parquet_select_cats(tmpdir, engine):
def test_columns_name(tmpdir, engine):
@@ -624,7 +624,7 @@ index a5a1244a..28df8771 100644
pytest.skip("Fastparquet does not write column_indexes up to 0.3.1")
tmp_path = str(tmpdir)
df = pd.DataFrame({"A": [1, 2]}, index=pd.Index(["a", "b"], name="idx"))
-@@ -2041,9 +2045,7 @@ def test_to_parquet_with_get(tmpdir):
+@@ -2048,9 +2052,7 @@ def test_to_parquet_with_get(tmpdir):
def test_select_partitioned_column(tmpdir, engine):
pytest.importorskip("snappy")
if engine.startswith("pyarrow"):
@@ -635,7 +635,7 @@ index a5a1244a..28df8771 100644
pytest.skip("pyarrow<0.9.0 did not support this")
fn = str(tmpdir)
-@@ -2067,9 +2069,9 @@ def test_select_partitioned_column(tmpdir, engine):
+@@ -2074,9 +2076,9 @@ def test_select_partitioned_column(tmpdir, engine):
def test_with_tz(tmpdir, engine):
@@ -647,7 +647,7 @@ index a5a1244a..28df8771 100644
pytest.skip("fastparquet<0.3.0 did not support this")
with warnings.catch_warnings():
-@@ -2278,7 +2280,7 @@ def test_timeseries_nulls_in_schema(tmpdir, engine, schema):
+@@ -2285,7 +2287,7 @@ def test_timeseries_nulls_in_schema(tmpdir, engine, schema):
if (
schema == "infer"
and engine.startswith("pyarrow")
@@ -656,7 +656,7 @@ index a5a1244a..28df8771 100644
):
pytest.skip("PyArrow>=0.15 Required.")
-@@ -2814,9 +2816,7 @@ def test_filter_nonpartition_columns(
+@@ -2821,9 +2823,7 @@ def test_filter_nonpartition_columns(
@PYARROW_MARK
def test_pandas_metadata_nullable_pyarrow(tmpdir):
@@ -667,7 +667,7 @@ index a5a1244a..28df8771 100644
pytest.skip("PyArrow>=0.16 and Pandas>=1.0.0 Required.")
tmpdir = str(tmpdir)
-@@ -2837,7 +2837,7 @@ def test_pandas_metadata_nullable_pyarrow(tmpdir):
+@@ -2844,7 +2844,7 @@ def test_pandas_metadata_nullable_pyarrow(tmpdir):
@PYARROW_MARK
def test_pandas_timestamp_overflow_pyarrow(tmpdir):
@@ -676,7 +676,7 @@ index a5a1244a..28df8771 100644
pytest.skip("PyArrow>=0.17 Required.")
info = np.iinfo(np.dtype("int64"))
-@@ -2947,7 +2947,7 @@ def test_partitioned_column_overlap(tmpdir, engine, write_cols):
+@@ -2954,7 +2954,7 @@ def test_partitioned_column_overlap(tmpdir, engine, write_cols):
@fp_pandas_xfail
def test_partitioned_preserve_index(tmpdir, write_engine, read_engine):
@@ -685,7 +685,7 @@ index a5a1244a..28df8771 100644
pytest.skip("PyArrow>=0.15 Required.")
tmp = str(tmpdir)
-@@ -3065,7 +3065,7 @@ def test_pyarrow_dataset_simple(tmpdir, engine):
+@@ -3072,7 +3072,7 @@ def test_pyarrow_dataset_simple(tmpdir, engine):
@PYARROW_MARK
@pytest.mark.parametrize("test_filter", [True, False])
def test_pyarrow_dataset_partitioned(tmpdir, engine, test_filter):
@@ -694,7 +694,7 @@ index a5a1244a..28df8771 100644
# Using pyarrow.dataset API does not produce
# Categorical type for partitioned columns.
pytest.skip("PyArrow>0.17.1 Required.")
-@@ -3093,7 +3093,7 @@ def test_pyarrow_dataset_partitioned(tmpdir, engine, test_filter):
+@@ -3100,7 +3100,7 @@ def test_pyarrow_dataset_partitioned(tmpdir, engine, test_filter):
def test_pyarrow_dataset_read_from_paths(
tmpdir, read_from_paths, test_filter_partitioned
):
@@ -703,7 +703,7 @@ index a5a1244a..28df8771 100644
# Using pyarrow.dataset API does not produce
# Categorical type for partitioned columns.
pytest.skip("PyArrow>0.17.1 Required.")
-@@ -3123,7 +3123,7 @@ def test_pyarrow_dataset_read_from_paths(
+@@ -3130,7 +3130,7 @@ def test_pyarrow_dataset_read_from_paths(
@PYARROW_MARK
@pytest.mark.parametrize("split_row_groups", [True, False])
def test_pyarrow_dataset_filter_partitioned(tmpdir, split_row_groups):
@@ -809,7 +809,7 @@ index 5d995b87..a31943f8 100644
funcs = list(range(11))
diff --git a/dask/sizeof.py b/dask/sizeof.py
-index 570b6251..38c06885 100644
+index 522f6b89..10874585 100644
--- a/dask/sizeof.py
+++ b/dask/sizeof.py
@@ -2,7 +2,8 @@ import itertools
@@ -822,7 +822,7 @@ index 570b6251..38c06885 100644
from .utils import Dispatch
-@@ -195,7 +196,7 @@ def register_pyarrow():
+@@ -218,7 +219,7 @@ def register_pyarrow():
return int(_get_col_size(data)) + 1000
# Handle pa.Column for pyarrow < 0.15
@@ -862,7 +862,7 @@ index 51c7044f..16cdca8b 100644
a = np.arange(5)
diff --git a/setup.py b/setup.py
-index 41751134..9365926c 100755
+index 2588fc1c..1806f328 100755
--- a/setup.py
+++ b/setup.py
@@ -22,6 +22,7 @@ extras_require["complete"] = sorted({v for req in extras_require.values() for v
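
Across all the files above, this patch swaps distutils' LooseVersion for packaging's PEP 440-compliant parser, which is why setup.py gains a requirement here. A minimal before/after sketch of the pattern (variable names are illustrative, not necessarily dask's exact code):

    # Before: LooseVersion ships with the deprecated distutils module
    # and can mis-order pre-release versions.
    # from distutils.version import LooseVersion
    # if LooseVersion(pa.__version__) < LooseVersion("0.15.0"): ...

    # After: packaging implements PEP 440 ordering.
    from packaging.version import parse as parse_version

    import pyarrow as pa

    if parse_version(pa.__version__) < parse_version("0.15.0"):
        raise RuntimeError("PyArrow >= 0.15 required")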

0003-fix-index_col-duplication-if-index_col-is-type-str.patch

@@ -1,7 +1,7 @@
-From 57cf597227fb78a42cabbeedab146260e6f485e2 Mon Sep 17 00:00:00 2001
+From 61376866ed42a06de6f7e4816b8ed8aac819b368 Mon Sep 17 00:00:00 2001
From: McToel <theo.doellmann@gmx.de>
Date: Sun, 16 May 2021 11:11:06 +0200
-Subject: [PATCH 3/4] fix index_col duplication if index_col is type str
+Subject: [PATCH 3/3] fix index_col duplication if index_col is type str
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
---
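
Only the rebased header of this patch appears in the diff; the underlying fix (dask PR #7661, linked from the spec below) stops a string-typed index_col from being appended to the column list a second time. A hypothetical sketch of that bug class, with invented names:

    # Hypothetical helper, not dask's actual code: normalize index_col
    # to a list before merging it with the requested columns.
    def with_index_columns(columns, index_col):
        index_cols = [index_col] if isinstance(index_col, str) else list(index_col)
        # Skip index columns the caller already requested, avoiding duplicates.
        return list(columns) + [c for c in index_cols if c not in columns]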

0004-Fix-scipy-tests-7841.patch

@@ -1,29 +0,0 @@
From 97bf7e87437e5f8ce1c1a8b6defed81c8946add2 Mon Sep 17 00:00:00 2001
From: Julia Signell <jsignell@gmail.com>
Date: Tue, 29 Jun 2021 08:48:10 -0400
Subject: [PATCH 4/4] Fix scipy tests (#7841)
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
---
dask/array/tests/test_stats.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/dask/array/tests/test_stats.py b/dask/array/tests/test_stats.py
index d9fe290d..a80b6e61 100644
--- a/dask/array/tests/test_stats.py
+++ b/dask/array/tests/test_stats.py
@@ -75,8 +75,10 @@ def test_one(kind):
     ],
 )
 def test_two(kind, kwargs):
+    # The sums of observed and expected frequencies must match
     a = np.random.random(size=30)
-    b = np.random.random(size=30)
+    b = a[::-1]
+
     a_ = da.from_array(a, 3)
     b_ = da.from_array(b, 3)
--
2.31.1
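
This file is removed because the fix was merged upstream into dask 2021.7.0 (PR #7841). The reversed copy satisfies newer scipy releases, which check that the observed and expected frequencies sum to the same total; a standalone illustration:

    import numpy as np

    a = np.random.random(size=30)
    b = a[::-1]  # same values in reverse order, so the totals agree
    assert np.isclose(a.sum(), b.sum())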

python-dask.spec

@@ -6,7 +6,7 @@
%bcond_without bootstrap
Name: python-%{srcname}
-Version: 2021.6.2
+Version: 2021.7.0
Release: 1%{?dist}
Summary: Parallel PyData with Task Scheduling
@@ -19,8 +19,6 @@ Patch0001: 0001-Skip-test_encoding_gh601-on-big-endian-machines.patch
Patch0002: 0002-Use-packaging-for-version-comparisons.patch
# https://github.com/dask/dask/pull/7661
Patch0003: 0003-fix-index_col-duplication-if-index_col-is-type-str.patch
-# https://github.com/dask/dask/pull/7841
-Patch0004: 0004-Fix-scipy-tests-7841.patch
BuildArch: noarch
@@ -53,7 +51,7 @@ BuildRequires: python3dist(setuptools)
%global toolz_version 0.8.2
BuildRequires: python3dist(toolz) >= %{toolz_version}
%if %{without bootstrap}
-%global distributed_version 2021.6
+%global distributed_version 2021.7
BuildRequires: python3dist(distributed) >= %{distributed_version}
BuildRequires: python3dist(scikit-image)
BuildRequires: python3dist(xarray)
@@ -238,6 +236,9 @@ pytest_args=(
%changelog
+* Sun Jul 11 2021 Elliott Sales de Andrade <quantum.analyst@gmail.com> - 2021.7.0-1
+- Update to latest version (#1980906)
+
* Sun Jul 04 2021 Elliott Sales de Andrade <quantum.analyst@gmail.com> - 2021.6.2-1
- Update to latest version (#1974872)

sources

@@ -1 +1 @@
-SHA512 (dask-2021.6.2.tar.gz) = 2949a0a028d08794936faaeeea3b36c97ac1492288519efa0fd1e6bbb9b5b52b730657b8b342a47ca0183d98e9179836423847670fc12c14be0890daa4b773b4
+SHA512 (dask-2021.7.0.tar.gz) = 3dbeefe08af6536f19a03ead69372dfa7a26eff66d768fa8e238341822136a2314fca0291694ad22bdfe3618a4ed6785d836ceb5cc4231f361d97f0cbba0edbf
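
The sources file pairs each release tarball with its SHA512 digest. A quick local verification might look like this sketch (filename taken from the entry above):

    import hashlib
    from pathlib import Path

    # Recompute the tarball digest and compare it with the "sources" entry.
    digest = hashlib.sha512(Path("dask-2021.7.0.tar.gz").read_bytes()).hexdigest()
    print(digest)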