From 7e84e06e4fce1deef3b148ff9cbe32006be0ea44 Mon Sep 17 00:00:00 2001 From: "Benjamin A. Beasley" Date: Mon, 9 May 2022 08:42:18 -0400 Subject: [PATCH] Update to 2022.5.0 (close RHBZ#2065859) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop the “diagnostics” extras metapackage because no recent enough Bokeh version is available. - Switch to the PyPI sdist as source (with workarounds) - Ensure NUMPY_LICENSE.txt is packaged - Add Provides: bundled(numpy) --- .gitignore | 2 + ...ncoding_gh601-on-big-endian-machines.patch | 26 +--- ...duplication-if-index_col-is-type-str.patch | 30 ----- NUMPY_LICENSE.txt | 30 +++++ _version.py | 20 --- conftest.py | 70 ++++++++++ dask-2022.5.0-pr-9113.patch | 14 ++ python-dask.spec | 123 ++++++++++++++++-- sources | 2 +- 9 files changed, 232 insertions(+), 85 deletions(-) delete mode 100644 0002-fix-index_col-duplication-if-index_col-is-type-str.patch create mode 100644 NUMPY_LICENSE.txt delete mode 100644 _version.py create mode 100644 conftest.py create mode 100644 dask-2022.5.0-pr-9113.patch diff --git a/.gitignore b/.gitignore index 9257dd0..e2286ab 100644 --- a/.gitignore +++ b/.gitignore @@ -55,3 +55,5 @@ /dask-2022.01.1.tar.gz /dask-2022.02.0.tar.gz /dask-2022.02.1.tar.gz +/dask-2022.05.0.tar.gz +/dask-2022.5.0.tar.gz diff --git a/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch b/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch index ad031eb..dc65d0e 100644 --- a/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch +++ b/0001-Skip-test_encoding_gh601-on-big-endian-machines.patch @@ -1,26 +1,15 @@ -From 5d9cdaa75e0547d8d5edf8c995f29279688f7e11 Mon Sep 17 00:00:00 2001 -From: Elliott Sales de Andrade -Date: Sun, 7 Mar 2021 04:07:32 -0500 -Subject: [PATCH 1/2] Skip test_encoding_gh601 on big-endian machines. 
- -Signed-off-by: Elliott Sales de Andrade ---- - dask/dataframe/io/tests/test_csv.py | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/dask/dataframe/io/tests/test_csv.py b/dask/dataframe/io/tests/test_csv.py -index 48780ef8..f63d0e80 100644 ---- a/dask/dataframe/io/tests/test_csv.py -+++ b/dask/dataframe/io/tests/test_csv.py -@@ -1,6 +1,7 @@ - import gzip +diff -Naur dask-2022.05.0-original/dask/dataframe/io/tests/test_csv.py dask-2022.05.0/dask/dataframe/io/tests/test_csv.py +--- dask-2022.05.0-original/dask/dataframe/io/tests/test_csv.py 2022-05-02 14:23:56.000000000 -0400 ++++ dask-2022.05.0/dask/dataframe/io/tests/test_csv.py 2022-05-09 08:45:28.765627808 -0400 +@@ -2,6 +2,7 @@ import os + import warnings from io import BytesIO +import sys from unittest import mock import pytest -@@ -1099,6 +1100,7 @@ def test_read_csv_with_datetime_index_partitions_n(): +@@ -1116,6 +1117,7 @@ xfail_pandas_100 = pytest.mark.xfail(reason="https://github.com/dask/dask/issues/5787") @@ -28,6 +17,3 @@ index 48780ef8..f63d0e80 100644 @pytest.mark.parametrize( "encoding", [ --- -2.31.1 - diff --git a/0002-fix-index_col-duplication-if-index_col-is-type-str.patch b/0002-fix-index_col-duplication-if-index_col-is-type-str.patch deleted file mode 100644 index 481e482..0000000 --- a/0002-fix-index_col-duplication-if-index_col-is-type-str.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 41306c1829a22f0e8654f8d75dc23ce766c26ea8 Mon Sep 17 00:00:00 2001 -From: McToel -Date: Sun, 16 May 2021 11:11:06 +0200 -Subject: [PATCH 2/2] fix index_col duplication if index_col is type str - -Signed-off-by: Elliott Sales de Andrade ---- - dask/dataframe/io/sql.py | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - -diff --git a/dask/dataframe/io/sql.py b/dask/dataframe/io/sql.py -index 6573a325..9c15360c 100644 ---- a/dask/dataframe/io/sql.py -+++ b/dask/dataframe/io/sql.py -@@ -125,10 +125,8 @@ def read_sql_table( - if columns - else list(table.columns) - ) -- if index_col not in columns: 
-- columns.append( -- table.columns[index_col] if isinstance(index_col, str) else index_col -- ) -+ if index not in columns: -+ columns.append(index) - - if isinstance(index_col, str): - kwargs["index_col"] = index_col --- -2.31.1 - diff --git a/NUMPY_LICENSE.txt b/NUMPY_LICENSE.txt new file mode 100644 index 0000000..b4139af --- /dev/null +++ b/NUMPY_LICENSE.txt @@ -0,0 +1,30 @@ +Copyright (c) 2005-2015, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/_version.py b/_version.py deleted file mode 100644 index 0181b0a..0000000 --- a/_version.py +++ /dev/null @@ -1,20 +0,0 @@ - -# This file was generated by 'versioneer.py' (0.16) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. - -import json - -version_json = ''' -{ - "dirty": false, - "error": null, - "full-revisionid": "@commit@", - "version": "@version@" -} -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..594bcfc --- /dev/null +++ b/conftest.py @@ -0,0 +1,70 @@ +import pytest + +import dask + +# The doctests in these files fail due to either: +# - Non-required dependencies not being installed +# - Imported doctests due to pulling the docstrings from other packages +# (e.g. `numpy`). No need to run these doctests. +collect_ignore = [ + "dask/bytes/hdfs3.py", + "dask/bytes/pyarrow.py", + "dask/bytes/s3.py", + "dask/array/ghost.py", + "dask/array/fft.py", + "dask/dataframe/io/io.py", + "dask/dataframe/io/parquet/arrow.py", + "dask/dot.py", + "dask/ml.py", +] + +collect_ignore_glob = [] +try: + import numpy # noqa: F401 +except ImportError: + collect_ignore_glob.append("dask/array/*") + +try: + import pandas # noqa: F401 +except ImportError: + collect_ignore_glob.append("dask/dataframe/*") + +try: + import scipy # noqa: F401 +except ImportError: + collect_ignore.append("dask/array/stats.py") + +try: + import pyarrow # noqa: F401 +except ImportError: + collect_ignore.append("dask/dataframe/io/orc/arrow.py") + +try: + import tiledb # noqa: F401 +except ImportError: + collect_ignore.append("dask/array/tiledb_io.py") + +try: + import sqlalchemy # noqa: F401 +except ImportError: + collect_ignore.append("dask/dataframe/io/sql.py") + + +def pytest_addoption(parser): + parser.addoption("--runslow", action="store_true", help="run 
slow tests") + + +def pytest_runtest_setup(item): + if "slow" in item.keywords and not item.config.getoption("--runslow"): + pytest.skip("need --runslow option to run") + + +pytest.register_assert_rewrite( + "dask.array.utils", "dask.dataframe.utils", "dask.bag.utils" +) + + +@pytest.fixture(params=["disk", "tasks"]) +def shuffle_method(request): + with dask.config.set(shuffle=request.param): + yield request.param diff --git a/dask-2022.5.0-pr-9113.patch b/dask-2022.5.0-pr-9113.patch new file mode 100644 index 0000000..1d698cf --- /dev/null +++ b/dask-2022.5.0-pr-9113.patch @@ -0,0 +1,14 @@ +diff -Naur dask-2022.5.0-original/setup.cfg dask-2022.5.0/setup.cfg +--- dask-2022.5.0-original/setup.cfg 2022-05-02 14:24:38.606312800 -0400 ++++ dask-2022.5.0/setup.cfg 2022-05-22 09:06:51.480496179 -0400 +@@ -55,7 +55,9 @@ + xfail_strict = true + + [metadata] +-license_files = LICENSE.txt ++license_files = ++ LICENSE.txt ++ dask/array/NUMPY_LICENSE.txt + + [mypy] + python_version = 3.9 diff --git a/python-dask.spec b/python-dask.spec index 5e7c6f2..a430587 100644 --- a/python-dask.spec +++ b/python-dask.spec @@ -6,19 +6,36 @@ %bcond_without bootstrap Name: python-%{srcname} -Version: 2022.2.1 -%global tag 2022.02.1 -%global commit 217561b4266c170f452471fed64123940066c4b8 +Version: 2022.5.0 +%global tag 2022.05.0 Release: %autorelease Summary: Parallel PyData with Task Scheduling License: BSD URL: https://github.com/dask/dask/ -Source0: https://github.com/dask/dask/archive/%{tag}/%{srcname}-%{tag}.tar.gz -# Grab this from the sdist until we switch back to it. -Source1: _version.py +Source0: %{pypi_source %{srcname}} +# These are missing from the PyPI sdist. Until our PR#9113 and PR#9115 are +# accepted and included in a release, we grab them from the release tag in the +# GitHub repository. 
+Source1: https://github.com/dask/dask/raw/%{tag}/dask/array/NUMPY_LICENSE.txt +# PyPI sdist tarball is missing 'conftest.py' +# https://github.com/dask/dask/issues/8475 +# Include conftest.py in sdists +# https://github.com/dask/dask/pull/9115 +Source2: https://github.com/dask/dask/raw/%{tag}/conftest.py # https://github.com/dask/dask/issues/6725 -Patch0001: 0001-Skip-test_encoding_gh601-on-big-endian-machines.patch +Patch: 0001-Skip-test_encoding_gh601-on-big-endian-machines.patch +# Add NUMPY_LICENSE.txt to license_files in setup.cfg +# https://github.com/dask/dask/pull/9113 +# +# Fixes: +# +# dask/array/NUMPY_LICENSE.txt not included in PyPI distributions +# https://github.com/dask/dask/issues/9112 +# +# This patch has been trivially rebased to apply to the PyPI sdist, which uses +# tabs rather than spaces in setup.cfg. +Patch: dask-2022.5.0-pr-9113.patch BuildArch: noarch @@ -48,16 +65,33 @@ Recommends: python3-%{srcname}+array = %{version}-%{release} Recommends: python3-%{srcname}+bag = %{version}-%{release} Recommends: python3-%{srcname}+dataframe = %{version}-%{release} Recommends: python3-%{srcname}+delayed = %{version}-%{release} -Recommends: python3-%{srcname}+diagnostics = %{version}-%{release} %if %{without bootstrap} Recommends: python3-%{srcname}+distributed = %{version}-%{release} %endif +# No recent enough Bokeh is packaged +Obsoletes: python3-%{srcname}+diagnostics < 2022.5.0-1 + +# There is nothing that can be unbundled; there are some snippets forked +# or copied from unspecified versions of numpy, under a BSD license similar to +# that of dask itself. +# +# - dask/array/numpy_compat.py: +# _Recurser, moveaxis, rollaxis, sliding_window_view +# - dask/array/backends.py: +# _tensordot +# - dask/array/core.py: +# block +# - dask/array/einsumfuncs.py: +# parse_einsum_input +# - dask/array/routines.py: +# cov, _average +Provides: bundled(numpy) %description -n python3-%{srcname} Dask is a flexible parallel computing library for analytics. 
-%pyproject_extras_subpkg -n python3-%{srcname} array bag dataframe delayed diagnostics +%pyproject_extras_subpkg -n python3-%{srcname} array bag dataframe delayed %if %{without bootstrap} %pyproject_extras_subpkg distributed %endif @@ -77,12 +111,13 @@ Documentation for dask. %prep -%autosetup -n %{srcname}-%{tag} -p1 -sed -e 's/@version@/%{tag}/' -e 's/@commit@/%{commit}/' %SOURCE1 > %{srcname}/_version.py +%autosetup -n %{srcname}-%{version} -p1 +cp -p '%{SOURCE1}' dask/array/ +cp -p '%{SOURCE2}' ./ %generate_buildrequires -%pyproject_buildrequires -r -x test,array,bag,dataframe,delayed,diagnostics +%pyproject_buildrequires -r -x test,array,bag,dataframe,delayed %if %{without bootstrap} %pyproject_buildrequires -x distributed %endif @@ -111,6 +146,66 @@ rm -rf html/.{doctrees,buildinfo} %global have_arm 1 %endif +%ifarch %{ix86} +# read_sql_query with meta converts dtypes from 32 to 64. +# https://github.com/dask/dask/issues/8620 + +# > tm.assert_frame_equal( +# a, b, check_names=check_names, check_dtype=check_dtype, **kwargs +# E AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="age") are different +# E +# E Attribute "dtype" are different +# E [left]: int32 +# E [right]: int64 +# dask/dataframe/utils.py:555: AssertionError +k="${k-}${k+ and }not test_query_with_meta" +%endif + +%ifarch ppc64le +# TODO: Should this be reported upstream? Is it a dask issue, or a numpy one? +# Possibly related to +# https://fedoraproject.org/wiki/Changes/PPC64LE_Float128_Transition? 
+ +# > assert allclose(a, b, equal_nan=equal_nan, **kwargs), msg +# E AssertionError: found values in 'a' and 'b' which differ by more than the allowed amount +# E assert False +# E + where False = allclose(array([0.12586355-0.09957204j, 0.20256483+0.04098342j,\n 0.05781123-0.03588671j, 0.01135963-0.03334219j,\n 0.03747771+0.07495994j, 0.2106574 -0.0363521j ,\n 0.16352091+0.03782915j, 0.1381678 -0.06815128j,\n 0.03781295-0.04011523j, 0.01493269+0.07780643j]), array([0.12559072-0.07164038j, 0.20256483+0.05438578j,\n 0.05781123-0.03588671j, 0.01135963-0.03334219j,\n 0.03747771+0.07495994j, 0.2106574 -0.0363521j ,\n 0.16352091+0.03782915j, 0.1381678 -0.06815128j,\n 0.03781295-0.04011523j, 0.01493269+0.07780643j]), equal_nan=True, **{}) +# dask/array/utils.py:361: AssertionError +k="${k-}${k+ and }not test_lstsq[100-10-10-True]" +# > assert allclose(a, b, equal_nan=equal_nan, **kwargs), msg +# E AssertionError: found values in 'a' and 'b' which differ by more than the allowed amount +# E assert False +# E + where False = allclose(array([ 0.20168675+0.08857556j, 0.144233 -0.19173091j,\n -0.03367557-0.08053959j, 0.04108325-0.24648308j,\n -0.01844576+0.00841932j, 0.29652375+0.05682199j,\n 0.05551828+0.20156798j, -0.08409592+0.02354949j,\n 0.09848743-0.00748637j, 0.22889193-0.07372773j]), array([ 0.20067551+0.2642591j , 0.144233 -0.18573336j,\n -0.03367557-0.08053959j, 0.04108325-0.24648308j,\n -0.01844576+0.00841932j, 0.29652375+0.05682199j,\n 0.05551828+0.20156798j, -0.08409592+0.02354949j,\n 0.09848743-0.00748637j, 0.22889193-0.07372773j]), equal_nan=True, **{}) +# dask/array/utils.py:361: AssertionError +k="${k-}${k+ and }not test_lstsq[20-10-5-True]" + +# test_vdot fails with NumPy 1.19.0 +# https://github.com/dask/dask/issues/6406 +# +# vdot returns incorrect results on ppc64le +# https://github.com/numpy/numpy/issues/17087 + +# > assert allclose(a, b, equal_nan=equal_nan, **kwargs), msg +# E AssertionError: found values in 'a' and 'b' which differ by more than the 
allowed amount +# E assert False +# E + where False = allclose((0.38772781971416226-0.6851997484294434j), (0.38772781971416226-0.306563166009585j), equal_nan=True, **{}) +# dask/array/utils.py:361: AssertionError +k="${k-}${k+ and }not test_vdot[shape0-chunks0]" +# > assert allclose(a, b, equal_nan=equal_nan, **kwargs), msg +# E AssertionError: found values in 'a' and 'b' which differ by more than the allowed amount +# E assert False +# E + where False = allclose((0.38772781971416226-0.6851997484294434j), (0.38772781971416226-0.306563166009585j), equal_nan=True, **{}) +# dask/array/utils.py:361: AssertionError +k="${k-}${k+ and }not test_vdot[shape1-chunks1]" +%endif + +# This test compares against files in .github/. It does not work on the PyPI +# sdist, and is only relevant to upstream CI anyway. +# +# test_development_guidelines_matches_ci fails from sdist +# https://github.com/dask/dask/issues/8499 +k="${k-}${k+ and }not test_development_guidelines_matches_ci" + pytest_args=( -m 'not network' @@ -127,12 +222,12 @@ pytest_args=( %files -n python3-%{srcname} -f %{pyproject_files} %doc README.rst -%license LICENSE.txt +%license LICENSE.txt dask/array/NUMPY_LICENSE.txt %if %{without bootstrap} %files -n python-%{srcname}-doc %doc html -%license LICENSE.txt +%license LICENSE.txt dask/array/NUMPY_LICENSE.txt %endif diff --git a/sources b/sources index 59ff640..31bce64 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (dask-2022.02.1.tar.gz) = 6f66b5a904ea7ab0137465f622d1606a218b4ddc8b91939759098c66c7b749c674c750bf0caa6b53c7cf9d192312c0991d8e6144cebf9745eb9dfd4583b8b66c +SHA512 (dask-2022.5.0.tar.gz) = 40d119f5cee86c5ae6756501ed9f73a6fde0954cae961cca0ddf947e36df227163f87d7176d04388ab9ffe8ba18efbfb908adcf45de0064041a412b7596209a8