Update to 2022.5.0 (close RHBZ#2065859)

- Drop the “diagnostics” extras metapackage because no recent enough
  Bokeh version is available.
- Switch to the PyPI sdist as source (with workarounds)
- Ensure NUMPY_LICENSE.txt is packaged
- Add Provides: bundled(numpy)
This commit is contained in:
Benjamin A. Beasley 2022-05-09 08:42:18 -04:00
parent c6940ea5b0
commit 7e84e06e4f
9 changed files with 232 additions and 85 deletions

2
.gitignore vendored
View File

@ -55,3 +55,5 @@
/dask-2022.01.1.tar.gz
/dask-2022.02.0.tar.gz
/dask-2022.02.1.tar.gz
/dask-2022.05.0.tar.gz
/dask-2022.5.0.tar.gz

View File

@ -1,26 +1,15 @@
From 5d9cdaa75e0547d8d5edf8c995f29279688f7e11 Mon Sep 17 00:00:00 2001
From: Elliott Sales de Andrade <quantum.analyst@gmail.com>
Date: Sun, 7 Mar 2021 04:07:32 -0500
Subject: [PATCH 1/2] Skip test_encoding_gh601 on big-endian machines.
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
---
dask/dataframe/io/tests/test_csv.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/dask/dataframe/io/tests/test_csv.py b/dask/dataframe/io/tests/test_csv.py
index 48780ef8..f63d0e80 100644
--- a/dask/dataframe/io/tests/test_csv.py
+++ b/dask/dataframe/io/tests/test_csv.py
@@ -1,6 +1,7 @@
import gzip
diff -Naur dask-2022.05.0-original/dask/dataframe/io/tests/test_csv.py dask-2022.05.0/dask/dataframe/io/tests/test_csv.py
--- dask-2022.05.0-original/dask/dataframe/io/tests/test_csv.py 2022-05-02 14:23:56.000000000 -0400
+++ dask-2022.05.0/dask/dataframe/io/tests/test_csv.py 2022-05-09 08:45:28.765627808 -0400
@@ -2,6 +2,7 @@
import os
import warnings
from io import BytesIO
+import sys
from unittest import mock
import pytest
@@ -1099,6 +1100,7 @@ def test_read_csv_with_datetime_index_partitions_n():
@@ -1116,6 +1117,7 @@
xfail_pandas_100 = pytest.mark.xfail(reason="https://github.com/dask/dask/issues/5787")
@ -28,6 +17,3 @@ index 48780ef8..f63d0e80 100644
@pytest.mark.parametrize(
"encoding",
[
--
2.31.1

View File

@ -1,30 +0,0 @@
From 41306c1829a22f0e8654f8d75dc23ce766c26ea8 Mon Sep 17 00:00:00 2001
From: McToel <theo.doellmann@gmx.de>
Date: Sun, 16 May 2021 11:11:06 +0200
Subject: [PATCH 2/2] fix index_col duplication if index_col is type str
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
---
dask/dataframe/io/sql.py | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/dask/dataframe/io/sql.py b/dask/dataframe/io/sql.py
index 6573a325..9c15360c 100644
--- a/dask/dataframe/io/sql.py
+++ b/dask/dataframe/io/sql.py
@@ -125,10 +125,8 @@ def read_sql_table(
if columns
else list(table.columns)
)
- if index_col not in columns:
- columns.append(
- table.columns[index_col] if isinstance(index_col, str) else index_col
- )
+ if index not in columns:
+ columns.append(index)
if isinstance(index_col, str):
kwargs["index_col"] = index_col
--
2.31.1

30
NUMPY_LICENSE.txt Normal file
View File

@ -0,0 +1,30 @@
Copyright (c) 2005-2015, NumPy Developers.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of the NumPy Developers nor the names of any
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,20 +0,0 @@
# This file was generated by 'versioneer.py' (0.16) from
# revision-control system data, or from the parent directory name of an
# unpacked source archive. Distribution tarballs contain a pre-generated copy
# of this file.
import json
# JSON document holding the version metadata for this build. The "@commit@"
# and "@version@" values are placeholders: the packaging step rewrites them
# with sed before the file is installed, so they must stay spelled exactly
# like this.
version_json = '''
{
"dirty": false,
"error": null,
"full-revisionid": "@commit@",
"version": "@version@"
}
''' # END VERSION_JSON


def get_versions():
    """Return the version metadata parsed from ``version_json``.

    Returns:
        dict: the decoded JSON object, with the keys ``dirty``, ``error``,
        ``full-revisionid`` and ``version``. A fresh dict is built on every
        call.
    """
    return json.loads(version_json)

70
conftest.py Normal file
View File

@ -0,0 +1,70 @@
import pytest
import dask
# The doctests in these files fail due to either:
# - Non-required dependencies not being installed
# - Imported doctests due to pulling the docstrings from other packages
# (e.g. `numpy`). No need to run these doctests.
# Paths (relative to this conftest.py) that pytest must never collect.
collect_ignore = [
    "dask/bytes/hdfs3.py",
    "dask/bytes/pyarrow.py",
    "dask/bytes/s3.py",
    "dask/array/ghost.py",
    "dask/array/fft.py",
    "dask/dataframe/io/io.py",
    "dask/dataframe/io/parquet/arrow.py",
    "dask/dot.py",
    "dask/ml.py",
]

# Glob patterns for whole sub-packages to skip; filled in below.
collect_ignore_glob = []

# Each optional dependency is probed with a plain import. When the import
# fails, the modules that need that dependency are excluded from collection
# instead of erroring out at import time.
try:
    import numpy  # noqa: F401
except ImportError:
    collect_ignore_glob.append("dask/array/*")

try:
    import pandas  # noqa: F401
except ImportError:
    collect_ignore_glob.append("dask/dataframe/*")

try:
    import scipy  # noqa: F401
except ImportError:
    collect_ignore.append("dask/array/stats.py")

try:
    import pyarrow  # noqa: F401
except ImportError:
    collect_ignore.append("dask/dataframe/io/orc/arrow.py")

try:
    import tiledb  # noqa: F401
except ImportError:
    collect_ignore.append("dask/array/tiledb_io.py")

try:
    import sqlalchemy  # noqa: F401
except ImportError:
    collect_ignore.append("dask/dataframe/io/sql.py")
def pytest_addoption(parser):
    """Register the ``--runslow`` command-line flag.

    Args:
        parser: the option parser object pytest passes to this hook; only
            its ``addoption`` method is used.

    The flag is a plain boolean switch (``store_true``), so it is off unless
    given on the command line.
    """
    parser.addoption("--runslow", action="store_true", help="run slow tests")
def pytest_runtest_setup(item):
    """Skip tests carrying the ``slow`` keyword unless ``--runslow`` was given.

    Args:
        item: the test item about to run; its ``keywords`` mapping and
            ``config.getoption`` are consulted.

    Tests without the ``slow`` keyword, and all tests when ``--runslow`` is
    set, fall through untouched.
    """
    if "slow" in item.keywords and not item.config.getoption("--runslow"):
        pytest.skip("need --runslow option to run")
# Ask pytest to apply its assertion rewriting to these dask helper modules,
# which are named here as dotted module paths.
pytest.register_assert_rewrite(
"dask.array.utils", "dask.dataframe.utils", "dask.bag.utils"
)
@pytest.fixture(params=["disk", "tasks"])
def shuffle_method(request):
    """Run the requesting test once per shuffle setting.

    The fixture is parametrized over ``"disk"`` and ``"tasks"``. For each
    value it sets the dask config key ``shuffle`` to that value for the
    duration of the test and yields the value to the test.
    """
    with dask.config.set(shuffle=request.param):
        yield request.param

View File

@ -0,0 +1,14 @@
diff -Naur dask-2022.5.0-original/setup.cfg dask-2022.5.0/setup.cfg
--- dask-2022.5.0-original/setup.cfg 2022-05-02 14:24:38.606312800 -0400
+++ dask-2022.5.0/setup.cfg 2022-05-22 09:06:51.480496179 -0400
@@ -55,7 +55,9 @@
xfail_strict = true
[metadata]
-license_files = LICENSE.txt
+license_files =
+ LICENSE.txt
+ dask/array/NUMPY_LICENSE.txt
[mypy]
python_version = 3.9

View File

@ -6,19 +6,36 @@
%bcond_without bootstrap
Name: python-%{srcname}
Version: 2022.2.1
%global tag 2022.02.1
%global commit 217561b4266c170f452471fed64123940066c4b8
Version: 2022.5.0
%global tag 2022.05.0
Release: %autorelease
Summary: Parallel PyData with Task Scheduling
License: BSD
URL: https://github.com/dask/dask/
Source0: https://github.com/dask/dask/archive/%{tag}/%{srcname}-%{tag}.tar.gz
# Grab this from the sdist until we switch back to it.
Source1: _version.py
Source0: %{pypi_source %{srcname}}
# These are missing from the PyPI sdist. Until our PR#9113 and PR#9115 are
# accepted and included in a release, we grab them from the release tag in the
# GitHub repository.
Source1: https://github.com/dask/dask/raw/%{tag}/dask/array/NUMPY_LICENSE.txt
# PyPI sdist tarball is missing 'conftest.py'
# https://github.com/dask/dask/issues/8475
# Include conftest.py in sdists
# https://github.com/dask/dask/pull/9115
Source2: https://github.com/dask/dask/raw/%{tag}/conftest.py
# https://github.com/dask/dask/issues/6725
Patch0001: 0001-Skip-test_encoding_gh601-on-big-endian-machines.patch
Patch: 0001-Skip-test_encoding_gh601-on-big-endian-machines.patch
# Add NUMPY_LICENSE.txt to license_files in setup.cfg
# https://github.com/dask/dask/pull/9113
#
# Fixes:
#
# dask/array/NUMPY_LICENSE.txt not included in PyPI distributions
# https://github.com/dask/dask/issues/9112
#
# This patch has been trivially rebased to apply to the PyPI sdist, which uses
# tabs rather than spaces in setup.cfg.
Patch: dask-2022.5.0-pr-9113.patch
BuildArch: noarch
@ -48,16 +65,33 @@ Recommends: python3-%{srcname}+array = %{version}-%{release}
Recommends: python3-%{srcname}+bag = %{version}-%{release}
Recommends: python3-%{srcname}+dataframe = %{version}-%{release}
Recommends: python3-%{srcname}+delayed = %{version}-%{release}
Recommends: python3-%{srcname}+diagnostics = %{version}-%{release}
%if %{without bootstrap}
Recommends: python3-%{srcname}+distributed = %{version}-%{release}
%endif
# No recent enough Bokeh is packaged
Obsoletes: python3-%{srcname}+diagnostics < 2022.5.0-1
# There is nothing that can be unbundled; there are some snippets forked
# or copied from unspecified versions of numpy, under a BSD license similar to
# that of dask itself.
#
# - dask/array/numpy_compat.py:
# _Recurser, moveaxis, rollaxis, sliding_window_view
# - dask/array/backends.py:
# _tensordot
# - dask/array/core.py:
# block
# - dask/array/einsumfuncs.py:
# parse_einsum_input
# - dask/array/routines.py:
# cov, _average
Provides: bundled(numpy)
%description -n python3-%{srcname}
Dask is a flexible parallel computing library for analytics.
%pyproject_extras_subpkg -n python3-%{srcname} array bag dataframe delayed diagnostics
%pyproject_extras_subpkg -n python3-%{srcname} array bag dataframe delayed
%if %{without bootstrap}
%pyproject_extras_subpkg distributed
%endif
@ -77,12 +111,13 @@ Documentation for dask.
%prep
%autosetup -n %{srcname}-%{tag} -p1
sed -e 's/@version@/%{tag}/' -e 's/@commit@/%{commit}/' %SOURCE1 > %{srcname}/_version.py
%autosetup -n %{srcname}-%{version} -p1
cp -p '%{SOURCE1}' dask/array/
cp -p '%{SOURCE2}' ./
%generate_buildrequires
%pyproject_buildrequires -r -x test,array,bag,dataframe,delayed,diagnostics
%pyproject_buildrequires -r -x test,array,bag,dataframe,delayed
%if %{without bootstrap}
%pyproject_buildrequires -x distributed
%endif
@ -111,6 +146,66 @@ rm -rf html/.{doctrees,buildinfo}
%global have_arm 1
%endif
%ifarch %{ix86}
# read_sql_query with meta converts dtypes from 32 to 64.
# https://github.com/dask/dask/issues/8620
# > tm.assert_frame_equal(
# a, b, check_names=check_names, check_dtype=check_dtype, **kwargs
# E AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="age") are different
# E
# E Attribute "dtype" are different
# E [left]: int32
# E [right]: int64
# dask/dataframe/utils.py:555: AssertionError
k="${k-}${k+ and }not test_query_with_meta"
%endif
%ifarch ppc64le
# TODO: Should this be reported upstream? Is it a dask issue, or a numpy one?
# Possibly related to
# https://fedoraproject.org/wiki/Changes/PPC64LE_Float128_Transition?
# > assert allclose(a, b, equal_nan=equal_nan, **kwargs), msg
# E AssertionError: found values in 'a' and 'b' which differ by more than the allowed amount
# E assert False
# E + where False = allclose(array([0.12586355-0.09957204j, 0.20256483+0.04098342j,\n 0.05781123-0.03588671j, 0.01135963-0.03334219j,\n 0.03747771+0.07495994j, 0.2106574 -0.0363521j ,\n 0.16352091+0.03782915j, 0.1381678 -0.06815128j,\n 0.03781295-0.04011523j, 0.01493269+0.07780643j]), array([0.12559072-0.07164038j, 0.20256483+0.05438578j,\n 0.05781123-0.03588671j, 0.01135963-0.03334219j,\n 0.03747771+0.07495994j, 0.2106574 -0.0363521j ,\n 0.16352091+0.03782915j, 0.1381678 -0.06815128j,\n 0.03781295-0.04011523j, 0.01493269+0.07780643j]), equal_nan=True, **{})
# dask/array/utils.py:361: AssertionError
k="${k-}${k+ and }not test_lstsq[100-10-10-True]"
# > assert allclose(a, b, equal_nan=equal_nan, **kwargs), msg
# E AssertionError: found values in 'a' and 'b' which differ by more than the allowed amount
# E assert False
# E + where False = allclose(array([ 0.20168675+0.08857556j, 0.144233 -0.19173091j,\n -0.03367557-0.08053959j, 0.04108325-0.24648308j,\n -0.01844576+0.00841932j, 0.29652375+0.05682199j,\n 0.05551828+0.20156798j, -0.08409592+0.02354949j,\n 0.09848743-0.00748637j, 0.22889193-0.07372773j]), array([ 0.20067551+0.2642591j , 0.144233 -0.18573336j,\n -0.03367557-0.08053959j, 0.04108325-0.24648308j,\n -0.01844576+0.00841932j, 0.29652375+0.05682199j,\n 0.05551828+0.20156798j, -0.08409592+0.02354949j,\n 0.09848743-0.00748637j, 0.22889193-0.07372773j]), equal_nan=True, **{})
# dask/array/utils.py:361: AssertionError
k="${k-}${k+ and }not test_lstsq[20-10-5-True]"
# test_vdot fails with NumPy 1.19.0
# https://github.com/dask/dask/issues/6406
#
# vdot returns incorrect results on ppc64le
# https://github.com/numpy/numpy/issues/17087
# > assert allclose(a, b, equal_nan=equal_nan, **kwargs), msg
# E AssertionError: found values in 'a' and 'b' which differ by more than the allowed amount
# E assert False
# E + where False = allclose((0.38772781971416226-0.6851997484294434j), (0.38772781971416226-0.306563166009585j), equal_nan=True, **{})
# dask/array/utils.py:361: AssertionError
k="${k-}${k+ and }not test_vdot[shape0-chunks0]"
# > assert allclose(a, b, equal_nan=equal_nan, **kwargs), msg
# E AssertionError: found values in 'a' and 'b' which differ by more than the allowed amount
# E assert False
# E + where False = allclose((0.38772781971416226-0.6851997484294434j), (0.38772781971416226-0.306563166009585j), equal_nan=True, **{})
# dask/array/utils.py:361: AssertionError
k="${k-}${k+ and }not test_vdot[shape1-chunks1]"
%endif
# This test compares against files in .github/. It does not work on the PyPI
# sdist, and is only relevant to upstream CI anyway.
#
# test_development_guidelines_matches_ci fails from sdist
# https://github.com/dask/dask/issues/8499
k="${k-}${k+ and }not test_development_guidelines_matches_ci"
pytest_args=(
-m 'not network'
@ -127,12 +222,12 @@ pytest_args=(
%files -n python3-%{srcname} -f %{pyproject_files}
%doc README.rst
%license LICENSE.txt
%license LICENSE.txt dask/array/NUMPY_LICENSE.txt
%if %{without bootstrap}
%files -n python-%{srcname}-doc
%doc html
%license LICENSE.txt
%license LICENSE.txt dask/array/NUMPY_LICENSE.txt
%endif

View File

@ -1 +1 @@
SHA512 (dask-2022.02.1.tar.gz) = 6f66b5a904ea7ab0137465f622d1606a218b4ddc8b91939759098c66c7b749c674c750bf0caa6b53c7cf9d192312c0991d8e6144cebf9745eb9dfd4583b8b66c
SHA512 (dask-2022.5.0.tar.gz) = 40d119f5cee86c5ae6756501ed9f73a6fde0954cae961cca0ddf947e36df227163f87d7176d04388ab9ffe8ba18efbfb908adcf45de0064041a412b7596209a8