Patch broken tests to work.

Author: Elliott Sales de Andrade
Date:   2021-06-20 02:20:01 -04:00
Parent: 8ba0c74b59
Commit: 64f50c6b25
4 changed files with 914 additions and 9 deletions

0001-Skip-test_encoding_gh601-on-big-endian-machines.patch

@@ -1,7 +1,7 @@
From 0d43bed7efaf54dc2c40d853ce19cf62de128370 Mon Sep 17 00:00:00 2001
From: Elliott Sales de Andrade <quantum.analyst@gmail.com>
Date: Sun, 7 Mar 2021 04:07:32 -0500
-Subject: [PATCH] Skip test_encoding_gh601 on big-endian machines.
+Subject: [PATCH 1/3] Skip test_encoding_gh601 on big-endian machines.
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
---
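
Only the subject line of this patch changes here (it is renumbered as part of a three-patch series); the skip itself, added by the original patch, is not shown in this diff. As a rough sketch of the pattern such a guard uses, assuming pytest and the standard sys.byteorder check (the test body is hypothetical):

    import sys
    import pytest

    @pytest.mark.skipif(
        sys.byteorder == "big",
        reason="known failure on big-endian machines; see dask issue 6725",
    )
    def test_encoding_gh601():
        ...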

0002-Use-packaging-for-version-comparisons.patch

@@ -0,0 +1,878 @@
From ab18b267a2eeeaa60bda03cd67b21880c91d4766 Mon Sep 17 00:00:00 2001
From: Elliott Sales de Andrade <quantum.analyst@gmail.com>
Date: Sun, 13 Jun 2021 05:37:41 -0400
Subject: [PATCH 2/3] Use packaging for version comparisons.
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
---
dask/array/numpy_compat.py | 11 ++---
dask/array/tests/test_cupy.py | 22 +++++-----
dask/array/tests/test_sparse.py | 6 +--
dask/base.py | 4 +-
dask/bytes/tests/test_http.py | 4 +-
dask/bytes/tests/test_local.py | 4 +-
dask/bytes/tests/test_s3.py | 12 ++---
dask/dataframe/_compat.py | 18 ++++----
dask/dataframe/io/orc.py | 7 ++-
dask/dataframe/io/parquet/arrow.py | 12 ++---
dask/dataframe/io/parquet/core.py | 7 +--
dask/dataframe/io/parquet/fastparquet.py | 4 +-
dask/dataframe/io/tests/test_orc.py | 4 +-
dask/dataframe/io/tests/test_parquet.py | 56 ++++++++++++------------
dask/dataframe/tests/test_rolling.py | 6 +--
dask/diagnostics/profile_visualize.py | 6 +--
dask/diagnostics/tests/test_profiler.py | 10 ++---
dask/sizeof.py | 5 ++-
dask/tests/test_multiprocessing.py | 7 +--
setup.py | 1 +
20 files changed, 103 insertions(+), 103 deletions(-)
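
Every hunk below follows the same recipe: import parse from packaging.version, parse each library's __version__ once, and compare Version objects to Version objects. A minimal sketch of why the switch matters, assuming only that packaging is installed (it is added to install_requires at the end of this patch):

    from packaging.version import parse as parse_version

    # LooseVersion compares segment-by-segment and mixes str/int segments,
    # so e.g. LooseVersion("1.0.post1") vs LooseVersion("1.0.1") can raise
    # TypeError on Python 3; packaging implements PEP 440 ordering instead.
    assert parse_version("1.10.0") > parse_version("1.9.0")        # numeric, not lexical
    assert parse_version("0.7.0+10") > parse_version("0.7.0")      # local version sorts after the release
    assert parse_version("1.21.0.dev0") < parse_version("1.21.0")  # dev release sorts before

    # Parse once, compare many times -- the pattern used throughout:
    import numpy as np

    _np_version = parse_version(np.__version__)
    _numpy_120 = _np_version >= parse_version("1.20.0")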
diff --git a/dask/array/numpy_compat.py b/dask/array/numpy_compat.py
index 60d043d8..134c5839 100644
--- a/dask/array/numpy_compat.py
+++ b/dask/array/numpy_compat.py
@@ -1,14 +1,15 @@
import warnings
-from distutils.version import LooseVersion
import numpy as np
+from packaging.version import parse as parse_version
from ..utils import derived_from
-_numpy_117 = LooseVersion(np.__version__) >= "1.17.0"
-_numpy_118 = LooseVersion(np.__version__) >= "1.18.0"
-_numpy_120 = LooseVersion(np.__version__) >= "1.20.0"
-_numpy_121 = LooseVersion(np.__version__) >= "1.21.0"
+_np_version = parse_version(np.__version__)
+_numpy_117 = _np_version >= parse_version("1.17.0")
+_numpy_118 = _np_version >= parse_version("1.18.0")
+_numpy_120 = _np_version >= parse_version("1.20.0")
+_numpy_121 = _np_version >= parse_version("1.21.0")
# Taken from scikit-learn:
diff --git a/dask/array/tests/test_cupy.py b/dask/array/tests/test_cupy.py
index 26d5d3a4..be5c77df 100644
--- a/dask/array/tests/test_cupy.py
+++ b/dask/array/tests/test_cupy.py
@@ -1,7 +1,6 @@
-from distutils.version import LooseVersion
-
import numpy as np
import pytest
+from packaging.version import parse as parse_version
import dask
import dask.array as da
@@ -12,6 +11,7 @@ from dask.sizeof import sizeof
cupy = pytest.importorskip("cupy")
cupyx = pytest.importorskip("cupyx")
+cupy_version = parse_version(cupy.__version__)
functions = [
@@ -35,7 +35,7 @@ functions = [
pytest.param(
lambda x: x.mean(),
marks=pytest.mark.skipif(
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
reason="NEP-18 support is not available in NumPy or CuPy older than "
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
),
@@ -47,7 +47,7 @@ functions = [
pytest.param(
lambda x: x.std(),
marks=pytest.mark.skipif(
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
reason="NEP-18 support is not available in NumPy or CuPy older than "
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
),
@@ -55,7 +55,7 @@ functions = [
pytest.param(
lambda x: x.var(),
marks=pytest.mark.skipif(
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
reason="NEP-18 support is not available in NumPy or CuPy older than "
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
),
@@ -318,7 +318,7 @@ def test_diagonal():
@pytest.mark.skipif(
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
reason="NEP-18 support is not available in NumPy or CuPy older than "
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
)
@@ -336,7 +336,7 @@ def test_tril_triu():
@pytest.mark.skipif(
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
reason="NEP-18 support is not available in NumPy or CuPy older than "
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
)
@@ -448,7 +448,7 @@ def test_nearest():
@pytest.mark.skipif(
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
reason="NEP-18 support is not available in NumPy or CuPy older than "
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
)
@@ -465,7 +465,7 @@ def test_constant():
@pytest.mark.skipif(
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
reason="NEP-18 support is not available in NumPy or CuPy older than "
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
)
@@ -556,7 +556,7 @@ def test_random_shapes(shape):
@pytest.mark.skipif(
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.1.0"),
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.1.0"),
reason="NEP-18 support is not available in NumPy or CuPy older than "
"6.1.0 (requires https://github.com/cupy/cupy/pull/2209)",
)
@@ -936,7 +936,7 @@ def test_cupy_sparse_concatenate(axis):
@pytest.mark.skipif(not _numpy_120, reason="NEP-35 is not available")
@pytest.mark.skipif(
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
reason="NEP-18 support is not available in NumPy or CuPy older than "
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
)
diff --git a/dask/array/tests/test_sparse.py b/dask/array/tests/test_sparse.py
index df92f3e6..8ff041c9 100644
--- a/dask/array/tests/test_sparse.py
+++ b/dask/array/tests/test_sparse.py
@@ -1,8 +1,8 @@
import random
-from distutils.version import LooseVersion
import numpy as np
import pytest
+from packaging.version import parse as parse_version
import dask
import dask.array as da
@@ -34,7 +34,7 @@ functions = [
pytest.param(
lambda x: x.mean(),
marks=pytest.mark.skipif(
- sparse.__version__ >= LooseVersion("0.12.0"),
+ parse_version(sparse.__version__) >= parse_version("0.12.0"),
reason="https://github.com/dask/dask/issues/7169",
),
),
@@ -91,7 +91,7 @@ def test_basic(func):
@pytest.mark.skipif(
- sparse.__version__ < LooseVersion("0.7.0+10"),
+ parse_version(sparse.__version__) < parse_version("0.7.0+10"),
reason="fixed in https://github.com/pydata/sparse/pull/256",
)
def test_tensordot():
diff --git a/dask/base.py b/dask/base.py
index 59c4640d..1cd48cac 100644
--- a/dask/base.py
+++ b/dask/base.py
@@ -6,13 +6,13 @@ import uuid
from collections import OrderedDict
from contextlib import contextmanager
from dataclasses import fields, is_dataclass
-from distutils.version import LooseVersion
from functools import partial
from hashlib import md5
from numbers import Number
from operator import getitem
from typing import Iterator, Mapping, Set
+from packaging.version import parse as parse_version
from tlz import curry, groupby, identity, merge
from tlz.functoolz import Compose
@@ -898,7 +898,7 @@ def _normalize_function(func):
def register_pandas():
import pandas as pd
- PANDAS_GT_130 = LooseVersion(pd.__version__) >= LooseVersion("1.3.0")
+ PANDAS_GT_130 = parse_version(pd.__version__) >= parse_version("1.3.0")
@normalize_token.register(pd.Index)
def normalize_index(ind):
diff --git a/dask/bytes/tests/test_http.py b/dask/bytes/tests/test_http.py
index bee444f5..3a05ef81 100644
--- a/dask/bytes/tests/test_http.py
+++ b/dask/bytes/tests/test_http.py
@@ -2,11 +2,11 @@ import os
import subprocess
import sys
import time
-from distutils.version import LooseVersion
import fsspec
import pytest
from fsspec.core import open_files
+from packaging.version import parse as parse_version
import dask.bag as db
from dask.utils import tmpdir
@@ -14,7 +14,7 @@ from dask.utils import tmpdir
files = ["a", "b"]
requests = pytest.importorskip("requests")
errs = (requests.exceptions.RequestException,)
-if LooseVersion(fsspec.__version__) > "0.7.4":
+if parse_version(fsspec.__version__) > parse_version("0.7.4"):
aiohttp = pytest.importorskip("aiohttp")
errs = errs + (aiohttp.client_exceptions.ClientResponseError,)
diff --git a/dask/bytes/tests/test_local.py b/dask/bytes/tests/test_local.py
index 40b161c7..5564f92c 100644
--- a/dask/bytes/tests/test_local.py
+++ b/dask/bytes/tests/test_local.py
@@ -2,7 +2,6 @@ import gzip
import os
import pathlib
import sys
-from distutils.version import LooseVersion
from functools import partial
from time import sleep
@@ -11,6 +10,7 @@ import pytest
from fsspec.compression import compr
from fsspec.core import open_files
from fsspec.implementations.local import LocalFileSystem
+from packaging.version import parse as parse_version
from tlz import concat, valmap
from dask import compute
@@ -356,7 +356,7 @@ def test_get_pyarrow_filesystem():
from fsspec.implementations.local import LocalFileSystem
pa = pytest.importorskip("pyarrow")
- if pa.__version__ >= LooseVersion("2.0.0"):
+ if parse_version(pa.__version__).major >= 2:
pytest.skip("fsspec no loger inherits from pyarrow>=2.0.")
fs = LocalFileSystem()
diff --git a/dask/bytes/tests/test_s3.py b/dask/bytes/tests/test_s3.py
index 1412de3e..b24b30b5 100644
--- a/dask/bytes/tests/test_s3.py
+++ b/dask/bytes/tests/test_s3.py
@@ -5,10 +5,10 @@ import subprocess
import sys
import time
from contextlib import contextmanager
-from distutils.version import LooseVersion
from functools import partial
import pytest
+from packaging.version import parse as parse_version
s3fs = pytest.importorskip("s3fs")
boto3 = pytest.importorskip("boto3")
@@ -438,13 +438,13 @@ def test_parquet(s3, engine, s3so, metadata_file):
from dask.dataframe._compat import tm
lib = pytest.importorskip(engine)
- if engine == "pyarrow" and LooseVersion(lib.__version__) < "0.13.1":
+ lib_version = parse_version(lib.__version__)
+ if engine == "pyarrow" and lib_version < parse_version("0.13.1"):
pytest.skip("pyarrow < 0.13.1 not supported for parquet")
if (
engine == "pyarrow"
- and LooseVersion(lib.__version__) >= "2.0"
- and LooseVersion(lib.__version__) < "3.0"
- and LooseVersion(s3fs.__version__) > "0.5.0"
+ and lib_version.major == 2
+ and parse_version(s3fs.__version__) > parse_version("0.5.0")
):
pytest.skip("#7056 - new s3fs not supported before pyarrow 3.0")
@@ -507,7 +507,7 @@ def test_parquet_wstoragepars(s3, s3so):
def test_get_pyarrow_fs_s3(s3):
pa = pytest.importorskip("pyarrow")
- if pa.__version__ >= LooseVersion("2.0.0"):
+ if parse_version(pa.__version__).major >= 2:
pytest.skip("fsspec no loger inherits from pyarrow>=2.0.")
fs = DaskS3FileSystem(anon=True)
assert isinstance(fs, pa.filesystem.FileSystem)
diff --git a/dask/dataframe/_compat.py b/dask/dataframe/_compat.py
index 2a0b17fc..5e76d106 100644
--- a/dask/dataframe/_compat.py
+++ b/dask/dataframe/_compat.py
@@ -1,16 +1,16 @@
import string
-from distutils.version import LooseVersion
import numpy as np
import pandas as pd
-
-PANDAS_VERSION = LooseVersion(pd.__version__)
-PANDAS_GT_100 = PANDAS_VERSION >= LooseVersion("1.0.0")
-PANDAS_GT_104 = PANDAS_VERSION >= LooseVersion("1.0.4")
-PANDAS_GT_110 = PANDAS_VERSION >= LooseVersion("1.1.0")
-PANDAS_GT_120 = PANDAS_VERSION >= LooseVersion("1.2.0")
-PANDAS_GT_121 = PANDAS_VERSION >= LooseVersion("1.2.1")
-PANDAS_GT_130 = PANDAS_VERSION >= LooseVersion("1.3.0")
+from packaging.version import parse as parse_version
+
+PANDAS_VERSION = parse_version(pd.__version__)
+PANDAS_GT_100 = PANDAS_VERSION >= parse_version("1.0.0")
+PANDAS_GT_104 = PANDAS_VERSION >= parse_version("1.0.4")
+PANDAS_GT_110 = PANDAS_VERSION >= parse_version("1.1.0")
+PANDAS_GT_120 = PANDAS_VERSION >= parse_version("1.2.0")
+PANDAS_GT_121 = PANDAS_VERSION >= parse_version("1.2.1")
+PANDAS_GT_130 = PANDAS_VERSION >= parse_version("1.3.0")
if PANDAS_GT_100:
diff --git a/dask/dataframe/io/orc.py b/dask/dataframe/io/orc.py
index 40740310..69eca3c5 100644
--- a/dask/dataframe/io/orc.py
+++ b/dask/dataframe/io/orc.py
@@ -1,6 +1,5 @@
-from distutils.version import LooseVersion
-
from fsspec.core import get_fs_token_paths
+from packaging.version import parse as parse_version
from ...base import tokenize
from ...highlevelgraph import HighLevelGraph
@@ -49,7 +48,7 @@ def _read_orc_stripe(fs, path, stripe, columns=None):
with fs.open(path, "rb") as f:
o = orc.ORCFile(f)
table = o.read_stripe(stripe, columns)
- if pa.__version__ < LooseVersion("0.11.0"):
+ if parse_version(pa.__version__) < parse_version("0.11.0"):
return table.to_pandas()
else:
return table.to_pandas(date_as_object=False)
@@ -80,7 +79,7 @@ def read_orc(path, columns=None, storage_options=None):
orc = import_required("pyarrow.orc", "Please install pyarrow >= 0.9.0")
import pyarrow as pa
- if LooseVersion(pa.__version__) == "0.10.0":
+ if parse_version(pa.__version__) == parse_version("0.10.0"):
raise RuntimeError(
"Due to a bug in pyarrow 0.10.0, the ORC reader is "
"unavailable. Please either downgrade pyarrow to "
diff --git a/dask/dataframe/io/parquet/arrow.py b/dask/dataframe/io/parquet/arrow.py
index 99227157..83bb4bf8 100644
--- a/dask/dataframe/io/parquet/arrow.py
+++ b/dask/dataframe/io/parquet/arrow.py
@@ -2,13 +2,13 @@ import json
import warnings
from collections import defaultdict
from datetime import datetime
-from distutils.version import LooseVersion
from functools import partial
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
+from packaging.version import parse as parse_version
from dask import delayed
@@ -27,14 +27,16 @@ from .utils import (
)
# Check PyArrow version for feature support
-preserve_ind_supported = pa.__version__ >= LooseVersion("0.15.0")
+_pa_version = parse_version(pa.__version__)
+preserve_ind_supported = _pa_version >= parse_version("0.15.0")
read_row_groups_supported = preserve_ind_supported
-if pa.__version__ >= LooseVersion("1.0.0"):
+if _pa_version.major >= 1:
from pyarrow import dataset as pa_ds
else:
pa_ds = None
-subset_stats_supported = pa.__version__ > LooseVersion("2.0.0")
-schema_field_supported = pa.__version__ >= LooseVersion("0.15.0")
+subset_stats_supported = _pa_version > parse_version("2.0.0")
+schema_field_supported = _pa_version >= parse_version("0.15.0")
+del _pa_version
#
# Helper Utilities
diff --git a/dask/dataframe/io/parquet/core.py b/dask/dataframe/io/parquet/core.py
index e11de5e3..00ab6ee0 100644
--- a/dask/dataframe/io/parquet/core.py
+++ b/dask/dataframe/io/parquet/core.py
@@ -1,11 +1,11 @@
import math
import warnings
-from distutils.version import LooseVersion
import tlz as toolz
from fsspec.core import get_fs_token_paths
from fsspec.implementations.local import LocalFileSystem
from fsspec.utils import stringify_path
+from packaging.version import parse as parse_version
from ....base import tokenize
from ....delayed import Delayed
@@ -844,11 +844,12 @@ def get_engine(engine):
elif engine in ("pyarrow", "arrow", "pyarrow-legacy", "pyarrow-dataset"):
pa = import_required("pyarrow", "`pyarrow` not installed")
+ pa_version = parse_version(pa.__version__)
- if LooseVersion(pa.__version__) < "0.13.1":
+ if pa_version < parse_version("0.13.1"):
raise RuntimeError("PyArrow version >= 0.13.1 required")
- if engine == "pyarrow-dataset" and LooseVersion(pa.__version__) >= "1.0.0":
+ if engine == "pyarrow-dataset" and pa_version.major >= 1:
from .arrow import ArrowDatasetEngine
_ENGINES[engine] = eng = ArrowDatasetEngine
diff --git a/dask/dataframe/io/parquet/fastparquet.py b/dask/dataframe/io/parquet/fastparquet.py
index 5c817294..ae28c792 100644
--- a/dask/dataframe/io/parquet/fastparquet.py
+++ b/dask/dataframe/io/parquet/fastparquet.py
@@ -3,11 +3,11 @@ import json
import pickle
import warnings
from collections import OrderedDict, defaultdict
-from distutils.version import LooseVersion
import numpy as np
import pandas as pd
import tlz as toolz
+from packaging.version import parse as parse_version
try:
import fastparquet
@@ -924,7 +924,7 @@ class FastParquetEngine(Engine):
rgs = []
elif partition_on:
mkdirs = lambda x: fs.mkdirs(x, exist_ok=True)
- if LooseVersion(fastparquet.__version__) >= "0.1.4":
+ if parse_version(fastparquet.__version__) >= parse_version("0.1.4"):
rgs = partition_on_columns(
df, partition_on, path, filename, fmd, compression, fs.open, mkdirs
)
diff --git a/dask/dataframe/io/tests/test_orc.py b/dask/dataframe/io/tests/test_orc.py
index 1c1ca00e..580a7ded 100644
--- a/dask/dataframe/io/tests/test_orc.py
+++ b/dask/dataframe/io/tests/test_orc.py
@@ -1,9 +1,9 @@
import os
import shutil
import tempfile
-from distutils.version import LooseVersion
import pytest
+from packaging.version import parse as parse_version
import dask.dataframe as dd
from dask.dataframe import read_orc
@@ -16,7 +16,7 @@ pytest.importorskip("pyarrow.orc")
import pyarrow as pa
pytestmark = pytest.mark.skipif(
- LooseVersion(pa.__version__) == "0.10.0",
+ parse_version(pa.__version__).base_version == "0.10.0",
reason=(
"PyArrow 0.10.0 release broke the ORC reader, see "
"https://issues.apache.org/jira/browse/ARROW-3009"
diff --git a/dask/dataframe/io/tests/test_parquet.py b/dask/dataframe/io/tests/test_parquet.py
index a5a1244a..28df8771 100644
--- a/dask/dataframe/io/tests/test_parquet.py
+++ b/dask/dataframe/io/tests/test_parquet.py
@@ -4,11 +4,11 @@ import os
import sys
import warnings
from decimal import Decimal
-from distutils.version import LooseVersion
import numpy as np
import pandas as pd
import pytest
+from packaging.version import parse as parse_version
import dask
import dask.dataframe as dd
@@ -25,12 +25,18 @@ try:
import fastparquet
except ImportError:
fastparquet = False
+ fastparquet_version = parse_version("0")
+else:
+ fastparquet_version = parse_version(fastparquet.__version__)
try:
import pyarrow as pa
except ImportError:
pa = False
+ pa_version = parse_version("0")
+else:
+ pa_version = parse_version(pa.__version__)
try:
import pyarrow.parquet as pq
@@ -41,7 +47,7 @@ except ImportError:
SKIP_FASTPARQUET = not fastparquet
FASTPARQUET_MARK = pytest.mark.skipif(SKIP_FASTPARQUET, reason="fastparquet not found")
-if pq and pa.__version__ < LooseVersion("0.13.1"):
+if pq and pa_version < parse_version("0.13.1"):
SKIP_PYARROW = True
SKIP_PYARROW_REASON = "pyarrow >= 0.13.1 required for parquet"
else:
@@ -49,8 +55,8 @@ else:
sys.platform == "win32"
and pa
and (
- (pa.__version__ == LooseVersion("0.16.0"))
- or (pa.__version__ == LooseVersion("2.0.0"))
+ pa_version == parse_version("0.16.0")
+ or pa_version == parse_version("2.0.0")
)
):
SKIP_PYARROW = True
@@ -64,7 +70,7 @@ else:
SKIP_PYARROW_REASON = "pyarrow not found"
PYARROW_MARK = pytest.mark.skipif(SKIP_PYARROW, reason=SKIP_PYARROW_REASON)
-if pa and pa.__version__ < LooseVersion("1.0.0"):
+if pa and pa_version.major < 1:
SKIP_PYARROW_DS = True
SKIP_PYARROW_DS_REASON = "pyarrow >= 1.0.0 required for pyarrow dataset API"
else:
@@ -147,7 +153,7 @@ write_read_engines_xfail = write_read_engines(
if (
fastparquet
- and fastparquet.__version__ < LooseVersion("0.5")
+ and fastparquet_version < parse_version("0.5")
and PANDAS_GT_110
and not PANDAS_GT_121
):
@@ -822,7 +828,7 @@ def test_append_dict_column(tmpdir, engine):
if engine == "fastparquet":
pytest.xfail("Fastparquet engine is missing dict-column support")
- elif pa.__version__ < LooseVersion("1.0.1"):
+ elif pa_version < parse_version("1.0.1"):
pytest.skip("Newer PyArrow version required for dict-column support.")
tmp = str(tmpdir)
@@ -981,7 +987,7 @@ def test_categories_unnamed_index(tmpdir, engine):
# Check that we can handle an unnamed categorical index
# https://github.com/dask/dask/issues/6885
- if engine.startswith("pyarrow") and pa.__version__ < LooseVersion("0.15.0"):
+ if engine.startswith("pyarrow") and pa_version < parse_version("0.15.0"):
pytest.skip("PyArrow>=0.15 Required.")
tmpdir = str(tmpdir)
@@ -1166,7 +1172,7 @@ def test_to_parquet_pyarrow_w_inconsistent_schema_by_partition_succeeds_w_manual
@pytest.mark.parametrize("index", [False, True])
@pytest.mark.parametrize("schema", ["infer", "complex"])
def test_pyarrow_schema_inference(tmpdir, index, engine, schema):
- if pa.__version__ < LooseVersion("0.15.0"):
+ if pa_version < parse_version("0.15.0"):
pytest.skip("PyArrow>=0.15 Required.")
if schema == "complex":
schema = {"index": pa.string(), "amount": pa.int64()}
@@ -1359,9 +1365,7 @@ def test_filters_v0(tmpdir, write_engine, read_engine):
# Recent versions of pyarrow support full row-wise filtering
# (fastparquet and older pyarrow versions do not)
- pyarrow_row_filtering = (
- read_engine == "pyarrow-dataset" and pa.__version__ >= LooseVersion("1.0.0")
- )
+ pyarrow_row_filtering = read_engine == "pyarrow-dataset" and pa_version.major >= 1
fn = str(tmpdir)
df = pd.DataFrame({"at": ["ab", "aa", "ba", "da", "bb"]})
@@ -1462,7 +1466,7 @@ def test_pyarrow_filter_divisions(tmpdir):
str(tmpdir.join("file.1.parquet")), engine="pyarrow", row_group_size=2
)
- if pa.__version__ >= LooseVersion("1.0.0"):
+ if pa_version.major >= 1:
# Only works for ArrowDatasetEngine.
# Legacy code will not apply filters on individual row-groups
# when `split_row_groups=False`.
@@ -1637,7 +1641,7 @@ def test_parquet_select_cats(tmpdir, engine):
def test_columns_name(tmpdir, engine):
- if engine == "fastparquet" and fastparquet.__version__ <= LooseVersion("0.3.1"):
+ if engine == "fastparquet" and fastparquet_version <= parse_version("0.3.1"):
pytest.skip("Fastparquet does not write column_indexes up to 0.3.1")
tmp_path = str(tmpdir)
df = pd.DataFrame({"A": [1, 2]}, index=pd.Index(["a", "b"], name="idx"))
@@ -2041,9 +2045,7 @@ def test_to_parquet_with_get(tmpdir):
def test_select_partitioned_column(tmpdir, engine):
pytest.importorskip("snappy")
if engine.startswith("pyarrow"):
- import pyarrow as pa
-
- if pa.__version__ < LooseVersion("0.9.0"):
+ if pa_version < parse_version("0.9.0"):
pytest.skip("pyarrow<0.9.0 did not support this")
fn = str(tmpdir)
@@ -2067,9 +2069,9 @@ def test_select_partitioned_column(tmpdir, engine):
def test_with_tz(tmpdir, engine):
- if engine.startswith("pyarrow") and pa.__version__ < LooseVersion("0.11.0"):
+ if engine.startswith("pyarrow") and pa_version < parse_version("0.11.0"):
pytest.skip("pyarrow<0.11.0 did not support this")
- if engine == "fastparquet" and fastparquet.__version__ < LooseVersion("0.3.0"):
+ if engine == "fastparquet" and fastparquet_version < parse_version("0.3.0"):
pytest.skip("fastparquet<0.3.0 did not support this")
with warnings.catch_warnings():
@@ -2278,7 +2280,7 @@ def test_timeseries_nulls_in_schema(tmpdir, engine, schema):
if (
schema == "infer"
and engine.startswith("pyarrow")
- and pa.__version__ < LooseVersion("0.15.0")
+ and pa_version < parse_version("0.15.0")
):
pytest.skip("PyArrow>=0.15 Required.")
@@ -2814,9 +2816,7 @@ def test_filter_nonpartition_columns(
@PYARROW_MARK
def test_pandas_metadata_nullable_pyarrow(tmpdir):
- if pa.__version__ < LooseVersion("0.16.0") or pd.__version__ < LooseVersion(
- "1.0.0"
- ):
+ if pa_version < parse_version("0.16.0") or parse_version(pd.__version__).major < 1:
pytest.skip("PyArrow>=0.16 and Pandas>=1.0.0 Required.")
tmpdir = str(tmpdir)
@@ -2837,7 +2837,7 @@ def test_pandas_metadata_nullable_pyarrow(tmpdir):
@PYARROW_MARK
def test_pandas_timestamp_overflow_pyarrow(tmpdir):
- if pa.__version__ < LooseVersion("0.17.0"):
+ if pa_version < parse_version("0.17.0"):
pytest.skip("PyArrow>=0.17 Required.")
info = np.iinfo(np.dtype("int64"))
@@ -2947,7 +2947,7 @@ def test_partitioned_column_overlap(tmpdir, engine, write_cols):
@fp_pandas_xfail
def test_partitioned_preserve_index(tmpdir, write_engine, read_engine):
- if write_engine.startswith("pyarrow") and pa.__version__ < LooseVersion("0.15.0"):
+ if write_engine.startswith("pyarrow") and pa_version < parse_version("0.15.0"):
pytest.skip("PyArrow>=0.15 Required.")
tmp = str(tmpdir)
@@ -3065,7 +3065,7 @@ def test_pyarrow_dataset_simple(tmpdir, engine):
@PYARROW_MARK
@pytest.mark.parametrize("test_filter", [True, False])
def test_pyarrow_dataset_partitioned(tmpdir, engine, test_filter):
- if pa.__version__ <= LooseVersion("0.17.1"):
+ if pa_version <= parse_version("0.17.1"):
# Using pyarrow.dataset API does not produce
# Categorical type for partitioned columns.
pytest.skip("PyArrow>0.17.1 Required.")
@@ -3093,7 +3093,7 @@ def test_pyarrow_dataset_partitioned(tmpdir, engine, test_filter):
def test_pyarrow_dataset_read_from_paths(
tmpdir, read_from_paths, test_filter_partitioned
):
- if pa.__version__ <= LooseVersion("0.17.1"):
+ if pa_version <= parse_version("0.17.1"):
# Using pyarrow.dataset API does not produce
# Categorical type for partitioned columns.
pytest.skip("PyArrow>0.17.1 Required.")
@@ -3123,7 +3123,7 @@ def test_pyarrow_dataset_read_from_paths(
@PYARROW_MARK
@pytest.mark.parametrize("split_row_groups", [True, False])
def test_pyarrow_dataset_filter_partitioned(tmpdir, split_row_groups):
- if pa.__version__ < LooseVersion("1.0.0"):
+ if pa_version.major < 1:
# pyarrow.dataset API required.
pytest.skip("PyArrow>=1.0.0 Required.")
diff --git a/dask/dataframe/tests/test_rolling.py b/dask/dataframe/tests/test_rolling.py
index 19cb5b56..ea023b40 100644
--- a/dask/dataframe/tests/test_rolling.py
+++ b/dask/dataframe/tests/test_rolling.py
@@ -1,8 +1,7 @@
-from distutils.version import LooseVersion
-
import numpy as np
import pandas as pd
import pytest
+from packaging.version import parse as parse_version
import dask.dataframe as dd
from dask.dataframe._compat import PANDAS_GT_130
@@ -404,7 +403,8 @@ def test_rolling_agg_aggregate():
@pytest.mark.skipif(not dd._compat.PANDAS_GT_100, reason="needs pandas>=1.0.0")
def test_rolling_numba_engine():
numba = pytest.importorskip("numba")
- if not dd._compat.PANDAS_GT_104 and LooseVersion(numba.__version__) >= "0.49":
+ numba_version = parse_version(numba.__version__)
+ if not dd._compat.PANDAS_GT_104 and numba_version >= parse_version("0.49"):
# Was fixed in https://github.com/pandas-dev/pandas/pull/33687
pytest.xfail("Known incompatibility between pandas and numba")
diff --git a/dask/diagnostics/profile_visualize.py b/dask/diagnostics/profile_visualize.py
index 751957da..b62c6fba 100644
--- a/dask/diagnostics/profile_visualize.py
+++ b/dask/diagnostics/profile_visualize.py
@@ -1,9 +1,9 @@
import random
from bisect import bisect_left
-from distutils.version import LooseVersion
from itertools import cycle
from operator import add, itemgetter
+from packaging.version import parse as parse_version
from tlz import accumulate, groupby, pluck, unique
from ..core import istask
@@ -372,7 +372,7 @@ def plot_resources(results, palette="Viridis", **kwargs):
line_width=4,
**{
"legend_label"
- if LooseVersion(bokeh.__version__) >= "1.4"
+ if parse_version(bokeh.__version__) >= parse_version("1.4")
else "legend": "% CPU"
}
)
@@ -390,7 +390,7 @@ def plot_resources(results, palette="Viridis", **kwargs):
line_width=4,
**{
"legend_label"
- if LooseVersion(bokeh.__version__) >= "1.4"
+ if parse_version(bokeh.__version__) >= parse_version("1.4")
else "legend": "Memory"
}
)
diff --git a/dask/diagnostics/tests/test_profiler.py b/dask/diagnostics/tests/test_profiler.py
index 5d995b87..a31943f8 100644
--- a/dask/diagnostics/tests/test_profiler.py
+++ b/dask/diagnostics/tests/test_profiler.py
@@ -1,10 +1,10 @@
import contextlib
import os
-from distutils.version import LooseVersion
from operator import add, mul
from time import sleep
import pytest
+from packaging.version import parse as parse_version
from dask.diagnostics import CacheProfiler, Profiler, ResourceProfiler
from dask.threaded import get
@@ -326,10 +326,10 @@ def test_plot_multiple():
p = visualize(
[prof, rprof], label_size=50, title="Not the default", show=False, save=False
)
- bokeh_version = LooseVersion(bokeh.__version__)
- if bokeh_version >= "1.1.0":
+ bokeh_version = parse_version(bokeh.__version__)
+ if bokeh_version >= parse_version("1.1.0"):
figures = [r[0] for r in p.children[1].children]
- elif bokeh_version >= "0.12.0":
+ elif bokeh_version >= parse_version("0.12.0"):
figures = [r.children[0] for r in p.children[1].children]
else:
figures = [r[0] for r in p.children]
@@ -364,7 +364,7 @@ def test_get_colors():
from dask.diagnostics.profile_visualize import get_colors
# 256-color palettes were added in bokeh 1.4.0
- if LooseVersion(bokeh.__version__) >= "1.4.0":
+ if parse_version(bokeh.__version__) >= parse_version("1.4.0"):
from bokeh.palettes import Blues256
funcs = list(range(11))
diff --git a/dask/sizeof.py b/dask/sizeof.py
index 570b6251..38c06885 100644
--- a/dask/sizeof.py
+++ b/dask/sizeof.py
@@ -2,7 +2,8 @@ import itertools
import random
import sys
from array import array
-from distutils.version import LooseVersion
+
+from packaging.version import parse as parse_version
from .utils import Dispatch
@@ -195,7 +196,7 @@ def register_pyarrow():
return int(_get_col_size(data)) + 1000
# Handle pa.Column for pyarrow < 0.15
- if pa.__version__ < LooseVersion("0.15.0"):
+ if parse_version(pa.__version__) < parse_version("0.15.0"):
@sizeof.register(pa.Column)
def sizeof_pyarrow_column(col):
diff --git a/dask/tests/test_multiprocessing.py b/dask/tests/test_multiprocessing.py
index 51c7044f..16cdca8b 100644
--- a/dask/tests/test_multiprocessing.py
+++ b/dask/tests/test_multiprocessing.py
@@ -2,10 +2,8 @@ import multiprocessing
import pickle
import sys
from concurrent.futures import ProcessPoolExecutor
-from distutils.version import LooseVersion
from operator import add
-import cloudpickle
import pytest
import dask
@@ -49,13 +47,10 @@ def test_pickle_locals():
@pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5, reason="requires pickle protocol 5")
-@pytest.mark.skipif(
- cloudpickle.__version__ < LooseVersion("1.3.0"),
- reason="requires cloudpickle >= 1.3.0",
-)
def test_out_of_band_pickling():
"""Test that out-of-band pickling works"""
np = pytest.importorskip("numpy")
+ pytest.importorskip("cloudpickle", minversion="1.3.0")
a = np.arange(5)
diff --git a/setup.py b/setup.py
index 41751134..9365926c 100755
--- a/setup.py
+++ b/setup.py
@@ -22,6 +22,7 @@ extras_require["complete"] = sorted({v for req in extras_require.values() for v
extras_require["test"] = ["pytest", "pytest-rerunfailures", "pytest-xdist"]
install_requires = [
+ "packaging",
"pyyaml",
"cloudpickle >= 1.1.1",
"fsspec >= 0.6.0",
--
2.31.1
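
Several hunks above also collapse range checks such as "2.0" <= version < "3.0" into the Version.major attribute. A short sketch of that attribute (available in packaging >= 20.0); note the subtle widening for pre-releases:

    from packaging.version import parse as parse_version

    assert parse_version("2.0.1").major == 2     # .major == 2 covers the whole 2.x series
    assert parse_version("1.0.0rc1").major == 1  # pre-releases count too, whereas a
                                                 # >= parse_version("1.0.0") check excludes them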

0003-fix-index_col-duplication-if-index_col-is-type-str.patch

@@ -0,0 +1,30 @@
From 57cf597227fb78a42cabbeedab146260e6f485e2 Mon Sep 17 00:00:00 2001
From: McToel <theo.doellmann@gmx.de>
Date: Sun, 16 May 2021 11:11:06 +0200
Subject: [PATCH 3/3] fix index_col duplication if index_col is type str
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
---
dask/dataframe/io/sql.py | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/dask/dataframe/io/sql.py b/dask/dataframe/io/sql.py
index 112876e2..d698c494 100644
--- a/dask/dataframe/io/sql.py
+++ b/dask/dataframe/io/sql.py
@@ -125,10 +125,8 @@ def read_sql_table(
if columns
else list(table.columns)
)
- if index_col not in columns:
- columns.append(
- table.columns[index_col] if isinstance(index_col, str) else index_col
- )
+ if index not in columns:
+ columns.append(index)
if isinstance(index_col, str):
kwargs["index_col"] = index_col
--
2.31.1
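
The bug: index_col may be a plain string while columns holds SQLAlchemy Column objects, so the old membership test never matched and the index column was appended (and therefore selected) a second time. A minimal reproduction of just that check, assuming SQLAlchemy's Column equality semantics and a hypothetical table layout:

    import sqlalchemy as sa

    table = sa.Table(
        "t", sa.MetaData(), sa.Column("id", sa.Integer), sa.Column("x", sa.Float)
    )
    index_col = "id"
    index = table.columns[index_col]  # the resolved Column object
    columns = list(table.columns)     # [Column("id"), Column("x")]

    # Old check: a str never compares equal to a Column, so this was always
    # True and the already-present index column was appended again.
    assert index_col not in columns
    # Fixed check uses the resolved Column, which is found by identity.
    assert index in columns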

python-dask.spec

@@ -15,6 +15,10 @@ URL: https://github.com/dask/dask/
Source0: %pypi_source
# https://github.com/dask/dask/issues/6725
Patch0001: 0001-Skip-test_encoding_gh601-on-big-endian-machines.patch
+# https://github.com/dask/dask/pull/7280
+Patch0002: 0002-Use-packaging-for-version-comparisons.patch
+# https://github.com/dask/dask/pull/7661
+Patch0003: 0003-fix-index_col-duplication-if-index_col-is-type-str.patch
BuildArch: noarch
@@ -169,10 +173,6 @@ Documentation for dask.
# Remove bundled egg-info
rm -rf %{srcname}.egg-info
-# Disable failing on deprecation warnings.
-# It seems upstream mistook the distribution system for its own CI.
-sed -r -i 's/filterwarnings =/\0\n ignore::DeprecationWarning/; /error:::/d' setup.cfg
%build
%py3_build
@@ -198,10 +198,7 @@ pytest_args=(
-m 'not network'
# https://bugzilla.redhat.com/show_bug.cgi?id=1968947#c4
--ignore=dask/dataframe/io/tests/test_sql.py
-# Those also fail, but don't seem very important.
---ignore=dask/tests/test_config.py::test_collect_yaml_permission_errors
---deselect=dask/dataframe/io/tests/test_sql.py::test_select_from_select
-n %[0%{?have_arm}?"2":"auto"]
)