2021-07-11 07:55:07 +00:00
|
|
|
From 41bf8806be46f4cbadc6492539dff709791e25de Mon Sep 17 00:00:00 2001
|
2021-06-20 06:20:01 +00:00
|
|
|
From: Elliott Sales de Andrade <quantum.analyst@gmail.com>
|
|
|
|
Date: Sun, 13 Jun 2021 05:37:41 -0400
|
2021-07-11 07:55:07 +00:00
|
|
|
Subject: [PATCH 2/3] Use packaging for version comparisons.
|
2021-06-20 06:20:01 +00:00
|
|
|
|
|
|
|
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
|
|
|
|
---
|
|
|
|
dask/array/numpy_compat.py | 11 ++---
|
|
|
|
dask/array/tests/test_cupy.py | 22 +++++-----
|
|
|
|
dask/array/tests/test_sparse.py | 6 +--
|
|
|
|
dask/base.py | 4 +-
|
|
|
|
dask/bytes/tests/test_http.py | 4 +-
|
|
|
|
dask/bytes/tests/test_local.py | 4 +-
|
|
|
|
dask/bytes/tests/test_s3.py | 12 ++---
|
|
|
|
dask/dataframe/_compat.py | 18 ++++----
|
|
|
|
dask/dataframe/io/orc.py | 7 ++-
|
|
|
|
dask/dataframe/io/parquet/arrow.py | 12 ++---
|
|
|
|
dask/dataframe/io/parquet/core.py | 7 +--
|
|
|
|
dask/dataframe/io/parquet/fastparquet.py | 4 +-
|
|
|
|
dask/dataframe/io/tests/test_orc.py | 4 +-
|
|
|
|
dask/dataframe/io/tests/test_parquet.py | 56 ++++++++++++------------
|
|
|
|
dask/dataframe/tests/test_rolling.py | 6 +--
|
|
|
|
dask/diagnostics/profile_visualize.py | 6 +--
|
|
|
|
dask/diagnostics/tests/test_profiler.py | 10 ++---
|
|
|
|
dask/sizeof.py | 5 ++-
|
|
|
|
dask/tests/test_multiprocessing.py | 7 +--
|
|
|
|
setup.py | 1 +
|
|
|
|
20 files changed, 103 insertions(+), 103 deletions(-)
|
|
|
|
|
|
|
|
diff --git a/dask/array/numpy_compat.py b/dask/array/numpy_compat.py
|
|
|
|
index 60d043d8..134c5839 100644
|
|
|
|
--- a/dask/array/numpy_compat.py
|
|
|
|
+++ b/dask/array/numpy_compat.py
|
|
|
|
@@ -1,14 +1,15 @@
|
|
|
|
import warnings
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
|
|
|
|
from ..utils import derived_from
|
|
|
|
|
|
|
|
-_numpy_117 = LooseVersion(np.__version__) >= "1.17.0"
|
|
|
|
-_numpy_118 = LooseVersion(np.__version__) >= "1.18.0"
|
|
|
|
-_numpy_120 = LooseVersion(np.__version__) >= "1.20.0"
|
|
|
|
-_numpy_121 = LooseVersion(np.__version__) >= "1.21.0"
|
|
|
|
+_np_version = parse_version(np.__version__)
|
|
|
|
+_numpy_117 = _np_version >= parse_version("1.17.0")
|
|
|
|
+_numpy_118 = _np_version >= parse_version("1.18.0")
|
|
|
|
+_numpy_120 = _np_version >= parse_version("1.20.0")
|
|
|
|
+_numpy_121 = _np_version >= parse_version("1.21.0")
|
|
|
|
|
|
|
|
|
|
|
|
# Taken from scikit-learn:
|
|
|
|
diff --git a/dask/array/tests/test_cupy.py b/dask/array/tests/test_cupy.py
|
|
|
|
index 26d5d3a4..be5c77df 100644
|
|
|
|
--- a/dask/array/tests/test_cupy.py
|
|
|
|
+++ b/dask/array/tests/test_cupy.py
|
|
|
|
@@ -1,7 +1,6 @@
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
-
|
|
|
|
import numpy as np
|
|
|
|
import pytest
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
|
|
|
|
import dask
|
|
|
|
import dask.array as da
|
|
|
|
@@ -12,6 +11,7 @@ from dask.sizeof import sizeof
|
|
|
|
|
|
|
|
cupy = pytest.importorskip("cupy")
|
|
|
|
cupyx = pytest.importorskip("cupyx")
|
|
|
|
+cupy_version = parse_version(cupy.__version__)
|
|
|
|
|
|
|
|
|
|
|
|
functions = [
|
|
|
|
@@ -35,7 +35,7 @@ functions = [
|
|
|
|
pytest.param(
|
|
|
|
lambda x: x.mean(),
|
|
|
|
marks=pytest.mark.skipif(
|
|
|
|
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
|
|
|
|
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
|
|
|
|
reason="NEP-18 support is not available in NumPy or CuPy older than "
|
|
|
|
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
|
|
|
|
),
|
|
|
|
@@ -47,7 +47,7 @@ functions = [
|
|
|
|
pytest.param(
|
|
|
|
lambda x: x.std(),
|
|
|
|
marks=pytest.mark.skipif(
|
|
|
|
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
|
|
|
|
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
|
|
|
|
reason="NEP-18 support is not available in NumPy or CuPy older than "
|
|
|
|
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
|
|
|
|
),
|
|
|
|
@@ -55,7 +55,7 @@ functions = [
|
|
|
|
pytest.param(
|
|
|
|
lambda x: x.var(),
|
|
|
|
marks=pytest.mark.skipif(
|
|
|
|
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
|
|
|
|
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
|
|
|
|
reason="NEP-18 support is not available in NumPy or CuPy older than "
|
|
|
|
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
|
|
|
|
),
|
|
|
|
@@ -318,7 +318,7 @@ def test_diagonal():
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skipif(
|
|
|
|
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
|
|
|
|
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
|
|
|
|
reason="NEP-18 support is not available in NumPy or CuPy older than "
|
|
|
|
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
|
|
|
|
)
|
|
|
|
@@ -336,7 +336,7 @@ def test_tril_triu():
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skipif(
|
|
|
|
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
|
|
|
|
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
|
|
|
|
reason="NEP-18 support is not available in NumPy or CuPy older than "
|
|
|
|
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
|
|
|
|
)
|
|
|
|
@@ -448,7 +448,7 @@ def test_nearest():
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skipif(
|
|
|
|
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
|
|
|
|
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
|
|
|
|
reason="NEP-18 support is not available in NumPy or CuPy older than "
|
|
|
|
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
|
|
|
|
)
|
|
|
|
@@ -465,7 +465,7 @@ def test_constant():
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skipif(
|
|
|
|
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
|
|
|
|
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
|
|
|
|
reason="NEP-18 support is not available in NumPy or CuPy older than "
|
|
|
|
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
|
|
|
|
)
|
|
|
|
@@ -556,7 +556,7 @@ def test_random_shapes(shape):
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skipif(
|
|
|
|
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.1.0"),
|
|
|
|
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.1.0"),
|
|
|
|
reason="NEP-18 support is not available in NumPy or CuPy older than "
|
|
|
|
"6.1.0 (requires https://github.com/cupy/cupy/pull/2209)",
|
|
|
|
)
|
|
|
|
@@ -936,7 +936,7 @@ def test_cupy_sparse_concatenate(axis):
|
|
|
|
|
|
|
|
@pytest.mark.skipif(not _numpy_120, reason="NEP-35 is not available")
|
|
|
|
@pytest.mark.skipif(
|
|
|
|
- not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
|
|
|
|
+ not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
|
|
|
|
reason="NEP-18 support is not available in NumPy or CuPy older than "
|
|
|
|
"6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
|
|
|
|
)
|
|
|
|
diff --git a/dask/array/tests/test_sparse.py b/dask/array/tests/test_sparse.py
|
|
|
|
index df92f3e6..8ff041c9 100644
|
|
|
|
--- a/dask/array/tests/test_sparse.py
|
|
|
|
+++ b/dask/array/tests/test_sparse.py
|
|
|
|
@@ -1,8 +1,8 @@
|
|
|
|
import random
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
import pytest
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
|
|
|
|
import dask
|
|
|
|
import dask.array as da
|
|
|
|
@@ -34,7 +34,7 @@ functions = [
|
|
|
|
pytest.param(
|
|
|
|
lambda x: x.mean(),
|
|
|
|
marks=pytest.mark.skipif(
|
|
|
|
- sparse.__version__ >= LooseVersion("0.12.0"),
|
|
|
|
+ parse_version(sparse.__version__) >= parse_version("0.12.0"),
|
|
|
|
reason="https://github.com/dask/dask/issues/7169",
|
|
|
|
),
|
|
|
|
),
|
|
|
|
@@ -91,7 +91,7 @@ def test_basic(func):
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skipif(
|
|
|
|
- sparse.__version__ < LooseVersion("0.7.0+10"),
|
|
|
|
+ parse_version(sparse.__version__) < parse_version("0.7.0+10"),
|
|
|
|
reason="fixed in https://github.com/pydata/sparse/pull/256",
|
|
|
|
)
|
|
|
|
def test_tensordot():
|
|
|
|
diff --git a/dask/base.py b/dask/base.py
|
2021-07-11 07:55:07 +00:00
|
|
|
index 73a4ff50..79a000a0 100644
|
2021-06-20 06:20:01 +00:00
|
|
|
--- a/dask/base.py
|
|
|
|
+++ b/dask/base.py
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -7,13 +7,13 @@ import uuid
|
2021-06-20 06:20:01 +00:00
|
|
|
from collections import OrderedDict
|
|
|
|
from contextlib import contextmanager
|
|
|
|
from dataclasses import fields, is_dataclass
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
from functools import partial
|
|
|
|
from hashlib import md5
|
|
|
|
from numbers import Number
|
|
|
|
from operator import getitem
|
|
|
|
from typing import Iterator, Mapping, Set
|
|
|
|
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
from tlz import curry, groupby, identity, merge
|
|
|
|
from tlz.functoolz import Compose
|
|
|
|
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -911,7 +911,7 @@ def _normalize_function(func):
|
2021-06-20 06:20:01 +00:00
|
|
|
def register_pandas():
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
- PANDAS_GT_130 = LooseVersion(pd.__version__) >= LooseVersion("1.3.0")
|
|
|
|
+ PANDAS_GT_130 = parse_version(pd.__version__) >= parse_version("1.3.0")
|
|
|
|
|
|
|
|
@normalize_token.register(pd.Index)
|
|
|
|
def normalize_index(ind):
|
|
|
|
diff --git a/dask/bytes/tests/test_http.py b/dask/bytes/tests/test_http.py
|
|
|
|
index bee444f5..3a05ef81 100644
|
|
|
|
--- a/dask/bytes/tests/test_http.py
|
|
|
|
+++ b/dask/bytes/tests/test_http.py
|
|
|
|
@@ -2,11 +2,11 @@ import os
|
|
|
|
import subprocess
|
|
|
|
import sys
|
|
|
|
import time
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
|
|
|
|
import fsspec
|
|
|
|
import pytest
|
|
|
|
from fsspec.core import open_files
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
|
|
|
|
import dask.bag as db
|
|
|
|
from dask.utils import tmpdir
|
|
|
|
@@ -14,7 +14,7 @@ from dask.utils import tmpdir
|
|
|
|
files = ["a", "b"]
|
|
|
|
requests = pytest.importorskip("requests")
|
|
|
|
errs = (requests.exceptions.RequestException,)
|
|
|
|
-if LooseVersion(fsspec.__version__) > "0.7.4":
|
|
|
|
+if parse_version(fsspec.__version__) > parse_version("0.7.4"):
|
|
|
|
aiohttp = pytest.importorskip("aiohttp")
|
|
|
|
errs = errs + (aiohttp.client_exceptions.ClientResponseError,)
|
|
|
|
|
|
|
|
diff --git a/dask/bytes/tests/test_local.py b/dask/bytes/tests/test_local.py
|
|
|
|
index 40b161c7..5564f92c 100644
|
|
|
|
--- a/dask/bytes/tests/test_local.py
|
|
|
|
+++ b/dask/bytes/tests/test_local.py
|
|
|
|
@@ -2,7 +2,6 @@ import gzip
|
|
|
|
import os
|
|
|
|
import pathlib
|
|
|
|
import sys
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
from functools import partial
|
|
|
|
from time import sleep
|
|
|
|
|
|
|
|
@@ -11,6 +10,7 @@ import pytest
|
|
|
|
from fsspec.compression import compr
|
|
|
|
from fsspec.core import open_files
|
|
|
|
from fsspec.implementations.local import LocalFileSystem
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
from tlz import concat, valmap
|
|
|
|
|
|
|
|
from dask import compute
|
|
|
|
@@ -356,7 +356,7 @@ def test_get_pyarrow_filesystem():
|
|
|
|
from fsspec.implementations.local import LocalFileSystem
|
|
|
|
|
|
|
|
pa = pytest.importorskip("pyarrow")
|
|
|
|
- if pa.__version__ >= LooseVersion("2.0.0"):
|
|
|
|
+ if parse_version(pa.__version__).major >= 2:
|
|
|
|
pytest.skip("fsspec no loger inherits from pyarrow>=2.0.")
|
|
|
|
|
|
|
|
fs = LocalFileSystem()
|
|
|
|
diff --git a/dask/bytes/tests/test_s3.py b/dask/bytes/tests/test_s3.py
|
|
|
|
index 1412de3e..b24b30b5 100644
|
|
|
|
--- a/dask/bytes/tests/test_s3.py
|
|
|
|
+++ b/dask/bytes/tests/test_s3.py
|
|
|
|
@@ -5,10 +5,10 @@ import subprocess
|
|
|
|
import sys
|
|
|
|
import time
|
|
|
|
from contextlib import contextmanager
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
from functools import partial
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
|
|
|
|
s3fs = pytest.importorskip("s3fs")
|
|
|
|
boto3 = pytest.importorskip("boto3")
|
|
|
|
@@ -438,13 +438,13 @@ def test_parquet(s3, engine, s3so, metadata_file):
|
|
|
|
from dask.dataframe._compat import tm
|
|
|
|
|
|
|
|
lib = pytest.importorskip(engine)
|
|
|
|
- if engine == "pyarrow" and LooseVersion(lib.__version__) < "0.13.1":
|
|
|
|
+ lib_version = parse_version(lib.__version__)
|
|
|
|
+ if engine == "pyarrow" and lib_version < parse_version("0.13.1"):
|
|
|
|
pytest.skip("pyarrow < 0.13.1 not supported for parquet")
|
|
|
|
if (
|
|
|
|
engine == "pyarrow"
|
|
|
|
- and LooseVersion(lib.__version__) >= "2.0"
|
|
|
|
- and LooseVersion(lib.__version__) < "3.0"
|
|
|
|
- and LooseVersion(s3fs.__version__) > "0.5.0"
|
|
|
|
+ and lib_version.major == 2
|
|
|
|
+ and parse_version(s3fs.__version__) > parse_version("0.5.0")
|
|
|
|
):
|
|
|
|
pytest.skip("#7056 - new s3fs not supported before pyarrow 3.0")
|
|
|
|
|
|
|
|
@@ -507,7 +507,7 @@ def test_parquet_wstoragepars(s3, s3so):
|
|
|
|
|
|
|
|
def test_get_pyarrow_fs_s3(s3):
|
|
|
|
pa = pytest.importorskip("pyarrow")
|
|
|
|
- if pa.__version__ >= LooseVersion("2.0.0"):
|
|
|
|
+ if parse_version(pa.__version__).major >= 2:
|
|
|
|
pytest.skip("fsspec no loger inherits from pyarrow>=2.0.")
|
|
|
|
fs = DaskS3FileSystem(anon=True)
|
|
|
|
assert isinstance(fs, pa.filesystem.FileSystem)
|
|
|
|
diff --git a/dask/dataframe/_compat.py b/dask/dataframe/_compat.py
|
|
|
|
index 2a0b17fc..5e76d106 100644
|
|
|
|
--- a/dask/dataframe/_compat.py
|
|
|
|
+++ b/dask/dataframe/_compat.py
|
|
|
|
@@ -1,16 +1,16 @@
|
|
|
|
import string
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
-
|
|
|
|
-PANDAS_VERSION = LooseVersion(pd.__version__)
|
|
|
|
-PANDAS_GT_100 = PANDAS_VERSION >= LooseVersion("1.0.0")
|
|
|
|
-PANDAS_GT_104 = PANDAS_VERSION >= LooseVersion("1.0.4")
|
|
|
|
-PANDAS_GT_110 = PANDAS_VERSION >= LooseVersion("1.1.0")
|
|
|
|
-PANDAS_GT_120 = PANDAS_VERSION >= LooseVersion("1.2.0")
|
|
|
|
-PANDAS_GT_121 = PANDAS_VERSION >= LooseVersion("1.2.1")
|
|
|
|
-PANDAS_GT_130 = PANDAS_VERSION >= LooseVersion("1.3.0")
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
+
|
|
|
|
+PANDAS_VERSION = parse_version(pd.__version__)
|
|
|
|
+PANDAS_GT_100 = PANDAS_VERSION >= parse_version("1.0.0")
|
|
|
|
+PANDAS_GT_104 = PANDAS_VERSION >= parse_version("1.0.4")
|
|
|
|
+PANDAS_GT_110 = PANDAS_VERSION >= parse_version("1.1.0")
|
|
|
|
+PANDAS_GT_120 = PANDAS_VERSION >= parse_version("1.2.0")
|
|
|
|
+PANDAS_GT_121 = PANDAS_VERSION >= parse_version("1.2.1")
|
|
|
|
+PANDAS_GT_130 = PANDAS_VERSION >= parse_version("1.3.0")
|
|
|
|
|
|
|
|
|
|
|
|
if PANDAS_GT_100:
|
|
|
|
diff --git a/dask/dataframe/io/orc.py b/dask/dataframe/io/orc.py
|
|
|
|
index 40740310..69eca3c5 100644
|
|
|
|
--- a/dask/dataframe/io/orc.py
|
|
|
|
+++ b/dask/dataframe/io/orc.py
|
|
|
|
@@ -1,6 +1,5 @@
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
-
|
|
|
|
from fsspec.core import get_fs_token_paths
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
|
|
|
|
from ...base import tokenize
|
|
|
|
from ...highlevelgraph import HighLevelGraph
|
|
|
|
@@ -49,7 +48,7 @@ def _read_orc_stripe(fs, path, stripe, columns=None):
|
|
|
|
with fs.open(path, "rb") as f:
|
|
|
|
o = orc.ORCFile(f)
|
|
|
|
table = o.read_stripe(stripe, columns)
|
|
|
|
- if pa.__version__ < LooseVersion("0.11.0"):
|
|
|
|
+ if parse_version(pa.__version__) < parse_version("0.11.0"):
|
|
|
|
return table.to_pandas()
|
|
|
|
else:
|
|
|
|
return table.to_pandas(date_as_object=False)
|
|
|
|
@@ -80,7 +79,7 @@ def read_orc(path, columns=None, storage_options=None):
|
|
|
|
orc = import_required("pyarrow.orc", "Please install pyarrow >= 0.9.0")
|
|
|
|
import pyarrow as pa
|
|
|
|
|
|
|
|
- if LooseVersion(pa.__version__) == "0.10.0":
|
|
|
|
+ if parse_version(pa.__version__) == parse_version("0.10.0"):
|
|
|
|
raise RuntimeError(
|
|
|
|
"Due to a bug in pyarrow 0.10.0, the ORC reader is "
|
|
|
|
"unavailable. Please either downgrade pyarrow to "
|
|
|
|
diff --git a/dask/dataframe/io/parquet/arrow.py b/dask/dataframe/io/parquet/arrow.py
|
2021-07-11 07:55:07 +00:00
|
|
|
index 16d29bef..8e4aa1da 100644
|
2021-06-20 06:20:01 +00:00
|
|
|
--- a/dask/dataframe/io/parquet/arrow.py
|
|
|
|
+++ b/dask/dataframe/io/parquet/arrow.py
|
|
|
|
@@ -2,13 +2,13 @@ import json
|
|
|
|
import warnings
|
|
|
|
from collections import defaultdict
|
|
|
|
from datetime import datetime
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
from functools import partial
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
import pyarrow as pa
|
|
|
|
import pyarrow.parquet as pq
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
|
|
|
|
from dask import delayed
|
|
|
|
|
|
|
|
@@ -27,14 +27,16 @@ from .utils import (
|
|
|
|
)
|
|
|
|
|
|
|
|
# Check PyArrow version for feature support
|
|
|
|
-preserve_ind_supported = pa.__version__ >= LooseVersion("0.15.0")
|
|
|
|
+_pa_version = parse_version(pa.__version__)
|
|
|
|
+preserve_ind_supported = _pa_version >= parse_version("0.15.0")
|
|
|
|
read_row_groups_supported = preserve_ind_supported
|
|
|
|
-if pa.__version__ >= LooseVersion("1.0.0"):
|
|
|
|
+if _pa_version.major >= 1:
|
|
|
|
from pyarrow import dataset as pa_ds
|
|
|
|
else:
|
|
|
|
pa_ds = None
|
|
|
|
-subset_stats_supported = pa.__version__ > LooseVersion("2.0.0")
|
|
|
|
-schema_field_supported = pa.__version__ >= LooseVersion("0.15.0")
|
|
|
|
+subset_stats_supported = _pa_version > parse_version("2.0.0")
|
|
|
|
+schema_field_supported = _pa_version >= parse_version("0.15.0")
|
|
|
|
+del _pa_version
|
|
|
|
|
|
|
|
#
|
|
|
|
# Helper Utilities
|
|
|
|
diff --git a/dask/dataframe/io/parquet/core.py b/dask/dataframe/io/parquet/core.py
|
|
|
|
index e11de5e3..00ab6ee0 100644
|
|
|
|
--- a/dask/dataframe/io/parquet/core.py
|
|
|
|
+++ b/dask/dataframe/io/parquet/core.py
|
|
|
|
@@ -1,11 +1,11 @@
|
|
|
|
import math
|
|
|
|
import warnings
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
|
|
|
|
import tlz as toolz
|
|
|
|
from fsspec.core import get_fs_token_paths
|
|
|
|
from fsspec.implementations.local import LocalFileSystem
|
|
|
|
from fsspec.utils import stringify_path
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
|
|
|
|
from ....base import tokenize
|
|
|
|
from ....delayed import Delayed
|
|
|
|
@@ -844,11 +844,12 @@ def get_engine(engine):
|
|
|
|
|
|
|
|
elif engine in ("pyarrow", "arrow", "pyarrow-legacy", "pyarrow-dataset"):
|
|
|
|
pa = import_required("pyarrow", "`pyarrow` not installed")
|
|
|
|
+ pa_version = parse_version(pa.__version__)
|
|
|
|
|
|
|
|
- if LooseVersion(pa.__version__) < "0.13.1":
|
|
|
|
+ if pa_version < parse_version("0.13.1"):
|
|
|
|
raise RuntimeError("PyArrow version >= 0.13.1 required")
|
|
|
|
|
|
|
|
- if engine == "pyarrow-dataset" and LooseVersion(pa.__version__) >= "1.0.0":
|
|
|
|
+ if engine == "pyarrow-dataset" and pa_version.major >= 1:
|
|
|
|
from .arrow import ArrowDatasetEngine
|
|
|
|
|
|
|
|
_ENGINES[engine] = eng = ArrowDatasetEngine
|
|
|
|
diff --git a/dask/dataframe/io/parquet/fastparquet.py b/dask/dataframe/io/parquet/fastparquet.py
|
2021-07-11 07:55:07 +00:00
|
|
|
index dfa77a9a..f35323f7 100644
|
2021-06-20 06:20:01 +00:00
|
|
|
--- a/dask/dataframe/io/parquet/fastparquet.py
|
|
|
|
+++ b/dask/dataframe/io/parquet/fastparquet.py
|
|
|
|
@@ -3,11 +3,11 @@ import json
|
|
|
|
import pickle
|
|
|
|
import warnings
|
|
|
|
from collections import OrderedDict, defaultdict
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
import tlz as toolz
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
|
|
|
|
try:
|
|
|
|
import fastparquet
|
|
|
|
@@ -924,7 +924,7 @@ class FastParquetEngine(Engine):
|
|
|
|
rgs = []
|
|
|
|
elif partition_on:
|
|
|
|
mkdirs = lambda x: fs.mkdirs(x, exist_ok=True)
|
|
|
|
- if LooseVersion(fastparquet.__version__) >= "0.1.4":
|
|
|
|
+ if parse_version(fastparquet.__version__) >= parse_version("0.1.4"):
|
|
|
|
rgs = partition_on_columns(
|
|
|
|
df, partition_on, path, filename, fmd, compression, fs.open, mkdirs
|
|
|
|
)
|
|
|
|
diff --git a/dask/dataframe/io/tests/test_orc.py b/dask/dataframe/io/tests/test_orc.py
|
|
|
|
index 1c1ca00e..580a7ded 100644
|
|
|
|
--- a/dask/dataframe/io/tests/test_orc.py
|
|
|
|
+++ b/dask/dataframe/io/tests/test_orc.py
|
|
|
|
@@ -1,9 +1,9 @@
|
|
|
|
import os
|
|
|
|
import shutil
|
|
|
|
import tempfile
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
|
|
|
|
import dask.dataframe as dd
|
|
|
|
from dask.dataframe import read_orc
|
|
|
|
@@ -16,7 +16,7 @@ pytest.importorskip("pyarrow.orc")
|
|
|
|
import pyarrow as pa
|
|
|
|
|
|
|
|
pytestmark = pytest.mark.skipif(
|
|
|
|
- LooseVersion(pa.__version__) == "0.10.0",
|
|
|
|
+    parse_version(pa.__version__) == parse_version("0.10.0"),
|
|
|
|
reason=(
|
|
|
|
"PyArrow 0.10.0 release broke the ORC reader, see "
|
|
|
|
"https://issues.apache.org/jira/browse/ARROW-3009"
|
|
|
|
diff --git a/dask/dataframe/io/tests/test_parquet.py b/dask/dataframe/io/tests/test_parquet.py
|
2021-07-11 07:55:07 +00:00
|
|
|
index f9833237..cfeddb74 100644
|
2021-06-20 06:20:01 +00:00
|
|
|
--- a/dask/dataframe/io/tests/test_parquet.py
|
|
|
|
+++ b/dask/dataframe/io/tests/test_parquet.py
|
|
|
|
@@ -4,11 +4,11 @@ import os
|
|
|
|
import sys
|
|
|
|
import warnings
|
|
|
|
from decimal import Decimal
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
import pytest
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
|
|
|
|
import dask
|
|
|
|
import dask.dataframe as dd
|
|
|
|
@@ -25,12 +25,18 @@ try:
|
|
|
|
import fastparquet
|
|
|
|
except ImportError:
|
|
|
|
fastparquet = False
|
|
|
|
+ fastparquet_version = parse_version("0")
|
|
|
|
+else:
|
|
|
|
+ fastparquet_version = parse_version(fastparquet.__version__)
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
import pyarrow as pa
|
|
|
|
except ImportError:
|
|
|
|
pa = False
|
|
|
|
+ pa_version = parse_version("0")
|
|
|
|
+else:
|
|
|
|
+ pa_version = parse_version(pa.__version__)
|
|
|
|
|
|
|
|
try:
|
|
|
|
import pyarrow.parquet as pq
|
|
|
|
@@ -41,7 +47,7 @@ except ImportError:
|
|
|
|
SKIP_FASTPARQUET = not fastparquet
|
|
|
|
FASTPARQUET_MARK = pytest.mark.skipif(SKIP_FASTPARQUET, reason="fastparquet not found")
|
|
|
|
|
|
|
|
-if pq and pa.__version__ < LooseVersion("0.13.1"):
|
|
|
|
+if pq and pa_version < parse_version("0.13.1"):
|
|
|
|
SKIP_PYARROW = True
|
|
|
|
SKIP_PYARROW_REASON = "pyarrow >= 0.13.1 required for parquet"
|
|
|
|
else:
|
|
|
|
@@ -49,8 +55,8 @@ else:
|
|
|
|
sys.platform == "win32"
|
|
|
|
and pa
|
|
|
|
and (
|
|
|
|
- (pa.__version__ == LooseVersion("0.16.0"))
|
|
|
|
- or (pa.__version__ == LooseVersion("2.0.0"))
|
|
|
|
+ pa_version == parse_version("0.16.0")
|
|
|
|
+ or pa_version == parse_version("2.0.0")
|
|
|
|
)
|
|
|
|
):
|
|
|
|
SKIP_PYARROW = True
|
|
|
|
@@ -64,7 +70,7 @@ else:
|
|
|
|
SKIP_PYARROW_REASON = "pyarrow not found"
|
|
|
|
PYARROW_MARK = pytest.mark.skipif(SKIP_PYARROW, reason=SKIP_PYARROW_REASON)
|
|
|
|
|
|
|
|
-if pa and pa.__version__ < LooseVersion("1.0.0"):
|
|
|
|
+if pa and pa_version.major < 1:
|
|
|
|
SKIP_PYARROW_DS = True
|
|
|
|
SKIP_PYARROW_DS_REASON = "pyarrow >= 1.0.0 required for pyarrow dataset API"
|
|
|
|
else:
|
|
|
|
@@ -147,7 +153,7 @@ write_read_engines_xfail = write_read_engines(
|
|
|
|
|
|
|
|
if (
|
|
|
|
fastparquet
|
|
|
|
- and fastparquet.__version__ < LooseVersion("0.5")
|
|
|
|
+ and fastparquet_version < parse_version("0.5")
|
|
|
|
and PANDAS_GT_110
|
|
|
|
and not PANDAS_GT_121
|
|
|
|
):
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -817,7 +823,7 @@ def test_append_dict_column(tmpdir, engine):
|
2021-06-20 06:20:01 +00:00
|
|
|
|
|
|
|
if engine == "fastparquet":
|
|
|
|
pytest.xfail("Fastparquet engine is missing dict-column support")
|
|
|
|
- elif pa.__version__ < LooseVersion("1.0.1"):
|
|
|
|
+ elif pa_version < parse_version("1.0.1"):
|
|
|
|
pytest.skip("Newer PyArrow version required for dict-column support.")
|
|
|
|
|
|
|
|
tmp = str(tmpdir)
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -988,7 +994,7 @@ def test_categories_unnamed_index(tmpdir, engine):
|
2021-06-20 06:20:01 +00:00
|
|
|
# Check that we can handle an unnamed categorical index
|
|
|
|
# https://github.com/dask/dask/issues/6885
|
|
|
|
|
|
|
|
- if engine.startswith("pyarrow") and pa.__version__ < LooseVersion("0.15.0"):
|
|
|
|
+ if engine.startswith("pyarrow") and pa_version < parse_version("0.15.0"):
|
|
|
|
pytest.skip("PyArrow>=0.15 Required.")
|
|
|
|
|
|
|
|
tmpdir = str(tmpdir)
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -1173,7 +1179,7 @@ def test_to_parquet_pyarrow_w_inconsistent_schema_by_partition_succeeds_w_manual
|
2021-06-20 06:20:01 +00:00
|
|
|
@pytest.mark.parametrize("index", [False, True])
|
|
|
|
@pytest.mark.parametrize("schema", ["infer", "complex"])
|
|
|
|
def test_pyarrow_schema_inference(tmpdir, index, engine, schema):
|
|
|
|
- if pa.__version__ < LooseVersion("0.15.0"):
|
|
|
|
+ if pa_version < parse_version("0.15.0"):
|
|
|
|
pytest.skip("PyArrow>=0.15 Required.")
|
|
|
|
if schema == "complex":
|
|
|
|
schema = {"index": pa.string(), "amount": pa.int64()}
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -1366,9 +1372,7 @@ def test_filters_v0(tmpdir, write_engine, read_engine):
|
2021-06-20 06:20:01 +00:00
|
|
|
|
|
|
|
# Recent versions of pyarrow support full row-wise filtering
|
|
|
|
# (fastparquet and older pyarrow versions do not)
|
|
|
|
- pyarrow_row_filtering = (
|
|
|
|
- read_engine == "pyarrow-dataset" and pa.__version__ >= LooseVersion("1.0.0")
|
|
|
|
- )
|
|
|
|
+ pyarrow_row_filtering = read_engine == "pyarrow-dataset" and pa_version.major >= 1
|
|
|
|
|
|
|
|
fn = str(tmpdir)
|
|
|
|
df = pd.DataFrame({"at": ["ab", "aa", "ba", "da", "bb"]})
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -1469,7 +1473,7 @@ def test_pyarrow_filter_divisions(tmpdir):
|
2021-06-20 06:20:01 +00:00
|
|
|
str(tmpdir.join("file.1.parquet")), engine="pyarrow", row_group_size=2
|
|
|
|
)
|
|
|
|
|
|
|
|
- if pa.__version__ >= LooseVersion("1.0.0"):
|
|
|
|
+ if pa_version.major >= 1:
|
|
|
|
# Only works for ArrowDatasetEngine.
|
|
|
|
# Legacy code will not apply filters on individual row-groups
|
|
|
|
# when `split_row_groups=False`.
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -1644,7 +1648,7 @@ def test_parquet_select_cats(tmpdir, engine):
|
2021-06-20 06:20:01 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_columns_name(tmpdir, engine):
|
|
|
|
- if engine == "fastparquet" and fastparquet.__version__ <= LooseVersion("0.3.1"):
|
|
|
|
+ if engine == "fastparquet" and fastparquet_version <= parse_version("0.3.1"):
|
|
|
|
pytest.skip("Fastparquet does not write column_indexes up to 0.3.1")
|
|
|
|
tmp_path = str(tmpdir)
|
|
|
|
df = pd.DataFrame({"A": [1, 2]}, index=pd.Index(["a", "b"], name="idx"))
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -2048,9 +2052,7 @@ def test_to_parquet_with_get(tmpdir):
|
2021-06-20 06:20:01 +00:00
|
|
|
def test_select_partitioned_column(tmpdir, engine):
|
|
|
|
pytest.importorskip("snappy")
|
|
|
|
if engine.startswith("pyarrow"):
|
|
|
|
- import pyarrow as pa
|
|
|
|
-
|
|
|
|
- if pa.__version__ < LooseVersion("0.9.0"):
|
|
|
|
+ if pa_version < parse_version("0.9.0"):
|
|
|
|
pytest.skip("pyarrow<0.9.0 did not support this")
|
|
|
|
|
|
|
|
fn = str(tmpdir)
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -2074,9 +2076,9 @@ def test_select_partitioned_column(tmpdir, engine):
|
2021-06-20 06:20:01 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_with_tz(tmpdir, engine):
|
|
|
|
- if engine.startswith("pyarrow") and pa.__version__ < LooseVersion("0.11.0"):
|
|
|
|
+ if engine.startswith("pyarrow") and pa_version < parse_version("0.11.0"):
|
|
|
|
pytest.skip("pyarrow<0.11.0 did not support this")
|
|
|
|
- if engine == "fastparquet" and fastparquet.__version__ < LooseVersion("0.3.0"):
|
|
|
|
+ if engine == "fastparquet" and fastparquet_version < parse_version("0.3.0"):
|
|
|
|
pytest.skip("fastparquet<0.3.0 did not support this")
|
|
|
|
|
|
|
|
with warnings.catch_warnings():
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -2285,7 +2287,7 @@ def test_timeseries_nulls_in_schema(tmpdir, engine, schema):
|
2021-06-20 06:20:01 +00:00
|
|
|
if (
|
|
|
|
schema == "infer"
|
|
|
|
and engine.startswith("pyarrow")
|
|
|
|
- and pa.__version__ < LooseVersion("0.15.0")
|
|
|
|
+ and pa_version < parse_version("0.15.0")
|
|
|
|
):
|
|
|
|
pytest.skip("PyArrow>=0.15 Required.")
|
|
|
|
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -2821,9 +2823,7 @@ def test_filter_nonpartition_columns(
|
2021-06-20 06:20:01 +00:00
|
|
|
|
|
|
|
@PYARROW_MARK
|
|
|
|
def test_pandas_metadata_nullable_pyarrow(tmpdir):
|
|
|
|
- if pa.__version__ < LooseVersion("0.16.0") or pd.__version__ < LooseVersion(
|
|
|
|
- "1.0.0"
|
|
|
|
- ):
|
|
|
|
+ if pa_version < parse_version("0.16.0") or parse_version(pd.__version__).major < 1:
|
|
|
|
pytest.skip("PyArrow>=0.16 and Pandas>=1.0.0 Required.")
|
|
|
|
tmpdir = str(tmpdir)
|
|
|
|
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -2844,7 +2844,7 @@ def test_pandas_metadata_nullable_pyarrow(tmpdir):
|
2021-06-20 06:20:01 +00:00
|
|
|
|
|
|
|
@PYARROW_MARK
|
|
|
|
def test_pandas_timestamp_overflow_pyarrow(tmpdir):
|
|
|
|
- if pa.__version__ < LooseVersion("0.17.0"):
|
|
|
|
+    if pa_version < parse_version("0.17.0"):
|
|
|
|
pytest.skip("PyArrow>=0.17 Required.")
|
|
|
|
|
|
|
|
info = np.iinfo(np.dtype("int64"))
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -2954,7 +2954,7 @@ def test_partitioned_column_overlap(tmpdir, engine, write_cols):
|
2021-06-20 06:20:01 +00:00
|
|
|
@fp_pandas_xfail
|
|
|
|
def test_partitioned_preserve_index(tmpdir, write_engine, read_engine):
|
|
|
|
|
|
|
|
- if write_engine.startswith("pyarrow") and pa.__version__ < LooseVersion("0.15.0"):
|
|
|
|
+ if write_engine.startswith("pyarrow") and pa_version < parse_version("0.15.0"):
|
|
|
|
pytest.skip("PyArrow>=0.15 Required.")
|
|
|
|
|
|
|
|
tmp = str(tmpdir)
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -3072,7 +3072,7 @@ def test_pyarrow_dataset_simple(tmpdir, engine):
|
2021-06-20 06:20:01 +00:00
|
|
|
@PYARROW_MARK
|
|
|
|
@pytest.mark.parametrize("test_filter", [True, False])
|
|
|
|
def test_pyarrow_dataset_partitioned(tmpdir, engine, test_filter):
|
|
|
|
- if pa.__version__ <= LooseVersion("0.17.1"):
|
|
|
|
+ if pa_version <= parse_version("0.17.1"):
|
|
|
|
# Using pyarrow.dataset API does not produce
|
|
|
|
# Categorical type for partitioned columns.
|
|
|
|
pytest.skip("PyArrow>0.17.1 Required.")
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -3100,7 +3100,7 @@ def test_pyarrow_dataset_partitioned(tmpdir, engine, test_filter):
|
2021-06-20 06:20:01 +00:00
|
|
|
def test_pyarrow_dataset_read_from_paths(
|
|
|
|
tmpdir, read_from_paths, test_filter_partitioned
|
|
|
|
):
|
|
|
|
- if pa.__version__ <= LooseVersion("0.17.1"):
|
|
|
|
+ if pa_version <= parse_version("0.17.1"):
|
|
|
|
# Using pyarrow.dataset API does not produce
|
|
|
|
# Categorical type for partitioned columns.
|
|
|
|
pytest.skip("PyArrow>0.17.1 Required.")
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -3130,7 +3130,7 @@ def test_pyarrow_dataset_read_from_paths(
|
2021-06-20 06:20:01 +00:00
|
|
|
@PYARROW_MARK
|
|
|
|
@pytest.mark.parametrize("split_row_groups", [True, False])
|
|
|
|
def test_pyarrow_dataset_filter_partitioned(tmpdir, split_row_groups):
|
|
|
|
- if pa.__version__ < LooseVersion("1.0.0"):
|
|
|
|
+ if pa_version.major < 1:
|
|
|
|
# pyarrow.dataset API required.
|
|
|
|
pytest.skip("PyArrow>=1.0.0 Required.")
|
|
|
|
|
|
|
|
diff --git a/dask/dataframe/tests/test_rolling.py b/dask/dataframe/tests/test_rolling.py
|
|
|
|
index 19cb5b56..ea023b40 100644
|
|
|
|
--- a/dask/dataframe/tests/test_rolling.py
|
|
|
|
+++ b/dask/dataframe/tests/test_rolling.py
|
|
|
|
@@ -1,8 +1,7 @@
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
-
|
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
import pytest
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
|
|
|
|
import dask.dataframe as dd
|
|
|
|
from dask.dataframe._compat import PANDAS_GT_130
|
|
|
|
@@ -404,7 +403,8 @@ def test_rolling_agg_aggregate():
|
|
|
|
@pytest.mark.skipif(not dd._compat.PANDAS_GT_100, reason="needs pandas>=1.0.0")
|
|
|
|
def test_rolling_numba_engine():
|
|
|
|
numba = pytest.importorskip("numba")
|
|
|
|
- if not dd._compat.PANDAS_GT_104 and LooseVersion(numba.__version__) >= "0.49":
|
|
|
|
+ numba_version = parse_version(numba.__version__)
|
|
|
|
+ if not dd._compat.PANDAS_GT_104 and numba_version >= parse_version("0.49"):
|
|
|
|
# Was fixed in https://github.com/pandas-dev/pandas/pull/33687
|
|
|
|
pytest.xfail("Known incompatibility between pandas and numba")
|
|
|
|
|
|
|
|
diff --git a/dask/diagnostics/profile_visualize.py b/dask/diagnostics/profile_visualize.py
|
|
|
|
index 751957da..b62c6fba 100644
|
|
|
|
--- a/dask/diagnostics/profile_visualize.py
|
|
|
|
+++ b/dask/diagnostics/profile_visualize.py
|
|
|
|
@@ -1,9 +1,9 @@
|
|
|
|
import random
|
|
|
|
from bisect import bisect_left
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
from itertools import cycle
|
|
|
|
from operator import add, itemgetter
|
|
|
|
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
from tlz import accumulate, groupby, pluck, unique
|
|
|
|
|
|
|
|
from ..core import istask
|
|
|
|
@@ -372,7 +372,7 @@ def plot_resources(results, palette="Viridis", **kwargs):
|
|
|
|
line_width=4,
|
|
|
|
**{
|
|
|
|
"legend_label"
|
|
|
|
- if LooseVersion(bokeh.__version__) >= "1.4"
|
|
|
|
+ if parse_version(bokeh.__version__) >= parse_version("1.4")
|
|
|
|
else "legend": "% CPU"
|
|
|
|
}
|
|
|
|
)
|
|
|
|
@@ -390,7 +390,7 @@ def plot_resources(results, palette="Viridis", **kwargs):
|
|
|
|
line_width=4,
|
|
|
|
**{
|
|
|
|
"legend_label"
|
|
|
|
- if LooseVersion(bokeh.__version__) >= "1.4"
|
|
|
|
+ if parse_version(bokeh.__version__) >= parse_version("1.4")
|
|
|
|
else "legend": "Memory"
|
|
|
|
}
|
|
|
|
)
|
|
|
|
diff --git a/dask/diagnostics/tests/test_profiler.py b/dask/diagnostics/tests/test_profiler.py
|
|
|
|
index 5d995b87..a31943f8 100644
|
|
|
|
--- a/dask/diagnostics/tests/test_profiler.py
|
|
|
|
+++ b/dask/diagnostics/tests/test_profiler.py
|
|
|
|
@@ -1,10 +1,10 @@
|
|
|
|
import contextlib
|
|
|
|
import os
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
from operator import add, mul
|
|
|
|
from time import sleep
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
|
|
|
|
from dask.diagnostics import CacheProfiler, Profiler, ResourceProfiler
|
|
|
|
from dask.threaded import get
|
|
|
|
@@ -326,10 +326,10 @@ def test_plot_multiple():
|
|
|
|
p = visualize(
|
|
|
|
[prof, rprof], label_size=50, title="Not the default", show=False, save=False
|
|
|
|
)
|
|
|
|
- bokeh_version = LooseVersion(bokeh.__version__)
|
|
|
|
- if bokeh_version >= "1.1.0":
|
|
|
|
+ bokeh_version = parse_version(bokeh.__version__)
|
|
|
|
+ if bokeh_version >= parse_version("1.1.0"):
|
|
|
|
figures = [r[0] for r in p.children[1].children]
|
|
|
|
- elif bokeh_version >= "0.12.0":
|
|
|
|
+ elif bokeh_version >= parse_version("0.12.0"):
|
|
|
|
figures = [r.children[0] for r in p.children[1].children]
|
|
|
|
else:
|
|
|
|
figures = [r[0] for r in p.children]
|
|
|
|
@@ -364,7 +364,7 @@ def test_get_colors():
|
|
|
|
from dask.diagnostics.profile_visualize import get_colors
|
|
|
|
|
|
|
|
# 256-color palettes were added in bokeh 1.4.0
|
|
|
|
- if LooseVersion(bokeh.__version__) >= "1.4.0":
|
|
|
|
+ if parse_version(bokeh.__version__) >= parse_version("1.4.0"):
|
|
|
|
from bokeh.palettes import Blues256
|
|
|
|
|
|
|
|
funcs = list(range(11))
|
|
|
|
diff --git a/dask/sizeof.py b/dask/sizeof.py
|
2021-07-11 07:55:07 +00:00
|
|
|
index 522f6b89..10874585 100644
|
2021-06-20 06:20:01 +00:00
|
|
|
--- a/dask/sizeof.py
|
|
|
|
+++ b/dask/sizeof.py
|
|
|
|
@@ -2,7 +2,8 @@ import itertools
|
|
|
|
import random
|
|
|
|
import sys
|
|
|
|
from array import array
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
+
|
|
|
|
+from packaging.version import parse as parse_version
|
|
|
|
|
|
|
|
from .utils import Dispatch
|
|
|
|
|
2021-07-11 07:55:07 +00:00
|
|
|
@@ -218,7 +219,7 @@ def register_pyarrow():
|
2021-06-20 06:20:01 +00:00
|
|
|
return int(_get_col_size(data)) + 1000
|
|
|
|
|
|
|
|
# Handle pa.Column for pyarrow < 0.15
|
|
|
|
- if pa.__version__ < LooseVersion("0.15.0"):
|
|
|
|
+ if parse_version(pa.__version__) < parse_version("0.15.0"):
|
|
|
|
|
|
|
|
@sizeof.register(pa.Column)
|
|
|
|
def sizeof_pyarrow_column(col):
|
|
|
|
diff --git a/dask/tests/test_multiprocessing.py b/dask/tests/test_multiprocessing.py
|
|
|
|
index 51c7044f..16cdca8b 100644
|
|
|
|
--- a/dask/tests/test_multiprocessing.py
|
|
|
|
+++ b/dask/tests/test_multiprocessing.py
|
|
|
|
@@ -2,10 +2,8 @@ import multiprocessing
|
|
|
|
import pickle
|
|
|
|
import sys
|
|
|
|
from concurrent.futures import ProcessPoolExecutor
|
|
|
|
-from distutils.version import LooseVersion
|
|
|
|
from operator import add
|
|
|
|
|
|
|
|
-import cloudpickle
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
import dask
|
|
|
|
@@ -49,13 +47,10 @@ def test_pickle_locals():
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5, reason="requires pickle protocol 5")
|
|
|
|
-@pytest.mark.skipif(
|
|
|
|
- cloudpickle.__version__ < LooseVersion("1.3.0"),
|
|
|
|
- reason="requires cloudpickle >= 1.3.0",
|
|
|
|
-)
|
|
|
|
def test_out_of_band_pickling():
|
|
|
|
"""Test that out-of-band pickling works"""
|
|
|
|
np = pytest.importorskip("numpy")
|
|
|
|
+ pytest.importorskip("cloudpickle", minversion="1.3.0")
|
|
|
|
|
|
|
|
a = np.arange(5)
|
|
|
|
|
|
|
|
diff --git a/setup.py b/setup.py
|
2021-07-11 07:55:07 +00:00
|
|
|
index 2588fc1c..1806f328 100755
|
2021-06-20 06:20:01 +00:00
|
|
|
--- a/setup.py
|
|
|
|
+++ b/setup.py
|
|
|
|
@@ -22,6 +22,7 @@ extras_require["complete"] = sorted({v for req in extras_require.values() for v
|
|
|
|
extras_require["test"] = ["pytest", "pytest-rerunfailures", "pytest-xdist"]
|
|
|
|
|
|
|
|
install_requires = [
|
|
|
|
+ "packaging",
|
|
|
|
"pyyaml",
|
|
|
|
"cloudpickle >= 1.1.1",
|
|
|
|
"fsspec >= 0.6.0",
|
|
|
|
--
|
|
|
|
2.31.1
|
|
|
|
|