From ab18b267a2eeeaa60bda03cd67b21880c91d4766 Mon Sep 17 00:00:00 2001
From: Elliott Sales de Andrade
Date: Sun, 13 Jun 2021 05:37:41 -0400
Subject: [PATCH 2/4] Use packaging for version comparisons.

Signed-off-by: Elliott Sales de Andrade
---
 dask/array/numpy_compat.py               | 11 ++---
 dask/array/tests/test_cupy.py            | 22 +++++-----
 dask/array/tests/test_sparse.py          |  6 +--
 dask/base.py                             |  4 +-
 dask/bytes/tests/test_http.py            |  4 +-
 dask/bytes/tests/test_local.py           |  4 +-
 dask/bytes/tests/test_s3.py              | 12 ++---
 dask/dataframe/_compat.py                | 18 ++++----
 dask/dataframe/io/orc.py                 |  7 ++-
 dask/dataframe/io/parquet/arrow.py       | 12 ++---
 dask/dataframe/io/parquet/core.py        |  7 +--
 dask/dataframe/io/parquet/fastparquet.py |  4 +-
 dask/dataframe/io/tests/test_orc.py      |  4 +-
 dask/dataframe/io/tests/test_parquet.py  | 56 ++++++++++++------------
 dask/dataframe/tests/test_rolling.py     |  6 +--
 dask/diagnostics/profile_visualize.py    |  6 +--
 dask/diagnostics/tests/test_profiler.py  | 10 ++---
 dask/sizeof.py                           |  5 ++-
 dask/tests/test_multiprocessing.py       |  7 +--
 setup.py                                 |  1 +
 20 files changed, 103 insertions(+), 103 deletions(-)

diff --git a/dask/array/numpy_compat.py b/dask/array/numpy_compat.py
index 60d043d8..134c5839 100644
--- a/dask/array/numpy_compat.py
+++ b/dask/array/numpy_compat.py
@@ -1,14 +1,15 @@
 import warnings
-from distutils.version import LooseVersion
 
 import numpy as np
+from packaging.version import parse as parse_version
 
 from ..utils import derived_from
 
-_numpy_117 = LooseVersion(np.__version__) >= "1.17.0"
-_numpy_118 = LooseVersion(np.__version__) >= "1.18.0"
-_numpy_120 = LooseVersion(np.__version__) >= "1.20.0"
-_numpy_121 = LooseVersion(np.__version__) >= "1.21.0"
+_np_version = parse_version(np.__version__)
+_numpy_117 = _np_version >= parse_version("1.17.0")
+_numpy_118 = _np_version >= parse_version("1.18.0")
+_numpy_120 = _np_version >= parse_version("1.20.0")
+_numpy_121 = _np_version >= parse_version("1.21.0")
 
 
 # Taken from scikit-learn:
diff --git a/dask/array/tests/test_cupy.py b/dask/array/tests/test_cupy.py
index 26d5d3a4..be5c77df 100644
--- a/dask/array/tests/test_cupy.py
+++ b/dask/array/tests/test_cupy.py
@@ -1,7 +1,6 @@
-from distutils.version import LooseVersion
-
 import numpy as np
 import pytest
+from packaging.version import parse as parse_version
 
 import dask
 import dask.array as da
@@ -12,6 +11,7 @@ from dask.sizeof import sizeof
 
 cupy = pytest.importorskip("cupy")
 cupyx = pytest.importorskip("cupyx")
+cupy_version = parse_version(cupy.__version__)
 
 
 functions = [
@@ -35,7 +35,7 @@ functions = [
     pytest.param(
         lambda x: x.mean(),
         marks=pytest.mark.skipif(
-            not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+            not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
             reason="NEP-18 support is not available in NumPy or CuPy older than "
             "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
         ),
@@ -47,7 +47,7 @@ functions = [
     pytest.param(
         lambda x: x.std(),
         marks=pytest.mark.skipif(
-            not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+            not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
             reason="NEP-18 support is not available in NumPy or CuPy older than "
            "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
         ),
@@ -55,7 +55,7 @@
     pytest.param(
         lambda x: x.var(),
         marks=pytest.mark.skipif(
-            not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+            not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
             reason="NEP-18 support is not available in NumPy or CuPy older than "
             "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
         ),
@@ -318,7 +318,7 @@ def test_diagonal():
 
 
 @pytest.mark.skipif(
-    not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+    not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
     reason="NEP-18 support is not available in NumPy or CuPy older than "
     "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
 )
@@ -336,7 +336,7 @@ def test_tril_triu():
 
 
 @pytest.mark.skipif(
-    not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+    not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
     reason="NEP-18 support is not available in NumPy or CuPy older than "
     "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
 )
@@ -448,7 +448,7 @@ def test_nearest():
 
 
 @pytest.mark.skipif(
-    not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+    not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
     reason="NEP-18 support is not available in NumPy or CuPy older than "
     "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
 )
@@ -465,7 +465,7 @@ def test_constant():
 
 
 @pytest.mark.skipif(
-    not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+    not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
     reason="NEP-18 support is not available in NumPy or CuPy older than "
     "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
 )
@@ -556,7 +556,7 @@ def test_random_shapes(shape):
 
 
 @pytest.mark.skipif(
-    not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.1.0"),
+    not IS_NEP18_ACTIVE or cupy_version < parse_version("6.1.0"),
     reason="NEP-18 support is not available in NumPy or CuPy older than "
     "6.1.0 (requires https://github.com/cupy/cupy/pull/2209)",
 )
@@ -936,7 +936,7 @@ def test_cupy_sparse_concatenate(axis):
 
 @pytest.mark.skipif(not _numpy_120, reason="NEP-35 is not available")
 @pytest.mark.skipif(
-    not IS_NEP18_ACTIVE or cupy.__version__ < LooseVersion("6.4.0"),
+    not IS_NEP18_ACTIVE or cupy_version < parse_version("6.4.0"),
     reason="NEP-18 support is not available in NumPy or CuPy older than "
     "6.4.0 (requires https://github.com/cupy/cupy/pull/2418)",
 )
diff --git a/dask/array/tests/test_sparse.py b/dask/array/tests/test_sparse.py
index df92f3e6..8ff041c9 100644
--- a/dask/array/tests/test_sparse.py
+++ b/dask/array/tests/test_sparse.py
@@ -1,8 +1,8 @@
 import random
-from distutils.version import LooseVersion
 
 import numpy as np
 import pytest
+from packaging.version import parse as parse_version
 
 import dask
 import dask.array as da
@@ -34,7 +34,7 @@ functions = [
     pytest.param(
         lambda x: x.mean(),
         marks=pytest.mark.skipif(
-            sparse.__version__ >= LooseVersion("0.12.0"),
+            parse_version(sparse.__version__) >= parse_version("0.12.0"),
             reason="https://github.com/dask/dask/issues/7169",
         ),
     ),
@@ -91,7 +91,7 @@ def test_basic(func):
 
 
 @pytest.mark.skipif(
-    sparse.__version__ < LooseVersion("0.7.0+10"),
+    parse_version(sparse.__version__) < parse_version("0.7.0+10"),
     reason="fixed in https://github.com/pydata/sparse/pull/256",
 )
 def test_tensordot():
diff --git a/dask/base.py b/dask/base.py
index 59c4640d..1cd48cac 100644
--- a/dask/base.py
+++ b/dask/base.py
@@ -6,13 +6,13 @@ import uuid
 from collections import OrderedDict
 from contextlib import contextmanager
 from dataclasses import fields, is_dataclass
-from distutils.version import LooseVersion
 from functools import partial
 from hashlib import md5
 from numbers import Number
 from operator import getitem
 from typing import Iterator, Mapping, Set
 
+from packaging.version import parse as parse_version
 from tlz import curry, groupby, identity, merge
 from tlz.functoolz import Compose
@@ -898,7 +898,7 @@ def _normalize_function(func):
 def register_pandas():
     import pandas as pd
 
-    PANDAS_GT_130 = LooseVersion(pd.__version__) >= LooseVersion("1.3.0")
+    PANDAS_GT_130 = parse_version(pd.__version__) >= parse_version("1.3.0")
 
     @normalize_token.register(pd.Index)
     def normalize_index(ind):
diff --git a/dask/bytes/tests/test_http.py b/dask/bytes/tests/test_http.py
index bee444f5..3a05ef81 100644
--- a/dask/bytes/tests/test_http.py
+++ b/dask/bytes/tests/test_http.py
@@ -2,11 +2,11 @@ import os
 import subprocess
 import sys
 import time
-from distutils.version import LooseVersion
 
 import fsspec
 import pytest
 from fsspec.core import open_files
+from packaging.version import parse as parse_version
 
 import dask.bag as db
 from dask.utils import tmpdir
@@ -14,7 +14,7 @@ from dask.utils import tmpdir
 
 files = ["a", "b"]
 requests = pytest.importorskip("requests")
 errs = (requests.exceptions.RequestException,)
-if LooseVersion(fsspec.__version__) > "0.7.4":
+if parse_version(fsspec.__version__) > parse_version("0.7.4"):
     aiohttp = pytest.importorskip("aiohttp")
     errs = errs + (aiohttp.client_exceptions.ClientResponseError,)
diff --git a/dask/bytes/tests/test_local.py b/dask/bytes/tests/test_local.py
index 40b161c7..5564f92c 100644
--- a/dask/bytes/tests/test_local.py
+++ b/dask/bytes/tests/test_local.py
@@ -2,7 +2,6 @@ import gzip
 import os
 import pathlib
 import sys
-from distutils.version import LooseVersion
 from functools import partial
 from time import sleep
 
@@ -11,6 +10,7 @@ import pytest
 from fsspec.compression import compr
 from fsspec.core import open_files
 from fsspec.implementations.local import LocalFileSystem
+from packaging.version import parse as parse_version
 from tlz import concat, valmap
 
 from dask import compute
@@ -356,7 +356,7 @@ def test_get_pyarrow_filesystem():
     from fsspec.implementations.local import LocalFileSystem
 
     pa = pytest.importorskip("pyarrow")
-    if pa.__version__ >= LooseVersion("2.0.0"):
+    if parse_version(pa.__version__).major >= 2:
         pytest.skip("fsspec no loger inherits from pyarrow>=2.0.")
 
     fs = LocalFileSystem()
diff --git a/dask/bytes/tests/test_s3.py b/dask/bytes/tests/test_s3.py
index 1412de3e..b24b30b5 100644
--- a/dask/bytes/tests/test_s3.py
+++ b/dask/bytes/tests/test_s3.py
@@ -5,10 +5,10 @@ import subprocess
 import sys
 import time
 from contextlib import contextmanager
-from distutils.version import LooseVersion
 from functools import partial
 
 import pytest
+from packaging.version import parse as parse_version
 
 s3fs = pytest.importorskip("s3fs")
 boto3 = pytest.importorskip("boto3")
@@ -438,13 +438,13 @@ def test_parquet(s3, engine, s3so, metadata_file):
     from dask.dataframe._compat import tm
 
     lib = pytest.importorskip(engine)
-    if engine == "pyarrow" and LooseVersion(lib.__version__) < "0.13.1":
+    lib_version = parse_version(lib.__version__)
+    if engine == "pyarrow" and lib_version < parse_version("0.13.1"):
         pytest.skip("pyarrow < 0.13.1 not supported for parquet")
     if (
         engine == "pyarrow"
-        and LooseVersion(lib.__version__) >= "2.0"
-        and LooseVersion(lib.__version__) < "3.0"
-        and LooseVersion(s3fs.__version__) > "0.5.0"
+        and lib_version.major == 2
+        and parse_version(s3fs.__version__) > parse_version("0.5.0")
     ):
         pytest.skip("#7056 - new s3fs not supported before pyarrow 3.0")
@@ -507,7 +507,7 @@ def test_parquet_wstoragepars(s3, s3so):
 
 def test_get_pyarrow_fs_s3(s3):
     pa = pytest.importorskip("pyarrow")
-    if pa.__version__ >= LooseVersion("2.0.0"):
+    if parse_version(pa.__version__).major >= 2:
         pytest.skip("fsspec no loger inherits from pyarrow>=2.0.")
     fs = DaskS3FileSystem(anon=True)
     assert isinstance(fs, pa.filesystem.FileSystem)
diff --git a/dask/dataframe/_compat.py b/dask/dataframe/_compat.py
index 2a0b17fc..5e76d106 100644
--- a/dask/dataframe/_compat.py
+++ b/dask/dataframe/_compat.py
@@ -1,16 +1,16 @@
 import string
-from distutils.version import LooseVersion
 
 import numpy as np
 import pandas as pd
-
-PANDAS_VERSION = LooseVersion(pd.__version__)
-PANDAS_GT_100 = PANDAS_VERSION >= LooseVersion("1.0.0")
-PANDAS_GT_104 = PANDAS_VERSION >= LooseVersion("1.0.4")
-PANDAS_GT_110 = PANDAS_VERSION >= LooseVersion("1.1.0")
-PANDAS_GT_120 = PANDAS_VERSION >= LooseVersion("1.2.0")
-PANDAS_GT_121 = PANDAS_VERSION >= LooseVersion("1.2.1")
-PANDAS_GT_130 = PANDAS_VERSION >= LooseVersion("1.3.0")
+from packaging.version import parse as parse_version
+
+PANDAS_VERSION = parse_version(pd.__version__)
+PANDAS_GT_100 = PANDAS_VERSION >= parse_version("1.0.0")
+PANDAS_GT_104 = PANDAS_VERSION >= parse_version("1.0.4")
+PANDAS_GT_110 = PANDAS_VERSION >= parse_version("1.1.0")
+PANDAS_GT_120 = PANDAS_VERSION >= parse_version("1.2.0")
+PANDAS_GT_121 = PANDAS_VERSION >= parse_version("1.2.1")
+PANDAS_GT_130 = PANDAS_VERSION >= parse_version("1.3.0")
 
 
 if PANDAS_GT_100:
diff --git a/dask/dataframe/io/orc.py b/dask/dataframe/io/orc.py
index 40740310..69eca3c5 100644
--- a/dask/dataframe/io/orc.py
+++ b/dask/dataframe/io/orc.py
@@ -1,6 +1,5 @@
-from distutils.version import LooseVersion
-
 from fsspec.core import get_fs_token_paths
+from packaging.version import parse as parse_version
 
 from ...base import tokenize
 from ...highlevelgraph import HighLevelGraph
@@ -49,7 +48,7 @@ def _read_orc_stripe(fs, path, stripe, columns=None):
     with fs.open(path, "rb") as f:
         o = orc.ORCFile(f)
         table = o.read_stripe(stripe, columns)
-    if pa.__version__ < LooseVersion("0.11.0"):
+    if parse_version(pa.__version__) < parse_version("0.11.0"):
         return table.to_pandas()
     else:
         return table.to_pandas(date_as_object=False)
@@ -80,7 +79,7 @@ def read_orc(path, columns=None, storage_options=None):
     orc = import_required("pyarrow.orc", "Please install pyarrow >= 0.9.0")
     import pyarrow as pa
 
-    if LooseVersion(pa.__version__) == "0.10.0":
+    if parse_version(pa.__version__) == parse_version("0.10.0"):
         raise RuntimeError(
             "Due to a bug in pyarrow 0.10.0, the ORC reader is "
             "unavailable. Please either downgrade pyarrow to "
diff --git a/dask/dataframe/io/parquet/arrow.py b/dask/dataframe/io/parquet/arrow.py
index 99227157..83bb4bf8 100644
--- a/dask/dataframe/io/parquet/arrow.py
+++ b/dask/dataframe/io/parquet/arrow.py
@@ -2,13 +2,13 @@ import json
 import warnings
 from collections import defaultdict
 from datetime import datetime
-from distutils.version import LooseVersion
 from functools import partial
 
 import numpy as np
 import pandas as pd
 import pyarrow as pa
 import pyarrow.parquet as pq
+from packaging.version import parse as parse_version
 
 from dask import delayed
 
@@ -27,14 +27,16 @@ from .utils import (
 )
 
 # Check PyArrow version for feature support
-preserve_ind_supported = pa.__version__ >= LooseVersion("0.15.0")
+_pa_version = parse_version(pa.__version__)
+preserve_ind_supported = _pa_version >= parse_version("0.15.0")
 read_row_groups_supported = preserve_ind_supported
-if pa.__version__ >= LooseVersion("1.0.0"):
+if _pa_version.major >= 1:
     from pyarrow import dataset as pa_ds
 else:
     pa_ds = None
-subset_stats_supported = pa.__version__ > LooseVersion("2.0.0")
-schema_field_supported = pa.__version__ >= LooseVersion("0.15.0")
+subset_stats_supported = _pa_version > parse_version("2.0.0")
+schema_field_supported = _pa_version >= parse_version("0.15.0")
+del _pa_version
 
 #
 # Helper Utilities
diff --git a/dask/dataframe/io/parquet/core.py b/dask/dataframe/io/parquet/core.py
index e11de5e3..00ab6ee0 100644
--- a/dask/dataframe/io/parquet/core.py
+++ b/dask/dataframe/io/parquet/core.py
@@ -1,11 +1,11 @@
 import math
 import warnings
-from distutils.version import LooseVersion
 
 import tlz as toolz
 from fsspec.core import get_fs_token_paths
 from fsspec.implementations.local import LocalFileSystem
 from fsspec.utils import stringify_path
+from packaging.version import parse as parse_version
 
 from ....base import tokenize
 from ....delayed import Delayed
@@ -844,11 +844,12 @@ def get_engine(engine):
 
     elif engine in ("pyarrow", "arrow", "pyarrow-legacy", "pyarrow-dataset"):
         pa = import_required("pyarrow", "`pyarrow` not installed")
+        pa_version = parse_version(pa.__version__)
 
-        if LooseVersion(pa.__version__) < "0.13.1":
+        if pa_version < parse_version("0.13.1"):
             raise RuntimeError("PyArrow version >= 0.13.1 required")
 
-        if engine == "pyarrow-dataset" and LooseVersion(pa.__version__) >= "1.0.0":
+        if engine == "pyarrow-dataset" and pa_version.major >= 1:
             from .arrow import ArrowDatasetEngine
 
             _ENGINES[engine] = eng = ArrowDatasetEngine
diff --git a/dask/dataframe/io/parquet/fastparquet.py b/dask/dataframe/io/parquet/fastparquet.py
index 5c817294..ae28c792 100644
--- a/dask/dataframe/io/parquet/fastparquet.py
+++ b/dask/dataframe/io/parquet/fastparquet.py
@@ -3,11 +3,11 @@ import json
 import pickle
 import warnings
 from collections import OrderedDict, defaultdict
-from distutils.version import LooseVersion
 
 import numpy as np
 import pandas as pd
 import tlz as toolz
+from packaging.version import parse as parse_version
 
 try:
     import fastparquet
@@ -924,7 +924,7 @@ class FastParquetEngine(Engine):
             rgs = []
         elif partition_on:
             mkdirs = lambda x: fs.mkdirs(x, exist_ok=True)
-            if LooseVersion(fastparquet.__version__) >= "0.1.4":
+            if parse_version(fastparquet.__version__) >= parse_version("0.1.4"):
                 rgs = partition_on_columns(
                     df, partition_on, path, filename, fmd, compression, fs.open, mkdirs
                 )
diff --git a/dask/dataframe/io/tests/test_orc.py b/dask/dataframe/io/tests/test_orc.py
index 1c1ca00e..580a7ded 100644
--- a/dask/dataframe/io/tests/test_orc.py
+++ b/dask/dataframe/io/tests/test_orc.py
@@ -1,9 +1,9 @@
 import os
 import shutil
 import tempfile
-from distutils.version import LooseVersion
 
 import pytest
+from packaging.version import parse as parse_version
 
 import dask.dataframe as dd
 from dask.dataframe import read_orc
@@ -16,7 +16,7 @@ pytest.importorskip("pyarrow.orc")
 import pyarrow as pa
 
 pytestmark = pytest.mark.skipif(
-    LooseVersion(pa.__version__) == "0.10.0",
+    parse_version(pa.__version__) == parse_version("0.10.0"),
     reason=(
         "PyArrow 0.10.0 release broke the ORC reader, see "
         "https://issues.apache.org/jira/browse/ARROW-3009"
diff --git a/dask/dataframe/io/tests/test_parquet.py b/dask/dataframe/io/tests/test_parquet.py
index a5a1244a..28df8771 100644
--- a/dask/dataframe/io/tests/test_parquet.py
+++ b/dask/dataframe/io/tests/test_parquet.py
@@ -4,11 +4,11 @@ import os
 import sys
 import warnings
 from decimal import Decimal
-from distutils.version import LooseVersion
 
 import numpy as np
 import pandas as pd
 import pytest
+from packaging.version import parse as parse_version
 
 import dask
 import dask.dataframe as dd
@@ -25,12 +25,18 @@ try:
     import fastparquet
 except ImportError:
     fastparquet = False
+    fastparquet_version = parse_version("0")
+else:
+    fastparquet_version = parse_version(fastparquet.__version__)
 
 try:
     import pyarrow as pa
 except ImportError:
     pa = False
+    pa_version = parse_version("0")
+else:
+    pa_version = parse_version(pa.__version__)
 
 try:
     import pyarrow.parquet as pq
@@ -41,7 +47,7 @@ except ImportError:
 
 SKIP_FASTPARQUET = not fastparquet
 FASTPARQUET_MARK = pytest.mark.skipif(SKIP_FASTPARQUET, reason="fastparquet not found")
-if pq and pa.__version__ < LooseVersion("0.13.1"):
+if pq and pa_version < parse_version("0.13.1"):
     SKIP_PYARROW = True
     SKIP_PYARROW_REASON = "pyarrow >= 0.13.1 required for parquet"
 else:
@@ -49,8 +55,8 @@ else:
         sys.platform == "win32"
         and pa
         and (
-            (pa.__version__ == LooseVersion("0.16.0"))
-            or (pa.__version__ == LooseVersion("2.0.0"))
+            pa_version == parse_version("0.16.0")
+            or pa_version == parse_version("2.0.0")
         )
     ):
         SKIP_PYARROW = True
@@ -64,7 +70,7 @@ else:
         SKIP_PYARROW_REASON = "pyarrow not found"
 PYARROW_MARK = pytest.mark.skipif(SKIP_PYARROW, reason=SKIP_PYARROW_REASON)
 
-if pa and pa.__version__ < LooseVersion("1.0.0"):
+if pa and pa_version.major < 1:
     SKIP_PYARROW_DS = True
     SKIP_PYARROW_DS_REASON = "pyarrow >= 1.0.0 required for pyarrow dataset API"
 else:
@@ -147,7 +153,7 @@ write_read_engines_xfail = write_read_engines(
 
 if (
     fastparquet
-    and fastparquet.__version__ < LooseVersion("0.5")
+    and fastparquet_version < parse_version("0.5")
     and PANDAS_GT_110
     and not PANDAS_GT_121
 ):
@@ -822,7 +828,7 @@ def test_append_dict_column(tmpdir, engine):
 
     if engine == "fastparquet":
         pytest.xfail("Fastparquet engine is missing dict-column support")
-    elif pa.__version__ < LooseVersion("1.0.1"):
+    elif pa_version < parse_version("1.0.1"):
         pytest.skip("Newer PyArrow version required for dict-column support.")
 
     tmp = str(tmpdir)
@@ -981,7 +987,7 @@ def test_categories_unnamed_index(tmpdir, engine):
 
     # Check that we can handle an unnamed categorical index
     # https://github.com/dask/dask/issues/6885
-    if engine.startswith("pyarrow") and pa.__version__ < LooseVersion("0.15.0"):
+    if engine.startswith("pyarrow") and pa_version < parse_version("0.15.0"):
         pytest.skip("PyArrow>=0.15 Required.")
 
     tmpdir = str(tmpdir)
@@ -1166,7 +1172,7 @@ def test_to_parquet_pyarrow_w_inconsistent_schema_by_partition_succeeds_w_manual
 @pytest.mark.parametrize("index", [False, True])
 @pytest.mark.parametrize("schema", ["infer", "complex"])
 def test_pyarrow_schema_inference(tmpdir, index, engine, schema):
-    if pa.__version__ < LooseVersion("0.15.0"):
+    if pa_version < parse_version("0.15.0"):
         pytest.skip("PyArrow>=0.15 Required.")
     if schema == "complex":
         schema = {"index": pa.string(), "amount": pa.int64()}
@@ -1359,9 +1365,7 @@ def test_filters_v0(tmpdir, write_engine, read_engine):
 
     # Recent versions of pyarrow support full row-wise filtering
     # (fastparquet and older pyarrow versions do not)
-    pyarrow_row_filtering = (
-        read_engine == "pyarrow-dataset" and pa.__version__ >= LooseVersion("1.0.0")
-    )
+    pyarrow_row_filtering = read_engine == "pyarrow-dataset" and pa_version.major >= 1
 
     fn = str(tmpdir)
     df = pd.DataFrame({"at": ["ab", "aa", "ba", "da", "bb"]})
@@ -1462,7 +1466,7 @@ def test_pyarrow_filter_divisions(tmpdir):
         str(tmpdir.join("file.1.parquet")), engine="pyarrow", row_group_size=2
     )
 
-    if pa.__version__ >= LooseVersion("1.0.0"):
+    if pa_version.major >= 1:
         # Only works for ArrowDatasetEngine.
         # Legacy code will not apply filters on individual row-groups
         # when `split_row_groups=False`.
@@ -1637,7 +1641,7 @@ def test_parquet_select_cats(tmpdir, engine):
 
 
 def test_columns_name(tmpdir, engine):
-    if engine == "fastparquet" and fastparquet.__version__ <= LooseVersion("0.3.1"):
+    if engine == "fastparquet" and fastparquet_version <= parse_version("0.3.1"):
         pytest.skip("Fastparquet does not write column_indexes up to 0.3.1")
     tmp_path = str(tmpdir)
     df = pd.DataFrame({"A": [1, 2]}, index=pd.Index(["a", "b"], name="idx"))
@@ -2041,9 +2045,7 @@ def test_to_parquet_with_get(tmpdir):
 def test_select_partitioned_column(tmpdir, engine):
     pytest.importorskip("snappy")
     if engine.startswith("pyarrow"):
-        import pyarrow as pa
-
-        if pa.__version__ < LooseVersion("0.9.0"):
+        if pa_version < parse_version("0.9.0"):
             pytest.skip("pyarrow<0.9.0 did not support this")
 
     fn = str(tmpdir)
@@ -2067,9 +2069,9 @@ def test_select_partitioned_column(tmpdir, engine):
 
 
 def test_with_tz(tmpdir, engine):
-    if engine.startswith("pyarrow") and pa.__version__ < LooseVersion("0.11.0"):
+    if engine.startswith("pyarrow") and pa_version < parse_version("0.11.0"):
         pytest.skip("pyarrow<0.11.0 did not support this")
-    if engine == "fastparquet" and fastparquet.__version__ < LooseVersion("0.3.0"):
+    if engine == "fastparquet" and fastparquet_version < parse_version("0.3.0"):
         pytest.skip("fastparquet<0.3.0 did not support this")
 
     with warnings.catch_warnings():
@@ -2278,7 +2280,7 @@ def test_timeseries_nulls_in_schema(tmpdir, engine, schema):
     if (
         schema == "infer"
         and engine.startswith("pyarrow")
-        and pa.__version__ < LooseVersion("0.15.0")
+        and pa_version < parse_version("0.15.0")
     ):
         pytest.skip("PyArrow>=0.15 Required.")
@@ -2814,9 +2816,7 @@ def test_filter_nonpartition_columns(
 
 @PYARROW_MARK
 def test_pandas_metadata_nullable_pyarrow(tmpdir):
-    if pa.__version__ < LooseVersion("0.16.0") or pd.__version__ < LooseVersion(
-        "1.0.0"
-    ):
+    if pa_version < parse_version("0.16.0") or parse_version(pd.__version__).major < 1:
         pytest.skip("PyArrow>=0.16 and Pandas>=1.0.0 Required.")
 
     tmpdir = str(tmpdir)
@@ -2837,7 +2837,7 @@ def test_pandas_metadata_nullable_pyarrow(tmpdir):
 
 @PYARROW_MARK
 def test_pandas_timestamp_overflow_pyarrow(tmpdir):
-    if pa.__version__ < LooseVersion("0.17.0"):
+    if pa_version < parse_version("0.17.0"):
         pytest.skip("PyArrow>=0.17 Required.")
 
     info = np.iinfo(np.dtype("int64"))
@@ -2947,7 +2947,7 @@ def test_partitioned_column_overlap(tmpdir, engine, write_cols):
 
 @fp_pandas_xfail
 def test_partitioned_preserve_index(tmpdir, write_engine, read_engine):
-    if write_engine.startswith("pyarrow") and pa.__version__ < LooseVersion("0.15.0"):
+    if write_engine.startswith("pyarrow") and pa_version < parse_version("0.15.0"):
         pytest.skip("PyArrow>=0.15 Required.")
 
     tmp = str(tmpdir)
@@ -3065,7 +3065,7 @@ def test_pyarrow_dataset_simple(tmpdir, engine):
 
 @PYARROW_MARK
 @pytest.mark.parametrize("test_filter", [True, False])
 def test_pyarrow_dataset_partitioned(tmpdir, engine, test_filter):
-    if pa.__version__ <= LooseVersion("0.17.1"):
+    if pa_version <= parse_version("0.17.1"):
         # Using pyarrow.dataset API does not produce
         # Categorical type for partitioned columns.
         pytest.skip("PyArrow>0.17.1 Required.")
@@ -3093,7 +3093,7 @@ def test_pyarrow_dataset_partitioned(tmpdir, engine, test_filter):
 def test_pyarrow_dataset_read_from_paths(
     tmpdir, read_from_paths, test_filter_partitioned
 ):
-    if pa.__version__ <= LooseVersion("0.17.1"):
+    if pa_version <= parse_version("0.17.1"):
         # Using pyarrow.dataset API does not produce
         # Categorical type for partitioned columns.
         pytest.skip("PyArrow>0.17.1 Required.")
@@ -3123,7 +3123,7 @@ def test_pyarrow_dataset_read_from_paths(
 @PYARROW_MARK
 @pytest.mark.parametrize("split_row_groups", [True, False])
 def test_pyarrow_dataset_filter_partitioned(tmpdir, split_row_groups):
-    if pa.__version__ < LooseVersion("1.0.0"):
+    if pa_version.major < 1:
         # pyarrow.dataset API required.
         pytest.skip("PyArrow>=1.0.0 Required.")
diff --git a/dask/dataframe/tests/test_rolling.py b/dask/dataframe/tests/test_rolling.py
index 19cb5b56..ea023b40 100644
--- a/dask/dataframe/tests/test_rolling.py
+++ b/dask/dataframe/tests/test_rolling.py
@@ -1,8 +1,7 @@
-from distutils.version import LooseVersion
-
 import numpy as np
 import pandas as pd
 import pytest
+from packaging.version import parse as parse_version
 
 import dask.dataframe as dd
 from dask.dataframe._compat import PANDAS_GT_130
@@ -404,7 +403,8 @@ def test_rolling_agg_aggregate():
 @pytest.mark.skipif(not dd._compat.PANDAS_GT_100, reason="needs pandas>=1.0.0")
 def test_rolling_numba_engine():
     numba = pytest.importorskip("numba")
-    if not dd._compat.PANDAS_GT_104 and LooseVersion(numba.__version__) >= "0.49":
+    numba_version = parse_version(numba.__version__)
+    if not dd._compat.PANDAS_GT_104 and numba_version >= parse_version("0.49"):
         # Was fixed in https://github.com/pandas-dev/pandas/pull/33687
         pytest.xfail("Known incompatibility between pandas and numba")
diff --git a/dask/diagnostics/profile_visualize.py b/dask/diagnostics/profile_visualize.py
index 751957da..b62c6fba 100644
--- a/dask/diagnostics/profile_visualize.py
+++ b/dask/diagnostics/profile_visualize.py
@@ -1,9 +1,9 @@
 import random
 from bisect import bisect_left
-from distutils.version import LooseVersion
 from itertools import cycle
 from operator import add, itemgetter
 
+from packaging.version import parse as parse_version
 from tlz import accumulate, groupby, pluck, unique
 
 from ..core import istask
@@ -372,7 +372,7 @@ def plot_resources(results, palette="Viridis", **kwargs):
             line_width=4,
             **{
                 "legend_label"
-                if LooseVersion(bokeh.__version__) >= "1.4"
+                if parse_version(bokeh.__version__) >= parse_version("1.4")
                 else "legend": "% CPU"
             }
         )
@@ -390,7 +390,7 @@ def plot_resources(results, palette="Viridis", **kwargs):
             line_width=4,
             **{
                 "legend_label"
-                if LooseVersion(bokeh.__version__) >= "1.4"
+                if parse_version(bokeh.__version__) >= parse_version("1.4")
                 else "legend": "Memory"
             }
         )
diff --git a/dask/diagnostics/tests/test_profiler.py b/dask/diagnostics/tests/test_profiler.py
index 5d995b87..a31943f8 100644
--- a/dask/diagnostics/tests/test_profiler.py
+++ b/dask/diagnostics/tests/test_profiler.py
@@ -1,10 +1,10 @@
 import contextlib
 import os
-from distutils.version import LooseVersion
 from operator import add, mul
 from time import sleep
 
 import pytest
+from packaging.version import parse as parse_version
 
 from dask.diagnostics import CacheProfiler, Profiler, ResourceProfiler
 from dask.threaded import get
@@ -326,10 +326,10 @@ def test_plot_multiple():
     p = visualize(
         [prof, rprof], label_size=50, title="Not the default", show=False, save=False
     )
-    bokeh_version = LooseVersion(bokeh.__version__)
-    if bokeh_version >= "1.1.0":
+    bokeh_version = parse_version(bokeh.__version__)
+    if bokeh_version >= parse_version("1.1.0"):
         figures = [r[0] for r in p.children[1].children]
-    elif bokeh_version >= "0.12.0":
+    elif bokeh_version >= parse_version("0.12.0"):
         figures = [r.children[0] for r in p.children[1].children]
     else:
         figures = [r[0] for r in p.children]
@@ -364,7 +364,7 @@ def test_get_colors():
     from dask.diagnostics.profile_visualize import get_colors
 
     # 256-color palettes were added in bokeh 1.4.0
-    if LooseVersion(bokeh.__version__) >= "1.4.0":
+    if parse_version(bokeh.__version__) >= parse_version("1.4.0"):
         from bokeh.palettes import Blues256
 
     funcs = list(range(11))
diff --git a/dask/sizeof.py b/dask/sizeof.py
index 570b6251..38c06885 100644
--- a/dask/sizeof.py
+++ b/dask/sizeof.py
@@ -2,7 +2,8 @@ import itertools
 import random
 import sys
 from array import array
-from distutils.version import LooseVersion
+
+from packaging.version import parse as parse_version
 
 from .utils import Dispatch
 
@@ -195,7 +196,7 @@ def register_pyarrow():
         return int(_get_col_size(data)) + 1000
 
     # Handle pa.Column for pyarrow < 0.15
-    if pa.__version__ < LooseVersion("0.15.0"):
+    if parse_version(pa.__version__) < parse_version("0.15.0"):
 
         @sizeof.register(pa.Column)
         def sizeof_pyarrow_column(col):
diff --git a/dask/tests/test_multiprocessing.py b/dask/tests/test_multiprocessing.py
index 51c7044f..16cdca8b 100644
--- a/dask/tests/test_multiprocessing.py
+++ b/dask/tests/test_multiprocessing.py
@@ -2,10 +2,8 @@ import multiprocessing
 import pickle
 import sys
 from concurrent.futures import ProcessPoolExecutor
-from distutils.version import LooseVersion
 from operator import add
 
-import cloudpickle
 import pytest
 
 import dask
@@ -49,13 +47,10 @@ def test_pickle_locals():
 
 
 @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5, reason="requires pickle protocol 5")
-@pytest.mark.skipif(
-    cloudpickle.__version__ < LooseVersion("1.3.0"),
-    reason="requires cloudpickle >= 1.3.0",
-)
 def test_out_of_band_pickling():
     """Test that out-of-band pickling works"""
     np = pytest.importorskip("numpy")
+    pytest.importorskip("cloudpickle", minversion="1.3.0")
 
     a = np.arange(5)
diff --git a/setup.py b/setup.py
index 41751134..9365926c 100755
--- a/setup.py
+++ b/setup.py
@@ -22,6 +22,7 @@ extras_require["complete"] = sorted({v for req in extras_require.values() for v
 extras_require["test"] = ["pytest", "pytest-rerunfailures", "pytest-xdist"]
 
 install_requires = [
+    "packaging",
     "pyyaml",
     "cloudpickle >= 1.1.1",
     "fsspec >= 0.6.0",
-- 
2.31.1
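
Note for reviewers: a minimal sketch of the conversion pattern applied throughout
this patch. It is illustrative only and not part of the diff; the NumPy module and
variable names below are assumptions chosen for the example, not code from dask.

    # Before: distutils is deprecated, and LooseVersion happily compares against
    # raw strings, which packaging's Version deliberately does not order against.
    #     from distutils.version import LooseVersion
    #     _numpy_120 = LooseVersion(np.__version__) >= "1.20.0"

    # After: parse the installed version once, then compare Version objects
    # (PEP 440 semantics), or use .major for coarse major-version checks.
    import numpy as np
    from packaging.version import parse as parse_version

    _np_version = parse_version(np.__version__)
    _numpy_120 = _np_version >= parse_version("1.20.0")  # full version compare
    _numpy_1x = _np_version.major >= 1                   # major-only check

This is also why "packaging" is added to install_requires in setup.py above.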