Backport proper pyarrow 10 and 11 support
This commit is contained in:
parent
0cccc16aae
commit
25cbd2676d
|
@@ -0,0 +1,335 @@
|
|||
From 43d4450e7e7386eb3aebb286b6101889c32ba52c Mon Sep 17 00:00:00 2001
|
||||
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
|
||||
Date: Sat, 24 Dec 2022 20:49:35 +0100
|
||||
Subject: [PATCH 1/3] CI: Unpin pyarrow<10 (#50314)
|
||||
|
||||
* CI: Unpin pyarrow<10
|
||||
|
||||
* Skip test
|
||||
---
|
||||
.github/actions/setup-conda/action.yml | 2 +-
|
||||
.github/workflows/ubuntu.yml | 2 +-
|
||||
ci/deps/actions-310.yaml | 2 +-
|
||||
ci/deps/actions-38-downstream_compat.yaml | 2 +-
|
||||
ci/deps/actions-38.yaml | 2 +-
|
||||
ci/deps/actions-39.yaml | 2 +-
|
||||
ci/deps/circle-38-arm64.yaml | 2 +-
|
||||
environment.yml | 2 +-
|
||||
pandas/compat/pyarrow.py | 2 ++
|
||||
pandas/tests/arrays/interval/test_interval.py | 2 +-
|
||||
pandas/tests/arrays/period/test_arrow_compat.py | 3 +++
|
||||
requirements-dev.txt | 2 +-
|
||||
12 files changed, 15 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/.github/actions/setup-conda/action.yml b/.github/actions/setup-conda/action.yml
|
||||
index 7d1e54052f..002d0020c2 100644
|
||||
--- a/.github/actions/setup-conda/action.yml
|
||||
+++ b/.github/actions/setup-conda/action.yml
|
||||
@@ -18,7 +18,7 @@ runs:
|
||||
- name: Set Arrow version in ${{ inputs.environment-file }} to ${{ inputs.pyarrow-version }}
|
||||
run: |
|
||||
grep -q ' - pyarrow' ${{ inputs.environment-file }}
|
||||
- sed -i"" -e "s/ - pyarrow<10/ - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }}
|
||||
+ sed -i"" -e "s/ - pyarrow/ - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }}
|
||||
cat ${{ inputs.environment-file }}
|
||||
shell: bash
|
||||
if: ${{ inputs.pyarrow-version }}
|
||||
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
|
||||
index 4602d12d85..8fd69a4851 100644
|
||||
--- a/.github/workflows/ubuntu.yml
|
||||
+++ b/.github/workflows/ubuntu.yml
|
||||
@@ -31,7 +31,7 @@ jobs:
|
||||
pattern: ["not single_cpu", "single_cpu"]
|
||||
# Don't test pyarrow v2/3: Causes timeouts in read_csv engine
|
||||
# even if tests are skipped/xfailed
|
||||
- pyarrow_version: ["5", "6", "7"]
|
||||
+ pyarrow_version: ["5", "6", "7", "8", "9", "10"]
|
||||
include:
|
||||
- name: "Downstream Compat"
|
||||
env_file: actions-38-downstream_compat.yaml
|
||||
diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml
|
||||
index deb23d435b..6050a28e11 100644
|
||||
--- a/ci/deps/actions-310.yaml
|
||||
+++ b/ci/deps/actions-310.yaml
|
||||
@@ -39,7 +39,7 @@ dependencies:
|
||||
- psycopg2
|
||||
- pymysql
|
||||
- pytables
|
||||
- - pyarrow<10
|
||||
+ - pyarrow
|
||||
- pyreadstat
|
||||
- python-snappy
|
||||
- pyxlsb
|
||||
diff --git a/ci/deps/actions-38-downstream_compat.yaml b/ci/deps/actions-38-downstream_compat.yaml
|
||||
index 06ffafeb70..988eacdd0c 100644
|
||||
--- a/ci/deps/actions-38-downstream_compat.yaml
|
||||
+++ b/ci/deps/actions-38-downstream_compat.yaml
|
||||
@@ -38,7 +38,7 @@ dependencies:
|
||||
- odfpy
|
||||
- pandas-gbq
|
||||
- psycopg2
|
||||
- - pyarrow<10
|
||||
+ - pyarrow
|
||||
- pymysql
|
||||
- pyreadstat
|
||||
- pytables
|
||||
diff --git a/ci/deps/actions-38.yaml b/ci/deps/actions-38.yaml
|
||||
index 222da40ea9..131e2d1882 100644
|
||||
--- a/ci/deps/actions-38.yaml
|
||||
+++ b/ci/deps/actions-38.yaml
|
||||
@@ -37,7 +37,7 @@ dependencies:
|
||||
- odfpy
|
||||
- pandas-gbq
|
||||
- psycopg2
|
||||
- - pyarrow<10
|
||||
+ - pyarrow
|
||||
- pymysql
|
||||
- pyreadstat
|
||||
- pytables
|
||||
diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml
|
||||
index 1c60e8ad6d..62e7397757 100644
|
||||
--- a/ci/deps/actions-39.yaml
|
||||
+++ b/ci/deps/actions-39.yaml
|
||||
@@ -38,7 +38,7 @@ dependencies:
|
||||
- pandas-gbq
|
||||
- psycopg2
|
||||
- pymysql
|
||||
- - pyarrow<10
|
||||
+ - pyarrow
|
||||
- pyreadstat
|
||||
- pytables
|
||||
- python-snappy
|
||||
diff --git a/ci/deps/circle-38-arm64.yaml b/ci/deps/circle-38-arm64.yaml
|
||||
index 263521fb74..512c47f0a6 100644
|
||||
--- a/ci/deps/circle-38-arm64.yaml
|
||||
+++ b/ci/deps/circle-38-arm64.yaml
|
||||
@@ -37,7 +37,7 @@ dependencies:
|
||||
- odfpy
|
||||
- pandas-gbq
|
||||
- psycopg2
|
||||
- - pyarrow<10
|
||||
+ - pyarrow
|
||||
- pymysql
|
||||
# Not provided on ARM
|
||||
#- pyreadstat
|
||||
diff --git a/environment.yml b/environment.yml
|
||||
index 20f839db9a..1620bad9b0 100644
|
||||
--- a/environment.yml
|
||||
+++ b/environment.yml
|
||||
@@ -38,7 +38,7 @@ dependencies:
|
||||
- odfpy
|
||||
- pandas-gbq
|
||||
- psycopg2
|
||||
- - pyarrow<10
|
||||
+ - pyarrow
|
||||
- pymysql
|
||||
- pyreadstat
|
||||
- pytables
|
||||
diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py
|
||||
index 6965865acb..887ae49c3d 100644
|
||||
--- a/pandas/compat/pyarrow.py
|
||||
+++ b/pandas/compat/pyarrow.py
|
||||
@@ -18,6 +18,7 @@ try:
|
||||
pa_version_under7p0 = _palv < Version("7.0.0")
|
||||
pa_version_under8p0 = _palv < Version("8.0.0")
|
||||
pa_version_under9p0 = _palv < Version("9.0.0")
|
||||
+ pa_version_under10p0 = _palv < Version("10.0.0")
|
||||
except ImportError:
|
||||
pa_version_under1p01 = True
|
||||
pa_version_under2p0 = True
|
||||
@@ -28,3 +29,4 @@ except ImportError:
|
||||
pa_version_under7p0 = True
|
||||
pa_version_under8p0 = True
|
||||
pa_version_under9p0 = True
|
||||
+ pa_version_under10p0 = True
|
||||
diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py
|
||||
index 2a6bea3255..0667082784 100644
|
||||
--- a/pandas/tests/arrays/interval/test_interval.py
|
||||
+++ b/pandas/tests/arrays/interval/test_interval.py
|
||||
@@ -287,7 +287,7 @@ def test_arrow_array():
|
||||
with pytest.raises(TypeError, match="Not supported to convert IntervalArray"):
|
||||
pa.array(intervals, type="float64")
|
||||
|
||||
- with pytest.raises(TypeError, match="different 'subtype'"):
|
||||
+ with pytest.raises(TypeError, match="different 'subtype'|to convert IntervalArray"):
|
||||
pa.array(intervals, type=ArrowIntervalType(pa.float64(), "left"))
|
||||
|
||||
|
||||
diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py
|
||||
index 03fd146572..01a97d07a7 100644
|
||||
--- a/pandas/tests/arrays/period/test_arrow_compat.py
|
||||
+++ b/pandas/tests/arrays/period/test_arrow_compat.py
|
||||
@@ -1,5 +1,7 @@
|
||||
import pytest
|
||||
|
||||
+from pandas.compat.pyarrow import pa_version_under10p0
|
||||
+
|
||||
from pandas.core.dtypes.dtypes import PeriodDtype
|
||||
|
||||
import pandas as pd
|
||||
@@ -26,6 +28,7 @@ def test_arrow_extension_type():
|
||||
assert not hash(p1) == hash(p3)
|
||||
|
||||
|
||||
+@pytest.mark.xfail(not pa_version_under10p0, reason="Wrong behavior with pyarrow 10")
|
||||
@pytest.mark.parametrize(
|
||||
"data, freq",
|
||||
[
|
||||
diff --git a/requirements-dev.txt b/requirements-dev.txt
|
||||
index 95291e4ab5..1c7a011e5f 100644
|
||||
--- a/requirements-dev.txt
|
||||
+++ b/requirements-dev.txt
|
||||
@@ -29,7 +29,7 @@ openpyxl
|
||||
odfpy
|
||||
pandas-gbq
|
||||
psycopg2
|
||||
-pyarrow<10
|
||||
+pyarrow
|
||||
pymysql
|
||||
pyreadstat
|
||||
tables
|
||||
--
|
||||
2.39.2
|
||||
|
||||
|
||||
From 5c2ced8f67fb248d6e5166b5dfdb03909de3123b Mon Sep 17 00:00:00 2001
|
||||
From: "Benjamin A. Beasley" <code@musicinmybrain.net>
|
||||
Date: Wed, 19 Apr 2023 11:36:21 -0400
|
||||
Subject: [PATCH 2/3] Add pandas.compat.pa_version_under11p0
|
||||
|
||||
Partial backport of #50998 / 52306d9
|
||||
---
|
||||
pandas/compat/__init__.py | 2 ++
|
||||
pandas/compat/pyarrow.py | 2 ++
|
||||
2 files changed, 4 insertions(+)
|
||||
|
||||
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
|
||||
index 80f66c945b..68f2495d0d 100644
|
||||
--- a/pandas/compat/__init__.py
|
||||
+++ b/pandas/compat/__init__.py
|
||||
@@ -29,6 +29,7 @@ from pandas.compat.pyarrow import (
|
||||
pa_version_under7p0,
|
||||
pa_version_under8p0,
|
||||
pa_version_under9p0,
|
||||
+ pa_version_under11p0,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -163,4 +164,5 @@ __all__ = [
|
||||
"pa_version_under7p0",
|
||||
"pa_version_under8p0",
|
||||
"pa_version_under9p0",
|
||||
+ "pa_version_under11p0",
|
||||
]
|
||||
diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py
|
||||
index 887ae49c3d..ca51d74828 100644
|
||||
--- a/pandas/compat/pyarrow.py
|
||||
+++ b/pandas/compat/pyarrow.py
|
||||
@@ -19,6 +19,7 @@ try:
|
||||
pa_version_under8p0 = _palv < Version("8.0.0")
|
||||
pa_version_under9p0 = _palv < Version("9.0.0")
|
||||
pa_version_under10p0 = _palv < Version("10.0.0")
|
||||
+ pa_version_under11p0 = _palv < Version("11.0.0")
|
||||
except ImportError:
|
||||
pa_version_under1p01 = True
|
||||
pa_version_under2p0 = True
|
||||
@@ -30,3 +31,4 @@ except ImportError:
|
||||
pa_version_under8p0 = True
|
||||
pa_version_under9p0 = True
|
||||
pa_version_under10p0 = True
|
||||
+ pa_version_under11p0 = True
|
||||
--
|
||||
2.39.2
|
||||
|
||||
|
||||
From 8549bbe4e1144e2429612fa17e082ab9c1cba23f Mon Sep 17 00:00:00 2001
|
||||
From: Luke Manley <lukemanley@gmail.com>
|
||||
Date: Fri, 10 Feb 2023 13:15:08 -0500
|
||||
Subject: [PATCH 3/3] CI: unpin pyarrow, fix failing test (#51175)
|
||||
|
||||
* unpin pyarrow, fix failing test
|
||||
|
||||
* cleanup
|
||||
|
||||
* handle NaT/NaN
|
||||
---
|
||||
pandas/core/arrays/arrow/array.py | 12 ++++++++++++
|
||||
pandas/core/tools/timedeltas.py | 4 +++-
|
||||
pandas/tests/extension/test_arrow.py | 12 +++++++++++-
|
||||
3 files changed, 26 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
|
||||
index 4dfd8942c2..7031f4d80d 100644
|
||||
--- a/pandas/core/arrays/arrow/array.py
|
||||
+++ b/pandas/core/arrays/arrow/array.py
|
||||
@@ -259,6 +259,18 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray):
|
||||
from pandas.core.tools.timedeltas import to_timedelta
|
||||
|
||||
scalars = to_timedelta(strings, errors="raise")
|
||||
+ if pa_type.unit != "ns":
|
||||
+ # GH51175: test_from_sequence_of_strings_pa_array
|
||||
+ # attempt to parse as int64 reflecting pyarrow's
|
||||
+ # duration to string casting behavior
|
||||
+ mask = isna(scalars)
|
||||
+ if not isinstance(strings, (pa.Array, pa.ChunkedArray)):
|
||||
+ strings = pa.array(strings, type=pa.string(), from_pandas=True)
|
||||
+ strings = pc.if_else(mask, None, strings)
|
||||
+ try:
|
||||
+ scalars = strings.cast(pa.int64())
|
||||
+ except pa.ArrowInvalid:
|
||||
+ pass
|
||||
elif pa.types.is_time(pa_type):
|
||||
from pandas.core.tools.times import to_time
|
||||
|
||||
diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py
|
||||
index 5026c97c0b..67ba7818d1 100644
|
||||
--- a/pandas/core/tools/timedeltas.py
|
||||
+++ b/pandas/core/tools/timedeltas.py
|
||||
@@ -236,7 +236,9 @@ def _convert_listlike(arg, unit=None, errors="raise", name=None):
|
||||
# returning arg (errors == "ignore"), and where the input is a
|
||||
# generator, we return a useful list-like instead of a
|
||||
# used-up generator
|
||||
- arg = np.array(list(arg), dtype=object)
|
||||
+ if not hasattr(arg, "__array__"):
|
||||
+ arg = list(arg)
|
||||
+ arg = np.array(arg, dtype=object)
|
||||
|
||||
try:
|
||||
td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0]
|
||||
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
|
||||
index b7ddb1f248..689bd29dfd 100644
|
||||
--- a/pandas/tests/extension/test_arrow.py
|
||||
+++ b/pandas/tests/extension/test_arrow.py
|
||||
@@ -30,6 +30,7 @@ from pandas.compat import (
|
||||
pa_version_under7p0,
|
||||
pa_version_under8p0,
|
||||
pa_version_under9p0,
|
||||
+ pa_version_under11p0,
|
||||
)
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
@@ -272,7 +273,7 @@ class TestConstructors(base.BaseConstructorsTests):
|
||||
reason="Nanosecond time parsing not supported.",
|
||||
)
|
||||
)
|
||||
- elif pa.types.is_duration(pa_dtype):
|
||||
+ elif pa_version_under11p0 and pa.types.is_duration(pa_dtype):
|
||||
request.node.add_marker(
|
||||
pytest.mark.xfail(
|
||||
raises=pa.ArrowNotImplementedError,
|
||||
@@ -1707,3 +1708,12 @@ def test_mode(data_for_grouping, dropna, take_idx, exp_idx, request):
|
||||
result = ser.mode(dropna=dropna)
|
||||
expected = pd.Series(data_for_grouping.take(exp_idx))
|
||||
tm.assert_series_equal(result, expected)
|
||||
+
|
||||
+@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"])
|
||||
+def test_duration_from_strings_with_nat(unit):
|
||||
+ # GH51175
|
||||
+ strings = ["1000", "NaT"]
|
||||
+ pa_type = pa.duration(unit)
|
||||
+ result = ArrowExtensionArray._from_sequence_of_strings(strings, dtype=pa_type)
|
||||
+ expected = ArrowExtensionArray(pa.array([1000, None], type=pa_type))
|
||||
+ tm.assert_extension_array_equal(result, expected)
|
||||
--
|
||||
2.39.2
|
||||
|
|
@@ -94,6 +94,43 @@ Patch: https://github.com/pandas-dev/pandas/commit/e73d4d29203dab20e001
|
|||
# https://bugzilla.redhat.com/show_bug.cgi?id=2171682
|
||||
Patch: https://github.com/pandas-dev/pandas/pull/52150.patch
|
||||
|
||||
# CI: Unpin pyarrow<10
|
||||
# https://github.com/pandas-dev/pandas/pull/50314
|
||||
#
|
||||
# Merged upstream as 4878dfe551da2fa8e2bc33e774b595f099bfa74e:
|
||||
# CI: Unpin pyarrow<10 (#50314)
|
||||
# * CI: Unpin pyarrow<10
|
||||
# * Skip test
|
||||
#
|
||||
# ----
|
||||
#
|
||||
# Add pandas.compat.pa_version_under11p0, required for pyarrow 11 support, from
|
||||
# the following PR/commit, but without the other associated changes:
|
||||
#
|
||||
# ENH: support reductions for pyarrow temporal types (#50998)
|
||||
# https://github.com/pandas-dev/pandas/pull/50998
|
||||
#
|
||||
# Merged upstream as 52306d957cb77a3823624679bb9606e244e7faa8:
|
||||
# ENH: support reductions for pyarrow temporal types
|
||||
# * unit check
|
||||
# * lint fixup
|
||||
#
|
||||
# ----
|
||||
#
|
||||
# CI: unpin pyarrow, fix failing test
|
||||
# https://github.com/pandas-dev/pandas/pull/51175
|
||||
#
|
||||
# Merged upstream as 5f584bd29be7203db64bdf8619991927e29c74bc:
|
||||
# CI: unpin pyarrow, fix failing test (#51175)
|
||||
# * unpin pyarrow, fix failing test
|
||||
# * cleanup
|
||||
# * handle NaT/NaN
|
||||
#
|
||||
# ----
|
||||
#
|
||||
# All commits cherry-picked to tag v1.5.3 and combined into a single patch.
|
||||
Patch: pandas-1.5.3-pyarrow-10-11.patch
|
||||
|
||||
%global _description %{expand:
|
||||
pandas is an open source, BSD-licensed library providing
|
||||
high-performance, easy-to-use data structures and data
|
||||
|
@@ -623,16 +660,6 @@ k="${k-}${k+ and }not (TestDataFramePlotsSubplots and test_bar_log_subplots)"
|
|||
%endif
|
||||
|
||||
%if 0%{?fedora} > 37
|
||||
# The text of an error message has changed in libarrow/pyarrow 10, which is
|
||||
# harmless but breaks one test. Disable it until a patch is available upstream.
|
||||
# CI: pyarrow 10 broke our ci
|
||||
# https://github.com/pandas-dev/pandas/issues/50058
|
||||
k="${k-}${k+ and }not test_arrow_array"
|
||||
|
||||
# Probably also related to upstream pinning pyarrow < 10 for CI:
|
||||
# E TypeError: Expected unicode, got pyarrow.lib.StringScalar
|
||||
k="${k-}${k+ and }not (TestConstructors and test_from_sequence_of_strings_pa_array)"
|
||||
|
||||
# TODO: Why does this fail? Does it need a slightly older version of dask?
|
||||
# E AssertionError: Caused unexpected warning(s): [('RuntimeWarning', RuntimeWarning('invalid value encountered in cast'), '/builddir/build/BUILDROOT/python-pandas-1.5.3-1.fc39.x86_64/usr/lib64/python3.11/site-packages/pandas/core/dtypes/cast.py', 1836)]
|
||||
k="${k-}${k+ and }not test_construct_dask_float_array_int_dtype_match_ndarray"
|
||||
|
@@ -717,6 +744,7 @@ export PYTHONHASHSEED="$(
|
|||
%changelog
|
||||
* Wed Apr 19 2023 Benjamin A. Beasley <code@musicinmybrain.net> - 1.5.3-3
|
||||
- Drop unnecessary weak dependency on python-pandas-datareader
|
||||
- Backport proper pyarrow 10 and 11 support
|
||||
|
||||
* Thu Apr 13 2023 Benjamin A. Beasley <code@musicinmybrain.net> - 1.5.3-2
|
||||
- Fix RHBZ#2171682 by backporting upstream PR#52150
|
||||
|
|
Loading…
Reference in New Issue