Import python venv for stability
This commit is contained in:
+158
@@ -0,0 +1,158 @@
|
||||
"""
|
||||
Assertion helpers for arithmetic tests.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
array,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import (
|
||||
BooleanArray,
|
||||
NumpyExtensionArray,
|
||||
)
|
||||
|
||||
|
||||
def assert_cannot_add(left, right, msg="cannot add"):
    """
    Check that adding two objects raises TypeError in either operand order.

    Parameters
    ----------
    left : object
        One operand of the addition.
    right : object
        The other operand of the addition.
    msg : str, default "cannot add"
        Pattern the TypeError message is matched against.
    """
    # ``left + right`` is attempted first, then the reflected ``right + left``.
    for first, second in ((left, right), (right, left)):
        with pytest.raises(TypeError, match=msg):
            first + second
|
||||
|
||||
|
||||
def assert_invalid_addsub_type(left, right, msg=None):
    """
    Check that two objects can be neither added nor subtracted.

    Addition and subtraction are each attempted in both operand orders, and
    every attempt must raise TypeError.

    Parameters
    ----------
    left : object
        One operand.
    right : object
        The other operand.
    msg : str or None, default None
        Pattern the TypeError message is matched against.
    """
    # Deferred so that each operation is only evaluated inside pytest.raises.
    attempts = (
        lambda: left + right,
        lambda: right + left,
        lambda: left - right,
        lambda: right - left,
    )
    for attempt in attempts:
        with pytest.raises(TypeError, match=msg):
            attempt()
|
||||
|
||||
|
||||
def get_upcast_box(left, right, is_cmp: bool = False):
    """
    Pick the box used to build 'expected' for an arithmetic or comparison op.

    Parameters
    ----------
    left : Any
    right : Any
    is_cmp : bool, default False
        Whether the operation is a comparison method.

    Returns
    -------
    DataFrame, Series, Index, np.array or tm.to_array
        DataFrame if either operand is a DataFrame; otherwise Series if
        either is a Series; otherwise Index if either is an Index; otherwise
        ``tm.to_array``. For comparisons, ``np.array`` replaces Index, and
        also replaces Series when ``left`` is an Index.
    """

    def either_is(klass) -> bool:
        return isinstance(left, klass) or isinstance(right, klass)

    if either_is(DataFrame):
        return DataFrame
    if either_is(Series):
        # Index does not defer for comparisons
        index_on_left = isinstance(left, Index)
        return np.array if (is_cmp and index_on_left) else Series
    if either_is(Index):
        return np.array if is_cmp else Index
    return tm.to_array
|
||||
|
||||
|
||||
def assert_invalid_comparison(left, right, box):
|
||||
"""
|
||||
Assert that comparison operations with mismatched types behave correctly.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
left : np.ndarray, ExtensionArray, Index, or Series
|
||||
right : object
|
||||
box : {pd.DataFrame, pd.Series, pd.Index, pd.array, tm.to_array}
|
||||
"""
|
||||
# Not for tznaive-tzaware comparison
|
||||
|
||||
# Note: not quite the same as how we do this for tm.box_expected
|
||||
xbox = box if box not in [Index, array] else np.array
|
||||
|
||||
def xbox2(x):
|
||||
# Eventually we'd like this to be tighter, but for now we'll
|
||||
# just exclude NumpyExtensionArray[bool]
|
||||
if isinstance(x, NumpyExtensionArray):
|
||||
return x._ndarray
|
||||
if isinstance(x, BooleanArray):
|
||||
# NB: we are assuming no pd.NAs for now
|
||||
return x.astype(bool)
|
||||
return x
|
||||
|
||||
result = xbox2(left == right)
|
||||
expected = xbox(np.zeros(result.shape, dtype=np.bool_))
|
||||
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
result = xbox2(right == left)
|
||||
tm.assert_equal(result, xbox(expected))
|
||||
|
||||
result = xbox2(left != right)
|
||||
tm.assert_equal(result, ~expected)
|
||||
|
||||
result = xbox2(right != left)
|
||||
tm.assert_equal(result, xbox(~expected))
|
||||
|
||||
msg = "|".join(
|
||||
[
|
||||
"Invalid comparison between",
|
||||
"Cannot compare type",
|
||||
"not supported between",
|
||||
"invalid type promotion",
|
||||
(
|
||||
# GH#36706 npdev 1.20.0 2020-09-28
|
||||
r"The DTypes <class 'numpy.dtype\[datetime64\]'> and "
|
||||
r"<class 'numpy.dtype\[int64\]'> do not have a common DType. "
|
||||
"For example they cannot be stored in a single array unless the "
|
||||
"dtype is `object`."
|
||||
),
|
||||
]
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
left < right
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
left <= right
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
left > right
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
left >= right
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
right < left
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
right <= left
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
right > left
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
right >= left
|
||||
+139
@@ -0,0 +1,139 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index
|
||||
|
||||
|
||||
@pytest.fixture(params=(1, np.array(1, dtype=np.int64)))
def one(request):
    """
    The integer value 1, as a Python int and as a zero-dim int64 array.

    The zero-dim integer array behaves like an integer, so this fixture can
    be used to check that datetimelike indexes handle addition and
    subtraction of both integers and zero-dimensional integer arrays.

    Examples
    --------
    dti = pd.date_range('2016-01-01', periods=2, freq='h')
    dti
    DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00'],
    dtype='datetime64[ns]', freq='h')
    dti + one
    DatetimeIndex(['2016-01-01 01:00:00', '2016-01-01 02:00:00'],
    dtype='datetime64[ns]', freq='h')
    """
    return request.param
|
||||
|
||||
|
||||
# Length-5 vectors of zeros for every (container, dtype) combination ...
zeros = [
    box_cls([0] * 5, dtype=dtype)
    for box_cls in (Index, np.array, pd.array)
    for dtype in (np.int64, np.uint64, np.float64)
]
# ... length-5 vectors of negative float zeros ...
zeros += [box_cls([-0.0] * 5, dtype=np.float64) for box_cls in (Index, np.array)]
# ... zero-dimensional arrays ...
zeros += [np.array(0, dtype=dtype) for dtype in (np.int64, np.uint64, np.float64)]
zeros.append(np.array(-0.0, dtype=np.float64))
# ... and plain Python scalars.
zeros += [0, 0.0, -0.0]
|
||||
|
||||
|
||||
@pytest.fixture(params=zeros)
def zero(request):
    """
    One zero value: a scalar zero or a length-5 vector of zeros.

    Intended for checking that numeric-dtype indexes handle division by any
    zero of numeric dtype. The vectors have length 5 so they broadcast
    against the `numeric_idx` fixture, whose vectors are also of length 5.

    Examples
    --------
    arr = RangeIndex(5)
    arr / zero
    Index([nan, inf, inf, inf, inf], dtype='float64')
    """
    return request.param
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Scalar Fixtures
|
||||
|
||||
|
||||
@pytest.fixture(
    params=(
        pd.Timedelta("10m7s").to_pytimedelta(),
        pd.Timedelta("10m7s"),
        pd.Timedelta("10m7s").to_timedelta64(),
    ),
    ids=lambda param: type(param).__name__,
)
def scalar_td(request):
    """
    A 10-minute-7-second duration in one of several scalar representations.

    Each parameter is built from ``pd.Timedelta("10m7s")``: the Timedelta
    itself and its ``to_pytimedelta()`` and ``to_timedelta64()`` conversions.
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
    params=(
        pd.offsets.Day(3),
        pd.offsets.Hour(72),
        pd.Timedelta(days=3).to_pytimedelta(),
        pd.Timedelta("72:00:00"),
        np.timedelta64(3, "D"),
        np.timedelta64(72, "h"),
    ),
    ids=lambda param: type(param).__name__,
)
def three_days(request):
    """
    One of several timedelta-like or DateOffset objects, each of which
    represents a 3-day timedelta.
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
    params=(
        pd.offsets.Hour(2),
        pd.offsets.Minute(120),
        pd.Timedelta(hours=2).to_pytimedelta(),
        pd.Timedelta(seconds=2 * 3600),
        np.timedelta64(2, "h"),
        np.timedelta64(120, "m"),
    ),
    ids=lambda param: type(param).__name__,
)
def two_hours(request):
    """
    One of several timedelta-like or DateOffset objects, each of which
    represents a 2-hour timedelta.
    """
    return request.param
|
||||
|
||||
|
||||
# DateOffset instances spliced into the parameters of the ``not_daily`` fixture.
_common_mismatch = [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), pd.offsets.Minute()]
|
||||
|
||||
|
||||
@pytest.fixture(
    params=(
        np.timedelta64(4, "h"),
        pd.Timedelta(hours=23).to_pytimedelta(),
        pd.Timedelta("23:00:00"),
        *_common_mismatch,
    )
)
def not_daily(request):
    """
    One of several timedelta-like or DateOffset instances that are _not_
    compatible with Daily frequencies.
    """
    return request.param
|
||||
+39
@@ -0,0 +1,39 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._testing as tm
|
||||
from pandas.core.ops.array_ops import (
|
||||
comparison_op,
|
||||
na_logical_op,
|
||||
)
|
||||
|
||||
|
||||
def test_na_logical_op_2d():
    """na_logical_op handles a 2D int array OR-ed with a 2D object array."""
    int_block = np.arange(8).reshape(4, 2)
    obj_block = int_block.astype(object)
    obj_block[0, 0] = np.nan

    # Check that we fall back to the vec_binop branch
    with pytest.raises(TypeError, match="unsupported operand type"):
        operator.or_(int_block, obj_block)

    result = na_logical_op(int_block, obj_block, operator.or_)
    tm.assert_numpy_array_equal(result, obj_block)
|
||||
|
||||
|
||||
def test_object_comparison_2d():
    """comparison_op works elementwise on 2D object arrays."""
    grid = np.arange(9).reshape(3, 3).astype(object)
    transposed = grid.T

    # Only the diagonal of a matrix equals the diagonal of its transpose here.
    on_diagonal = np.eye(3).astype(bool)
    tm.assert_numpy_array_equal(
        comparison_op(grid, transposed, operator.eq), on_diagonal
    )

    # Ensure that cython doesn't raise on non-writeable arg, which
    # we can get from np.broadcast_to
    transposed.flags.writeable = False
    tm.assert_numpy_array_equal(
        comparison_op(grid, transposed, operator.ne), ~on_diagonal
    )
|
||||
+28
@@ -0,0 +1,28 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_divmod_bool_raises(box_with_array):
|
||||
# GH#46043 // raises, so divmod should too
|
||||
ser = Series([True, False])
|
||||
obj = tm.box_expected(ser, box_with_array)
|
||||
|
||||
msg = "operator 'floordiv' not implemented for bool dtypes"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
obj // obj
|
||||
|
||||
if box_with_array is DataFrame:
|
||||
msg = "operator 'floordiv' not implemented for bool dtypes"
|
||||
else:
|
||||
msg = "operator 'divmod' not implemented for bool dtypes"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
divmod(obj, obj)
|
||||
|
||||
# go through __rdivmod__
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
divmod(True, obj)
|
||||
+25
@@ -0,0 +1,25 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalComparisons:
    """Equality comparisons involving categorical Series."""

    def test_categorical_nan_equality(self):
        # A missing entry never compares equal, not even to itself.
        ser = Series(Categorical(["a", "b", "c", np.nan]))
        tm.assert_series_equal(ser == ser, Series([True, True, True, False]))

    def test_categorical_tuple_equality(self):
        # GH 18050
        tuples = Series([(0, 0), (0, 1), (0, 0), (1, 0), (1, 1)])
        matches = Series([True, False, True, False, False])

        tm.assert_series_equal(tuples == (0, 0), matches)
        tm.assert_series_equal(tuples.astype("category") == (0, 0), matches)
|
||||
+2500
File diff suppressed because it is too large
Load Diff
+308
@@ -0,0 +1,308 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_list_like
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Period,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import (
|
||||
BooleanArray,
|
||||
IntervalArray,
|
||||
)
|
||||
from pandas.tests.arithmetic.common import get_upcast_box
|
||||
|
||||
|
||||
@pytest.fixture(
    params=(
        (Index([0, 2, 4, 4]), Index([1, 3, 5, 8])),
        (Index([0.0, 1.0, 2.0, np.nan]), Index([1.0, 2.0, 3.0, np.nan])),
        (
            timedelta_range("0 days", periods=3).insert(3, pd.NaT),
            timedelta_range("1 day", periods=3).insert(3, pd.NaT),
        ),
        (
            date_range("20170101", periods=3).insert(3, pd.NaT),
            date_range("20170102", periods=3).insert(3, pd.NaT),
        ),
        (
            date_range("20170101", periods=3, tz="US/Eastern").insert(3, pd.NaT),
            date_range("20170102", periods=3, tz="US/Eastern").insert(3, pd.NaT),
        ),
    ),
    ids=lambda pair: str(pair[0].dtype),
)
def left_right_dtypes(request):
    """
    A (left, right) pair of endpoint indexes of one of several dtypes,
    used for building an IntervalArray.
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture
def interval_array(left_right_dtypes):
    """
    An IntervalArray of one of several dtypes, containing NA if possible.

    Built from the endpoint pair supplied by ``left_right_dtypes``.
    """
    left_endpoints, right_endpoints = left_right_dtypes
    return IntervalArray.from_arrays(left_endpoints, right_endpoints)
|
||||
|
||||
|
||||
def create_categorical_intervals(left, right, closed="right"):
    """Build a Categorical of intervals from left and right endpoints."""
    intervals = IntervalIndex.from_arrays(left, right, closed)
    return Categorical(intervals)
|
||||
|
||||
|
||||
def create_series_intervals(left, right, closed="right"):
    """Build a Series backed by an IntervalArray from left and right endpoints."""
    intervals = IntervalArray.from_arrays(left, right, closed)
    return Series(intervals)
|
||||
|
||||
|
||||
def create_series_categorical_intervals(left, right, closed="right"):
    """Build a Series of categorical intervals from left and right endpoints."""
    intervals = IntervalIndex.from_arrays(left, right, closed)
    categorical = Categorical(intervals)
    return Series(categorical)
|
||||
|
||||
|
||||
class TestComparison:
    """
    ``==`` / ``!=`` comparisons of interval data against scalars and list-likes.

    Most tests compute the result via the operator and compare it against
    ``elementwise_comparison``, which applies the operator pairwise in Python.
    """

    @pytest.fixture(params=[operator.eq, operator.ne])
    def op(self, request):
        """The comparison operator under test: ``operator.eq`` or ``operator.ne``."""
        return request.param

    @pytest.fixture(
        params=[
            IntervalArray.from_arrays,
            IntervalIndex.from_arrays,
            create_categorical_intervals,
            create_series_intervals,
            create_series_categorical_intervals,
        ],
        ids=[
            "IntervalArray",
            "IntervalIndex",
            "Categorical[Interval]",
            "Series[Interval]",
            "Series[Categorical[Interval]]",
        ],
    )
    def interval_constructor(self, request):
        """
        Fixture for all pandas native interval constructors.

        The tests in this class use it to build ``other``, the operand an
        IntervalArray is compared against.
        """
        return request.param

    def elementwise_comparison(self, op, interval_array, other):
        """
        Helper that performs elementwise comparisons between
        `interval_array` and `other`.

        A non-list-like `other` is repeated to the length of
        `interval_array`. The result is a Series (indexed like `other`)
        when `other` is a Series, otherwise an ndarray.
        """
        other = other if is_list_like(other) else [other] * len(interval_array)
        # strict=True: a length mismatch raises instead of silently truncating
        expected = np.array(
            [op(x, y) for x, y in zip(interval_array, other, strict=True)]
        )
        if isinstance(other, Series):
            return Series(expected, index=other.index)
        return expected

    def test_compare_scalar_interval(self, op, interval_array):
        # matches first interval
        other = interval_array[0]
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

        # matches on a single endpoint but not both
        other = Interval(interval_array.left[0], interval_array.right[1])
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_scalar_interval_mixed_closed(self, op, closed, other_closed):
        interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
        other = Interval(0, 1, closed=other_closed)

        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_scalar_na(self, op, interval_array, nulls_fixture, box_with_array):
        box = box_with_array
        obj = tm.box_expected(interval_array, box)
        result = op(obj, nulls_fixture)

        if nulls_fixture is pd.NA:
            # GH#31882
            exp = np.ones(interval_array.shape, dtype=bool)
            expected = BooleanArray(exp, exp)
        else:
            expected = self.elementwise_comparison(op, interval_array, nulls_fixture)

        if not (box is Index and nulls_fixture is pd.NA):
            # don't cast expected from BooleanArray to ndarray[object]
            xbox = get_upcast_box(obj, nulls_fixture, True)
            expected = tm.box_expected(expected, xbox)

        tm.assert_equal(result, expected)

        rev = op(nulls_fixture, obj)
        tm.assert_equal(rev, expected)

    @pytest.mark.parametrize(
        "other",
        [
            0,
            1.0,
            True,
            "foo",
            Timestamp("2017-01-01"),
            Timestamp("2017-01-01", tz="US/Eastern"),
            Timedelta("0 days"),
            Period("2017-01-01", "D"),
        ],
    )
    def test_compare_scalar_other(self, op, interval_array, other):
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_list_like_interval(self, op, interval_array, interval_constructor):
        # same endpoints
        other = interval_constructor(interval_array.left, interval_array.right)
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_equal(result, expected)

        # different endpoints
        other = interval_constructor(
            interval_array.left[::-1], interval_array.right[::-1]
        )
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_equal(result, expected)

        # all nan endpoints; sized from interval_array rather than hard-coded
        # so the operands always have matching lengths
        size = len(interval_array)
        other = interval_constructor([np.nan] * size, [np.nan] * size)
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_equal(result, expected)

    def test_compare_list_like_interval_mixed_closed(
        self, op, interval_constructor, closed, other_closed
    ):
        interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
        other = interval_constructor(range(2), range(1, 3), closed=other_closed)

        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [
            (
                Interval(0, 1),
                Interval(Timedelta("1 day"), Timedelta("2 days")),
                Interval(4, 5, "both"),
                Interval(10, 20, "neither"),
            ),
            (0, 1.5, Timestamp("20170103"), np.nan),
            (
                Timestamp("20170102", tz="US/Eastern"),
                Timedelta("2 days"),
                "baz",
                pd.NaT,
            ),
        ],
    )
    def test_compare_list_like_object(self, op, interval_array, other):
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_list_like_nan(self, op, interval_array, nulls_fixture):
        # sized from interval_array rather than hard-coded so the operands
        # always have matching lengths
        other = [nulls_fixture] * len(interval_array)
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)

        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [
            np.arange(4, dtype="int64"),
            np.arange(4, dtype="float64"),
            date_range("2017-01-01", periods=4),
            date_range("2017-01-01", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
            period_range("2017-01-01", periods=4, freq="D"),
            Categorical(list("abab")),
            Categorical(date_range("2017-01-01", periods=4)),
            pd.array(list("abcd")),
            pd.array(["foo", 3.14, None, object()], dtype=object),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_compare_list_like_other(self, op, interval_array, other):
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("length", [1, 3, 5])
    @pytest.mark.parametrize("other_constructor", [IntervalArray, list])
    def test_compare_length_mismatch_errors(self, op, other_constructor, length):
        interval_array = IntervalArray.from_arrays(range(4), range(1, 5))
        other = other_constructor([Interval(0, 1)] * length)
        with pytest.raises(ValueError, match="Lengths must match to compare"):
            op(interval_array, other)

    @pytest.mark.parametrize(
        "constructor, expected_type, assert_func",
        [
            (IntervalIndex, np.array, tm.assert_numpy_array_equal),
            (Series, Series, tm.assert_series_equal),
        ],
    )
    def test_index_series_compat(self, op, constructor, expected_type, assert_func):
        # IntervalIndex/Series that rely on IntervalArray for comparisons
        breaks = range(4)
        index = constructor(IntervalIndex.from_breaks(breaks))

        # scalar comparisons
        other = index[0]
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

        other = breaks[0]
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

        # list-like comparisons
        other = IntervalArray.from_breaks(breaks)
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

        other = [index[0], breaks[0], "foo"]
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

    @pytest.mark.parametrize("scalars", ["a", False, 1, 1.0, None])
    def test_comparison_operations(self, scalars):
        # GH #28981
        expected = Series([False, False])
        s = Series([Interval(0, 1), Interval(1, 2)], dtype="interval")
        result = s == scalars
        tm.assert_series_equal(result, expected)
|
||||
+1585
File diff suppressed because it is too large
Load Diff
+410
@@ -0,0 +1,410 @@
|
||||
# Arithmetic tests for DataFrame/Series/Index/Array classes that should
|
||||
# behave identically.
|
||||
# Specifically for object dtype
|
||||
import datetime
|
||||
from decimal import Decimal
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
Timestamp,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core import ops
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Comparisons
|
||||
|
||||
|
||||
class TestObjectComparisons:
|
||||
def test_comparison_object_numeric_nas(self, comparison_op):
|
||||
ser = Series(np.random.default_rng(2).standard_normal(10), dtype=object)
|
||||
shifted = ser.shift(2)
|
||||
|
||||
func = comparison_op
|
||||
|
||||
result = func(ser, shifted)
|
||||
expected = func(ser.astype(float), shifted.astype(float))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
|
||||
)
|
||||
def test_object_comparisons(self, infer_string):
|
||||
with option_context("future.infer_string", infer_string):
|
||||
ser = Series(["a", "b", np.nan, "c", "a"])
|
||||
|
||||
result = ser == "a"
|
||||
expected = Series([True, False, False, False, True])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ser < "a"
|
||||
expected = Series([False, False, False, False, False])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ser != "a"
|
||||
expected = -(ser == "a")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, object])
|
||||
def test_more_na_comparisons(self, dtype):
|
||||
left = Series(["a", np.nan, "c"], dtype=dtype)
|
||||
right = Series(["a", np.nan, "d"], dtype=dtype)
|
||||
|
||||
result = left == right
|
||||
expected = Series([True, False, False])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = left != right
|
||||
expected = Series([False, True, True])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = left == np.nan
|
||||
expected = Series([False, False, False])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = left != np.nan
|
||||
expected = Series([True, True, True])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Arithmetic
|
||||
|
||||
|
||||
class TestArithmetic:
|
||||
def test_add_period_to_array_of_offset(self):
|
||||
# GH#50162
|
||||
per = pd.Period("2012-1-1", freq="D")
|
||||
pi = pd.period_range("2012-1-1", periods=10, freq="D")
|
||||
idx = per - pi
|
||||
|
||||
expected = pd.Index([x + per for x in idx], dtype=object)
|
||||
result = idx + per
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = per + idx
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# TODO: parametrize
|
||||
def test_pow_ops_object(self):
|
||||
# GH#22922
|
||||
# pow is weird with masking & 1, so testing here
|
||||
a = Series([1, np.nan, 1, np.nan], dtype=object)
|
||||
b = Series([1, np.nan, np.nan, 1], dtype=object)
|
||||
result = a**b
|
||||
expected = Series(a.values**b.values, dtype=object)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = b**a
|
||||
expected = Series(b.values**a.values, dtype=object)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("op", [operator.add, ops.radd])
|
||||
@pytest.mark.parametrize("other", ["category", "Int64"])
|
||||
def test_add_extension_scalar(self, other, box_with_array, op):
|
||||
# GH#22378
|
||||
# Check that scalars satisfying is_extension_array_dtype(obj)
|
||||
# do not incorrectly try to dispatch to an ExtensionArray operation
|
||||
|
||||
arr = Series(["a", "b", "c"])
|
||||
expected = Series([op(x, other) for x in arr])
|
||||
|
||||
arr = tm.box_expected(arr, box_with_array)
|
||||
expected = tm.box_expected(expected, box_with_array)
|
||||
|
||||
result = op(arr, other)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_objarr_add_str(self, box_with_array):
|
||||
ser = Series(["x", np.nan, "x"])
|
||||
expected = Series(["xa", np.nan, "xa"])
|
||||
|
||||
ser = tm.box_expected(ser, box_with_array)
|
||||
expected = tm.box_expected(expected, box_with_array)
|
||||
|
||||
result = ser + "a"
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_objarr_radd_str(self, box_with_array):
|
||||
ser = Series(["x", np.nan, "x"])
|
||||
expected = Series(["ax", np.nan, "ax"])
|
||||
|
||||
ser = tm.box_expected(ser, box_with_array)
|
||||
expected = tm.box_expected(expected, box_with_array)
|
||||
|
||||
result = "a" + ser
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data",
|
||||
[
|
||||
[1, 2, 3],
|
||||
[1.1, 2.2, 3.3],
|
||||
[Timestamp("2011-01-01"), Timestamp("2011-01-02"), pd.NaT],
|
||||
["x", "y", 1],
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("dtype", [None, object])
|
||||
def test_objarr_radd_str_invalid(self, dtype, data, box_with_array):
|
||||
ser = Series(data, dtype=dtype)
|
||||
|
||||
ser = tm.box_expected(ser, box_with_array)
|
||||
msg = "|".join(
|
||||
[
|
||||
"can only concatenate str",
|
||||
"did not contain a loop with signature matching types",
|
||||
"unsupported operand type",
|
||||
"must be str",
|
||||
]
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
"foo_" + ser
|
||||
|
||||
@pytest.mark.parametrize("op", [operator.add, ops.radd, operator.sub, ops.rsub])
|
||||
def test_objarr_add_invalid(self, op, box_with_array):
|
||||
# invalid ops
|
||||
box = box_with_array
|
||||
|
||||
obj_ser = Series(list("abc"), dtype=object, name="objects")
|
||||
|
||||
obj_ser = tm.box_expected(obj_ser, box)
|
||||
msg = "|".join(
|
||||
[
|
||||
"can only concatenate str",
|
||||
"unsupported operand type",
|
||||
"must be str",
|
||||
"has no kernel",
|
||||
]
|
||||
)
|
||||
with pytest.raises(Exception, match=msg):
|
||||
op(obj_ser, 1)
|
||||
with pytest.raises(Exception, match=msg):
|
||||
op(obj_ser, np.array(1, dtype=np.int64))
|
||||
|
||||
# TODO: Moved from tests.series.test_operators; needs cleanup
|
||||
def test_operators_na_handling(self):
|
||||
ser = Series(["foo", "bar", "baz", np.nan])
|
||||
result = "prefix_" + ser
|
||||
expected = Series(["prefix_foo", "prefix_bar", "prefix_baz", np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ser + "_suffix"
|
||||
expected = Series(["foo_suffix", "bar_suffix", "baz_suffix", np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# TODO: parametrize over box
|
||||
@pytest.mark.parametrize("dtype", [None, object])
|
||||
def test_series_with_dtype_radd_timedelta(self, dtype):
|
||||
# note this test is _not_ aimed at timedelta64-dtyped Series
|
||||
# as of 2.0 we retain object dtype when ser.dtype == object
|
||||
ser = Series(
|
||||
[pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
|
||||
dtype=dtype,
|
||||
)
|
||||
expected = Series(
|
||||
[pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")],
|
||||
dtype=dtype,
|
||||
)
|
||||
|
||||
result = pd.Timedelta("3 days") + ser
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ser + pd.Timedelta("3 days")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# TODO: cleanup & parametrize over box
|
||||
def test_mixed_timezone_series_ops_object(self):
|
||||
# GH#13043
|
||||
ser = Series(
|
||||
[
|
||||
Timestamp("2015-01-01", tz="US/Eastern"),
|
||||
Timestamp("2015-01-01", tz="Asia/Tokyo"),
|
||||
],
|
||||
name="xxx",
|
||||
)
|
||||
assert ser.dtype == object
|
||||
|
||||
exp = Series(
|
||||
[
|
||||
Timestamp("2015-01-02", tz="US/Eastern"),
|
||||
Timestamp("2015-01-02", tz="Asia/Tokyo"),
|
||||
],
|
||||
name="xxx",
|
||||
)
|
||||
tm.assert_series_equal(ser + pd.Timedelta("1 days"), exp)
|
||||
tm.assert_series_equal(pd.Timedelta("1 days") + ser, exp)
|
||||
|
||||
# object series & object series
|
||||
ser2 = Series(
|
||||
[
|
||||
Timestamp("2015-01-03", tz="US/Eastern"),
|
||||
Timestamp("2015-01-05", tz="Asia/Tokyo"),
|
||||
],
|
||||
name="xxx",
|
||||
)
|
||||
assert ser2.dtype == object
|
||||
exp = Series(
|
||||
[pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx", dtype=object
|
||||
)
|
||||
tm.assert_series_equal(ser2 - ser, exp)
|
||||
tm.assert_series_equal(ser - ser2, -exp)
|
||||
|
||||
ser = Series(
|
||||
[pd.Timedelta("01:00:00"), pd.Timedelta("02:00:00")],
|
||||
name="xxx",
|
||||
dtype=object,
|
||||
)
|
||||
assert ser.dtype == object
|
||||
|
||||
exp = Series(
|
||||
[pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")],
|
||||
name="xxx",
|
||||
dtype=object,
|
||||
)
|
||||
tm.assert_series_equal(ser + pd.Timedelta("00:30:00"), exp)
|
||||
tm.assert_series_equal(pd.Timedelta("00:30:00") + ser, exp)
|
||||
|
||||
# TODO: cleanup & parametrize over box
|
||||
def test_iadd_preserves_name(self):
|
||||
# GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name
|
||||
ser = Series([1, 2, 3])
|
||||
ser.index.name = "foo"
|
||||
|
||||
ser.index += 1
|
||||
assert ser.index.name == "foo"
|
||||
|
||||
ser.index -= 1
|
||||
assert ser.index.name == "foo"
|
||||
|
||||
def test_add_string(self):
|
||||
# from bug report
|
||||
index = pd.Index(["a", "b", "c"])
|
||||
index2 = index + "foo"
|
||||
|
||||
assert "a" not in index2
|
||||
assert "afoo" in index2
|
||||
|
||||
def test_iadd_string(self):
|
||||
index = pd.Index(["a", "b", "c"])
|
||||
# doesn't fail test unless there is a check before `+=`
|
||||
assert "a" in index
|
||||
|
||||
index += "_x"
|
||||
assert "a_x" in index
|
||||
|
||||
def test_add(self):
|
||||
index = pd.Index([str(i) for i in range(10)])
|
||||
expected = pd.Index(index.values * 2)
|
||||
tm.assert_index_equal(index + index, expected)
|
||||
tm.assert_index_equal(index + index.tolist(), expected)
|
||||
tm.assert_index_equal(index.tolist() + index, expected)
|
||||
|
||||
# test add and radd
|
||||
index = pd.Index(list("abc"))
|
||||
expected = pd.Index(["a1", "b1", "c1"])
|
||||
tm.assert_index_equal(index + "1", expected)
|
||||
expected = pd.Index(["1a", "1b", "1c"])
|
||||
tm.assert_index_equal("1" + index, expected)
|
||||
|
||||
def test_sub_fail(self):
|
||||
index = pd.Index([str(i) for i in range(10)])
|
||||
|
||||
msg = "unsupported operand type|Cannot broadcast|sub' not supported"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
index - "a"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
index - index
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
index - index.tolist()
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
index.tolist() - index
|
||||
|
||||
def test_sub_object(self):
|
||||
# GH#19369
|
||||
index = pd.Index([Decimal(1), Decimal(2)])
|
||||
expected = pd.Index([Decimal(0), Decimal(1)])
|
||||
|
||||
result = index - Decimal(1)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = index - pd.Index([Decimal(1), Decimal(1)])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
msg = "unsupported operand type"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
index - "foo"
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
index - np.array([2, "foo"], dtype=object)
|
||||
|
||||
def test_rsub_object(self, fixed_now_ts):
|
||||
# GH#19369
|
||||
index = pd.Index([Decimal(1), Decimal(2)])
|
||||
expected = pd.Index([Decimal(1), Decimal(0)])
|
||||
|
||||
result = Decimal(2) - index
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = np.array([Decimal(2), Decimal(2)]) - index
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
msg = "unsupported operand type"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
"foo" - index
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
np.array([True, fixed_now_ts]) - index
|
||||
|
||||
|
||||
class MyIndex(pd.Index):
    """Simple index subclass that tracks ops calls."""

    # Number of times ``__add__`` (directly or via ``__radd__``) ran on this object.
    _calls: int

    @classmethod
    def _simple_new(cls, values, name=None, dtype=None):
        """Build an instance around ``values`` with a zeroed call counter.

        ``dtype`` is accepted but not used.
        """
        obj = object.__new__(cls)
        obj._data = values
        obj._name = name
        obj._calls = 0
        obj._reset_identity()
        return obj

    def __add__(self, other):
        """Record the call and return a fresh instance wrapping the same data."""
        self._calls = self._calls + 1
        return self._simple_new(self._data)

    def __radd__(self, other):
        """Reflected addition; delegates to ``__add__``."""
        return self.__add__(other)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "other",
    [
        pytest.param(
            [datetime.timedelta(1), datetime.timedelta(2)], id="timedelta"
        ),
        pytest.param(
            [datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)],
            id="datetime",
        ),
        pytest.param([pd.Period("2000"), pd.Period("2001")], id="period"),
        pytest.param(["a", "b"], id="object"),
    ],
)
def test_index_ops_defer_to_unknown_subclasses(other):
    """``Index + MyIndex`` must hand the operation over to the subclass."""
    # https://github.com/pandas-dev/pandas/issues/31109
    dates = np.array(
        [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)], dtype=object
    )
    tracked = MyIndex._simple_new(dates)
    left = pd.Index(other)

    summed = left + tracked
    assert isinstance(summed, MyIndex)
    # exactly one call must have reached the subclass's __add__/__radd__
    assert tracked._calls == 1
|
||||
+1679
File diff suppressed because it is too large
Load Diff
+472
@@ -0,0 +1,472 @@
|
||||
import operator
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import HAS_PYARROW
|
||||
from pandas.errors import Pandas4Warning
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
NA,
|
||||
ArrowDtype,
|
||||
Series,
|
||||
StringDtype,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.construction import extract_array
|
||||
|
||||
|
||||
def string_dtype_highest_priority(dtype1, dtype2):
|
||||
if HAS_PYARROW:
|
||||
DTYPE_HIERARCHY = [
|
||||
StringDtype("python", na_value=np.nan),
|
||||
StringDtype("pyarrow", na_value=np.nan),
|
||||
StringDtype("python", na_value=NA),
|
||||
StringDtype("pyarrow", na_value=NA),
|
||||
]
|
||||
else:
|
||||
DTYPE_HIERARCHY = [
|
||||
StringDtype("python", na_value=np.nan),
|
||||
StringDtype("python", na_value=NA),
|
||||
]
|
||||
|
||||
h1 = DTYPE_HIERARCHY.index(dtype1)
|
||||
h2 = DTYPE_HIERARCHY.index(dtype2)
|
||||
return DTYPE_HIERARCHY[max(h1, h2)]
|
||||
|
||||
|
||||
def test_eq_all_na():
    """Comparing an all-NA pyarrow string array with itself gives all-NA booleans."""
    pytest.importorskip("pyarrow")
    all_missing = pd.array([NA, NA], dtype=StringDtype("pyarrow"))
    expected = pd.array([NA, NA], dtype="boolean[pyarrow]")
    tm.assert_extension_array_equal(all_missing == all_missing, expected)
|
||||
|
||||
|
||||
def test_reversed_logical_ops(any_string_dtype):
    """bool Series |, &, ^ string Series matches the result after casting to bool.

    For non-object string dtypes a Pandas4Warning is expected on each operation.
    """
    # GH#60234
    dtype = any_string_dtype
    warn = None if dtype == object else Pandas4Warning
    left = Series([True, False, False, True])
    right = Series(["", "", "b", "c"], dtype=dtype)

    msg = "operations between boolean dtype and"
    for logical_op in (operator.or_, operator.and_, operator.xor):
        with tm.assert_produces_warning(warn, match=msg):
            result = logical_op(left, right)
        expected = logical_op(left, right.astype(bool))
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_pathlib_path_division(any_string_dtype, request):
    """``Path / Series`` and ``Series / Path`` divide elementwise, keeping NA."""
    # GH#61940
    if any_string_dtype == object:
        request.applymarker(
            pytest.mark.xfail(
                reason="with NA present we go through _masked_arith_op which "
                "raises TypeError bc Path is not recognized by lib.is_scalar."
            )
        )

    base = Path("/Users/Irv/")
    names = Series(["A", "B", NA], dtype=any_string_dtype)

    # Path on the left
    joined = base / names
    tm.assert_series_equal(
        joined,
        Series([base / "A", base / "B", names.dtype.na_value], dtype=object),
    )

    # Path on the right
    joined = names / base
    tm.assert_series_equal(
        joined,
        Series(["A" / base, "B" / base, names.dtype.na_value], dtype=object),
    )
|
||||
|
||||
|
||||
def test_mixed_object_comparison(any_string_dtype):
|
||||
# GH#60228
|
||||
dtype = any_string_dtype
|
||||
ser = Series(["a", "b"], dtype=dtype)
|
||||
|
||||
mixed = Series([1, "b"], dtype=object)
|
||||
|
||||
result = ser == mixed
|
||||
expected = Series([False, True], dtype=bool)
|
||||
if dtype == object:
|
||||
pass
|
||||
elif dtype.storage == "python" and dtype.na_value is NA:
|
||||
expected = expected.astype("boolean")
|
||||
elif dtype.storage == "pyarrow" and dtype.na_value is NA:
|
||||
expected = expected.astype("bool[pyarrow]")
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_pyarrow_numpy_string_invalid():
    """bool vs string Series: ==/!= are elementwise False/True, ordering raises."""
    # GH#56008
    pa = pytest.importorskip("pyarrow")
    bools = Series([False, True])
    strings = Series(["a", "b"], dtype=StringDtype(na_value=np.nan))

    expected_eq = Series(False, index=bools.index)
    tm.assert_series_equal(bools == strings, expected_eq)

    expected_ne = Series(True, index=bools.index)
    tm.assert_series_equal(bools != strings, expected_ne)

    with pytest.raises(TypeError, match="Invalid comparison"):
        bools > strings

    # GH#59505 (first target); the ArrowDtype target gets the same checks
    for arrow_target in ("string[pyarrow]", ArrowDtype(pa.string())):
        converted = strings.astype(arrow_target)

        tm.assert_series_equal(
            converted == bools, expected_eq.astype("bool[pyarrow]")
        )
        tm.assert_series_equal(
            converted != bools, expected_ne.astype("bool[pyarrow]")
        )

        with pytest.raises(TypeError, match="Invalid comparison"):
            bools > converted
|
||||
|
||||
|
||||
def test_mul_bool_invalid(any_string_dtype):
    """Multiplying a string Series by bool scalars or bool arrays raises TypeError."""
    # GH#62595
    dtype = any_string_dtype
    ser = Series(["a", "b", "c"], dtype=dtype)

    if dtype == object:
        pytest.skip("This is not expect to raise")

    msg = (
        "Cannot multiply StringArray by bools. Explicitly cast to integers"
        if dtype.storage == "python"
        else "Can only string multiply by an integer"
    )

    # each attempt builds its own bool array, as the operands must be fresh
    attempts = [
        lambda: False * ser,
        lambda: ser * True,
        lambda: ser * np.array([True, False, True], dtype=bool),
        lambda: np.array([True, False, True], dtype=bool) * ser,
    ]
    for attempt in attempts:
        with pytest.raises(TypeError, match=msg):
            attempt()
|
||||
|
||||
|
||||
def test_add(any_string_dtype, request):
    """Elementwise string Series addition via ``+``, ``add``, ``radd`` and ``fill_value``."""
    dtype = any_string_dtype
    if dtype == object:
        request.applymarker(
            pytest.mark.xfail(
                reason="Need to update expected for numpy object dtype"
            )
        )

    a = Series(["a", "b", "c", None, None], dtype=dtype)
    b = Series(["x", "y", None, "z", None], dtype=dtype)

    # (operation, expected values); operations are evaluated lazily, in order
    cases = [
        (lambda: a + b, ["ax", "by", None, None, None]),
        (lambda: a.add(b), ["ax", "by", None, None, None]),
        (lambda: a.radd(b), ["xa", "yb", None, None, None]),
        (lambda: a.add(b, fill_value="-"), ["ax", "by", "c-", "-z", None]),
    ]
    for compute, values in cases:
        result = compute()
        tm.assert_series_equal(result, Series(values, dtype=dtype))
|
||||
|
||||
|
||||
def test_add_2d(any_string_dtype, request):
    """Adding a 2D object ndarray to a 1D string array or Series raises ValueError."""
    dtype = any_string_dtype

    if dtype == object or dtype.storage == "pyarrow":
        request.applymarker(
            pytest.mark.xfail(
                raises=None, reason="Failed: DID NOT RAISE <class 'ValueError'>"
            )
        )

    flat = pd.array(["a", "b", "c"], dtype=dtype)
    two_dim = np.array([["a", "b", "c"]], dtype=object)

    # same check for the bare array and for the Series wrapping it
    for left in (flat, Series(flat)):
        with pytest.raises(ValueError, match="3 != 1"):
            left + two_dim
|
||||
|
||||
|
||||
def test_add_sequence(any_string_dtype, request, using_infer_string):
    """String array + list (and list + array) concatenates elementwise, NA-propagating."""
    dtype = any_string_dtype

    is_nan_python_string = (
        dtype != object and dtype.storage == "python" and dtype.na_value is np.nan
    )
    if is_nan_python_string and HAS_PYARROW and using_infer_string:
        request.applymarker(
            pytest.mark.xfail(
                reason="As of GH#62522, the list gets wrapped with sanitize_array, "
                "which casts to a higher-priority StringArray, so we get "
                "NotImplemented."
            )
        )
    if dtype == np.dtype(object) and using_infer_string:
        request.applymarker(pytest.mark.xfail(reason="Cannot broadcast list"))

    a = pd.array(["a", "b", None, None], dtype=dtype)
    other = ["x", None, "y", None]

    # array on the left
    tm.assert_extension_array_equal(
        a + other, pd.array(["ax", None, None, None], dtype=dtype)
    )

    # list on the left
    tm.assert_extension_array_equal(
        other + a, pd.array(["xa", None, None, None], dtype=dtype)
    )
|
||||
|
||||
|
||||
def test_mul(any_string_dtype):
|
||||
dtype = any_string_dtype
|
||||
a = pd.array(["a", "b", None], dtype=dtype)
|
||||
result = a * 2
|
||||
expected = pd.array(["aa", "bb", None], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = 2 * a
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_add_strings(any_string_dtype, request):
    """String array + DataFrame defers to the DataFrame (``__add__`` is NotImplemented)."""
    dtype = any_string_dtype
    if dtype != np.dtype(object):
        request.applymarker(pytest.mark.xfail(reason="GH-28527"))

    arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
    df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
    assert arr.__add__(df) is NotImplemented

    # array on the left
    left_sum = arr + df
    tm.assert_frame_equal(
        left_sum, pd.DataFrame([["at", "by", "cv", "dw"]]).astype(dtype)
    )

    # frame on the left
    right_sum = df + arr
    tm.assert_frame_equal(
        right_sum, pd.DataFrame([["ta", "yb", "vc", "wd"]]).astype(dtype)
    )
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="GH-28527")
def test_add_frame(dtype):
    """String array + DataFrame with missing values (currently expected to fail)."""
    arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype)
    df = pd.DataFrame([["x", np.nan, "y", np.nan]])

    assert arr.__add__(df) is NotImplemented

    # array on the left
    left_sum = arr + df
    tm.assert_frame_equal(
        left_sum, pd.DataFrame([["ax", np.nan, np.nan, np.nan]]).astype(dtype)
    )

    # frame on the left
    right_sum = df + arr
    tm.assert_frame_equal(
        right_sum, pd.DataFrame([["xa", np.nan, np.nan, np.nan]]).astype(dtype)
    )
|
||||
|
||||
|
||||
def test_comparison_methods_scalar(comparison_op, any_string_dtype):
|
||||
dtype = any_string_dtype
|
||||
op_name = f"__{comparison_op.__name__}__"
|
||||
a = pd.array(["a", None, "c"], dtype=dtype)
|
||||
other = "a"
|
||||
result = getattr(a, op_name)(other)
|
||||
if dtype == object or dtype.na_value is np.nan:
|
||||
expected = np.array([getattr(item, op_name)(other) for item in a])
|
||||
if comparison_op == operator.ne:
|
||||
expected[1] = True
|
||||
else:
|
||||
expected[1] = False
|
||||
result = extract_array(result, extract_numpy=True)
|
||||
tm.assert_numpy_array_equal(result, expected.astype(np.bool_))
|
||||
else:
|
||||
expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
|
||||
expected = np.array([getattr(item, op_name)(other) for item in a], dtype=object)
|
||||
expected = pd.array(expected, dtype=expected_dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_comparison_methods_scalar_pd_na(comparison_op, any_string_dtype):
|
||||
dtype = any_string_dtype
|
||||
op_name = f"__{comparison_op.__name__}__"
|
||||
a = pd.array(["a", None, "c"], dtype=dtype)
|
||||
result = getattr(a, op_name)(NA)
|
||||
|
||||
if dtype == np.dtype(object) or dtype.na_value is np.nan:
|
||||
if operator.ne == comparison_op:
|
||||
expected = np.array([True, True, True])
|
||||
else:
|
||||
expected = np.array([False, False, False])
|
||||
result = extract_array(result, extract_numpy=True)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
else:
|
||||
expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
|
||||
expected = pd.array([None, None, None], dtype=expected_dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_comparison_methods_scalar_not_string(comparison_op, any_string_dtype):
|
||||
op_name = f"__{comparison_op.__name__}__"
|
||||
dtype = any_string_dtype
|
||||
|
||||
a = pd.array(["a", None, "c"], dtype=dtype)
|
||||
other = 42
|
||||
|
||||
if op_name not in ["__eq__", "__ne__"]:
|
||||
with pytest.raises(TypeError, match="Invalid comparison|not supported between"):
|
||||
getattr(a, op_name)(other)
|
||||
|
||||
return
|
||||
|
||||
result = getattr(a, op_name)(other)
|
||||
result = extract_array(result, extract_numpy=True)
|
||||
|
||||
if dtype == np.dtype(object) or dtype.na_value is np.nan:
|
||||
expected_data = {
|
||||
"__eq__": [False, False, False],
|
||||
"__ne__": [True, True, True],
|
||||
}[op_name]
|
||||
expected = np.array(expected_data)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
else:
|
||||
expected_data = {"__eq__": [False, None, False], "__ne__": [True, None, True]}[
|
||||
op_name
|
||||
]
|
||||
expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
|
||||
expected = pd.array(expected_data, dtype=expected_dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_comparison_methods_array(comparison_op, any_string_dtype, any_string_dtype2):
    """Compare two string arrays that may use different string dtypes.

    Only the last position holds a non-missing value on both sides. When
    both dtypes are object or NaN-backed the result is a plain bool array;
    otherwise it is a nullable boolean array whose flavour follows the
    higher-priority string dtype.
    """
    op_name = f"__{comparison_op.__name__}__"
    dtype = any_string_dtype
    dtype2 = any_string_dtype2

    a = pd.array(["a", None, "c"], dtype=dtype)
    other = pd.array([None, None, "c"], dtype=dtype2)
    result = comparison_op(a, other)
    result = extract_array(result, extract_numpy=True)

    # swapping the operands must give the same result for this data
    result2 = comparison_op(other, a)
    result2 = extract_array(result2, extract_numpy=True)
    tm.assert_equal(result, result2)

    if (dtype == object or dtype.na_value is np.nan) and (
        dtype2 == object or dtype2.na_value is np.nan
    ):
        if operator.ne == comparison_op:
            expected = np.array([True, True, False])
        else:
            expected = np.array([False, False, False])
            expected[-1] = getattr(other[-1], op_name)(a[-1])
        # `result` was already unwrapped above, so it is not extracted again
        tm.assert_numpy_array_equal(result, expected)

    else:
        # object dtype never wins; otherwise defer to the dtype hierarchy
        if dtype == object:
            max_dtype = dtype2
        elif dtype2 == object:
            max_dtype = dtype
        else:
            max_dtype = string_dtype_highest_priority(dtype, dtype2)
        if max_dtype.storage == "python":
            expected_dtype = "boolean"
        else:
            expected_dtype = "bool[pyarrow]"

        expected = np.full(len(a), fill_value=None, dtype="object")
        expected[-1] = getattr(other[-1], op_name)(a[-1])
        expected = pd.array(expected, dtype=expected_dtype)
        tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
@td.skip_if_no("pyarrow")
def test_comparison_methods_array_arrow_extension(comparison_op, any_string_dtype):
    """Compare an ``ArrowDtype(pa.string())`` array with other string arrays."""
    # Test pd.ArrowDtype(pa.string()) against other string arrays
    import pyarrow as pa

    other_dtype = any_string_dtype
    method = f"__{comparison_op.__name__}__"

    a = pd.array(["a", None, "c"], dtype=ArrowDtype(pa.string()))
    other = pd.array([None, None, "c"], dtype=other_dtype)
    result = comparison_op(a, other)

    # ensure operation is commutative
    swapped = comparison_op(other, a)
    tm.assert_equal(result, swapped)

    # only the last slot is non-missing on both sides
    expected = pd.array([None, None, True], dtype="bool[pyarrow]")
    last_value = getattr(other[-1], method)(a[-1])
    expected[-1] = last_value
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [pd.array, pd.Index, Series])
def test_comparison_methods_list(comparison_op, any_string_dtype, box, request):
    """Compare a boxed string array with a plain list, from both sides."""
    dtype = any_string_dtype

    if box is pd.array and dtype != object and dtype.na_value is np.nan:
        request.applymarker(
            pytest.mark.xfail(
                reason="After wrapping list, op returns NotImplemented, see GH#62522"
            )
        )

    op_name = f"__{comparison_op.__name__}__"

    a = box(pd.array(["a", None, "c"], dtype=dtype))
    item = "c"
    other = [None, None, "c"]
    result = comparison_op(a, other)

    # ensure operation is commutative
    swapped = comparison_op(other, a)
    tm.assert_equal(result, swapped)

    # the only slot where both sides hold a value compares "c" with "c"
    last = getattr(item, op_name)(item)

    if dtype == np.dtype(object) or dtype.na_value is np.nan:
        # missing slots are True only for `!=`
        missing_answer = comparison_op is operator.ne
        expected = np.array([missing_answer, missing_answer, last])
        if box is not pd.Index:
            # if GH#62766 is addressed this check can be removed
            expected = box(expected, dtype=expected.dtype)
        tm.assert_equal(result, expected)

    else:
        expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
        raw = np.full(len(a), fill_value=None, dtype="object")
        raw[-1] = last
        expected = extract_array(
            pd.array(raw, dtype=expected_dtype), extract_numpy=True
        )
        if box is not pd.Index:
            # if GH#62766 is addressed this check can be removed
            expected = tm.box_expected(expected, box)
        tm.assert_equal(result, expected)
|
||||
+2331
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user