| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417 |
- """
- This file contains a minimal set of tests for compliance with the extension
- array interface test suite, and should contain no other tests.
- The test suite for the full functionality of the array is located in
- `pandas/tests/arrays/`.
- The tests in this file are inherited from the BaseExtensionTests, and only
- minimal tweaks should be applied to get the tests passing (by overwriting a
- parent method).
- Additional tests should either be added to one of the BaseExtensionTests
- classes (if they are relevant for the extension interface for all dtypes), or
- be added to the array-specific tests in `pandas/tests/arrays/`.
- """
- import warnings
- import numpy as np
- import pytest
- from pandas.compat import (
- IS64,
- is_platform_windows,
- )
- from pandas.compat.numpy import np_version_gt2
- from pandas.core.dtypes.common import (
- is_float_dtype,
- is_signed_integer_dtype,
- is_unsigned_integer_dtype,
- )
- import pandas as pd
- import pandas._testing as tm
- from pandas.core.arrays.boolean import BooleanDtype
- from pandas.core.arrays.floating import (
- Float32Dtype,
- Float64Dtype,
- )
- from pandas.core.arrays.integer import (
- Int8Dtype,
- Int16Dtype,
- Int32Dtype,
- Int64Dtype,
- UInt8Dtype,
- UInt16Dtype,
- UInt32Dtype,
- UInt64Dtype,
- )
- from pandas.tests.extension import base
# True when the build is not 64-bit, or on Windows while np_version_gt2 is false.
is_windows_or_32bit = not IS64 or (is_platform_windows() and not np_version_gt2)

# Module-wide marks: silence RuntimeWarnings that are expected from the
# reductions and arithmetic exercised by the inherited tests.
pytestmark = [
    pytest.mark.filterwarnings(warning_filter)
    for warning_filter in (
        "ignore:invalid value encountered in divide:RuntimeWarning",
        "ignore:Mean of empty slice:RuntimeWarning",
        # overflow only relevant for Floating dtype cases
        "ignore:overflow encountered in reduce:RuntimeWarning",
    )
]
def make_data():
    """Return the integers 1..100 as a list, with 9 and 98 replaced by pd.NA."""
    missing_values = (9, 98)
    return [pd.NA if value in missing_values else value for value in range(1, 101)]
def make_float_data():
    """Return a list of floats in steps of 0.1 with two pd.NA entries.

    The list is two ``np.arange`` runs, each followed by a ``pd.NA``, and
    then the literal tail ``[9.9, 10.0]``.
    """
    values = list(np.arange(0.1, 0.9, 0.1))
    values.append(pd.NA)
    # Iterating the ndarray yields the same numpy scalars as list(...) would.
    values.extend(np.arange(1, 9.8, 0.1))
    values.append(pd.NA)
    values.extend([9.9, 10.0])
    return values
def make_bool_data():
    """Return 100 alternating True/False values with np.nan at positions 8 and 97."""
    pair = [True, False]
    values = pair * 4
    values.append(np.nan)
    values.extend(pair * 44)
    values.append(np.nan)
    values.extend(pair)
    return values
@pytest.fixture(
    params=[
        Int8Dtype,
        Int16Dtype,
        Int32Dtype,
        Int64Dtype,
        UInt8Dtype,
        UInt16Dtype,
        UInt32Dtype,
        UInt64Dtype,
        Float32Dtype,
        Float64Dtype,
        BooleanDtype,
    ]
)
def dtype(request):
    """Parametrized fixture: an instance of each masked dtype class listed above."""
    dtype_class = request.param
    return dtype_class()
@pytest.fixture
def data(dtype):
    """Array of ``dtype`` built from the value factory matching ``dtype.kind``."""
    # Float and boolean kinds have dedicated factories; every other kind
    # falls back to the integer data.
    factories = {"f": make_float_data, "b": make_bool_data}
    build_values = factories.get(dtype.kind, make_data)
    return pd.array(build_values(), dtype=dtype)
@pytest.fixture
def data_for_twos(dtype):
    """Length-100 array of ``dtype`` filled with 2, or with 1 for boolean dtype."""
    ones = np.ones(100)
    # Boolean dtype keeps the plain ones; every other kind doubles them.
    values = ones if dtype.kind == "b" else ones * 2
    return pd.array(values, dtype=dtype)
@pytest.fixture
def data_missing(dtype):
    """Length-2 array of ``dtype``: a missing value followed by a valid one."""
    kind = dtype.kind
    if kind == "b":
        values = [np.nan, True]
    elif kind == "f":
        values = [pd.NA, 0.1]
    else:
        values = [pd.NA, 1]
    return pd.array(values, dtype=dtype)
@pytest.fixture
def data_for_sorting(dtype):
    """Length-3 array of ``dtype`` with no missing values, chosen per ``dtype.kind``."""
    values_by_kind = {
        "f": [0.1, 0.2, 0.0],
        "b": [True, True, False],
    }
    values = values_by_kind.get(dtype.kind, [1, 2, 0])
    return pd.array(values, dtype=dtype)
@pytest.fixture
def data_missing_for_sorting(dtype):
    """Length-3 array of ``dtype`` whose middle element is missing."""
    values_by_kind = {
        "f": [0.1, pd.NA, 0.0],
        "b": [True, np.nan, False],
    }
    values = values_by_kind.get(dtype.kind, [1, pd.NA, 0])
    return pd.array(values, dtype=dtype)
@pytest.fixture
def na_cmp():
    """Return a two-argument predicate that is true only when both are pd.NA."""

    # we are pd.NA
    def both_are_na(x, y):
        return x is pd.NA and y is pd.NA

    return both_are_na
@pytest.fixture
def data_for_grouping(dtype):
    """Length-8 array of ``dtype`` laid out as [b, b, NA, NA, a, a, b, c].

    For boolean dtype ``c`` is the same value as ``b``.
    """
    kind = dtype.kind
    if kind == "f":
        a, b, c = 0.0, 0.1, 0.2
    elif kind == "b":
        a, b = False, True
        c = b
    else:
        a, b, c = 0, 1, 2

    na = pd.NA
    return pd.array([b, b, na, na, a, a, b, c], dtype=dtype)
class TestMaskedArrays(base.ExtensionTests):
    """Extension-interface tests for the masked integer, float and boolean arrays.

    Only the hooks and tests overridden below differ from
    ``base.ExtensionTests``.
    """

    @pytest.mark.parametrize("na_action", [None, "ignore"])
    def test_map(self, data_missing, na_action):
        """Mapping the identity function must reproduce ``to_numpy()``."""
        result = data_missing.map(lambda x: x, na_action=na_action)
        if data_missing.dtype == Float32Dtype():
            # map roundtrips through objects, which converts to float64
            expected = data_missing.to_numpy(dtype="float64", na_value=np.nan)
        else:
            expected = data_missing.to_numpy()
        tm.assert_numpy_array_equal(result, expected)

    def test_map_na_action_ignore(self, data_missing_for_sorting):
        """With ``na_action="ignore"`` the missing element is not mapped."""
        zero = data_missing_for_sorting[2]
        result = data_missing_for_sorting.map(lambda x: zero, na_action="ignore")
        if data_missing_for_sorting.dtype.kind == "b":
            expected = np.array([False, pd.NA, False], dtype=object)
        else:
            expected = np.array([zero, np.nan, zero])
        tm.assert_numpy_array_equal(result, expected)

    def _get_expected_exception(self, op_name, obj, other):
        """Return the exception class expected from ``op_name``, or None.

        Only boolean dtype is expected to raise: NotImplementedError for
        pow/truediv/floordiv (and their reflected forms) and TypeError for
        subtraction.
        """
        try:
            dtype = tm.get_dtype(obj)
        except AttributeError:
            # passed arguments reversed
            dtype = tm.get_dtype(other)

        if dtype.kind != "b":
            return None

        # "__rpow__" -> "rpow" -> "pow": drop the dunder, then the reflected "r".
        if op_name.strip("_").lstrip("r") in ["pow", "truediv", "floordiv"]:
            # match behavior with non-masked bool dtype
            return NotImplementedError
        if op_name in ["__sub__", "__rsub__"]:
            # exception message would include "numpy boolean subtract"
            return TypeError
        return None

    def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
        """Cast the element-wise ``pointwise_result`` to the dtype the op should give.

        Comparison results become "boolean". Otherwise the cast depends on the
        kind of ``obj``'s dtype and on ``op_name``.
        """
        sdtype = tm.get_dtype(obj)
        expected = pointwise_result

        if op_name in ("eq", "ne", "le", "ge", "lt", "gt"):
            return expected.astype("boolean")

        if sdtype.kind in "iu":
            if op_name in ("__rtruediv__", "__truediv__", "__div__"):
                with warnings.catch_warnings():
                    warnings.filterwarnings(
                        "ignore",
                        "Downcasting object dtype arrays",
                        category=FutureWarning,
                    )
                    filled = expected.fillna(np.nan)
                expected = filled.astype("Float64")
            else:
                # combine method result in 'biggest' (int64) dtype
                expected = expected.astype(sdtype)
        elif sdtype.kind == "b":
            if op_name in (
                "__floordiv__",
                "__rfloordiv__",
                "__pow__",
                "__rpow__",
                "__mod__",
                "__rmod__",
            ):
                # for these ops the pointwise result is cast to Int8
                expected = expected.astype("Int8")
            elif op_name in ("__truediv__", "__rtruediv__"):
                # combine with bools does not generate the correct result
                # (numpy behaviour for div is to regard the bools as numeric)
                op = self.get_op_from_name(op_name)
                expected = self._combine(obj.astype(float), other, op)
                expected = expected.astype("Float64")

            if op_name == "__rpow__":
                # for rpow, combine does not propagate NaN
                result = getattr(obj, op_name)(other)
                expected[result.isna()] = np.nan
        else:
            # combine method result in 'biggest' (float64) dtype
            expected = expected.astype(sdtype)
        return expected

    def test_divmod_series_array(self, data, data_for_twos, request):
        """Run the base divmod test, marked xfail for boolean dtype."""
        if data.dtype.kind == "b":
            mark = pytest.mark.xfail(
                reason="Inconsistency between floordiv and divmod; we raise for "
                "floordiv but not for divmod. This matches what we do for "
                "non-masked bool dtype."
            )
            request.applymarker(mark)
        super().test_divmod_series_array(data, data_for_twos)

    def test_combine_le(self, data_repeated):
        """Set the expected result dtype, then run the base ``combine`` test."""
        # TODO: patching self is a bad pattern here
        # Unpack both items so data_repeated(2) is consumed as before; only
        # the first is inspected.
        orig_data1, _ = data_repeated(2)
        if orig_data1.dtype.kind == "b":
            self._combine_le_expected_dtype = "boolean"
        else:
            # TODO: can we make this boolean?
            self._combine_le_expected_dtype = object
        super().test_combine_le(data_repeated)

    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        """Return True for every reduction; skip any/all on non-boolean dtypes."""
        if op_name in ["any", "all"] and ser.dtype.kind != "b":
            pytest.skip(reason="Tested in tests/reductions/test_reductions.py")
        return True

    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
        """Compare a reduction on ``ser`` with the same reduction on a numpy-dtype copy.

        The copy is ``ser`` with missing values dropped, cast to a comparison
        dtype. When ``skipna`` is false and ``ser`` has missing values, the
        expected result is ``pd.NA`` (except for any/all).
        """
        # overwrite to ensure pd.NA is tested instead of np.nan
        # https://github.com/pandas-dev/pandas/issues/30958

        cmp_dtype = "int64"
        if ser.dtype.kind == "f":
            # Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has
            # no attribute "numpy_dtype"
            cmp_dtype = ser.dtype.numpy_dtype  # type: ignore[union-attr]
        elif ser.dtype.kind == "b":
            if op_name in ["min", "max"]:
                cmp_dtype = "bool"

        # TODO: prod with integer dtypes does *not* match the result we would
        # get if we used object for cmp_dtype. In that case the object result
        # is a large integer while the non-object case overflows and returns 0
        alt = ser.dropna().astype(cmp_dtype)
        if op_name == "count":
            result = getattr(ser, op_name)()
            expected = getattr(alt, op_name)()
        else:
            result = getattr(ser, op_name)(skipna=skipna)
            expected = getattr(alt, op_name)(skipna=skipna)
            if not skipna and ser.isna().any() and op_name not in ["any", "all"]:
                expected = pd.NA
        tm.assert_almost_equal(result, expected)

    def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
        """Return the name of the dtype a reduction of ``arr`` should produce.

        Raises
        ------
        TypeError
            If no rule below matches ``arr.dtype`` and ``op_name``.
        """
        # Integer results are 32 bits wide on non-64-bit builds, and on
        # Windows unless np_version_gt2 is set and skipna is true.
        # TODO: Why does Window Numpy 2.0 dtype depend on skipna?
        use_32bit = (
            is_platform_windows() and (not np_version_gt2 or not skipna)
        ) or not IS64
        bits = 32 if use_32bit else 64

        if is_float_dtype(arr.dtype):
            return arr.dtype.name
        if op_name in ["mean", "median", "var", "std", "skew"]:
            return "Float64"
        if op_name in ["max", "min"]:
            return arr.dtype.name
        if arr.dtype in ["Int64", "UInt64"]:
            return arr.dtype.name
        if is_signed_integer_dtype(arr.dtype):
            return f"Int{bits}"
        if is_unsigned_integer_dtype(arr.dtype):
            return f"UInt{bits}"
        # The float-result and min/max ops were already handled above for
        # every dtype, so only sum/prod remain for boolean.
        if arr.dtype.kind == "b" and op_name in ["sum", "prod"]:
            return f"Int{bits}"
        raise TypeError("not supposed to reach this")

    def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool:
        """Report every accumulation as supported."""
        return True

    def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
        """Compare an accumulation on ``ser`` with the same op on a float64 copy.

        Raises
        ------
        TypeError
            If ``ser.dtype`` is not one of the integer, float or boolean
            masked dtypes.
        NotImplementedError
            If ``op_name`` is not cumsum, cumprod, cummax or cummin.
        """
        # overwrite to ensure pd.NA is tested instead of np.nan
        # https://github.com/pandas-dev/pandas/issues/30958
        length = 64
        # Item "ExtensionDtype" of "Union[dtype[Any], ExtensionDtype]" has
        # no attribute "itemsize"
        if is_windows_or_32bit and ser.dtype.itemsize != 8:  # type: ignore[union-attr]
            length = 32

        if ser.dtype.name.startswith("U"):
            expected_dtype = f"UInt{length}"
        elif ser.dtype.name.startswith("I"):
            expected_dtype = f"Int{length}"
        elif ser.dtype.name.startswith("F"):
            # Incompatible types in assignment (expression has type
            # "Union[dtype[Any], ExtensionDtype]", variable has type "str")
            expected_dtype = ser.dtype  # type: ignore[assignment]
        elif ser.dtype.kind == "b":
            if op_name in ("cummin", "cummax"):
                expected_dtype = "boolean"
            else:
                expected_dtype = f"Int{length}"
        else:
            # Fail explicitly rather than with an unbound local below.
            raise TypeError("not supposed to reach this")

        if expected_dtype == "Float32" and op_name == "cumprod" and skipna:
            # TODO: xfail?
            pytest.skip(
                f"Float32 precision lead to large differences with op {op_name} "
                f"and skipna={skipna}"
            )

        if op_name not in ("cumsum", "cummax", "cummin", "cumprod"):
            raise NotImplementedError(f"{op_name} not supported")

        # cumprod is only checked on the first 12 elements.
        subject = ser[:12] if op_name == "cumprod" else ser
        # cummax/cummin keep the dtype of the input.
        result_dtype = ser.dtype if op_name in ("cummax", "cummin") else expected_dtype

        result = getattr(subject, op_name)(skipna=skipna)
        expected = pd.Series(
            pd.array(
                getattr(subject.astype("float64"), op_name)(skipna=skipna),
                dtype=result_dtype,
            )
        )
        tm.assert_series_equal(result, expected)
class Test2DCompat(base.Dim2CompatTests):
    """Run the inherited ``base.Dim2CompatTests`` unchanged against this module's fixtures."""
|