reduce.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. from typing import final
  2. import pytest
  3. import pandas as pd
  4. import pandas._testing as tm
  5. from pandas.api.types import is_numeric_dtype
  6. class BaseReduceTests:
  7. """
  8. Reduction specific tests. Generally these only
  9. make sense for numeric/boolean operations.
  10. """
  11. def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
  12. # Specify if we expect this reduction to succeed.
  13. return False
  14. def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
  15. # We perform the same operation on the np.float64 data and check
  16. # that the results match. Override if you need to cast to something
  17. # other than float64.
  18. res_op = getattr(ser, op_name)
  19. try:
  20. alt = ser.astype("float64")
  21. except (TypeError, ValueError):
  22. # e.g. Interval can't cast (TypeError), StringArray can't cast
  23. # (ValueError), so let's cast to object and do
  24. # the reduction pointwise
  25. alt = ser.astype(object)
  26. exp_op = getattr(alt, op_name)
  27. if op_name == "count":
  28. result = res_op()
  29. expected = exp_op()
  30. else:
  31. result = res_op(skipna=skipna)
  32. expected = exp_op(skipna=skipna)
  33. tm.assert_almost_equal(result, expected)
  34. def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
  35. # Find the expected dtype when the given reduction is done on a DataFrame
  36. # column with this array. The default assumes float64-like behavior,
  37. # i.e. retains the dtype.
  38. return arr.dtype
  39. # We anticipate that authors should not need to override check_reduce_frame,
  40. # but should be able to do any necessary overriding in
  41. # _get_expected_reduction_dtype. If you have a use case where this
  42. # does not hold, please let us know at github.com/pandas-dev/pandas/issues.
  43. @final
  44. def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
  45. # Check that the 2D reduction done in a DataFrame reduction "looks like"
  46. # a wrapped version of the 1D reduction done by Series.
  47. arr = ser.array
  48. df = pd.DataFrame({"a": arr})
  49. kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}
  50. cmp_dtype = self._get_expected_reduction_dtype(arr, op_name, skipna)
  51. # The DataFrame method just calls arr._reduce with keepdims=True,
  52. # so this first check is perfunctory.
  53. result1 = arr._reduce(op_name, skipna=skipna, keepdims=True, **kwargs)
  54. result2 = getattr(df, op_name)(skipna=skipna, **kwargs).array
  55. tm.assert_extension_array_equal(result1, result2)
  56. # Check that the 2D reduction looks like a wrapped version of the
  57. # 1D reduction
  58. if not skipna and ser.isna().any():
  59. expected = pd.array([pd.NA], dtype=cmp_dtype)
  60. else:
  61. exp_value = getattr(ser.dropna(), op_name)()
  62. expected = pd.array([exp_value], dtype=cmp_dtype)
  63. tm.assert_extension_array_equal(result1, expected)
  64. @pytest.mark.parametrize("skipna", [True, False])
  65. def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
  66. op_name = all_boolean_reductions
  67. ser = pd.Series(data)
  68. if not self._supports_reduction(ser, op_name):
  69. # TODO: the message being checked here isn't actually checking anything
  70. msg = (
  71. "[Cc]annot perform|Categorical is not ordered for operation|"
  72. "does not support reduction|"
  73. )
  74. with pytest.raises(TypeError, match=msg):
  75. getattr(ser, op_name)(skipna=skipna)
  76. else:
  77. self.check_reduce(ser, op_name, skipna)
  78. @pytest.mark.filterwarnings("ignore::RuntimeWarning")
  79. @pytest.mark.parametrize("skipna", [True, False])
  80. def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
  81. op_name = all_numeric_reductions
  82. ser = pd.Series(data)
  83. if not self._supports_reduction(ser, op_name):
  84. # TODO: the message being checked here isn't actually checking anything
  85. msg = (
  86. "[Cc]annot perform|Categorical is not ordered for operation|"
  87. "does not support reduction|"
  88. )
  89. with pytest.raises(TypeError, match=msg):
  90. getattr(ser, op_name)(skipna=skipna)
  91. else:
  92. # min/max with empty produce numpy warnings
  93. self.check_reduce(ser, op_name, skipna)
  94. @pytest.mark.parametrize("skipna", [True, False])
  95. def test_reduce_frame(self, data, all_numeric_reductions, skipna):
  96. op_name = all_numeric_reductions
  97. ser = pd.Series(data)
  98. if not is_numeric_dtype(ser.dtype):
  99. pytest.skip(f"{ser.dtype} is not numeric dtype")
  100. if op_name in ["count", "kurt", "sem"]:
  101. pytest.skip(f"{op_name} not an array method")
  102. if not self._supports_reduction(ser, op_name):
  103. pytest.skip(f"Reduction {op_name} not supported for this dtype")
  104. self.check_reduce_frame(ser, op_name, skipna)
  105. # TODO(3.0): remove BaseNoReduceTests, BaseNumericReduceTests,
  106. # BaseBooleanReduceTests
  107. class BaseNoReduceTests(BaseReduceTests):
  108. """we don't define any reductions"""
  109. class BaseNumericReduceTests(BaseReduceTests):
  110. # For backward compatibility only, this only runs the numeric reductions
  111. def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
  112. if op_name in ["any", "all"]:
  113. pytest.skip("These are tested in BaseBooleanReduceTests")
  114. return True
  115. class BaseBooleanReduceTests(BaseReduceTests):
  116. # For backward compatibility only, this only runs the numeric reductions
  117. def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
  118. if op_name not in ["any", "all"]:
  119. pytest.skip("These are tested in BaseNumericReduceTests")
  120. return True