# test_reductions.py
import numpy as np
import pytest

import pandas as pd
from pandas import Series
import pandas._testing as tm
  6. @pytest.mark.parametrize("operation, expected", [("min", "a"), ("max", "b")])
  7. def test_reductions_series_strings(operation, expected):
  8. # GH#31746
  9. ser = Series(["a", "b"], dtype="string")
  10. res_operation_serie = getattr(ser, operation)()
  11. assert res_operation_serie == expected
  12. @pytest.mark.parametrize("as_period", [True, False])
  13. def test_mode_extension_dtype(as_period):
  14. # GH#41927 preserve dt64tz dtype
  15. ser = Series([pd.Timestamp(1979, 4, n) for n in range(1, 5)])
  16. if as_period:
  17. ser = ser.dt.to_period("D")
  18. else:
  19. ser = ser.dt.tz_localize("US/Central")
  20. res = ser.mode()
  21. assert res.dtype == ser.dtype
  22. tm.assert_series_equal(res, ser)
  23. def test_mode_nullable_dtype(any_numeric_ea_dtype):
  24. # GH#55340
  25. ser = Series([1, 3, 2, pd.NA, 3, 2, pd.NA], dtype=any_numeric_ea_dtype)
  26. result = ser.mode(dropna=False)
  27. expected = Series([2, 3, pd.NA], dtype=any_numeric_ea_dtype)
  28. tm.assert_series_equal(result, expected)
  29. result = ser.mode(dropna=True)
  30. expected = Series([2, 3], dtype=any_numeric_ea_dtype)
  31. tm.assert_series_equal(result, expected)
  32. ser[-1] = pd.NA
  33. result = ser.mode(dropna=True)
  34. expected = Series([2, 3], dtype=any_numeric_ea_dtype)
  35. tm.assert_series_equal(result, expected)
  36. result = ser.mode(dropna=False)
  37. expected = Series([pd.NA], dtype=any_numeric_ea_dtype)
  38. tm.assert_series_equal(result, expected)
  39. def test_mode_infer_string():
  40. # GH#56183
  41. pytest.importorskip("pyarrow")
  42. ser = Series(["a", "b"], dtype=object)
  43. with pd.option_context("future.infer_string", True):
  44. result = ser.mode()
  45. expected = Series(["a", "b"], dtype=object)
  46. tm.assert_series_equal(result, expected)
  47. def test_reductions_td64_with_nat():
  48. # GH#8617
  49. ser = Series([0, pd.NaT], dtype="m8[ns]")
  50. exp = ser[0]
  51. assert ser.median() == exp
  52. assert ser.min() == exp
  53. assert ser.max() == exp
  54. @pytest.mark.parametrize("skipna", [True, False])
  55. def test_td64_sum_empty(skipna):
  56. # GH#37151
  57. ser = Series([], dtype="timedelta64[ns]")
  58. result = ser.sum(skipna=skipna)
  59. assert isinstance(result, pd.Timedelta)
  60. assert result == pd.Timedelta(0)
  61. def test_td64_summation_overflow():
  62. # GH#9442
  63. ser = Series(pd.date_range("20130101", periods=100000, freq="h"))
  64. ser[0] += pd.Timedelta("1s 1ms")
  65. # mean
  66. result = (ser - ser.min()).mean()
  67. expected = pd.Timedelta((pd.TimedeltaIndex(ser - ser.min()).asi8 / len(ser)).sum())
  68. # the computation is converted to float so
  69. # might be some loss of precision
  70. assert np.allclose(result._value / 1000, expected._value / 1000)
  71. # sum
  72. msg = "overflow in timedelta operation"
  73. with pytest.raises(ValueError, match=msg):
  74. (ser - ser.min()).sum()
  75. s1 = ser[0:10000]
  76. with pytest.raises(ValueError, match=msg):
  77. (s1 - s1.min()).sum()
  78. s2 = ser[0:1000]
  79. (s2 - s2.min()).sum()
  80. def test_prod_numpy16_bug():
  81. ser = Series([1.0, 1.0, 1.0], index=range(3))
  82. result = ser.prod()
  83. assert not isinstance(result, Series)
  84. @pytest.mark.parametrize("func", [np.any, np.all])
  85. @pytest.mark.parametrize("kwargs", [{"keepdims": True}, {"out": object()}])
  86. def test_validate_any_all_out_keepdims_raises(kwargs, func):
  87. ser = Series([1, 2])
  88. param = next(iter(kwargs))
  89. name = func.__name__
  90. msg = (
  91. f"the '{param}' parameter is not "
  92. "supported in the pandas "
  93. rf"implementation of {name}\(\)"
  94. )
  95. with pytest.raises(ValueError, match=msg):
  96. func(ser, **kwargs)
  97. def test_validate_sum_initial():
  98. ser = Series([1, 2])
  99. msg = (
  100. r"the 'initial' parameter is not "
  101. r"supported in the pandas "
  102. r"implementation of sum\(\)"
  103. )
  104. with pytest.raises(ValueError, match=msg):
  105. np.sum(ser, initial=10)
  106. def test_validate_median_initial():
  107. ser = Series([1, 2])
  108. msg = (
  109. r"the 'overwrite_input' parameter is not "
  110. r"supported in the pandas "
  111. r"implementation of median\(\)"
  112. )
  113. with pytest.raises(ValueError, match=msg):
  114. # It seems like np.median doesn't dispatch, so we use the
  115. # method instead of the ufunc.
  116. ser.median(overwrite_input=True)
  117. def test_validate_stat_keepdims():
  118. ser = Series([1, 2])
  119. msg = (
  120. r"the 'keepdims' parameter is not "
  121. r"supported in the pandas "
  122. r"implementation of sum\(\)"
  123. )
  124. with pytest.raises(ValueError, match=msg):
  125. np.sum(ser, keepdims=True)
  126. def test_mean_with_convertible_string_raises(using_array_manager, using_infer_string):
  127. # GH#44008
  128. ser = Series(["1", "2"])
  129. assert ser.sum() == "12"
  130. msg = "Could not convert string '12' to numeric|does not support|Cannot perform"
  131. with pytest.raises(TypeError, match=msg):
  132. ser.mean()
  133. df = ser.to_frame()
  134. if not using_array_manager:
  135. msg = r"Could not convert \['12'\] to numeric|does not support|Cannot perform"
  136. with pytest.raises(TypeError, match=msg):
  137. df.mean()
  138. def test_mean_dont_convert_j_to_complex(using_array_manager):
  139. # GH#36703
  140. df = pd.DataFrame([{"db": "J", "numeric": 123}])
  141. if using_array_manager:
  142. msg = "Could not convert string 'J' to numeric"
  143. else:
  144. msg = r"Could not convert \['J'\] to numeric|does not support|Cannot perform"
  145. with pytest.raises(TypeError, match=msg):
  146. df.mean()
  147. with pytest.raises(TypeError, match=msg):
  148. df.agg("mean")
  149. msg = "Could not convert string 'J' to numeric|does not support|Cannot perform"
  150. with pytest.raises(TypeError, match=msg):
  151. df["db"].mean()
  152. msg = "Could not convert string 'J' to numeric|ufunc 'divide'|Cannot perform"
  153. with pytest.raises(TypeError, match=msg):
  154. np.mean(df["db"].astype("string").array)
  155. def test_median_with_convertible_string_raises(using_array_manager):
  156. # GH#34671 this _could_ return a string "2", but definitely not float 2.0
  157. msg = r"Cannot convert \['1' '2' '3'\] to numeric|does not support|Cannot perform"
  158. ser = Series(["1", "2", "3"])
  159. with pytest.raises(TypeError, match=msg):
  160. ser.median()
  161. if not using_array_manager:
  162. msg = (
  163. r"Cannot convert \[\['1' '2' '3'\]\] to numeric|does not support"
  164. "|Cannot perform"
  165. )
  166. df = ser.to_frame()
  167. with pytest.raises(TypeError, match=msg):
  168. df.median()