test_str.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. from itertools import chain
  2. import operator
  3. import numpy as np
  4. import pytest
  5. from pandas.core.dtypes.common import is_number
  6. from pandas import (
  7. DataFrame,
  8. Series,
  9. )
  10. import pandas._testing as tm
  11. from pandas.tests.apply.common import (
  12. frame_transform_kernels,
  13. series_transform_kernels,
  14. )
  @pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"])
  @pytest.mark.parametrize(
      "args,kwds",
      [
          pytest.param([], {}, id="no_args_or_kwds"),
          pytest.param([1], {}, id="axis_from_args"),
          pytest.param([], {"axis": 1}, id="axis_from_kwds"),
          pytest.param([], {"numeric_only": True}, id="optional_kwds"),
          pytest.param([1, True], {"numeric_only": True}, id="args_and_kwds"),
      ],
  )
  @pytest.mark.parametrize("how", ["agg", "apply"])
  def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how):
      """
      Check that ``float_frame.agg``/``float_frame.apply`` called with the name
      of a reduction returns the same Series as calling that reduction method
      directly with the same positional and keyword arguments.
      """
      has_second_positional = len(args) > 1
      if has_second_positional and how == "agg":
          # Mark the known-failing combination instead of skipping it, so the
          # test starts reporting once the signatures are aligned.
          xfail_marker = pytest.mark.xfail(
              raises=TypeError,
              reason="agg/apply signature mismatch - agg passes 2nd "
              "argument to func",
          )
          request.applymarker(xfail_marker)

      dispatch = getattr(float_frame, how)
      result = dispatch(func, *args, **kwds)

      direct_method = getattr(float_frame, func)
      expected = direct_method(*args, **kwds)

      tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize("arg", ["sum", "mean", "min", "max", "std"])
  def test_with_string_args(datetime_series, arg):
      """
      ``Series.apply`` given a reduction name must equal the value returned by
      calling the method of that name on the same Series.
      """
      via_apply = datetime_series.apply(arg)
      via_method = getattr(datetime_series, arg)()
      assert via_apply == via_method
  @pytest.mark.parametrize("op", ["mean", "median", "std", "var"])
  @pytest.mark.parametrize("how", ["agg", "apply"])
  def test_apply_np_reducer(op, how):
      """
      ``DataFrame.agg``/``DataFrame.apply`` with a reducer name must agree with
      the NumPy function of the same name applied column-wise (``axis=0``).
      """
      # GH 39116
      frame = DataFrame({"a": [1, 2], "b": [3, 4]})
      result = getattr(frame, how)(op)

      # pandas ddof defaults to 1, numpy to 0
      if op in ("std", "var"):
          np_kwargs = {"ddof": 1}
      else:
          np_kwargs = {}

      np_reducer = getattr(np, op)
      reduced = np_reducer(frame, axis=0, **np_kwargs)
      expected = Series(reduced, index=frame.columns)

      tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize(
      "op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"]
  )
  @pytest.mark.parametrize("how", ["transform", "apply"])
  def test_apply_np_transformer(float_frame, op, how):
      """
      ``DataFrame.transform``/``DataFrame.apply`` with a transformer name must
      equal the NumPy function of the same name applied to the whole frame.

      For "log" and "sqrt" a RuntimeWarning is expected, because a negative
      value is written into the frame first.
      """
      # GH 39116

      # float_frame will _usually_ have negative values, which will
      # trigger the warning here, but let's put one in just to be sure
      float_frame.iloc[0, 0] = -1.0

      expected_warning = RuntimeWarning if op in ["log", "sqrt"] else None

      # float_frame fixture is defined in conftest.py, so we don't check the
      # stacklevel as otherwise the test would fail.
      with tm.assert_produces_warning(expected_warning, check_stacklevel=False):
          result = getattr(float_frame, how)(op)

      np_transformer = getattr(np, op)
      expected = np_transformer(float_frame)
      tm.assert_frame_equal(result, expected)
  74. @pytest.mark.parametrize(
  75. "series, func, expected",
  76. chain(
  77. tm.get_cython_table_params(
  78. Series(dtype=np.float64),
  79. [
  80. ("sum", 0),
  81. ("max", np.nan),
  82. ("min", np.nan),
  83. ("all", True),
  84. ("any", False),
  85. ("mean", np.nan),
  86. ("prod", 1),
  87. ("std", np.nan),
  88. ("var", np.nan),
  89. ("median", np.nan),
  90. ],
  91. ),
  92. tm.get_cython_table_params(
  93. Series([np.nan, 1, 2, 3]),
  94. [
  95. ("sum", 6),
  96. ("max", 3),
  97. ("min", 1),
  98. ("all", True),
  99. ("any", True),
  100. ("mean", 2),
  101. ("prod", 6),
  102. ("std", 1),
  103. ("var", 1),
  104. ("median", 2),
  105. ],
  106. ),
  107. tm.get_cython_table_params(
  108. Series("a b c".split()),
  109. [
  110. ("sum", "abc"),
  111. ("max", "c"),
  112. ("min", "a"),
  113. ("all", True),
  114. ("any", True),
  115. ],
  116. ),
  117. ),
  118. )
  119. def test_agg_cython_table_series(series, func, expected):
  120. # GH21224
  121. # test reducing functions in
  122. # pandas.core.base.SelectionMixin._cython_table
  123. warn = None if isinstance(func, str) else FutureWarning
  124. with tm.assert_produces_warning(warn, match="is currently using Series.*"):
  125. result = series.agg(func)
  126. if is_number(expected):
  127. assert np.isclose(result, expected, equal_nan=True)
  128. else:
  129. assert result == expected
  130. @pytest.mark.parametrize(
  131. "series, func, expected",
  132. chain(
  133. tm.get_cython_table_params(
  134. Series(dtype=np.float64),
  135. [
  136. ("cumprod", Series([], dtype=np.float64)),
  137. ("cumsum", Series([], dtype=np.float64)),
  138. ],
  139. ),
  140. tm.get_cython_table_params(
  141. Series([np.nan, 1, 2, 3]),
  142. [
  143. ("cumprod", Series([np.nan, 1, 2, 6])),
  144. ("cumsum", Series([np.nan, 1, 3, 6])),
  145. ],
  146. ),
  147. tm.get_cython_table_params(
  148. Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))]
  149. ),
  150. ),
  151. )
  152. def test_agg_cython_table_transform_series(series, func, expected):
  153. # GH21224
  154. # test transforming functions in
  155. # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
  156. warn = None if isinstance(func, str) else FutureWarning
  157. with tm.assert_produces_warning(warn, match="is currently using Series.*"):
  158. result = series.agg(func)
  159. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize(
      "df, func, expected",
      chain(
          tm.get_cython_table_params(
              DataFrame(),
              [
                  ("sum", Series(dtype="float64")),
                  ("max", Series(dtype="float64")),
                  ("min", Series(dtype="float64")),
                  ("all", Series(dtype=bool)),
                  ("any", Series(dtype=bool)),
                  ("mean", Series(dtype="float64")),
                  ("prod", Series(dtype="float64")),
                  ("std", Series(dtype="float64")),
                  ("var", Series(dtype="float64")),
                  ("median", Series(dtype="float64")),
              ],
          ),
          tm.get_cython_table_params(
              DataFrame([[np.nan, 1], [1, 2]]),
              [
                  ("sum", Series([1.0, 3])),
                  ("max", Series([1.0, 2])),
                  ("min", Series([1.0, 1])),
                  ("all", Series([True, True])),
                  ("any", Series([True, True])),
                  ("mean", Series([1, 1.5])),
                  ("prod", Series([1.0, 2])),
                  ("std", Series([np.nan, 0.707107])),
                  ("var", Series([np.nan, 0.5])),
                  ("median", Series([1, 1.5])),
              ],
          ),
      ),
  )
  def test_agg_cython_table_frame(df, func, expected, axis):
      """
      ``DataFrame.agg(func, axis=axis)`` must reduce to the ``expected`` Series.

      When ``func`` is not a string, a FutureWarning matching
      "is currently using DataFrame.*" is required; string names must not warn.
      """
      # GH 21224
      # test reducing functions in
      # pandas.core.base.SelectionMixin._cython_table
      if isinstance(func, str):
          expected_warning = None
      else:
          expected_warning = FutureWarning

      with tm.assert_produces_warning(
          expected_warning, match="is currently using DataFrame.*"
      ):
          # GH#53425
          result = df.agg(func, axis=axis)

      tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize(
      "df, func, expected",
      chain(
          tm.get_cython_table_params(
              DataFrame(), [("cumprod", DataFrame()), ("cumsum", DataFrame())]
          ),
          tm.get_cython_table_params(
              DataFrame([[np.nan, 1], [1, 2]]),
              [
                  ("cumprod", DataFrame([[np.nan, 1], [1, 2]])),
                  ("cumsum", DataFrame([[np.nan, 1], [1, 3]])),
              ],
          ),
      ),
  )
  def test_agg_cython_table_transform_frame(df, func, expected, axis):
      """
      ``DataFrame.agg(func, axis=axis)`` with a cumulative function must return
      the ``expected`` frame (cast to float64 when operating along columns).

      When ``func`` is not a string, a FutureWarning matching
      "is currently using DataFrame.*" is required; string names must not warn.
      """
      # GH 21224
      # test transforming functions in
      # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
      along_columns = axis in ("columns", 1)
      if along_columns:
          # operating blockwise doesn't let us preserve dtypes
          expected = expected.astype("float64")

      expected_warning = FutureWarning if not isinstance(func, str) else None
      with tm.assert_produces_warning(
          expected_warning, match="is currently using DataFrame.*"
      ):
          # GH#53425
          result = df.agg(func, axis=axis)

      tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize("op", series_transform_kernels)
  def test_transform_groupby_kernel_series(request, string_series, op):
      """
      ``Series.transform(op)`` must match ``groupby(...).transform(op)`` when the
      grouping key is an array of ones as long as the Series.

      "ngroup" is marked xfail (ValueError); "fillna" receives a fill value of
      0.0 and its groupby call is expected to emit a FutureWarning.
      """
      # GH 35964
      if op == "ngroup":
          ngroup_xfail = pytest.mark.xfail(
              raises=ValueError, reason="ngroup not valid for NDFrame"
          )
          request.applymarker(ngroup_xfail)

      is_fillna = op == "fillna"
      args = [0.0] if is_fillna else []
      ones = np.ones(string_series.shape[0])

      expected_warning = FutureWarning if is_fillna else None
      with tm.assert_produces_warning(
          expected_warning, match="SeriesGroupBy.fillna is deprecated"
      ):
          grouped = string_series.groupby(ones)
          expected = grouped.transform(op, *args)

      result = string_series.transform(op, 0, *args)
      tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize("op", frame_transform_kernels)
  def test_transform_groupby_kernel_frame(request, axis, float_frame, op):
      """
      ``DataFrame.transform(op, axis)`` must match ``groupby(...).transform(op)``
      when the grouping key is an array of ones along ``axis``.

      The comparison runs twice: on the fixture as given, and again after a
      column "E" is added so that the frame holds more than one internal array.
      "ngroup" is marked xfail (ValueError); "fillna" receives a fill value of
      0.0 and its groupby call is expected to emit a FutureWarning.
      """
      if op == "ngroup":
          ngroup_xfail = pytest.mark.xfail(
              raises=ValueError, reason="ngroup not valid for NDFrame"
          )
          request.applymarker(ngroup_xfail)

      # GH 35964
      is_fillna = op == "fillna"
      args = [0.0] if is_fillna else []
      op_warning = FutureWarning if is_fillna else None

      if axis in (0, "index"):
          axis_number = 0
          groupby_msg = "The 'axis' keyword in DataFrame.groupby is deprecated"
      else:
          axis_number = 1
          groupby_msg = "DataFrame.groupby with axis=1 is deprecated"

      def _compare_with_groupby_transform():
          # The key is rebuilt on every call because the frame's shape changes
          # between the two passes when grouping along the columns.
          ones = np.ones(float_frame.shape[axis_number])
          with tm.assert_produces_warning(FutureWarning, match=groupby_msg):
              grouped = float_frame.groupby(ones, axis=axis)
          with tm.assert_produces_warning(
              op_warning, match="DataFrameGroupBy.fillna is deprecated"
          ):
              expected = grouped.transform(op, *args)
          result = float_frame.transform(op, axis, *args)
          tm.assert_frame_equal(result, expected)

      _compare_with_groupby_transform()

      # same thing, but ensuring we have multiple blocks
      assert "E" not in float_frame.columns
      float_frame["E"] = float_frame["A"].copy()
      assert len(float_frame._mgr.arrays) > 1

      _compare_with_groupby_transform()
  @pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"])
  def test_transform_method_name(method):
      """
      ``DataFrame.transform`` given a method name must return the same frame as
      calling that method on the DataFrame with no arguments.
      """
      # GH 19760
      frame = DataFrame({"A": [-1, 2]})
      result = frame.transform(method)

      call_by_name = operator.methodcaller(method)
      expected = call_by_name(frame)

      tm.assert_frame_equal(result, expected)