test_series_apply.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import (
  5. DataFrame,
  6. Index,
  7. MultiIndex,
  8. Series,
  9. concat,
  10. date_range,
  11. timedelta_range,
  12. )
  13. import pandas._testing as tm
  14. from pandas.tests.apply.common import series_transform_kernels
  15. @pytest.fixture(params=[False, "compat"])
  16. def by_row(request):
  17. return request.param
  18. def test_series_map_box_timedelta(by_row):
  19. # GH#11349
  20. ser = Series(timedelta_range("1 day 1 s", periods=3, freq="h"))
  21. def f(x):
  22. return x.total_seconds() if by_row else x.dt.total_seconds()
  23. result = ser.apply(f, by_row=by_row)
  24. expected = ser.map(lambda x: x.total_seconds())
  25. tm.assert_series_equal(result, expected)
  26. expected = Series([86401.0, 90001.0, 93601.0])
  27. tm.assert_series_equal(result, expected)
  28. def test_apply(datetime_series, by_row):
  29. result = datetime_series.apply(np.sqrt, by_row=by_row)
  30. with np.errstate(all="ignore"):
  31. expected = np.sqrt(datetime_series)
  32. tm.assert_series_equal(result, expected)
  33. # element-wise apply (ufunc)
  34. result = datetime_series.apply(np.exp, by_row=by_row)
  35. expected = np.exp(datetime_series)
  36. tm.assert_series_equal(result, expected)
  37. # empty series
  38. s = Series(dtype=object, name="foo", index=Index([], name="bar"))
  39. rs = s.apply(lambda x: x, by_row=by_row)
  40. tm.assert_series_equal(s, rs)
  41. # check all metadata (GH 9322)
  42. assert s is not rs
  43. assert s.index is rs.index
  44. assert s.dtype == rs.dtype
  45. assert s.name == rs.name
  46. # index but no data
  47. s = Series(index=[1, 2, 3], dtype=np.float64)
  48. rs = s.apply(lambda x: x, by_row=by_row)
  49. tm.assert_series_equal(s, rs)
  50. def test_apply_map_same_length_inference_bug():
  51. s = Series([1, 2])
  52. def f(x):
  53. return (x, x + 1)
  54. result = s.apply(f, by_row="compat")
  55. expected = s.map(f)
  56. tm.assert_series_equal(result, expected)
  57. @pytest.mark.parametrize("convert_dtype", [True, False])
  58. def test_apply_convert_dtype_deprecated(convert_dtype):
  59. ser = Series(np.random.default_rng(2).standard_normal(10))
  60. def func(x):
  61. return x if x > 0 else np.nan
  62. with tm.assert_produces_warning(FutureWarning):
  63. ser.apply(func, convert_dtype=convert_dtype, by_row="compat")
  64. def test_apply_args():
  65. s = Series(["foo,bar"])
  66. result = s.apply(str.split, args=(",",))
  67. assert result[0] == ["foo", "bar"]
  68. assert isinstance(result[0], list)
  69. @pytest.mark.parametrize(
  70. "args, kwargs, increment",
  71. [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)],
  72. )
  73. def test_agg_args(args, kwargs, increment):
  74. # GH 43357
  75. def f(x, a=0, b=0, c=0):
  76. return x + a + 10 * b + 100 * c
  77. s = Series([1, 2])
  78. msg = (
  79. "in Series.agg cannot aggregate and has been deprecated. "
  80. "Use Series.transform to keep behavior unchanged."
  81. )
  82. with tm.assert_produces_warning(FutureWarning, match=msg):
  83. result = s.agg(f, 0, *args, **kwargs)
  84. expected = s + increment
  85. tm.assert_series_equal(result, expected)
  86. def test_agg_mapping_func_deprecated():
  87. # GH 53325
  88. s = Series([1, 2, 3])
  89. def foo1(x, a=1, c=0):
  90. return x + a + c
  91. def foo2(x, b=2, c=0):
  92. return x + b + c
  93. msg = "using .+ in Series.agg cannot aggregate and"
  94. with tm.assert_produces_warning(FutureWarning, match=msg):
  95. s.agg(foo1, 0, 3, c=4)
  96. with tm.assert_produces_warning(FutureWarning, match=msg):
  97. s.agg([foo1, foo2], 0, 3, c=4)
  98. with tm.assert_produces_warning(FutureWarning, match=msg):
  99. s.agg({"a": foo1, "b": foo2}, 0, 3, c=4)
  100. def test_series_apply_map_box_timestamps(by_row):
  101. # GH#2689, GH#2627
  102. ser = Series(date_range("1/1/2000", periods=10))
  103. def func(x):
  104. return (x.hour, x.day, x.month)
  105. if not by_row:
  106. msg = "Series' object has no attribute 'hour'"
  107. with pytest.raises(AttributeError, match=msg):
  108. ser.apply(func, by_row=by_row)
  109. return
  110. result = ser.apply(func, by_row=by_row)
  111. expected = ser.map(func)
  112. tm.assert_series_equal(result, expected)
  113. def test_apply_box_dt64():
  114. # ufunc will not be boxed. Same test cases as the test_map_box
  115. vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
  116. ser = Series(vals, dtype="M8[ns]")
  117. assert ser.dtype == "datetime64[ns]"
  118. # boxed value must be Timestamp instance
  119. res = ser.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat")
  120. exp = Series(["Timestamp_1_None", "Timestamp_2_None"])
  121. tm.assert_series_equal(res, exp)
  122. def test_apply_box_dt64tz():
  123. vals = [
  124. pd.Timestamp("2011-01-01", tz="US/Eastern"),
  125. pd.Timestamp("2011-01-02", tz="US/Eastern"),
  126. ]
  127. ser = Series(vals, dtype="M8[ns, US/Eastern]")
  128. assert ser.dtype == "datetime64[ns, US/Eastern]"
  129. res = ser.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat")
  130. exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
  131. tm.assert_series_equal(res, exp)
  132. def test_apply_box_td64():
  133. # timedelta
  134. vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
  135. ser = Series(vals)
  136. assert ser.dtype == "timedelta64[ns]"
  137. res = ser.apply(lambda x: f"{type(x).__name__}_{x.days}", by_row="compat")
  138. exp = Series(["Timedelta_1", "Timedelta_2"])
  139. tm.assert_series_equal(res, exp)
  140. def test_apply_box_period():
  141. # period
  142. vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
  143. ser = Series(vals)
  144. assert ser.dtype == "Period[M]"
  145. res = ser.apply(lambda x: f"{type(x).__name__}_{x.freqstr}", by_row="compat")
  146. exp = Series(["Period_M", "Period_M"])
  147. tm.assert_series_equal(res, exp)
  148. def test_apply_datetimetz(by_row):
  149. values = date_range("2011-01-01", "2011-01-02", freq="h").tz_localize("Asia/Tokyo")
  150. s = Series(values, name="XX")
  151. result = s.apply(lambda x: x + pd.offsets.Day(), by_row=by_row)
  152. exp_values = date_range("2011-01-02", "2011-01-03", freq="h").tz_localize(
  153. "Asia/Tokyo"
  154. )
  155. exp = Series(exp_values, name="XX")
  156. tm.assert_series_equal(result, exp)
  157. result = s.apply(lambda x: x.hour if by_row else x.dt.hour, by_row=by_row)
  158. exp = Series(list(range(24)) + [0], name="XX", dtype="int64" if by_row else "int32")
  159. tm.assert_series_equal(result, exp)
  160. # not vectorized
  161. def f(x):
  162. return str(x.tz) if by_row else str(x.dt.tz)
  163. result = s.apply(f, by_row=by_row)
  164. if by_row:
  165. exp = Series(["Asia/Tokyo"] * 25, name="XX")
  166. tm.assert_series_equal(result, exp)
  167. else:
  168. assert result == "Asia/Tokyo"
  169. def test_apply_categorical(by_row, using_infer_string):
  170. values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
  171. ser = Series(values, name="XX", index=list("abcdefg"))
  172. if not by_row:
  173. msg = "Series' object has no attribute 'lower"
  174. with pytest.raises(AttributeError, match=msg):
  175. ser.apply(lambda x: x.lower(), by_row=by_row)
  176. assert ser.apply(lambda x: "A", by_row=by_row) == "A"
  177. return
  178. result = ser.apply(lambda x: x.lower(), by_row=by_row)
  179. # should be categorical dtype when the number of categories are
  180. # the same
  181. values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True)
  182. exp = Series(values, name="XX", index=list("abcdefg"))
  183. tm.assert_series_equal(result, exp)
  184. tm.assert_categorical_equal(result.values, exp.values)
  185. result = ser.apply(lambda x: "A")
  186. exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
  187. tm.assert_series_equal(result, exp)
  188. assert result.dtype == object if not using_infer_string else "str"
  189. @pytest.mark.parametrize("series", [["1-1", "1-1", np.nan], ["1-1", "1-2", np.nan]])
  190. def test_apply_categorical_with_nan_values(series, by_row):
  191. # GH 20714 bug fixed in: GH 24275
  192. s = Series(series, dtype="category")
  193. if not by_row:
  194. msg = "'Series' object has no attribute 'split'"
  195. with pytest.raises(AttributeError, match=msg):
  196. s.apply(lambda x: x.split("-")[0], by_row=by_row)
  197. return
  198. result = s.apply(lambda x: x.split("-")[0], by_row=by_row)
  199. result = result.astype(object)
  200. expected = Series(["1", "1", np.nan], dtype="category")
  201. expected = expected.astype(object)
  202. tm.assert_series_equal(result, expected)
  203. def test_apply_empty_integer_series_with_datetime_index(by_row):
  204. # GH 21245
  205. s = Series([], index=date_range(start="2018-01-01", periods=0), dtype=int)
  206. result = s.apply(lambda x: x, by_row=by_row)
  207. tm.assert_series_equal(result, s)
  208. def test_apply_dataframe_iloc():
  209. uintDF = DataFrame(np.uint64([1, 2, 3, 4, 5]), columns=["Numbers"])
  210. indexDF = DataFrame([2, 3, 2, 1, 2], columns=["Indices"])
  211. def retrieve(targetRow, targetDF):
  212. val = targetDF["Numbers"].iloc[targetRow]
  213. return val
  214. result = indexDF["Indices"].apply(retrieve, args=(uintDF,))
  215. expected = Series([3, 4, 3, 2, 3], name="Indices", dtype="uint64")
  216. tm.assert_series_equal(result, expected)
  217. def test_transform(string_series, by_row):
  218. # transforming functions
  219. with np.errstate(all="ignore"):
  220. f_sqrt = np.sqrt(string_series)
  221. f_abs = np.abs(string_series)
  222. # ufunc
  223. result = string_series.apply(np.sqrt, by_row=by_row)
  224. expected = f_sqrt.copy()
  225. tm.assert_series_equal(result, expected)
  226. # list-like
  227. result = string_series.apply([np.sqrt], by_row=by_row)
  228. expected = f_sqrt.to_frame().copy()
  229. expected.columns = ["sqrt"]
  230. tm.assert_frame_equal(result, expected)
  231. result = string_series.apply(["sqrt"], by_row=by_row)
  232. tm.assert_frame_equal(result, expected)
  233. # multiple items in list
  234. # these are in the order as if we are applying both functions per
  235. # series and then concatting
  236. expected = concat([f_sqrt, f_abs], axis=1)
  237. expected.columns = ["sqrt", "absolute"]
  238. result = string_series.apply([np.sqrt, np.abs], by_row=by_row)
  239. tm.assert_frame_equal(result, expected)
  240. # dict, provide renaming
  241. expected = concat([f_sqrt, f_abs], axis=1)
  242. expected.columns = ["foo", "bar"]
  243. expected = expected.unstack().rename("series")
  244. result = string_series.apply({"foo": np.sqrt, "bar": np.abs}, by_row=by_row)
  245. tm.assert_series_equal(result.reindex_like(expected), expected)
  246. @pytest.mark.parametrize("op", series_transform_kernels)
  247. def test_transform_partial_failure(op, request):
  248. # GH 35964
  249. if op in ("ffill", "bfill", "pad", "backfill", "shift"):
  250. request.applymarker(
  251. pytest.mark.xfail(reason=f"{op} is successful on any dtype")
  252. )
  253. # Using object makes most transform kernels fail
  254. ser = Series(3 * [object])
  255. if op in ("fillna", "ngroup"):
  256. error = ValueError
  257. msg = "Transform function failed"
  258. else:
  259. error = TypeError
  260. msg = "|".join(
  261. [
  262. "not supported between instances of 'type' and 'type'",
  263. "unsupported operand type",
  264. ]
  265. )
  266. with pytest.raises(error, match=msg):
  267. ser.transform([op, "shift"])
  268. with pytest.raises(error, match=msg):
  269. ser.transform({"A": op, "B": "shift"})
  270. with pytest.raises(error, match=msg):
  271. ser.transform({"A": [op], "B": ["shift"]})
  272. with pytest.raises(error, match=msg):
  273. ser.transform({"A": [op, "shift"], "B": [op]})
  274. def test_transform_partial_failure_valueerror():
  275. # GH 40211
  276. def noop(x):
  277. return x
  278. def raising_op(_):
  279. raise ValueError
  280. ser = Series(3 * [object])
  281. msg = "Transform function failed"
  282. with pytest.raises(ValueError, match=msg):
  283. ser.transform([noop, raising_op])
  284. with pytest.raises(ValueError, match=msg):
  285. ser.transform({"A": raising_op, "B": noop})
  286. with pytest.raises(ValueError, match=msg):
  287. ser.transform({"A": [raising_op], "B": [noop]})
  288. with pytest.raises(ValueError, match=msg):
  289. ser.transform({"A": [noop, raising_op], "B": [noop]})
  290. def test_demo():
  291. # demonstration tests
  292. s = Series(range(6), dtype="int64", name="series")
  293. result = s.agg(["min", "max"])
  294. expected = Series([0, 5], index=["min", "max"], name="series")
  295. tm.assert_series_equal(result, expected)
  296. result = s.agg({"foo": "min"})
  297. expected = Series([0], index=["foo"], name="series")
  298. tm.assert_series_equal(result, expected)
  299. @pytest.mark.parametrize("func", [str, lambda x: str(x)])
  300. def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row):
  301. # test that we are evaluating row-by-row first if by_row="compat"
  302. # else vectorized evaluation
  303. result = string_series.apply(func, by_row=by_row)
  304. if by_row:
  305. expected = string_series.map(func)
  306. tm.assert_series_equal(result, expected)
  307. else:
  308. assert result == str(string_series)
  309. def test_agg_evaluate_lambdas(string_series):
  310. # GH53325
  311. # in the future, the result will be a Series class.
  312. with tm.assert_produces_warning(FutureWarning):
  313. result = string_series.agg(lambda x: type(x))
  314. assert isinstance(result, Series) and len(result) == len(string_series)
  315. with tm.assert_produces_warning(FutureWarning):
  316. result = string_series.agg(type)
  317. assert isinstance(result, Series) and len(result) == len(string_series)
  318. @pytest.mark.parametrize("op_name", ["agg", "apply"])
  319. def test_with_nested_series(datetime_series, op_name):
  320. # GH 2316
  321. # .agg with a reducer and a transform, what to do
  322. msg = "cannot aggregate"
  323. warning = FutureWarning if op_name == "agg" else None
  324. with tm.assert_produces_warning(warning, match=msg):
  325. # GH52123
  326. result = getattr(datetime_series, op_name)(
  327. lambda x: Series([x, x**2], index=["x", "x^2"])
  328. )
  329. expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2})
  330. tm.assert_frame_equal(result, expected)
  331. with tm.assert_produces_warning(FutureWarning, match=msg):
  332. result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"]))
  333. tm.assert_frame_equal(result, expected)
  334. def test_replicate_describe(string_series):
  335. # this also tests a result set that is all scalars
  336. expected = string_series.describe()
  337. result = string_series.apply(
  338. {
  339. "count": "count",
  340. "mean": "mean",
  341. "std": "std",
  342. "min": "min",
  343. "25%": lambda x: x.quantile(0.25),
  344. "50%": "median",
  345. "75%": lambda x: x.quantile(0.75),
  346. "max": "max",
  347. },
  348. )
  349. tm.assert_series_equal(result, expected)
  350. def test_reduce(string_series):
  351. # reductions with named functions
  352. result = string_series.agg(["sum", "mean"])
  353. expected = Series(
  354. [string_series.sum(), string_series.mean()],
  355. ["sum", "mean"],
  356. name=string_series.name,
  357. )
  358. tm.assert_series_equal(result, expected)
  359. @pytest.mark.parametrize(
  360. "how, kwds",
  361. [("agg", {}), ("apply", {"by_row": "compat"}), ("apply", {"by_row": False})],
  362. )
  363. def test_non_callable_aggregates(how, kwds):
  364. # test agg using non-callable series attributes
  365. # GH 39116 - expand to apply
  366. s = Series([1, 2, None])
  367. # Calling agg w/ just a string arg same as calling s.arg
  368. result = getattr(s, how)("size", **kwds)
  369. expected = s.size
  370. assert result == expected
  371. # test when mixed w/ callable reducers
  372. result = getattr(s, how)(["size", "count", "mean"], **kwds)
  373. expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5})
  374. tm.assert_series_equal(result, expected)
  375. result = getattr(s, how)({"size": "size", "count": "count", "mean": "mean"}, **kwds)
  376. tm.assert_series_equal(result, expected)
  377. def test_series_apply_no_suffix_index(by_row):
  378. # GH36189
  379. s = Series([4] * 3)
  380. result = s.apply(["sum", lambda x: x.sum(), lambda x: x.sum()], by_row=by_row)
  381. expected = Series([12, 12, 12], index=["sum", "<lambda>", "<lambda>"])
  382. tm.assert_series_equal(result, expected)
  383. @pytest.mark.parametrize(
  384. "dti,exp",
  385. [
  386. (
  387. Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
  388. DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
  389. ),
  390. (
  391. Series(
  392. np.arange(10, dtype=np.float64),
  393. index=date_range("2020-01-01", periods=10),
  394. name="ts",
  395. ),
  396. DataFrame(np.repeat([[1, 2]], 10, axis=0), dtype="int64"),
  397. ),
  398. ],
  399. )
  400. @pytest.mark.parametrize("aware", [True, False])
  401. def test_apply_series_on_date_time_index_aware_series(dti, exp, aware):
  402. # GH 25959
  403. # Calling apply on a localized time series should not cause an error
  404. if aware:
  405. index = dti.tz_localize("UTC").index
  406. else:
  407. index = dti.index
  408. result = Series(index).apply(lambda x: Series([1, 2]))
  409. tm.assert_frame_equal(result, exp)
  410. @pytest.mark.parametrize(
  411. "by_row, expected", [("compat", Series(np.ones(10), dtype="int64")), (False, 1)]
  412. )
  413. def test_apply_scalar_on_date_time_index_aware_series(by_row, expected):
  414. # GH 25959
  415. # Calling apply on a localized time series should not cause an error
  416. series = Series(
  417. np.arange(10, dtype=np.float64),
  418. index=date_range("2020-01-01", periods=10, tz="UTC"),
  419. )
  420. result = Series(series.index).apply(lambda x: 1, by_row=by_row)
  421. tm.assert_equal(result, expected)
  422. def test_apply_to_timedelta(by_row):
  423. list_of_valid_strings = ["00:00:01", "00:00:02"]
  424. a = pd.to_timedelta(list_of_valid_strings)
  425. b = Series(list_of_valid_strings).apply(pd.to_timedelta, by_row=by_row)
  426. tm.assert_series_equal(Series(a), b)
  427. list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT]
  428. a = pd.to_timedelta(list_of_strings)
  429. ser = Series(list_of_strings)
  430. b = ser.apply(pd.to_timedelta, by_row=by_row)
  431. tm.assert_series_equal(Series(a), b)
  432. @pytest.mark.parametrize(
  433. "ops, names",
  434. [
  435. ([np.sum], ["sum"]),
  436. ([np.sum, np.mean], ["sum", "mean"]),
  437. (np.array([np.sum]), ["sum"]),
  438. (np.array([np.sum, np.mean]), ["sum", "mean"]),
  439. ],
  440. )
  441. @pytest.mark.parametrize(
  442. "how, kwargs",
  443. [["agg", {}], ["apply", {"by_row": "compat"}], ["apply", {"by_row": False}]],
  444. )
  445. def test_apply_listlike_reducer(string_series, ops, names, how, kwargs):
  446. # GH 39140
  447. expected = Series({name: op(string_series) for name, op in zip(names, ops)})
  448. expected.name = "series"
  449. warn = FutureWarning if how == "agg" else None
  450. msg = f"using Series.[{'|'.join(names)}]"
  451. with tm.assert_produces_warning(warn, match=msg):
  452. result = getattr(string_series, how)(ops, **kwargs)
  453. tm.assert_series_equal(result, expected)
  454. @pytest.mark.parametrize(
  455. "ops",
  456. [
  457. {"A": np.sum},
  458. {"A": np.sum, "B": np.mean},
  459. Series({"A": np.sum}),
  460. Series({"A": np.sum, "B": np.mean}),
  461. ],
  462. )
  463. @pytest.mark.parametrize(
  464. "how, kwargs",
  465. [["agg", {}], ["apply", {"by_row": "compat"}], ["apply", {"by_row": False}]],
  466. )
  467. def test_apply_dictlike_reducer(string_series, ops, how, kwargs, by_row):
  468. # GH 39140
  469. expected = Series({name: op(string_series) for name, op in ops.items()})
  470. expected.name = string_series.name
  471. warn = FutureWarning if how == "agg" else None
  472. msg = "using Series.[sum|mean]"
  473. with tm.assert_produces_warning(warn, match=msg):
  474. result = getattr(string_series, how)(ops, **kwargs)
  475. tm.assert_series_equal(result, expected)
  476. @pytest.mark.parametrize(
  477. "ops, names",
  478. [
  479. ([np.sqrt], ["sqrt"]),
  480. ([np.abs, np.sqrt], ["absolute", "sqrt"]),
  481. (np.array([np.sqrt]), ["sqrt"]),
  482. (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
  483. ],
  484. )
  485. def test_apply_listlike_transformer(string_series, ops, names, by_row):
  486. # GH 39140
  487. with np.errstate(all="ignore"):
  488. expected = concat([op(string_series) for op in ops], axis=1)
  489. expected.columns = names
  490. result = string_series.apply(ops, by_row=by_row)
  491. tm.assert_frame_equal(result, expected)
  492. @pytest.mark.parametrize(
  493. "ops, expected",
  494. [
  495. ([lambda x: x], DataFrame({"<lambda>": [1, 2, 3]})),
  496. ([lambda x: x.sum()], Series([6], index=["<lambda>"])),
  497. ],
  498. )
  499. def test_apply_listlike_lambda(ops, expected, by_row):
  500. # GH53400
  501. ser = Series([1, 2, 3])
  502. result = ser.apply(ops, by_row=by_row)
  503. tm.assert_equal(result, expected)
  504. @pytest.mark.parametrize(
  505. "ops",
  506. [
  507. {"A": np.sqrt},
  508. {"A": np.sqrt, "B": np.exp},
  509. Series({"A": np.sqrt}),
  510. Series({"A": np.sqrt, "B": np.exp}),
  511. ],
  512. )
  513. def test_apply_dictlike_transformer(string_series, ops, by_row):
  514. # GH 39140
  515. with np.errstate(all="ignore"):
  516. expected = concat({name: op(string_series) for name, op in ops.items()})
  517. expected.name = string_series.name
  518. result = string_series.apply(ops, by_row=by_row)
  519. tm.assert_series_equal(result, expected)
  520. @pytest.mark.parametrize(
  521. "ops, expected",
  522. [
  523. (
  524. {"a": lambda x: x},
  525. Series([1, 2, 3], index=MultiIndex.from_arrays([["a"] * 3, range(3)])),
  526. ),
  527. ({"a": lambda x: x.sum()}, Series([6], index=["a"])),
  528. ],
  529. )
  530. def test_apply_dictlike_lambda(ops, by_row, expected):
  531. # GH53400
  532. ser = Series([1, 2, 3])
  533. result = ser.apply(ops, by_row=by_row)
  534. tm.assert_equal(result, expected)
  535. def test_apply_retains_column_name(by_row):
  536. # GH 16380
  537. df = DataFrame({"x": range(3)}, Index(range(3), name="x"))
  538. result = df.x.apply(lambda x: Series(range(x + 1), Index(range(x + 1), name="y")))
  539. expected = DataFrame(
  540. [[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]],
  541. columns=Index(range(3), name="y"),
  542. index=Index(range(3), name="x"),
  543. )
  544. tm.assert_frame_equal(result, expected)
  545. def test_apply_type():
  546. # GH 46719
  547. s = Series([3, "string", float], index=["a", "b", "c"])
  548. result = s.apply(type)
  549. expected = Series([int, str, type], index=["a", "b", "c"])
  550. tm.assert_series_equal(result, expected)
  551. def test_series_apply_unpack_nested_data():
  552. # GH#55189
  553. ser = Series([[1, 2, 3], [4, 5, 6, 7]])
  554. result = ser.apply(lambda x: Series(x))
  555. expected = DataFrame({0: [1.0, 4.0], 1: [2.0, 5.0], 2: [3.0, 6.0], 3: [np.nan, 7]})
  556. tm.assert_frame_equal(result, expected)