test_finalize.py 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767
  1. """
  2. An exhaustive list of pandas methods exercising NDFrame.__finalize__.
  3. """
  4. import operator
  5. import re
  6. import numpy as np
  7. import pytest
  8. import pandas as pd
  9. import pandas._testing as tm
  10. # TODO:
  11. # * Binary methods (mul, div, etc.)
  12. # * Binary outputs (align, etc.)
  13. # * top-level methods (concat, merge, get_dummies, etc.)
  14. # * window
  15. # * cumulative reductions
  16. not_implemented_mark = pytest.mark.xfail(reason="not implemented")
  17. mi = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=["A", "B"])
  18. frame_data = ({"A": [1]},)
  19. frame_mi_data = ({"A": [1, 2, 3, 4]}, mi)
  20. # Tuple of
  21. # - Callable: Constructor (Series, DataFrame)
  22. # - Tuple: Constructor args
  23. # - Callable: pass the constructed value with attrs set to this.
  24. _all_methods = [
  25. (pd.Series, ([0],), operator.methodcaller("take", [])),
  26. (pd.Series, ([0],), operator.methodcaller("__getitem__", [True])),
  27. (pd.Series, ([0],), operator.methodcaller("repeat", 2)),
  28. (pd.Series, ([0],), operator.methodcaller("reset_index")),
  29. (pd.Series, ([0],), operator.methodcaller("reset_index", drop=True)),
  30. (pd.Series, ([0],), operator.methodcaller("to_frame")),
  31. (pd.Series, ([0, 0],), operator.methodcaller("drop_duplicates")),
  32. (pd.Series, ([0, 0],), operator.methodcaller("duplicated")),
  33. (pd.Series, ([0, 0],), operator.methodcaller("round")),
  34. (pd.Series, ([0, 0],), operator.methodcaller("rename", lambda x: x + 1)),
  35. (pd.Series, ([0, 0],), operator.methodcaller("rename", "name")),
  36. (pd.Series, ([0, 0],), operator.methodcaller("set_axis", ["a", "b"])),
  37. (pd.Series, ([0, 0],), operator.methodcaller("reindex", [1, 0])),
  38. (pd.Series, ([0, 0],), operator.methodcaller("drop", [0])),
  39. (pd.Series, (pd.array([0, pd.NA]),), operator.methodcaller("fillna", 0)),
  40. (pd.Series, ([0, 0],), operator.methodcaller("replace", {0: 1})),
  41. (pd.Series, ([0, 0],), operator.methodcaller("shift")),
  42. (pd.Series, ([0, 0],), operator.methodcaller("isin", [0, 1])),
  43. (pd.Series, ([0, 0],), operator.methodcaller("between", 0, 2)),
  44. (pd.Series, ([0, 0],), operator.methodcaller("isna")),
  45. (pd.Series, ([0, 0],), operator.methodcaller("isnull")),
  46. (pd.Series, ([0, 0],), operator.methodcaller("notna")),
  47. (pd.Series, ([0, 0],), operator.methodcaller("notnull")),
  48. (pd.Series, ([1],), operator.methodcaller("add", pd.Series([1]))),
  49. # TODO: mul, div, etc.
  50. (
  51. pd.Series,
  52. ([0], pd.period_range("2000", periods=1)),
  53. operator.methodcaller("to_timestamp"),
  54. ),
  55. (
  56. pd.Series,
  57. ([0], pd.date_range("2000", periods=1)),
  58. operator.methodcaller("to_period"),
  59. ),
  60. pytest.param(
  61. (
  62. pd.DataFrame,
  63. frame_data,
  64. operator.methodcaller("dot", pd.DataFrame(index=["A"])),
  65. ),
  66. marks=pytest.mark.xfail(reason="Implement binary finalize"),
  67. ),
  68. (pd.DataFrame, frame_data, operator.methodcaller("transpose")),
  69. (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", "A")),
  70. (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", ["A"])),
  71. (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", np.array([True]))),
  72. (pd.DataFrame, ({("A", "a"): [1]},), operator.methodcaller("__getitem__", ["A"])),
  73. (pd.DataFrame, frame_data, operator.methodcaller("query", "A == 1")),
  74. (pd.DataFrame, frame_data, operator.methodcaller("eval", "A + 1", engine="python")),
  75. (pd.DataFrame, frame_data, operator.methodcaller("select_dtypes", include="int")),
  76. (pd.DataFrame, frame_data, operator.methodcaller("assign", b=1)),
  77. (pd.DataFrame, frame_data, operator.methodcaller("set_axis", ["A"])),
  78. (pd.DataFrame, frame_data, operator.methodcaller("reindex", [0, 1])),
  79. (pd.DataFrame, frame_data, operator.methodcaller("drop", columns=["A"])),
  80. (pd.DataFrame, frame_data, operator.methodcaller("drop", index=[0])),
  81. (pd.DataFrame, frame_data, operator.methodcaller("rename", columns={"A": "a"})),
  82. (pd.DataFrame, frame_data, operator.methodcaller("rename", index=lambda x: x)),
  83. (pd.DataFrame, frame_data, operator.methodcaller("fillna", "A")),
  84. (pd.DataFrame, frame_data, operator.methodcaller("fillna", method="ffill")),
  85. (pd.DataFrame, frame_data, operator.methodcaller("set_index", "A")),
  86. (pd.DataFrame, frame_data, operator.methodcaller("reset_index")),
  87. (pd.DataFrame, frame_data, operator.methodcaller("isna")),
  88. (pd.DataFrame, frame_data, operator.methodcaller("isnull")),
  89. (pd.DataFrame, frame_data, operator.methodcaller("notna")),
  90. (pd.DataFrame, frame_data, operator.methodcaller("notnull")),
  91. (pd.DataFrame, frame_data, operator.methodcaller("dropna")),
  92. (pd.DataFrame, frame_data, operator.methodcaller("drop_duplicates")),
  93. (pd.DataFrame, frame_data, operator.methodcaller("duplicated")),
  94. (pd.DataFrame, frame_data, operator.methodcaller("sort_values", by="A")),
  95. (pd.DataFrame, frame_data, operator.methodcaller("sort_index")),
  96. (pd.DataFrame, frame_data, operator.methodcaller("nlargest", 1, "A")),
  97. (pd.DataFrame, frame_data, operator.methodcaller("nsmallest", 1, "A")),
  98. (pd.DataFrame, frame_mi_data, operator.methodcaller("swaplevel")),
  99. (
  100. pd.DataFrame,
  101. frame_data,
  102. operator.methodcaller("add", pd.DataFrame(*frame_data)),
  103. ),
  104. # TODO: div, mul, etc.
  105. (
  106. pd.DataFrame,
  107. frame_data,
  108. operator.methodcaller("combine", pd.DataFrame(*frame_data), operator.add),
  109. ),
  110. (
  111. pd.DataFrame,
  112. frame_data,
  113. operator.methodcaller("combine_first", pd.DataFrame(*frame_data)),
  114. ),
  115. pytest.param(
  116. (
  117. pd.DataFrame,
  118. frame_data,
  119. operator.methodcaller("update", pd.DataFrame(*frame_data)),
  120. ),
  121. marks=not_implemented_mark,
  122. ),
  123. (pd.DataFrame, frame_data, operator.methodcaller("pivot", columns="A")),
  124. (
  125. pd.DataFrame,
  126. ({"A": [1], "B": [1]},),
  127. operator.methodcaller("pivot_table", columns="A"),
  128. ),
  129. (
  130. pd.DataFrame,
  131. ({"A": [1], "B": [1]},),
  132. operator.methodcaller("pivot_table", columns="A", aggfunc=["mean", "sum"]),
  133. ),
  134. (pd.DataFrame, frame_data, operator.methodcaller("stack")),
  135. (pd.DataFrame, frame_data, operator.methodcaller("explode", "A")),
  136. (pd.DataFrame, frame_mi_data, operator.methodcaller("unstack")),
  137. (
  138. pd.DataFrame,
  139. ({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]},),
  140. operator.methodcaller("melt", id_vars=["A"], value_vars=["B"]),
  141. ),
  142. (pd.DataFrame, frame_data, operator.methodcaller("map", lambda x: x)),
  143. pytest.param(
  144. (
  145. pd.DataFrame,
  146. frame_data,
  147. operator.methodcaller("merge", pd.DataFrame({"A": [1]})),
  148. ),
  149. marks=not_implemented_mark,
  150. ),
  151. (pd.DataFrame, frame_data, operator.methodcaller("round", 2)),
  152. (pd.DataFrame, frame_data, operator.methodcaller("corr")),
  153. pytest.param(
  154. (pd.DataFrame, frame_data, operator.methodcaller("cov")),
  155. marks=[
  156. pytest.mark.filterwarnings("ignore::RuntimeWarning"),
  157. ],
  158. ),
  159. (
  160. pd.DataFrame,
  161. frame_data,
  162. operator.methodcaller("corrwith", pd.DataFrame(*frame_data)),
  163. ),
  164. (pd.DataFrame, frame_data, operator.methodcaller("count")),
  165. (pd.DataFrame, frame_data, operator.methodcaller("nunique")),
  166. (pd.DataFrame, frame_data, operator.methodcaller("idxmin")),
  167. (pd.DataFrame, frame_data, operator.methodcaller("idxmax")),
  168. (pd.DataFrame, frame_data, operator.methodcaller("mode")),
  169. (pd.Series, [0], operator.methodcaller("mode")),
  170. (pd.DataFrame, frame_data, operator.methodcaller("median")),
  171. (
  172. pd.DataFrame,
  173. frame_data,
  174. operator.methodcaller("quantile", numeric_only=True),
  175. ),
  176. (
  177. pd.DataFrame,
  178. frame_data,
  179. operator.methodcaller("quantile", q=[0.25, 0.75], numeric_only=True),
  180. ),
  181. (
  182. pd.DataFrame,
  183. ({"A": [pd.Timedelta(days=1), pd.Timedelta(days=2)]},),
  184. operator.methodcaller("quantile", numeric_only=False),
  185. ),
  186. (
  187. pd.DataFrame,
  188. ({"A": [np.datetime64("2022-01-01"), np.datetime64("2022-01-02")]},),
  189. operator.methodcaller("quantile", numeric_only=True),
  190. ),
  191. (
  192. pd.DataFrame,
  193. ({"A": [1]}, [pd.Period("2000", "D")]),
  194. operator.methodcaller("to_timestamp"),
  195. ),
  196. (
  197. pd.DataFrame,
  198. ({"A": [1]}, [pd.Timestamp("2000")]),
  199. operator.methodcaller("to_period", freq="D"),
  200. ),
  201. (pd.DataFrame, frame_mi_data, operator.methodcaller("isin", [1])),
  202. (pd.DataFrame, frame_mi_data, operator.methodcaller("isin", pd.Series([1]))),
  203. (
  204. pd.DataFrame,
  205. frame_mi_data,
  206. operator.methodcaller("isin", pd.DataFrame({"A": [1]})),
  207. ),
  208. (pd.DataFrame, frame_mi_data, operator.methodcaller("droplevel", "A")),
  209. (pd.DataFrame, frame_data, operator.methodcaller("pop", "A")),
  210. # Squeeze on columns, otherwise we'll end up with a scalar
  211. (pd.DataFrame, frame_data, operator.methodcaller("squeeze", axis="columns")),
  212. (pd.Series, ([1, 2],), operator.methodcaller("squeeze")),
  213. (pd.Series, ([1, 2],), operator.methodcaller("rename_axis", index="a")),
  214. (pd.DataFrame, frame_data, operator.methodcaller("rename_axis", columns="a")),
  215. # Unary ops
  216. (pd.DataFrame, frame_data, operator.neg),
  217. (pd.Series, [1], operator.neg),
  218. (pd.DataFrame, frame_data, operator.pos),
  219. (pd.Series, [1], operator.pos),
  220. (pd.DataFrame, frame_data, operator.inv),
  221. (pd.Series, [1], operator.inv),
  222. (pd.DataFrame, frame_data, abs),
  223. (pd.Series, [1], abs),
  224. (pd.DataFrame, frame_data, round),
  225. (pd.Series, [1], round),
  226. (pd.DataFrame, frame_data, operator.methodcaller("take", [0, 0])),
  227. (pd.DataFrame, frame_mi_data, operator.methodcaller("xs", "a")),
  228. (pd.Series, (1, mi), operator.methodcaller("xs", "a")),
  229. (pd.DataFrame, frame_data, operator.methodcaller("get", "A")),
  230. (
  231. pd.DataFrame,
  232. frame_data,
  233. operator.methodcaller("reindex_like", pd.DataFrame({"A": [1, 2, 3]})),
  234. ),
  235. (
  236. pd.Series,
  237. frame_data,
  238. operator.methodcaller("reindex_like", pd.Series([0, 1, 2])),
  239. ),
  240. (pd.DataFrame, frame_data, operator.methodcaller("add_prefix", "_")),
  241. (pd.DataFrame, frame_data, operator.methodcaller("add_suffix", "_")),
  242. (pd.Series, (1, ["a", "b"]), operator.methodcaller("add_prefix", "_")),
  243. (pd.Series, (1, ["a", "b"]), operator.methodcaller("add_suffix", "_")),
  244. (pd.Series, ([3, 2],), operator.methodcaller("sort_values")),
  245. (pd.Series, ([1] * 10,), operator.methodcaller("head")),
  246. (pd.DataFrame, ({"A": [1] * 10},), operator.methodcaller("head")),
  247. (pd.Series, ([1] * 10,), operator.methodcaller("tail")),
  248. (pd.DataFrame, ({"A": [1] * 10},), operator.methodcaller("tail")),
  249. (pd.Series, ([1, 2],), operator.methodcaller("sample", n=2, replace=True)),
  250. (pd.DataFrame, (frame_data,), operator.methodcaller("sample", n=2, replace=True)),
  251. (pd.Series, ([1, 2],), operator.methodcaller("astype", float)),
  252. (pd.DataFrame, frame_data, operator.methodcaller("astype", float)),
  253. (pd.Series, ([1, 2],), operator.methodcaller("copy")),
  254. (pd.DataFrame, frame_data, operator.methodcaller("copy")),
  255. (pd.Series, ([1, 2], None, object), operator.methodcaller("infer_objects")),
  256. (
  257. pd.DataFrame,
  258. ({"A": np.array([1, 2], dtype=object)},),
  259. operator.methodcaller("infer_objects"),
  260. ),
  261. (pd.Series, ([1, 2],), operator.methodcaller("convert_dtypes")),
  262. (pd.DataFrame, frame_data, operator.methodcaller("convert_dtypes")),
  263. (pd.Series, ([1, None, 3],), operator.methodcaller("interpolate")),
  264. (pd.DataFrame, ({"A": [1, None, 3]},), operator.methodcaller("interpolate")),
  265. (pd.Series, ([1, 2],), operator.methodcaller("clip", lower=1)),
  266. (pd.DataFrame, frame_data, operator.methodcaller("clip", lower=1)),
  267. (
  268. pd.Series,
  269. (1, pd.date_range("2000", periods=4)),
  270. operator.methodcaller("asfreq", "h"),
  271. ),
  272. (
  273. pd.DataFrame,
  274. ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
  275. operator.methodcaller("asfreq", "h"),
  276. ),
  277. (
  278. pd.Series,
  279. (1, pd.date_range("2000", periods=4)),
  280. operator.methodcaller("at_time", "12:00"),
  281. ),
  282. (
  283. pd.DataFrame,
  284. ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
  285. operator.methodcaller("at_time", "12:00"),
  286. ),
  287. (
  288. pd.Series,
  289. (1, pd.date_range("2000", periods=4)),
  290. operator.methodcaller("between_time", "12:00", "13:00"),
  291. ),
  292. (
  293. pd.DataFrame,
  294. ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
  295. operator.methodcaller("between_time", "12:00", "13:00"),
  296. ),
  297. (
  298. pd.Series,
  299. (1, pd.date_range("2000", periods=4)),
  300. operator.methodcaller("last", "3D"),
  301. ),
  302. (
  303. pd.DataFrame,
  304. ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
  305. operator.methodcaller("last", "3D"),
  306. ),
  307. (pd.Series, ([1, 2],), operator.methodcaller("rank")),
  308. (pd.DataFrame, frame_data, operator.methodcaller("rank")),
  309. (pd.Series, ([1, 2],), operator.methodcaller("where", np.array([True, False]))),
  310. (pd.DataFrame, frame_data, operator.methodcaller("where", np.array([[True]]))),
  311. (pd.Series, ([1, 2],), operator.methodcaller("mask", np.array([True, False]))),
  312. (pd.DataFrame, frame_data, operator.methodcaller("mask", np.array([[True]]))),
  313. (pd.Series, ([1, 2],), operator.methodcaller("truncate", before=0)),
  314. (pd.DataFrame, frame_data, operator.methodcaller("truncate", before=0)),
  315. (
  316. pd.Series,
  317. (1, pd.date_range("2000", periods=4, tz="UTC")),
  318. operator.methodcaller("tz_convert", "CET"),
  319. ),
  320. (
  321. pd.DataFrame,
  322. ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4, tz="UTC")),
  323. operator.methodcaller("tz_convert", "CET"),
  324. ),
  325. (
  326. pd.Series,
  327. (1, pd.date_range("2000", periods=4)),
  328. operator.methodcaller("tz_localize", "CET"),
  329. ),
  330. (
  331. pd.DataFrame,
  332. ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
  333. operator.methodcaller("tz_localize", "CET"),
  334. ),
  335. (pd.Series, ([1, 2],), operator.methodcaller("describe")),
  336. (pd.DataFrame, frame_data, operator.methodcaller("describe")),
  337. (pd.Series, ([1, 2],), operator.methodcaller("pct_change")),
  338. (pd.DataFrame, frame_data, operator.methodcaller("pct_change")),
  339. (pd.Series, ([1],), operator.methodcaller("transform", lambda x: x - x.min())),
  340. (
  341. pd.DataFrame,
  342. frame_mi_data,
  343. operator.methodcaller("transform", lambda x: x - x.min()),
  344. ),
  345. (pd.Series, ([1],), operator.methodcaller("apply", lambda x: x)),
  346. (pd.DataFrame, frame_mi_data, operator.methodcaller("apply", lambda x: x)),
  347. # Cumulative reductions
  348. (pd.Series, ([1],), operator.methodcaller("cumsum")),
  349. (pd.DataFrame, frame_data, operator.methodcaller("cumsum")),
  350. (pd.Series, ([1],), operator.methodcaller("cummin")),
  351. (pd.DataFrame, frame_data, operator.methodcaller("cummin")),
  352. (pd.Series, ([1],), operator.methodcaller("cummax")),
  353. (pd.DataFrame, frame_data, operator.methodcaller("cummax")),
  354. (pd.Series, ([1],), operator.methodcaller("cumprod")),
  355. (pd.DataFrame, frame_data, operator.methodcaller("cumprod")),
  356. # Reductions
  357. (pd.DataFrame, frame_data, operator.methodcaller("any")),
  358. (pd.DataFrame, frame_data, operator.methodcaller("all")),
  359. (pd.DataFrame, frame_data, operator.methodcaller("min")),
  360. (pd.DataFrame, frame_data, operator.methodcaller("max")),
  361. (pd.DataFrame, frame_data, operator.methodcaller("sum")),
  362. (pd.DataFrame, frame_data, operator.methodcaller("std")),
  363. (pd.DataFrame, frame_data, operator.methodcaller("mean")),
  364. (pd.DataFrame, frame_data, operator.methodcaller("prod")),
  365. (pd.DataFrame, frame_data, operator.methodcaller("sem")),
  366. (pd.DataFrame, frame_data, operator.methodcaller("skew")),
  367. (pd.DataFrame, frame_data, operator.methodcaller("kurt")),
  368. ]
  369. def idfn(x):
  370. xpr = re.compile(r"'(.*)?'")
  371. m = xpr.search(str(x))
  372. if m:
  373. return m.group(1)
  374. else:
  375. return str(x)
  376. @pytest.fixture(params=_all_methods, ids=lambda x: idfn(x[-1]))
  377. def ndframe_method(request):
  378. """
  379. An NDFrame method returning an NDFrame.
  380. """
  381. return request.param
  382. @pytest.mark.filterwarnings(
  383. "ignore:DataFrame.fillna with 'method' is deprecated:FutureWarning",
  384. "ignore:last is deprecated:FutureWarning",
  385. )
  386. def test_finalize_called(ndframe_method):
  387. cls, init_args, method = ndframe_method
  388. ndframe = cls(*init_args)
  389. ndframe.attrs = {"a": 1}
  390. result = method(ndframe)
  391. assert result.attrs == {"a": 1}
  392. @pytest.mark.parametrize(
  393. "data",
  394. [
  395. pd.Series(1, pd.date_range("2000", periods=4)),
  396. pd.DataFrame({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
  397. ],
  398. )
  399. def test_finalize_first(data):
  400. deprecated_msg = "first is deprecated"
  401. data.attrs = {"a": 1}
  402. with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
  403. result = data.first("3D")
  404. assert result.attrs == {"a": 1}
  405. @pytest.mark.parametrize(
  406. "data",
  407. [
  408. pd.Series(1, pd.date_range("2000", periods=4)),
  409. pd.DataFrame({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
  410. ],
  411. )
  412. def test_finalize_last(data):
  413. # GH 53710
  414. deprecated_msg = "last is deprecated"
  415. data.attrs = {"a": 1}
  416. with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
  417. result = data.last("3D")
  418. assert result.attrs == {"a": 1}
  419. @not_implemented_mark
  420. def test_finalize_called_eval_numexpr():
  421. pytest.importorskip("numexpr")
  422. df = pd.DataFrame({"A": [1, 2]})
  423. df.attrs["A"] = 1
  424. result = df.eval("A + 1", engine="numexpr")
  425. assert result.attrs == {"A": 1}
  426. # ----------------------------------------------------------------------------
  427. # Binary operations
  428. @pytest.mark.parametrize("annotate", ["left", "right", "both"])
  429. @pytest.mark.parametrize(
  430. "args",
  431. [
  432. (1, pd.Series([1])),
  433. (1, pd.DataFrame({"A": [1]})),
  434. (pd.Series([1]), 1),
  435. (pd.DataFrame({"A": [1]}), 1),
  436. (pd.Series([1]), pd.Series([1])),
  437. (pd.DataFrame({"A": [1]}), pd.DataFrame({"A": [1]})),
  438. (pd.Series([1]), pd.DataFrame({"A": [1]})),
  439. (pd.DataFrame({"A": [1]}), pd.Series([1])),
  440. ],
  441. ids=lambda x: f"({type(x[0]).__name__},{type(x[1]).__name__})",
  442. )
  443. def test_binops(request, args, annotate, all_binary_operators):
  444. # This generates 624 tests... Is that needed?
  445. left, right = args
  446. if isinstance(left, (pd.DataFrame, pd.Series)):
  447. left.attrs = {}
  448. if isinstance(right, (pd.DataFrame, pd.Series)):
  449. right.attrs = {}
  450. if annotate == "left" and isinstance(left, int):
  451. pytest.skip("left is an int and doesn't support .attrs")
  452. if annotate == "right" and isinstance(right, int):
  453. pytest.skip("right is an int and doesn't support .attrs")
  454. if not (isinstance(left, int) or isinstance(right, int)) and annotate != "both":
  455. if not all_binary_operators.__name__.startswith("r"):
  456. if annotate == "right" and isinstance(left, type(right)):
  457. request.applymarker(
  458. pytest.mark.xfail(
  459. reason=f"{all_binary_operators} doesn't work when right has "
  460. f"attrs and both are {type(left)}"
  461. )
  462. )
  463. if not isinstance(left, type(right)):
  464. if annotate == "left" and isinstance(left, pd.Series):
  465. request.applymarker(
  466. pytest.mark.xfail(
  467. reason=f"{all_binary_operators} doesn't work when the "
  468. "objects are different Series has attrs"
  469. )
  470. )
  471. elif annotate == "right" and isinstance(right, pd.Series):
  472. request.applymarker(
  473. pytest.mark.xfail(
  474. reason=f"{all_binary_operators} doesn't work when the "
  475. "objects are different Series has attrs"
  476. )
  477. )
  478. else:
  479. if annotate == "left" and isinstance(left, type(right)):
  480. request.applymarker(
  481. pytest.mark.xfail(
  482. reason=f"{all_binary_operators} doesn't work when left has "
  483. f"attrs and both are {type(left)}"
  484. )
  485. )
  486. if not isinstance(left, type(right)):
  487. if annotate == "right" and isinstance(right, pd.Series):
  488. request.applymarker(
  489. pytest.mark.xfail(
  490. reason=f"{all_binary_operators} doesn't work when the "
  491. "objects are different Series has attrs"
  492. )
  493. )
  494. elif annotate == "left" and isinstance(left, pd.Series):
  495. request.applymarker(
  496. pytest.mark.xfail(
  497. reason=f"{all_binary_operators} doesn't work when the "
  498. "objects are different Series has attrs"
  499. )
  500. )
  501. if annotate in {"left", "both"} and not isinstance(left, int):
  502. left.attrs = {"a": 1}
  503. if annotate in {"right", "both"} and not isinstance(right, int):
  504. right.attrs = {"a": 1}
  505. is_cmp = all_binary_operators in [
  506. operator.eq,
  507. operator.ne,
  508. operator.gt,
  509. operator.ge,
  510. operator.lt,
  511. operator.le,
  512. ]
  513. if is_cmp and isinstance(left, pd.DataFrame) and isinstance(right, pd.Series):
  514. # in 2.0 silent alignment on comparisons was removed xref GH#28759
  515. left, right = left.align(right, axis=1, copy=False)
  516. elif is_cmp and isinstance(left, pd.Series) and isinstance(right, pd.DataFrame):
  517. right, left = right.align(left, axis=1, copy=False)
  518. result = all_binary_operators(left, right)
  519. assert result.attrs == {"a": 1}
  520. # ----------------------------------------------------------------------------
  521. # Accessors
  522. @pytest.mark.parametrize(
  523. "method",
  524. [
  525. operator.methodcaller("capitalize"),
  526. operator.methodcaller("casefold"),
  527. operator.methodcaller("cat", ["a"]),
  528. operator.methodcaller("contains", "a"),
  529. operator.methodcaller("count", "a"),
  530. operator.methodcaller("encode", "utf-8"),
  531. operator.methodcaller("endswith", "a"),
  532. operator.methodcaller("extract", r"(\w)(\d)"),
  533. operator.methodcaller("extract", r"(\w)(\d)", expand=False),
  534. operator.methodcaller("find", "a"),
  535. operator.methodcaller("findall", "a"),
  536. operator.methodcaller("get", 0),
  537. operator.methodcaller("index", "a"),
  538. operator.methodcaller("len"),
  539. operator.methodcaller("ljust", 4),
  540. operator.methodcaller("lower"),
  541. operator.methodcaller("lstrip"),
  542. operator.methodcaller("match", r"\w"),
  543. operator.methodcaller("normalize", "NFC"),
  544. operator.methodcaller("pad", 4),
  545. operator.methodcaller("partition", "a"),
  546. operator.methodcaller("repeat", 2),
  547. operator.methodcaller("replace", "a", "b"),
  548. operator.methodcaller("rfind", "a"),
  549. operator.methodcaller("rindex", "a"),
  550. operator.methodcaller("rjust", 4),
  551. operator.methodcaller("rpartition", "a"),
  552. operator.methodcaller("rstrip"),
  553. operator.methodcaller("slice", 4),
  554. operator.methodcaller("slice_replace", 1, repl="a"),
  555. operator.methodcaller("startswith", "a"),
  556. operator.methodcaller("strip"),
  557. operator.methodcaller("swapcase"),
  558. operator.methodcaller("translate", {"a": "b"}),
  559. operator.methodcaller("upper"),
  560. operator.methodcaller("wrap", 4),
  561. operator.methodcaller("zfill", 4),
  562. operator.methodcaller("isalnum"),
  563. operator.methodcaller("isalpha"),
  564. operator.methodcaller("isdigit"),
  565. operator.methodcaller("isspace"),
  566. operator.methodcaller("islower"),
  567. operator.methodcaller("isupper"),
  568. operator.methodcaller("istitle"),
  569. operator.methodcaller("isnumeric"),
  570. operator.methodcaller("isdecimal"),
  571. operator.methodcaller("get_dummies"),
  572. ],
  573. ids=idfn,
  574. )
  575. def test_string_method(method):
  576. s = pd.Series(["a1"])
  577. s.attrs = {"a": 1}
  578. result = method(s.str)
  579. assert result.attrs == {"a": 1}
  580. @pytest.mark.parametrize(
  581. "method",
  582. [
  583. operator.methodcaller("to_period"),
  584. operator.methodcaller("tz_localize", "CET"),
  585. operator.methodcaller("normalize"),
  586. operator.methodcaller("strftime", "%Y"),
  587. operator.methodcaller("round", "h"),
  588. operator.methodcaller("floor", "h"),
  589. operator.methodcaller("ceil", "h"),
  590. operator.methodcaller("month_name"),
  591. operator.methodcaller("day_name"),
  592. ],
  593. ids=idfn,
  594. )
  595. def test_datetime_method(method):
  596. s = pd.Series(pd.date_range("2000", periods=4))
  597. s.attrs = {"a": 1}
  598. result = method(s.dt)
  599. assert result.attrs == {"a": 1}
  600. @pytest.mark.parametrize(
  601. "attr",
  602. [
  603. "date",
  604. "time",
  605. "timetz",
  606. "year",
  607. "month",
  608. "day",
  609. "hour",
  610. "minute",
  611. "second",
  612. "microsecond",
  613. "nanosecond",
  614. "dayofweek",
  615. "day_of_week",
  616. "dayofyear",
  617. "day_of_year",
  618. "quarter",
  619. "is_month_start",
  620. "is_month_end",
  621. "is_quarter_start",
  622. "is_quarter_end",
  623. "is_year_start",
  624. "is_year_end",
  625. "is_leap_year",
  626. "daysinmonth",
  627. "days_in_month",
  628. ],
  629. )
  630. def test_datetime_property(attr):
  631. s = pd.Series(pd.date_range("2000", periods=4))
  632. s.attrs = {"a": 1}
  633. result = getattr(s.dt, attr)
  634. assert result.attrs == {"a": 1}
  635. @pytest.mark.parametrize(
  636. "attr", ["days", "seconds", "microseconds", "nanoseconds", "components"]
  637. )
  638. def test_timedelta_property(attr):
  639. s = pd.Series(pd.timedelta_range("2000", periods=4))
  640. s.attrs = {"a": 1}
  641. result = getattr(s.dt, attr)
  642. assert result.attrs == {"a": 1}
  643. @pytest.mark.parametrize("method", [operator.methodcaller("total_seconds")])
  644. def test_timedelta_methods(method):
  645. s = pd.Series(pd.timedelta_range("2000", periods=4))
  646. s.attrs = {"a": 1}
  647. result = method(s.dt)
  648. assert result.attrs == {"a": 1}
  649. @pytest.mark.parametrize(
  650. "method",
  651. [
  652. operator.methodcaller("add_categories", ["c"]),
  653. operator.methodcaller("as_ordered"),
  654. operator.methodcaller("as_unordered"),
  655. lambda x: getattr(x, "codes"),
  656. operator.methodcaller("remove_categories", "a"),
  657. operator.methodcaller("remove_unused_categories"),
  658. operator.methodcaller("rename_categories", {"a": "A", "b": "B"}),
  659. operator.methodcaller("reorder_categories", ["b", "a"]),
  660. operator.methodcaller("set_categories", ["A", "B"]),
  661. ],
  662. )
  663. @not_implemented_mark
  664. def test_categorical_accessor(method):
  665. s = pd.Series(["a", "b"], dtype="category")
  666. s.attrs = {"a": 1}
  667. result = method(s.cat)
  668. assert result.attrs == {"a": 1}
  669. # ----------------------------------------------------------------------------
  670. # Groupby
  671. @pytest.mark.parametrize(
  672. "obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})]
  673. )
  674. @pytest.mark.parametrize(
  675. "method",
  676. [
  677. operator.methodcaller("sum"),
  678. lambda x: x.apply(lambda y: y),
  679. lambda x: x.agg("sum"),
  680. lambda x: x.agg("mean"),
  681. lambda x: x.agg("median"),
  682. ],
  683. )
  684. def test_groupby_finalize(obj, method):
  685. obj.attrs = {"a": 1}
  686. result = method(obj.groupby([0, 0], group_keys=False))
  687. assert result.attrs == {"a": 1}
  688. @pytest.mark.parametrize(
  689. "obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})]
  690. )
  691. @pytest.mark.parametrize(
  692. "method",
  693. [
  694. lambda x: x.agg(["sum", "count"]),
  695. lambda x: x.agg("std"),
  696. lambda x: x.agg("var"),
  697. lambda x: x.agg("sem"),
  698. lambda x: x.agg("size"),
  699. lambda x: x.agg("ohlc"),
  700. ],
  701. )
  702. @not_implemented_mark
  703. def test_groupby_finalize_not_implemented(obj, method):
  704. obj.attrs = {"a": 1}
  705. result = method(obj.groupby([0, 0]))
  706. assert result.attrs == {"a": 1}
  707. def test_finalize_frame_series_name():
  708. # https://github.com/pandas-dev/pandas/pull/37186/files#r506978889
  709. # ensure we don't copy the column `name` to the Series.
  710. df = pd.DataFrame({"name": [1, 2]})
  711. result = pd.Series([1, 2]).__finalize__(df)
  712. assert result.name is None