# test_subclass.py (27 KB)
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import (
  5. DataFrame,
  6. Index,
  7. MultiIndex,
  8. Series,
  9. )
  10. import pandas._testing as tm
  # Module-wide marker: ignore the DeprecationWarning whose message starts with
  # "Passing a BlockManager" or "Passing a SingleBlockManager".
  pytestmark = pytest.mark.filterwarnings(
      "ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning"
  )
  @pytest.fixture()
  def gpd_style_subclass_df():
      """Provide a three-row frame whose class overrides only ``_constructor``."""

      class SubclassedDataFrame(DataFrame):
          # The property hands back the class itself.
          @property
          def _constructor(self):
              return SubclassedDataFrame

      frame = SubclassedDataFrame({"a": [1, 2, 3]})
      return frame
  21. class TestDataFrameSubclassing:
  22. def test_no_warning_on_mgr(self):
  23. # GH#57032
  24. df = tm.SubclassedDataFrame(
  25. {"X": [1, 2, 3], "Y": [1, 2, 3]}, index=["a", "b", "c"]
  26. )
  27. with tm.assert_produces_warning(None):
  28. # df.isna() goes through _constructor_from_mgr, which we want to
  29. # *not* pass a Manager do __init__
  30. df.isna()
  31. df["X"].isna()
  32. def test_frame_subclassing_and_slicing(self):
  33. # Subclass frame and ensure it returns the right class on slicing it
  34. # In reference to PR 9632
  35. class CustomSeries(Series):
  36. @property
  37. def _constructor(self):
  38. return CustomSeries
  39. def custom_series_function(self):
  40. return "OK"
  41. class CustomDataFrame(DataFrame):
  42. """
  43. Subclasses pandas DF, fills DF with simulation results, adds some
  44. custom plotting functions.
  45. """
  46. def __init__(self, *args, **kw) -> None:
  47. super().__init__(*args, **kw)
  48. @property
  49. def _constructor(self):
  50. return CustomDataFrame
  51. _constructor_sliced = CustomSeries
  52. def custom_frame_function(self):
  53. return "OK"
  54. data = {"col1": range(10), "col2": range(10)}
  55. cdf = CustomDataFrame(data)
  56. # Did we get back our own DF class?
  57. assert isinstance(cdf, CustomDataFrame)
  58. # Do we get back our own Series class after selecting a column?
  59. cdf_series = cdf.col1
  60. assert isinstance(cdf_series, CustomSeries)
  61. assert cdf_series.custom_series_function() == "OK"
  62. # Do we get back our own DF class after slicing row-wise?
  63. cdf_rows = cdf[1:5]
  64. assert isinstance(cdf_rows, CustomDataFrame)
  65. assert cdf_rows.custom_frame_function() == "OK"
  66. # Make sure sliced part of multi-index frame is custom class
  67. mcol = MultiIndex.from_tuples([("A", "A"), ("A", "B")])
  68. cdf_multi = CustomDataFrame([[0, 1], [2, 3]], columns=mcol)
  69. assert isinstance(cdf_multi["A"], CustomDataFrame)
  70. mcol = MultiIndex.from_tuples([("A", ""), ("B", "")])
  71. cdf_multi2 = CustomDataFrame([[0, 1], [2, 3]], columns=mcol)
  72. assert isinstance(cdf_multi2["A"], CustomSeries)
  73. def test_dataframe_metadata(self):
  74. df = tm.SubclassedDataFrame(
  75. {"X": [1, 2, 3], "Y": [1, 2, 3]}, index=["a", "b", "c"]
  76. )
  77. df.testattr = "XXX"
  78. assert df.testattr == "XXX"
  79. assert df[["X"]].testattr == "XXX"
  80. assert df.loc[["a", "b"], :].testattr == "XXX"
  81. assert df.iloc[[0, 1], :].testattr == "XXX"
  82. # see gh-9776
  83. assert df.iloc[0:1, :].testattr == "XXX"
  84. # see gh-10553
  85. unpickled = tm.round_trip_pickle(df)
  86. tm.assert_frame_equal(df, unpickled)
  87. assert df._metadata == unpickled._metadata
  88. assert df.testattr == unpickled.testattr
  89. def test_indexing_sliced(self):
  90. # GH 11559
  91. df = tm.SubclassedDataFrame(
  92. {"X": [1, 2, 3], "Y": [4, 5, 6], "Z": [7, 8, 9]}, index=["a", "b", "c"]
  93. )
  94. res = df.loc[:, "X"]
  95. exp = tm.SubclassedSeries([1, 2, 3], index=list("abc"), name="X")
  96. tm.assert_series_equal(res, exp)
  97. assert isinstance(res, tm.SubclassedSeries)
  98. res = df.iloc[:, 1]
  99. exp = tm.SubclassedSeries([4, 5, 6], index=list("abc"), name="Y")
  100. tm.assert_series_equal(res, exp)
  101. assert isinstance(res, tm.SubclassedSeries)
  102. res = df.loc[:, "Z"]
  103. exp = tm.SubclassedSeries([7, 8, 9], index=list("abc"), name="Z")
  104. tm.assert_series_equal(res, exp)
  105. assert isinstance(res, tm.SubclassedSeries)
  106. res = df.loc["a", :]
  107. exp = tm.SubclassedSeries([1, 4, 7], index=list("XYZ"), name="a")
  108. tm.assert_series_equal(res, exp)
  109. assert isinstance(res, tm.SubclassedSeries)
  110. res = df.iloc[1, :]
  111. exp = tm.SubclassedSeries([2, 5, 8], index=list("XYZ"), name="b")
  112. tm.assert_series_equal(res, exp)
  113. assert isinstance(res, tm.SubclassedSeries)
  114. res = df.loc["c", :]
  115. exp = tm.SubclassedSeries([3, 6, 9], index=list("XYZ"), name="c")
  116. tm.assert_series_equal(res, exp)
  117. assert isinstance(res, tm.SubclassedSeries)
  def test_subclass_attr_err_propagation(self):
      """An AttributeError raised inside a subclass property names the attribute
      that is actually missing (GH 11808)."""

      class A(DataFrame):
          @property
          def nonexistence(self):
              # Deliberately touches an attribute that is never defined.
              return self.i_dont_exist

      with pytest.raises(AttributeError, match=".*i_dont_exist.*"):
          A().nonexistence
  126. def test_subclass_align(self):
  127. # GH 12983
  128. df1 = tm.SubclassedDataFrame(
  129. {"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE")
  130. )
  131. df2 = tm.SubclassedDataFrame(
  132. {"c": [1, 2, 4], "d": [1, 2, 4]}, index=list("ABD")
  133. )
  134. res1, res2 = df1.align(df2, axis=0)
  135. exp1 = tm.SubclassedDataFrame(
  136. {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]},
  137. index=list("ABCDE"),
  138. )
  139. exp2 = tm.SubclassedDataFrame(
  140. {"c": [1, 2, np.nan, 4, np.nan], "d": [1, 2, np.nan, 4, np.nan]},
  141. index=list("ABCDE"),
  142. )
  143. assert isinstance(res1, tm.SubclassedDataFrame)
  144. tm.assert_frame_equal(res1, exp1)
  145. assert isinstance(res2, tm.SubclassedDataFrame)
  146. tm.assert_frame_equal(res2, exp2)
  147. res1, res2 = df1.a.align(df2.c)
  148. assert isinstance(res1, tm.SubclassedSeries)
  149. tm.assert_series_equal(res1, exp1.a)
  150. assert isinstance(res2, tm.SubclassedSeries)
  151. tm.assert_series_equal(res2, exp2.c)
  152. def test_subclass_align_combinations(self):
  153. # GH 12983
  154. df = tm.SubclassedDataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE"))
  155. s = tm.SubclassedSeries([1, 2, 4], index=list("ABD"), name="x")
  156. # frame + series
  157. res1, res2 = df.align(s, axis=0)
  158. exp1 = tm.SubclassedDataFrame(
  159. {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]},
  160. index=list("ABCDE"),
  161. )
  162. # name is lost when
  163. exp2 = tm.SubclassedSeries(
  164. [1, 2, np.nan, 4, np.nan], index=list("ABCDE"), name="x"
  165. )
  166. assert isinstance(res1, tm.SubclassedDataFrame)
  167. tm.assert_frame_equal(res1, exp1)
  168. assert isinstance(res2, tm.SubclassedSeries)
  169. tm.assert_series_equal(res2, exp2)
  170. # series + frame
  171. res1, res2 = s.align(df)
  172. assert isinstance(res1, tm.SubclassedSeries)
  173. tm.assert_series_equal(res1, exp2)
  174. assert isinstance(res2, tm.SubclassedDataFrame)
  175. tm.assert_frame_equal(res2, exp1)
  176. def test_subclass_iterrows(self):
  177. # GH 13977
  178. df = tm.SubclassedDataFrame({"a": [1]})
  179. for i, row in df.iterrows():
  180. assert isinstance(row, tm.SubclassedSeries)
  181. tm.assert_series_equal(row, df.loc[i])
  182. def test_subclass_stack(self):
  183. # GH 15564
  184. df = tm.SubclassedDataFrame(
  185. [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  186. index=["a", "b", "c"],
  187. columns=["X", "Y", "Z"],
  188. )
  189. res = df.stack(future_stack=True)
  190. exp = tm.SubclassedSeries(
  191. [1, 2, 3, 4, 5, 6, 7, 8, 9], index=[list("aaabbbccc"), list("XYZXYZXYZ")]
  192. )
  193. tm.assert_series_equal(res, exp)
  194. def test_subclass_stack_multi(self):
  195. # GH 15564
  196. df = tm.SubclassedDataFrame(
  197. [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]],
  198. index=MultiIndex.from_tuples(
  199. list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"]
  200. ),
  201. columns=MultiIndex.from_tuples(
  202. list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"]
  203. ),
  204. )
  205. exp = tm.SubclassedDataFrame(
  206. [
  207. [10, 12],
  208. [11, 13],
  209. [20, 22],
  210. [21, 23],
  211. [30, 32],
  212. [31, 33],
  213. [40, 42],
  214. [41, 43],
  215. ],
  216. index=MultiIndex.from_tuples(
  217. list(zip(list("AAAABBBB"), list("ccddccdd"), list("yzyzyzyz"))),
  218. names=["aaa", "ccc", "yyy"],
  219. ),
  220. columns=Index(["W", "X"], name="www"),
  221. )
  222. res = df.stack(future_stack=True)
  223. tm.assert_frame_equal(res, exp)
  224. res = df.stack("yyy", future_stack=True)
  225. tm.assert_frame_equal(res, exp)
  226. exp = tm.SubclassedDataFrame(
  227. [
  228. [10, 11],
  229. [12, 13],
  230. [20, 21],
  231. [22, 23],
  232. [30, 31],
  233. [32, 33],
  234. [40, 41],
  235. [42, 43],
  236. ],
  237. index=MultiIndex.from_tuples(
  238. list(zip(list("AAAABBBB"), list("ccddccdd"), list("WXWXWXWX"))),
  239. names=["aaa", "ccc", "www"],
  240. ),
  241. columns=Index(["y", "z"], name="yyy"),
  242. )
  243. res = df.stack("www", future_stack=True)
  244. tm.assert_frame_equal(res, exp)
  245. def test_subclass_stack_multi_mixed(self):
  246. # GH 15564
  247. df = tm.SubclassedDataFrame(
  248. [
  249. [10, 11, 12.0, 13.0],
  250. [20, 21, 22.0, 23.0],
  251. [30, 31, 32.0, 33.0],
  252. [40, 41, 42.0, 43.0],
  253. ],
  254. index=MultiIndex.from_tuples(
  255. list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"]
  256. ),
  257. columns=MultiIndex.from_tuples(
  258. list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"]
  259. ),
  260. )
  261. exp = tm.SubclassedDataFrame(
  262. [
  263. [10, 12.0],
  264. [11, 13.0],
  265. [20, 22.0],
  266. [21, 23.0],
  267. [30, 32.0],
  268. [31, 33.0],
  269. [40, 42.0],
  270. [41, 43.0],
  271. ],
  272. index=MultiIndex.from_tuples(
  273. list(zip(list("AAAABBBB"), list("ccddccdd"), list("yzyzyzyz"))),
  274. names=["aaa", "ccc", "yyy"],
  275. ),
  276. columns=Index(["W", "X"], name="www"),
  277. )
  278. res = df.stack(future_stack=True)
  279. tm.assert_frame_equal(res, exp)
  280. res = df.stack("yyy", future_stack=True)
  281. tm.assert_frame_equal(res, exp)
  282. exp = tm.SubclassedDataFrame(
  283. [
  284. [10.0, 11.0],
  285. [12.0, 13.0],
  286. [20.0, 21.0],
  287. [22.0, 23.0],
  288. [30.0, 31.0],
  289. [32.0, 33.0],
  290. [40.0, 41.0],
  291. [42.0, 43.0],
  292. ],
  293. index=MultiIndex.from_tuples(
  294. list(zip(list("AAAABBBB"), list("ccddccdd"), list("WXWXWXWX"))),
  295. names=["aaa", "ccc", "www"],
  296. ),
  297. columns=Index(["y", "z"], name="yyy"),
  298. )
  299. res = df.stack("www", future_stack=True)
  300. tm.assert_frame_equal(res, exp)
  301. def test_subclass_unstack(self):
  302. # GH 15564
  303. df = tm.SubclassedDataFrame(
  304. [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  305. index=["a", "b", "c"],
  306. columns=["X", "Y", "Z"],
  307. )
  308. res = df.unstack()
  309. exp = tm.SubclassedSeries(
  310. [1, 4, 7, 2, 5, 8, 3, 6, 9], index=[list("XXXYYYZZZ"), list("abcabcabc")]
  311. )
  312. tm.assert_series_equal(res, exp)
  313. def test_subclass_unstack_multi(self):
  314. # GH 15564
  315. df = tm.SubclassedDataFrame(
  316. [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]],
  317. index=MultiIndex.from_tuples(
  318. list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"]
  319. ),
  320. columns=MultiIndex.from_tuples(
  321. list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"]
  322. ),
  323. )
  324. exp = tm.SubclassedDataFrame(
  325. [[10, 20, 11, 21, 12, 22, 13, 23], [30, 40, 31, 41, 32, 42, 33, 43]],
  326. index=Index(["A", "B"], name="aaa"),
  327. columns=MultiIndex.from_tuples(
  328. list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("cdcdcdcd"))),
  329. names=["www", "yyy", "ccc"],
  330. ),
  331. )
  332. res = df.unstack()
  333. tm.assert_frame_equal(res, exp)
  334. res = df.unstack("ccc")
  335. tm.assert_frame_equal(res, exp)
  336. exp = tm.SubclassedDataFrame(
  337. [[10, 30, 11, 31, 12, 32, 13, 33], [20, 40, 21, 41, 22, 42, 23, 43]],
  338. index=Index(["c", "d"], name="ccc"),
  339. columns=MultiIndex.from_tuples(
  340. list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("ABABABAB"))),
  341. names=["www", "yyy", "aaa"],
  342. ),
  343. )
  344. res = df.unstack("aaa")
  345. tm.assert_frame_equal(res, exp)
  346. def test_subclass_unstack_multi_mixed(self):
  347. # GH 15564
  348. df = tm.SubclassedDataFrame(
  349. [
  350. [10, 11, 12.0, 13.0],
  351. [20, 21, 22.0, 23.0],
  352. [30, 31, 32.0, 33.0],
  353. [40, 41, 42.0, 43.0],
  354. ],
  355. index=MultiIndex.from_tuples(
  356. list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"]
  357. ),
  358. columns=MultiIndex.from_tuples(
  359. list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"]
  360. ),
  361. )
  362. exp = tm.SubclassedDataFrame(
  363. [
  364. [10, 20, 11, 21, 12.0, 22.0, 13.0, 23.0],
  365. [30, 40, 31, 41, 32.0, 42.0, 33.0, 43.0],
  366. ],
  367. index=Index(["A", "B"], name="aaa"),
  368. columns=MultiIndex.from_tuples(
  369. list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("cdcdcdcd"))),
  370. names=["www", "yyy", "ccc"],
  371. ),
  372. )
  373. res = df.unstack()
  374. tm.assert_frame_equal(res, exp)
  375. res = df.unstack("ccc")
  376. tm.assert_frame_equal(res, exp)
  377. exp = tm.SubclassedDataFrame(
  378. [
  379. [10, 30, 11, 31, 12.0, 32.0, 13.0, 33.0],
  380. [20, 40, 21, 41, 22.0, 42.0, 23.0, 43.0],
  381. ],
  382. index=Index(["c", "d"], name="ccc"),
  383. columns=MultiIndex.from_tuples(
  384. list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("ABABABAB"))),
  385. names=["www", "yyy", "aaa"],
  386. ),
  387. )
  388. res = df.unstack("aaa")
  389. tm.assert_frame_equal(res, exp)
  390. def test_subclass_pivot(self):
  391. # GH 15564
  392. df = tm.SubclassedDataFrame(
  393. {
  394. "index": ["A", "B", "C", "C", "B", "A"],
  395. "columns": ["One", "One", "One", "Two", "Two", "Two"],
  396. "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0],
  397. }
  398. )
  399. pivoted = df.pivot(index="index", columns="columns", values="values")
  400. expected = tm.SubclassedDataFrame(
  401. {
  402. "One": {"A": 1.0, "B": 2.0, "C": 3.0},
  403. "Two": {"A": 1.0, "B": 2.0, "C": 3.0},
  404. }
  405. )
  406. expected.index.name, expected.columns.name = "index", "columns"
  407. tm.assert_frame_equal(pivoted, expected)
  408. def test_subclassed_melt(self):
  409. # GH 15564
  410. cheese = tm.SubclassedDataFrame(
  411. {
  412. "first": ["John", "Mary"],
  413. "last": ["Doe", "Bo"],
  414. "height": [5.5, 6.0],
  415. "weight": [130, 150],
  416. }
  417. )
  418. melted = pd.melt(cheese, id_vars=["first", "last"])
  419. expected = tm.SubclassedDataFrame(
  420. [
  421. ["John", "Doe", "height", 5.5],
  422. ["Mary", "Bo", "height", 6.0],
  423. ["John", "Doe", "weight", 130],
  424. ["Mary", "Bo", "weight", 150],
  425. ],
  426. columns=["first", "last", "variable", "value"],
  427. )
  428. tm.assert_frame_equal(melted, expected)
  429. def test_subclassed_wide_to_long(self):
  430. # GH 9762
  431. x = np.random.default_rng(2).standard_normal(3)
  432. df = tm.SubclassedDataFrame(
  433. {
  434. "A1970": {0: "a", 1: "b", 2: "c"},
  435. "A1980": {0: "d", 1: "e", 2: "f"},
  436. "B1970": {0: 2.5, 1: 1.2, 2: 0.7},
  437. "B1980": {0: 3.2, 1: 1.3, 2: 0.1},
  438. "X": dict(zip(range(3), x)),
  439. }
  440. )
  441. df["id"] = df.index
  442. exp_data = {
  443. "X": x.tolist() + x.tolist(),
  444. "A": ["a", "b", "c", "d", "e", "f"],
  445. "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1],
  446. "year": [1970, 1970, 1970, 1980, 1980, 1980],
  447. "id": [0, 1, 2, 0, 1, 2],
  448. }
  449. expected = tm.SubclassedDataFrame(exp_data)
  450. expected = expected.set_index(["id", "year"])[["X", "A", "B"]]
  451. long_frame = pd.wide_to_long(df, ["A", "B"], i="id", j="year")
  452. tm.assert_frame_equal(long_frame, expected)
  453. def test_subclassed_apply(self):
  454. # GH 19822
  455. def check_row_subclass(row):
  456. assert isinstance(row, tm.SubclassedSeries)
  457. def stretch(row):
  458. if row["variable"] == "height":
  459. row["value"] += 0.5
  460. return row
  461. df = tm.SubclassedDataFrame(
  462. [
  463. ["John", "Doe", "height", 5.5],
  464. ["Mary", "Bo", "height", 6.0],
  465. ["John", "Doe", "weight", 130],
  466. ["Mary", "Bo", "weight", 150],
  467. ],
  468. columns=["first", "last", "variable", "value"],
  469. )
  470. df.apply(lambda x: check_row_subclass(x))
  471. df.apply(lambda x: check_row_subclass(x), axis=1)
  472. expected = tm.SubclassedDataFrame(
  473. [
  474. ["John", "Doe", "height", 6.0],
  475. ["Mary", "Bo", "height", 6.5],
  476. ["John", "Doe", "weight", 130],
  477. ["Mary", "Bo", "weight", 150],
  478. ],
  479. columns=["first", "last", "variable", "value"],
  480. )
  481. result = df.apply(lambda x: stretch(x), axis=1)
  482. assert isinstance(result, tm.SubclassedDataFrame)
  483. tm.assert_frame_equal(result, expected)
  484. expected = tm.SubclassedDataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]])
  485. result = df.apply(lambda x: tm.SubclassedSeries([1, 2, 3]), axis=1)
  486. assert isinstance(result, tm.SubclassedDataFrame)
  487. tm.assert_frame_equal(result, expected)
  488. result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand")
  489. assert isinstance(result, tm.SubclassedDataFrame)
  490. tm.assert_frame_equal(result, expected)
  491. expected = tm.SubclassedSeries([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]])
  492. result = df.apply(lambda x: [1, 2, 3], axis=1)
  493. assert not isinstance(result, tm.SubclassedDataFrame)
  494. tm.assert_series_equal(result, expected)
  495. def test_subclassed_reductions(self, all_reductions):
  496. # GH 25596
  497. df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
  498. result = getattr(df, all_reductions)()
  499. assert isinstance(result, tm.SubclassedSeries)
  500. def test_subclassed_count(self):
  501. df = tm.SubclassedDataFrame(
  502. {
  503. "Person": ["John", "Myla", "Lewis", "John", "Myla"],
  504. "Age": [24.0, np.nan, 21.0, 33, 26],
  505. "Single": [False, True, True, True, False],
  506. }
  507. )
  508. result = df.count()
  509. assert isinstance(result, tm.SubclassedSeries)
  510. df = tm.SubclassedDataFrame({"A": [1, 0, 3], "B": [0, 5, 6], "C": [7, 8, 0]})
  511. result = df.count()
  512. assert isinstance(result, tm.SubclassedSeries)
  513. df = tm.SubclassedDataFrame(
  514. [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]],
  515. index=MultiIndex.from_tuples(
  516. list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"]
  517. ),
  518. columns=MultiIndex.from_tuples(
  519. list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"]
  520. ),
  521. )
  522. result = df.count()
  523. assert isinstance(result, tm.SubclassedSeries)
  524. df = tm.SubclassedDataFrame()
  525. result = df.count()
  526. assert isinstance(result, tm.SubclassedSeries)
  527. def test_isin(self):
  528. df = tm.SubclassedDataFrame(
  529. {"num_legs": [2, 4], "num_wings": [2, 0]}, index=["falcon", "dog"]
  530. )
  531. result = df.isin([0, 2])
  532. assert isinstance(result, tm.SubclassedDataFrame)
  533. def test_duplicated(self):
  534. df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
  535. result = df.duplicated()
  536. assert isinstance(result, tm.SubclassedSeries)
  537. df = tm.SubclassedDataFrame()
  538. result = df.duplicated()
  539. assert isinstance(result, tm.SubclassedSeries)
  @pytest.mark.parametrize("idx_method", ["idxmax", "idxmin"])
  def test_idx(self, idx_method):
      """idxmax() and idxmin() both return a SubclassedSeries."""
      frame = tm.SubclassedDataFrame(
          {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}
      )
      locate = getattr(frame, idx_method)
      assert isinstance(locate(), tm.SubclassedSeries)
  545. def test_dot(self):
  546. df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
  547. s = tm.SubclassedSeries([1, 1, 2, 1])
  548. result = df.dot(s)
  549. assert isinstance(result, tm.SubclassedSeries)
  550. df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
  551. s = tm.SubclassedDataFrame([1, 1, 2, 1])
  552. result = df.dot(s)
  553. assert isinstance(result, tm.SubclassedDataFrame)
  554. def test_memory_usage(self):
  555. df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
  556. result = df.memory_usage()
  557. assert isinstance(result, tm.SubclassedSeries)
  558. result = df.memory_usage(index=False)
  559. assert isinstance(result, tm.SubclassedSeries)
  def test_corrwith(self):
      """corrwith() between two subclassed frames returns a SubclassedSeries."""
      pytest.importorskip("scipy")
      row_labels = ["a", "b", "c", "d", "e"]
      col_labels = ["one", "two", "three", "four"]
      five_rows = tm.SubclassedDataFrame(
          np.random.default_rng(2).standard_normal((5, 4)),
          index=row_labels,
          columns=col_labels,
      )
      four_rows = tm.SubclassedDataFrame(
          np.random.default_rng(2).standard_normal((4, 4)),
          index=row_labels[:4],
          columns=col_labels,
      )

      correlations = five_rows.corrwith(
          four_rows, axis=1, drop=True, method="kendall"
      )
      assert isinstance(correlations, tm.SubclassedSeries)
  576. def test_asof(self):
  577. N = 3
  578. rng = pd.date_range("1/1/1990", periods=N, freq="53s")
  579. df = tm.SubclassedDataFrame(
  580. {
  581. "A": [np.nan, np.nan, np.nan],
  582. "B": [np.nan, np.nan, np.nan],
  583. "C": [np.nan, np.nan, np.nan],
  584. },
  585. index=rng,
  586. )
  587. result = df.asof(rng[-2:])
  588. assert isinstance(result, tm.SubclassedDataFrame)
  589. result = df.asof(rng[-2])
  590. assert isinstance(result, tm.SubclassedSeries)
  591. result = df.asof("1989-12-31")
  592. assert isinstance(result, tm.SubclassedSeries)
  593. def test_idxmin_preserves_subclass(self):
  594. # GH 28330
  595. df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
  596. result = df.idxmin()
  597. assert isinstance(result, tm.SubclassedSeries)
  598. def test_idxmax_preserves_subclass(self):
  599. # GH 28330
  600. df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
  601. result = df.idxmax()
  602. assert isinstance(result, tm.SubclassedSeries)
  603. def test_convert_dtypes_preserves_subclass(self, gpd_style_subclass_df):
  604. # GH 43668
  605. df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
  606. result = df.convert_dtypes()
  607. assert isinstance(result, tm.SubclassedDataFrame)
  608. result = gpd_style_subclass_df.convert_dtypes()
  609. assert isinstance(result, type(gpd_style_subclass_df))
  610. def test_astype_preserves_subclass(self):
  611. # GH#40810
  612. df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
  613. result = df.astype({"A": np.int64, "B": np.int32, "C": np.float64})
  614. assert isinstance(result, tm.SubclassedDataFrame)
  615. def test_equals_subclass(self):
  616. # https://github.com/pandas-dev/pandas/pull/34402
  617. # allow subclass in both directions
  618. df1 = DataFrame({"a": [1, 2, 3]})
  619. df2 = tm.SubclassedDataFrame({"a": [1, 2, 3]})
  620. assert df1.equals(df2)
  621. assert df2.equals(df1)
  622. def test_replace_list_method(self):
  623. # https://github.com/pandas-dev/pandas/pull/46018
  624. df = tm.SubclassedDataFrame({"A": [0, 1, 2]})
  625. msg = "The 'method' keyword in SubclassedDataFrame.replace is deprecated"
  626. with tm.assert_produces_warning(
  627. FutureWarning, match=msg, raise_on_extra_warnings=False
  628. ):
  629. result = df.replace([1, 2], method="ffill")
  630. expected = tm.SubclassedDataFrame({"A": [0, 0, 0]})
  631. assert isinstance(result, tm.SubclassedDataFrame)
  632. tm.assert_frame_equal(result, expected)
  class MySubclassWithMetadata(DataFrame):
      """DataFrame subclass carrying one extra attribute, ``my_metadata``.

      ``my_metadata`` may be given as a keyword argument. When the first
      positional argument is itself a ``MySubclassWithMetadata``, that
      object's ``my_metadata`` is used instead. Otherwise it defaults to None.
      """

      _metadata = ["my_metadata"]

      def __init__(self, *args, **kwargs) -> None:
          # Remove our own keyword *before* delegating: popping it afterwards
          # let ``my_metadata=...`` reach DataFrame.__init__, which rejects
          # unknown keywords with a TypeError.
          my_metadata = kwargs.pop("my_metadata", None)
          super().__init__(*args, **kwargs)

          if args and isinstance(args[0], MySubclassWithMetadata):
              my_metadata = args[0].my_metadata  # type: ignore[has-type]
          self.my_metadata = my_metadata

      @property
      def _constructor(self):
          return MySubclassWithMetadata
  def test_constructor_with_metadata():
      """Selecting a list of columns keeps the metadata-carrying subclass.

      See https://github.com/pandas-dev/pandas/pull/54922 and
      https://github.com/pandas-dev/pandas/issues/55120
      """
      values = np.random.default_rng(2).random((5, 3))
      frame = MySubclassWithMetadata(values, columns=["A", "B", "C"])
      assert isinstance(frame[["A", "B"]], MySubclassWithMetadata)
  def test_constructor_with_metadata_from_records():
      """from_records() builds the exact subclass with ``my_metadata`` unset (GH#57008)."""
      frame = MySubclassWithMetadata.from_records([{"a": 1, "b": 2}])
      assert frame.my_metadata is None
      assert type(frame) is MySubclassWithMetadata
  class SimpleDataFrameSubClass(DataFrame):
      """DataFrame subclass with an empty body: no ``_constructor`` override."""
  class SimpleSeriesSubClass(Series):
      """Series subclass with an empty body: no ``_constructor`` override."""
  class TestSubclassWithoutConstructor:
      """Checks for subclasses that do not override ``_constructor``."""

      def test_copy_df(self):
          plain = DataFrame({"a": [1, 2, 3]})
          copied = SimpleDataFrameSubClass(plain).copy()
          # assert_frame_equal only checks isinstance(lhs, type(rhs)), so the
          # exact type is asserted separately.
          assert type(copied) is DataFrame
          tm.assert_frame_equal(copied, plain)

      def test_copy_series(self):
          plain = Series([1, 2, 3])
          copied = SimpleSeriesSubClass(plain).copy()
          tm.assert_series_equal(copied, plain)

      def test_series_to_frame(self):
          base = Series([1, 2, 3])
          expected = base.to_frame()
          framed = SimpleSeriesSubClass(base).to_frame()
          # assert_frame_equal only checks isinstance(lhs, type(rhs)), so the
          # exact type is asserted separately.
          assert type(framed) is DataFrame
          tm.assert_frame_equal(framed, expected)

      def test_groupby(self):
          frame = SimpleDataFrameSubClass(DataFrame({"a": [1, 2, 3]}))
          for _, group in frame.groupby("a"):
              assert type(group) is DataFrame
  681. def test_groupby(self):
  682. df = SimpleDataFrameSubClass(DataFrame({"a": [1, 2, 3]}))
  683. for _, v in df.groupby("a"):
  684. assert type(v) is DataFrame