test_generic.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. from copy import (
  2. copy,
  3. deepcopy,
  4. )
  5. import numpy as np
  6. import pytest
  7. from pandas.core.dtypes.common import is_scalar
  8. from pandas import (
  9. DataFrame,
  10. Index,
  11. Series,
  12. date_range,
  13. )
  14. import pandas._testing as tm
  15. # ----------------------------------------------------------------------
  16. # Generic types test cases
  17. def construct(box, shape, value=None, dtype=None, **kwargs):
  18. """
  19. construct an object for the given shape
  20. if value is specified use that if its a scalar
  21. if value is an array, repeat it as needed
  22. """
  23. if isinstance(shape, int):
  24. shape = tuple([shape] * box._AXIS_LEN)
  25. if value is not None:
  26. if is_scalar(value):
  27. if value == "empty":
  28. arr = None
  29. dtype = np.float64
  30. # remove the info axis
  31. kwargs.pop(box._info_axis_name, None)
  32. else:
  33. arr = np.empty(shape, dtype=dtype)
  34. arr.fill(value)
  35. else:
  36. fshape = np.prod(shape)
  37. arr = value.ravel()
  38. new_shape = fshape / arr.shape[0]
  39. if fshape % arr.shape[0] != 0:
  40. raise Exception("invalid value passed in construct")
  41. arr = np.repeat(arr, new_shape).reshape(shape)
  42. else:
  43. arr = np.random.default_rng(2).standard_normal(shape)
  44. return box(arr, dtype=dtype, **kwargs)
  45. class TestGeneric:
  46. @pytest.mark.parametrize(
  47. "func",
  48. [
  49. str.lower,
  50. {x: x.lower() for x in list("ABCD")},
  51. Series({x: x.lower() for x in list("ABCD")}),
  52. ],
  53. )
  54. def test_rename(self, frame_or_series, func):
  55. # single axis
  56. idx = list("ABCD")
  57. for axis in frame_or_series._AXIS_ORDERS:
  58. kwargs = {axis: idx}
  59. obj = construct(frame_or_series, 4, **kwargs)
  60. # rename a single axis
  61. result = obj.rename(**{axis: func})
  62. expected = obj.copy()
  63. setattr(expected, axis, list("abcd"))
  64. tm.assert_equal(result, expected)
  65. def test_get_numeric_data(self, frame_or_series):
  66. n = 4
  67. kwargs = {
  68. frame_or_series._get_axis_name(i): list(range(n))
  69. for i in range(frame_or_series._AXIS_LEN)
  70. }
  71. # get the numeric data
  72. o = construct(frame_or_series, n, **kwargs)
  73. result = o._get_numeric_data()
  74. tm.assert_equal(result, o)
  75. # non-inclusion
  76. result = o._get_bool_data()
  77. expected = construct(frame_or_series, n, value="empty", **kwargs)
  78. if isinstance(o, DataFrame):
  79. # preserve columns dtype
  80. expected.columns = o.columns[:0]
  81. # https://github.com/pandas-dev/pandas/issues/50862
  82. tm.assert_equal(result.reset_index(drop=True), expected)
  83. # get the bool data
  84. arr = np.array([True, True, False, True])
  85. o = construct(frame_or_series, n, value=arr, **kwargs)
  86. result = o._get_numeric_data()
  87. tm.assert_equal(result, o)
  88. def test_nonzero(self, frame_or_series):
  89. # GH 4633
  90. # look at the boolean/nonzero behavior for objects
  91. obj = construct(frame_or_series, shape=4)
  92. msg = f"The truth value of a {frame_or_series.__name__} is ambiguous"
  93. with pytest.raises(ValueError, match=msg):
  94. bool(obj == 0)
  95. with pytest.raises(ValueError, match=msg):
  96. bool(obj == 1)
  97. with pytest.raises(ValueError, match=msg):
  98. bool(obj)
  99. obj = construct(frame_or_series, shape=4, value=1)
  100. with pytest.raises(ValueError, match=msg):
  101. bool(obj == 0)
  102. with pytest.raises(ValueError, match=msg):
  103. bool(obj == 1)
  104. with pytest.raises(ValueError, match=msg):
  105. bool(obj)
  106. obj = construct(frame_or_series, shape=4, value=np.nan)
  107. with pytest.raises(ValueError, match=msg):
  108. bool(obj == 0)
  109. with pytest.raises(ValueError, match=msg):
  110. bool(obj == 1)
  111. with pytest.raises(ValueError, match=msg):
  112. bool(obj)
  113. # empty
  114. obj = construct(frame_or_series, shape=0)
  115. with pytest.raises(ValueError, match=msg):
  116. bool(obj)
  117. # invalid behaviors
  118. obj1 = construct(frame_or_series, shape=4, value=1)
  119. obj2 = construct(frame_or_series, shape=4, value=1)
  120. with pytest.raises(ValueError, match=msg):
  121. if obj1:
  122. pass
  123. with pytest.raises(ValueError, match=msg):
  124. obj1 and obj2
  125. with pytest.raises(ValueError, match=msg):
  126. obj1 or obj2
  127. with pytest.raises(ValueError, match=msg):
  128. not obj1
  129. def test_frame_or_series_compound_dtypes(self, frame_or_series):
  130. # see gh-5191
  131. # Compound dtypes should raise NotImplementedError.
  132. def f(dtype):
  133. return construct(frame_or_series, shape=3, value=1, dtype=dtype)
  134. msg = (
  135. "compound dtypes are not implemented "
  136. f"in the {frame_or_series.__name__} constructor"
  137. )
  138. with pytest.raises(NotImplementedError, match=msg):
  139. f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")])
  140. # these work (though results may be unexpected)
  141. f("int64")
  142. f("float64")
  143. f("M8[ns]")
  144. def test_metadata_propagation(self, frame_or_series):
  145. # check that the metadata matches up on the resulting ops
  146. o = construct(frame_or_series, shape=3)
  147. o.name = "foo"
  148. o2 = construct(frame_or_series, shape=3)
  149. o2.name = "bar"
  150. # ----------
  151. # preserving
  152. # ----------
  153. # simple ops with scalars
  154. for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
  155. result = getattr(o, op)(1)
  156. tm.assert_metadata_equivalent(o, result)
  157. # ops with like
  158. for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
  159. result = getattr(o, op)(o)
  160. tm.assert_metadata_equivalent(o, result)
  161. # simple boolean
  162. for op in ["__eq__", "__le__", "__ge__"]:
  163. v1 = getattr(o, op)(o)
  164. tm.assert_metadata_equivalent(o, v1)
  165. tm.assert_metadata_equivalent(o, v1 & v1)
  166. tm.assert_metadata_equivalent(o, v1 | v1)
  167. # combine_first
  168. result = o.combine_first(o2)
  169. tm.assert_metadata_equivalent(o, result)
  170. # ---------------------------
  171. # non-preserving (by default)
  172. # ---------------------------
  173. # add non-like
  174. result = o + o2
  175. tm.assert_metadata_equivalent(result)
  176. # simple boolean
  177. for op in ["__eq__", "__le__", "__ge__"]:
  178. # this is a name matching op
  179. v1 = getattr(o, op)(o)
  180. v2 = getattr(o, op)(o2)
  181. tm.assert_metadata_equivalent(v2)
  182. tm.assert_metadata_equivalent(v1 & v2)
  183. tm.assert_metadata_equivalent(v1 | v2)
  184. def test_size_compat(self, frame_or_series):
  185. # GH8846
  186. # size property should be defined
  187. o = construct(frame_or_series, shape=10)
  188. assert o.size == np.prod(o.shape)
  189. assert o.size == 10 ** len(o.axes)
  190. def test_split_compat(self, frame_or_series):
  191. # xref GH8846
  192. o = construct(frame_or_series, shape=10)
  193. with tm.assert_produces_warning(
  194. FutureWarning, match=".swapaxes' is deprecated", check_stacklevel=False
  195. ):
  196. assert len(np.array_split(o, 5)) == 5
  197. assert len(np.array_split(o, 2)) == 2
  198. # See gh-12301
  199. def test_stat_unexpected_keyword(self, frame_or_series):
  200. obj = construct(frame_or_series, 5)
  201. starwars = "Star Wars"
  202. errmsg = "unexpected keyword"
  203. with pytest.raises(TypeError, match=errmsg):
  204. obj.max(epic=starwars) # stat_function
  205. with pytest.raises(TypeError, match=errmsg):
  206. obj.var(epic=starwars) # stat_function_ddof
  207. with pytest.raises(TypeError, match=errmsg):
  208. obj.sum(epic=starwars) # cum_function
  209. with pytest.raises(TypeError, match=errmsg):
  210. obj.any(epic=starwars) # logical_function
  211. @pytest.mark.parametrize("func", ["sum", "cumsum", "any", "var"])
  212. def test_api_compat(self, func, frame_or_series):
  213. # GH 12021
  214. # compat for __name__, __qualname__
  215. obj = construct(frame_or_series, 5)
  216. f = getattr(obj, func)
  217. assert f.__name__ == func
  218. assert f.__qualname__.endswith(func)
  219. def test_stat_non_defaults_args(self, frame_or_series):
  220. obj = construct(frame_or_series, 5)
  221. out = np.array([0])
  222. errmsg = "the 'out' parameter is not supported"
  223. with pytest.raises(ValueError, match=errmsg):
  224. obj.max(out=out) # stat_function
  225. with pytest.raises(ValueError, match=errmsg):
  226. obj.var(out=out) # stat_function_ddof
  227. with pytest.raises(ValueError, match=errmsg):
  228. obj.sum(out=out) # cum_function
  229. with pytest.raises(ValueError, match=errmsg):
  230. obj.any(out=out) # logical_function
  231. def test_truncate_out_of_bounds(self, frame_or_series):
  232. # GH11382
  233. # small
  234. shape = [2000] + ([1] * (frame_or_series._AXIS_LEN - 1))
  235. small = construct(frame_or_series, shape, dtype="int8", value=1)
  236. tm.assert_equal(small.truncate(), small)
  237. tm.assert_equal(small.truncate(before=0, after=3e3), small)
  238. tm.assert_equal(small.truncate(before=-1, after=2e3), small)
  239. # big
  240. shape = [2_000_000] + ([1] * (frame_or_series._AXIS_LEN - 1))
  241. big = construct(frame_or_series, shape, dtype="int8", value=1)
  242. tm.assert_equal(big.truncate(), big)
  243. tm.assert_equal(big.truncate(before=0, after=3e6), big)
  244. tm.assert_equal(big.truncate(before=-1, after=2e6), big)
  245. @pytest.mark.parametrize(
  246. "func",
  247. [copy, deepcopy, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)],
  248. )
  249. @pytest.mark.parametrize("shape", [0, 1, 2])
  250. def test_copy_and_deepcopy(self, frame_or_series, shape, func):
  251. # GH 15444
  252. obj = construct(frame_or_series, shape)
  253. obj_copy = func(obj)
  254. assert obj_copy is not obj
  255. tm.assert_equal(obj_copy, obj)
  256. def test_data_deprecated(self, frame_or_series):
  257. obj = frame_or_series()
  258. msg = "(Series|DataFrame)._data is deprecated"
  259. with tm.assert_produces_warning(DeprecationWarning, match=msg):
  260. mgr = obj._data
  261. assert mgr is obj._mgr
  262. class TestNDFrame:
  263. # tests that don't fit elsewhere
  264. @pytest.mark.parametrize(
  265. "ser",
  266. [
  267. Series(range(10), dtype=np.float64),
  268. Series([str(i) for i in range(10)], dtype=object),
  269. ],
  270. )
  271. def test_squeeze_series_noop(self, ser):
  272. # noop
  273. tm.assert_series_equal(ser.squeeze(), ser)
  274. def test_squeeze_frame_noop(self):
  275. # noop
  276. df = DataFrame(np.eye(2))
  277. tm.assert_frame_equal(df.squeeze(), df)
  278. def test_squeeze_frame_reindex(self):
  279. # squeezing
  280. df = DataFrame(
  281. np.random.default_rng(2).standard_normal((10, 4)),
  282. columns=Index(list("ABCD"), dtype=object),
  283. index=date_range("2000-01-01", periods=10, freq="B"),
  284. ).reindex(columns=["A"])
  285. tm.assert_series_equal(df.squeeze(), df["A"])
  286. def test_squeeze_0_len_dim(self):
  287. # don't fail with 0 length dimensions GH11229 & GH8999
  288. empty_series = Series([], name="five", dtype=np.float64)
  289. empty_frame = DataFrame([empty_series])
  290. tm.assert_series_equal(empty_series, empty_series.squeeze())
  291. tm.assert_series_equal(empty_series, empty_frame.squeeze())
  292. def test_squeeze_axis(self):
  293. # axis argument
  294. df = DataFrame(
  295. np.random.default_rng(2).standard_normal((1, 4)),
  296. columns=Index(list("ABCD"), dtype=object),
  297. index=date_range("2000-01-01", periods=1, freq="B"),
  298. ).iloc[:, :1]
  299. assert df.shape == (1, 1)
  300. tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
  301. tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0])
  302. tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
  303. tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0])
  304. assert df.squeeze() == df.iloc[0, 0]
  305. msg = "No axis named 2 for object type DataFrame"
  306. with pytest.raises(ValueError, match=msg):
  307. df.squeeze(axis=2)
  308. msg = "No axis named x for object type DataFrame"
  309. with pytest.raises(ValueError, match=msg):
  310. df.squeeze(axis="x")
  311. def test_squeeze_axis_len_3(self):
  312. df = DataFrame(
  313. np.random.default_rng(2).standard_normal((3, 4)),
  314. columns=Index(list("ABCD"), dtype=object),
  315. index=date_range("2000-01-01", periods=3, freq="B"),
  316. )
  317. tm.assert_frame_equal(df.squeeze(axis=0), df)
  318. def test_numpy_squeeze(self):
  319. s = Series(range(2), dtype=np.float64)
  320. tm.assert_series_equal(np.squeeze(s), s)
  321. df = DataFrame(
  322. np.random.default_rng(2).standard_normal((10, 4)),
  323. columns=Index(list("ABCD"), dtype=object),
  324. index=date_range("2000-01-01", periods=10, freq="B"),
  325. ).reindex(columns=["A"])
  326. tm.assert_series_equal(np.squeeze(df), df["A"])
  327. @pytest.mark.parametrize(
  328. "ser",
  329. [
  330. Series(range(10), dtype=np.float64),
  331. Series([str(i) for i in range(10)], dtype=object),
  332. ],
  333. )
  334. def test_transpose_series(self, ser):
  335. # calls implementation in pandas/core/base.py
  336. tm.assert_series_equal(ser.transpose(), ser)
  337. def test_transpose_frame(self):
  338. df = DataFrame(
  339. np.random.default_rng(2).standard_normal((10, 4)),
  340. columns=Index(list("ABCD"), dtype=object),
  341. index=date_range("2000-01-01", periods=10, freq="B"),
  342. )
  343. tm.assert_frame_equal(df.transpose().transpose(), df)
  344. def test_numpy_transpose(self, frame_or_series):
  345. obj = DataFrame(
  346. np.random.default_rng(2).standard_normal((10, 4)),
  347. columns=Index(list("ABCD"), dtype=object),
  348. index=date_range("2000-01-01", periods=10, freq="B"),
  349. )
  350. obj = tm.get_obj(obj, frame_or_series)
  351. if frame_or_series is Series:
  352. # 1D -> np.transpose is no-op
  353. tm.assert_series_equal(np.transpose(obj), obj)
  354. # round-trip preserved
  355. tm.assert_equal(np.transpose(np.transpose(obj)), obj)
  356. msg = "the 'axes' parameter is not supported"
  357. with pytest.raises(ValueError, match=msg):
  358. np.transpose(obj, axes=1)
  359. @pytest.mark.parametrize(
  360. "ser",
  361. [
  362. Series(range(10), dtype=np.float64),
  363. Series([str(i) for i in range(10)], dtype=object),
  364. ],
  365. )
  366. def test_take_series(self, ser):
  367. indices = [1, 5, -2, 6, 3, -1]
  368. out = ser.take(indices)
  369. expected = Series(
  370. data=ser.values.take(indices),
  371. index=ser.index.take(indices),
  372. dtype=ser.dtype,
  373. )
  374. tm.assert_series_equal(out, expected)
  375. def test_take_frame(self):
  376. indices = [1, 5, -2, 6, 3, -1]
  377. df = DataFrame(
  378. np.random.default_rng(2).standard_normal((10, 4)),
  379. columns=Index(list("ABCD"), dtype=object),
  380. index=date_range("2000-01-01", periods=10, freq="B"),
  381. )
  382. out = df.take(indices)
  383. expected = DataFrame(
  384. data=df.values.take(indices, axis=0),
  385. index=df.index.take(indices),
  386. columns=df.columns,
  387. )
  388. tm.assert_frame_equal(out, expected)
  389. def test_take_invalid_kwargs(self, frame_or_series):
  390. indices = [-3, 2, 0, 1]
  391. obj = DataFrame(range(5))
  392. obj = tm.get_obj(obj, frame_or_series)
  393. msg = r"take\(\) got an unexpected keyword argument 'foo'"
  394. with pytest.raises(TypeError, match=msg):
  395. obj.take(indices, foo=2)
  396. msg = "the 'out' parameter is not supported"
  397. with pytest.raises(ValueError, match=msg):
  398. obj.take(indices, out=indices)
  399. msg = "the 'mode' parameter is not supported"
  400. with pytest.raises(ValueError, match=msg):
  401. obj.take(indices, mode="clip")
  402. def test_axis_classmethods(self, frame_or_series):
  403. box = frame_or_series
  404. obj = box(dtype=object)
  405. values = box._AXIS_TO_AXIS_NUMBER.keys()
  406. for v in values:
  407. assert obj._get_axis_number(v) == box._get_axis_number(v)
  408. assert obj._get_axis_name(v) == box._get_axis_name(v)
  409. assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v)
  410. def test_flags_identity(self, frame_or_series):
  411. obj = Series([1, 2])
  412. if frame_or_series is DataFrame:
  413. obj = obj.to_frame()
  414. assert obj.flags is obj.flags
  415. obj2 = obj.copy()
  416. assert obj2.flags is not obj.flags
  417. def test_bool_dep(self) -> None:
  418. # GH-51749
  419. msg_warn = (
  420. "DataFrame.bool is now deprecated and will be removed "
  421. "in future version of pandas"
  422. )
  423. with tm.assert_produces_warning(FutureWarning, match=msg_warn):
  424. DataFrame({"col": [False]}).bool()