# test_api.py (12 KB)
  1. from copy import deepcopy
  2. import inspect
  3. import pydoc
  4. import numpy as np
  5. import pytest
  6. from pandas._config import using_string_dtype
  7. from pandas._config.config import option_context
  8. from pandas.compat import HAS_PYARROW
  9. import pandas as pd
  10. from pandas import (
  11. DataFrame,
  12. Series,
  13. date_range,
  14. timedelta_range,
  15. )
  16. import pandas._testing as tm
  17. class TestDataFrameMisc:
  18. def test_getitem_pop_assign_name(self, float_frame):
  19. s = float_frame["A"]
  20. assert s.name == "A"
  21. s = float_frame.pop("A")
  22. assert s.name == "A"
  23. s = float_frame.loc[:, "B"]
  24. assert s.name == "B"
  25. s2 = s.loc[:]
  26. assert s2.name == "B"
  27. def test_get_axis(self, float_frame):
  28. f = float_frame
  29. assert f._get_axis_number(0) == 0
  30. assert f._get_axis_number(1) == 1
  31. assert f._get_axis_number("index") == 0
  32. assert f._get_axis_number("rows") == 0
  33. assert f._get_axis_number("columns") == 1
  34. assert f._get_axis_name(0) == "index"
  35. assert f._get_axis_name(1) == "columns"
  36. assert f._get_axis_name("index") == "index"
  37. assert f._get_axis_name("rows") == "index"
  38. assert f._get_axis_name("columns") == "columns"
  39. assert f._get_axis(0) is f.index
  40. assert f._get_axis(1) is f.columns
  41. with pytest.raises(ValueError, match="No axis named"):
  42. f._get_axis_number(2)
  43. with pytest.raises(ValueError, match="No axis.*foo"):
  44. f._get_axis_name("foo")
  45. with pytest.raises(ValueError, match="No axis.*None"):
  46. f._get_axis_name(None)
  47. with pytest.raises(ValueError, match="No axis named"):
  48. f._get_axis_number(None)
  49. def test_column_contains_raises(self, float_frame):
  50. with pytest.raises(TypeError, match="unhashable type: 'Index'"):
  51. float_frame.columns in float_frame
  52. def test_tab_completion(self):
  53. # DataFrame whose columns are identifiers shall have them in __dir__.
  54. df = DataFrame([list("abcd"), list("efgh")], columns=list("ABCD"))
  55. for key in list("ABCD"):
  56. assert key in dir(df)
  57. assert isinstance(df.__getitem__("A"), Series)
  58. # DataFrame whose first-level columns are identifiers shall have
  59. # them in __dir__.
  60. df = DataFrame(
  61. [list("abcd"), list("efgh")],
  62. columns=pd.MultiIndex.from_tuples(list(zip("ABCD", "EFGH"))),
  63. )
  64. for key in list("ABCD"):
  65. assert key in dir(df)
  66. for key in list("EFGH"):
  67. assert key not in dir(df)
  68. assert isinstance(df.__getitem__("A"), DataFrame)
  69. def test_display_max_dir_items(self):
  70. # display.max_dir_items increaes the number of columns that are in __dir__.
  71. columns = ["a" + str(i) for i in range(420)]
  72. values = [range(420), range(420)]
  73. df = DataFrame(values, columns=columns)
  74. # The default value for display.max_dir_items is 100
  75. assert "a99" in dir(df)
  76. assert "a100" not in dir(df)
  77. with option_context("display.max_dir_items", 300):
  78. df = DataFrame(values, columns=columns)
  79. assert "a299" in dir(df)
  80. assert "a300" not in dir(df)
  81. with option_context("display.max_dir_items", None):
  82. df = DataFrame(values, columns=columns)
  83. assert "a419" in dir(df)
  84. def test_not_hashable(self):
  85. empty_frame = DataFrame()
  86. df = DataFrame([1])
  87. msg = "unhashable type: 'DataFrame'"
  88. with pytest.raises(TypeError, match=msg):
  89. hash(df)
  90. with pytest.raises(TypeError, match=msg):
  91. hash(empty_frame)
  92. @pytest.mark.xfail(
  93. using_string_dtype() and HAS_PYARROW, reason="surrogates not allowed"
  94. )
  95. def test_column_name_contains_unicode_surrogate(self):
  96. # GH 25509
  97. colname = "\ud83d"
  98. df = DataFrame({colname: []})
  99. # this should not crash
  100. assert colname not in dir(df)
  101. assert df.columns[0] == colname
  102. def test_new_empty_index(self):
  103. df1 = DataFrame(np.random.default_rng(2).standard_normal((0, 3)))
  104. df2 = DataFrame(np.random.default_rng(2).standard_normal((0, 3)))
  105. df1.index.name = "foo"
  106. assert df2.index.name is None
  107. def test_get_agg_axis(self, float_frame):
  108. cols = float_frame._get_agg_axis(0)
  109. assert cols is float_frame.columns
  110. idx = float_frame._get_agg_axis(1)
  111. assert idx is float_frame.index
  112. msg = r"Axis must be 0 or 1 \(got 2\)"
  113. with pytest.raises(ValueError, match=msg):
  114. float_frame._get_agg_axis(2)
  115. def test_empty(self, float_frame, float_string_frame):
  116. empty_frame = DataFrame()
  117. assert empty_frame.empty
  118. assert not float_frame.empty
  119. assert not float_string_frame.empty
  120. # corner case
  121. df = DataFrame({"A": [1.0, 2.0, 3.0], "B": ["a", "b", "c"]}, index=np.arange(3))
  122. del df["A"]
  123. assert not df.empty
  124. def test_len(self, float_frame):
  125. assert len(float_frame) == len(float_frame.index)
  126. # single block corner case
  127. arr = float_frame[["A", "B"]].values
  128. expected = float_frame.reindex(columns=["A", "B"]).values
  129. tm.assert_almost_equal(arr, expected)
  130. def test_axis_aliases(self, float_frame):
  131. f = float_frame
  132. # reg name
  133. expected = f.sum(axis=0)
  134. result = f.sum(axis="index")
  135. tm.assert_series_equal(result, expected)
  136. expected = f.sum(axis=1)
  137. result = f.sum(axis="columns")
  138. tm.assert_series_equal(result, expected)
  139. def test_class_axis(self):
  140. # GH 18147
  141. # no exception and no empty docstring
  142. assert pydoc.getdoc(DataFrame.index)
  143. assert pydoc.getdoc(DataFrame.columns)
  144. def test_series_put_names(self, float_string_frame):
  145. series = float_string_frame._series
  146. for k, v in series.items():
  147. assert v.name == k
  148. def test_empty_nonzero(self):
  149. df = DataFrame([1, 2, 3])
  150. assert not df.empty
  151. df = DataFrame(index=[1], columns=[1])
  152. assert not df.empty
  153. df = DataFrame(index=["a", "b"], columns=["c", "d"]).dropna()
  154. assert df.empty
  155. assert df.T.empty
  156. @pytest.mark.parametrize(
  157. "df",
  158. [
  159. DataFrame(),
  160. DataFrame(index=[1]),
  161. DataFrame(columns=[1]),
  162. DataFrame({1: []}),
  163. ],
  164. )
  165. def test_empty_like(self, df):
  166. assert df.empty
  167. assert df.T.empty
  168. def test_with_datetimelikes(self):
  169. df = DataFrame(
  170. {
  171. "A": date_range("20130101", periods=10),
  172. "B": timedelta_range("1 day", periods=10),
  173. }
  174. )
  175. t = df.T
  176. result = t.dtypes.value_counts()
  177. expected = Series({np.dtype("object"): 10}, name="count")
  178. tm.assert_series_equal(result, expected)
  179. def test_deepcopy(self, float_frame):
  180. cp = deepcopy(float_frame)
  181. cp.loc[0, "A"] = 10
  182. assert not float_frame.equals(cp)
  183. def test_inplace_return_self(self):
  184. # GH 1893
  185. data = DataFrame(
  186. {"a": ["foo", "bar", "baz", "qux"], "b": [0, 0, 1, 1], "c": [1, 2, 3, 4]}
  187. )
  188. def _check_f(base, f):
  189. result = f(base)
  190. assert result is None
  191. # -----DataFrame-----
  192. # set_index
  193. f = lambda x: x.set_index("a", inplace=True)
  194. _check_f(data.copy(), f)
  195. # reset_index
  196. f = lambda x: x.reset_index(inplace=True)
  197. _check_f(data.set_index("a"), f)
  198. # drop_duplicates
  199. f = lambda x: x.drop_duplicates(inplace=True)
  200. _check_f(data.copy(), f)
  201. # sort
  202. f = lambda x: x.sort_values("b", inplace=True)
  203. _check_f(data.copy(), f)
  204. # sort_index
  205. f = lambda x: x.sort_index(inplace=True)
  206. _check_f(data.copy(), f)
  207. # fillna
  208. f = lambda x: x.fillna(0, inplace=True)
  209. _check_f(data.copy(), f)
  210. # replace
  211. f = lambda x: x.replace(1, 0, inplace=True)
  212. _check_f(data.copy(), f)
  213. # rename
  214. f = lambda x: x.rename({1: "foo"}, inplace=True)
  215. _check_f(data.copy(), f)
  216. # -----Series-----
  217. d = data.copy()["c"]
  218. # reset_index
  219. f = lambda x: x.reset_index(inplace=True, drop=True)
  220. _check_f(data.set_index("a")["c"], f)
  221. # fillna
  222. f = lambda x: x.fillna(0, inplace=True)
  223. _check_f(d.copy(), f)
  224. # replace
  225. f = lambda x: x.replace(1, 0, inplace=True)
  226. _check_f(d.copy(), f)
  227. # rename
  228. f = lambda x: x.rename({1: "foo"}, inplace=True)
  229. _check_f(d.copy(), f)
  230. def test_tab_complete_warning(self, ip, frame_or_series):
  231. # GH 16409
  232. pytest.importorskip("IPython", minversion="6.0.0")
  233. from IPython.core.completer import provisionalcompleter
  234. if frame_or_series is DataFrame:
  235. code = "from pandas import DataFrame; obj = DataFrame()"
  236. else:
  237. code = "from pandas import Series; obj = Series(dtype=object)"
  238. ip.run_cell(code)
  239. # GH 31324 newer jedi version raises Deprecation warning;
  240. # appears resolved 2021-02-02
  241. with tm.assert_produces_warning(None, raise_on_extra_warnings=False):
  242. with provisionalcompleter("ignore"):
  243. list(ip.Completer.completions("obj.", 1))
  244. def test_attrs(self):
  245. df = DataFrame({"A": [2, 3]})
  246. assert df.attrs == {}
  247. df.attrs["version"] = 1
  248. result = df.rename(columns=str)
  249. assert result.attrs == {"version": 1}
  250. def test_attrs_deepcopy(self):
  251. df = DataFrame({"A": [2, 3]})
  252. assert df.attrs == {}
  253. df.attrs["tags"] = {"spam", "ham"}
  254. result = df.rename(columns=str)
  255. assert result.attrs == df.attrs
  256. assert result.attrs["tags"] is not df.attrs["tags"]
  257. @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None])
  258. def test_set_flags(
  259. self,
  260. allows_duplicate_labels,
  261. frame_or_series,
  262. using_copy_on_write,
  263. warn_copy_on_write,
  264. ):
  265. obj = DataFrame({"A": [1, 2]})
  266. key = (0, 0)
  267. if frame_or_series is Series:
  268. obj = obj["A"]
  269. key = 0
  270. result = obj.set_flags(allows_duplicate_labels=allows_duplicate_labels)
  271. if allows_duplicate_labels is None:
  272. # We don't update when it's not provided
  273. assert result.flags.allows_duplicate_labels is True
  274. else:
  275. assert result.flags.allows_duplicate_labels is allows_duplicate_labels
  276. # We made a copy
  277. assert obj is not result
  278. # We didn't mutate obj
  279. assert obj.flags.allows_duplicate_labels is True
  280. # But we didn't copy data
  281. if frame_or_series is Series:
  282. assert np.may_share_memory(obj.values, result.values)
  283. else:
  284. assert np.may_share_memory(obj["A"].values, result["A"].values)
  285. with tm.assert_cow_warning(warn_copy_on_write):
  286. result.iloc[key] = 0
  287. if using_copy_on_write:
  288. assert obj.iloc[key] == 1
  289. else:
  290. assert obj.iloc[key] == 0
  291. # set back to 1 for test below
  292. with tm.assert_cow_warning(warn_copy_on_write):
  293. result.iloc[key] = 1
  294. # Now we do copy.
  295. result = obj.set_flags(
  296. copy=True, allows_duplicate_labels=allows_duplicate_labels
  297. )
  298. result.iloc[key] = 10
  299. assert obj.iloc[key] == 1
  300. def test_constructor_expanddim(self):
  301. # GH#33628 accessing _constructor_expanddim should not raise NotImplementedError
  302. # GH38782 pandas has no container higher than DataFrame (two-dim), so
  303. # DataFrame._constructor_expand_dim, doesn't make sense, so is removed.
  304. df = DataFrame()
  305. msg = "'DataFrame' object has no attribute '_constructor_expanddim'"
  306. with pytest.raises(AttributeError, match=msg):
  307. df._constructor_expanddim(np.arange(27).reshape(3, 3, 3))
  308. def test_inspect_getmembers(self):
  309. # GH38740
  310. df = DataFrame()
  311. msg = "DataFrame._data is deprecated"
  312. with tm.assert_produces_warning(
  313. DeprecationWarning, match=msg, check_stacklevel=False
  314. ):
  315. inspect.getmembers(df)