# test_api.py (10 KB)
  1. import inspect
  2. import pydoc
  3. import numpy as np
  4. import pytest
  5. import pandas as pd
  6. from pandas import (
  7. DataFrame,
  8. Index,
  9. Series,
  10. date_range,
  11. period_range,
  12. timedelta_range,
  13. )
  14. import pandas._testing as tm
  15. class TestSeriesMisc:
  16. def test_tab_completion(self):
  17. # GH 9910
  18. s = Series(list("abcd"))
  19. # Series of str values should have .str but not .dt/.cat in __dir__
  20. assert "str" in dir(s)
  21. assert "dt" not in dir(s)
  22. assert "cat" not in dir(s)
  23. def test_tab_completion_dt(self):
  24. # similarly for .dt
  25. s = Series(date_range("1/1/2015", periods=5))
  26. assert "dt" in dir(s)
  27. assert "str" not in dir(s)
  28. assert "cat" not in dir(s)
  29. def test_tab_completion_cat(self):
  30. # Similarly for .cat, but with the twist that str and dt should be
  31. # there if the categories are of that type first cat and str.
  32. s = Series(list("abbcd"), dtype="category")
  33. assert "cat" in dir(s)
  34. assert "str" in dir(s) # as it is a string categorical
  35. assert "dt" not in dir(s)
  36. def test_tab_completion_cat_str(self):
  37. # similar to cat and str
  38. s = Series(date_range("1/1/2015", periods=5)).astype("category")
  39. assert "cat" in dir(s)
  40. assert "str" not in dir(s)
  41. assert "dt" in dir(s) # as it is a datetime categorical
  42. def test_tab_completion_with_categorical(self):
  43. # test the tab completion display
  44. ok_for_cat = [
  45. "categories",
  46. "codes",
  47. "ordered",
  48. "set_categories",
  49. "add_categories",
  50. "remove_categories",
  51. "rename_categories",
  52. "reorder_categories",
  53. "remove_unused_categories",
  54. "as_ordered",
  55. "as_unordered",
  56. ]
  57. s = Series(list("aabbcde")).astype("category")
  58. results = sorted({r for r in s.cat.__dir__() if not r.startswith("_")})
  59. tm.assert_almost_equal(results, sorted(set(ok_for_cat)))
  60. @pytest.mark.parametrize(
  61. "index",
  62. [
  63. Index(list("ab") * 5, dtype="category"),
  64. Index([str(i) for i in range(10)]),
  65. Index(["foo", "bar", "baz"] * 2),
  66. date_range("2020-01-01", periods=10),
  67. period_range("2020-01-01", periods=10, freq="D"),
  68. timedelta_range("1 day", periods=10),
  69. Index(np.arange(10), dtype=np.uint64),
  70. Index(np.arange(10), dtype=np.int64),
  71. Index(np.arange(10), dtype=np.float64),
  72. Index([True, False]),
  73. Index([f"a{i}" for i in range(101)]),
  74. pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")),
  75. pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")),
  76. ],
  77. )
  78. def test_index_tab_completion(self, index):
  79. # dir contains string-like values of the Index.
  80. s = Series(index=index, dtype=object)
  81. dir_s = dir(s)
  82. for i, x in enumerate(s.index.unique(level=0)):
  83. if i < 100:
  84. assert not isinstance(x, str) or not x.isidentifier() or x in dir_s
  85. else:
  86. assert x not in dir_s
  87. @pytest.mark.parametrize("ser", [Series(dtype=object), Series([1])])
  88. def test_not_hashable(self, ser):
  89. msg = "unhashable type: 'Series'"
  90. with pytest.raises(TypeError, match=msg):
  91. hash(ser)
  92. def test_contains(self, datetime_series):
  93. tm.assert_contains_all(datetime_series.index, datetime_series)
  94. def test_axis_alias(self):
  95. s = Series([1, 2, np.nan])
  96. tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index"))
  97. assert s.dropna().sum("rows") == 3
  98. assert s._get_axis_number("rows") == 0
  99. assert s._get_axis_name("rows") == "index"
  100. def test_class_axis(self):
  101. # https://github.com/pandas-dev/pandas/issues/18147
  102. # no exception and no empty docstring
  103. assert pydoc.getdoc(Series.index)
  104. def test_ndarray_compat(self):
  105. # test numpy compat with Series as sub-class of NDFrame
  106. tsdf = DataFrame(
  107. np.random.default_rng(2).standard_normal((1000, 3)),
  108. columns=["A", "B", "C"],
  109. index=date_range("1/1/2000", periods=1000),
  110. )
  111. def f(x):
  112. return x[x.idxmax()]
  113. result = tsdf.apply(f)
  114. expected = tsdf.max()
  115. tm.assert_series_equal(result, expected)
  116. def test_ndarray_compat_like_func(self):
  117. # using an ndarray like function
  118. s = Series(np.random.default_rng(2).standard_normal(10))
  119. result = Series(np.ones_like(s))
  120. expected = Series(1, index=range(10), dtype="float64")
  121. tm.assert_series_equal(result, expected)
  122. def test_ndarray_compat_ravel(self):
  123. # ravel
  124. s = Series(np.random.default_rng(2).standard_normal(10))
  125. with tm.assert_produces_warning(FutureWarning, match="ravel is deprecated"):
  126. result = s.ravel(order="F")
  127. tm.assert_almost_equal(result, s.values.ravel(order="F"))
  128. def test_empty_method(self):
  129. s_empty = Series(dtype=object)
  130. assert s_empty.empty
  131. @pytest.mark.parametrize("dtype", ["int64", object])
  132. def test_empty_method_full_series(self, dtype):
  133. full_series = Series(index=[1], dtype=dtype)
  134. assert not full_series.empty
  135. @pytest.mark.parametrize("dtype", [None, "Int64"])
  136. def test_integer_series_size(self, dtype):
  137. # GH 25580
  138. s = Series(range(9), dtype=dtype)
  139. assert s.size == 9
  140. def test_attrs(self):
  141. s = Series([0, 1], name="abc")
  142. assert s.attrs == {}
  143. s.attrs["version"] = 1
  144. result = s + 1
  145. assert result.attrs == {"version": 1}
  146. def test_inspect_getmembers(self):
  147. # GH38782
  148. ser = Series(dtype=object)
  149. msg = "Series._data is deprecated"
  150. with tm.assert_produces_warning(
  151. DeprecationWarning, match=msg, check_stacklevel=False
  152. ):
  153. inspect.getmembers(ser)
  154. def test_unknown_attribute(self):
  155. # GH#9680
  156. tdi = timedelta_range(start=0, periods=10, freq="1s")
  157. ser = Series(np.random.default_rng(2).normal(size=10), index=tdi)
  158. assert "foo" not in ser.__dict__
  159. msg = "'Series' object has no attribute 'foo'"
  160. with pytest.raises(AttributeError, match=msg):
  161. ser.foo
  162. @pytest.mark.parametrize("op", ["year", "day", "second", "weekday"])
  163. def test_datetime_series_no_datelike_attrs(self, op, datetime_series):
  164. # GH#7206
  165. msg = f"'Series' object has no attribute '{op}'"
  166. with pytest.raises(AttributeError, match=msg):
  167. getattr(datetime_series, op)
  168. def test_series_datetimelike_attribute_access(self):
  169. # attribute access should still work!
  170. ser = Series({"year": 2000, "month": 1, "day": 10})
  171. assert ser.year == 2000
  172. assert ser.month == 1
  173. assert ser.day == 10
  174. def test_series_datetimelike_attribute_access_invalid(self):
  175. ser = Series({"year": 2000, "month": 1, "day": 10})
  176. msg = "'Series' object has no attribute 'weekday'"
  177. with pytest.raises(AttributeError, match=msg):
  178. ser.weekday
  179. @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
  180. @pytest.mark.parametrize(
  181. "kernel, has_numeric_only",
  182. [
  183. ("skew", True),
  184. ("var", True),
  185. ("all", False),
  186. ("prod", True),
  187. ("any", False),
  188. ("idxmin", False),
  189. ("quantile", False),
  190. ("idxmax", False),
  191. ("min", True),
  192. ("sem", True),
  193. ("mean", True),
  194. ("nunique", False),
  195. ("max", True),
  196. ("sum", True),
  197. ("count", False),
  198. ("median", True),
  199. ("std", True),
  200. ("backfill", False),
  201. ("rank", True),
  202. ("pct_change", False),
  203. ("cummax", False),
  204. ("shift", False),
  205. ("diff", False),
  206. ("cumsum", False),
  207. ("cummin", False),
  208. ("cumprod", False),
  209. ("fillna", False),
  210. ("ffill", False),
  211. ("pad", False),
  212. ("bfill", False),
  213. ("sample", False),
  214. ("tail", False),
  215. ("take", False),
  216. ("head", False),
  217. ("cov", False),
  218. ("corr", False),
  219. ],
  220. )
  221. @pytest.mark.parametrize("dtype", [bool, int, float, object])
  222. def test_numeric_only(self, kernel, has_numeric_only, dtype):
  223. # GH#47500
  224. ser = Series([0, 1, 1], dtype=dtype)
  225. if kernel == "corrwith":
  226. args = (ser,)
  227. elif kernel == "corr":
  228. args = (ser,)
  229. elif kernel == "cov":
  230. args = (ser,)
  231. elif kernel == "nth":
  232. args = (0,)
  233. elif kernel == "fillna":
  234. args = (True,)
  235. elif kernel == "fillna":
  236. args = ("ffill",)
  237. elif kernel == "take":
  238. args = ([0],)
  239. elif kernel == "quantile":
  240. args = (0.5,)
  241. else:
  242. args = ()
  243. method = getattr(ser, kernel)
  244. if not has_numeric_only:
  245. msg = (
  246. "(got an unexpected keyword argument 'numeric_only'"
  247. "|too many arguments passed in)"
  248. )
  249. with pytest.raises(TypeError, match=msg):
  250. method(*args, numeric_only=True)
  251. elif dtype is object:
  252. msg = f"Series.{kernel} does not allow numeric_only=True with non-numeric"
  253. with pytest.raises(TypeError, match=msg):
  254. method(*args, numeric_only=True)
  255. else:
  256. result = method(*args, numeric_only=True)
  257. expected = method(*args, numeric_only=False)
  258. if isinstance(expected, Series):
  259. # transformer
  260. tm.assert_series_equal(result, expected)
  261. else:
  262. # reducer
  263. assert result == expected
  264. @pytest.mark.parametrize("converter", [int, float, complex])
  265. def test_float_int_deprecated(converter):
  266. # GH 51101
  267. with tm.assert_produces_warning(FutureWarning):
  268. assert converter(Series([1])) == converter(1)