test_common.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. """
  2. Collection of tests asserting things that should be true for
  3. any index subclass except for MultiIndex. Makes use of the `index_flat`
  4. fixture defined in pandas/conftest.py.
  5. """
  6. from copy import (
  7. copy,
  8. deepcopy,
  9. )
  10. import re
  11. import numpy as np
  12. import pytest
  13. from pandas.compat import IS64
  14. from pandas.compat.numpy import np_version_gte1p25
  15. from pandas.core.dtypes.common import (
  16. is_integer_dtype,
  17. is_numeric_dtype,
  18. )
  19. import pandas as pd
  20. from pandas import (
  21. CategoricalIndex,
  22. MultiIndex,
  23. PeriodIndex,
  24. RangeIndex,
  25. )
  26. import pandas._testing as tm
  27. class TestCommon:
  28. @pytest.mark.parametrize("name", [None, "new_name"])
  29. def test_to_frame(self, name, index_flat, using_copy_on_write):
  30. # see GH#15230, GH#22580
  31. idx = index_flat
  32. if name:
  33. idx_name = name
  34. else:
  35. idx_name = idx.name or 0
  36. df = idx.to_frame(name=idx_name)
  37. assert df.index is idx
  38. assert len(df.columns) == 1
  39. assert df.columns[0] == idx_name
  40. if not using_copy_on_write:
  41. assert df[idx_name].values is not idx.values
  42. df = idx.to_frame(index=False, name=idx_name)
  43. assert df.index is not idx
  44. def test_droplevel(self, index_flat):
  45. # GH 21115
  46. # MultiIndex is tested separately in test_multi.py
  47. index = index_flat
  48. assert index.droplevel([]).equals(index)
  49. for level in [index.name, [index.name]]:
  50. if isinstance(index.name, tuple) and level is index.name:
  51. # GH 21121 : droplevel with tuple name
  52. continue
  53. msg = (
  54. "Cannot remove 1 levels from an index with 1 levels: at least one "
  55. "level must be left."
  56. )
  57. with pytest.raises(ValueError, match=msg):
  58. index.droplevel(level)
  59. for level in "wrong", ["wrong"]:
  60. with pytest.raises(
  61. KeyError,
  62. match=r"'Requested level \(wrong\) does not match index name \(None\)'",
  63. ):
  64. index.droplevel(level)
  65. def test_constructor_non_hashable_name(self, index_flat):
  66. # GH 20527
  67. index = index_flat
  68. message = "Index.name must be a hashable type"
  69. renamed = [["1"]]
  70. # With .rename()
  71. with pytest.raises(TypeError, match=message):
  72. index.rename(name=renamed)
  73. # With .set_names()
  74. with pytest.raises(TypeError, match=message):
  75. index.set_names(names=renamed)
  76. def test_constructor_unwraps_index(self, index_flat):
  77. a = index_flat
  78. # Passing dtype is necessary for Index([True, False], dtype=object)
  79. # case.
  80. b = type(a)(a, dtype=a.dtype)
  81. tm.assert_equal(a._data, b._data)
  82. def test_to_flat_index(self, index_flat):
  83. # 22866
  84. index = index_flat
  85. result = index.to_flat_index()
  86. tm.assert_index_equal(result, index)
  87. def test_set_name_methods(self, index_flat):
  88. # MultiIndex tested separately
  89. index = index_flat
  90. new_name = "This is the new name for this index"
  91. original_name = index.name
  92. new_ind = index.set_names([new_name])
  93. assert new_ind.name == new_name
  94. assert index.name == original_name
  95. res = index.rename(new_name, inplace=True)
  96. # should return None
  97. assert res is None
  98. assert index.name == new_name
  99. assert index.names == [new_name]
  100. with pytest.raises(ValueError, match="Level must be None"):
  101. index.set_names("a", level=0)
  102. # rename in place just leaves tuples and other containers alone
  103. name = ("A", "B")
  104. index.rename(name, inplace=True)
  105. assert index.name == name
  106. assert index.names == [name]
  107. @pytest.mark.xfail
  108. def test_set_names_single_label_no_level(self, index_flat):
  109. with pytest.raises(TypeError, match="list-like"):
  110. # should still fail even if it would be the right length
  111. index_flat.set_names("a")
  112. def test_copy_and_deepcopy(self, index_flat):
  113. index = index_flat
  114. for func in (copy, deepcopy):
  115. idx_copy = func(index)
  116. assert idx_copy is not index
  117. assert idx_copy.equals(index)
  118. new_copy = index.copy(deep=True, name="banana")
  119. assert new_copy.name == "banana"
  120. @pytest.mark.filterwarnings(r"ignore:Dtype inference:FutureWarning")
  121. def test_copy_name(self, index_flat):
  122. # GH#12309: Check that the "name" argument
  123. # passed at initialization is honored.
  124. index = index_flat
  125. first = type(index)(index, copy=True, name="mario")
  126. second = type(first)(first, copy=False)
  127. # Even though "copy=False", we want a new object.
  128. assert first is not second
  129. tm.assert_index_equal(first, second)
  130. # Not using tm.assert_index_equal() since names differ.
  131. assert index.equals(first)
  132. assert first.name == "mario"
  133. assert second.name == "mario"
  134. # TODO: belongs in series arithmetic tests?
  135. s1 = pd.Series(2, index=first)
  136. s2 = pd.Series(3, index=second[:-1])
  137. # See GH#13365
  138. s3 = s1 * s2
  139. assert s3.index.name == "mario"
  140. def test_copy_name2(self, index_flat):
  141. # GH#35592
  142. index = index_flat
  143. assert index.copy(name="mario").name == "mario"
  144. with pytest.raises(ValueError, match="Length of new names must be 1, got 2"):
  145. index.copy(name=["mario", "luigi"])
  146. msg = f"{type(index).__name__}.name must be a hashable type"
  147. with pytest.raises(TypeError, match=msg):
  148. index.copy(name=[["mario"]])
  149. def test_unique_level(self, index_flat):
  150. # don't test a MultiIndex here (as its tested separated)
  151. index = index_flat
  152. # GH 17896
  153. expected = index.drop_duplicates()
  154. for level in [0, index.name, None]:
  155. result = index.unique(level=level)
  156. tm.assert_index_equal(result, expected)
  157. msg = "Too many levels: Index has only 1 level, not 4"
  158. with pytest.raises(IndexError, match=msg):
  159. index.unique(level=3)
  160. msg = (
  161. rf"Requested level \(wrong\) does not match index name "
  162. rf"\({re.escape(index.name.__repr__())}\)"
  163. )
  164. with pytest.raises(KeyError, match=msg):
  165. index.unique(level="wrong")
  166. def test_unique(self, index_flat):
  167. # MultiIndex tested separately
  168. index = index_flat
  169. if not len(index):
  170. pytest.skip("Skip check for empty Index and MultiIndex")
  171. idx = index[[0] * 5]
  172. idx_unique = index[[0]]
  173. # We test against `idx_unique`, so first we make sure it's unique
  174. # and doesn't contain nans.
  175. assert idx_unique.is_unique is True
  176. try:
  177. assert idx_unique.hasnans is False
  178. except NotImplementedError:
  179. pass
  180. result = idx.unique()
  181. tm.assert_index_equal(result, idx_unique)
  182. # nans:
  183. if not index._can_hold_na:
  184. pytest.skip("Skip na-check if index cannot hold na")
  185. vals = index._values[[0] * 5]
  186. vals[0] = np.nan
  187. vals_unique = vals[:2]
  188. idx_nan = index._shallow_copy(vals)
  189. idx_unique_nan = index._shallow_copy(vals_unique)
  190. assert idx_unique_nan.is_unique is True
  191. assert idx_nan.dtype == index.dtype
  192. assert idx_unique_nan.dtype == index.dtype
  193. expected = idx_unique_nan
  194. for pos, i in enumerate([idx_nan, idx_unique_nan]):
  195. result = i.unique()
  196. tm.assert_index_equal(result, expected)
  197. @pytest.mark.filterwarnings("ignore:Period with BDay freq:FutureWarning")
  198. @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
  199. def test_searchsorted_monotonic(self, index_flat, request):
  200. # GH17271
  201. index = index_flat
  202. # not implemented for tuple searches in MultiIndex
  203. # or Intervals searches in IntervalIndex
  204. if isinstance(index, pd.IntervalIndex):
  205. mark = pytest.mark.xfail(
  206. reason="IntervalIndex.searchsorted does not support Interval arg",
  207. raises=NotImplementedError,
  208. )
  209. request.applymarker(mark)
  210. # nothing to test if the index is empty
  211. if index.empty:
  212. pytest.skip("Skip check for empty Index")
  213. value = index[0]
  214. # determine the expected results (handle dupes for 'right')
  215. expected_left, expected_right = 0, (index == value).argmin()
  216. if expected_right == 0:
  217. # all values are the same, expected_right should be length
  218. expected_right = len(index)
  219. # test _searchsorted_monotonic in all cases
  220. # test searchsorted only for increasing
  221. if index.is_monotonic_increasing:
  222. ssm_left = index._searchsorted_monotonic(value, side="left")
  223. assert expected_left == ssm_left
  224. ssm_right = index._searchsorted_monotonic(value, side="right")
  225. assert expected_right == ssm_right
  226. ss_left = index.searchsorted(value, side="left")
  227. assert expected_left == ss_left
  228. ss_right = index.searchsorted(value, side="right")
  229. assert expected_right == ss_right
  230. elif index.is_monotonic_decreasing:
  231. ssm_left = index._searchsorted_monotonic(value, side="left")
  232. assert expected_left == ssm_left
  233. ssm_right = index._searchsorted_monotonic(value, side="right")
  234. assert expected_right == ssm_right
  235. else:
  236. # non-monotonic should raise.
  237. msg = "index must be monotonic increasing or decreasing"
  238. with pytest.raises(ValueError, match=msg):
  239. index._searchsorted_monotonic(value, side="left")
  240. @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
  241. def test_drop_duplicates(self, index_flat, keep):
  242. # MultiIndex is tested separately
  243. index = index_flat
  244. if isinstance(index, RangeIndex):
  245. pytest.skip(
  246. "RangeIndex is tested in test_drop_duplicates_no_duplicates "
  247. "as it cannot hold duplicates"
  248. )
  249. if len(index) == 0:
  250. pytest.skip(
  251. "empty index is tested in test_drop_duplicates_no_duplicates "
  252. "as it cannot hold duplicates"
  253. )
  254. # make unique index
  255. holder = type(index)
  256. unique_values = list(set(index))
  257. dtype = index.dtype if is_numeric_dtype(index) else None
  258. unique_idx = holder(unique_values, dtype=dtype)
  259. # make duplicated index
  260. n = len(unique_idx)
  261. duplicated_selection = np.random.default_rng(2).choice(n, int(n * 1.5))
  262. idx = holder(unique_idx.values[duplicated_selection])
  263. # Series.duplicated is tested separately
  264. expected_duplicated = (
  265. pd.Series(duplicated_selection).duplicated(keep=keep).values
  266. )
  267. tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected_duplicated)
  268. # Series.drop_duplicates is tested separately
  269. expected_dropped = holder(pd.Series(idx).drop_duplicates(keep=keep))
  270. tm.assert_index_equal(idx.drop_duplicates(keep=keep), expected_dropped)
  271. @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
  272. def test_drop_duplicates_no_duplicates(self, index_flat):
  273. # MultiIndex is tested separately
  274. index = index_flat
  275. # make unique index
  276. if isinstance(index, RangeIndex):
  277. # RangeIndex cannot have duplicates
  278. unique_idx = index
  279. else:
  280. holder = type(index)
  281. unique_values = list(set(index))
  282. dtype = index.dtype if is_numeric_dtype(index) else None
  283. unique_idx = holder(unique_values, dtype=dtype)
  284. # check on unique index
  285. expected_duplicated = np.array([False] * len(unique_idx), dtype="bool")
  286. tm.assert_numpy_array_equal(unique_idx.duplicated(), expected_duplicated)
  287. result_dropped = unique_idx.drop_duplicates()
  288. tm.assert_index_equal(result_dropped, unique_idx)
  289. # validate shallow copy
  290. assert result_dropped is not unique_idx
  291. def test_drop_duplicates_inplace(self, index):
  292. msg = r"drop_duplicates\(\) got an unexpected keyword argument"
  293. with pytest.raises(TypeError, match=msg):
  294. index.drop_duplicates(inplace=True)
  295. @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
  296. def test_has_duplicates(self, index_flat):
  297. # MultiIndex tested separately in:
  298. # tests/indexes/multi/test_unique_and_duplicates.
  299. index = index_flat
  300. holder = type(index)
  301. if not len(index) or isinstance(index, RangeIndex):
  302. # MultiIndex tested separately in:
  303. # tests/indexes/multi/test_unique_and_duplicates.
  304. # RangeIndex is unique by definition.
  305. pytest.skip("Skip check for empty Index, MultiIndex, and RangeIndex")
  306. idx = holder([index[0]] * 5)
  307. assert idx.is_unique is False
  308. assert idx.has_duplicates is True
  309. @pytest.mark.parametrize(
  310. "dtype",
  311. ["int64", "uint64", "float64", "category", "datetime64[ns]", "timedelta64[ns]"],
  312. )
  313. def test_astype_preserves_name(self, index, dtype):
  314. # https://github.com/pandas-dev/pandas/issues/32013
  315. if isinstance(index, MultiIndex):
  316. index.names = ["idx" + str(i) for i in range(index.nlevels)]
  317. else:
  318. index.name = "idx"
  319. warn = None
  320. if index.dtype.kind == "c" and dtype in ["float64", "int64", "uint64"]:
  321. # imaginary components discarded
  322. if np_version_gte1p25:
  323. warn = np.exceptions.ComplexWarning
  324. else:
  325. warn = np.ComplexWarning
  326. is_pyarrow_str = str(index.dtype) == "string[pyarrow]" and dtype == "category"
  327. try:
  328. # Some of these conversions cannot succeed so we use a try / except
  329. with tm.assert_produces_warning(
  330. warn,
  331. raise_on_extra_warnings=is_pyarrow_str,
  332. check_stacklevel=False,
  333. ):
  334. result = index.astype(dtype)
  335. except (ValueError, TypeError, NotImplementedError, SystemError):
  336. return
  337. if isinstance(index, MultiIndex):
  338. assert result.names == index.names
  339. else:
  340. assert result.name == index.name
  341. def test_hasnans_isnans(self, index_flat):
  342. # GH#11343, added tests for hasnans / isnans
  343. index = index_flat
  344. # cases in indices doesn't include NaN
  345. idx = index.copy(deep=True)
  346. expected = np.array([False] * len(idx), dtype=bool)
  347. tm.assert_numpy_array_equal(idx._isnan, expected)
  348. assert idx.hasnans is False
  349. idx = index.copy(deep=True)
  350. values = idx._values
  351. if len(index) == 0:
  352. return
  353. elif is_integer_dtype(index.dtype):
  354. return
  355. elif index.dtype == bool:
  356. # values[1] = np.nan below casts to True!
  357. return
  358. values[1] = np.nan
  359. idx = type(index)(values)
  360. expected = np.array([False] * len(idx), dtype=bool)
  361. expected[1] = True
  362. tm.assert_numpy_array_equal(idx._isnan, expected)
  363. assert idx.hasnans is True
  364. @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
  365. @pytest.mark.parametrize("na_position", [None, "middle"])
  366. def test_sort_values_invalid_na_position(index_with_missing, na_position):
  367. with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
  368. index_with_missing.sort_values(na_position=na_position)
  369. @pytest.mark.fails_arm_wheels
  370. @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
  371. @pytest.mark.parametrize("na_position", ["first", "last"])
  372. def test_sort_values_with_missing(index_with_missing, na_position, request):
  373. # GH 35584. Test that sort_values works with missing values,
  374. # sort non-missing and place missing according to na_position
  375. if isinstance(index_with_missing, CategoricalIndex):
  376. request.applymarker(
  377. pytest.mark.xfail(
  378. reason="missing value sorting order not well-defined", strict=False
  379. )
  380. )
  381. missing_count = np.sum(index_with_missing.isna())
  382. not_na_vals = index_with_missing[index_with_missing.notna()].values
  383. sorted_values = np.sort(not_na_vals)
  384. if na_position == "first":
  385. sorted_values = np.concatenate([[None] * missing_count, sorted_values])
  386. else:
  387. sorted_values = np.concatenate([sorted_values, [None] * missing_count])
  388. # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray
  389. expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype)
  390. result = index_with_missing.sort_values(na_position=na_position)
  391. tm.assert_index_equal(result, expected)
  392. def test_ndarray_compat_properties(index):
  393. if isinstance(index, PeriodIndex) and not IS64:
  394. pytest.skip("Overflow")
  395. idx = index
  396. assert idx.T.equals(idx)
  397. assert idx.transpose().equals(idx)
  398. values = idx.values
  399. assert idx.shape == values.shape
  400. assert idx.ndim == values.ndim
  401. assert idx.size == values.size
  402. if not isinstance(index, (RangeIndex, MultiIndex)):
  403. # These two are not backed by an ndarray
  404. assert idx.nbytes == values.nbytes
  405. # test for validity
  406. idx.nbytes
  407. idx.values.nbytes
  408. def test_compare_read_only_array():
  409. # GH#57130
  410. arr = np.array([], dtype=object)
  411. arr.flags.writeable = False
  412. idx = pd.Index(arr)
  413. result = idx > 69
  414. assert result.dtype == bool