# test_old_base.py (39 KB)
  1. from __future__ import annotations
  2. from datetime import datetime
  3. import weakref
  4. import numpy as np
  5. import pytest
  6. from pandas._libs.tslibs import Timestamp
  7. from pandas.core.dtypes.common import (
  8. is_integer_dtype,
  9. is_numeric_dtype,
  10. )
  11. from pandas.core.dtypes.dtypes import CategoricalDtype
  12. import pandas as pd
  13. from pandas import (
  14. CategoricalIndex,
  15. DatetimeIndex,
  16. DatetimeTZDtype,
  17. Index,
  18. IntervalIndex,
  19. MultiIndex,
  20. PeriodIndex,
  21. RangeIndex,
  22. Series,
  23. StringDtype,
  24. TimedeltaIndex,
  25. isna,
  26. period_range,
  27. )
  28. import pandas._testing as tm
  29. import pandas.core.algorithms as algos
  30. from pandas.core.arrays import BaseMaskedArray
  31. class TestBase:
  @pytest.fixture(
      params=[
          RangeIndex(start=0, stop=20, step=2),
          # Index objects wrapping ``np.arange(5)`` in several NumPy dtypes.
          *(
              Index(np.arange(5, dtype=arange_dtype))
              for arange_dtype in (np.float64, np.float32, np.uint64)
          ),
          # Index objects over the same even range in every signed int width.
          *(
              Index(range(0, 20, 2), dtype=int_dtype)
              for int_dtype in (np.int64, np.int32, np.int16, np.int8)
          ),
          Index(list("abcde")),
          Index([0, "a", 1, "b", 2, "c"]),
          period_range("20130101", periods=5, freq="D"),
          TimedeltaIndex(
              [
                  "0 days 01:00:00",
                  "1 days 01:00:00",
                  "2 days 01:00:00",
                  "3 days 01:00:00",
                  "4 days 01:00:00",
              ],
              dtype="timedelta64[ns]",
              freq="D",
          ),
          DatetimeIndex(
              ["2013-01-01", "2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05"],
              dtype="datetime64[ns]",
              freq="D",
          ),
          IntervalIndex.from_breaks(range(11), closed="right"),
      ]
  )
  def simple_index(self, request):
      """Yield one small, non-empty index per parametrized index flavour."""
      return request.param
  def test_pickle_compat_construction(self, simple_index):
      """Check that calling the index class with no arguments raises TypeError.

      RangeIndex is skipped because ``RangeIndex()`` is a valid call.
      """
      # need an object to create with
      if isinstance(simple_index, RangeIndex):
          pytest.skip("RangeIndex() is a valid constructor")

      # Any one of these messages is acceptable.
      accepted_messages = [
          r"Index\(\.\.\.\) must be called with a collection of some "
          r"kind, None was passed",
          r"DatetimeIndex\(\) must be called with a collection of some "
          r"kind, None was passed",
          r"TimedeltaIndex\(\) must be called with a collection of some "
          r"kind, None was passed",
          r"__new__\(\) missing 1 required positional argument: 'data'",
          r"__new__\(\) takes at least 2 arguments \(1 given\)",
      ]
      index_cls = type(simple_index)
      with pytest.raises(TypeError, match="|".join(accepted_messages)):
          index_cls()
  def test_shift(self, simple_index):
      """Check that ``shift`` raises NotImplementedError for non datetime-like indexes."""
      # GH8083 test the base class for shift
      if isinstance(simple_index, (DatetimeIndex, TimedeltaIndex, PeriodIndex)):
          pytest.skip("Tested in test_ops/test_arithmetic")

      expected_error = (
          f"This method is only implemented for DatetimeIndex, PeriodIndex and "
          f"TimedeltaIndex; Got type {type(simple_index).__name__}"
      )
      # Both the one-argument and the two-argument call forms must be rejected.
      for shift_args in ((1,), (1, 2)):
          with pytest.raises(NotImplementedError, match=expected_error):
              simple_index.shift(*shift_args)
  def test_constructor_name_unhashable(self, simple_index):
      """Check that passing an unhashable ``name`` (a list) raises TypeError."""
      # GH#29069 check that name is hashable
      # See also same-named test in tests.series.test_constructors
      index_cls = type(simple_index)
      with pytest.raises(TypeError, match="Index.name must be a hashable type"):
          index_cls(simple_index, name=[])
  103. def test_create_index_existing_name(self, simple_index):
  104. # GH11193, when an existing index is passed, and a new name is not
  105. # specified, the new index should inherit the previous object name
  106. expected = simple_index.copy()
  107. if not isinstance(expected, MultiIndex):
  108. expected.name = "foo"
  109. result = Index(expected)
  110. tm.assert_index_equal(result, expected)
  111. result = Index(expected, name="bar")
  112. expected.name = "bar"
  113. tm.assert_index_equal(result, expected)
  114. else:
  115. expected.names = ["foo", "bar"]
  116. result = Index(expected)
  117. tm.assert_index_equal(
  118. result,
  119. Index(
  120. Index(
  121. [
  122. ("foo", "one"),
  123. ("foo", "two"),
  124. ("bar", "one"),
  125. ("baz", "two"),
  126. ("qux", "one"),
  127. ("qux", "two"),
  128. ],
  129. dtype="object",
  130. ),
  131. names=["foo", "bar"],
  132. ),
  133. )
  134. result = Index(expected, names=["A", "B"])
  135. tm.assert_index_equal(
  136. result,
  137. Index(
  138. Index(
  139. [
  140. ("foo", "one"),
  141. ("foo", "two"),
  142. ("bar", "one"),
  143. ("baz", "two"),
  144. ("qux", "one"),
  145. ("qux", "two"),
  146. ],
  147. dtype="object",
  148. ),
  149. names=["A", "B"],
  150. ),
  151. )
  def test_numeric_compat(self, simple_index):
      """Check that ``*``, ``/`` and ``//`` with an int raise TypeError.

      Plain ``Index``, numeric dtypes and ``TimedeltaIndex`` are skipped.
      """
      # Check that this doesn't cover MultiIndex case, if/when it does,
      # we can remove multi.test_compat.test_numeric_compat
      assert not isinstance(simple_index, MultiIndex)
      if type(simple_index) is Index:
          pytest.skip("Not applicable for Index")
      if is_numeric_dtype(simple_index.dtype) or isinstance(
          simple_index, TimedeltaIndex
      ):
          pytest.skip("Tested elsewhere.")

      array_name = type(simple_index._data).__name__
      index_name = type(simple_index).__name__

      forward_pattern = "|".join(
          [
              rf"unsupported operand type\(s\) for \*: '{array_name}' and 'int'",
              "cannot perform (__mul__|__truediv__|__floordiv__) with "
              f"this index type: ({index_name}|{array_name})",
          ]
      )
      reflected_pattern = "|".join(
          [
              rf"unsupported operand type\(s\) for \*: 'int' and '{array_name}'",
              "cannot perform (__rmul__|__rtruediv__|__rfloordiv__) with "
              f"this index type: ({index_name}|{array_name})",
          ]
      )

      with pytest.raises(TypeError, match=forward_pattern):
          simple_index * 1
      with pytest.raises(TypeError, match=reflected_pattern):
          1 * simple_index

      # The division patterns are the multiplication patterns with the
      # operator symbol substituted.
      with pytest.raises(TypeError, match=forward_pattern.replace("*", "/")):
          simple_index / 1
      with pytest.raises(TypeError, match=reflected_pattern.replace("*", "/")):
          1 / simple_index

      with pytest.raises(TypeError, match=forward_pattern.replace("*", "//")):
          simple_index // 1
      with pytest.raises(TypeError, match=reflected_pattern.replace("*", "//")):
          1 // simple_index
  def test_logical_compat(self, simple_index):
      """Check ``all``/``any``: they agree with the values and Series, or raise."""
      if simple_index.dtype in (object, "string"):
          pytest.skip("Tested elsewhere.")

      if simple_index.dtype.kind in "iufcbm":
          # The reductions must agree with the backing values and with the
          # Series built from the index.
          for reduction in ("all", "any"):
              from_index = getattr(simple_index, reduction)
              assert from_index() == getattr(simple_index._values, reduction)()
              assert from_index() == getattr(simple_index.to_series(), reduction)()
          return

      if isinstance(simple_index, IntervalIndex):
          expected_error = (
              r"'IntervalArray' with dtype interval\[.*\] does "
              "not support reduction '(any|all)'"
          )
      else:
          expected_error = "cannot perform (any|all)"

      with pytest.raises(TypeError, match=expected_error):
          simple_index.all()
      with pytest.raises(TypeError, match=expected_error):
          simple_index.any()
  def test_repr_roundtrip(self, simple_index):
      """Check that evaluating ``repr(index)`` rebuilds an equal index."""
      if isinstance(simple_index, IntervalIndex):
          pytest.skip(f"Not a valid repr for {type(simple_index).__name__}")

      rebuilt = eval(repr(simple_index))
      tm.assert_index_equal(rebuilt, simple_index)
  def test_repr_max_seq_item_setting(self, simple_index):
      """Check that no ``...`` appears when ``display.max_seq_items`` is None."""
      # GH10182
      if isinstance(simple_index, IntervalIndex):
          pytest.skip(f"Not a valid repr for {type(simple_index).__name__}")

      repeated = simple_index.repeat(50)
      with pd.option_context("display.max_seq_items", None):
          repr(repeated)
          assert "..." not in str(repeated)
  @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
  def test_ensure_copied_data(self, index):
      """Check that index constructors honour ``copy=True`` and ``copy=False``.

      With ``copy=True`` the rebuilt index must compare equal to the input.
      With ``copy=False`` the rebuilt index must share its underlying buffers
      with the input; which buffers are compared depends on the index type.
      """
      # Check the "copy" argument of each Index.__new__ is honoured
      # GH12309
      init_kwargs = {}
      if isinstance(index, PeriodIndex):
          # Needs "freq" specification:
          init_kwargs["freq"] = index.freq
      elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)):
          pytest.skip(
              "RangeIndex cannot be initialized from data, "
              "MultiIndex and CategoricalIndex are tested separately"
          )
      elif index.dtype == object and index.inferred_type in ["boolean", "string"]:
          # Pass the dtype explicitly so the rebuilt index keeps object dtype.
          init_kwargs["dtype"] = index.dtype

      index_type = type(index)
      result = index_type(index.values, copy=True, **init_kwargs)
      if isinstance(index.dtype, DatetimeTZDtype):
          # ``index.values`` was passed without a tz, so restore it on the result.
          result = result.tz_localize("UTC").tz_convert(index.tz)
      if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
          # Drop freq on the reference before comparing with the rebuilt index.
          index = index._with_freq(None)

      tm.assert_index_equal(index, result)

      if isinstance(index, PeriodIndex):
          # .values an object array of Period, thus copied
          depr_msg = "The 'ordinal' keyword in PeriodIndex is deprecated"
          with tm.assert_produces_warning(FutureWarning, match=depr_msg):
              result = index_type(ordinal=index.asi8, copy=False, **init_kwargs)
          tm.assert_numpy_array_equal(index.asi8, result.asi8, check_same="same")
      elif isinstance(index, IntervalIndex):
          # checked in test_interval.py
          pass
      elif type(index) is Index and not isinstance(index.dtype, np.dtype):
          # Plain Index whose dtype is not a NumPy dtype (extension-array backed).
          result = index_type(index.values, copy=False, **init_kwargs)
          tm.assert_index_equal(result, index)

          if isinstance(index._values, BaseMaskedArray):
              # Masked arrays: both the data buffer and the mask must be shared.
              assert np.shares_memory(index._values._data, result._values._data)
              tm.assert_numpy_array_equal(
                  index._values._data, result._values._data, check_same="same"
              )
              assert np.shares_memory(index._values._mask, result._values._mask)
              tm.assert_numpy_array_equal(
                  index._values._mask, result._values._mask, check_same="same"
              )
          elif (
              isinstance(index.dtype, StringDtype) and index.dtype.storage == "python"
          ):
              assert np.shares_memory(index._values._ndarray, result._values._ndarray)
              tm.assert_numpy_array_equal(
                  index._values._ndarray, result._values._ndarray, check_same="same"
              )
          elif (
              isinstance(index.dtype, StringDtype)
              and index.dtype.storage == "pyarrow"
          ):
              assert tm.shares_memory(result._values, index._values)
          else:
              # Fail loudly for a dtype this test has no sharing check for.
              raise NotImplementedError(index.dtype)
      else:
          result = index_type(index.values, copy=False, **init_kwargs)
          tm.assert_numpy_array_equal(index.values, result.values, check_same="same")
  289. def test_memory_usage(self, index):
  290. index._engine.clear_mapping()
  291. result = index.memory_usage()
  292. if index.empty:
  293. # we report 0 for no-length
  294. assert result == 0
  295. return
  296. # non-zero length
  297. index.get_loc(index[0])
  298. result2 = index.memory_usage()
  299. result3 = index.memory_usage(deep=True)
  300. # RangeIndex, IntervalIndex
  301. # don't have engines
  302. # Index[EA] has engine but it does not have a Hashtable .mapping
  303. if not isinstance(index, (RangeIndex, IntervalIndex)) and not (
  304. type(index) is Index and not isinstance(index.dtype, np.dtype)
  305. ):
  306. assert result2 > result
  307. if index.inferred_type == "object":
  308. assert result3 > result2
  def test_argsort(self, index):
      """Check that ``Index.argsort`` matches ``argsort`` on the NumPy array.

      CategoricalIndex is skipped here. Result dtypes are not compared
      (``check_dtype=False``).
      """
      if isinstance(index, CategoricalIndex):
          # Name the index type in the skip reason; type(self) is always the
          # test class and says nothing about what was skipped.
          pytest.skip(f"{type(index).__name__} separately tested")

      result = index.argsort()
      expected = np.array(index).argsort()
      tm.assert_numpy_array_equal(result, expected, check_dtype=False)
  def test_numpy_argsort(self, index):
      """Check ``np.argsort(index)`` against ``index.argsort`` and its validation."""
      # Default sort kind first, then an explicit ``kind="mergesort"``.
      for sort_kwargs in ({}, {"kind": "mergesort"}):
          via_numpy = np.argsort(index, **sort_kwargs)
          via_method = index.argsort(**sort_kwargs)
          tm.assert_numpy_array_equal(via_numpy, via_method)

      # these are the only two types that perform
      # pandas compatibility input validation - the
      # rest already perform separate (or no) such
      # validation via their 'values' attribute as
      # defined in pandas.core.indexes/base.py - they
      # cannot be changed at the moment due to
      # backwards compatibility concerns
      if not isinstance(index, (CategoricalIndex, RangeIndex)):
          return

      with pytest.raises(ValueError, match="the 'axis' parameter is not supported"):
          np.argsort(index, axis=1)

      with pytest.raises(ValueError, match="the 'order' parameter is not supported"):
          np.argsort(index, order=("a", "b"))
  336. def test_repeat(self, simple_index):
  337. rep = 2
  338. idx = simple_index.copy()
  339. new_index_cls = idx._constructor
  340. expected = new_index_cls(idx.values.repeat(rep), name=idx.name)
  341. tm.assert_index_equal(idx.repeat(rep), expected)
  342. idx = simple_index
  343. rep = np.arange(len(idx))
  344. expected = new_index_cls(idx.values.repeat(rep), name=idx.name)
  345. tm.assert_index_equal(idx.repeat(rep), expected)
  def test_numpy_repeat(self, simple_index):
      """Check ``np.repeat(index, n)`` matches ``index.repeat(n)`` and rejects ``axis``."""
      times = 2
      tm.assert_index_equal(
          np.repeat(simple_index, times), simple_index.repeat(times)
      )

      with pytest.raises(ValueError, match="the 'axis' parameter is not supported"):
          np.repeat(simple_index, times, axis=0)
  def test_where(self, listlike_box, simple_index):
      """Check ``Index.where`` with an all-True mask and a first-element-False mask."""
      skip_here = isinstance(
          simple_index, (IntervalIndex, PeriodIndex)
      ) or is_numeric_dtype(simple_index.dtype)
      if skip_here:
          pytest.skip("Tested elsewhere.")

      target = simple_index
      if isinstance(target, (DatetimeIndex, TimedeltaIndex)):
          # where does not preserve freq
          target = target._with_freq(None)

      # An all-True mask leaves the index unchanged.
      keep_all = [True] * len(target)
      tm.assert_index_equal(target.where(listlike_box(keep_all)), target)

      # Masking out the first element replaces it with the index's NA value.
      drop_first = [False] + [True] * len(target[1:])
      with_na = Index([target._na_value] + target[1:].tolist(), dtype=target.dtype)
      tm.assert_index_equal(target.where(listlike_box(drop_first)), with_na)
  def test_insert_base(self, index):
      """Check that re-inserting the first element at position 0 restores the prefix."""
      middle = index[1:4]

      if not len(index):
          pytest.skip("Not applicable for empty index")

      # test 0th element
      # GH#51363: only object-dtype boolean indexes are expected to warn.
      is_object_bool = index.dtype == object and index.inferred_type == "boolean"
      expected_warning = FutureWarning if is_object_bool else None
      warning_text = "The behavior of Index.insert with object-dtype is deprecated"
      with tm.assert_produces_warning(expected_warning, match=warning_text):
          restored = middle.insert(0, index[0])
      assert index[0:4].equals(restored)
  def test_insert_out_of_bounds(self, index, using_infer_string):
      """Check the errors raised by ``insert`` for a float or out-of-range ``loc``."""
      # TypeError/IndexError matches what np.insert raises in these cases
      size = len(index)
      if size == 0:
          # 0 vs 0.5 in error message varies with numpy version
          float_loc_error = IndexError
          float_loc_msg = "index (0|0.5) is out of bounds for axis 0 with size 0"
      else:
          float_loc_error = TypeError
          float_loc_msg = (
              "slice indices must be integers or None or have an __index__ method"
          )

      if using_infer_string:
          if index.dtype == "string" or index.dtype == "category":  # noqa: PLR1714
              float_loc_msg = "loc must be an integer between"
          elif index.dtype == "object" and size == 0:
              float_loc_msg = "loc must be an integer between"
              float_loc_error = TypeError

      with pytest.raises(float_loc_error, match=float_loc_msg):
          index.insert(0.5, "foo")

      out_of_range_msg = "|".join(
          [
              r"index -?\d+ is out of bounds for axis 0 with size \d+",
              "loc must be an integer between",
          ]
      )
      # One position past the end, then one position before the start.
      for bad_loc in (size + 1, -size - 1):
          with pytest.raises(IndexError, match=out_of_range_msg):
              index.insert(bad_loc, 1)
  def test_delete_base(self, index):
      """Check ``Index.delete`` at the first, last and an out-of-bounds position.

      Empty indexes and RangeIndex are skipped.
      """
      if not len(index):
          pytest.skip("Not applicable for empty index")

      if isinstance(index, RangeIndex):
          # tested in class
          # Name the index type in the skip reason; type(self) is always the
          # test class and says nothing about what was skipped.
          pytest.skip(f"{type(index).__name__} tested elsewhere")

      # Deleting position 0 must equal slicing off the first element.
      expected = index[1:]
      result = index.delete(0)
      assert result.equals(expected)
      assert result.name == expected.name

      # Deleting position -1 must equal slicing off the last element.
      expected = index[:-1]
      result = index.delete(-1)
      assert result.equals(expected)
      assert result.name == expected.name

      # Position ``len(index)`` is one past the end and must raise.
      length = len(index)
      msg = f"index {length} is out of bounds for axis 0 with size {length}"
      with pytest.raises(IndexError, match=msg):
          index.delete(length)
  @pytest.mark.filterwarnings(r"ignore:Dtype inference:FutureWarning")
  @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
  def test_equals(self, index):
      """Check ``Index.equals`` against itself, copies, object casts and non-Index data."""
      if isinstance(index, IntervalIndex):
          pytest.skip(f"{type(index).__name__} tested elsewhere")

      numpy_backed = isinstance(index.dtype, np.dtype)
      ea_backed_plain_index = type(index) is Index and not numpy_backed

      assert index.equals(index)
      assert index.equals(index.copy())
      if not ea_backed_plain_index:
          # doesn't hold for e.g. IntegerDtype
          assert index.equals(index.astype(object))

      # A list or ndarray holding the same values is never "equal".
      for non_index in (list(index), np.array(index)):
          assert not index.equals(non_index)

      # Cannot pass in non-int64 dtype to RangeIndex
      if not (isinstance(index, RangeIndex) or ea_backed_plain_index):
          as_object = Index(index, dtype=object)
          assert index.equals(as_object)
          assert as_object.equals(index)

      if index.nlevels == 1:
          # do not test MultiIndex
          assert not index.equals(Series(index))
  def test_equals_op(self, simple_index):
      """Check ``==`` between an index and Index/ndarray/Series/scalar operands."""
      # GH9947, GH10637
      full = simple_index
      size = len(full)
      shorter = full[0:-1]
      last_differs = full[0:-1].append(full[-2:-1])
      single = full[0:1]

      all_true = np.array([True] * size)
      last_false = np.array([True] * (size - 1) + [False])
      mismatch_msg = "Lengths must match|could not be broadcast"

      # test comparisons with numpy arrays
      array_full = np.array(full)
      array_shorter = np.array(full[0:-1])
      array_last_differs = np.array(full[0:-1].append(full[-2:-1]))
      array_single = np.array(full[0:1])

      # test comparisons with Series
      series_full = Series(array_full)
      series_shorter = Series(array_shorter)
      series_last_differs = Series(array_last_differs)
      series_single = Series(array_single)

      # The same three checks for each container type: a shorter operand
      # raises, an identical one is all True, and one differing only in its
      # last element is False only there.
      operand_sets = (
          (full, shorter, last_differs),
          (array_full, array_shorter, array_last_differs),
          (series_full, series_shorter, series_last_differs),
      )
      for same, short, differs in operand_sets:
          with pytest.raises(ValueError, match=mismatch_msg):
              full == short
          tm.assert_numpy_array_equal(full == same, all_true)
          tm.assert_numpy_array_equal(full == differs, last_false)

      # cases where length is 1 for one of them
      for length_one in (single, series_single, array_single):
          with pytest.raises(ValueError, match="Lengths must match"):
              full == length_one
      label_msg = "Can only compare identically-labeled Series objects"
      with pytest.raises(ValueError, match=label_msg):
          series_full == series_single
      with pytest.raises(ValueError, match="Lengths must match"):
          series_full == array_single

      # comparing with a scalar should broadcast; note that we are excluding
      # MultiIndex because in this case each item in the index is a tuple of
      # length 2, and therefore is considered an array of length 2 in the
      # comparison instead of a scalar
      if not isinstance(full, MultiIndex):
          only_second_to_last = np.array([False] * (len(full) - 2) + [True, False])
          # assuming the 2nd to last item is unique in the data
          scalar = full[-2]
          tm.assert_numpy_array_equal(full == scalar, only_second_to_last)
          tm.assert_series_equal(series_full == scalar, Series(only_second_to_last))
  def test_format(self, simple_index):
      """Check that the deprecated ``format`` returns ``str`` of each element."""
      # GH35439
      tested_elsewhere = is_numeric_dtype(simple_index.dtype) or isinstance(
          simple_index, DatetimeIndex
      )
      if tested_elsewhere:
          pytest.skip("Tested elsewhere.")

      as_strings = [str(element) for element in simple_index]
      deprecation = r"Index\.format is deprecated"
      with tm.assert_produces_warning(FutureWarning, match=deprecation):
          assert simple_index.format() == as_strings
  def test_format_empty(self, simple_index):
      """Check the deprecated ``format`` on an empty index, with and without ``name``."""
      # GH35712
      if isinstance(simple_index, (PeriodIndex, RangeIndex)):
          pytest.skip("Tested elsewhere")

      index_cls = type(simple_index)
      no_elements = index_cls([])
      deprecation = r"Index\.format is deprecated"
      with tm.assert_produces_warning(FutureWarning, match=deprecation):
          assert no_elements.format() == []
      with tm.assert_produces_warning(FutureWarning, match=deprecation):
          assert no_elements.format(name=True) == [""]
  def test_fillna(self, index):
      """Check ``Index.fillna`` value validation, ``downcast`` handling and NA flags."""
      # GH 11343
      if len(index) == 0:
          pytest.skip("Not relevant for empty index")
      if index.dtype == bool:
          pytest.skip(f"{index.dtype} cannot hold NAs")
      if isinstance(index, Index) and is_integer_dtype(index.dtype):
          pytest.skip(f"Not relevant for Index with {index.dtype}")

      if isinstance(index, MultiIndex):
          multi = index.copy(deep=True)
          with pytest.raises(
              NotImplementedError, match="isna is not defined for MultiIndex"
          ):
              multi.fillna(multi[0])
          return

      # Filling an index with its own first element must produce an equal
      # index that is a different object.
      original = index.copy(deep=True)
      filled = original.fillna(original[0])
      tm.assert_index_equal(filled, original)
      assert filled is not original

      with pytest.raises(TypeError, match="'value' must be a scalar, passed: "):
          original.fillna([original[0]])

      # Put an NA at position 1 and rebuild the index from those values.
      raw_values = index.copy(deep=True)._values
      raw_values[1] = np.nan
      with_na = type(index)(raw_values)

      unsupported = "does not support 'downcast'"
      deprecation = r"The 'downcast' keyword in .*Index\.fillna is deprecated"
      with tm.assert_produces_warning(FutureWarning, match=deprecation):
          with pytest.raises(NotImplementedError, match=unsupported):
              # For now at least, we only raise if there are NAs present
              with_na.fillna(with_na[0], downcast="infer")

      na_mask = np.array([False] * len(with_na), dtype=bool)
      na_mask[1] = True
      tm.assert_numpy_array_equal(with_na._isnan, na_mask)
      assert with_na.hasnans is True
  def test_nulls(self, index):
      """Smoke-test ``isna``/``notna`` for each kind of index."""
      # this is really a smoke test for the methods
      # as these are adequately tested for function elsewhere
      size = len(index)
      if size == 0:
          tm.assert_numpy_array_equal(index.isna(), np.array([], dtype=bool))
          return

      if isinstance(index, MultiIndex):
          multi = index.copy()
          with pytest.raises(
              NotImplementedError, match="isna is not defined for MultiIndex"
          ):
              multi.isna()
          return

      if not index.hasnans:
          tm.assert_numpy_array_equal(index.isna(), np.zeros(size, dtype=bool))
          tm.assert_numpy_array_equal(index.notna(), np.ones(size, dtype=bool))
          return

      # With NAs present, the methods must agree with top-level ``isna``.
      na_mask = isna(index)
      tm.assert_numpy_array_equal(index.isna(), na_mask)
      tm.assert_numpy_array_equal(index.notna(), ~na_mask)
  580. def test_empty(self, simple_index):
  581. # GH 15270
  582. idx = simple_index
  583. assert not idx.empty
  584. assert idx[:0].empty
  585. def test_join_self_unique(self, join_type, simple_index):
  586. idx = simple_index
  587. if idx.is_unique:
  588. joined = idx.join(idx, how=join_type)
  589. expected = simple_index
  590. if join_type == "outer":
  591. expected = algos.safe_sort(expected)
  592. tm.assert_index_equal(joined, expected)
  def test_map(self, simple_index):
      """Check that mapping with an identity callable returns an equivalent index."""
      # callable
      if isinstance(simple_index, (TimedeltaIndex, PeriodIndex)):
          pytest.skip("Tested elsewhere.")

      mapped = simple_index.map(lambda element: element)
      # RangeIndex are equivalent to the similar Index with int64 dtype
      tm.assert_index_equal(mapped, simple_index, exact="equiv")
  @pytest.mark.parametrize(
      "mapper",
      [
          lambda values, index: dict(zip(index, values)),
          lambda values, index: Series(values, index),
      ],
  )
  @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
  def test_map_dictlike(self, mapper, simple_index, request):
      """Check ``Index.map`` with a dict or Series mapping.

      ``mapper`` builds the mapping from ``(values, index)``. An identity
      mapping must round-trip the index, and a mapping onto NaN must return
      an all-NaN index.
      """
      if isinstance(simple_index, (DatetimeIndex, TimedeltaIndex, PeriodIndex)):
          pytest.skip("Tested elsewhere.")

      identity_mapping = mapper(simple_index.values, simple_index)
      mapped = simple_index.map(identity_mapping)
      # RangeIndex are equivalent to the similar Index with int64 dtype
      tm.assert_index_equal(mapped, simple_index, exact="equiv")

      # empty mappable
      # Float indexes pin the expected dtype; otherwise it is inferred.
      nan_dtype = simple_index.dtype if simple_index.dtype.kind == "f" else None
      all_nan = Index([np.nan] * len(simple_index), dtype=nan_dtype)
      mapped = simple_index.map(mapper(all_nan, simple_index))
      tm.assert_index_equal(mapped, all_nan)
  def test_map_str(self, simple_index):
      """Check that ``index.map(str)`` equals an Index of each element's ``str``."""
      # GH 31202
      if isinstance(simple_index, CategoricalIndex):
          pytest.skip("See test_map.py")

      stringified = Index([str(element) for element in simple_index])
      tm.assert_index_equal(simple_index.map(str), stringified)
  @pytest.mark.parametrize("copy", [True, False])
  @pytest.mark.parametrize("name", [None, "foo"])
  @pytest.mark.parametrize("ordered", [True, False])
  def test_astype_category(self, copy, name, ordered, simple_index):
      """Check ``astype`` to categorical dtypes against building a CategoricalIndex."""
      # GH 18630
      source = simple_index.rename(name) if name else simple_index

      # standard categories
      inferred_categories = CategoricalDtype(ordered=ordered)
      tm.assert_index_equal(
          source.astype(inferred_categories, copy=copy),
          CategoricalIndex(source, name=name, ordered=ordered),
          exact=True,
      )

      # non-standard categories
      # Drop the last unique value so one label falls outside the categories.
      partial_categories = CategoricalDtype(source.unique().tolist()[:-1], ordered)
      tm.assert_index_equal(
          source.astype(partial_categories, copy=copy),
          CategoricalIndex(source, name=name, dtype=partial_categories),
          exact=True,
      )

      if ordered is False:
          # dtype='category' defaults to ordered=False, so only test once
          tm.assert_index_equal(
              source.astype("category", copy=copy),
              CategoricalIndex(source, name=name),
              exact=True,
          )
  655. def test_is_unique(self, simple_index):
  656. # initialize a unique index
  657. index = simple_index.drop_duplicates()
  658. assert index.is_unique is True
  659. # empty index should be unique
  660. index_empty = index[:0]
  661. assert index_empty.is_unique is True
  662. # test basic dupes
  663. index_dup = index.insert(0, index[0])
  664. assert index_dup.is_unique is False
  665. # single NA should be unique
  666. index_na = index.insert(0, np.nan)
  667. assert index_na.is_unique is True
  668. # multiple NA should not be unique
  669. index_na_dup = index_na.insert(0, np.nan)
  670. assert index_na_dup.is_unique is False
  @pytest.mark.arm_slow
  def test_engine_reference_cycle(self, simple_index):
      """Check that an index can be freed after its ``_engine`` was accessed."""
      # GH27585
      fresh_copy = simple_index.copy()
      weak_handle = weakref.ref(fresh_copy)
      # Access the engine so that it exists before the only strong reference
      # to the copy is dropped.
      fresh_copy._engine
      del fresh_copy
      assert weak_handle() is None
  def test_getitem_2d_deprecated(self, simple_index):
      """Check that 2D indexing and boolean-scalar indexing raise."""
      # GH#30588, GH#31479
      if isinstance(simple_index, IntervalIndex):
          pytest.skip("Tested elsewhere")

      general_errors = (ValueError, IndexError)
      general_msg = "Multi-dimensional indexing|too many|only"
      with pytest.raises(general_errors, match=general_msg):
          simple_index[:, None]

      if isinstance(simple_index, RangeIndex):
          bool_errors, bool_msg = IndexError, "only integers, slices"
      else:
          # GH#44051 RangeIndex already raised pre-2.0 with a different message
          bool_errors, bool_msg = general_errors, general_msg

      for flag in (True, False):
          with pytest.raises(bool_errors, match=bool_msg):
              simple_index[flag]
  699. def test_copy_shares_cache(self, simple_index):
  700. # GH32898, GH36840
  701. idx = simple_index
  702. idx.get_loc(idx[0]) # populates the _cache.
  703. copy = idx.copy()
  704. assert copy._cache is idx._cache
  705. def test_shallow_copy_shares_cache(self, simple_index):
  706. # GH32669, GH36840
  707. idx = simple_index
  708. idx.get_loc(idx[0]) # populates the _cache.
  709. shallow_copy = idx._view()
  710. assert shallow_copy._cache is idx._cache
  711. shallow_copy = idx._shallow_copy(idx._data)
  712. assert shallow_copy._cache is not idx._cache
  713. assert shallow_copy._cache == {}
  714. def test_index_groupby(self, simple_index):
  715. idx = simple_index[:5]
  716. to_groupby = np.array([1, 2, np.nan, 2, 1])
  717. tm.assert_dict_equal(
  718. idx.groupby(to_groupby), {1.0: idx[[0, 4]], 2.0: idx[[1, 3]]}
  719. )
  720. to_groupby = DatetimeIndex(
  721. [
  722. datetime(2011, 11, 1),
  723. datetime(2011, 12, 1),
  724. pd.NaT,
  725. datetime(2011, 12, 1),
  726. datetime(2011, 11, 1),
  727. ],
  728. tz="UTC",
  729. ).values
  730. ex_keys = [Timestamp("2011-11-01"), Timestamp("2011-12-01")]
  731. expected = {ex_keys[0]: idx[[0, 4]], ex_keys[1]: idx[[1, 3]]}
  732. tm.assert_dict_equal(idx.groupby(to_groupby), expected)
  733. def test_append_preserves_dtype(self, simple_index):
  734. # In particular Index with dtype float32
  735. index = simple_index
  736. N = len(index)
  737. result = index.append(index)
  738. assert result.dtype == index.dtype
  739. tm.assert_index_equal(result[:N], index, check_exact=True)
  740. tm.assert_index_equal(result[N:], index, check_exact=True)
  741. alt = index.take(list(range(N)) * 2)
  742. tm.assert_index_equal(result, alt, check_exact=True)
  743. def test_inv(self, simple_index, using_infer_string):
  744. idx = simple_index
  745. if idx.dtype.kind in ["i", "u"]:
  746. res = ~idx
  747. expected = Index(~idx.values, name=idx.name)
  748. tm.assert_index_equal(res, expected)
  749. # check that we are matching Series behavior
  750. res2 = ~Series(idx)
  751. tm.assert_series_equal(res2, Series(expected))
  752. else:
  753. if idx.dtype.kind == "f":
  754. msg = "ufunc 'invert' not supported for the input types"
  755. else:
  756. msg = "bad operand|__invert__ is not supported for string dtype"
  757. with pytest.raises(TypeError, match=msg):
  758. ~idx
  759. # check that we get the same behavior with Series
  760. with pytest.raises(TypeError, match=msg):
  761. ~Series(idx)
  762. def test_is_boolean_is_deprecated(self, simple_index):
  763. # GH50042
  764. idx = simple_index
  765. with tm.assert_produces_warning(FutureWarning):
  766. idx.is_boolean()
  767. def test_is_floating_is_deprecated(self, simple_index):
  768. # GH50042
  769. idx = simple_index
  770. with tm.assert_produces_warning(FutureWarning):
  771. idx.is_floating()
  772. def test_is_integer_is_deprecated(self, simple_index):
  773. # GH50042
  774. idx = simple_index
  775. with tm.assert_produces_warning(FutureWarning):
  776. idx.is_integer()
  777. def test_holds_integer_deprecated(self, simple_index):
  778. # GH50243
  779. idx = simple_index
  780. msg = f"{type(idx).__name__}.holds_integer is deprecated. "
  781. with tm.assert_produces_warning(FutureWarning, match=msg):
  782. idx.holds_integer()
  783. def test_is_numeric_is_deprecated(self, simple_index):
  784. # GH50042
  785. idx = simple_index
  786. with tm.assert_produces_warning(
  787. FutureWarning,
  788. match=f"{type(idx).__name__}.is_numeric is deprecated. ",
  789. ):
  790. idx.is_numeric()
  791. def test_is_categorical_is_deprecated(self, simple_index):
  792. # GH50042
  793. idx = simple_index
  794. with tm.assert_produces_warning(
  795. FutureWarning,
  796. match=r"Use pandas\.api\.types\.is_categorical_dtype instead",
  797. ):
  798. idx.is_categorical()
  799. def test_is_interval_is_deprecated(self, simple_index):
  800. # GH50042
  801. idx = simple_index
  802. with tm.assert_produces_warning(FutureWarning):
  803. idx.is_interval()
  804. def test_is_object_is_deprecated(self, simple_index):
  805. # GH50042
  806. idx = simple_index
  807. with tm.assert_produces_warning(FutureWarning):
  808. idx.is_object()
  809. class TestNumericBase:
  810. @pytest.fixture(
  811. params=[
  812. RangeIndex(start=0, stop=20, step=2),
  813. Index(np.arange(5, dtype=np.float64)),
  814. Index(np.arange(5, dtype=np.float32)),
  815. Index(np.arange(5, dtype=np.uint64)),
  816. Index(range(0, 20, 2), dtype=np.int64),
  817. Index(range(0, 20, 2), dtype=np.int32),
  818. Index(range(0, 20, 2), dtype=np.int16),
  819. Index(range(0, 20, 2), dtype=np.int8),
  820. ]
  821. )
  822. def simple_index(self, request):
  823. return request.param
  824. def test_constructor_unwraps_index(self, simple_index):
  825. if isinstance(simple_index, RangeIndex):
  826. pytest.skip("Tested elsewhere.")
  827. index_cls = type(simple_index)
  828. dtype = simple_index.dtype
  829. idx = Index([1, 2], dtype=dtype)
  830. result = index_cls(idx)
  831. expected = np.array([1, 2], dtype=idx.dtype)
  832. tm.assert_numpy_array_equal(result._data, expected)
  833. def test_can_hold_identifiers(self, simple_index):
  834. idx = simple_index
  835. key = idx[0]
  836. assert idx._can_hold_identifiers_and_holds_name(key) is False
  837. def test_view(self, simple_index):
  838. if isinstance(simple_index, RangeIndex):
  839. pytest.skip("Tested elsewhere.")
  840. index_cls = type(simple_index)
  841. dtype = simple_index.dtype
  842. idx = index_cls([], dtype=dtype, name="Foo")
  843. idx_view = idx.view()
  844. assert idx_view.name == "Foo"
  845. idx_view = idx.view(dtype)
  846. tm.assert_index_equal(idx, index_cls(idx_view, name="Foo"), exact=True)
  847. msg = "Passing a type in .*Index.view is deprecated"
  848. with tm.assert_produces_warning(FutureWarning, match=msg):
  849. idx_view = idx.view(index_cls)
  850. tm.assert_index_equal(idx, index_cls(idx_view, name="Foo"), exact=True)
  851. def test_format(self, simple_index):
  852. # GH35439
  853. if isinstance(simple_index, DatetimeIndex):
  854. pytest.skip("Tested elsewhere")
  855. idx = simple_index
  856. max_width = max(len(str(x)) for x in idx)
  857. expected = [str(x).ljust(max_width) for x in idx]
  858. msg = r"Index\.format is deprecated"
  859. with tm.assert_produces_warning(FutureWarning, match=msg):
  860. assert idx.format() == expected
  861. def test_insert_non_na(self, simple_index):
  862. # GH#43921 inserting an element that we know we can hold should
  863. # not change dtype or type (except for RangeIndex)
  864. index = simple_index
  865. result = index.insert(0, index[0])
  866. expected = Index([index[0]] + list(index), dtype=index.dtype)
  867. tm.assert_index_equal(result, expected, exact=True)
  868. def test_insert_na(self, nulls_fixture, simple_index):
  869. # GH 18295 (test missing)
  870. index = simple_index
  871. na_val = nulls_fixture
  872. if na_val is pd.NaT:
  873. expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object)
  874. else:
  875. expected = Index([index[0], np.nan] + list(index[1:]))
  876. # GH#43921 we preserve float dtype
  877. if index.dtype.kind == "f":
  878. expected = Index(expected, dtype=index.dtype)
  879. result = index.insert(1, na_val)
  880. tm.assert_index_equal(result, expected, exact=True)
  881. def test_arithmetic_explicit_conversions(self, simple_index):
  882. # GH 8608
  883. # add/sub are overridden explicitly for Float/Int Index
  884. index_cls = type(simple_index)
  885. if index_cls is RangeIndex:
  886. idx = RangeIndex(5)
  887. else:
  888. idx = index_cls(np.arange(5, dtype="int64"))
  889. # float conversions
  890. arr = np.arange(5, dtype="int64") * 3.2
  891. expected = Index(arr, dtype=np.float64)
  892. fidx = idx * 3.2
  893. tm.assert_index_equal(fidx, expected)
  894. fidx = 3.2 * idx
  895. tm.assert_index_equal(fidx, expected)
  896. # interops with numpy arrays
  897. expected = Index(arr, dtype=np.float64)
  898. a = np.zeros(5, dtype="float64")
  899. result = fidx - a
  900. tm.assert_index_equal(result, expected)
  901. expected = Index(-arr, dtype=np.float64)
  902. a = np.zeros(5, dtype="float64")
  903. result = a - fidx
  904. tm.assert_index_equal(result, expected)
  905. @pytest.mark.parametrize("complex_dtype", [np.complex64, np.complex128])
  906. def test_astype_to_complex(self, complex_dtype, simple_index):
  907. result = simple_index.astype(complex_dtype)
  908. assert type(result) is Index and result.dtype == complex_dtype
  909. def test_cast_string(self, simple_index):
  910. if isinstance(simple_index, RangeIndex):
  911. pytest.skip("casting of strings not relevant for RangeIndex")
  912. result = type(simple_index)(["0", "1", "2"], dtype=simple_index.dtype)
  913. expected = type(simple_index)([0, 1, 2], dtype=simple_index.dtype)
  914. tm.assert_index_equal(result, expected)