test_base.py 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734
  1. from collections import defaultdict
  2. from datetime import datetime
  3. from functools import partial
  4. import math
  5. import operator
  6. import re
  7. import numpy as np
  8. import pytest
  9. from pandas.compat import IS64
  10. from pandas.errors import InvalidIndexError
  11. import pandas.util._test_decorators as td
  12. from pandas.core.dtypes.common import (
  13. is_any_real_numeric_dtype,
  14. is_numeric_dtype,
  15. is_object_dtype,
  16. )
  17. import pandas as pd
  18. from pandas import (
  19. CategoricalIndex,
  20. DataFrame,
  21. DatetimeIndex,
  22. IntervalIndex,
  23. PeriodIndex,
  24. RangeIndex,
  25. Series,
  26. TimedeltaIndex,
  27. date_range,
  28. period_range,
  29. timedelta_range,
  30. )
  31. import pandas._testing as tm
  32. from pandas.core.indexes.api import (
  33. Index,
  34. MultiIndex,
  35. _get_combined_index,
  36. ensure_index,
  37. ensure_index_from_sequences,
  38. )
  39. class TestIndex:
  40. @pytest.fixture
  41. def simple_index(self) -> Index:
  42. return Index(list("abcde"))
  43. def test_can_hold_identifiers(self, simple_index):
  44. index = simple_index
  45. key = index[0]
  46. assert index._can_hold_identifiers_and_holds_name(key) is True
  47. @pytest.mark.parametrize("index", ["datetime"], indirect=True)
  48. def test_new_axis(self, index):
  49. # TODO: a bunch of scattered tests check this deprecation is enforced.
  50. # de-duplicate/centralize them.
  51. with pytest.raises(ValueError, match="Multi-dimensional indexing"):
  52. # GH#30588 multi-dimensional indexing deprecated
  53. index[None, :]
  54. def test_constructor_regular(self, index):
  55. tm.assert_contains_all(index, index)
  56. @pytest.mark.parametrize("index", ["string"], indirect=True)
  57. def test_constructor_casting(self, index):
  58. # casting
  59. arr = np.array(index)
  60. new_index = Index(arr)
  61. tm.assert_contains_all(arr, new_index)
  62. tm.assert_index_equal(index, new_index)
  63. def test_constructor_copy(self, using_infer_string):
  64. index = Index(list("abc"), name="name")
  65. arr = np.array(index)
  66. new_index = Index(arr, copy=True, name="name")
  67. assert isinstance(new_index, Index)
  68. assert new_index.name == "name"
  69. if using_infer_string:
  70. tm.assert_extension_array_equal(
  71. new_index.values, pd.array(arr, dtype="str")
  72. )
  73. else:
  74. tm.assert_numpy_array_equal(arr, new_index.values)
  75. arr[0] = "SOMEBIGLONGSTRING"
  76. assert new_index[0] != "SOMEBIGLONGSTRING"
  77. @pytest.mark.parametrize("cast_as_obj", [True, False])
  78. @pytest.mark.parametrize(
  79. "index",
  80. [
  81. date_range(
  82. "2015-01-01 10:00",
  83. freq="D",
  84. periods=3,
  85. tz="US/Eastern",
  86. name="Green Eggs & Ham",
  87. ), # DTI with tz
  88. date_range("2015-01-01 10:00", freq="D", periods=3), # DTI no tz
  89. timedelta_range("1 days", freq="D", periods=3), # td
  90. period_range("2015-01-01", freq="D", periods=3), # period
  91. ],
  92. )
  93. def test_constructor_from_index_dtlike(self, cast_as_obj, index):
  94. if cast_as_obj:
  95. with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
  96. result = Index(index.astype(object))
  97. else:
  98. result = Index(index)
  99. tm.assert_index_equal(result, index)
  100. if isinstance(index, DatetimeIndex):
  101. assert result.tz == index.tz
  102. if cast_as_obj:
  103. # GH#23524 check that Index(dti, dtype=object) does not
  104. # incorrectly raise ValueError, and that nanoseconds are not
  105. # dropped
  106. index += pd.Timedelta(nanoseconds=50)
  107. result = Index(index, dtype=object)
  108. assert result.dtype == np.object_
  109. assert list(result) == list(index)
  110. @pytest.mark.parametrize(
  111. "index,has_tz",
  112. [
  113. (
  114. date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
  115. True,
  116. ), # datetimetz
  117. (timedelta_range("1 days", freq="D", periods=3), False), # td
  118. (period_range("2015-01-01", freq="D", periods=3), False), # period
  119. ],
  120. )
  121. def test_constructor_from_series_dtlike(self, index, has_tz):
  122. result = Index(Series(index))
  123. tm.assert_index_equal(result, index)
  124. if has_tz:
  125. assert result.tz == index.tz
  126. def test_constructor_from_series_freq(self):
  127. # GH 6273
  128. # create from a series, passing a freq
  129. dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"]
  130. expected = DatetimeIndex(dts, freq="MS")
  131. s = Series(pd.to_datetime(dts))
  132. result = DatetimeIndex(s, freq="MS")
  133. tm.assert_index_equal(result, expected)
  134. def test_constructor_from_frame_series_freq(self, using_infer_string):
  135. # GH 6273
  136. # create from a series, passing a freq
  137. dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"]
  138. expected = DatetimeIndex(dts, freq="MS")
  139. df = DataFrame(np.random.default_rng(2).random((5, 3)))
  140. df["date"] = dts
  141. result = DatetimeIndex(df["date"], freq="MS")
  142. dtype = object if not using_infer_string else "str"
  143. assert df["date"].dtype == dtype
  144. expected.name = "date"
  145. tm.assert_index_equal(result, expected)
  146. expected = Series(dts, name="date")
  147. tm.assert_series_equal(df["date"], expected)
  148. # GH 6274
  149. # infer freq of same
  150. if not using_infer_string:
  151. # Doesn't work with arrow strings
  152. freq = pd.infer_freq(df["date"])
  153. assert freq == "MS"
  154. def test_constructor_int_dtype_nan(self):
  155. # see gh-15187
  156. data = [np.nan]
  157. expected = Index(data, dtype=np.float64)
  158. result = Index(data, dtype="float")
  159. tm.assert_index_equal(result, expected)
  160. @pytest.mark.parametrize(
  161. "klass,dtype,na_val",
  162. [
  163. (Index, np.float64, np.nan),
  164. (DatetimeIndex, "datetime64[ns]", pd.NaT),
  165. ],
  166. )
  167. def test_index_ctor_infer_nan_nat(self, klass, dtype, na_val):
  168. # GH 13467
  169. na_list = [na_val, na_val]
  170. expected = klass(na_list)
  171. assert expected.dtype == dtype
  172. result = Index(na_list)
  173. tm.assert_index_equal(result, expected)
  174. result = Index(np.array(na_list))
  175. tm.assert_index_equal(result, expected)
  176. @pytest.mark.parametrize(
  177. "vals,dtype",
  178. [
  179. ([1, 2, 3, 4, 5], "int"),
  180. ([1.1, np.nan, 2.2, 3.0], "float"),
  181. (["A", "B", "C", np.nan], "obj"),
  182. ],
  183. )
  184. def test_constructor_simple_new(self, vals, dtype):
  185. index = Index(vals, name=dtype)
  186. result = index._simple_new(index.values, dtype)
  187. tm.assert_index_equal(result, index)
  188. @pytest.mark.parametrize("attr", ["values", "asi8"])
  189. @pytest.mark.parametrize("klass", [Index, DatetimeIndex])
  190. def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass):
  191. # Test constructing with a datetimetz dtype
  192. # .values produces numpy datetimes, so these are considered naive
  193. # .asi8 produces integers, so these are considered epoch timestamps
  194. # ^the above will be true in a later version. Right now we `.view`
  195. # the i8 values as NS_DTYPE, effectively treating them as wall times.
  196. index = date_range("2011-01-01", periods=5)
  197. arg = getattr(index, attr)
  198. index = index.tz_localize(tz_naive_fixture)
  199. dtype = index.dtype
  200. # As of 2.0 astype raises on dt64.astype(dt64tz)
  201. err = tz_naive_fixture is not None
  202. msg = "Cannot use .astype to convert from timezone-naive dtype to"
  203. if attr == "asi8":
  204. result = DatetimeIndex(arg).tz_localize(tz_naive_fixture)
  205. tm.assert_index_equal(result, index)
  206. elif klass is Index:
  207. with pytest.raises(TypeError, match="unexpected keyword"):
  208. klass(arg, tz=tz_naive_fixture)
  209. else:
  210. result = klass(arg, tz=tz_naive_fixture)
  211. tm.assert_index_equal(result, index)
  212. if attr == "asi8":
  213. if err:
  214. with pytest.raises(TypeError, match=msg):
  215. DatetimeIndex(arg).astype(dtype)
  216. else:
  217. result = DatetimeIndex(arg).astype(dtype)
  218. tm.assert_index_equal(result, index)
  219. else:
  220. result = klass(arg, dtype=dtype)
  221. tm.assert_index_equal(result, index)
  222. if attr == "asi8":
  223. result = DatetimeIndex(list(arg)).tz_localize(tz_naive_fixture)
  224. tm.assert_index_equal(result, index)
  225. elif klass is Index:
  226. with pytest.raises(TypeError, match="unexpected keyword"):
  227. klass(arg, tz=tz_naive_fixture)
  228. else:
  229. result = klass(list(arg), tz=tz_naive_fixture)
  230. tm.assert_index_equal(result, index)
  231. if attr == "asi8":
  232. if err:
  233. with pytest.raises(TypeError, match=msg):
  234. DatetimeIndex(list(arg)).astype(dtype)
  235. else:
  236. result = DatetimeIndex(list(arg)).astype(dtype)
  237. tm.assert_index_equal(result, index)
  238. else:
  239. result = klass(list(arg), dtype=dtype)
  240. tm.assert_index_equal(result, index)
  241. @pytest.mark.parametrize("attr", ["values", "asi8"])
  242. @pytest.mark.parametrize("klass", [Index, TimedeltaIndex])
  243. def test_constructor_dtypes_timedelta(self, attr, klass):
  244. index = timedelta_range("1 days", periods=5)
  245. index = index._with_freq(None) # won't be preserved by constructors
  246. dtype = index.dtype
  247. values = getattr(index, attr)
  248. result = klass(values, dtype=dtype)
  249. tm.assert_index_equal(result, index)
  250. result = klass(list(values), dtype=dtype)
  251. tm.assert_index_equal(result, index)
  252. @pytest.mark.parametrize("value", [[], iter([]), (_ for _ in [])])
  253. @pytest.mark.parametrize(
  254. "klass",
  255. [
  256. Index,
  257. CategoricalIndex,
  258. DatetimeIndex,
  259. TimedeltaIndex,
  260. ],
  261. )
  262. def test_constructor_empty(self, value, klass):
  263. empty = klass(value)
  264. assert isinstance(empty, klass)
  265. assert not len(empty)
  266. @pytest.mark.parametrize(
  267. "empty,klass",
  268. [
  269. (PeriodIndex([], freq="D"), PeriodIndex),
  270. (PeriodIndex(iter([]), freq="D"), PeriodIndex),
  271. (PeriodIndex((_ for _ in []), freq="D"), PeriodIndex),
  272. (RangeIndex(step=1), RangeIndex),
  273. (MultiIndex(levels=[[1, 2], ["blue", "red"]], codes=[[], []]), MultiIndex),
  274. ],
  275. )
  276. def test_constructor_empty_special(self, empty, klass):
  277. assert isinstance(empty, klass)
  278. assert not len(empty)
  279. @pytest.mark.parametrize(
  280. "index",
  281. [
  282. "datetime",
  283. "float64",
  284. "float32",
  285. "int64",
  286. "int32",
  287. "period",
  288. "range",
  289. "repeats",
  290. "timedelta",
  291. "tuples",
  292. "uint64",
  293. "uint32",
  294. ],
  295. indirect=True,
  296. )
  297. def test_view_with_args(self, index):
  298. index.view("i8")
  299. @pytest.mark.parametrize(
  300. "index",
  301. [
  302. "string",
  303. pytest.param("categorical", marks=pytest.mark.xfail(reason="gh-25464")),
  304. "bool-object",
  305. "bool-dtype",
  306. "empty",
  307. ],
  308. indirect=True,
  309. )
  310. def test_view_with_args_object_array_raises(self, index):
  311. if index.dtype == bool:
  312. msg = "When changing to a larger dtype"
  313. with pytest.raises(ValueError, match=msg):
  314. index.view("i8")
  315. else:
  316. msg = (
  317. r"Cannot change data-type for array of references\.|"
  318. r"Cannot change data-type for object array\.|"
  319. r"Cannot change data-type for array of strings\.|"
  320. )
  321. with pytest.raises(TypeError, match=msg):
  322. index.view("i8")
  323. @pytest.mark.parametrize(
  324. "index",
  325. ["int64", "int32", "range"],
  326. indirect=True,
  327. )
  328. def test_astype(self, index):
  329. casted = index.astype("i8")
  330. # it works!
  331. casted.get_loc(5)
  332. # pass on name
  333. index.name = "foobar"
  334. casted = index.astype("i8")
  335. assert casted.name == "foobar"
  336. def test_equals_object(self):
  337. # same
  338. assert Index(["a", "b", "c"]).equals(Index(["a", "b", "c"]))
  339. @pytest.mark.parametrize(
  340. "comp", [Index(["a", "b"]), Index(["a", "b", "d"]), ["a", "b", "c"]]
  341. )
  342. def test_not_equals_object(self, comp):
  343. assert not Index(["a", "b", "c"]).equals(comp)
  344. def test_identical(self):
  345. # index
  346. i1 = Index(["a", "b", "c"])
  347. i2 = Index(["a", "b", "c"])
  348. assert i1.identical(i2)
  349. i1 = i1.rename("foo")
  350. assert i1.equals(i2)
  351. assert not i1.identical(i2)
  352. i2 = i2.rename("foo")
  353. assert i1.identical(i2)
  354. i3 = Index([("a", "a"), ("a", "b"), ("b", "a")])
  355. i4 = Index([("a", "a"), ("a", "b"), ("b", "a")], tupleize_cols=False)
  356. assert not i3.identical(i4)
  357. def test_is_(self):
  358. ind = Index(range(10))
  359. assert ind.is_(ind)
  360. assert ind.is_(ind.view().view().view().view())
  361. assert not ind.is_(Index(range(10)))
  362. assert not ind.is_(ind.copy())
  363. assert not ind.is_(ind.copy(deep=False))
  364. assert not ind.is_(ind[:])
  365. assert not ind.is_(np.array(range(10)))
  366. # quasi-implementation dependent
  367. assert ind.is_(ind.view())
  368. ind2 = ind.view()
  369. ind2.name = "bob"
  370. assert ind.is_(ind2)
  371. assert ind2.is_(ind)
  372. # doesn't matter if Indices are *actually* views of underlying data,
  373. assert not ind.is_(Index(ind.values))
  374. arr = np.array(range(1, 11))
  375. ind1 = Index(arr, copy=False)
  376. ind2 = Index(arr, copy=False)
  377. assert not ind1.is_(ind2)
  378. def test_asof_numeric_vs_bool_raises(self):
  379. left = Index([1, 2, 3])
  380. right = Index([True, False], dtype=object)
  381. msg = "Cannot compare dtypes int64 and bool"
  382. with pytest.raises(TypeError, match=msg):
  383. left.asof(right[0])
  384. # TODO: should right.asof(left[0]) also raise?
  385. with pytest.raises(InvalidIndexError, match=re.escape(str(right))):
  386. left.asof(right)
  387. with pytest.raises(InvalidIndexError, match=re.escape(str(left))):
  388. right.asof(left)
  389. @pytest.mark.parametrize("index", ["string"], indirect=True)
  390. def test_booleanindex(self, index):
  391. bool_index = np.ones(len(index), dtype=bool)
  392. bool_index[5:30:2] = False
  393. sub_index = index[bool_index]
  394. for i, val in enumerate(sub_index):
  395. assert sub_index.get_loc(val) == i
  396. sub_index = index[list(bool_index)]
  397. for i, val in enumerate(sub_index):
  398. assert sub_index.get_loc(val) == i
  399. def test_fancy(self, simple_index):
  400. index = simple_index
  401. sl = index[[1, 2, 3]]
  402. for i in sl:
  403. assert i == sl[sl.get_loc(i)]
  404. @pytest.mark.parametrize(
  405. "index",
  406. ["string", "int64", "int32", "uint64", "uint32", "float64", "float32"],
  407. indirect=True,
  408. )
  409. @pytest.mark.parametrize("dtype", [int, np.bool_])
  410. def test_empty_fancy(self, index, dtype, request, using_infer_string):
  411. if dtype is np.bool_ and using_infer_string and index.dtype == "string":
  412. request.applymarker(pytest.mark.xfail(reason="numpy behavior is buggy"))
  413. empty_arr = np.array([], dtype=dtype)
  414. empty_index = type(index)([], dtype=index.dtype)
  415. assert index[[]].identical(empty_index)
  416. if dtype == np.bool_:
  417. with tm.assert_produces_warning(FutureWarning, match="is deprecated"):
  418. assert index[empty_arr].identical(empty_index)
  419. else:
  420. assert index[empty_arr].identical(empty_index)
  421. @pytest.mark.parametrize(
  422. "index",
  423. ["string", "int64", "int32", "uint64", "uint32", "float64", "float32"],
  424. indirect=True,
  425. )
  426. def test_empty_fancy_raises(self, index):
  427. # DatetimeIndex is excluded, because it overrides getitem and should
  428. # be tested separately.
  429. empty_farr = np.array([], dtype=np.float64)
  430. empty_index = type(index)([], dtype=index.dtype)
  431. assert index[[]].identical(empty_index)
  432. # np.ndarray only accepts ndarray of int & bool dtypes, so should Index
  433. msg = r"arrays used as indices must be of integer"
  434. with pytest.raises(IndexError, match=msg):
  435. index[empty_farr]
  436. def test_union_dt_as_obj(self, simple_index):
  437. # TODO: Replace with fixturesult
  438. index = simple_index
  439. date_index = date_range("2019-01-01", periods=10)
  440. first_cat = index.union(date_index)
  441. second_cat = index.union(index)
  442. appended = Index(np.append(index, date_index.astype("O")))
  443. tm.assert_index_equal(first_cat, appended)
  444. tm.assert_index_equal(second_cat, index)
  445. tm.assert_contains_all(index, first_cat)
  446. tm.assert_contains_all(index, second_cat)
  447. tm.assert_contains_all(date_index, first_cat)
  448. def test_map_with_tuples(self):
  449. # GH 12766
  450. # Test that returning a single tuple from an Index
  451. # returns an Index.
  452. index = Index(np.arange(3), dtype=np.int64)
  453. result = index.map(lambda x: (x,))
  454. expected = Index([(i,) for i in index])
  455. tm.assert_index_equal(result, expected)
  456. # Test that returning a tuple from a map of a single index
  457. # returns a MultiIndex object.
  458. result = index.map(lambda x: (x, x == 1))
  459. expected = MultiIndex.from_tuples([(i, i == 1) for i in index])
  460. tm.assert_index_equal(result, expected)
  461. def test_map_with_tuples_mi(self):
  462. # Test that returning a single object from a MultiIndex
  463. # returns an Index.
  464. first_level = ["foo", "bar", "baz"]
  465. multi_index = MultiIndex.from_tuples(zip(first_level, [1, 2, 3]))
  466. reduced_index = multi_index.map(lambda x: x[0])
  467. tm.assert_index_equal(reduced_index, Index(first_level))
  468. @pytest.mark.parametrize(
  469. "index",
  470. [
  471. date_range("2020-01-01", freq="D", periods=10),
  472. period_range("2020-01-01", freq="D", periods=10),
  473. timedelta_range("1 day", periods=10),
  474. ],
  475. )
  476. def test_map_tseries_indices_return_index(self, index):
  477. expected = Index([1] * 10)
  478. result = index.map(lambda x: 1)
  479. tm.assert_index_equal(expected, result)
  480. def test_map_tseries_indices_accsr_return_index(self):
  481. date_index = DatetimeIndex(
  482. date_range("2020-01-01", periods=24, freq="h"), name="hourly"
  483. )
  484. result = date_index.map(lambda x: x.hour)
  485. expected = Index(np.arange(24, dtype="int64"), name="hourly")
  486. tm.assert_index_equal(result, expected, exact=True)
  487. @pytest.mark.parametrize(
  488. "mapper",
  489. [
  490. lambda values, index: {i: e for e, i in zip(values, index)},
  491. lambda values, index: Series(values, index),
  492. ],
  493. )
  494. def test_map_dictlike_simple(self, mapper):
  495. # GH 12756
  496. expected = Index(["foo", "bar", "baz"])
  497. index = Index(np.arange(3), dtype=np.int64)
  498. result = index.map(mapper(expected.values, index))
  499. tm.assert_index_equal(result, expected)
  500. @pytest.mark.parametrize(
  501. "mapper",
  502. [
  503. lambda values, index: {i: e for e, i in zip(values, index)},
  504. lambda values, index: Series(values, index),
  505. ],
  506. )
  507. @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
  508. def test_map_dictlike(self, index, mapper, request):
  509. # GH 12756
  510. if isinstance(index, CategoricalIndex):
  511. pytest.skip("Tested in test_categorical")
  512. elif not index.is_unique:
  513. pytest.skip("Cannot map duplicated index")
  514. rng = np.arange(len(index), 0, -1, dtype=np.int64)
  515. if index.empty:
  516. # to match proper result coercion for uints
  517. expected = Index([])
  518. elif is_numeric_dtype(index.dtype):
  519. expected = index._constructor(rng, dtype=index.dtype)
  520. elif type(index) is Index and index.dtype != object:
  521. # i.e. EA-backed, for now just Nullable
  522. expected = Index(rng, dtype=index.dtype)
  523. else:
  524. expected = Index(rng)
  525. result = index.map(mapper(expected, index))
  526. tm.assert_index_equal(result, expected)
  527. @pytest.mark.parametrize(
  528. "mapper",
  529. [Series(["foo", 2.0, "baz"], index=[0, 2, -1]), {0: "foo", 2: 2.0, -1: "baz"}],
  530. )
  531. def test_map_with_non_function_missing_values(self, mapper):
  532. # GH 12756
  533. expected = Index([2.0, np.nan, "foo"])
  534. result = Index([2, 1, 0]).map(mapper)
  535. tm.assert_index_equal(expected, result)
  536. def test_map_na_exclusion(self):
  537. index = Index([1.5, np.nan, 3, np.nan, 5])
  538. result = index.map(lambda x: x * 2, na_action="ignore")
  539. expected = index * 2
  540. tm.assert_index_equal(result, expected)
  541. def test_map_defaultdict(self):
  542. index = Index([1, 2, 3])
  543. default_dict = defaultdict(lambda: "blank")
  544. default_dict[1] = "stuff"
  545. result = index.map(default_dict)
  546. expected = Index(["stuff", "blank", "blank"])
  547. tm.assert_index_equal(result, expected)
  548. @pytest.mark.parametrize("name,expected", [("foo", "foo"), ("bar", None)])
  549. def test_append_empty_preserve_name(self, name, expected):
  550. left = Index([], name="foo")
  551. right = Index([1, 2, 3], name=name)
  552. msg = "The behavior of array concatenation with empty entries is deprecated"
  553. with tm.assert_produces_warning(FutureWarning, match=msg):
  554. result = left.append(right)
  555. assert result.name == expected
  556. @pytest.mark.parametrize(
  557. "index, expected",
  558. [
  559. ("string", False),
  560. ("bool-object", False),
  561. ("bool-dtype", False),
  562. ("categorical", False),
  563. ("int64", True),
  564. ("int32", True),
  565. ("uint64", True),
  566. ("uint32", True),
  567. ("datetime", False),
  568. ("float64", True),
  569. ("float32", True),
  570. ],
  571. indirect=["index"],
  572. )
  573. def test_is_numeric(self, index, expected):
  574. assert is_any_real_numeric_dtype(index) is expected
  575. @pytest.mark.parametrize(
  576. "index, expected",
  577. [
  578. ("string", True),
  579. ("bool-object", True),
  580. ("bool-dtype", False),
  581. ("categorical", False),
  582. ("int64", False),
  583. ("int32", False),
  584. ("uint64", False),
  585. ("uint32", False),
  586. ("datetime", False),
  587. ("float64", False),
  588. ("float32", False),
  589. ],
  590. indirect=["index"],
  591. )
  592. def test_is_object(self, index, expected, using_infer_string):
  593. if using_infer_string and index.dtype == "string" and expected:
  594. expected = False
  595. assert is_object_dtype(index) is expected
  596. def test_summary(self, index):
  597. index._summary()
  598. def test_format_bug(self):
  599. # GH 14626
  600. # windows has different precision on datetime.datetime.now (it doesn't
  601. # include us since the default for Timestamp shows these but Index
  602. # formatting does not we are skipping)
  603. now = datetime.now()
  604. msg = r"Index\.format is deprecated"
  605. if not str(now).endswith("000"):
  606. index = Index([now])
  607. with tm.assert_produces_warning(FutureWarning, match=msg):
  608. formatted = index.format()
  609. expected = [str(index[0])]
  610. assert formatted == expected
  611. with tm.assert_produces_warning(FutureWarning, match=msg):
  612. Index([]).format()
  613. @pytest.mark.parametrize("vals", [[1, 2.0 + 3.0j, 4.0], ["a", "b", "c"]])
  614. def test_format_missing(self, vals, nulls_fixture):
  615. # 2845
  616. vals = list(vals) # Copy for each iteration
  617. vals.append(nulls_fixture)
  618. index = Index(vals, dtype=object)
  619. # TODO: case with complex dtype?
  620. msg = r"Index\.format is deprecated"
  621. with tm.assert_produces_warning(FutureWarning, match=msg):
  622. formatted = index.format()
  623. null_repr = "NaN" if isinstance(nulls_fixture, float) else str(nulls_fixture)
  624. expected = [str(index[0]), str(index[1]), str(index[2]), null_repr]
  625. assert formatted == expected
  626. assert index[3] is nulls_fixture
  627. @pytest.mark.parametrize("op", ["any", "all"])
  628. def test_logical_compat(self, op, simple_index):
  629. index = simple_index
  630. left = getattr(index, op)()
  631. assert left == getattr(index.values, op)()
  632. right = getattr(index.to_series(), op)()
  633. # left might not match right exactly in e.g. string cases where the
  634. # because we use np.any/all instead of .any/all
  635. assert bool(left) == bool(right)
  636. @pytest.mark.parametrize(
  637. "index", ["string", "int64", "int32", "float64", "float32"], indirect=True
  638. )
  639. def test_drop_by_str_label(self, index):
  640. n = len(index)
  641. drop = index[list(range(5, 10))]
  642. dropped = index.drop(drop)
  643. expected = index[list(range(5)) + list(range(10, n))]
  644. tm.assert_index_equal(dropped, expected)
  645. dropped = index.drop(index[0])
  646. expected = index[1:]
  647. tm.assert_index_equal(dropped, expected)
  648. @pytest.mark.parametrize(
  649. "index", ["string", "int64", "int32", "float64", "float32"], indirect=True
  650. )
  651. @pytest.mark.parametrize("keys", [["foo", "bar"], ["1", "bar"]])
  652. def test_drop_by_str_label_raises_missing_keys(self, index, keys):
  653. with pytest.raises(KeyError, match=""):
  654. index.drop(keys)
  655. @pytest.mark.parametrize(
  656. "index", ["string", "int64", "int32", "float64", "float32"], indirect=True
  657. )
  658. def test_drop_by_str_label_errors_ignore(self, index):
  659. n = len(index)
  660. drop = index[list(range(5, 10))]
  661. mixed = drop.tolist() + ["foo"]
  662. dropped = index.drop(mixed, errors="ignore")
  663. expected = index[list(range(5)) + list(range(10, n))]
  664. tm.assert_index_equal(dropped, expected)
  665. dropped = index.drop(["foo", "bar"], errors="ignore")
  666. expected = index[list(range(n))]
  667. tm.assert_index_equal(dropped, expected)
  668. def test_drop_by_numeric_label_loc(self):
  669. # TODO: Parametrize numeric and str tests after self.strIndex fixture
  670. index = Index([1, 2, 3])
  671. dropped = index.drop(1)
  672. expected = Index([2, 3])
  673. tm.assert_index_equal(dropped, expected)
  def test_drop_by_numeric_label_raises_missing_keys(self):
      # 3 exists in the index but 4 does not, so the drop must fail and the
      # error must name the missing label. The previous empty ``match``
      # pattern accepted any message at all.
      index = Index([1, 2, 3])
      with pytest.raises(KeyError, match=r"\[4\] not found in axis"):
          index.drop([3, 4])
  @pytest.mark.parametrize(
      "key,expected", [(4, Index([1, 2, 3])), ([3, 4, 5], Index([1, 2]))]
  )
  def test_drop_by_numeric_label_errors_ignore(self, key, expected):
      # Missing numeric labels are ignored; present ones are still removed.
      result = Index([1, 2, 3]).drop(key, errors="ignore")
      tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize(
      "values",
      [["a", "b", ("c", "d")], ["a", ("c", "d"), "b"], [("c", "d"), "a", "b"]],
  )
  @pytest.mark.parametrize("to_drop", [[("c", "d"), "a"], ["a", ("c", "d")]])
  def test_drop_tuple(self, values, to_drop):
      # GH 18304
      # A tuple stored in an object index is a single label and can be
      # dropped on its own or as part of a list.
      index = Index(values)
      only_b = Index(["b"], dtype=object)
      first, second = to_drop

      # Both labels in one call.
      tm.assert_index_equal(index.drop(to_drop), only_b)

      # One label at a time; the second is passed both bare and list-wrapped.
      without_first = index.drop(first)
      for label in (second, [second]):
          tm.assert_index_equal(without_first.drop(label), only_b)

      # Dropping a label that is already gone must raise.
      without_second = index.drop(second)
      msg = rf"\"\[{re.escape(repr(second))}\] not found in axis\""
      for label in (second, [second]):
          with pytest.raises(KeyError, match=msg):
              without_second.drop(label)
  @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
  def test_drop_with_duplicates_in_index(self, index):
      # GH38051
      # Dropping a label removes every occurrence of it.
      if len(index) == 0 or isinstance(index, MultiIndex):
          pytest.skip("Test doesn't make sense for empty MultiIndex")
      if isinstance(index, IntervalIndex) and not IS64:
          pytest.skip("Cannot test IntervalIndex with int64 dtype on 32 bit platform")

      # Each unique label now appears exactly twice, back to back.
      doubled = index.unique().repeat(2)

      result = doubled.drop(doubled[0])
      tm.assert_index_equal(result, doubled[2:])
  @pytest.mark.parametrize(
      "attr",
      [
          "is_monotonic_increasing",
          "is_monotonic_decreasing",
          "_is_strictly_monotonic_increasing",
          "_is_strictly_monotonic_decreasing",
      ],
  )
  def test_is_monotonic_incomparable(self, attr):
      # Ints and a datetime are mixed in one index; every monotonicity
      # property must be falsy.
      mixed = Index([5, datetime.now(), 7])
      assert not getattr(mixed, attr)
  @pytest.mark.parametrize("values", [["foo", "bar", "quux"], {"foo", "bar", "quux"}])
  @pytest.mark.parametrize(
      "index,expected",
      [
          (Index(["qux", "baz", "foo", "bar"]), np.array([False, False, True, True])),
          (Index([]), np.array([], dtype=bool)),  # empty
      ],
  )
  def test_isin(self, values, index, expected):
      # ``values`` is supplied both as a list and as a set.
      tm.assert_numpy_array_equal(index.isin(values), expected)
  def test_isin_nan_common_object(
      self, nulls_fixture, nulls_fixture2, using_infer_string
  ):
      # Test cartesian product of null fixtures and ensure that we don't
      # mangle the various types (save a corner case with PyPy)
      idx = Index(["a", nulls_fixture])

      # The null entry is expected to match when:
      #  - both values are float NaN (all nans are the same), or
      #  - both fixtures are the very same object (NA type is preserved), or
      #  - string inference is on and the index ended up with string dtype.
      null_matches = (
          (
              isinstance(nulls_fixture, float)
              and isinstance(nulls_fixture2, float)
              and math.isnan(nulls_fixture)
              and math.isnan(nulls_fixture2)
          )
          or nulls_fixture is nulls_fixture2
          or (using_infer_string and idx.dtype == "string")
      )

      if null_matches:
          expected = np.array([False, True])
      else:
          expected = np.array([False, False])

      tm.assert_numpy_array_equal(idx.isin([nulls_fixture2]), expected)
  def test_isin_nan_common_float64(self, nulls_fixture, float_numpy_dtype):
      dtype = float_numpy_dtype

      if nulls_fixture is pd.NaT or nulls_fixture is pd.NA:
          # Check 1) that we cannot construct a float64 Index with this value
          #  and 2) that with an NaN we do not have .isin(nulls_fixture)
          msg = (
              r"float\(\) argument must be a string or a (real )?number, "
              f"not {type(nulls_fixture).__name__!r}"
          )
          with pytest.raises(TypeError, match=msg):
              Index([1.0, nulls_fixture], dtype=dtype)

          nan_idx = Index([1.0, np.nan], dtype=dtype)
          assert not nan_idx.isin([nulls_fixture]).any()
      else:
          idx = Index([1.0, nulls_fixture], dtype=dtype)

          # NaN is found at the null position.
          tm.assert_numpy_array_equal(
              idx.isin([np.nan]), np.array([False, True])
          )

          # we cannot compare NaT with NaN
          tm.assert_numpy_array_equal(
              idx.isin([pd.NaT]), np.array([False, False])
          )
  @pytest.mark.parametrize("level", [0, -1])
  @pytest.mark.parametrize(
      "index",
      [
          Index(["qux", "baz", "foo", "bar"]),
          Index([1.0, 2.0, 3.0, 4.0], dtype=np.float64),
      ],
  )
  def test_isin_level_kwarg(self, level, index):
      # On a flat index, ``level`` may be 0, -1 or the index name.
      # The last two labels are looked up together with one absent label.
      lookup = index.tolist()[-2:] + ["nonexisting"]
      expected = np.array([False, False, True, True])

      by_position = index.isin(lookup, level=level)
      tm.assert_numpy_array_equal(expected, by_position)

      # Same lookup, addressing the level by name.
      index.name = "foobar"
      by_name = index.isin(lookup, level="foobar")
      tm.assert_numpy_array_equal(expected, by_name)
  def test_isin_level_kwarg_bad_level_raises(self, index):
      # Level numbers outside [-nlevels, nlevels) must be rejected.
      out_of_range = (10, index.nlevels, -(index.nlevels + 1))
      for bad_level in out_of_range:
          with pytest.raises(IndexError, match="Too many levels"):
              index.isin([], level=bad_level)
  @pytest.mark.parametrize("label", [1.0, "foobar", "xyzzy", np.nan])
  def test_isin_level_kwarg_bad_label_raises(self, label, index):
      # A level label that matches none of the index names raises KeyError.
      # The message differs between MultiIndex and flat indexes.
      if isinstance(index, MultiIndex):
          renamed = index.rename(["foo", "bar"] + index.names[2:])
          msg = f"'Level {label} not found'"
      else:
          renamed = index.rename("foo")
          msg = rf"Requested level \({label}\) does not match index name \(foo\)"

      with pytest.raises(KeyError, match=msg):
          renamed.isin([], level=label)
  @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])])
  def test_isin_empty(self, empty):
      # see gh-16991
      # An empty lookup collection matches nothing.
      result = Index(["a", "b"]).isin(empty)
      tm.assert_numpy_array_equal(np.array([False, False]), result)
  def test_isin_string_null(self, string_dtype_no_object):
      # GH#55821
      # Looking up ``None`` in a string index without nulls finds nothing.
      str_index = Index(["a", "b"], dtype=string_dtype_no_object)
      found = str_index.isin([None])
      tm.assert_numpy_array_equal(found, np.array([False, False]))
  @pytest.mark.parametrize(
      "values",
      [
          [1, 2, 3, 4],
          [1.0, 2.0, 3.0, 4.0],
          [True, True, True, True],
          ["foo", "bar", "baz", "qux"],
          date_range("2018-01-01", freq="D", periods=4),
      ],
  )
  def test_boolean_cmp(self, values):
      # ``==`` against the data the index was built from is elementwise and
      # returns a boolean ndarray.
      comparison = Index(values) == values
      all_true = np.ones(4, dtype=bool)
      tm.assert_numpy_array_equal(comparison, all_true)
  @pytest.mark.parametrize("index", ["string"], indirect=True)
  @pytest.mark.parametrize("name,level", [(None, 0), ("a", "a")])
  def test_get_level_values(self, index, name, level):
      # On a flat index the only level is the index itself, addressed either
      # by position 0 or by its name.
      target = index.copy()
      if name:
          target.name = name

      tm.assert_index_equal(target.get_level_values(level), target)
  def test_slice_keep_name(self):
      # Slicing must carry the name over to the result.
      named = Index(["a", "b"], name="asdf")
      sliced = named[1:]
      assert named.name == sliced.name
  @pytest.mark.parametrize(
      "index",
      [
          "string",
          "datetime",
          "int64",
          "int32",
          "uint64",
          "uint32",
          "float64",
          "float32",
      ],
      indirect=True,
  )
  def test_join_self(self, index, join_type):
      # Joining an index with itself returns the same labels; only the outer
      # join is expected to come back sorted.
      joined = index.join(index, how=join_type)
      expected = index.sort_values() if join_type == "outer" else index
      tm.assert_index_equal(joined, expected)
  @pytest.mark.parametrize("method", ["strip", "rstrip", "lstrip"])
  def test_str_attribute(self, method):
      # GH9068
      # The ``.str`` accessor must agree with the plain ``str`` method applied
      # to each element.
      index = Index([" jack", "jill ", " jesse ", "frank"])
      builtin_method = getattr(str, method)
      expected = Index([builtin_method(item) for item in index.values])

      tm.assert_index_equal(getattr(index.str, method)(), expected)
  @pytest.mark.parametrize(
      "index",
      [
          Index(range(5)),
          date_range("2020-01-01", periods=10),
          MultiIndex.from_tuples([("foo", "1"), ("bar", "3")]),
          period_range(start="2000", end="2010", freq="Y"),
      ],
  )
  def test_str_attribute_raises(self, index):
      # Accessing ``.str`` on these index types must fail.
      expected_msg = "only use .str accessor"
      with pytest.raises(AttributeError, match=expected_msg):
          index.str.repeat(2)
  @pytest.mark.parametrize(
      "expand,expected",
      [
          (None, Index([["a", "b", "c"], ["d", "e"], ["f"]])),
          (False, Index([["a", "b", "c"], ["d", "e"], ["f"]])),
          (
              True,
              MultiIndex.from_tuples(
                  [("a", "b", "c"), ("d", "e", np.nan), ("f", np.nan, np.nan)]
              ),
          ),
      ],
  )
  def test_str_split(self, expand, expected):
      # ``expand=None`` means "call split() without the keyword at all".
      index = Index(["a b c", "d e", "f"])
      split_kwargs = {} if expand is None else {"expand": expand}

      result = index.str.split(**split_kwargs)
      tm.assert_index_equal(result, expected)
  def test_str_bool_return(self):
      # test boolean case, should return np.array instead of boolean Index
      labels = Index(["a1", "a2", "b1", "b2"])
      mask = labels.str.startswith("a")

      tm.assert_numpy_array_equal(mask, np.array([True, True, False, False]))
      assert isinstance(mask, np.ndarray)
  def test_str_bool_series_indexing(self):
      # The boolean mask from ``.str`` can be used directly to select rows of
      # a Series.
      labels = Index(["a1", "a2", "b1", "b2"])
      ser = Series(range(4), index=labels)

      selected = ser[ser.index.str.startswith("a")]
      tm.assert_series_equal(selected, Series(range(2), index=["a1", "a2"]))
  @pytest.mark.parametrize(
      "index,expected", [(Index(list("abcd")), True), (Index(range(4)), False)]
  )
  def test_tab_completion(self, index, expected):
      # GH 9910
      # ``str`` is listed by dir() for the string index but not the integer one.
      has_str_accessor = "str" in dir(index)
      assert has_str_accessor == expected
  def test_indexing_doesnt_change_class(self):
      # Selecting only the integer entries of a mixed index must still give
      # an object-dtype Index.
      mixed = Index([1, 2, 3, "a", "b", "c"])

      by_slice = mixed[1:3]
      assert by_slice.identical(Index([2, 3], dtype=np.object_))

      by_positions = mixed[[0, 1]]
      assert by_positions.identical(Index([1, 2], dtype=np.object_))
  def test_outer_join_sort(self):
      # An outer join of integers with datetimes must equal the union of the
      # object-cast operands; both operations emit a RuntimeWarning.
      ints = Index(np.random.default_rng(2).permutation(15))
      dates = date_range("2020-01-01", periods=10)

      with tm.assert_produces_warning(RuntimeWarning):
          joined = ints.join(dates, how="outer")

      with tm.assert_produces_warning(RuntimeWarning):
          union = ints.astype(object).union(dates.astype(object))

      tm.assert_index_equal(joined, union)
  def test_take_fill_value(self):
      # GH 12631
      index = Index(list("ABC"), name="xxx")
      positions = np.array([1, 0, -1])

      # Default: -1 is an ordinary "last element" position.
      taken = index.take(positions)
      tm.assert_index_equal(taken, Index(list("BAC"), name="xxx"))

      # fill_value
      taken = index.take(positions, fill_value=True)
      tm.assert_index_equal(taken, Index(["B", "A", np.nan], name="xxx"))

      # allow_fill=False
      taken = index.take(positions, allow_fill=False, fill_value=True)
      tm.assert_index_equal(taken, Index(["B", "A", "C"], name="xxx"))
  def test_take_fill_value_none_raises(self):
      # With a fill value given, any position below -1 is invalid.
      index = Index(list("ABC"), name="xxx")
      msg = (
          "When allow_fill=True and fill_value is not None, "
          "all indices must be >= -1"
      )

      for bad_position in (-2, -5):
          with pytest.raises(ValueError, match=msg):
              index.take(np.array([1, 0, bad_position]), fill_value=True)
  def test_take_bad_bounds_raises(self):
      # -5 is out of range for a three-element index.
      three_labels = Index(list("ABC"), name="xxx")
      with pytest.raises(IndexError, match="out of bounds"):
          three_labels.take(np.array([1, -5]))
  @pytest.mark.parametrize("name", [None, "foobar"])
  @pytest.mark.parametrize(
      "labels",
      [
          [],
          np.array([]),
          ["A", "B", "C"],
          ["C", "B", "A"],
          np.array(["A", "B", "C"]),
          np.array(["C", "B", "A"]),
          # Must preserve name even if dtype changes
          date_range("20130101", periods=3).values,
          date_range("20130101", periods=3).tolist(),
      ],
  )
  def test_reindex_preserves_name_if_target_is_list_or_ndarray(self, name, labels):
      # GH6552
      # Lists and ndarrays carry no name of their own, so the reindexed
      # result keeps the source index's name.
      source = Index([0, 1, 2])
      source.name = name

      reindexed = source.reindex(labels)[0]
      assert reindexed.name == name
  @pytest.mark.parametrize("labels", [[], np.array([]), np.array([], dtype=np.int64)])
  def test_reindex_preserves_type_if_target_is_empty_list_or_array(self, labels):
      # GH7774
      # An empty list/ndarray target leaves the source dtype in place.
      source = Index(list("abc"))
      reindexed = source.reindex(labels)[0]
      assert reindexed.dtype.type == source.dtype.type
  @pytest.mark.parametrize(
      "labels,dtype",
      [
          (DatetimeIndex([]), np.datetime64),
      ],
  )
  def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self, labels, dtype):
      # GH7774
      # An empty *Index* target has a dtype of its own, and that dtype wins.
      source = Index(list("abc"))
      reindexed = source.reindex(labels)[0]
      assert reindexed.dtype.type == dtype
  def test_reindex_doesnt_preserve_type_if_target_is_empty_index_numeric(
      self, any_real_numpy_dtype
  ):
      # GH7774
      # Same as the datetime case, for every real numpy dtype.
      dtype = any_real_numpy_dtype
      source = Index(list("abc"))
      empty_target = Index([], dtype=dtype)

      reindexed = source.reindex(empty_target)[0]
      assert reindexed.dtype == dtype
  def test_reindex_no_type_preserve_target_empty_mi(self):
      # Reindexing onto an empty MultiIndex keeps that MultiIndex's level
      # dtypes.
      source = Index(list("abc"))
      empty_mi = MultiIndex(
          [Index([], np.int64), Index([], np.float64)], [[], []]
      )

      reindexed = source.reindex(empty_mi)[0]
      first_level, second_level = reindexed.levels[0], reindexed.levels[1]
      assert first_level.dtype.type == np.int64
      assert second_level.dtype.type == np.float64
  def test_reindex_ignoring_level(self):
      # GH#35132
      # Passing ``level`` on a flat index must still yield the target labels.
      source = Index([1, 2, 3], name="x")
      target = Index([1, 2, 3, 4], name="x")

      reindexed, _ = source.reindex(target, level="x")
      tm.assert_index_equal(reindexed, Index([1, 2, 3, 4], name="x"))
  def test_groupby(self):
      # Grouping by an array of keys returns a dict mapping each key to the
      # Index of labels that carry it.
      keys = np.array([1, 1, 2, 2, 2])
      grouped = Index(range(5)).groupby(keys)

      tm.assert_dict_equal(grouped, {1: Index([0, 1]), 2: Index([2, 3, 4])})
  @pytest.mark.parametrize(
      "mi,expected",
      [
          (MultiIndex.from_tuples([(1, 2), (4, 5)]), np.array([True, True])),
          (MultiIndex.from_tuples([(1, 2), (4, 6)]), np.array([True, False])),
      ],
  )
  def test_equals_op_multiindex(self, mi, expected):
      # GH9785
      # test comparisons of multiindex
      frame_index = MultiIndex.from_arrays([[1, 4], [2, 5]], names=["a", "b"])
      df = DataFrame([3, 6], columns=["c"], index=frame_index)

      # ``==`` compares tuple by tuple.
      tm.assert_numpy_array_equal(df.index == mi, expected)
  def test_equals_op_multiindex_identify(self):
      # A MultiIndex compared with itself is True at every position.
      frame_index = MultiIndex.from_arrays([[1, 4], [2, 5]], names=["a", "b"])
      df = DataFrame([3, 6], columns=["c"], index=frame_index)

      self_comparison = df.index == df.index
      tm.assert_numpy_array_equal(self_comparison, np.array([True, True]))
  @pytest.mark.parametrize(
      "index",
      [
          MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]),
          Index(["foo", "bar", "baz"]),
      ],
  )
  def test_equals_op_mismatched_multiindex_raises(self, index):
      # Comparing a two-row MultiIndex with a three-element index must fail
      # on the length mismatch.
      frame_index = MultiIndex.from_arrays([[1, 4], [2, 5]], names=["a", "b"])
      df = DataFrame([3, 6], columns=["c"], index=frame_index)

      with pytest.raises(ValueError, match="Lengths must match"):
          df.index == index
  def test_equals_op_index_vs_mi_same_length(self, using_infer_string):
      # Same length, but tuples never equal strings: all False, no error.
      tuples = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)])
      strings = Index(["foo", "bar", "baz"])

      comparison = tuples == strings
      tm.assert_numpy_array_equal(comparison, np.array([False, False, False]))
  @pytest.mark.parametrize(
      "dt_conv, arg",
      [
          (pd.to_datetime, ["2000-01-01", "2000-01-02"]),
          (pd.to_timedelta, ["01:02:03", "01:02:04"]),
      ],
  )
  def test_dt_conversion_preserves_name(self, dt_conv, arg):
      # GH 10875
      # The converters must carry the index name over to their result.
      named = Index(arg, name="label")
      converted = dt_conv(named)
      assert named.name == converted.name
  def test_cached_properties_not_settable(self):
      # ``is_unique`` is read-only; assigning to it must fail.
      idx = Index([1, 2, 3])
      with pytest.raises(AttributeError, match="Can't set attribute"):
          idx.is_unique = False
  def test_tab_complete_warning(self, ip):
      # https://github.com/pandas-dev/pandas/issues/16409
      pytest.importorskip("IPython", minversion="6.0.0")
      from IPython.core.completer import provisionalcompleter

      # Create an Index inside the IPython session provided by the fixture.
      ip.run_cell("import pandas as pd; idx = pd.Index([1, 2])")

      # GH 31324 newer jedi version raises Deprecation warning;
      #  appears resolved 2021-02-02
      with tm.assert_produces_warning(
          None, raise_on_extra_warnings=False
      ), provisionalcompleter("ignore"):
          list(ip.Completer.completions("idx.", 4))
  def test_contains_method_removed(self, index):
      # GH#30103 method removed for all types except IntervalIndex
      if not isinstance(index, IntervalIndex):
          msg = f"'{type(index).__name__}' object has no attribute 'contains'"
          with pytest.raises(AttributeError, match=msg):
              index.contains(1)
      else:
          # Still available here; only check that the call succeeds.
          index.contains(1)
  def test_sortlevel(self):
      # ``ascending`` must be a bool or a one-element list of bools.
      # Invalid values are rejected with TypeError; asserting on that type
      # instead of the bare ``Exception`` keeps unrelated failures from
      # satisfying the check.
      index = Index([5, 4, 3, 2, 1])

      # Not a bool and not a list.
      with pytest.raises(TypeError, match="ascending must be a single bool value or"):
          index.sortlevel(ascending="True")

      # List of the wrong length.
      with pytest.raises(
          TypeError, match="ascending must be a list of bool values of length 1"
      ):
          index.sortlevel(ascending=[True, True])

      # One-element list whose element is not a bool.
      with pytest.raises(TypeError, match="ascending must be a bool value"):
          index.sortlevel(ascending=["True"])

      # Valid inputs: sortlevel returns a tuple whose first item is the
      # sorted index.
      expected = Index([1, 2, 3, 4, 5])
      result = index.sortlevel(ascending=[True])
      tm.assert_index_equal(result[0], expected)

      result = index.sortlevel(ascending=True)
      tm.assert_index_equal(result[0], expected)

      expected = Index([5, 4, 3, 2, 1])
      result = index.sortlevel(ascending=False)
      tm.assert_index_equal(result[0], expected)
  def test_sortlevel_na_position(self):
      # GH#51612
      # na_position="first" moves the NaN ahead of the real value.
      with_nan = Index([1, np.nan])
      sorted_index = with_nan.sortlevel(na_position="first")[0]
      tm.assert_index_equal(sorted_index, Index([np.nan, 1]))
  @pytest.mark.parametrize(
      "periods, expected_results",
      [
          (1, [np.nan, 10, 10, 10, 10]),
          (2, [np.nan, np.nan, 20, 20, 20]),
          (3, [np.nan, np.nan, np.nan, 30, 30]),
      ],
  )
  def test_index_diff(self, periods, expected_results):
      # GH#19708
      # The first ``periods`` entries have nothing to subtract and are NaN.
      evenly_spaced = Index([10, 20, 30, 40, 50])
      differences = evenly_spaced.diff(periods)
      tm.assert_index_equal(differences, Index(expected_results))
  @pytest.mark.parametrize(
      "decimals, expected_results",
      [
          (0, [1.0, 2.0, 3.0]),
          (1, [1.2, 2.3, 3.5]),
          (2, [1.23, 2.35, 3.46]),
      ],
  )
  def test_index_round(self, decimals, expected_results):
      # GH#19708
      # Rounding a float index to the requested number of decimals.
      floats = Index([1.234, 2.345, 3.456])
      rounded = floats.round(decimals)
      tm.assert_index_equal(rounded, Index(expected_results))
  class TestMixedIntIndex:
      # Mostly the tests from common.py for which the results differ
      # in py2 and py3 because ints and strings are uncomparable in py3
      # (GH 13514)
      @pytest.fixture
      def simple_index(self) -> Index:
          # Ints and strings interleaved in a single index.
          return Index([0, "a", 1, "b", 2, "c"])

      def test_argsort(self, simple_index):
          # Ordering ints against strings is undefined, so argsort must fail.
          with pytest.raises(TypeError, match="'>|<' not supported"):
              simple_index.argsort()

      def test_numpy_argsort(self, simple_index):
          # Same failure when going through numpy's entry point.
          with pytest.raises(TypeError, match="'>|<' not supported"):
              np.argsort(simple_index)

      def test_copy_name(self, simple_index):
          # Check that "name" argument passed at initialization is honoured
          # GH12309
          index_cls = type(simple_index)
          first = index_cls(simple_index, copy=True, name="mario")
          second = type(first)(first, copy=False)

          # Even though "copy=False", we want a new object.
          assert first is not second
          tm.assert_index_equal(first, second)

          assert first.name == "mario"
          assert second.name == "mario"

          # The shared name must survive arithmetic between two Series.
          left = Series(2, index=first)
          right = Series(3, index=second[:-1])
          product = left * right

          assert product.index.name == "mario"

      def test_copy_name2(self):
          # Check that adding a "name" parameter to the copy is honored
          # GH14302
          original = Index([1, 2], name="MyName")

          plain_copy = original.copy()
          tm.assert_index_equal(original, plain_copy)

          renamed_copy = original.copy(name="NewName")
          tm.assert_index_equal(original, renamed_copy, check_names=False)

          # The source keeps its own name; only the copy is renamed.
          assert original.name == "MyName"
          assert renamed_copy.name == "NewName"

      def test_unique_na(self):
          # NaN counts as one distinct value and the name is preserved.
          with_dupes = Index([2, np.nan, 2, 1], name="my_index")
          deduplicated = with_dupes.unique()
          tm.assert_index_equal(
              deduplicated, Index([2, np.nan, 1], name="my_index")
          )

      def test_logical_compat(self, simple_index):
          # all()/any() must agree with the same reductions on ``.values``.
          values = simple_index.values
          assert simple_index.all() == values.all()
          assert simple_index.any() == values.any()

      @pytest.mark.parametrize("how", ["any", "all"])
      @pytest.mark.parametrize("dtype", [None, object, "category"])
      @pytest.mark.parametrize(
          "vals,expected",
          [
              ([1, 2, 3], [1, 2, 3]),
              ([1.0, 2.0, 3.0], [1.0, 2.0, 3.0]),
              ([1.0, 2.0, np.nan, 3.0], [1.0, 2.0, 3.0]),
              (["A", "B", "C"], ["A", "B", "C"]),
              (["A", np.nan, "B", "C"], ["A", "B", "C"]),
          ],
      )
      def test_dropna(self, how, dtype, vals, expected):
          # GH 6194
          # Missing entries are removed; the requested dtype is kept.
          source = Index(vals, dtype=dtype)
          cleaned = source.dropna(how=how)
          tm.assert_index_equal(cleaned, Index(expected, dtype=dtype))

      @pytest.mark.parametrize("how", ["any", "all"])
      @pytest.mark.parametrize(
          "index,expected",
          [
              (
                  DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]),
                  DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]),
              ),
              (
                  DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03", pd.NaT]),
                  DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]),
              ),
              (
                  TimedeltaIndex(["1 days", "2 days", "3 days"]),
                  TimedeltaIndex(["1 days", "2 days", "3 days"]),
              ),
              (
                  TimedeltaIndex([pd.NaT, "1 days", "2 days", "3 days", pd.NaT]),
                  TimedeltaIndex(["1 days", "2 days", "3 days"]),
              ),
              (
                  PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"),
                  PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"),
              ),
              (
                  PeriodIndex(["2012-02", "2012-04", "NaT", "2012-05"], freq="M"),
                  PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"),
              ),
          ],
      )
      def test_dropna_dt_like(self, how, index, expected):
          # NaT is dropped from datetime, timedelta and period indexes.
          tm.assert_index_equal(index.dropna(how=how), expected)

      def test_dropna_invalid_how_raises(self):
          # Only "any" and "all" are exercised as valid elsewhere; "xxx" fails.
          three_ints = Index([1, 2, 3])
          with pytest.raises(ValueError, match="invalid how option: xxx"):
              three_ints.dropna(how="xxx")

      @pytest.mark.parametrize(
          "index",
          [
              Index([np.nan]),
              Index([np.nan, 1]),
              Index([1, 2, np.nan]),
              Index(["a", "b", np.nan]),
              pd.to_datetime(["NaT"]),
              pd.to_datetime(["NaT", "2000-01-01"]),
              pd.to_datetime(["2000-01-01", "NaT", "2000-01-02"]),
              pd.to_timedelta(["1 day", "NaT"]),
          ],
      )
      def test_is_monotonic_na(self, index):
          # An index containing a missing value is monotonic in no sense.
          monotonic_attrs = (
              "is_monotonic_increasing",
              "is_monotonic_decreasing",
              "_is_strictly_monotonic_increasing",
              "_is_strictly_monotonic_decreasing",
          )
          for attr in monotonic_attrs:
              assert getattr(index, attr) is False

      @pytest.mark.parametrize("dtype", ["f8", "m8[ns]", "M8[us]"])
      @pytest.mark.parametrize("unique_first", [True, False])
      def test_is_monotonic_unique_na(self, dtype, unique_first):
          # GH 55755
          # The answers must not depend on whether ``is_unique`` is read
          # before or after the monotonicity properties.
          index = Index([None, 1, 1], dtype=dtype)

          if unique_first:
              assert index.is_unique is False

          assert index.is_monotonic_increasing is False
          assert index.is_monotonic_decreasing is False

          if not unique_first:
              assert index.is_unique is False

      def test_int_name_format(self, frame_or_series):
          # An integer index name must show up in the repr.
          zero_named = Index(["a", "b", "c"], name=0)
          obj = frame_or_series(list(range(3)), index=zero_named)
          assert "0" in repr(obj)

      def test_str_to_bytes_raises(self):
          # GH 26447
          digit_strings = Index([str(x) for x in range(10)])
          msg = "^'str' object cannot be interpreted as an integer$"
          with pytest.raises(TypeError, match=msg):
              bytes(digit_strings)

      @pytest.mark.filterwarnings("ignore:elementwise comparison failed:FutureWarning")
      def test_index_with_tuple_bool(self):
          # GH34123
          # TODO: also this op right now produces FutureWarning from numpy
          #  https://github.com/numpy/numpy/issues/11521
          pairs = Index([("a", "b"), ("b", "c"), ("c", "a")])

          # The tuple is compared against each element as a whole.
          matches = pairs == ("c", "a")
          tm.assert_numpy_array_equal(matches, np.array([False, False, True]))
  class TestIndexUtils:
      @pytest.mark.parametrize(
          "data, names, expected",
          [
              ([[1, 2, 3]], None, Index([1, 2, 3])),
              ([[1, 2, 3]], ["name"], Index([1, 2, 3], name="name")),
              (
                  [["a", "a"], ["c", "d"]],
                  None,
                  MultiIndex([["a"], ["c", "d"]], [[0, 0], [0, 1]]),
              ),
              (
                  [["a", "a"], ["c", "d"]],
                  ["L1", "L2"],
                  MultiIndex([["a"], ["c", "d"]], [[0, 0], [0, 1]], names=["L1", "L2"]),
              ),
          ],
      )
      def test_ensure_index_from_sequences(self, data, names, expected):
          # One sequence gives a flat Index; several give a MultiIndex.
          built = ensure_index_from_sequences(data, names)
          tm.assert_index_equal(built, expected)

      def test_ensure_index_mixed_closed_intervals(self):
          # GH27172
          # Intervals with differing ``closed`` values end up in an
          # object-dtype Index.
          closed_kinds = ["left", "right", "neither", "both"]
          intervals = [
              pd.Interval(start, start + 1, closed=closed)
              for start, closed in enumerate(closed_kinds)
          ]

          result = ensure_index(intervals)
          tm.assert_index_equal(result, Index(intervals, dtype=object))

      def test_ensure_index_uint64(self):
          # with both 0 and a large-uint64, np.array will infer to float64
          #  https://github.com/numpy/numpy/issues/19146
          #  but a more accurate choice would be uint64
          values = [0, np.iinfo(np.uint64).max]

          result = ensure_index(values)
          # The values must round-trip exactly.
          assert list(result) == values

          tm.assert_index_equal(result, Index(values, dtype="uint64"))

      def test_get_combined_index(self):
          # Combining no indexes at all yields an empty Index.
          combined = _get_combined_index([])
          tm.assert_index_equal(combined, Index([]))
  @pytest.mark.parametrize(
      "opname",
      [
          "eq",
          "ne",
          "le",
          "lt",
          "ge",
          "gt",
          "add",
          "radd",
          "sub",
          "rsub",
          "mul",
          "rmul",
          "truediv",
          "rtruediv",
          "floordiv",
          "rfloordiv",
          "pow",
          "rpow",
          "mod",
          "divmod",
      ],
  )
  def test_generated_op_names(opname, index):
      # Each operator dunder must report its own name via ``__name__``.
      dunder = f"__{opname}__"
      bound_method = getattr(index, dunder)
      assert bound_method.__name__ == dunder
  @pytest.mark.parametrize(
      "klass",
      [
          partial(CategoricalIndex, data=[1]),
          partial(DatetimeIndex, data=["2020-01-01"]),
          partial(PeriodIndex, data=["2020-01-01"]),
          partial(TimedeltaIndex, data=["1 day"]),
          partial(RangeIndex, data=range(1)),
          partial(IntervalIndex, data=[pd.Interval(0, 1)]),
          partial(Index, data=["a"], dtype=object),
          partial(MultiIndex, levels=[1], codes=[0]),
      ],
  )
  def test_index_subclass_constructor_wrong_kwargs(klass):
      # GH #19348
      # Every constructor must reject an unknown keyword.
      expected_msg = "unexpected keyword argument"
      with pytest.raises(TypeError, match=expected_msg):
          klass(foo="bar")
  def test_deprecated_fastpath():
      # ``fastpath`` is not an accepted keyword for any of these constructors.
      msg = "[Uu]nexpected keyword argument"

      with pytest.raises(TypeError, match=msg):
          Index(np.array(["a", "b"], dtype=object), name="test", fastpath=True)

      with pytest.raises(TypeError, match=msg):
          Index(np.array([1, 2, 3], dtype="int64"), name="test", fastpath=True)

      with pytest.raises(TypeError, match=msg):
          RangeIndex(0, 5, 2, name="test", fastpath=True)

      with pytest.raises(TypeError, match=msg):
          CategoricalIndex(["a", "b", "c"], name="test", fastpath=True)
  def test_shape_of_invalid_index():
      # Pre-2.0, it was possible to create "invalid" index objects backed by
      # a multi-dimensional array (see https://github.com/pandas-dev/pandas/issues/27125
      # about this). However, as long as this is not solved in general,this test ensures
      # that the returned shape is consistent with this underlying array for
      # compat with matplotlib (see https://github.com/pandas-dev/pandas/issues/27775)
      four_ints = Index([0, 1, 2, 3])

      # GH#30588 multi-dimensional indexing deprecated
      with pytest.raises(ValueError, match="Multi-dimensional indexing"):
          four_ints[:, None]
  @pytest.mark.parametrize("dtype", [None, np.int64, np.uint64, np.float64])
  def test_validate_1d_input(dtype):
      # GH#27125 check that we do not have >1-dimensional input
      msg = "Index data must be 1-dimensional"

      # Three-dimensional ndarray.
      cube = np.arange(8).reshape(2, 2, 2)
      with pytest.raises(ValueError, match=msg):
          Index(cube, dtype=dtype)

      # Two-dimensional DataFrame.
      frame = DataFrame(cube.reshape(4, 2))
      with pytest.raises(ValueError, match=msg):
          Index(frame, dtype=dtype)

      # GH#13601 trying to assign a multi-dimensional array to an index is not allowed
      ser = Series(0, range(4))
      with pytest.raises(ValueError, match=msg):
          ser.index = np.array([[2, 3]] * 4, dtype=dtype)
  @pytest.mark.parametrize(
      "klass, extra_kwargs",
      [
          [Index, {}],
          # NOTE(review): each lambda closes over the comprehension variable
          # ``dtyp`` and looks it up when called, so every one of them uses the
          # LAST dtype in tm.ALL_REAL_NUMPY_DTYPES. Binding it per lambda would
          # change which dtypes are exercised -- confirm intent before changing.
          *[[lambda x: Index(x, dtype=dtyp), {}] for dtyp in tm.ALL_REAL_NUMPY_DTYPES],
          [DatetimeIndex, {}],
          [TimedeltaIndex, {}],
          [PeriodIndex, {"freq": "Y"}],
      ],
  )
  def test_construct_from_memoryview(klass, extra_kwargs):
      # GH 13120
      # Building from a memoryview must give the same index as building from
      # the equivalent list.
      from_buffer = klass(memoryview(np.arange(2000, 2005)), **extra_kwargs)
      from_list = klass(list(range(2000, 2005)), **extra_kwargs)
      tm.assert_index_equal(from_buffer, from_list, exact=True)
  @pytest.mark.parametrize("op", [operator.lt, operator.gt])
  def test_nan_comparison_same_object(op):
      # GH#47105
      # NaN is neither less than nor greater than NaN, whether the right-hand
      # side is the very same object or a copy of it.
      nan_index = Index([np.nan])
      expected = np.array([False])

      for other in (nan_index, nan_index.copy()):
          tm.assert_numpy_array_equal(op(nan_index, other), expected)
  @td.skip_if_no("pyarrow")
  def test_is_monotonic_pyarrow_list_type():
      # GH 57333
      import pyarrow as pa

      # Index backed by an Arrow list<int64> dtype; neither monotonicity
      # property may be truthy.
      list_dtype = pd.ArrowDtype(pa.list_(pa.int64()))
      idx = Index([[1], [2, 3]], dtype=list_dtype)

      assert not idx.is_monotonic_increasing
      assert not idx.is_monotonic_decreasing
  1482. assert not idx.is_monotonic_decreasing