test_loc.py 32 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000
  1. import numpy as np
  2. import pytest
  3. from pandas.errors import (
  4. IndexingError,
  5. PerformanceWarning,
  6. )
  7. import pandas as pd
  8. from pandas import (
  9. DataFrame,
  10. Index,
  11. MultiIndex,
  12. Series,
  13. )
  14. import pandas._testing as tm
  15. @pytest.fixture
  16. def single_level_multiindex():
  17. """single level MultiIndex"""
  18. return MultiIndex(
  19. levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
  20. )
  21. @pytest.fixture
  22. def frame_random_data_integer_multi_index():
  23. levels = [[0, 1], [0, 1, 2]]
  24. codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
  25. index = MultiIndex(levels=levels, codes=codes)
  26. return DataFrame(np.random.default_rng(2).standard_normal((6, 2)), index=index)
  27. class TestMultiIndexLoc:
  28. @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  29. @pytest.mark.parametrize("has_ref", [True, False])
  30. def test_loc_setitem_frame_with_multiindex(
  31. self, multiindex_dataframe_random_data, has_ref
  32. ):
  33. frame = multiindex_dataframe_random_data
  34. if has_ref:
  35. view = frame[:]
  36. frame.loc[("bar", "two"), "B"] = 5
  37. assert frame.loc[("bar", "two"), "B"] == 5
  38. # with integer labels
  39. df = frame.copy()
  40. df.columns = list(range(3))
  41. if has_ref:
  42. view = df[:] # noqa: F841
  43. df.loc[("bar", "two"), 1] = 7
  44. assert df.loc[("bar", "two"), 1] == 7
  45. def test_loc_getitem_general(self, any_real_numpy_dtype):
  46. # GH#2817
  47. dtype = any_real_numpy_dtype
  48. data = {
  49. "amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
  50. "col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
  51. "num": {0: 12, 1: 11, 2: 12, 3: 12, 4: 12},
  52. }
  53. df = DataFrame(data)
  54. df = df.astype({"col": dtype, "num": dtype})
  55. df = df.set_index(keys=["col", "num"])
  56. key = 4.0, 12
  57. # emits a PerformanceWarning, ok
  58. with tm.assert_produces_warning(PerformanceWarning):
  59. tm.assert_frame_equal(df.loc[key], df.iloc[2:])
  60. # this is ok
  61. return_value = df.sort_index(inplace=True)
  62. assert return_value is None
  63. res = df.loc[key]
  64. # col has float dtype, result should be float64 Index
  65. col_arr = np.array([4.0] * 3, dtype=dtype)
  66. year_arr = np.array([12] * 3, dtype=dtype)
  67. index = MultiIndex.from_arrays([col_arr, year_arr], names=["col", "num"])
  68. expected = DataFrame({"amount": [222, 333, 444]}, index=index)
  69. tm.assert_frame_equal(res, expected)
  70. def test_loc_getitem_multiindex_missing_label_raises(self):
  71. # GH#21593
  72. df = DataFrame(
  73. np.random.default_rng(2).standard_normal((3, 3)),
  74. columns=[[2, 2, 4], [6, 8, 10]],
  75. index=[[4, 4, 8], [8, 10, 12]],
  76. )
  77. with pytest.raises(KeyError, match=r"^2$"):
  78. df.loc[2]
  79. def test_loc_getitem_list_of_tuples_with_multiindex(
  80. self, multiindex_year_month_day_dataframe_random_data
  81. ):
  82. ser = multiindex_year_month_day_dataframe_random_data["A"]
  83. expected = ser.reindex(ser.index[49:51])
  84. result = ser.loc[[(2000, 3, 10), (2000, 3, 13)]]
  85. tm.assert_series_equal(result, expected)
  86. def test_loc_getitem_series(self):
  87. # GH14730
  88. # passing a series as a key with a MultiIndex
  89. index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
  90. x = Series(index=index, data=range(9), dtype=np.float64)
  91. y = Series([1, 3])
  92. expected = Series(
  93. data=[0, 1, 2, 6, 7, 8],
  94. index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
  95. dtype=np.float64,
  96. )
  97. result = x.loc[y]
  98. tm.assert_series_equal(result, expected)
  99. result = x.loc[[1, 3]]
  100. tm.assert_series_equal(result, expected)
  101. # GH15424
  102. y1 = Series([1, 3], index=[1, 2])
  103. result = x.loc[y1]
  104. tm.assert_series_equal(result, expected)
  105. empty = Series(data=[], dtype=np.float64)
  106. expected = Series(
  107. [],
  108. index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64),
  109. dtype=np.float64,
  110. )
  111. result = x.loc[empty]
  112. tm.assert_series_equal(result, expected)
  113. def test_loc_getitem_array(self):
  114. # GH15434
  115. # passing an array as a key with a MultiIndex
  116. index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
  117. x = Series(index=index, data=range(9), dtype=np.float64)
  118. y = np.array([1, 3])
  119. expected = Series(
  120. data=[0, 1, 2, 6, 7, 8],
  121. index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
  122. dtype=np.float64,
  123. )
  124. result = x.loc[y]
  125. tm.assert_series_equal(result, expected)
  126. # empty array:
  127. empty = np.array([])
  128. expected = Series(
  129. [],
  130. index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64),
  131. dtype="float64",
  132. )
  133. result = x.loc[empty]
  134. tm.assert_series_equal(result, expected)
  135. # 0-dim array (scalar):
  136. scalar = np.int64(1)
  137. expected = Series(data=[0, 1, 2], index=["A", "B", "C"], dtype=np.float64)
  138. result = x.loc[scalar]
  139. tm.assert_series_equal(result, expected)
  140. def test_loc_multiindex_labels(self):
  141. df = DataFrame(
  142. np.random.default_rng(2).standard_normal((3, 3)),
  143. columns=[["i", "i", "j"], ["A", "A", "B"]],
  144. index=[["i", "i", "j"], ["X", "X", "Y"]],
  145. )
  146. # the first 2 rows
  147. expected = df.iloc[[0, 1]].droplevel(0)
  148. result = df.loc["i"]
  149. tm.assert_frame_equal(result, expected)
  150. # 2nd (last) column
  151. expected = df.iloc[:, [2]].droplevel(0, axis=1)
  152. result = df.loc[:, "j"]
  153. tm.assert_frame_equal(result, expected)
  154. # bottom right corner
  155. expected = df.iloc[[2], [2]].droplevel(0).droplevel(0, axis=1)
  156. result = df.loc["j"].loc[:, "j"]
  157. tm.assert_frame_equal(result, expected)
  158. # with a tuple
  159. expected = df.iloc[[0, 1]]
  160. result = df.loc[("i", "X")]
  161. tm.assert_frame_equal(result, expected)
  162. def test_loc_multiindex_ints(self):
  163. df = DataFrame(
  164. np.random.default_rng(2).standard_normal((3, 3)),
  165. columns=[[2, 2, 4], [6, 8, 10]],
  166. index=[[4, 4, 8], [8, 10, 12]],
  167. )
  168. expected = df.iloc[[0, 1]].droplevel(0)
  169. result = df.loc[4]
  170. tm.assert_frame_equal(result, expected)
  171. def test_loc_multiindex_missing_label_raises(self):
  172. df = DataFrame(
  173. np.random.default_rng(2).standard_normal((3, 3)),
  174. columns=[[2, 2, 4], [6, 8, 10]],
  175. index=[[4, 4, 8], [8, 10, 12]],
  176. )
  177. with pytest.raises(KeyError, match=r"^2$"):
  178. df.loc[2]
  179. @pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])])
  180. def test_loc_multiindex_list_missing_label(self, key, pos):
  181. # GH 27148 - lists with missing labels _do_ raise
  182. df = DataFrame(
  183. np.random.default_rng(2).standard_normal((3, 3)),
  184. columns=[[2, 2, 4], [6, 8, 10]],
  185. index=[[4, 4, 8], [8, 10, 12]],
  186. )
  187. with pytest.raises(KeyError, match="not in index"):
  188. df.loc[key]
  189. def test_loc_multiindex_too_many_dims_raises(self):
  190. # GH 14885
  191. s = Series(
  192. range(8),
  193. index=MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
  194. )
  195. with pytest.raises(KeyError, match=r"^\('a', 'b'\)$"):
  196. s.loc["a", "b"]
  197. with pytest.raises(KeyError, match=r"^\('a', 'd', 'g'\)$"):
  198. s.loc["a", "d", "g"]
  199. with pytest.raises(IndexingError, match="Too many indexers"):
  200. s.loc["a", "d", "g", "j"]
  201. def test_loc_multiindex_indexer_none(self):
  202. # GH6788
  203. # multi-index indexer is None (meaning take all)
  204. attributes = ["Attribute" + str(i) for i in range(1)]
  205. attribute_values = ["Value" + str(i) for i in range(5)]
  206. index = MultiIndex.from_product([attributes, attribute_values])
  207. df = 0.1 * np.random.default_rng(2).standard_normal((10, 1 * 5)) + 0.5
  208. df = DataFrame(df, columns=index)
  209. result = df[attributes]
  210. tm.assert_frame_equal(result, df)
  211. # GH 7349
  212. # loc with a multi-index seems to be doing fallback
  213. df = DataFrame(
  214. np.arange(12).reshape(-1, 1),
  215. index=MultiIndex.from_product([[1, 2, 3, 4], [1, 2, 3]]),
  216. )
  217. expected = df.loc[([1, 2],), :]
  218. result = df.loc[[1, 2]]
  219. tm.assert_frame_equal(result, expected)
  220. def test_loc_multiindex_incomplete(self):
  221. # GH 7399
  222. # incomplete indexers
  223. s = Series(
  224. np.arange(15, dtype="int64"),
  225. MultiIndex.from_product([range(5), ["a", "b", "c"]]),
  226. )
  227. expected = s.loc[:, "a":"c"]
  228. result = s.loc[0:4, "a":"c"]
  229. tm.assert_series_equal(result, expected)
  230. result = s.loc[:4, "a":"c"]
  231. tm.assert_series_equal(result, expected)
  232. result = s.loc[0:, "a":"c"]
  233. tm.assert_series_equal(result, expected)
  234. # GH 7400
  235. # multiindexer getitem with list of indexers skips wrong element
  236. s = Series(
  237. np.arange(15, dtype="int64"),
  238. MultiIndex.from_product([range(5), ["a", "b", "c"]]),
  239. )
  240. expected = s.iloc[[6, 7, 8, 12, 13, 14]]
  241. result = s.loc[2:4:2, "a":"c"]
  242. tm.assert_series_equal(result, expected)
  243. def test_get_loc_single_level(self, single_level_multiindex):
  244. single_level = single_level_multiindex
  245. s = Series(
  246. np.random.default_rng(2).standard_normal(len(single_level)),
  247. index=single_level,
  248. )
  249. for k in single_level.values:
  250. s[k]
  251. def test_loc_getitem_int_slice(self):
  252. # GH 3053
  253. # loc should treat integer slices like label slices
  254. index = MultiIndex.from_product([[6, 7, 8], ["a", "b"]])
  255. df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index)
  256. result = df.loc[6:8, :]
  257. expected = df
  258. tm.assert_frame_equal(result, expected)
  259. index = MultiIndex.from_product([[10, 20, 30], ["a", "b"]])
  260. df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index)
  261. result = df.loc[20:30, :]
  262. expected = df.iloc[2:]
  263. tm.assert_frame_equal(result, expected)
  264. # doc examples
  265. result = df.loc[10, :]
  266. expected = df.iloc[0:2]
  267. expected.index = ["a", "b"]
  268. tm.assert_frame_equal(result, expected)
  269. result = df.loc[:, 10]
  270. expected = df[10]
  271. tm.assert_frame_equal(result, expected)
  272. @pytest.mark.parametrize(
  273. "indexer_type_1", (list, tuple, set, slice, np.ndarray, Series, Index)
  274. )
  275. @pytest.mark.parametrize(
  276. "indexer_type_2", (list, tuple, set, slice, np.ndarray, Series, Index)
  277. )
  278. def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
  279. # GH #19686
  280. # .loc should work with nested indexers which can be
  281. # any list-like objects (see `is_list_like` (`pandas.api.types`)) or slices
  282. def convert_nested_indexer(indexer_type, keys):
  283. if indexer_type == np.ndarray:
  284. return np.array(keys)
  285. if indexer_type == slice:
  286. return slice(*keys)
  287. return indexer_type(keys)
  288. a = [10, 20, 30]
  289. b = [1, 2, 3]
  290. index = MultiIndex.from_product([a, b])
  291. df = DataFrame(
  292. np.arange(len(index), dtype="int64"), index=index, columns=["Data"]
  293. )
  294. keys = ([10, 20], [2, 3])
  295. types = (indexer_type_1, indexer_type_2)
  296. # check indexers with all the combinations of nested objects
  297. # of all the valid types
  298. indexer = tuple(
  299. convert_nested_indexer(indexer_type, k)
  300. for indexer_type, k in zip(types, keys)
  301. )
  302. if indexer_type_1 is set or indexer_type_2 is set:
  303. with pytest.raises(TypeError, match="as an indexer is not supported"):
  304. df.loc[indexer, "Data"]
  305. return
  306. else:
  307. result = df.loc[indexer, "Data"]
  308. expected = Series(
  309. [1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys)
  310. )
  311. tm.assert_series_equal(result, expected)
  312. def test_multiindex_loc_one_dimensional_tuple(self, frame_or_series):
  313. # GH#37711
  314. mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")])
  315. obj = frame_or_series([1, 2], index=mi)
  316. obj.loc[("a",)] = 0
  317. expected = frame_or_series([0, 2], index=mi)
  318. tm.assert_equal(obj, expected)
  319. @pytest.mark.parametrize("indexer", [("a",), ("a")])
  320. def test_multiindex_one_dimensional_tuple_columns(self, indexer):
  321. # GH#37711
  322. mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")])
  323. obj = DataFrame([1, 2], index=mi)
  324. obj.loc[indexer, :] = 0
  325. expected = DataFrame([0, 2], index=mi)
  326. tm.assert_frame_equal(obj, expected)
  327. @pytest.mark.parametrize(
  328. "indexer, exp_value", [(slice(None), 1.0), ((1, 2), np.nan)]
  329. )
  330. def test_multiindex_setitem_columns_enlarging(self, indexer, exp_value):
  331. # GH#39147
  332. mi = MultiIndex.from_tuples([(1, 2), (3, 4)])
  333. df = DataFrame([[1, 2], [3, 4]], index=mi, columns=["a", "b"])
  334. df.loc[indexer, ["c", "d"]] = 1.0
  335. expected = DataFrame(
  336. [[1, 2, 1.0, 1.0], [3, 4, exp_value, exp_value]],
  337. index=mi,
  338. columns=["a", "b", "c", "d"],
  339. )
  340. tm.assert_frame_equal(df, expected)
  341. def test_sorted_multiindex_after_union(self):
  342. # GH#44752
  343. midx = MultiIndex.from_product(
  344. [pd.date_range("20110101", periods=2), Index(["a", "b"])]
  345. )
  346. ser1 = Series(1, index=midx)
  347. ser2 = Series(1, index=midx[:2])
  348. df = pd.concat([ser1, ser2], axis=1)
  349. expected = df.copy()
  350. result = df.loc["2011-01-01":"2011-01-02"]
  351. tm.assert_frame_equal(result, expected)
  352. df = DataFrame({0: ser1, 1: ser2})
  353. result = df.loc["2011-01-01":"2011-01-02"]
  354. tm.assert_frame_equal(result, expected)
  355. df = pd.concat([ser1, ser2.reindex(ser1.index)], axis=1)
  356. result = df.loc["2011-01-01":"2011-01-02"]
  357. tm.assert_frame_equal(result, expected)
  358. def test_loc_no_second_level_index(self):
  359. # GH#43599
  360. df = DataFrame(
  361. index=MultiIndex.from_product([list("ab"), list("cd"), list("e")]),
  362. columns=["Val"],
  363. )
  364. res = df.loc[np.s_[:, "c", :]]
  365. expected = DataFrame(
  366. index=MultiIndex.from_product([list("ab"), list("e")]), columns=["Val"]
  367. )
  368. tm.assert_frame_equal(res, expected)
  369. def test_loc_multi_index_key_error(self):
  370. # GH 51892
  371. df = DataFrame(
  372. {
  373. (1, 2): ["a", "b", "c"],
  374. (1, 3): ["d", "e", "f"],
  375. (2, 2): ["g", "h", "i"],
  376. (2, 4): ["j", "k", "l"],
  377. }
  378. )
  379. with pytest.raises(KeyError, match=r"(1, 4)"):
  380. df.loc[0, (1, 4)]
  381. @pytest.mark.parametrize(
  382. "indexer, pos",
  383. [
  384. ([], []), # empty ok
  385. (["A"], slice(3)),
  386. (["A", "D"], []), # "D" isn't present -> raise
  387. (["D", "E"], []), # no values found -> raise
  388. (["D"], []), # same, with single item list: GH 27148
  389. (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)),
  390. (pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)),
  391. ],
  392. )
  393. def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
  394. # GH 7866
  395. # multi-index slicing with missing indexers
  396. idx = MultiIndex.from_product(
  397. [["A", "B", "C"], ["foo", "bar", "baz"]], names=["one", "two"]
  398. )
  399. ser = Series(np.arange(9, dtype="int64"), index=idx).sort_index()
  400. expected = ser.iloc[pos]
  401. if expected.size == 0 and indexer != []:
  402. with pytest.raises(KeyError, match=str(indexer)):
  403. ser.loc[indexer]
  404. elif indexer == (slice(None), ["foo", "bah"]):
  405. # "bah" is not in idx.levels[1], raising KeyError enforced in 2.0
  406. with pytest.raises(KeyError, match="'bah'"):
  407. ser.loc[indexer]
  408. else:
  409. result = ser.loc[indexer]
  410. tm.assert_series_equal(result, expected)
  411. @pytest.mark.parametrize("columns_indexer", [([], slice(None)), (["foo"], [])])
  412. def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
  413. # GH 8737
  414. # empty indexer
  415. multi_index = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"]))
  416. df = DataFrame(
  417. np.random.default_rng(2).standard_normal((5, 6)),
  418. index=range(5),
  419. columns=multi_index,
  420. )
  421. df = df.sort_index(level=0, axis=1)
  422. expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0])
  423. result = df.loc[:, columns_indexer]
  424. tm.assert_frame_equal(result, expected)
  425. def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
  426. # regression from < 0.14.0
  427. # GH 7914
  428. df = DataFrame(
  429. [[np.mean, np.median], ["mean", "median"]],
  430. columns=MultiIndex.from_tuples([("functs", "mean"), ("functs", "median")]),
  431. index=["function", "name"],
  432. )
  433. result = df.loc["function", ("functs", "mean")]
  434. expected = np.mean
  435. assert result == expected
  436. def test_loc_getitem_tuple_plus_slice():
  437. # GH 671
  438. df = DataFrame(
  439. {
  440. "a": np.arange(10),
  441. "b": np.arange(10),
  442. "c": np.random.default_rng(2).standard_normal(10),
  443. "d": np.random.default_rng(2).standard_normal(10),
  444. }
  445. ).set_index(["a", "b"])
  446. expected = df.loc[0, 0]
  447. result = df.loc[(0, 0), :]
  448. tm.assert_series_equal(result, expected)
  449. def test_loc_getitem_int(frame_random_data_integer_multi_index):
  450. df = frame_random_data_integer_multi_index
  451. result = df.loc[1]
  452. expected = df[-3:]
  453. expected.index = expected.index.droplevel(0)
  454. tm.assert_frame_equal(result, expected)
  455. def test_loc_getitem_int_raises_exception(frame_random_data_integer_multi_index):
  456. df = frame_random_data_integer_multi_index
  457. with pytest.raises(KeyError, match=r"^3$"):
  458. df.loc[3]
  459. def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data):
  460. df = multiindex_dataframe_random_data
  461. # test setup - check key not in dataframe
  462. with pytest.raises(KeyError, match=r"^\('bar', 'three'\)$"):
  463. df.loc[("bar", "three"), "B"]
  464. # in theory should be inserting in a sorted space????
  465. df.loc[("bar", "three"), "B"] = 0
  466. expected = 0
  467. result = df.sort_index().loc[("bar", "three"), "B"]
  468. assert result == expected
  469. def test_loc_setitem_single_column_slice():
  470. # case from https://github.com/pandas-dev/pandas/issues/27841
  471. df = DataFrame(
  472. "string",
  473. index=list("abcd"),
  474. columns=MultiIndex.from_product([["Main"], ("another", "one")]),
  475. )
  476. df["labels"] = "a"
  477. df.loc[:, "labels"] = df.index
  478. tm.assert_numpy_array_equal(np.asarray(df["labels"]), np.asarray(df.index))
  479. # test with non-object block
  480. df = DataFrame(
  481. np.nan,
  482. index=range(4),
  483. columns=MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]),
  484. )
  485. expected = df.copy()
  486. df.loc[:, "B"] = np.arange(4)
  487. expected.iloc[:, 2] = np.arange(4)
  488. tm.assert_frame_equal(df, expected)
  489. def test_loc_nan_multiindex(using_infer_string):
  490. # GH 5286
  491. tups = [
  492. ("Good Things", "C", np.nan),
  493. ("Good Things", "R", np.nan),
  494. ("Bad Things", "C", np.nan),
  495. ("Bad Things", "T", np.nan),
  496. ("Okay Things", "N", "B"),
  497. ("Okay Things", "N", "D"),
  498. ("Okay Things", "B", np.nan),
  499. ("Okay Things", "D", np.nan),
  500. ]
  501. df = DataFrame(
  502. np.ones((8, 4)),
  503. columns=Index(["d1", "d2", "d3", "d4"]),
  504. index=MultiIndex.from_tuples(tups, names=["u1", "u2", "u3"]),
  505. )
  506. result = df.loc["Good Things"].loc["C"]
  507. expected = DataFrame(
  508. np.ones((1, 4)),
  509. index=Index(
  510. [np.nan],
  511. dtype="object" if not using_infer_string else "str",
  512. name="u3",
  513. ),
  514. columns=Index(["d1", "d2", "d3", "d4"]),
  515. )
  516. tm.assert_frame_equal(result, expected)
  517. def test_loc_period_string_indexing():
  518. # GH 9892
  519. a = pd.period_range("2013Q1", "2013Q4", freq="Q")
  520. i = (1111, 2222, 3333)
  521. idx = MultiIndex.from_product((a, i), names=("Period", "CVR"))
  522. df = DataFrame(
  523. index=idx,
  524. columns=(
  525. "OMS",
  526. "OMK",
  527. "RES",
  528. "DRIFT_IND",
  529. "OEVRIG_IND",
  530. "FIN_IND",
  531. "VARE_UD",
  532. "LOEN_UD",
  533. "FIN_UD",
  534. ),
  535. )
  536. result = df.loc[("2013Q1", 1111), "OMS"]
  537. alt = df.loc[(a[0], 1111), "OMS"]
  538. assert np.isnan(alt)
  539. # Because the resolution of the string matches, it is an exact lookup,
  540. # not a slice
  541. assert np.isnan(result)
  542. alt = df.loc[("2013Q1", 1111), "OMS"]
  543. assert np.isnan(alt)
  544. def test_loc_datetime_mask_slicing():
  545. # GH 16699
  546. dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"])
  547. m_idx = MultiIndex.from_product([dt_idx, dt_idx], names=["Idx1", "Idx2"])
  548. df = DataFrame(
  549. data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=m_idx, columns=["C1", "C2"]
  550. )
  551. result = df.loc[(dt_idx[0], (df.index.get_level_values(1) > "2017-05-04")), "C1"]
  552. expected = Series(
  553. [3],
  554. name="C1",
  555. index=MultiIndex.from_tuples(
  556. [(pd.Timestamp("2017-05-04"), pd.Timestamp("2017-05-05"))],
  557. names=["Idx1", "Idx2"],
  558. ),
  559. )
  560. tm.assert_series_equal(result, expected)
  561. def test_loc_datetime_series_tuple_slicing():
  562. # https://github.com/pandas-dev/pandas/issues/35858
  563. date = pd.Timestamp("2000")
  564. ser = Series(
  565. 1,
  566. index=MultiIndex.from_tuples([("a", date)], names=["a", "b"]),
  567. name="c",
  568. )
  569. result = ser.loc[:, [date]]
  570. tm.assert_series_equal(result, ser)
  571. def test_loc_with_mi_indexer():
  572. # https://github.com/pandas-dev/pandas/issues/35351
  573. df = DataFrame(
  574. data=[["a", 1], ["a", 0], ["b", 1], ["c", 2]],
  575. index=MultiIndex.from_tuples(
  576. [(0, 1), (1, 0), (1, 1), (1, 1)], names=["index", "date"]
  577. ),
  578. columns=["author", "price"],
  579. )
  580. idx = MultiIndex.from_tuples([(0, 1), (1, 1)], names=["index", "date"])
  581. result = df.loc[idx, :]
  582. expected = DataFrame(
  583. [["a", 1], ["b", 1], ["c", 2]],
  584. index=MultiIndex.from_tuples([(0, 1), (1, 1), (1, 1)], names=["index", "date"]),
  585. columns=["author", "price"],
  586. )
  587. tm.assert_frame_equal(result, expected)
  588. def test_loc_mi_with_level1_named_0():
  589. # GH#37194
  590. dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
  591. ser = Series(range(3), index=dti)
  592. df = ser.to_frame()
  593. df[1] = dti
  594. df2 = df.set_index(0, append=True)
  595. assert df2.index.names == (None, 0)
  596. df2.index.get_loc(dti[0]) # smoke test
  597. result = df2.loc[dti[0]]
  598. expected = df2.iloc[[0]].droplevel(None)
  599. tm.assert_frame_equal(result, expected)
  600. ser2 = df2[1]
  601. assert ser2.index.names == (None, 0)
  602. result = ser2.loc[dti[0]]
  603. expected = ser2.iloc[[0]].droplevel(None)
  604. tm.assert_series_equal(result, expected)
  605. def test_getitem_str_slice():
  606. # GH#15928
  607. df = DataFrame(
  608. [
  609. ["20160525 13:30:00.023", "MSFT", "51.95", "51.95"],
  610. ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
  611. ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"],
  612. ["20160525 13:30:00.131", "AAPL", "98.61", "98.62"],
  613. ["20160525 13:30:00.135", "MSFT", "51.92", "51.95"],
  614. ["20160525 13:30:00.135", "AAPL", "98.61", "98.62"],
  615. ],
  616. columns="time,ticker,bid,ask".split(","),
  617. )
  618. df2 = df.set_index(["ticker", "time"]).sort_index()
  619. res = df2.loc[("AAPL", slice("2016-05-25 13:30:00")), :].droplevel(0)
  620. expected = df2.loc["AAPL"].loc[slice("2016-05-25 13:30:00"), :]
  621. tm.assert_frame_equal(res, expected)
  622. def test_3levels_leading_period_index():
  623. # GH#24091
  624. pi = pd.PeriodIndex(
  625. ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"],
  626. name="datetime",
  627. freq="D",
  628. )
  629. lev2 = ["A", "A", "Z", "W"]
  630. lev3 = ["B", "C", "Q", "F"]
  631. mi = MultiIndex.from_arrays([pi, lev2, lev3])
  632. ser = Series(range(4), index=mi, dtype=np.float64)
  633. result = ser.loc[(pi[0], "A", "B")]
  634. assert result == 0.0
  635. class TestKeyErrorsWithMultiIndex:
  636. def test_missing_keys_raises_keyerror(self):
  637. # GH#27420 KeyError, not TypeError
  638. df = DataFrame(np.arange(12).reshape(4, 3), columns=["A", "B", "C"])
  639. df2 = df.set_index(["A", "B"])
  640. with pytest.raises(KeyError, match="1"):
  641. df2.loc[(1, 6)]
  642. def test_missing_key_raises_keyerror2(self):
  643. # GH#21168 KeyError, not "IndexingError: Too many indexers"
  644. ser = Series(-1, index=MultiIndex.from_product([[0, 1]] * 2))
  645. with pytest.raises(KeyError, match=r"\(0, 3\)"):
  646. ser.loc[0, 3]
  647. def test_missing_key_combination(self):
  648. # GH: 19556
  649. mi = MultiIndex.from_arrays(
  650. [
  651. np.array(["a", "a", "b", "b"]),
  652. np.array(["1", "2", "2", "3"]),
  653. np.array(["c", "d", "c", "d"]),
  654. ],
  655. names=["one", "two", "three"],
  656. )
  657. df = DataFrame(np.random.default_rng(2).random((4, 3)), index=mi)
  658. msg = r"\('b', '1', slice\(None, None, None\)\)"
  659. with pytest.raises(KeyError, match=msg):
  660. df.loc[("b", "1", slice(None)), :]
  661. with pytest.raises(KeyError, match=msg):
  662. df.index.get_locs(("b", "1", slice(None)))
  663. with pytest.raises(KeyError, match=r"\('b', '1'\)"):
  664. df.loc[("b", "1"), :]
  665. def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data):
  666. df = multiindex_year_month_day_dataframe_random_data
  667. ser = df["A"]
  668. result = ser[2000, 5]
  669. expected = df.loc[2000, 5]["A"]
  670. tm.assert_series_equal(result, expected)
  671. def test_loc_with_nan():
  672. # GH: 27104
  673. df = DataFrame(
  674. {"col": [1, 2, 5], "ind1": ["a", "d", np.nan], "ind2": [1, 4, 5]}
  675. ).set_index(["ind1", "ind2"])
  676. result = df.loc[["a"]]
  677. expected = DataFrame(
  678. {"col": [1]}, index=MultiIndex.from_tuples([("a", 1)], names=["ind1", "ind2"])
  679. )
  680. tm.assert_frame_equal(result, expected)
  681. result = df.loc["a"]
  682. expected = DataFrame({"col": [1]}, index=Index([1], name="ind2"))
  683. tm.assert_frame_equal(result, expected)
  684. def test_getitem_non_found_tuple():
  685. # GH: 25236
  686. df = DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"]).set_index(
  687. ["a", "b", "c"]
  688. )
  689. with pytest.raises(KeyError, match=r"\(2\.0, 2\.0, 3\.0\)"):
  690. df.loc[(2.0, 2.0, 3.0)]
  691. def test_get_loc_datetime_index():
  692. # GH#24263
  693. index = pd.date_range("2001-01-01", periods=100)
  694. mi = MultiIndex.from_arrays([index])
  695. # Check if get_loc matches for Index and MultiIndex
  696. assert mi.get_loc("2001-01") == slice(0, 31, None)
  697. assert index.get_loc("2001-01") == slice(0, 31, None)
  698. loc = mi[::2].get_loc("2001-01")
  699. expected = index[::2].get_loc("2001-01")
  700. assert loc == expected
  701. loc = mi.repeat(2).get_loc("2001-01")
  702. expected = index.repeat(2).get_loc("2001-01")
  703. assert loc == expected
  704. loc = mi.append(mi).get_loc("2001-01")
  705. expected = index.append(index).get_loc("2001-01")
  706. # TODO: standardize return type for MultiIndex.get_loc
  707. tm.assert_numpy_array_equal(loc.nonzero()[0], expected)
  708. def test_loc_setitem_indexer_differently_ordered():
  709. # GH#34603
  710. mi = MultiIndex.from_product([["a", "b"], [0, 1]])
  711. df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=mi)
  712. indexer = ("a", [1, 0])
  713. df.loc[indexer, :] = np.array([[9, 10], [11, 12]])
  714. expected = DataFrame([[11, 12], [9, 10], [5, 6], [7, 8]], index=mi)
  715. tm.assert_frame_equal(df, expected)
  716. def test_loc_getitem_index_differently_ordered_slice_none():
  717. # GH#31330
  718. df = DataFrame(
  719. [[1, 2], [3, 4], [5, 6], [7, 8]],
  720. index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
  721. columns=["a", "b"],
  722. )
  723. result = df.loc[(slice(None), [2, 1]), :]
  724. expected = DataFrame(
  725. [[3, 4], [7, 8], [1, 2], [5, 6]],
  726. index=[["a", "b", "a", "b"], [2, 2, 1, 1]],
  727. columns=["a", "b"],
  728. )
  729. tm.assert_frame_equal(result, expected)
  730. @pytest.mark.parametrize("indexer", [[1, 2, 7, 6, 2, 3, 8, 7], [1, 2, 7, 6, 3, 8]])
  731. def test_loc_getitem_index_differently_ordered_slice_none_duplicates(indexer):
  732. # GH#40978
  733. df = DataFrame(
  734. [1] * 8,
  735. index=MultiIndex.from_tuples(
  736. [(1, 1), (1, 2), (1, 7), (1, 6), (2, 2), (2, 3), (2, 8), (2, 7)]
  737. ),
  738. columns=["a"],
  739. )
  740. result = df.loc[(slice(None), indexer), :]
  741. expected = DataFrame(
  742. [1] * 8,
  743. index=[[1, 1, 2, 1, 2, 1, 2, 2], [1, 2, 2, 7, 7, 6, 3, 8]],
  744. columns=["a"],
  745. )
  746. tm.assert_frame_equal(result, expected)
  747. result = df.loc[df.index.isin(indexer, level=1), :]
  748. tm.assert_frame_equal(result, df)
  749. def test_loc_getitem_drops_levels_for_one_row_dataframe():
  750. # GH#10521 "x" and "z" are both scalar indexing, so those levels are dropped
  751. mi = MultiIndex.from_arrays([["x"], ["y"], ["z"]], names=["a", "b", "c"])
  752. df = DataFrame({"d": [0]}, index=mi)
  753. expected = df.droplevel([0, 2])
  754. result = df.loc["x", :, "z"]
  755. tm.assert_frame_equal(result, expected)
  756. ser = Series([0], index=mi)
  757. result = ser.loc["x", :, "z"]
  758. expected = Series([0], index=Index(["y"], name="b"))
  759. tm.assert_series_equal(result, expected)
  760. def test_mi_columns_loc_list_label_order():
  761. # GH 10710
  762. cols = MultiIndex.from_product([["A", "B", "C"], [1, 2]])
  763. df = DataFrame(np.zeros((5, 6)), columns=cols)
  764. result = df.loc[:, ["B", "A"]]
  765. expected = DataFrame(
  766. np.zeros((5, 4)),
  767. columns=MultiIndex.from_tuples([("B", 1), ("B", 2), ("A", 1), ("A", 2)]),
  768. )
  769. tm.assert_frame_equal(result, expected)
  770. def test_mi_partial_indexing_list_raises():
  771. # GH 13501
  772. frame = DataFrame(
  773. np.arange(12).reshape((4, 3)),
  774. index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
  775. columns=[["Ohio", "Ohio", "Colorado"], ["Green", "Red", "Green"]],
  776. )
  777. frame.index.names = ["key1", "key2"]
  778. frame.columns.names = ["state", "color"]
  779. with pytest.raises(KeyError, match="\\[2\\] not in index"):
  780. frame.loc[["b", 2], "Colorado"]
  781. def test_mi_indexing_list_nonexistent_raises():
  782. # GH 15452
  783. s = Series(range(4), index=MultiIndex.from_product([[1, 2], ["a", "b"]]))
  784. with pytest.raises(KeyError, match="\\['not' 'found'\\] not in index"):
  785. s.loc[["not", "found"]]
  786. def test_mi_add_cell_missing_row_non_unique():
  787. # GH 16018
  788. result = DataFrame(
  789. [[1, 2, 5, 6], [3, 4, 7, 8]],
  790. index=["a", "a"],
  791. columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
  792. )
  793. result.loc["c"] = -1
  794. result.loc["c", (1, "A")] = 3
  795. result.loc["d", (1, "A")] = 3
  796. expected = DataFrame(
  797. [
  798. [1.0, 2.0, 5.0, 6.0],
  799. [3.0, 4.0, 7.0, 8.0],
  800. [3.0, -1.0, -1, -1],
  801. [3.0, np.nan, np.nan, np.nan],
  802. ],
  803. index=["a", "a", "c", "d"],
  804. columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
  805. )
  806. tm.assert_frame_equal(result, expected)
  807. def test_loc_get_scalar_casting_to_float():
  808. # GH#41369
  809. df = DataFrame(
  810. {"a": 1.0, "b": 2}, index=MultiIndex.from_arrays([[3], [4]], names=["c", "d"])
  811. )
  812. result = df.loc[(3, 4), "b"]
  813. assert result == 2
  814. assert isinstance(result, np.int64)
  815. result = df.loc[[(3, 4)], "b"].iloc[0]
  816. assert result == 2
  817. assert isinstance(result, np.int64)
  818. def test_loc_empty_single_selector_with_names():
  819. # GH 19517
  820. idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=[1, 0])
  821. s2 = Series(index=idx, dtype=np.float64)
  822. result = s2.loc["a"]
  823. expected = Series([np.nan, np.nan], index=Index(["A", "B"], name=0))
  824. tm.assert_series_equal(result, expected)
  825. def test_loc_keyerror_rightmost_key_missing():
  826. # GH 20951
  827. df = DataFrame(
  828. {
  829. "A": [100, 100, 200, 200, 300, 300],
  830. "B": [10, 10, 20, 21, 31, 33],
  831. "C": range(6),
  832. }
  833. )
  834. df = df.set_index(["A", "B"])
  835. with pytest.raises(KeyError, match="^1$"):
  836. df.loc[(100, 1)]
  837. def test_multindex_series_loc_with_tuple_label():
  838. # GH#43908
  839. mi = MultiIndex.from_tuples([(1, 2), (3, (4, 5))])
  840. ser = Series([1, 2], index=mi)
  841. result = ser.loc[(3, (4, 5))]
  842. assert result == 2