# test_partial.py
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import (
    DataFrame,
    DatetimeIndex,
    MultiIndex,
    date_range,
)
import pandas._testing as tm
  11. class TestMultiIndexPartial:
  12. def test_getitem_partial_int(self):
  13. # GH 12416
  14. # with single item
  15. l1 = [10, 20]
  16. l2 = ["a", "b"]
  17. df = DataFrame(index=range(2), columns=MultiIndex.from_product([l1, l2]))
  18. expected = DataFrame(index=range(2), columns=l2)
  19. result = df[20]
  20. tm.assert_frame_equal(result, expected)
  21. # with list
  22. expected = DataFrame(
  23. index=range(2), columns=MultiIndex.from_product([l1[1:], l2])
  24. )
  25. result = df[[20]]
  26. tm.assert_frame_equal(result, expected)
  27. # missing item:
  28. with pytest.raises(KeyError, match="1"):
  29. df[1]
  30. with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
  31. df[[1]]
  32. def test_series_slice_partial(self):
  33. pass
  34. def test_xs_partial(
  35. self,
  36. multiindex_dataframe_random_data,
  37. multiindex_year_month_day_dataframe_random_data,
  38. ):
  39. frame = multiindex_dataframe_random_data
  40. ymd = multiindex_year_month_day_dataframe_random_data
  41. result = frame.xs("foo")
  42. result2 = frame.loc["foo"]
  43. expected = frame.T["foo"].T
  44. tm.assert_frame_equal(result, expected)
  45. tm.assert_frame_equal(result, result2)
  46. result = ymd.xs((2000, 4))
  47. expected = ymd.loc[2000, 4]
  48. tm.assert_frame_equal(result, expected)
  49. # ex from #1796
  50. index = MultiIndex(
  51. levels=[["foo", "bar"], ["one", "two"], [-1, 1]],
  52. codes=[
  53. [0, 0, 0, 0, 1, 1, 1, 1],
  54. [0, 0, 1, 1, 0, 0, 1, 1],
  55. [0, 1, 0, 1, 0, 1, 0, 1],
  56. ],
  57. )
  58. df = DataFrame(
  59. np.random.default_rng(2).standard_normal((8, 4)),
  60. index=index,
  61. columns=list("abcd"),
  62. )
  63. result = df.xs(("foo", "one"))
  64. expected = df.loc["foo", "one"]
  65. tm.assert_frame_equal(result, expected)
  66. def test_getitem_partial(self, multiindex_year_month_day_dataframe_random_data):
  67. ymd = multiindex_year_month_day_dataframe_random_data
  68. ymd = ymd.T
  69. result = ymd[2000, 2]
  70. expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1])
  71. expected.columns = expected.columns.droplevel(0).droplevel(0)
  72. tm.assert_frame_equal(result, expected)
  73. def test_fancy_slice_partial(
  74. self,
  75. multiindex_dataframe_random_data,
  76. multiindex_year_month_day_dataframe_random_data,
  77. ):
  78. frame = multiindex_dataframe_random_data
  79. result = frame.loc["bar":"baz"]
  80. expected = frame[3:7]
  81. tm.assert_frame_equal(result, expected)
  82. ymd = multiindex_year_month_day_dataframe_random_data
  83. result = ymd.loc[(2000, 2):(2000, 4)]
  84. lev = ymd.index.codes[1]
  85. expected = ymd[(lev >= 1) & (lev <= 3)]
  86. tm.assert_frame_equal(result, expected)
  87. def test_getitem_partial_column_select(self):
  88. idx = MultiIndex(
  89. codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
  90. levels=[["a", "b"], ["x", "y"], ["p", "q"]],
  91. )
  92. df = DataFrame(np.random.default_rng(2).random((3, 2)), index=idx)
  93. result = df.loc[("a", "y"), :]
  94. expected = df.loc[("a", "y")]
  95. tm.assert_frame_equal(result, expected)
  96. result = df.loc[("a", "y"), [1, 0]]
  97. expected = df.loc[("a", "y")][[1, 0]]
  98. tm.assert_frame_equal(result, expected)
  99. with pytest.raises(KeyError, match=r"\('a', 'foo'\)"):
  100. df.loc[("a", "foo"), :]
  101. # TODO(ArrayManager) rewrite test to not use .values
  102. # exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view
  103. @td.skip_array_manager_invalid_test
  104. def test_partial_set(
  105. self,
  106. multiindex_year_month_day_dataframe_random_data,
  107. using_copy_on_write,
  108. warn_copy_on_write,
  109. ):
  110. # GH #397
  111. ymd = multiindex_year_month_day_dataframe_random_data
  112. df = ymd.copy()
  113. exp = ymd.copy()
  114. df.loc[2000, 4] = 0
  115. exp.iloc[65:85] = 0
  116. tm.assert_frame_equal(df, exp)
  117. if using_copy_on_write:
  118. with tm.raises_chained_assignment_error():
  119. df["A"].loc[2000, 4] = 1
  120. df.loc[(2000, 4), "A"] = 1
  121. else:
  122. with tm.raises_chained_assignment_error():
  123. df["A"].loc[2000, 4] = 1
  124. exp.iloc[65:85, 0] = 1
  125. tm.assert_frame_equal(df, exp)
  126. df.loc[2000] = 5
  127. exp.iloc[:100] = 5
  128. tm.assert_frame_equal(df, exp)
  129. # this works...for now
  130. with tm.raises_chained_assignment_error():
  131. df["A"].iloc[14] = 5
  132. if using_copy_on_write:
  133. assert df["A"].iloc[14] == exp["A"].iloc[14]
  134. else:
  135. assert df["A"].iloc[14] == 5
  136. @pytest.mark.parametrize("dtype", [int, float])
  137. def test_getitem_intkey_leading_level(
  138. self, multiindex_year_month_day_dataframe_random_data, dtype
  139. ):
  140. # GH#33355 dont fall-back to positional when leading level is int
  141. ymd = multiindex_year_month_day_dataframe_random_data
  142. levels = ymd.index.levels
  143. ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:])
  144. ser = ymd["A"]
  145. mi = ser.index
  146. assert isinstance(mi, MultiIndex)
  147. if dtype is int:
  148. assert mi.levels[0].dtype == np.dtype(int)
  149. else:
  150. assert mi.levels[0].dtype == np.float64
  151. assert 14 not in mi.levels[0]
  152. assert not mi.levels[0]._should_fallback_to_positional
  153. assert not mi._should_fallback_to_positional
  154. with pytest.raises(KeyError, match="14"):
  155. ser[14]
  156. # ---------------------------------------------------------------------
  157. def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
  158. frame = multiindex_dataframe_random_data
  159. expected = frame.copy()
  160. result = frame.copy()
  161. result.loc[["foo", "bar"]] = 0
  162. expected.loc["foo"] = 0
  163. expected.loc["bar"] = 0
  164. tm.assert_frame_equal(result, expected)
  165. expected = frame.copy()
  166. result = frame.copy()
  167. result.loc["foo":"bar"] = 0
  168. expected.loc["foo"] = 0
  169. expected.loc["bar"] = 0
  170. tm.assert_frame_equal(result, expected)
  171. expected = frame["A"].copy()
  172. result = frame["A"].copy()
  173. result.loc[["foo", "bar"]] = 0
  174. expected.loc["foo"] = 0
  175. expected.loc["bar"] = 0
  176. tm.assert_series_equal(result, expected)
  177. expected = frame["A"].copy()
  178. result = frame["A"].copy()
  179. result.loc["foo":"bar"] = 0
  180. expected.loc["foo"] = 0
  181. expected.loc["bar"] = 0
  182. tm.assert_series_equal(result, expected)
  183. @pytest.mark.parametrize(
  184. "indexer, exp_idx, exp_values",
  185. [
  186. (
  187. slice("2019-2", None),
  188. DatetimeIndex(["2019-02-01"], dtype="M8[ns]"),
  189. [2, 3],
  190. ),
  191. (
  192. slice(None, "2019-2"),
  193. date_range("2019", periods=2, freq="MS"),
  194. [0, 1, 2, 3],
  195. ),
  196. ],
  197. )
  198. def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values):
  199. # GH: 25165
  200. date_idx = date_range("2019", periods=2, freq="MS")
  201. df = DataFrame(
  202. list(range(4)),
  203. index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]),
  204. )
  205. expected = DataFrame(
  206. exp_values,
  207. index=MultiIndex.from_product([exp_idx, [0, 1]], names=["x", "y"]),
  208. )
  209. result = df[indexer]
  210. tm.assert_frame_equal(result, expected)
  211. result = df.loc[indexer]
  212. tm.assert_frame_equal(result, expected)
  213. result = df.loc(axis=0)[indexer]
  214. tm.assert_frame_equal(result, expected)
  215. result = df.loc[indexer, :]
  216. tm.assert_frame_equal(result, expected)
  217. df2 = df.swaplevel(0, 1).sort_index()
  218. expected = expected.swaplevel(0, 1).sort_index()
  219. result = df2.loc[:, indexer, :]
  220. tm.assert_frame_equal(result, expected)
  221. def test_loc_getitem_partial_both_axis():
  222. # gh-12660
  223. iterables = [["a", "b"], [2, 1]]
  224. columns = MultiIndex.from_product(iterables, names=["col1", "col2"])
  225. rows = MultiIndex.from_product(iterables, names=["row1", "row2"])
  226. df = DataFrame(
  227. np.random.default_rng(2).standard_normal((4, 4)), index=rows, columns=columns
  228. )
  229. expected = df.iloc[:2, 2:].droplevel("row1").droplevel("col1", axis=1)
  230. result = df.loc["a", "b"]
  231. tm.assert_frame_equal(result, expected)