# test_indexing.py (7.7 KB)
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import Index
  5. import pandas._testing as tm
  6. def _isnan(val):
  7. try:
  8. return val is not pd.NA and np.isnan(val)
  9. except TypeError:
  10. return False
  11. def _equivalent_na(dtype, null):
  12. if dtype.na_value is pd.NA and null is pd.NA:
  13. return True
  14. elif _isnan(dtype.na_value) and _isnan(null):
  15. return True
  16. else:
  17. return False
  18. class TestGetLoc:
  19. def test_get_loc(self, any_string_dtype):
  20. index = Index(["a", "b", "c"], dtype=any_string_dtype)
  21. assert index.get_loc("b") == 1
  22. def test_get_loc_raises(self, any_string_dtype):
  23. index = Index(["a", "b", "c"], dtype=any_string_dtype)
  24. with pytest.raises(KeyError, match="d"):
  25. index.get_loc("d")
  26. def test_get_loc_invalid_value(self, any_string_dtype):
  27. index = Index(["a", "b", "c"], dtype=any_string_dtype)
  28. with pytest.raises(KeyError, match="1"):
  29. index.get_loc(1)
  30. def test_get_loc_non_unique(self, any_string_dtype):
  31. index = Index(["a", "b", "a"], dtype=any_string_dtype)
  32. result = index.get_loc("a")
  33. expected = np.array([True, False, True])
  34. tm.assert_numpy_array_equal(result, expected)
  35. def test_get_loc_non_missing(self, any_string_dtype, nulls_fixture):
  36. index = Index(["a", "b", "c"], dtype=any_string_dtype)
  37. with pytest.raises(KeyError):
  38. index.get_loc(nulls_fixture)
  39. def test_get_loc_missing(self, any_string_dtype, nulls_fixture):
  40. index = Index(["a", "b", nulls_fixture], dtype=any_string_dtype)
  41. assert index.get_loc(nulls_fixture) == 2
  42. class TestGetIndexer:
  43. @pytest.mark.parametrize(
  44. "method,expected",
  45. [
  46. ("pad", [-1, 0, 1, 1]),
  47. ("backfill", [0, 0, 1, -1]),
  48. ],
  49. )
  50. def test_get_indexer_strings(self, any_string_dtype, method, expected):
  51. expected = np.array(expected, dtype=np.intp)
  52. index = Index(["b", "c"], dtype=any_string_dtype)
  53. actual = index.get_indexer(["a", "b", "c", "d"], method=method)
  54. tm.assert_numpy_array_equal(actual, expected)
  55. def test_get_indexer_strings_raises(self, any_string_dtype):
  56. index = Index(["b", "c"], dtype=any_string_dtype)
  57. msg = "|".join(
  58. [
  59. "operation 'sub' not supported for dtype 'str",
  60. r"unsupported operand type\(s\) for -: 'str' and 'str'",
  61. ]
  62. )
  63. with pytest.raises(TypeError, match=msg):
  64. index.get_indexer(["a", "b", "c", "d"], method="nearest")
  65. with pytest.raises(TypeError, match=msg):
  66. index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2)
  67. with pytest.raises(TypeError, match=msg):
  68. index.get_indexer(
  69. ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
  70. )
  71. @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA])
  72. def test_get_indexer_missing(self, any_string_dtype, null, using_infer_string):
  73. # NaT and Decimal("NaN") from null_fixture are not supported for string dtype
  74. index = Index(["a", "b", null], dtype=any_string_dtype)
  75. result = index.get_indexer(["a", null, "c"])
  76. if using_infer_string:
  77. expected = np.array([0, 2, -1], dtype=np.intp)
  78. elif any_string_dtype == "string" and not _equivalent_na(
  79. any_string_dtype, null
  80. ):
  81. expected = np.array([0, -1, -1], dtype=np.intp)
  82. else:
  83. expected = np.array([0, 2, -1], dtype=np.intp)
  84. tm.assert_numpy_array_equal(result, expected)
  85. class TestGetIndexerNonUnique:
  86. @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA])
  87. def test_get_indexer_non_unique_nas(
  88. self, any_string_dtype, null, using_infer_string
  89. ):
  90. index = Index(["a", "b", null], dtype=any_string_dtype)
  91. indexer, missing = index.get_indexer_non_unique(["a", null])
  92. if using_infer_string:
  93. expected_indexer = np.array([0, 2], dtype=np.intp)
  94. expected_missing = np.array([], dtype=np.intp)
  95. elif any_string_dtype == "string" and not _equivalent_na(
  96. any_string_dtype, null
  97. ):
  98. expected_indexer = np.array([0, -1], dtype=np.intp)
  99. expected_missing = np.array([1], dtype=np.intp)
  100. else:
  101. expected_indexer = np.array([0, 2], dtype=np.intp)
  102. expected_missing = np.array([], dtype=np.intp)
  103. tm.assert_numpy_array_equal(indexer, expected_indexer)
  104. tm.assert_numpy_array_equal(missing, expected_missing)
  105. # actually non-unique
  106. index = Index(["a", null, "b", null], dtype=any_string_dtype)
  107. indexer, missing = index.get_indexer_non_unique(["a", null])
  108. if using_infer_string:
  109. expected_indexer = np.array([0, 1, 3], dtype=np.intp)
  110. elif any_string_dtype == "string" and not _equivalent_na(
  111. any_string_dtype, null
  112. ):
  113. pass
  114. else:
  115. expected_indexer = np.array([0, 1, 3], dtype=np.intp)
  116. tm.assert_numpy_array_equal(indexer, expected_indexer)
  117. tm.assert_numpy_array_equal(missing, expected_missing)
  118. class TestSliceLocs:
  119. @pytest.mark.parametrize(
  120. "in_slice,expected",
  121. [
  122. # error: Slice index must be an integer or None
  123. (pd.IndexSlice[::-1], "yxdcb"),
  124. (pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc]
  125. (pd.IndexSlice["b"::-1], "b"), # type: ignore[misc]
  126. (pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc]
  127. (pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc]
  128. (pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc]
  129. (pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc]
  130. # absent labels
  131. (pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc]
  132. (pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc]
  133. (pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc]
  134. (pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc]
  135. (pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc]
  136. (pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc]
  137. (pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc]
  138. (pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc]
  139. (pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc]
  140. ],
  141. )
  142. def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
  143. index = Index(list("bcdxy"), dtype=any_string_dtype)
  144. s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
  145. result = index[s_start : s_stop : in_slice.step]
  146. expected = Index(list(expected), dtype=any_string_dtype)
  147. tm.assert_index_equal(result, expected)
  148. def test_slice_locs_negative_step_oob(self, any_string_dtype):
  149. index = Index(list("bcdxy"), dtype=any_string_dtype)
  150. result = index[-10:5:1]
  151. tm.assert_index_equal(result, index)
  152. result = index[4:-10:-1]
  153. expected = Index(list("yxdcb"), dtype=any_string_dtype)
  154. tm.assert_index_equal(result, expected)
  155. def test_slice_locs_dup(self, any_string_dtype):
  156. index = Index(["a", "a", "b", "c", "d", "d"], dtype=any_string_dtype)
  157. assert index.slice_locs("a", "d") == (0, 6)
  158. assert index.slice_locs(end="d") == (0, 6)
  159. assert index.slice_locs("a", "c") == (0, 4)
  160. assert index.slice_locs("b", "d") == (2, 6)
  161. index2 = index[::-1]
  162. assert index2.slice_locs("d", "a") == (0, 6)
  163. assert index2.slice_locs(end="a") == (0, 6)
  164. assert index2.slice_locs("d", "b") == (0, 4)
  165. assert index2.slice_locs("c", "a") == (2, 6)