test_indexing.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. from decimal import Decimal
  2. import numpy as np
  3. import pytest
  4. from pandas._libs.missing import is_matching_na
  5. from pandas import Index
  6. import pandas._testing as tm
  7. class TestGetIndexer:
  8. @pytest.mark.parametrize(
  9. "method,expected",
  10. [
  11. ("pad", np.array([-1, 0, 1, 1], dtype=np.intp)),
  12. ("backfill", np.array([0, 0, 1, -1], dtype=np.intp)),
  13. ],
  14. )
  15. def test_get_indexer_strings(self, method, expected):
  16. expected = np.array(expected, dtype=np.intp)
  17. index = Index(["b", "c"], dtype=object)
  18. actual = index.get_indexer(["a", "b", "c", "d"], method=method)
  19. tm.assert_numpy_array_equal(actual, expected)
  20. def test_get_indexer_strings_raises(self):
  21. index = Index(["b", "c"], dtype=object)
  22. msg = "|".join(
  23. [
  24. "operation 'sub' not supported for dtype 'str'",
  25. r"unsupported operand type\(s\) for -: 'str' and 'str'",
  26. ]
  27. )
  28. with pytest.raises(TypeError, match=msg):
  29. index.get_indexer(["a", "b", "c", "d"], method="nearest")
  30. with pytest.raises(TypeError, match=msg):
  31. index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2)
  32. with pytest.raises(TypeError, match=msg):
  33. index.get_indexer(
  34. ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
  35. )
  36. def test_get_indexer_with_NA_values(
  37. self, unique_nulls_fixture, unique_nulls_fixture2
  38. ):
  39. # GH#22332
  40. # check pairwise, that no pair of na values
  41. # is mangled
  42. if unique_nulls_fixture is unique_nulls_fixture2:
  43. return # skip it, values are not unique
  44. arr = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=object)
  45. index = Index(arr, dtype=object)
  46. result = index.get_indexer(
  47. Index(
  48. [unique_nulls_fixture, unique_nulls_fixture2, "Unknown"], dtype=object
  49. )
  50. )
  51. expected = np.array([0, 1, -1], dtype=np.intp)
  52. tm.assert_numpy_array_equal(result, expected)
  53. def test_get_indexer_infer_string_missing_values(self):
  54. # ensure the passed list is not cast to string but to object so that
  55. # the None value is matched in the index
  56. # https://github.com/pandas-dev/pandas/issues/55834
  57. idx = Index(["a", "b", None], dtype="object")
  58. result = idx.get_indexer([None, "x"])
  59. expected = np.array([2, -1], dtype=np.intp)
  60. tm.assert_numpy_array_equal(result, expected)
  61. class TestGetIndexerNonUnique:
  62. def test_get_indexer_non_unique_nas(self, nulls_fixture):
  63. # even though this isn't non-unique, this should still work
  64. index = Index(["a", "b", nulls_fixture], dtype=object)
  65. indexer, missing = index.get_indexer_non_unique([nulls_fixture])
  66. expected_indexer = np.array([2], dtype=np.intp)
  67. expected_missing = np.array([], dtype=np.intp)
  68. tm.assert_numpy_array_equal(indexer, expected_indexer)
  69. tm.assert_numpy_array_equal(missing, expected_missing)
  70. # actually non-unique
  71. index = Index(["a", nulls_fixture, "b", nulls_fixture], dtype=object)
  72. indexer, missing = index.get_indexer_non_unique([nulls_fixture])
  73. expected_indexer = np.array([1, 3], dtype=np.intp)
  74. tm.assert_numpy_array_equal(indexer, expected_indexer)
  75. tm.assert_numpy_array_equal(missing, expected_missing)
  76. # matching-but-not-identical nans
  77. if is_matching_na(nulls_fixture, float("NaN")):
  78. index = Index(["a", float("NaN"), "b", float("NaN")], dtype=object)
  79. match_but_not_identical = True
  80. elif is_matching_na(nulls_fixture, Decimal("NaN")):
  81. index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")], dtype=object)
  82. match_but_not_identical = True
  83. else:
  84. match_but_not_identical = False
  85. if match_but_not_identical:
  86. indexer, missing = index.get_indexer_non_unique([nulls_fixture])
  87. expected_indexer = np.array([1, 3], dtype=np.intp)
  88. tm.assert_numpy_array_equal(indexer, expected_indexer)
  89. tm.assert_numpy_array_equal(missing, expected_missing)
  90. @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning")
  91. def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
  92. expected_missing = np.array([], dtype=np.intp)
  93. # matching-but-not-identical nats
  94. if is_matching_na(np_nat_fixture, np_nat_fixture2):
  95. # ensure nats are different objects
  96. index = Index(
  97. np.array(
  98. ["2021-10-02", np_nat_fixture.copy(), np_nat_fixture2.copy()],
  99. dtype=object,
  100. ),
  101. dtype=object,
  102. )
  103. # pass as index to prevent target from being casted to DatetimeIndex
  104. indexer, missing = index.get_indexer_non_unique(
  105. Index([np_nat_fixture], dtype=object)
  106. )
  107. expected_indexer = np.array([1, 2], dtype=np.intp)
  108. tm.assert_numpy_array_equal(indexer, expected_indexer)
  109. tm.assert_numpy_array_equal(missing, expected_missing)
  110. # dt64nat vs td64nat
  111. else:
  112. try:
  113. np_nat_fixture == np_nat_fixture2
  114. except (TypeError, OverflowError):
  115. # Numpy will raise on uncomparable types, like
  116. # np.datetime64('NaT', 'Y') and np.datetime64('NaT', 'ps')
  117. # https://github.com/numpy/numpy/issues/22762
  118. return
  119. index = Index(
  120. np.array(
  121. [
  122. "2021-10-02",
  123. np_nat_fixture,
  124. np_nat_fixture2,
  125. np_nat_fixture,
  126. np_nat_fixture2,
  127. ],
  128. dtype=object,
  129. ),
  130. dtype=object,
  131. )
  132. # pass as index to prevent target from being casted to DatetimeIndex
  133. indexer, missing = index.get_indexer_non_unique(
  134. Index([np_nat_fixture], dtype=object)
  135. )
  136. expected_indexer = np.array([1, 3], dtype=np.intp)
  137. tm.assert_numpy_array_equal(indexer, expected_indexer)
  138. tm.assert_numpy_array_equal(missing, expected_missing)