| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 |
- from decimal import Decimal
- import numpy as np
- import pytest
- from pandas._libs.missing import is_matching_na
- from pandas import Index
- import pandas._testing as tm
class TestGetIndexer:
    """Checks of ``Index.get_indexer`` on object-dtype indexes."""

    @pytest.mark.parametrize(
        "method,expected",
        [
            ("pad", np.array([-1, 0, 1, 1], dtype=np.intp)),
            ("backfill", np.array([0, 0, 1, -1], dtype=np.intp)),
        ],
    )
    def test_get_indexer_strings(self, method, expected):
        """Each fill method on a string index yields the parametrized positions."""
        idx = Index(["b", "c"], dtype=object)
        result = idx.get_indexer(["a", "b", "c", "d"], method=method)

        tm.assert_numpy_array_equal(result, np.array(expected, dtype=np.intp))

    def test_get_indexer_strings_raises(self):
        """``nearest`` or any ``tolerance`` on a string index raises TypeError."""
        idx = Index(["b", "c"], dtype=object)
        target = ["a", "b", "c", "d"]

        # Either wording of the error satisfies the regex.
        accepted_wordings = (
            "operation 'sub' not supported for dtype 'str'",
            r"unsupported operand type\(s\) for -: 'str' and 'str'",
        )
        msg = "|".join(accepted_wordings)

        # Keyword sets that are each expected to fail, checked in this order.
        failing_kwargs = (
            {"method": "nearest"},
            {"method": "pad", "tolerance": 2},
            {"method": "pad", "tolerance": [2, 2, 2, 2]},
        )
        for kwargs in failing_kwargs:
            with pytest.raises(TypeError, match=msg):
                idx.get_indexer(target, **kwargs)

    def test_get_indexer_with_NA_values(
        self, unique_nulls_fixture, unique_nulls_fixture2
    ):
        """Two distinct null objects are each found at their own position."""
        # GH#22332
        # Pairwise check that no pair of NA values gets mangled.
        first, second = unique_nulls_fixture, unique_nulls_fixture2
        if first is second:
            # The same object twice would not give unique values; nothing to check.
            return

        index = Index(np.array([first, second], dtype=object), dtype=object)
        target = Index([first, second, "Unknown"], dtype=object)

        result = index.get_indexer(target)
        tm.assert_numpy_array_equal(result, np.array([0, 1, -1], dtype=np.intp))

    def test_get_indexer_infer_string_missing_values(self):
        """A ``None`` inside a plain list target is matched in an object index."""
        # The list passed to get_indexer must be treated as object rather than
        # cast to string, otherwise the None entry would not be found.
        # https://github.com/pandas-dev/pandas/issues/55834
        index = Index(["a", "b", None], dtype="object")
        positions = index.get_indexer([None, "x"])

        tm.assert_numpy_array_equal(positions, np.array([2, -1], dtype=np.intp))
class TestGetIndexerNonUnique:
    """Checks of ``Index.get_indexer_non_unique`` with null entries."""

    def test_get_indexer_non_unique_nas(self, nulls_fixture):
        """A null target is found at every null position, with nothing missing."""
        target = [nulls_fixture]
        no_missing = np.array([], dtype=np.intp)

        # Not actually non-unique, but the method should still work.
        unique_index = Index(["a", "b", nulls_fixture], dtype=object)
        indexer, missing = unique_index.get_indexer_non_unique(target)
        tm.assert_numpy_array_equal(indexer, np.array([2], dtype=np.intp))
        tm.assert_numpy_array_equal(missing, no_missing)

        # Genuinely non-unique: the same null object appears twice.
        repeated_index = Index(["a", nulls_fixture, "b", nulls_fixture], dtype=object)
        indexer, missing = repeated_index.get_indexer_non_unique(target)
        tm.assert_numpy_array_equal(indexer, np.array([1, 3], dtype=np.intp))
        tm.assert_numpy_array_equal(missing, no_missing)

        # Matching-but-not-identical NaNs: only the first NaN flavour that
        # matches the fixture (float, then Decimal) is exercised.
        for make_nan in (float, Decimal):
            if not is_matching_na(nulls_fixture, make_nan("NaN")):
                continue
            # Two separate calls give two distinct NaN objects in the index.
            nan_index = Index(["a", make_nan("NaN"), "b", make_nan("NaN")], dtype=object)
            indexer, missing = nan_index.get_indexer_non_unique(target)
            tm.assert_numpy_array_equal(indexer, np.array([1, 3], dtype=np.intp))
            tm.assert_numpy_array_equal(missing, no_missing)
            break

    @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning")
    def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
        """A numpy NaT target is located among matching or non-matching NaTs."""
        if is_matching_na(np_nat_fixture, np_nat_fixture2):
            # Matching-but-not-identical nats: copies ensure distinct objects.
            values = ["2021-10-02", np_nat_fixture.copy(), np_nat_fixture2.copy()]
            expected_indexer = np.array([1, 2], dtype=np.intp)
        else:
            # dt64nat vs td64nat
            try:
                np_nat_fixture == np_nat_fixture2
            except (TypeError, OverflowError):
                # Numpy will raise on uncomparable types, like
                # np.datetime64('NaT', 'Y') and np.datetime64('NaT', 'ps')
                # https://github.com/numpy/numpy/issues/22762
                return
            values = [
                "2021-10-02",
                np_nat_fixture,
                np_nat_fixture2,
                np_nat_fixture,
                np_nat_fixture2,
            ]
            expected_indexer = np.array([1, 3], dtype=np.intp)

        index = Index(np.array(values, dtype=object), dtype=object)
        # Pass the target as an object Index so that it is not cast to a
        # DatetimeIndex.
        target = Index([np_nat_fixture], dtype=object)

        indexer, missing = index.get_indexer_non_unique(target)
        tm.assert_numpy_array_equal(indexer, expected_indexer)
        tm.assert_numpy_array_equal(missing, np.array([], dtype=np.intp))
|