| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689 |
- import numpy as np
- import pytest
- from pandas import (
- DataFrame,
- Index,
- RangeIndex,
- Series,
- date_range,
- period_range,
- timedelta_range,
- )
- import pandas._testing as tm
def gen_obj(klass, index):
    """
    Build a small test object of type ``klass`` labelled by ``index``.

    Parameters
    ----------
    klass : type
        ``Series`` yields a Series; anything else yields a DataFrame.
    index : Index
        Used as the Series index, or as both the row and column labels
        of the DataFrame.

    Returns
    -------
    Series or DataFrame
        A Series holding ``0 .. len(index) - 1``, or a square DataFrame of
        standard-normal draws from a generator seeded with ``2``.
    """
    size = len(index)
    if klass is Series:
        return Series(np.arange(size), index=index)

    # Fixed seed keeps the frame contents reproducible between calls.
    values = np.random.default_rng(2).standard_normal((size, size))
    return DataFrame(values, index=index, columns=index)
class TestFloatIndexers:
    """
    Tests for float scalars and float-bounded slices used as indexers.

    The tests cover ``[]``, ``.loc`` and ``.iloc`` on Series and DataFrame
    objects whose index is non-numeric, integer or float.
    """

    def check(self, result, original, indexer, getitem):
        """
        Compare ``result`` with a positional selection from ``original``.

        Parameters
        ----------
        result : object
            Value produced by the indexing operation under test.
        original : Series or DataFrame
            Object the expected value is taken from.
        indexer : int or slice
            Positional indexer handed to ``iloc``.
        getitem : bool
            Only consulted for a DataFrame: when True the expected value is
            taken along the columns (``iloc[:, indexer]``), otherwise along
            the rows (``iloc[indexer]``).
        """
        if isinstance(original, Series):
            expected = original.iloc[indexer]
        elif getitem:
            expected = original.iloc[:, indexer]
        else:
            expected = original.iloc[indexer]

        tm.assert_almost_equal(result, expected)

    @pytest.mark.parametrize(
        "index",
        [
            Index(list("abcde")),
            Index(list("abcde"), dtype="category"),
            date_range("2020-01-01", periods=5),
            timedelta_range("1 day", periods=5),
            period_range("2020-01-01", periods=5),
        ],
    )
    def test_scalar_non_numeric(self, index, frame_or_series, indexer_sl):
        """A float scalar is not a valid label on a non-numeric index."""
        # GH 4892
        # float_indexers should raise exceptions
        # on appropriate Index types & accessors

        s = gen_obj(frame_or_series, index)

        # getting
        with pytest.raises(KeyError, match=r"^3\.0$"):
            indexer_sl(s)[3.0]

        # contains
        assert 3.0 not in s

        # setting with a missing float label enlarges the object
        s2 = s.copy()
        indexer_sl(s2)[3.0] = 10
        if indexer_sl is tm.setitem:
            assert 3.0 in s2.axes[-1]
        elif indexer_sl is tm.loc:
            assert 3.0 in s2.axes[0]
        else:
            # NOTE(review): presumably unreachable if the indexer_sl fixture
            # only yields tm.setitem and tm.loc -- confirm against conftest.
            assert 3.0 not in s2.axes[0]
            assert 3.0 not in s2.axes[-1]

    @pytest.mark.parametrize(
        "index",
        [
            Index(list("abcde")),
            Index(list("abcde"), dtype="category"),
            date_range("2020-01-01", periods=5),
            timedelta_range("1 day", periods=5),
            period_range("2020-01-01", periods=5),
        ],
    )
    def test_scalar_non_numeric_series_fallback(self, index):
        """Integer keys fall back to positions (deprecated); floats do not."""
        # falls back to position selection, series only
        s = Series(np.arange(len(index)), index=index)

        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            s[3]
        with pytest.raises(KeyError, match=r"^3\.0$"):
            s[3.0]

    def test_scalar_with_mixed(self, indexer_sl):
        """Float scalar lookups on a string index and on a mixed index."""
        s2 = Series([1, 2, 3], index=["a", "b", "c"])
        s3 = Series([1, 2, 3], index=["a", "b", 1.5])

        # lookup in a pure string index with an invalid indexer
        with pytest.raises(KeyError, match=r"^1\.0$"):
            indexer_sl(s2)[1.0]

        result = indexer_sl(s2)["b"]
        expected = 2
        assert result == expected

        # mixed index so we have label
        # indexing
        with pytest.raises(KeyError, match=r"^1\.0$"):
            indexer_sl(s3)[1.0]

        if indexer_sl is not tm.loc:
            # __getitem__ falls back to positional
            msg = "Series.__getitem__ treating keys as positions is deprecated"
            with tm.assert_produces_warning(FutureWarning, match=msg):
                result = s3[1]
            expected = 2
            assert result == expected

        # the float key is still missing after the positional lookup above
        with pytest.raises(KeyError, match=r"^1\.0$"):
            indexer_sl(s3)[1.0]

        result = indexer_sl(s3)[1.5]
        expected = 3
        assert result == expected

    @pytest.mark.parametrize(
        "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
    )
    def test_scalar_integer(self, index, frame_or_series, indexer_sl):
        """An integral float scalar is treated as the equal integer label."""
        getitem = indexer_sl is not tm.loc

        # test how scalar float indexers work on int indexes
        obj = gen_obj(frame_or_series, index)

        # coerce to equal int
        result = indexer_sl(obj)[3.0]
        self.check(result, obj, 3, getitem)

        if isinstance(obj, Series):

            def compare(x, y):
                assert x == y

            expected = 100
        else:
            compare = tm.assert_series_equal
            if getitem:
                # obj[3.0] = 100 replaces the whole column -> integer column
                expected = Series(100, index=range(len(obj)), name=3)
            else:
                # obj.loc[3.0] = 100 writes into a float row -> stays float
                expected = Series(100.0, index=range(len(obj)), name=3)

        s2 = obj.copy()
        indexer_sl(s2)[3.0] = 100

        # the value is reachable through both the float and the int key
        result = indexer_sl(s2)[3.0]
        compare(result, expected)

        result = indexer_sl(s2)[3]
        compare(result, expected)

    @pytest.mark.parametrize(
        "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
    )
    def test_scalar_integer_contains_float(self, index, frame_or_series):
        """``in`` with an integral float matches the equal integer label."""
        # contains
        # integer index
        obj = gen_obj(frame_or_series, index)

        # coerce to equal int
        assert 3.0 in obj

    def test_scalar_float(self, frame_or_series):
        """Float scalars get and set by label on a float index."""
        # scalar float indexers work on a float index
        index = Index(np.arange(5.0))
        s = gen_obj(frame_or_series, index)

        # assert all operations except for iloc are ok
        indexer = index[3]
        for idxr in [tm.loc, tm.setitem]:
            getitem = idxr is not tm.loc

            # getting
            result = idxr(s)[indexer]
            self.check(result, s, 3, getitem)

            # setting: write through the same accessor on a copy and read
            # the value back, so the assignment path is actually exercised
            s2 = s.copy()
            idxr(s2)[indexer] = 0
            result = idxr(s2)[indexer]
            assert (np.asarray(result) == 0).all()

            # random float is a KeyError
            with pytest.raises(KeyError, match=r"^3\.5$"):
                idxr(s)[3.5]

        # contains
        assert 3.0 in s

        # iloc succeeds with an integer
        expected = s.iloc[3]
        s2 = s.copy()

        s2.iloc[3] = expected
        result = s2.iloc[3]
        self.check(result, s, 3, False)

    @pytest.mark.parametrize(
        "index",
        [
            Index(list("abcde"), dtype=object),
            date_range("2020-01-01", periods=5),
            timedelta_range("1 day", periods=5),
            period_range("2020-01-01", periods=5),
        ],
    )
    @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
    def test_slice_non_numeric(self, index, idx, frame_or_series, indexer_sli):
        """Slices with float bounds raise TypeError on non-numeric indexes."""
        # GH 4892
        # float_indexers should raise exceptions
        # on appropriate Index types & accessors

        s = gen_obj(frame_or_series, index)

        # getitem
        if indexer_sli is tm.iloc:
            msg = (
                "cannot do positional indexing "
                rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
                "type float"
            )
        else:
            msg = (
                "cannot do slice indexing "
                rf"on {type(index).__name__} with these indexers "
                r"\[(3|4)(\.0)?\] "
                r"of type (float|int)"
            )
        with pytest.raises(TypeError, match=msg):
            indexer_sli(s)[idx]

        # setitem
        if indexer_sli is tm.iloc:
            # otherwise we keep the same message as above
            msg = "slice indices must be integers or None or have an __index__ method"
        with pytest.raises(TypeError, match=msg):
            indexer_sli(s)[idx] = 0

    def test_slice_integer(self):
        """Float-bounded ``.loc`` slices on integer indexes; ``[]`` raises."""
        # same as above, but for Integer based indexes
        # these coerce to a like integer
        # oob indicates that the labels lie outside the sliced range, so
        # every label-based slice below selects nothing
        for index, oob in [
            (Index(np.arange(5, dtype=np.int64)), False),
            (RangeIndex(5), False),
            (Index(np.arange(5, dtype=np.int64) + 10), True),
        ]:
            s = Series(range(5), index=index)

            # .loc slicing is label based and includes both end points
            for idx in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
                result = s.loc[idx]

                indexer = slice(0, 0) if oob else slice(3, 5)
                self.check(result, s, indexer, False)

            # .loc with bounds beyond both ends of the in-range labels
            for idx in [slice(-6, 6), slice(-6.0, 6.0)]:
                result = s.loc[idx]

                indexer = slice(0, 0) if oob else slice(-6, 6)
                self.check(result, s, indexer, False)

            # [] with float slice bounds raises
            msg = (
                "cannot do slice indexing "
                rf"on {type(index).__name__} with these indexers \[-6\.0\] of "
                "type float"
            )
            with pytest.raises(TypeError, match=msg):
                s[slice(-6.0, 6.0)]

            # .loc with non-integral float bounds
            for idx, res1 in [
                (slice(2.5, 4), slice(3, 5)),
                (slice(2, 3.5), slice(2, 4)),
                (slice(2.5, 3.5), slice(3, 4)),
            ]:
                result = s.loc[idx]
                res = slice(0, 0) if oob else res1

                self.check(result, s, res, False)

                # [] with float slice bounds raises
                msg = (
                    "cannot do slice indexing "
                    rf"on {type(index).__name__} with these indexers \[(2|3)\.5\] of "
                    "type float"
                )
                with pytest.raises(TypeError, match=msg):
                    s[idx]

    @pytest.mark.parametrize("idx", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)])
    def test_integer_positional_indexing(self, idx):
        """make sure that we are raising on positional indexing
        w.r.t. an integer index
        """
        s = Series(range(2, 6), index=range(2, 6))

        # integer slices through [] are positional
        result = s[2:4]
        expected = s.iloc[2:4]
        tm.assert_series_equal(result, expected)

        klass = RangeIndex
        msg = (
            "cannot do (slice|positional) indexing "
            rf"on {klass.__name__} with these indexers \[(2|4)\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s[idx]
        with pytest.raises(TypeError, match=msg):
            s.iloc[idx]

    @pytest.mark.parametrize(
        "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
    )
    def test_slice_integer_frame_getitem(self, index):
        """Float-bounded row slices on an integer-indexed DataFrame."""
        # similar to above, but on the getitem dim (of a DataFrame)
        s = DataFrame(np.random.default_rng(2).standard_normal((5, 2)), index=index)

        # .loc with integral float bounds
        for idx in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]:
            result = s.loc[idx]
            indexer = slice(0, 2)
            self.check(result, s, indexer, False)

            # [] with float slice bounds raises
            msg = (
                "cannot do slice indexing "
                rf"on {type(index).__name__} with these indexers \[(0|1)\.0\] of "
                "type float"
            )
            with pytest.raises(TypeError, match=msg):
                s[idx]

        # .loc with bounds beyond both ends of the index
        for idx in [slice(-10, 10), slice(-10.0, 10.0)]:
            result = s.loc[idx]
            # NOTE(review): getitem=True compares against a column slice;
            # slice(-10, 10) spans both axes entirely, so the expected value
            # is the whole frame either way.
            self.check(result, s, slice(-10, 10), True)

        # [] with float slice bounds raises
        msg = (
            "cannot do slice indexing "
            rf"on {type(index).__name__} with these indexers \[-10\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s[slice(-10.0, 10.0)]

        # .loc with non-integral float bounds
        for idx, res in [
            (slice(0.5, 1), slice(1, 2)),
            (slice(0, 0.5), slice(0, 1)),
            (slice(0.5, 1.5), slice(1, 2)),
        ]:
            result = s.loc[idx]
            self.check(result, s, res, False)

            # [] with float slice bounds raises
            msg = (
                "cannot do slice indexing "
                rf"on {type(index).__name__} with these indexers \[0\.5\] of "
                "type float"
            )
            with pytest.raises(TypeError, match=msg):
                s[idx]

    @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
    @pytest.mark.parametrize(
        "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
    )
    def test_float_slice_getitem_with_integer_index_raises(self, idx, index):
        """``.loc`` accepts float-bounded slices for setting; ``[]`` raises."""
        # similar to above, but on the getitem dim (of a DataFrame)
        s = DataFrame(np.random.default_rng(2).standard_normal((5, 2)), index=index)

        # setitem through .loc works on a copy
        sc = s.copy()
        sc.loc[idx] = 0
        result = sc.loc[idx].values.ravel()
        assert (result == 0).all()

        # [] with float slice bounds raises for both setting and getting
        msg = (
            "cannot do slice indexing "
            rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s[idx] = 0

        with pytest.raises(TypeError, match=msg):
            s[idx]

    @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
    def test_slice_float(self, idx, frame_or_series, indexer_sl):
        """Slices on a float index select and assign by label."""
        # same as above, but for floats
        index = Index(np.arange(5.0)) + 0.1
        s = gen_obj(frame_or_series, index)

        # only the label 3.1 lies inside each of the parametrized slices
        expected = s.iloc[3:4]

        # getitem
        result = indexer_sl(s)[idx]
        assert isinstance(result, type(s))
        tm.assert_equal(result, expected)

        # setitem
        s2 = s.copy()
        indexer_sl(s2)[idx] = 0
        result = indexer_sl(s2)[idx].values.ravel()
        assert (result == 0).all()

    def test_floating_index_doc_example(self):
        """On a float index ``[]`` and ``.loc`` use labels, ``.iloc`` positions."""
        index = Index([1.5, 2, 3, 4.5, 5])
        s = Series(range(5), index=index)
        assert s[3] == 2
        assert s.loc[3] == 2
        assert s.iloc[3] == 3

    def test_floating_misc(self, indexer_sl):
        """Scalar, slice and list indexing on a float-indexed Series."""
        # related 236
        # scalar/slicing of a float index
        s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)

        # label based slicing
        result = indexer_sl(s)[1.0:3.0]
        expected = Series(1, index=[2.5])
        tm.assert_series_equal(result, expected)

        # exact indexing when found
        result = indexer_sl(s)[5.0]
        assert result == 2

        result = indexer_sl(s)[5]
        assert result == 2

        # value not found (and no fallbacking at all)

        # scalar integers
        with pytest.raises(KeyError, match=r"^4$"):
            indexer_sl(s)[4]

        # list-like keys: float and integer keys select the same labels and
        # the result keeps a float64 index
        expected = Series([2, 0], index=Index([5.0, 0.0], dtype=np.float64))
        for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]:  # float
            tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)

        expected = Series([2, 0], index=Index([5, 0], dtype="float64"))
        for fancy_idx in [[5, 0], np.array([5, 0])]:  # int
            tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)

        # an integer slice through [] on a float index emits a FutureWarning
        warn = FutureWarning if indexer_sl is tm.setitem else None
        msg = r"The behavior of obj\[i:j\] with a float-dtype index"

        # all should return the same as we are slicing 'the same'
        with tm.assert_produces_warning(warn, match=msg):
            result1 = indexer_sl(s)[2:5]
        result2 = indexer_sl(s)[2.0:5.0]
        result3 = indexer_sl(s)[2.0:5]
        result4 = indexer_sl(s)[2.1:5]
        tm.assert_series_equal(result1, result2)
        tm.assert_series_equal(result1, result3)
        tm.assert_series_equal(result1, result4)

        expected = Series([1, 2], index=[2.5, 5.0])
        with tm.assert_produces_warning(warn, match=msg):
            result = indexer_sl(s)[2:5]

        tm.assert_series_equal(result, expected)

        # list selection
        result1 = indexer_sl(s)[[0.0, 5, 10]]
        result2 = s.iloc[[0, 2, 4]]
        tm.assert_series_equal(result1, result2)

        # any missing label in a list key is a KeyError
        with pytest.raises(KeyError, match="not in index"):
            indexer_sl(s)[[1.6, 5, 10]]

        with pytest.raises(KeyError, match="not in index"):
            indexer_sl(s)[[0, 1, 2]]

        result = indexer_sl(s)[[2.5, 5]]
        tm.assert_series_equal(result, Series([1, 2], index=[2.5, 5.0]))

        result = indexer_sl(s)[[2.5]]
        tm.assert_series_equal(result, Series([1], index=[2.5]))

    def test_floatindex_slicing_bug(self, float_numpy_dtype):
        """``value_counts`` on float data keeps the dtype and can be printed."""
        # GH 5557, related to slicing a float index
        dtype = float_numpy_dtype
        ser = {
            256: 2321.0,
            1: 78.0,
            2: 2716.0,
            3: 0.0,
            4: 369.0,
            5: 0.0,
            6: 269.0,
            7: 0.0,
            8: 0.0,
            9: 0.0,
            10: 3536.0,
            11: 0.0,
            12: 24.0,
            13: 0.0,
            14: 931.0,
            15: 0.0,
            16: 101.0,
            17: 78.0,
            18: 9643.0,
            19: 0.0,
            20: 0.0,
            21: 0.0,
            22: 63761.0,
            23: 0.0,
            24: 446.0,
            25: 0.0,
            26: 34773.0,
            27: 0.0,
            28: 729.0,
            29: 78.0,
            30: 0.0,
            31: 0.0,
            32: 3374.0,
            33: 0.0,
            34: 1391.0,
            35: 0.0,
            36: 361.0,
            37: 0.0,
            38: 61808.0,
            39: 0.0,
            40: 0.0,
            41: 0.0,
            42: 6677.0,
            43: 0.0,
            44: 802.0,
            45: 0.0,
            46: 2691.0,
            47: 0.0,
            48: 3582.0,
            49: 0.0,
            50: 734.0,
            51: 0.0,
            52: 627.0,
            53: 70.0,
            54: 2584.0,
            55: 0.0,
            56: 324.0,
            57: 0.0,
            58: 605.0,
            59: 0.0,
            60: 0.0,
            61: 0.0,
            62: 3989.0,
            63: 10.0,
            64: 42.0,
            65: 0.0,
            66: 904.0,
            67: 0.0,
            68: 88.0,
            69: 70.0,
            70: 8172.0,
            71: 0.0,
            72: 0.0,
            73: 0.0,
            74: 64902.0,
            75: 0.0,
            76: 347.0,
            77: 0.0,
            78: 36605.0,
            79: 0.0,
            80: 379.0,
            81: 70.0,
            82: 0.0,
            83: 0.0,
            84: 3001.0,
            85: 0.0,
            86: 1630.0,
            87: 7.0,
            88: 364.0,
            89: 0.0,
            90: 67404.0,
            91: 9.0,
            92: 0.0,
            93: 0.0,
            94: 7685.0,
            95: 0.0,
            96: 1017.0,
            97: 0.0,
            98: 2831.0,
            99: 0.0,
            100: 2963.0,
            101: 0.0,
            102: 854.0,
            103: 0.0,
            104: 0.0,
            105: 0.0,
            106: 0.0,
            107: 0.0,
            108: 0.0,
            109: 0.0,
            110: 0.0,
            111: 0.0,
            112: 0.0,
            113: 0.0,
            114: 0.0,
            115: 0.0,
            116: 0.0,
            117: 0.0,
            118: 0.0,
            119: 0.0,
            120: 0.0,
            121: 0.0,
            122: 0.0,
            123: 0.0,
            124: 0.0,
            125: 0.0,
            126: 67744.0,
            127: 22.0,
            128: 264.0,
            129: 0.0,
            260: 197.0,
            268: 0.0,
            265: 0.0,
            269: 0.0,
            261: 0.0,
            266: 1198.0,
            267: 0.0,
            262: 2629.0,
            258: 775.0,
            257: 0.0,
            263: 0.0,
            259: 0.0,
            264: 163.0,
            250: 10326.0,
            251: 0.0,
            252: 1228.0,
            253: 0.0,
            254: 2769.0,
            255: 0.0,
        }

        # smoke test for the repr
        s = Series(ser, dtype=dtype)
        result = s.value_counts()
        assert result.index.dtype == dtype
        # only checks that rendering the float-indexed result does not raise
        str(result)
|