test_floats.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689
  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. DataFrame,
  5. Index,
  6. RangeIndex,
  7. Series,
  8. date_range,
  9. period_range,
  10. timedelta_range,
  11. )
  12. import pandas._testing as tm
  13. def gen_obj(klass, index):
  14. if klass is Series:
  15. obj = Series(np.arange(len(index)), index=index)
  16. else:
  17. obj = DataFrame(
  18. np.random.default_rng(2).standard_normal((len(index), len(index))),
  19. index=index,
  20. columns=index,
  21. )
  22. return obj
  23. class TestFloatIndexers:
  24. def check(self, result, original, indexer, getitem):
  25. """
  26. comparator for results
  27. we need to take care if we are indexing on a
  28. Series or a frame
  29. """
  30. if isinstance(original, Series):
  31. expected = original.iloc[indexer]
  32. elif getitem:
  33. expected = original.iloc[:, indexer]
  34. else:
  35. expected = original.iloc[indexer]
  36. tm.assert_almost_equal(result, expected)
  37. @pytest.mark.parametrize(
  38. "index",
  39. [
  40. Index(list("abcde")),
  41. Index(list("abcde"), dtype="category"),
  42. date_range("2020-01-01", periods=5),
  43. timedelta_range("1 day", periods=5),
  44. period_range("2020-01-01", periods=5),
  45. ],
  46. )
  47. def test_scalar_non_numeric(self, index, frame_or_series, indexer_sl):
  48. # GH 4892
  49. # float_indexers should raise exceptions
  50. # on appropriate Index types & accessors
  51. s = gen_obj(frame_or_series, index)
  52. # getting
  53. with pytest.raises(KeyError, match="^3.0$"):
  54. indexer_sl(s)[3.0]
  55. # contains
  56. assert 3.0 not in s
  57. s2 = s.copy()
  58. indexer_sl(s2)[3.0] = 10
  59. if indexer_sl is tm.setitem:
  60. assert 3.0 in s2.axes[-1]
  61. elif indexer_sl is tm.loc:
  62. assert 3.0 in s2.axes[0]
  63. else:
  64. assert 3.0 not in s2.axes[0]
  65. assert 3.0 not in s2.axes[-1]
  66. @pytest.mark.parametrize(
  67. "index",
  68. [
  69. Index(list("abcde")),
  70. Index(list("abcde"), dtype="category"),
  71. date_range("2020-01-01", periods=5),
  72. timedelta_range("1 day", periods=5),
  73. period_range("2020-01-01", periods=5),
  74. ],
  75. )
  76. def test_scalar_non_numeric_series_fallback(self, index):
  77. # fallsback to position selection, series only
  78. s = Series(np.arange(len(index)), index=index)
  79. msg = "Series.__getitem__ treating keys as positions is deprecated"
  80. with tm.assert_produces_warning(FutureWarning, match=msg):
  81. s[3]
  82. with pytest.raises(KeyError, match="^3.0$"):
  83. s[3.0]
  84. def test_scalar_with_mixed(self, indexer_sl):
  85. s2 = Series([1, 2, 3], index=["a", "b", "c"])
  86. s3 = Series([1, 2, 3], index=["a", "b", 1.5])
  87. # lookup in a pure string index with an invalid indexer
  88. with pytest.raises(KeyError, match="^1.0$"):
  89. indexer_sl(s2)[1.0]
  90. with pytest.raises(KeyError, match=r"^1\.0$"):
  91. indexer_sl(s2)[1.0]
  92. result = indexer_sl(s2)["b"]
  93. expected = 2
  94. assert result == expected
  95. # mixed index so we have label
  96. # indexing
  97. with pytest.raises(KeyError, match="^1.0$"):
  98. indexer_sl(s3)[1.0]
  99. if indexer_sl is not tm.loc:
  100. # __getitem__ falls back to positional
  101. msg = "Series.__getitem__ treating keys as positions is deprecated"
  102. with tm.assert_produces_warning(FutureWarning, match=msg):
  103. result = s3[1]
  104. expected = 2
  105. assert result == expected
  106. with pytest.raises(KeyError, match=r"^1\.0$"):
  107. indexer_sl(s3)[1.0]
  108. result = indexer_sl(s3)[1.5]
  109. expected = 3
  110. assert result == expected
  111. @pytest.mark.parametrize(
  112. "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
  113. )
  114. def test_scalar_integer(self, index, frame_or_series, indexer_sl):
  115. getitem = indexer_sl is not tm.loc
  116. # test how scalar float indexers work on int indexes
  117. # integer index
  118. i = index
  119. obj = gen_obj(frame_or_series, i)
  120. # coerce to equal int
  121. result = indexer_sl(obj)[3.0]
  122. self.check(result, obj, 3, getitem)
  123. if isinstance(obj, Series):
  124. def compare(x, y):
  125. assert x == y
  126. expected = 100
  127. else:
  128. compare = tm.assert_series_equal
  129. if getitem:
  130. expected = Series(100, index=range(len(obj)), name=3)
  131. else:
  132. expected = Series(100.0, index=range(len(obj)), name=3)
  133. s2 = obj.copy()
  134. indexer_sl(s2)[3.0] = 100
  135. result = indexer_sl(s2)[3.0]
  136. compare(result, expected)
  137. result = indexer_sl(s2)[3]
  138. compare(result, expected)
  139. @pytest.mark.parametrize(
  140. "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
  141. )
  142. def test_scalar_integer_contains_float(self, index, frame_or_series):
  143. # contains
  144. # integer index
  145. obj = gen_obj(frame_or_series, index)
  146. # coerce to equal int
  147. assert 3.0 in obj
  148. def test_scalar_float(self, frame_or_series):
  149. # scalar float indexers work on a float index
  150. index = Index(np.arange(5.0))
  151. s = gen_obj(frame_or_series, index)
  152. # assert all operations except for iloc are ok
  153. indexer = index[3]
  154. for idxr in [tm.loc, tm.setitem]:
  155. getitem = idxr is not tm.loc
  156. # getting
  157. result = idxr(s)[indexer]
  158. self.check(result, s, 3, getitem)
  159. # setting
  160. s2 = s.copy()
  161. result = idxr(s2)[indexer]
  162. self.check(result, s, 3, getitem)
  163. # random float is a KeyError
  164. with pytest.raises(KeyError, match=r"^3\.5$"):
  165. idxr(s)[3.5]
  166. # contains
  167. assert 3.0 in s
  168. # iloc succeeds with an integer
  169. expected = s.iloc[3]
  170. s2 = s.copy()
  171. s2.iloc[3] = expected
  172. result = s2.iloc[3]
  173. self.check(result, s, 3, False)
  174. @pytest.mark.parametrize(
  175. "index",
  176. [
  177. Index(list("abcde"), dtype=object),
  178. date_range("2020-01-01", periods=5),
  179. timedelta_range("1 day", periods=5),
  180. period_range("2020-01-01", periods=5),
  181. ],
  182. )
  183. @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
  184. def test_slice_non_numeric(self, index, idx, frame_or_series, indexer_sli):
  185. # GH 4892
  186. # float_indexers should raise exceptions
  187. # on appropriate Index types & accessors
  188. s = gen_obj(frame_or_series, index)
  189. # getitem
  190. if indexer_sli is tm.iloc:
  191. msg = (
  192. "cannot do positional indexing "
  193. rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
  194. "type float"
  195. )
  196. else:
  197. msg = (
  198. "cannot do slice indexing "
  199. rf"on {type(index).__name__} with these indexers "
  200. r"\[(3|4)(\.0)?\] "
  201. r"of type (float|int)"
  202. )
  203. with pytest.raises(TypeError, match=msg):
  204. indexer_sli(s)[idx]
  205. # setitem
  206. if indexer_sli is tm.iloc:
  207. # otherwise we keep the same message as above
  208. msg = "slice indices must be integers or None or have an __index__ method"
  209. with pytest.raises(TypeError, match=msg):
  210. indexer_sli(s)[idx] = 0
  211. def test_slice_integer(self):
  212. # same as above, but for Integer based indexes
  213. # these coerce to a like integer
  214. # oob indicates if we are out of bounds
  215. # of positional indexing
  216. for index, oob in [
  217. (Index(np.arange(5, dtype=np.int64)), False),
  218. (RangeIndex(5), False),
  219. (Index(np.arange(5, dtype=np.int64) + 10), True),
  220. ]:
  221. # s is an in-range index
  222. s = Series(range(5), index=index)
  223. # getitem
  224. for idx in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
  225. result = s.loc[idx]
  226. # these are all label indexing
  227. # except getitem which is positional
  228. # empty
  229. if oob:
  230. indexer = slice(0, 0)
  231. else:
  232. indexer = slice(3, 5)
  233. self.check(result, s, indexer, False)
  234. # getitem out-of-bounds
  235. for idx in [slice(-6, 6), slice(-6.0, 6.0)]:
  236. result = s.loc[idx]
  237. # these are all label indexing
  238. # except getitem which is positional
  239. # empty
  240. if oob:
  241. indexer = slice(0, 0)
  242. else:
  243. indexer = slice(-6, 6)
  244. self.check(result, s, indexer, False)
  245. # positional indexing
  246. msg = (
  247. "cannot do slice indexing "
  248. rf"on {type(index).__name__} with these indexers \[-6\.0\] of "
  249. "type float"
  250. )
  251. with pytest.raises(TypeError, match=msg):
  252. s[slice(-6.0, 6.0)]
  253. # getitem odd floats
  254. for idx, res1 in [
  255. (slice(2.5, 4), slice(3, 5)),
  256. (slice(2, 3.5), slice(2, 4)),
  257. (slice(2.5, 3.5), slice(3, 4)),
  258. ]:
  259. result = s.loc[idx]
  260. if oob:
  261. res = slice(0, 0)
  262. else:
  263. res = res1
  264. self.check(result, s, res, False)
  265. # positional indexing
  266. msg = (
  267. "cannot do slice indexing "
  268. rf"on {type(index).__name__} with these indexers \[(2|3)\.5\] of "
  269. "type float"
  270. )
  271. with pytest.raises(TypeError, match=msg):
  272. s[idx]
  273. @pytest.mark.parametrize("idx", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)])
  274. def test_integer_positional_indexing(self, idx):
  275. """make sure that we are raising on positional indexing
  276. w.r.t. an integer index
  277. """
  278. s = Series(range(2, 6), index=range(2, 6))
  279. result = s[2:4]
  280. expected = s.iloc[2:4]
  281. tm.assert_series_equal(result, expected)
  282. klass = RangeIndex
  283. msg = (
  284. "cannot do (slice|positional) indexing "
  285. rf"on {klass.__name__} with these indexers \[(2|4)\.0\] of "
  286. "type float"
  287. )
  288. with pytest.raises(TypeError, match=msg):
  289. s[idx]
  290. with pytest.raises(TypeError, match=msg):
  291. s.iloc[idx]
  292. @pytest.mark.parametrize(
  293. "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
  294. )
  295. def test_slice_integer_frame_getitem(self, index):
  296. # similar to above, but on the getitem dim (of a DataFrame)
  297. s = DataFrame(np.random.default_rng(2).standard_normal((5, 2)), index=index)
  298. # getitem
  299. for idx in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]:
  300. result = s.loc[idx]
  301. indexer = slice(0, 2)
  302. self.check(result, s, indexer, False)
  303. # positional indexing
  304. msg = (
  305. "cannot do slice indexing "
  306. rf"on {type(index).__name__} with these indexers \[(0|1)\.0\] of "
  307. "type float"
  308. )
  309. with pytest.raises(TypeError, match=msg):
  310. s[idx]
  311. # getitem out-of-bounds
  312. for idx in [slice(-10, 10), slice(-10.0, 10.0)]:
  313. result = s.loc[idx]
  314. self.check(result, s, slice(-10, 10), True)
  315. # positional indexing
  316. msg = (
  317. "cannot do slice indexing "
  318. rf"on {type(index).__name__} with these indexers \[-10\.0\] of "
  319. "type float"
  320. )
  321. with pytest.raises(TypeError, match=msg):
  322. s[slice(-10.0, 10.0)]
  323. # getitem odd floats
  324. for idx, res in [
  325. (slice(0.5, 1), slice(1, 2)),
  326. (slice(0, 0.5), slice(0, 1)),
  327. (slice(0.5, 1.5), slice(1, 2)),
  328. ]:
  329. result = s.loc[idx]
  330. self.check(result, s, res, False)
  331. # positional indexing
  332. msg = (
  333. "cannot do slice indexing "
  334. rf"on {type(index).__name__} with these indexers \[0\.5\] of "
  335. "type float"
  336. )
  337. with pytest.raises(TypeError, match=msg):
  338. s[idx]
  339. @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
  340. @pytest.mark.parametrize(
  341. "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
  342. )
  343. def test_float_slice_getitem_with_integer_index_raises(self, idx, index):
  344. # similar to above, but on the getitem dim (of a DataFrame)
  345. s = DataFrame(np.random.default_rng(2).standard_normal((5, 2)), index=index)
  346. # setitem
  347. sc = s.copy()
  348. sc.loc[idx] = 0
  349. result = sc.loc[idx].values.ravel()
  350. assert (result == 0).all()
  351. # positional indexing
  352. msg = (
  353. "cannot do slice indexing "
  354. rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
  355. "type float"
  356. )
  357. with pytest.raises(TypeError, match=msg):
  358. s[idx] = 0
  359. with pytest.raises(TypeError, match=msg):
  360. s[idx]
  361. @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
  362. def test_slice_float(self, idx, frame_or_series, indexer_sl):
  363. # same as above, but for floats
  364. index = Index(np.arange(5.0)) + 0.1
  365. s = gen_obj(frame_or_series, index)
  366. expected = s.iloc[3:4]
  367. # getitem
  368. result = indexer_sl(s)[idx]
  369. assert isinstance(result, type(s))
  370. tm.assert_equal(result, expected)
  371. # setitem
  372. s2 = s.copy()
  373. indexer_sl(s2)[idx] = 0
  374. result = indexer_sl(s2)[idx].values.ravel()
  375. assert (result == 0).all()
  376. def test_floating_index_doc_example(self):
  377. index = Index([1.5, 2, 3, 4.5, 5])
  378. s = Series(range(5), index=index)
  379. assert s[3] == 2
  380. assert s.loc[3] == 2
  381. assert s.iloc[3] == 3
  382. def test_floating_misc(self, indexer_sl):
  383. # related 236
  384. # scalar/slicing of a float index
  385. s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)
  386. # label based slicing
  387. result = indexer_sl(s)[1.0:3.0]
  388. expected = Series(1, index=[2.5])
  389. tm.assert_series_equal(result, expected)
  390. # exact indexing when found
  391. result = indexer_sl(s)[5.0]
  392. assert result == 2
  393. result = indexer_sl(s)[5]
  394. assert result == 2
  395. # value not found (and no fallbacking at all)
  396. # scalar integers
  397. with pytest.raises(KeyError, match=r"^4$"):
  398. indexer_sl(s)[4]
  399. # fancy floats/integers create the correct entry (as nan)
  400. # fancy tests
  401. expected = Series([2, 0], index=Index([5.0, 0.0], dtype=np.float64))
  402. for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float
  403. tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)
  404. expected = Series([2, 0], index=Index([5, 0], dtype="float64"))
  405. for fancy_idx in [[5, 0], np.array([5, 0])]:
  406. tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)
  407. warn = FutureWarning if indexer_sl is tm.setitem else None
  408. msg = r"The behavior of obj\[i:j\] with a float-dtype index"
  409. # all should return the same as we are slicing 'the same'
  410. with tm.assert_produces_warning(warn, match=msg):
  411. result1 = indexer_sl(s)[2:5]
  412. result2 = indexer_sl(s)[2.0:5.0]
  413. result3 = indexer_sl(s)[2.0:5]
  414. result4 = indexer_sl(s)[2.1:5]
  415. tm.assert_series_equal(result1, result2)
  416. tm.assert_series_equal(result1, result3)
  417. tm.assert_series_equal(result1, result4)
  418. expected = Series([1, 2], index=[2.5, 5.0])
  419. with tm.assert_produces_warning(warn, match=msg):
  420. result = indexer_sl(s)[2:5]
  421. tm.assert_series_equal(result, expected)
  422. # list selection
  423. result1 = indexer_sl(s)[[0.0, 5, 10]]
  424. result2 = s.iloc[[0, 2, 4]]
  425. tm.assert_series_equal(result1, result2)
  426. with pytest.raises(KeyError, match="not in index"):
  427. indexer_sl(s)[[1.6, 5, 10]]
  428. with pytest.raises(KeyError, match="not in index"):
  429. indexer_sl(s)[[0, 1, 2]]
  430. result = indexer_sl(s)[[2.5, 5]]
  431. tm.assert_series_equal(result, Series([1, 2], index=[2.5, 5.0]))
  432. result = indexer_sl(s)[[2.5]]
  433. tm.assert_series_equal(result, Series([1], index=[2.5]))
  434. def test_floatindex_slicing_bug(self, float_numpy_dtype):
  435. # GH 5557, related to slicing a float index
  436. dtype = float_numpy_dtype
  437. ser = {
  438. 256: 2321.0,
  439. 1: 78.0,
  440. 2: 2716.0,
  441. 3: 0.0,
  442. 4: 369.0,
  443. 5: 0.0,
  444. 6: 269.0,
  445. 7: 0.0,
  446. 8: 0.0,
  447. 9: 0.0,
  448. 10: 3536.0,
  449. 11: 0.0,
  450. 12: 24.0,
  451. 13: 0.0,
  452. 14: 931.0,
  453. 15: 0.0,
  454. 16: 101.0,
  455. 17: 78.0,
  456. 18: 9643.0,
  457. 19: 0.0,
  458. 20: 0.0,
  459. 21: 0.0,
  460. 22: 63761.0,
  461. 23: 0.0,
  462. 24: 446.0,
  463. 25: 0.0,
  464. 26: 34773.0,
  465. 27: 0.0,
  466. 28: 729.0,
  467. 29: 78.0,
  468. 30: 0.0,
  469. 31: 0.0,
  470. 32: 3374.0,
  471. 33: 0.0,
  472. 34: 1391.0,
  473. 35: 0.0,
  474. 36: 361.0,
  475. 37: 0.0,
  476. 38: 61808.0,
  477. 39: 0.0,
  478. 40: 0.0,
  479. 41: 0.0,
  480. 42: 6677.0,
  481. 43: 0.0,
  482. 44: 802.0,
  483. 45: 0.0,
  484. 46: 2691.0,
  485. 47: 0.0,
  486. 48: 3582.0,
  487. 49: 0.0,
  488. 50: 734.0,
  489. 51: 0.0,
  490. 52: 627.0,
  491. 53: 70.0,
  492. 54: 2584.0,
  493. 55: 0.0,
  494. 56: 324.0,
  495. 57: 0.0,
  496. 58: 605.0,
  497. 59: 0.0,
  498. 60: 0.0,
  499. 61: 0.0,
  500. 62: 3989.0,
  501. 63: 10.0,
  502. 64: 42.0,
  503. 65: 0.0,
  504. 66: 904.0,
  505. 67: 0.0,
  506. 68: 88.0,
  507. 69: 70.0,
  508. 70: 8172.0,
  509. 71: 0.0,
  510. 72: 0.0,
  511. 73: 0.0,
  512. 74: 64902.0,
  513. 75: 0.0,
  514. 76: 347.0,
  515. 77: 0.0,
  516. 78: 36605.0,
  517. 79: 0.0,
  518. 80: 379.0,
  519. 81: 70.0,
  520. 82: 0.0,
  521. 83: 0.0,
  522. 84: 3001.0,
  523. 85: 0.0,
  524. 86: 1630.0,
  525. 87: 7.0,
  526. 88: 364.0,
  527. 89: 0.0,
  528. 90: 67404.0,
  529. 91: 9.0,
  530. 92: 0.0,
  531. 93: 0.0,
  532. 94: 7685.0,
  533. 95: 0.0,
  534. 96: 1017.0,
  535. 97: 0.0,
  536. 98: 2831.0,
  537. 99: 0.0,
  538. 100: 2963.0,
  539. 101: 0.0,
  540. 102: 854.0,
  541. 103: 0.0,
  542. 104: 0.0,
  543. 105: 0.0,
  544. 106: 0.0,
  545. 107: 0.0,
  546. 108: 0.0,
  547. 109: 0.0,
  548. 110: 0.0,
  549. 111: 0.0,
  550. 112: 0.0,
  551. 113: 0.0,
  552. 114: 0.0,
  553. 115: 0.0,
  554. 116: 0.0,
  555. 117: 0.0,
  556. 118: 0.0,
  557. 119: 0.0,
  558. 120: 0.0,
  559. 121: 0.0,
  560. 122: 0.0,
  561. 123: 0.0,
  562. 124: 0.0,
  563. 125: 0.0,
  564. 126: 67744.0,
  565. 127: 22.0,
  566. 128: 264.0,
  567. 129: 0.0,
  568. 260: 197.0,
  569. 268: 0.0,
  570. 265: 0.0,
  571. 269: 0.0,
  572. 261: 0.0,
  573. 266: 1198.0,
  574. 267: 0.0,
  575. 262: 2629.0,
  576. 258: 775.0,
  577. 257: 0.0,
  578. 263: 0.0,
  579. 259: 0.0,
  580. 264: 163.0,
  581. 250: 10326.0,
  582. 251: 0.0,
  583. 252: 1228.0,
  584. 253: 0.0,
  585. 254: 2769.0,
  586. 255: 0.0,
  587. }
  588. # smoke test for the repr
  589. s = Series(ser, dtype=dtype)
  590. result = s.value_counts()
  591. assert result.index.dtype == dtype
  592. str(result)