# test_indexing.py (10 KB)

import numpy as np
import pytest

import pandas as pd
from pandas import SparseDtype
import pandas._testing as tm
from pandas.core.arrays.sparse import SparseArray


  7. @pytest.fixture
  8. def arr_data():
  9. return np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6])
  10. @pytest.fixture
  11. def arr(arr_data):
  12. return SparseArray(arr_data)
  13. class TestGetitem:
  14. def test_getitem(self, arr):
  15. dense = arr.to_dense()
  16. for i, value in enumerate(arr):
  17. tm.assert_almost_equal(value, dense[i])
  18. tm.assert_almost_equal(arr[-i], dense[-i])
  19. def test_getitem_arraylike_mask(self, arr):
  20. arr = SparseArray([0, 1, 2])
  21. result = arr[[True, False, True]]
  22. expected = SparseArray([0, 2])
  23. tm.assert_sp_array_equal(result, expected)
  24. @pytest.mark.parametrize(
  25. "slc",
  26. [
  27. np.s_[:],
  28. np.s_[1:10],
  29. np.s_[1:100],
  30. np.s_[10:1],
  31. np.s_[:-3],
  32. np.s_[-5:-4],
  33. np.s_[:-12],
  34. np.s_[-12:],
  35. np.s_[2:],
  36. np.s_[2::3],
  37. np.s_[::2],
  38. np.s_[::-1],
  39. np.s_[::-2],
  40. np.s_[1:6:2],
  41. np.s_[:-6:-2],
  42. ],
  43. )
  44. @pytest.mark.parametrize(
  45. "as_dense", [[np.nan] * 10, [1] * 10, [np.nan] * 5 + [1] * 5, []]
  46. )
  47. def test_getslice(self, slc, as_dense):
  48. as_dense = np.array(as_dense)
  49. arr = SparseArray(as_dense)
  50. result = arr[slc]
  51. expected = SparseArray(as_dense[slc])
  52. tm.assert_sp_array_equal(result, expected)
  53. def test_getslice_tuple(self):
  54. dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])
  55. sparse = SparseArray(dense)
  56. res = sparse[(slice(4, None),)]
  57. exp = SparseArray(dense[4:])
  58. tm.assert_sp_array_equal(res, exp)
  59. sparse = SparseArray(dense, fill_value=0)
  60. res = sparse[(slice(4, None),)]
  61. exp = SparseArray(dense[4:], fill_value=0)
  62. tm.assert_sp_array_equal(res, exp)
  63. msg = "too many indices for array"
  64. with pytest.raises(IndexError, match=msg):
  65. sparse[4:, :]
  66. with pytest.raises(IndexError, match=msg):
  67. # check numpy compat
  68. dense[4:, :]
  69. def test_boolean_slice_empty(self):
  70. arr = SparseArray([0, 1, 2])
  71. res = arr[[False, False, False]]
  72. assert res.dtype == arr.dtype
  73. def test_getitem_bool_sparse_array(self, arr):
  74. # GH 23122
  75. spar_bool = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True)
  76. exp = SparseArray([np.nan, 2, np.nan, 5, 6])
  77. tm.assert_sp_array_equal(arr[spar_bool], exp)
  78. spar_bool = ~spar_bool
  79. res = arr[spar_bool]
  80. exp = SparseArray([np.nan, 1, 3, 4, np.nan])
  81. tm.assert_sp_array_equal(res, exp)
  82. spar_bool = SparseArray(
  83. [False, True, np.nan] * 3, dtype=np.bool_, fill_value=np.nan
  84. )
  85. res = arr[spar_bool]
  86. exp = SparseArray([np.nan, 3, 5])
  87. tm.assert_sp_array_equal(res, exp)
  88. def test_getitem_bool_sparse_array_as_comparison(self):
  89. # GH 45110
  90. arr = SparseArray([1, 2, 3, 4, np.nan, np.nan], fill_value=np.nan)
  91. res = arr[arr > 2]
  92. exp = SparseArray([3.0, 4.0], fill_value=np.nan)
  93. tm.assert_sp_array_equal(res, exp)
  94. def test_get_item(self, arr):
  95. zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
  96. assert np.isnan(arr[1])
  97. assert arr[2] == 1
  98. assert arr[7] == 5
  99. assert zarr[0] == 0
  100. assert zarr[2] == 1
  101. assert zarr[7] == 5
  102. errmsg = "must be an integer between -10 and 10"
  103. with pytest.raises(IndexError, match=errmsg):
  104. arr[11]
  105. with pytest.raises(IndexError, match=errmsg):
  106. arr[-11]
  107. assert arr[-1] == arr[len(arr) - 1]
  108. class TestSetitem:
  109. def test_set_item(self, arr_data):
  110. arr = SparseArray(arr_data).copy()
  111. def setitem():
  112. arr[5] = 3
  113. def setslice():
  114. arr[1:5] = 2
  115. with pytest.raises(TypeError, match="assignment via setitem"):
  116. setitem()
  117. with pytest.raises(TypeError, match="assignment via setitem"):
  118. setslice()
  119. class TestTake:
  120. def test_take_scalar_raises(self, arr):
  121. msg = "'indices' must be an array, not a scalar '2'."
  122. with pytest.raises(ValueError, match=msg):
  123. arr.take(2)
  124. def test_take(self, arr_data, arr):
  125. exp = SparseArray(np.take(arr_data, [2, 3]))
  126. tm.assert_sp_array_equal(arr.take([2, 3]), exp)
  127. exp = SparseArray(np.take(arr_data, [0, 1, 2]))
  128. tm.assert_sp_array_equal(arr.take([0, 1, 2]), exp)
  129. def test_take_all_empty(self):
  130. sparse = pd.array([0, 0], dtype=SparseDtype("int64"))
  131. result = sparse.take([0, 1], allow_fill=True, fill_value=np.nan)
  132. tm.assert_sp_array_equal(sparse, result)
  133. def test_take_different_fill_value(self):
  134. # Take with a different fill value shouldn't overwrite the original
  135. sparse = pd.array([0.0], dtype=SparseDtype("float64", fill_value=0.0))
  136. result = sparse.take([0, -1], allow_fill=True, fill_value=np.nan)
  137. expected = pd.array([0, np.nan], dtype=sparse.dtype)
  138. tm.assert_sp_array_equal(expected, result)
  139. def test_take_fill_value(self):
  140. data = np.array([1, np.nan, 0, 3, 0])
  141. sparse = SparseArray(data, fill_value=0)
  142. exp = SparseArray(np.take(data, [0]), fill_value=0)
  143. tm.assert_sp_array_equal(sparse.take([0]), exp)
  144. exp = SparseArray(np.take(data, [1, 3, 4]), fill_value=0)
  145. tm.assert_sp_array_equal(sparse.take([1, 3, 4]), exp)
  146. def test_take_negative(self, arr_data, arr):
  147. exp = SparseArray(np.take(arr_data, [-1]))
  148. tm.assert_sp_array_equal(arr.take([-1]), exp)
  149. exp = SparseArray(np.take(arr_data, [-4, -3, -2]))
  150. tm.assert_sp_array_equal(arr.take([-4, -3, -2]), exp)
  151. def test_bad_take(self, arr):
  152. with pytest.raises(IndexError, match="bounds"):
  153. arr.take([11])
  154. def test_take_filling(self):
  155. # similar tests as GH 12631
  156. sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4])
  157. result = sparse.take(np.array([1, 0, -1]))
  158. expected = SparseArray([np.nan, np.nan, 4])
  159. tm.assert_sp_array_equal(result, expected)
  160. # TODO: actionable?
  161. # XXX: test change: fill_value=True -> allow_fill=True
  162. result = sparse.take(np.array([1, 0, -1]), allow_fill=True)
  163. expected = SparseArray([np.nan, np.nan, np.nan])
  164. tm.assert_sp_array_equal(result, expected)
  165. # allow_fill=False
  166. result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
  167. expected = SparseArray([np.nan, np.nan, 4])
  168. tm.assert_sp_array_equal(result, expected)
  169. msg = "Invalid value in 'indices'"
  170. with pytest.raises(ValueError, match=msg):
  171. sparse.take(np.array([1, 0, -2]), allow_fill=True)
  172. with pytest.raises(ValueError, match=msg):
  173. sparse.take(np.array([1, 0, -5]), allow_fill=True)
  174. msg = "out of bounds value in 'indices'"
  175. with pytest.raises(IndexError, match=msg):
  176. sparse.take(np.array([1, -6]))
  177. with pytest.raises(IndexError, match=msg):
  178. sparse.take(np.array([1, 5]))
  179. with pytest.raises(IndexError, match=msg):
  180. sparse.take(np.array([1, 5]), allow_fill=True)
  181. def test_take_filling_fill_value(self):
  182. # same tests as GH#12631
  183. sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)
  184. result = sparse.take(np.array([1, 0, -1]))
  185. expected = SparseArray([0, np.nan, 4], fill_value=0)
  186. tm.assert_sp_array_equal(result, expected)
  187. # fill_value
  188. result = sparse.take(np.array([1, 0, -1]), allow_fill=True)
  189. # TODO: actionable?
  190. # XXX: behavior change.
  191. # the old way of filling self.fill_value doesn't follow EA rules.
  192. # It's supposed to be self.dtype.na_value (nan in this case)
  193. expected = SparseArray([0, np.nan, np.nan], fill_value=0)
  194. tm.assert_sp_array_equal(result, expected)
  195. # allow_fill=False
  196. result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
  197. expected = SparseArray([0, np.nan, 4], fill_value=0)
  198. tm.assert_sp_array_equal(result, expected)
  199. msg = "Invalid value in 'indices'."
  200. with pytest.raises(ValueError, match=msg):
  201. sparse.take(np.array([1, 0, -2]), allow_fill=True)
  202. with pytest.raises(ValueError, match=msg):
  203. sparse.take(np.array([1, 0, -5]), allow_fill=True)
  204. msg = "out of bounds value in 'indices'"
  205. with pytest.raises(IndexError, match=msg):
  206. sparse.take(np.array([1, -6]))
  207. with pytest.raises(IndexError, match=msg):
  208. sparse.take(np.array([1, 5]))
  209. with pytest.raises(IndexError, match=msg):
  210. sparse.take(np.array([1, 5]), fill_value=True)
  211. @pytest.mark.parametrize("kind", ["block", "integer"])
  212. def test_take_filling_all_nan(self, kind):
  213. sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan], kind=kind)
  214. result = sparse.take(np.array([1, 0, -1]))
  215. expected = SparseArray([np.nan, np.nan, np.nan], kind=kind)
  216. tm.assert_sp_array_equal(result, expected)
  217. result = sparse.take(np.array([1, 0, -1]), fill_value=True)
  218. expected = SparseArray([np.nan, np.nan, np.nan], kind=kind)
  219. tm.assert_sp_array_equal(result, expected)
  220. msg = "out of bounds value in 'indices'"
  221. with pytest.raises(IndexError, match=msg):
  222. sparse.take(np.array([1, -6]))
  223. with pytest.raises(IndexError, match=msg):
  224. sparse.take(np.array([1, 5]))
  225. with pytest.raises(IndexError, match=msg):
  226. sparse.take(np.array([1, 5]), fill_value=True)
  227. class TestWhere:
  228. def test_where_retain_fill_value(self):
  229. # GH#45691 don't lose fill_value on _where
  230. arr = SparseArray([np.nan, 1.0], fill_value=0)
  231. mask = np.array([True, False])
  232. res = arr._where(~mask, 1)
  233. exp = SparseArray([1, 1.0], fill_value=0)
  234. tm.assert_sp_array_equal(res, exp)
  235. ser = pd.Series(arr)
  236. res = ser.where(~mask, 1)
  237. tm.assert_series_equal(res, pd.Series(exp))