# test_reductions.py (9.5 KB)

  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. NaT,
  5. SparseDtype,
  6. Timestamp,
  7. isna,
  8. )
  9. from pandas.core.arrays.sparse import SparseArray
  10. class TestReductions:
  11. @pytest.mark.parametrize(
  12. "data,pos,neg",
  13. [
  14. ([True, True, True], True, False),
  15. ([1, 2, 1], 1, 0),
  16. ([1.0, 2.0, 1.0], 1.0, 0.0),
  17. ],
  18. )
  19. def test_all(self, data, pos, neg):
  20. # GH#17570
  21. out = SparseArray(data).all()
  22. assert out
  23. out = SparseArray(data, fill_value=pos).all()
  24. assert out
  25. data[1] = neg
  26. out = SparseArray(data).all()
  27. assert not out
  28. out = SparseArray(data, fill_value=pos).all()
  29. assert not out
  30. @pytest.mark.parametrize(
  31. "data,pos,neg",
  32. [
  33. ([True, True, True], True, False),
  34. ([1, 2, 1], 1, 0),
  35. ([1.0, 2.0, 1.0], 1.0, 0.0),
  36. ],
  37. )
  38. def test_numpy_all(self, data, pos, neg):
  39. # GH#17570
  40. out = np.all(SparseArray(data))
  41. assert out
  42. out = np.all(SparseArray(data, fill_value=pos))
  43. assert out
  44. data[1] = neg
  45. out = np.all(SparseArray(data))
  46. assert not out
  47. out = np.all(SparseArray(data, fill_value=pos))
  48. assert not out
  49. # raises with a different message on py2.
  50. msg = "the 'out' parameter is not supported"
  51. with pytest.raises(ValueError, match=msg):
  52. np.all(SparseArray(data), out=np.array([]))
  53. @pytest.mark.parametrize(
  54. "data,pos,neg",
  55. [
  56. ([False, True, False], True, False),
  57. ([0, 2, 0], 2, 0),
  58. ([0.0, 2.0, 0.0], 2.0, 0.0),
  59. ],
  60. )
  61. def test_any(self, data, pos, neg):
  62. # GH#17570
  63. out = SparseArray(data).any()
  64. assert out
  65. out = SparseArray(data, fill_value=pos).any()
  66. assert out
  67. data[1] = neg
  68. out = SparseArray(data).any()
  69. assert not out
  70. out = SparseArray(data, fill_value=pos).any()
  71. assert not out
  72. @pytest.mark.parametrize(
  73. "data,pos,neg",
  74. [
  75. ([False, True, False], True, False),
  76. ([0, 2, 0], 2, 0),
  77. ([0.0, 2.0, 0.0], 2.0, 0.0),
  78. ],
  79. )
  80. def test_numpy_any(self, data, pos, neg):
  81. # GH#17570
  82. out = np.any(SparseArray(data))
  83. assert out
  84. out = np.any(SparseArray(data, fill_value=pos))
  85. assert out
  86. data[1] = neg
  87. out = np.any(SparseArray(data))
  88. assert not out
  89. out = np.any(SparseArray(data, fill_value=pos))
  90. assert not out
  91. msg = "the 'out' parameter is not supported"
  92. with pytest.raises(ValueError, match=msg):
  93. np.any(SparseArray(data), out=out)
  94. def test_sum(self):
  95. data = np.arange(10).astype(float)
  96. out = SparseArray(data).sum()
  97. assert out == 45.0
  98. data[5] = np.nan
  99. out = SparseArray(data, fill_value=2).sum()
  100. assert out == 40.0
  101. out = SparseArray(data, fill_value=np.nan).sum()
  102. assert out == 40.0
  103. @pytest.mark.parametrize(
  104. "arr",
  105. [np.array([0, 1, np.nan, 1]), np.array([0, 1, 1])],
  106. )
  107. @pytest.mark.parametrize("fill_value", [0, 1, np.nan])
  108. @pytest.mark.parametrize("min_count, expected", [(3, 2), (4, np.nan)])
  109. def test_sum_min_count(self, arr, fill_value, min_count, expected):
  110. # GH#25777
  111. sparray = SparseArray(arr, fill_value=fill_value)
  112. result = sparray.sum(min_count=min_count)
  113. if np.isnan(expected):
  114. assert np.isnan(result)
  115. else:
  116. assert result == expected
  117. def test_bool_sum_min_count(self):
  118. spar_bool = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True)
  119. res = spar_bool.sum(min_count=1)
  120. assert res == 5
  121. res = spar_bool.sum(min_count=11)
  122. assert isna(res)
  123. def test_numpy_sum(self):
  124. data = np.arange(10).astype(float)
  125. out = np.sum(SparseArray(data))
  126. assert out == 45.0
  127. data[5] = np.nan
  128. out = np.sum(SparseArray(data, fill_value=2))
  129. assert out == 40.0
  130. out = np.sum(SparseArray(data, fill_value=np.nan))
  131. assert out == 40.0
  132. msg = "the 'dtype' parameter is not supported"
  133. with pytest.raises(ValueError, match=msg):
  134. np.sum(SparseArray(data), dtype=np.int64)
  135. msg = "the 'out' parameter is not supported"
  136. with pytest.raises(ValueError, match=msg):
  137. np.sum(SparseArray(data), out=out)
  138. def test_mean(self):
  139. data = np.arange(10).astype(float)
  140. out = SparseArray(data).mean()
  141. assert out == 4.5
  142. data[5] = np.nan
  143. out = SparseArray(data).mean()
  144. assert out == 40.0 / 9
  145. def test_numpy_mean(self):
  146. data = np.arange(10).astype(float)
  147. out = np.mean(SparseArray(data))
  148. assert out == 4.5
  149. data[5] = np.nan
  150. out = np.mean(SparseArray(data))
  151. assert out == 40.0 / 9
  152. msg = "the 'dtype' parameter is not supported"
  153. with pytest.raises(ValueError, match=msg):
  154. np.mean(SparseArray(data), dtype=np.int64)
  155. msg = "the 'out' parameter is not supported"
  156. with pytest.raises(ValueError, match=msg):
  157. np.mean(SparseArray(data), out=out)
  158. class TestMinMax:
  159. @pytest.mark.parametrize(
  160. "raw_data,max_expected,min_expected",
  161. [
  162. (np.arange(5.0), [4], [0]),
  163. (-np.arange(5.0), [0], [-4]),
  164. (np.array([0, 1, 2, np.nan, 4]), [4], [0]),
  165. (np.array([np.nan] * 5), [np.nan], [np.nan]),
  166. (np.array([]), [np.nan], [np.nan]),
  167. ],
  168. )
  169. def test_nan_fill_value(self, raw_data, max_expected, min_expected):
  170. arr = SparseArray(raw_data)
  171. max_result = arr.max()
  172. min_result = arr.min()
  173. assert max_result in max_expected
  174. assert min_result in min_expected
  175. max_result = arr.max(skipna=False)
  176. min_result = arr.min(skipna=False)
  177. if np.isnan(raw_data).any():
  178. assert np.isnan(max_result)
  179. assert np.isnan(min_result)
  180. else:
  181. assert max_result in max_expected
  182. assert min_result in min_expected
  183. @pytest.mark.parametrize(
  184. "fill_value,max_expected,min_expected",
  185. [
  186. (100, 100, 0),
  187. (-100, 1, -100),
  188. ],
  189. )
  190. def test_fill_value(self, fill_value, max_expected, min_expected):
  191. arr = SparseArray(
  192. np.array([fill_value, 0, 1]), dtype=SparseDtype("int", fill_value)
  193. )
  194. max_result = arr.max()
  195. assert max_result == max_expected
  196. min_result = arr.min()
  197. assert min_result == min_expected
  198. def test_only_fill_value(self):
  199. fv = 100
  200. arr = SparseArray(np.array([fv, fv, fv]), dtype=SparseDtype("int", fv))
  201. assert len(arr._valid_sp_values) == 0
  202. assert arr.max() == fv
  203. assert arr.min() == fv
  204. assert arr.max(skipna=False) == fv
  205. assert arr.min(skipna=False) == fv
  206. @pytest.mark.parametrize("func", ["min", "max"])
  207. @pytest.mark.parametrize("data", [np.array([]), np.array([np.nan, np.nan])])
  208. @pytest.mark.parametrize(
  209. "dtype,expected",
  210. [
  211. (SparseDtype(np.float64, np.nan), np.nan),
  212. (SparseDtype(np.float64, 5.0), np.nan),
  213. (SparseDtype("datetime64[ns]", NaT), NaT),
  214. (SparseDtype("datetime64[ns]", Timestamp("2018-05-05")), NaT),
  215. ],
  216. )
  217. def test_na_value_if_no_valid_values(self, func, data, dtype, expected):
  218. arr = SparseArray(data, dtype=dtype)
  219. result = getattr(arr, func)()
  220. if expected is NaT:
  221. # TODO: pin down whether we wrap datetime64("NaT")
  222. assert result is NaT or np.isnat(result)
  223. else:
  224. assert np.isnan(result)
  225. class TestArgmaxArgmin:
  226. @pytest.mark.parametrize(
  227. "arr,argmax_expected,argmin_expected",
  228. [
  229. (SparseArray([1, 2, 0, 1, 2]), 1, 2),
  230. (SparseArray([-1, -2, 0, -1, -2]), 2, 1),
  231. (SparseArray([np.nan, 1, 0, 0, np.nan, -1]), 1, 5),
  232. (SparseArray([np.nan, 1, 0, 0, np.nan, 2]), 5, 2),
  233. (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=-1), 5, 2),
  234. (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=0), 5, 2),
  235. (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=1), 5, 2),
  236. (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=2), 5, 2),
  237. (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=3), 5, 2),
  238. (SparseArray([0] * 10 + [-1], fill_value=0), 0, 10),
  239. (SparseArray([0] * 10 + [-1], fill_value=-1), 0, 10),
  240. (SparseArray([0] * 10 + [-1], fill_value=1), 0, 10),
  241. (SparseArray([-1] + [0] * 10, fill_value=0), 1, 0),
  242. (SparseArray([1] + [0] * 10, fill_value=0), 0, 1),
  243. (SparseArray([-1] + [0] * 10, fill_value=-1), 1, 0),
  244. (SparseArray([1] + [0] * 10, fill_value=1), 0, 1),
  245. ],
  246. )
  247. def test_argmax_argmin(self, arr, argmax_expected, argmin_expected):
  248. argmax_result = arr.argmax()
  249. argmin_result = arr.argmin()
  250. assert argmax_result == argmax_expected
  251. assert argmin_result == argmin_expected
  252. @pytest.mark.parametrize(
  253. "arr,method",
  254. [(SparseArray([]), "argmax"), (SparseArray([]), "argmin")],
  255. )
  256. def test_empty_array(self, arr, method):
  257. msg = f"attempt to get {method} of an empty sequence"
  258. with pytest.raises(ValueError, match=msg):
  259. arr.argmax() if method == "argmax" else arr.argmin()