test_ufunc.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463
  1. from collections import deque
  2. import re
  3. import string
  4. import numpy as np
  5. import pytest
  6. import pandas.util._test_decorators as td
  7. import pandas as pd
  8. import pandas._testing as tm
  9. from pandas.arrays import SparseArray
  10. @pytest.fixture(params=[np.add, np.logaddexp])
  11. def ufunc(request):
  12. # dunder op
  13. return request.param
  14. @pytest.fixture(
  15. params=[pytest.param(True, marks=pytest.mark.fails_arm_wheels), False],
  16. ids=["sparse", "dense"],
  17. )
  18. def sparse(request):
  19. return request.param
  20. @pytest.fixture
  21. def arrays_for_binary_ufunc():
  22. """
  23. A pair of random, length-100 integer-dtype arrays, that are mostly 0.
  24. """
  25. a1 = np.random.default_rng(2).integers(0, 10, 100, dtype="int64")
  26. a2 = np.random.default_rng(2).integers(0, 10, 100, dtype="int64")
  27. a1[::3] = 0
  28. a2[::4] = 0
  29. return a1, a2
  30. @pytest.mark.parametrize("ufunc", [np.positive, np.floor, np.exp])
  31. def test_unary_ufunc(ufunc, sparse):
  32. # Test that ufunc(pd.Series) == pd.Series(ufunc)
  33. arr = np.random.default_rng(2).integers(0, 10, 10, dtype="int64")
  34. arr[::2] = 0
  35. if sparse:
  36. arr = SparseArray(arr, dtype=pd.SparseDtype("int64", 0))
  37. index = list(string.ascii_letters[:10])
  38. name = "name"
  39. series = pd.Series(arr, index=index, name=name)
  40. result = ufunc(series)
  41. expected = pd.Series(ufunc(arr), index=index, name=name)
  42. tm.assert_series_equal(result, expected)
  43. @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
  44. def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc):
  45. # Test that ufunc(pd.Series(a), array) == pd.Series(ufunc(a, b))
  46. a1, a2 = arrays_for_binary_ufunc
  47. if sparse:
  48. a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
  49. a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0))
  50. name = "name" # op(pd.Series, array) preserves the name.
  51. series = pd.Series(a1, name=name)
  52. other = a2
  53. array_args = (a1, a2)
  54. series_args = (series, other) # ufunc(series, array)
  55. if flip:
  56. array_args = reversed(array_args)
  57. series_args = reversed(series_args) # ufunc(array, series)
  58. expected = pd.Series(ufunc(*array_args), name=name)
  59. result = ufunc(*series_args)
  60. tm.assert_series_equal(result, expected)
  61. @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
  62. def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc):
  63. # Test that
  64. # * func(pd.Series(a), pd.Series(b)) == pd.Series(ufunc(a, b))
  65. # * ufunc(Index, pd.Series) dispatches to pd.Series (returns a pd.Series)
  66. a1, a2 = arrays_for_binary_ufunc
  67. if sparse:
  68. a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
  69. a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0))
  70. name = "name" # op(pd.Series, array) preserves the name.
  71. series = pd.Series(a1, name=name)
  72. other = pd.Index(a2, name=name).astype("int64")
  73. array_args = (a1, a2)
  74. series_args = (series, other) # ufunc(series, array)
  75. if flip:
  76. array_args = reversed(array_args)
  77. series_args = reversed(series_args) # ufunc(array, series)
  78. expected = pd.Series(ufunc(*array_args), name=name)
  79. result = ufunc(*series_args)
  80. tm.assert_series_equal(result, expected)
  81. @pytest.mark.parametrize("shuffle", [True, False], ids=["unaligned", "aligned"])
  82. @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
  83. def test_binary_ufunc_with_series(
  84. flip, shuffle, sparse, ufunc, arrays_for_binary_ufunc
  85. ):
  86. # Test that
  87. # * func(pd.Series(a), pd.Series(b)) == pd.Series(ufunc(a, b))
  88. # with alignment between the indices
  89. a1, a2 = arrays_for_binary_ufunc
  90. if sparse:
  91. a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
  92. a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0))
  93. name = "name" # op(pd.Series, array) preserves the name.
  94. series = pd.Series(a1, name=name)
  95. other = pd.Series(a2, name=name)
  96. idx = np.random.default_rng(2).permutation(len(a1))
  97. if shuffle:
  98. other = other.take(idx)
  99. if flip:
  100. index = other.align(series)[0].index
  101. else:
  102. index = series.align(other)[0].index
  103. else:
  104. index = series.index
  105. array_args = (a1, a2)
  106. series_args = (series, other) # ufunc(series, array)
  107. if flip:
  108. array_args = tuple(reversed(array_args))
  109. series_args = tuple(reversed(series_args)) # ufunc(array, series)
  110. expected = pd.Series(ufunc(*array_args), index=index, name=name)
  111. result = ufunc(*series_args)
  112. tm.assert_series_equal(result, expected)
  113. @pytest.mark.parametrize("flip", [True, False])
  114. def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc):
  115. # Test that
  116. # * ufunc(pd.Series, scalar) == pd.Series(ufunc(array, scalar))
  117. # * ufunc(pd.Series, scalar) == ufunc(scalar, pd.Series)
  118. arr, _ = arrays_for_binary_ufunc
  119. if sparse:
  120. arr = SparseArray(arr)
  121. other = 2
  122. series = pd.Series(arr, name="name")
  123. series_args = (series, other)
  124. array_args = (arr, other)
  125. if flip:
  126. series_args = tuple(reversed(series_args))
  127. array_args = tuple(reversed(array_args))
  128. expected = pd.Series(ufunc(*array_args), name="name")
  129. result = ufunc(*series_args)
  130. tm.assert_series_equal(result, expected)
  131. @pytest.mark.parametrize("ufunc", [np.divmod]) # TODO: np.modf, np.frexp
  132. @pytest.mark.parametrize("shuffle", [True, False])
  133. @pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning")
  134. def test_multiple_output_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary_ufunc):
  135. # Test that
  136. # the same conditions from binary_ufunc_scalar apply to
  137. # ufuncs with multiple outputs.
  138. a1, a2 = arrays_for_binary_ufunc
  139. # work around https://github.com/pandas-dev/pandas/issues/26987
  140. a1[a1 == 0] = 1
  141. a2[a2 == 0] = 1
  142. if sparse:
  143. a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
  144. a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0))
  145. s1 = pd.Series(a1)
  146. s2 = pd.Series(a2)
  147. if shuffle:
  148. # ensure we align before applying the ufunc
  149. s2 = s2.sample(frac=1)
  150. expected = ufunc(a1, a2)
  151. assert isinstance(expected, tuple)
  152. result = ufunc(s1, s2)
  153. assert isinstance(result, tuple)
  154. tm.assert_series_equal(result[0], pd.Series(expected[0]))
  155. tm.assert_series_equal(result[1], pd.Series(expected[1]))
  156. def test_multiple_output_ufunc(sparse, arrays_for_binary_ufunc):
  157. # Test that the same conditions from unary input apply to multi-output
  158. # ufuncs
  159. arr, _ = arrays_for_binary_ufunc
  160. if sparse:
  161. arr = SparseArray(arr)
  162. series = pd.Series(arr, name="name")
  163. result = np.modf(series)
  164. expected = np.modf(arr)
  165. assert isinstance(result, tuple)
  166. assert isinstance(expected, tuple)
  167. tm.assert_series_equal(result[0], pd.Series(expected[0], name="name"))
  168. tm.assert_series_equal(result[1], pd.Series(expected[1], name="name"))
  169. def test_binary_ufunc_drops_series_name(ufunc, sparse, arrays_for_binary_ufunc):
  170. # Drop the names when they differ.
  171. a1, a2 = arrays_for_binary_ufunc
  172. s1 = pd.Series(a1, name="a")
  173. s2 = pd.Series(a2, name="b")
  174. result = ufunc(s1, s2)
  175. assert result.name is None
  176. def test_object_series_ok():
  177. class Dummy:
  178. def __init__(self, value) -> None:
  179. self.value = value
  180. def __add__(self, other):
  181. return self.value + other.value
  182. arr = np.array([Dummy(0), Dummy(1)])
  183. ser = pd.Series(arr)
  184. tm.assert_series_equal(np.add(ser, ser), pd.Series(np.add(ser, arr)))
  185. tm.assert_series_equal(np.add(ser, Dummy(1)), pd.Series(np.add(ser, Dummy(1))))
  186. @pytest.fixture(
  187. params=[
  188. pd.array([1, 3, 2], dtype=np.int64),
  189. pd.array([1, 3, 2], dtype="Int64"),
  190. pd.array([1, 3, 2], dtype="Float32"),
  191. pd.array([1, 10, 2], dtype="Sparse[int]"),
  192. pd.to_datetime(["2000", "2010", "2001"]),
  193. pd.to_datetime(["2000", "2010", "2001"]).tz_localize("CET"),
  194. pd.to_datetime(["2000", "2010", "2001"]).to_period(freq="D"),
  195. pd.to_timedelta(["1 Day", "3 Days", "2 Days"]),
  196. pd.IntervalIndex([pd.Interval(0, 1), pd.Interval(2, 3), pd.Interval(1, 2)]),
  197. ],
  198. ids=lambda x: str(x.dtype),
  199. )
  200. def values_for_np_reduce(request):
  201. # min/max tests assume that these are monotonic increasing
  202. return request.param
  203. class TestNumpyReductions:
  204. # TODO: cases with NAs, axis kwarg for DataFrame
  205. def test_multiply(self, values_for_np_reduce, box_with_array, request):
  206. box = box_with_array
  207. values = values_for_np_reduce
  208. with tm.assert_produces_warning(None):
  209. obj = box(values)
  210. if isinstance(values, pd.core.arrays.SparseArray):
  211. mark = pytest.mark.xfail(reason="SparseArray has no 'prod'")
  212. request.applymarker(mark)
  213. if values.dtype.kind in "iuf":
  214. result = np.multiply.reduce(obj)
  215. if box is pd.DataFrame:
  216. expected = obj.prod(numeric_only=False)
  217. tm.assert_series_equal(result, expected)
  218. elif box is pd.Index:
  219. # Index has no 'prod'
  220. expected = obj._values.prod()
  221. assert result == expected
  222. else:
  223. expected = obj.prod()
  224. assert result == expected
  225. else:
  226. msg = "|".join(
  227. [
  228. "does not support reduction",
  229. "unsupported operand type",
  230. "ufunc 'multiply' cannot use operands",
  231. ]
  232. )
  233. with pytest.raises(TypeError, match=msg):
  234. np.multiply.reduce(obj)
  235. def test_add(self, values_for_np_reduce, box_with_array):
  236. box = box_with_array
  237. values = values_for_np_reduce
  238. with tm.assert_produces_warning(None):
  239. obj = box(values)
  240. if values.dtype.kind in "miuf":
  241. result = np.add.reduce(obj)
  242. if box is pd.DataFrame:
  243. expected = obj.sum(numeric_only=False)
  244. tm.assert_series_equal(result, expected)
  245. elif box is pd.Index:
  246. # Index has no 'sum'
  247. expected = obj._values.sum()
  248. assert result == expected
  249. else:
  250. expected = obj.sum()
  251. assert result == expected
  252. else:
  253. msg = "|".join(
  254. [
  255. "does not support reduction",
  256. "unsupported operand type",
  257. "ufunc 'add' cannot use operands",
  258. ]
  259. )
  260. with pytest.raises(TypeError, match=msg):
  261. np.add.reduce(obj)
  262. def test_max(self, values_for_np_reduce, box_with_array):
  263. box = box_with_array
  264. values = values_for_np_reduce
  265. same_type = True
  266. if box is pd.Index and values.dtype.kind in ["i", "f"]:
  267. # ATM Index casts to object, so we get python ints/floats
  268. same_type = False
  269. with tm.assert_produces_warning(None):
  270. obj = box(values)
  271. result = np.maximum.reduce(obj)
  272. if box is pd.DataFrame:
  273. # TODO: cases with axis kwarg
  274. expected = obj.max(numeric_only=False)
  275. tm.assert_series_equal(result, expected)
  276. else:
  277. expected = values[1]
  278. assert result == expected
  279. if same_type:
  280. # check we have e.g. Timestamp instead of dt64
  281. assert type(result) == type(expected)
  282. def test_min(self, values_for_np_reduce, box_with_array):
  283. box = box_with_array
  284. values = values_for_np_reduce
  285. same_type = True
  286. if box is pd.Index and values.dtype.kind in ["i", "f"]:
  287. # ATM Index casts to object, so we get python ints/floats
  288. same_type = False
  289. with tm.assert_produces_warning(None):
  290. obj = box(values)
  291. result = np.minimum.reduce(obj)
  292. if box is pd.DataFrame:
  293. expected = obj.min(numeric_only=False)
  294. tm.assert_series_equal(result, expected)
  295. else:
  296. expected = values[0]
  297. assert result == expected
  298. if same_type:
  299. # check we have e.g. Timestamp instead of dt64
  300. assert type(result) == type(expected)
  301. @pytest.mark.parametrize("type_", [list, deque, tuple])
  302. def test_binary_ufunc_other_types(type_):
  303. a = pd.Series([1, 2, 3], name="name")
  304. b = type_([3, 4, 5])
  305. result = np.add(a, b)
  306. expected = pd.Series(np.add(a.to_numpy(), b), name="name")
  307. tm.assert_series_equal(result, expected)
  308. def test_object_dtype_ok():
  309. class Thing:
  310. def __init__(self, value) -> None:
  311. self.value = value
  312. def __add__(self, other):
  313. other = getattr(other, "value", other)
  314. return type(self)(self.value + other)
  315. def __eq__(self, other) -> bool:
  316. return type(other) is Thing and self.value == other.value
  317. def __repr__(self) -> str:
  318. return f"Thing({self.value})"
  319. s = pd.Series([Thing(1), Thing(2)])
  320. result = np.add(s, Thing(1))
  321. expected = pd.Series([Thing(2), Thing(3)])
  322. tm.assert_series_equal(result, expected)
  323. def test_outer():
  324. # https://github.com/pandas-dev/pandas/issues/27186
  325. ser = pd.Series([1, 2, 3])
  326. obj = np.array([1, 2, 3])
  327. with pytest.raises(NotImplementedError, match=""):
  328. np.subtract.outer(ser, obj)
  329. def test_np_matmul():
  330. # GH26650
  331. df1 = pd.DataFrame(data=[[-1, 1, 10]])
  332. df2 = pd.DataFrame(data=[-1, 1, 10])
  333. expected = pd.DataFrame(data=[102])
  334. result = np.matmul(df1, df2)
  335. tm.assert_frame_equal(expected, result)
  336. def test_array_ufuncs_for_many_arguments():
  337. # GH39853
  338. def add3(x, y, z):
  339. return x + y + z
  340. ufunc = np.frompyfunc(add3, 3, 1)
  341. ser = pd.Series([1, 2])
  342. result = ufunc(ser, ser, 1)
  343. expected = pd.Series([3, 5], dtype=object)
  344. tm.assert_series_equal(result, expected)
  345. df = pd.DataFrame([[1, 2]])
  346. msg = (
  347. "Cannot apply ufunc <ufunc 'add3 (vectorized)'> "
  348. "to mixed DataFrame and Series inputs."
  349. )
  350. with pytest.raises(NotImplementedError, match=re.escape(msg)):
  351. ufunc(ser, ser, df)
  352. # TODO(CoW) see https://github.com/pandas-dev/pandas/pull/51082
  353. @td.skip_copy_on_write_not_yet_implemented
  354. def test_np_fix():
  355. # np.fix is not a ufunc but is composed of several ufunc calls under the hood
  356. # with `out` and `where` keywords
  357. ser = pd.Series([-1.5, -0.5, 0.5, 1.5])
  358. result = np.fix(ser)
  359. expected = pd.Series([-1.0, -0.0, 0.0, 1.0])
  360. tm.assert_series_equal(result, expected)