test_arithmetic.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. import operator
  2. import numpy as np
  3. import pytest
  4. import pandas as pd
  5. import pandas._testing as tm
  6. from pandas.core import ops
  7. from pandas.core.arrays import FloatingArray
  8. # Basic test for the arithmetic array ops
  9. # -----------------------------------------------------------------------------
  @pytest.mark.parametrize(
      "opname, exp",
      [("add", [1, 3, None, None, 9]), ("mul", [0, 2, None, None, 20])],
      ids=["add", "mul"],
  )
  def test_add_mul(dtype, opname, exp):
      """Check array/array ``add`` and ``mul``, both forward and reflected.

      A missing value in either operand is expected to be missing in the
      result, and the reflected operator must agree with the forward one.
      """
      left = pd.array([0, 1, None, 3, 4], dtype=dtype)
      right = pd.array([1, 2, 3, None, 5], dtype=dtype)
      expected = pd.array(exp, dtype=dtype)

      # Forward operator comes from the stdlib ``operator`` module.
      forward = getattr(operator, opname)
      tm.assert_extension_array_equal(forward(left, right), expected)

      # Reflected operator (``radd`` / ``rmul``) comes from ``pandas.core.ops``.
      reflected = getattr(ops, "r" + opname)
      tm.assert_extension_array_equal(reflected(left, right), expected)
  def test_sub(dtype):
      """Subtraction of two arrays keeps a missing value from either operand."""
      minuend = pd.array([1, 2, 3, None, 5], dtype=dtype)
      subtrahend = pd.array([0, 1, None, 3, 4], dtype=dtype)
      expected = pd.array([1, 1, None, None, 1], dtype=dtype)

      tm.assert_extension_array_equal(minuend - subtrahend, expected)
  def test_div(dtype):
      """True division is expected to produce a ``Float64`` array.

      ``1 / 0`` is expected to be ``inf``; a missing value on either side
      stays missing.
      """
      numerator = pd.array([1, 2, 3, None, 5], dtype=dtype)
      denominator = pd.array([0, 1, None, 3, 4], dtype=dtype)
      expected = pd.array([np.inf, 2, None, None, 1.25], dtype="Float64")

      tm.assert_extension_array_equal(numerator / denominator, expected)
  @pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
  def test_divide_by_zero(zero, negative):
      """Divide an ``Int64`` array by a scalar zero.

      The expected result is a ``FloatingArray`` holding nan / inf / -inf for
      the valid entries with the missing entry still masked; the signs flip
      when ``negative`` is set (the ``-0.0`` case).
      """
      # https://github.com/pandas-dev/pandas/issues/27398, GH#22793
      dividend = pd.array([0, 1, -1, None], dtype="Int64")
      quotient = dividend / zero

      expected_values = np.array([np.nan, np.inf, -np.inf, 1], dtype="float64")
      expected_mask = np.array([False, False, False, True])
      expected = FloatingArray(expected_values, expected_mask)
      if negative:
          expected *= -1

      tm.assert_extension_array_equal(quotient, expected)
  def test_floordiv(dtype):
      """Floor division keeps ``dtype``; ``1 // 0`` is expected to be ``0`` here."""
      numerator = pd.array([1, 2, 3, None, 5], dtype=dtype)
      denominator = pd.array([0, 1, None, 3, 4], dtype=dtype)
      # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet)
      expected = pd.array([0, 2, None, None, 1], dtype=dtype)

      tm.assert_extension_array_equal(numerator // denominator, expected)
  def test_floordiv_by_int_zero_no_mask(any_int_ea_dtype):
      """Scalar ``1 // Series`` must agree between masked and NumPy-backed ints.

      The masked result is compared as ``Float64`` and the NumPy-backed result
      as ``float64``; both are expected to hold ``[inf, 1.0]``.
      """
      # GH 48223: Aligns with non-masked floordiv
      # but differs from numpy
      # https://github.com/pandas-dev/pandas/issues/30188#issuecomment-564452740
      masked = pd.Series([0, 1], dtype=any_int_ea_dtype)
      expected_masked = pd.Series([np.inf, 1.0], dtype="Float64")
      tm.assert_series_equal(1 // masked, expected_masked)

      # Same data on the plain NumPy dtype underlying the extension dtype.
      unmasked = masked.astype(masked.dtype.numpy_dtype)
      expected_unmasked = expected_masked.astype(np.float64)
      tm.assert_series_equal(1 // unmasked, expected_unmasked)
  def test_mod(dtype):
      """Modulo keeps ``dtype``; ``1 % 0`` is expected to be ``0`` here."""
      dividend = pd.array([1, 2, 3, None, 5], dtype=dtype)
      divisor = pd.array([0, 1, None, 3, 4], dtype=dtype)
      expected = pd.array([0, 0, None, None, 1], dtype=dtype)

      tm.assert_extension_array_equal(dividend % divisor, expected)
  def test_pow_scalar():
      """Exercise ``**`` between an ``Int64`` array and scalars, in both orders.

      The scalars used are ``0``, ``1``, ``pd.NA`` and ``np.nan``.  Expected
      results encode that ``x ** 0`` and ``1 ** x`` are ``1`` even when ``x``
      is missing, and that ``np.nan`` yields a ``FloatingArray`` with
      unmasked NaN.
      """
      base = pd.array([-1, 0, 1, None, 2], dtype="Int64")

      # array ** scalar
      tm.assert_extension_array_equal(
          base**0, pd.array([1, 1, 1, 1, 1], dtype="Int64")
      )
      tm.assert_extension_array_equal(
          base**1, pd.array([-1, 0, 1, None, 2], dtype="Int64")
      )
      tm.assert_extension_array_equal(
          base**pd.NA, pd.array([None, None, 1, None, None], dtype="Int64")
      )
      tm.assert_extension_array_equal(
          base**np.nan,
          FloatingArray(
              np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64"),
              np.array([False, False, False, True, False]),
          ),
      )

      # reversed: scalar ** array
      exponent = base[1:]  # Can't raise integers to negative powers.
      tm.assert_extension_array_equal(
          0**exponent, pd.array([1, 0, None, 0], dtype="Int64")
      )
      tm.assert_extension_array_equal(
          1**exponent, pd.array([1, 1, 1, 1], dtype="Int64")
      )
      tm.assert_extension_array_equal(
          pd.NA**exponent, pd.array([1, None, None, None], dtype="Int64")
      )
      tm.assert_extension_array_equal(
          np.nan**exponent,
          FloatingArray(
              np.array([1, np.nan, np.nan, np.nan], dtype="float64"),
              np.array([False, False, True, False]),
          ),
      )
  def test_pow_array():
      """Array ``**`` array over every pairing of 0, 1 and a missing value."""
      bases = pd.array([0, 0, 0, 1, 1, 1, None, None, None])
      exponents = pd.array([0, 1, None, 0, 1, None, 0, 1, None])
      expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None])

      tm.assert_extension_array_equal(bases**exponents, expected)
  def test_rpow_one_to_na():
      """A NumPy float array raised to an all-missing ``Int64`` array.

      ``1.0 ** NA`` is expected to be ``1.0``, while ``2.0 ** NA`` stays missing.
      """
      # https://github.com/pandas-dev/pandas/issues/22022
      # https://github.com/pandas-dev/pandas/issues/29997
      exponents = pd.array([np.nan, np.nan], dtype="Int64")
      bases = np.array([1.0, 2.0])
      expected = pd.array([1.0, np.nan], dtype="Float64")

      tm.assert_extension_array_equal(bases**exponents, expected)
  @pytest.mark.parametrize("other", [0, 0.5])
  def test_numpy_zero_dim_ndarray(other):
      """Adding a zero-dim ndarray must match adding the bare scalar it wraps."""
      values = pd.array([1, None, 2])
      zero_dim = np.array(other)

      via_ndarray = values + zero_dim
      via_scalar = values + other
      tm.assert_equal(via_ndarray, via_scalar)
  128. # Test generic characteristics / errors
  129. # -----------------------------------------------------------------------------
  def test_error_invalid_values(data, all_arithmetic_operators):
      """Check arithmetic between a Series of ``data`` and incompatible operands.

      For the dunder method named by ``all_arithmetic_operators``, the scalar
      ``"foo"``, a ``Timestamp`` and a datetime Series must raise ``TypeError``.
      A string Series must raise as well, except for ``__mul__``/``__rmul__``,
      where the operation is expected to succeed and repeat ``"foo"``
      element-wise.
      """
      op = all_arithmetic_operators
      ser = pd.Series(data)
      # Bound arithmetic method of the Series.  Deliberately not named ``ops``
      # so the module-level ``pandas.core.ops`` import is not shadowed.
      method = getattr(ser, op)

      # invalid scalars
      with tm.external_error_raised(TypeError):
          method("foo")
      with tm.external_error_raised(TypeError):
          method(pd.Timestamp("20180101"))

      # invalid array-likes
      str_ser = pd.Series("foo", index=ser.index)
      if op in ("__mul__", "__rmul__"):
          res = method(str_ser)
          expected = pd.Series(["foo" * x for x in data], index=ser.index)
          # TODO: doing this fillna to keep tests passing as we make
          # assert_almost_equal stricter, but the expected with pd.NA seems
          # more-correct than np.nan here.
          expected = expected.fillna(np.nan)
          tm.assert_series_equal(res, expected)
      else:
          with tm.external_error_raised(TypeError):
              method(str_ser)

      with tm.external_error_raised(TypeError):
          method(pd.Series(pd.date_range("20180101", periods=len(ser))))
  158. # Various
  159. # -----------------------------------------------------------------------------
  160. # TODO test unsigned overflow
  def test_arith_coerce_scalar(data, all_arithmetic_operators):
      """Arithmetic with the float scalar ``0.01`` must give a ``Float64`` result.

      The expectation is built by running the same operator on a float-cast
      copy of the Series and converting that to ``Float64``.
      """
      opname = all_arithmetic_operators
      func = tm.get_op_from_name(opname)
      ser = pd.Series(data)
      scalar = 0.01

      result = func(ser, scalar)
      expected = func(ser.astype(float), scalar).astype("Float64")

      # rmod results in NaN that wasn't NA in original nullable Series -> unmask it
      if opname == "__rmod__":
          zero_positions = (ser == 0).fillna(False).to_numpy(bool)
          expected.array._mask[zero_positions] = False

      tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize("other", [1.0, np.array(1.0)])
  def test_arithmetic_conversion(all_arithmetic_operators, other):
      """A float operand must give a ``Float64`` result.

      This holds even though the operand used here (``1.0``) is equal to an
      integer.
      """
      func = tm.get_op_from_name(all_arithmetic_operators)
      ints = pd.Series([1, 2, 3], dtype="Int64")

      outcome = func(ints, other)
      assert outcome.dtype == "Float64"
  def test_cross_type_arithmetic():
      """Mix ``Int64``, ``UInt8`` and plain integer columns in arithmetic.

      Sums are expected to come back as ``Int64`` and the comparison as
      ``boolean``, with missing values carried through.
      """
      frame = pd.DataFrame(
          {
              "A": pd.Series([1, 2, np.nan], dtype="Int64"),
              "B": pd.Series([1, np.nan, 3], dtype="UInt8"),
              "C": [1, 2, 3],
          }
      )

      # Int64 + plain integer column
      total = frame.A + frame.C
      tm.assert_series_equal(total, pd.Series([2, 4, np.nan], dtype="Int64"))

      # Comparison on top of the arithmetic
      is_twelve = (frame.A + frame.C) * 3 == 12
      tm.assert_series_equal(
          is_twelve, pd.Series([False, True, None], dtype="boolean")
      )

      # Int64 + UInt8
      mixed = frame.A + frame.B
      tm.assert_series_equal(mixed, pd.Series([2, np.nan, np.nan], dtype="Int64"))
  @pytest.mark.parametrize("op", ["mean"])
  def test_reduce_to_float(op):
      """Reductions named by ``op`` must return floats, even for whole numbers.

      Checked on the ``Int64`` column directly (a Python ``float`` is expected)
      and through ``groupby`` (a ``Float64`` column is expected).
      """
      frame = pd.DataFrame(
          {
              "A": ["a", "b", "b"],
              "B": [1, None, 3],
              "C": pd.array([1, None, 3], dtype="Int64"),
          }
      )

      # Reduction applied directly to the Int64 column.
      scalar = getattr(frame.C, op)()
      assert isinstance(scalar, float)

      # The same reduction applied per group.
      grouped = getattr(frame.groupby("A"), op)()
      expected_index = pd.Index(["a", "b"], name="A")
      expected_columns = {
          "B": np.array([1.0, 3.0]),
          "C": pd.array([1, 3], dtype="Float64"),
      }
      expected = pd.DataFrame(expected_columns, index=expected_index)
      tm.assert_frame_equal(grouped, expected)
  @pytest.mark.parametrize(
      "source, neg_target, abs_target",
      [
          ([1, 2, 3], [-1, -2, -3], [1, 2, 3]),
          ([1, 2, None], [-1, -2, None], [1, 2, None]),
          ([-1, 0, 1], [1, 0, -1], [1, 0, 1]),
      ],
  )
  def test_unary_int_operators(any_signed_int_ea_dtype, source, neg_target, abs_target):
      """Check unary ``-``, ``+`` and ``abs`` on an array built from ``source``.

      Unary plus must return an equal array that does not share memory with
      its input.
      """
      dtype = any_signed_int_ea_dtype
      arr = pd.array(source, dtype=dtype)

      # Evaluate all three unary operations before building the expectations.
      negated = -arr
      positive = +arr
      absolute = abs(arr)

      expected_neg = pd.array(neg_target, dtype=dtype)
      expected_abs = pd.array(abs_target, dtype=dtype)

      tm.assert_extension_array_equal(negated, expected_neg)
      tm.assert_extension_array_equal(positive, arr)
      assert not tm.shares_memory(positive, arr)
      tm.assert_extension_array_equal(absolute, expected_abs)
  def test_values_multiplying_large_series_by_NA():
      """``pd.NA`` times a 10001-element Series of zeros must be all ``pd.NA``."""
      # GH#33701
      length = 10001
      product = pd.NA * pd.Series(np.zeros(length))
      all_missing = pd.Series([pd.NA] * length)

      tm.assert_series_equal(product, all_missing)
  def test_bitwise(dtype):
      """Check ``|``, ``&`` and ``^`` between two ``dtype`` arrays.

      A missing value on either side is expected to be missing in the result.
      Applying the same operators against a ``Float64`` array must raise
      ``TypeError``.
      """
      left = pd.array([1, None, 3, 4], dtype=dtype)
      right = pd.array([None, 3, 5, 4], dtype=dtype)
      # Same order as the operators are exercised: or, and, xor.
      bit_ops = (operator.or_, operator.and_, operator.xor)

      for bit_op in bit_ops:
          outcome = bit_op(left, right)
          # The expectation applies the same operator to the plain integers.
          expected = pd.array([None, None, bit_op(3, 5), bit_op(4, 4)], dtype=dtype)
          tm.assert_extension_array_equal(outcome, expected)

      # TODO: desired behavior when operating with boolean? defer?

      floats = right.astype("Float64")
      for bit_op in bit_ops:
          with pytest.raises(TypeError, match="unsupported operand type"):
              bit_op(left, floats)