# Arithmetic tests for DataFrame/Series/Index/Array classes that should
# behave identically.
# Specifically for object dtype
import datetime
from decimal import Decimal
import operator

import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    Series,
    Timestamp,
    option_context,
)
import pandas._testing as tm
from pandas.core import ops

# ------------------------------------------------------------------
# Comparisons

  20. class TestObjectComparisons:
  21. def test_comparison_object_numeric_nas(self, comparison_op):
  22. ser = Series(np.random.default_rng(2).standard_normal(10), dtype=object)
  23. shifted = ser.shift(2)
  24. func = comparison_op
  25. result = func(ser, shifted)
  26. expected = func(ser.astype(float), shifted.astype(float))
  27. tm.assert_series_equal(result, expected)
  28. @pytest.mark.parametrize(
  29. "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
  30. )
  31. def test_object_comparisons(self, infer_string):
  32. with option_context("future.infer_string", infer_string):
  33. ser = Series(["a", "b", np.nan, "c", "a"])
  34. result = ser == "a"
  35. expected = Series([True, False, False, False, True])
  36. tm.assert_series_equal(result, expected)
  37. result = ser < "a"
  38. expected = Series([False, False, False, False, False])
  39. tm.assert_series_equal(result, expected)
  40. result = ser != "a"
  41. expected = -(ser == "a")
  42. tm.assert_series_equal(result, expected)
  43. @pytest.mark.parametrize("dtype", [None, object])
  44. def test_more_na_comparisons(self, dtype):
  45. left = Series(["a", np.nan, "c"], dtype=dtype)
  46. right = Series(["a", np.nan, "d"], dtype=dtype)
  47. result = left == right
  48. expected = Series([True, False, False])
  49. tm.assert_series_equal(result, expected)
  50. result = left != right
  51. expected = Series([False, True, True])
  52. tm.assert_series_equal(result, expected)
  53. result = left == np.nan
  54. expected = Series([False, False, False])
  55. tm.assert_series_equal(result, expected)
  56. result = left != np.nan
  57. expected = Series([True, True, True])
  58. tm.assert_series_equal(result, expected)

# ------------------------------------------------------------------
# Arithmetic

  61. class TestArithmetic:
  62. def test_add_period_to_array_of_offset(self):
  63. # GH#50162
  64. per = pd.Period("2012-1-1", freq="D")
  65. pi = pd.period_range("2012-1-1", periods=10, freq="D")
  66. idx = per - pi
  67. expected = pd.Index([x + per for x in idx], dtype=object)
  68. result = idx + per
  69. tm.assert_index_equal(result, expected)
  70. result = per + idx
  71. tm.assert_index_equal(result, expected)
  72. # TODO: parametrize
  73. def test_pow_ops_object(self):
  74. # GH#22922
  75. # pow is weird with masking & 1, so testing here
  76. a = Series([1, np.nan, 1, np.nan], dtype=object)
  77. b = Series([1, np.nan, np.nan, 1], dtype=object)
  78. result = a**b
  79. expected = Series(a.values**b.values, dtype=object)
  80. tm.assert_series_equal(result, expected)
  81. result = b**a
  82. expected = Series(b.values**a.values, dtype=object)
  83. tm.assert_series_equal(result, expected)
  84. @pytest.mark.parametrize("op", [operator.add, ops.radd])
  85. @pytest.mark.parametrize("other", ["category", "Int64"])
  86. def test_add_extension_scalar(self, other, box_with_array, op):
  87. # GH#22378
  88. # Check that scalars satisfying is_extension_array_dtype(obj)
  89. # do not incorrectly try to dispatch to an ExtensionArray operation
  90. arr = Series(["a", "b", "c"])
  91. expected = Series([op(x, other) for x in arr])
  92. arr = tm.box_expected(arr, box_with_array)
  93. expected = tm.box_expected(expected, box_with_array)
  94. result = op(arr, other)
  95. tm.assert_equal(result, expected)
  96. def test_objarr_add_str(self, box_with_array):
  97. ser = Series(["x", np.nan, "x"])
  98. expected = Series(["xa", np.nan, "xa"])
  99. ser = tm.box_expected(ser, box_with_array)
  100. expected = tm.box_expected(expected, box_with_array)
  101. result = ser + "a"
  102. tm.assert_equal(result, expected)
  103. def test_objarr_radd_str(self, box_with_array):
  104. ser = Series(["x", np.nan, "x"])
  105. expected = Series(["ax", np.nan, "ax"])
  106. ser = tm.box_expected(ser, box_with_array)
  107. expected = tm.box_expected(expected, box_with_array)
  108. result = "a" + ser
  109. tm.assert_equal(result, expected)
  110. @pytest.mark.parametrize(
  111. "data",
  112. [
  113. [1, 2, 3],
  114. [1.1, 2.2, 3.3],
  115. [Timestamp("2011-01-01"), Timestamp("2011-01-02"), pd.NaT],
  116. ["x", "y", 1],
  117. ],
  118. )
  119. @pytest.mark.parametrize("dtype", [None, object])
  120. def test_objarr_radd_str_invalid(self, dtype, data, box_with_array):
  121. ser = Series(data, dtype=dtype)
  122. ser = tm.box_expected(ser, box_with_array)
  123. msg = "|".join(
  124. [
  125. "can only concatenate str",
  126. "did not contain a loop with signature matching types",
  127. "unsupported operand type",
  128. "must be str",
  129. ]
  130. )
  131. with pytest.raises(TypeError, match=msg):
  132. "foo_" + ser
  133. @pytest.mark.parametrize("op", [operator.add, ops.radd, operator.sub, ops.rsub])
  134. def test_objarr_add_invalid(self, op, box_with_array):
  135. # invalid ops
  136. box = box_with_array
  137. obj_ser = Series(list("abc"), dtype=object, name="objects")
  138. obj_ser = tm.box_expected(obj_ser, box)
  139. msg = "|".join(
  140. [
  141. "can only concatenate str",
  142. "unsupported operand type",
  143. "must be str",
  144. "has no kernel",
  145. "operation 'add' not supported",
  146. "operation 'radd' not supported",
  147. "operation 'sub' not supported",
  148. "operation 'rsub' not supported",
  149. ]
  150. )
  151. with pytest.raises(Exception, match=msg):
  152. op(obj_ser, 1)
  153. with pytest.raises(Exception, match=msg):
  154. op(obj_ser, np.array(1, dtype=np.int64))
  155. # TODO: Moved from tests.series.test_operators; needs cleanup
  156. def test_operators_na_handling(self):
  157. ser = Series(["foo", "bar", "baz", np.nan])
  158. result = "prefix_" + ser
  159. expected = Series(["prefix_foo", "prefix_bar", "prefix_baz", np.nan])
  160. tm.assert_series_equal(result, expected)
  161. result = ser + "_suffix"
  162. expected = Series(["foo_suffix", "bar_suffix", "baz_suffix", np.nan])
  163. tm.assert_series_equal(result, expected)
  164. # TODO: parametrize over box
  165. @pytest.mark.parametrize("dtype", [None, object])
  166. def test_series_with_dtype_radd_timedelta(self, dtype):
  167. # note this test is _not_ aimed at timedelta64-dtyped Series
  168. # as of 2.0 we retain object dtype when ser.dtype == object
  169. ser = Series(
  170. [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
  171. dtype=dtype,
  172. )
  173. expected = Series(
  174. [pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")],
  175. dtype=dtype,
  176. )
  177. result = pd.Timedelta("3 days") + ser
  178. tm.assert_series_equal(result, expected)
  179. result = ser + pd.Timedelta("3 days")
  180. tm.assert_series_equal(result, expected)
  181. # TODO: cleanup & parametrize over box
  182. def test_mixed_timezone_series_ops_object(self):
  183. # GH#13043
  184. ser = Series(
  185. [
  186. Timestamp("2015-01-01", tz="US/Eastern"),
  187. Timestamp("2015-01-01", tz="Asia/Tokyo"),
  188. ],
  189. name="xxx",
  190. )
  191. assert ser.dtype == object
  192. exp = Series(
  193. [
  194. Timestamp("2015-01-02", tz="US/Eastern"),
  195. Timestamp("2015-01-02", tz="Asia/Tokyo"),
  196. ],
  197. name="xxx",
  198. )
  199. tm.assert_series_equal(ser + pd.Timedelta("1 days"), exp)
  200. tm.assert_series_equal(pd.Timedelta("1 days") + ser, exp)
  201. # object series & object series
  202. ser2 = Series(
  203. [
  204. Timestamp("2015-01-03", tz="US/Eastern"),
  205. Timestamp("2015-01-05", tz="Asia/Tokyo"),
  206. ],
  207. name="xxx",
  208. )
  209. assert ser2.dtype == object
  210. exp = Series(
  211. [pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx", dtype=object
  212. )
  213. tm.assert_series_equal(ser2 - ser, exp)
  214. tm.assert_series_equal(ser - ser2, -exp)
  215. ser = Series(
  216. [pd.Timedelta("01:00:00"), pd.Timedelta("02:00:00")],
  217. name="xxx",
  218. dtype=object,
  219. )
  220. assert ser.dtype == object
  221. exp = Series(
  222. [pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")],
  223. name="xxx",
  224. dtype=object,
  225. )
  226. tm.assert_series_equal(ser + pd.Timedelta("00:30:00"), exp)
  227. tm.assert_series_equal(pd.Timedelta("00:30:00") + ser, exp)
  228. # TODO: cleanup & parametrize over box
  229. def test_iadd_preserves_name(self):
  230. # GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name
  231. ser = Series([1, 2, 3])
  232. ser.index.name = "foo"
  233. ser.index += 1
  234. assert ser.index.name == "foo"
  235. ser.index -= 1
  236. assert ser.index.name == "foo"
  237. def test_add_string(self):
  238. # from bug report
  239. index = pd.Index(["a", "b", "c"])
  240. index2 = index + "foo"
  241. assert "a" not in index2
  242. assert "afoo" in index2
  243. def test_iadd_string(self):
  244. index = pd.Index(["a", "b", "c"])
  245. # doesn't fail test unless there is a check before `+=`
  246. assert "a" in index
  247. index += "_x"
  248. assert "a_x" in index
  249. def test_add(self):
  250. index = pd.Index([str(i) for i in range(10)])
  251. expected = pd.Index(index.values * 2)
  252. tm.assert_index_equal(index + index, expected)
  253. tm.assert_index_equal(index + index.tolist(), expected)
  254. tm.assert_index_equal(index.tolist() + index, expected)
  255. # test add and radd
  256. index = pd.Index(list("abc"))
  257. expected = pd.Index(["a1", "b1", "c1"])
  258. tm.assert_index_equal(index + "1", expected)
  259. expected = pd.Index(["1a", "1b", "1c"])
  260. tm.assert_index_equal("1" + index, expected)
  261. def test_sub_fail(self):
  262. index = pd.Index([str(i) for i in range(10)])
  263. msg = "unsupported operand type|Cannot broadcast|sub' not supported"
  264. with pytest.raises(TypeError, match=msg):
  265. index - "a"
  266. with pytest.raises(TypeError, match=msg):
  267. index - index
  268. with pytest.raises(TypeError, match=msg):
  269. index - index.tolist()
  270. with pytest.raises(TypeError, match=msg):
  271. index.tolist() - index
  272. def test_sub_object(self):
  273. # GH#19369
  274. index = pd.Index([Decimal(1), Decimal(2)])
  275. expected = pd.Index([Decimal(0), Decimal(1)])
  276. result = index - Decimal(1)
  277. tm.assert_index_equal(result, expected)
  278. result = index - pd.Index([Decimal(1), Decimal(1)])
  279. tm.assert_index_equal(result, expected)
  280. msg = "unsupported operand type"
  281. with pytest.raises(TypeError, match=msg):
  282. index - "foo"
  283. with pytest.raises(TypeError, match=msg):
  284. index - np.array([2, "foo"], dtype=object)
  285. def test_rsub_object(self, fixed_now_ts):
  286. # GH#19369
  287. index = pd.Index([Decimal(1), Decimal(2)])
  288. expected = pd.Index([Decimal(1), Decimal(0)])
  289. result = Decimal(2) - index
  290. tm.assert_index_equal(result, expected)
  291. result = np.array([Decimal(2), Decimal(2)]) - index
  292. tm.assert_index_equal(result, expected)
  293. msg = "unsupported operand type"
  294. with pytest.raises(TypeError, match=msg):
  295. "foo" - index
  296. with pytest.raises(TypeError, match=msg):
  297. np.array([True, fixed_now_ts]) - index
  298. class MyIndex(pd.Index):
  299. # Simple index subclass that tracks ops calls.
  300. _calls: int
  301. @classmethod
  302. def _simple_new(cls, values, name=None, dtype=None):
  303. result = object.__new__(cls)
  304. result._data = values
  305. result._name = name
  306. result._calls = 0
  307. result._reset_identity()
  308. return result
  309. def __add__(self, other):
  310. self._calls += 1
  311. return self._simple_new(self._data)
  312. def __radd__(self, other):
  313. return self.__add__(other)
  314. @pytest.mark.parametrize(
  315. "other",
  316. [
  317. [datetime.timedelta(1), datetime.timedelta(2)],
  318. [datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)],
  319. [pd.Period("2000"), pd.Period("2001")],
  320. ["a", "b"],
  321. ],
  322. ids=["timedelta", "datetime", "period", "object"],
  323. )
  324. def test_index_ops_defer_to_unknown_subclasses(other):
  325. # https://github.com/pandas-dev/pandas/issues/31109
  326. values = np.array(
  327. [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)], dtype=object
  328. )
  329. a = MyIndex._simple_new(values)
  330. other = pd.Index(other)
  331. result = other + a
  332. assert isinstance(result, MyIndex)
  333. assert a._calls == 1