test_logical_ops.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. from datetime import datetime
  2. import operator
  3. import numpy as np
  4. import pytest
  5. from pandas._config import using_string_dtype
  6. from pandas import (
  7. ArrowDtype,
  8. DataFrame,
  9. Index,
  10. Series,
  11. StringDtype,
  12. bdate_range,
  13. )
  14. import pandas._testing as tm
  15. from pandas.core import ops
  16. class TestSeriesLogicalOps:
  17. @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
  18. @pytest.mark.parametrize("bool_op", [operator.and_, operator.or_, operator.xor])
  19. def test_bool_operators_with_nas(self, bool_op):
  20. # boolean &, |, ^ should work with object arrays and propagate NAs
  21. ser = Series(bdate_range("1/1/2000", periods=10), dtype=object)
  22. ser[::2] = np.nan
  23. mask = ser.isna()
  24. filled = ser.fillna(ser[0])
  25. result = bool_op(ser < ser[9], ser > ser[3])
  26. expected = bool_op(filled < filled[9], filled > filled[3])
  27. expected[mask] = False
  28. tm.assert_series_equal(result, expected)
  29. def test_logical_operators_bool_dtype_with_empty(self):
  30. # GH#9016: support bitwise op for integer types
  31. index = list("bca")
  32. s_tft = Series([True, False, True], index=index)
  33. s_fff = Series([False, False, False], index=index)
  34. s_empty = Series([], dtype=object)
  35. res = s_tft & s_empty
  36. expected = s_fff.sort_index()
  37. tm.assert_series_equal(res, expected)
  38. res = s_tft | s_empty
  39. expected = s_tft.sort_index()
  40. tm.assert_series_equal(res, expected)
  41. def test_logical_operators_int_dtype_with_int_dtype(self):
  42. # GH#9016: support bitwise op for integer types
  43. s_0123 = Series(range(4), dtype="int64")
  44. s_3333 = Series([3] * 4)
  45. s_4444 = Series([4] * 4)
  46. res = s_0123 & s_3333
  47. expected = Series(range(4), dtype="int64")
  48. tm.assert_series_equal(res, expected)
  49. res = s_0123 | s_4444
  50. expected = Series(range(4, 8), dtype="int64")
  51. tm.assert_series_equal(res, expected)
  52. s_1111 = Series([1] * 4, dtype="int8")
  53. res = s_0123 & s_1111
  54. expected = Series([0, 1, 0, 1], dtype="int64")
  55. tm.assert_series_equal(res, expected)
  56. res = s_0123.astype(np.int16) | s_1111.astype(np.int32)
  57. expected = Series([1, 1, 3, 3], dtype="int32")
  58. tm.assert_series_equal(res, expected)
  59. def test_logical_operators_int_dtype_with_int_scalar(self):
  60. # GH#9016: support bitwise op for integer types
  61. s_0123 = Series(range(4), dtype="int64")
  62. res = s_0123 & 0
  63. expected = Series([0] * 4)
  64. tm.assert_series_equal(res, expected)
  65. res = s_0123 & 1
  66. expected = Series([0, 1, 0, 1])
  67. tm.assert_series_equal(res, expected)
  68. def test_logical_operators_int_dtype_with_float(self):
  69. # GH#9016: support bitwise op for integer types
  70. s_0123 = Series(range(4), dtype="int64")
  71. warn_msg = (
  72. r"Logical ops \(and, or, xor\) between Pandas objects and "
  73. "dtype-less sequences"
  74. )
  75. msg = "Cannot perform.+with a dtyped.+array and scalar of type"
  76. with pytest.raises(TypeError, match=msg):
  77. s_0123 & np.nan
  78. with pytest.raises(TypeError, match=msg):
  79. s_0123 & 3.14
  80. msg = "unsupported operand type.+for &:"
  81. with pytest.raises(TypeError, match=msg):
  82. with tm.assert_produces_warning(FutureWarning, match=warn_msg):
  83. s_0123 & [0.1, 4, 3.14, 2]
  84. with pytest.raises(TypeError, match=msg):
  85. s_0123 & np.array([0.1, 4, 3.14, 2])
  86. with pytest.raises(TypeError, match=msg):
  87. s_0123 & Series([0.1, 4, -3.14, 2])
  88. def test_logical_operators_int_dtype_with_str(self):
  89. s_1111 = Series([1] * 4, dtype="int8")
  90. warn_msg = (
  91. r"Logical ops \(and, or, xor\) between Pandas objects and "
  92. "dtype-less sequences"
  93. )
  94. msg = "Cannot perform 'and_' with a dtyped.+array and scalar of type"
  95. with pytest.raises(TypeError, match=msg):
  96. s_1111 & "a"
  97. with pytest.raises(TypeError, match="unsupported operand.+for &"):
  98. with tm.assert_produces_warning(FutureWarning, match=warn_msg):
  99. s_1111 & ["a", "b", "c", "d"]
  100. def test_logical_operators_int_dtype_with_bool(self):
  101. # GH#9016: support bitwise op for integer types
  102. s_0123 = Series(range(4), dtype="int64")
  103. expected = Series([False] * 4)
  104. result = s_0123 & False
  105. tm.assert_series_equal(result, expected)
  106. warn_msg = (
  107. r"Logical ops \(and, or, xor\) between Pandas objects and "
  108. "dtype-less sequences"
  109. )
  110. with tm.assert_produces_warning(FutureWarning, match=warn_msg):
  111. result = s_0123 & [False]
  112. tm.assert_series_equal(result, expected)
  113. with tm.assert_produces_warning(FutureWarning, match=warn_msg):
  114. result = s_0123 & (False,)
  115. tm.assert_series_equal(result, expected)
  116. result = s_0123 ^ False
  117. expected = Series([False, True, True, True])
  118. tm.assert_series_equal(result, expected)
  119. def test_logical_operators_int_dtype_with_object(self):
  120. # GH#9016: support bitwise op for integer types
  121. s_0123 = Series(range(4), dtype="int64")
  122. result = s_0123 & Series([False, np.nan, False, False])
  123. expected = Series([False] * 4)
  124. tm.assert_series_equal(result, expected)
  125. s_abNd = Series(["a", "b", np.nan, "d"])
  126. with pytest.raises(
  127. TypeError, match="unsupported.* 'int' and 'str'|'rand_' not supported"
  128. ):
  129. s_0123 & s_abNd
  130. def test_logical_operators_bool_dtype_with_int(self):
  131. index = list("bca")
  132. s_tft = Series([True, False, True], index=index)
  133. s_fff = Series([False, False, False], index=index)
  134. res = s_tft & 0
  135. expected = s_fff
  136. tm.assert_series_equal(res, expected)
  137. res = s_tft & 1
  138. expected = s_tft
  139. tm.assert_series_equal(res, expected)
  140. def test_logical_ops_bool_dtype_with_ndarray(self):
  141. # make sure we operate on ndarray the same as Series
  142. left = Series([True, True, True, False, True])
  143. right = [True, False, None, True, np.nan]
  144. msg = (
  145. r"Logical ops \(and, or, xor\) between Pandas objects and "
  146. "dtype-less sequences"
  147. )
  148. expected = Series([True, False, False, False, False])
  149. with tm.assert_produces_warning(FutureWarning, match=msg):
  150. result = left & right
  151. tm.assert_series_equal(result, expected)
  152. result = left & np.array(right)
  153. tm.assert_series_equal(result, expected)
  154. result = left & Index(right)
  155. tm.assert_series_equal(result, expected)
  156. result = left & Series(right)
  157. tm.assert_series_equal(result, expected)
  158. expected = Series([True, True, True, True, True])
  159. with tm.assert_produces_warning(FutureWarning, match=msg):
  160. result = left | right
  161. tm.assert_series_equal(result, expected)
  162. result = left | np.array(right)
  163. tm.assert_series_equal(result, expected)
  164. result = left | Index(right)
  165. tm.assert_series_equal(result, expected)
  166. result = left | Series(right)
  167. tm.assert_series_equal(result, expected)
  168. expected = Series([False, True, True, True, True])
  169. with tm.assert_produces_warning(FutureWarning, match=msg):
  170. result = left ^ right
  171. tm.assert_series_equal(result, expected)
  172. result = left ^ np.array(right)
  173. tm.assert_series_equal(result, expected)
  174. result = left ^ Index(right)
  175. tm.assert_series_equal(result, expected)
  176. result = left ^ Series(right)
  177. tm.assert_series_equal(result, expected)
  178. def test_logical_operators_int_dtype_with_bool_dtype_and_reindex(self):
  179. # GH#9016: support bitwise op for integer types
  180. index = list("bca")
  181. s_tft = Series([True, False, True], index=index)
  182. s_tft = Series([True, False, True], index=index)
  183. s_tff = Series([True, False, False], index=index)
  184. s_0123 = Series(range(4), dtype="int64")
  185. # s_0123 will be all false now because of reindexing like s_tft
  186. expected = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"])
  187. with tm.assert_produces_warning(FutureWarning):
  188. result = s_tft & s_0123
  189. tm.assert_series_equal(result, expected)
  190. # GH 52538: Deprecate casting to object type when reindex is needed;
  191. # matches DataFrame behavior
  192. expected = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"])
  193. with tm.assert_produces_warning(FutureWarning):
  194. result = s_0123 & s_tft
  195. tm.assert_series_equal(result, expected)
  196. s_a0b1c0 = Series([1], list("b"))
  197. with tm.assert_produces_warning(FutureWarning):
  198. res = s_tft & s_a0b1c0
  199. expected = s_tff.reindex(list("abc"))
  200. tm.assert_series_equal(res, expected)
  201. with tm.assert_produces_warning(FutureWarning):
  202. res = s_tft | s_a0b1c0
  203. expected = s_tft.reindex(list("abc"))
  204. tm.assert_series_equal(res, expected)
  205. def test_scalar_na_logical_ops_corners(self):
  206. s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])
  207. msg = "Cannot perform.+with a dtyped.+array and scalar of type"
  208. with pytest.raises(TypeError, match=msg):
  209. s & datetime(2005, 1, 1)
  210. s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
  211. s[::2] = np.nan
  212. expected = Series(True, index=s.index)
  213. expected[::2] = False
  214. msg = (
  215. r"Logical ops \(and, or, xor\) between Pandas objects and "
  216. "dtype-less sequences"
  217. )
  218. with tm.assert_produces_warning(FutureWarning, match=msg):
  219. result = s & list(s)
  220. tm.assert_series_equal(result, expected)
  221. def test_scalar_na_logical_ops_corners_aligns(self):
  222. s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
  223. s[::2] = np.nan
  224. d = DataFrame({"A": s})
  225. expected = DataFrame(False, index=range(9), columns=["A"] + list(range(9)))
  226. result = s & d
  227. tm.assert_frame_equal(result, expected)
  228. result = d & s
  229. tm.assert_frame_equal(result, expected)
  230. @pytest.mark.parametrize("op", [operator.and_, operator.or_, operator.xor])
  231. def test_logical_ops_with_index(self, op):
  232. # GH#22092, GH#19792
  233. ser = Series([True, True, False, False])
  234. idx1 = Index([True, False, True, False])
  235. idx2 = Index([1, 0, 1, 0])
  236. expected = Series([op(ser[n], idx1[n]) for n in range(len(ser))])
  237. result = op(ser, idx1)
  238. tm.assert_series_equal(result, expected)
  239. expected = Series([op(ser[n], idx2[n]) for n in range(len(ser))], dtype=bool)
  240. result = op(ser, idx2)
  241. tm.assert_series_equal(result, expected)
  242. def test_reversed_xor_with_index_returns_series(self):
  243. # GH#22092, GH#19792 pre-2.0 these were aliased to setops
  244. ser = Series([True, True, False, False])
  245. idx1 = Index([True, False, True, False], dtype=bool)
  246. idx2 = Index([1, 0, 1, 0])
  247. expected = Series([False, True, True, False])
  248. result = idx1 ^ ser
  249. tm.assert_series_equal(result, expected)
  250. result = idx2 ^ ser
  251. tm.assert_series_equal(result, expected)
  252. @pytest.mark.parametrize(
  253. "op",
  254. [
  255. ops.rand_,
  256. ops.ror_,
  257. ],
  258. )
  259. def test_reversed_logical_op_with_index_returns_series(self, op):
  260. # GH#22092, GH#19792
  261. ser = Series([True, True, False, False])
  262. idx1 = Index([True, False, True, False])
  263. idx2 = Index([1, 0, 1, 0])
  264. expected = Series(op(idx1.values, ser.values))
  265. result = op(ser, idx1)
  266. tm.assert_series_equal(result, expected)
  267. expected = op(ser, Series(idx2))
  268. result = op(ser, idx2)
  269. tm.assert_series_equal(result, expected)
  270. @pytest.mark.parametrize(
  271. "op, expected",
  272. [
  273. (ops.rand_, Series([False, False])),
  274. (ops.ror_, Series([True, True])),
  275. (ops.rxor, Series([True, True])),
  276. ],
  277. )
  278. def test_reverse_ops_with_index(self, op, expected):
  279. # https://github.com/pandas-dev/pandas/pull/23628
  280. # multi-set Index ops are buggy, so let's avoid duplicates...
  281. # GH#49503
  282. ser = Series([True, False])
  283. idx = Index([False, True])
  284. result = op(ser, idx)
  285. tm.assert_series_equal(result, expected)
  286. @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
  287. def test_logical_ops_label_based(self, using_infer_string):
  288. # GH#4947
  289. # logical ops should be label based
  290. a = Series([True, False, True], list("bca"))
  291. b = Series([False, True, False], list("abc"))
  292. expected = Series([False, True, False], list("abc"))
  293. result = a & b
  294. tm.assert_series_equal(result, expected)
  295. expected = Series([True, True, False], list("abc"))
  296. result = a | b
  297. tm.assert_series_equal(result, expected)
  298. expected = Series([True, False, False], list("abc"))
  299. result = a ^ b
  300. tm.assert_series_equal(result, expected)
  301. # rhs is bigger
  302. a = Series([True, False, True], list("bca"))
  303. b = Series([False, True, False, True], list("abcd"))
  304. expected = Series([False, True, False, False], list("abcd"))
  305. result = a & b
  306. tm.assert_series_equal(result, expected)
  307. expected = Series([True, True, False, False], list("abcd"))
  308. result = a | b
  309. tm.assert_series_equal(result, expected)
  310. # filling
  311. # vs empty
  312. empty = Series([], dtype=object)
  313. result = a & empty.copy()
  314. expected = Series([False, False, False], list("abc"))
  315. tm.assert_series_equal(result, expected)
  316. result = a | empty.copy()
  317. expected = Series([True, True, False], list("abc"))
  318. tm.assert_series_equal(result, expected)
  319. # vs non-matching
  320. with tm.assert_produces_warning(FutureWarning):
  321. result = a & Series([1], ["z"])
  322. expected = Series([False, False, False, False], list("abcz"))
  323. tm.assert_series_equal(result, expected)
  324. with tm.assert_produces_warning(FutureWarning):
  325. result = a | Series([1], ["z"])
  326. expected = Series([True, True, False, False], list("abcz"))
  327. tm.assert_series_equal(result, expected)
  328. # identity
  329. # we would like s[s|e] == s to hold for any e, whether empty or not
  330. with tm.assert_produces_warning(FutureWarning):
  331. for e in [
  332. empty.copy(),
  333. Series([1], ["z"]),
  334. Series(np.nan, b.index),
  335. Series(np.nan, a.index),
  336. ]:
  337. result = a[a | e]
  338. tm.assert_series_equal(result, a[a])
  339. for e in [Series(["z"])]:
  340. if using_infer_string:
  341. # TODO(infer_string) should this behave differently?
  342. # -> https://github.com/pandas-dev/pandas/issues/60234
  343. with pytest.raises(
  344. TypeError, match="not supported for dtype|unsupported operand type"
  345. ):
  346. result = a[a | e]
  347. else:
  348. result = a[a | e]
  349. tm.assert_series_equal(result, a[a])
  350. # vs scalars
  351. index = list("bca")
  352. t = Series([True, False, True])
  353. for v in [True, 1, 2]:
  354. result = Series([True, False, True], index=index) | v
  355. expected = Series([True, True, True], index=index)
  356. tm.assert_series_equal(result, expected)
  357. msg = "Cannot perform.+with a dtyped.+array and scalar of type"
  358. for v in [np.nan, "foo"]:
  359. with pytest.raises(TypeError, match=msg):
  360. t | v
  361. for v in [False, 0]:
  362. result = Series([True, False, True], index=index) | v
  363. expected = Series([True, False, True], index=index)
  364. tm.assert_series_equal(result, expected)
  365. for v in [True, 1]:
  366. result = Series([True, False, True], index=index) & v
  367. expected = Series([True, False, True], index=index)
  368. tm.assert_series_equal(result, expected)
  369. for v in [False, 0]:
  370. result = Series([True, False, True], index=index) & v
  371. expected = Series([False, False, False], index=index)
  372. tm.assert_series_equal(result, expected)
  373. msg = "Cannot perform.+with a dtyped.+array and scalar of type"
  374. for v in [np.nan]:
  375. with pytest.raises(TypeError, match=msg):
  376. t & v
  377. def test_logical_ops_df_compat(self):
  378. # GH#1134
  379. s1 = Series([True, False, True], index=list("ABC"), name="x")
  380. s2 = Series([True, True, False], index=list("ABD"), name="x")
  381. exp = Series([True, False, False, False], index=list("ABCD"), name="x")
  382. tm.assert_series_equal(s1 & s2, exp)
  383. tm.assert_series_equal(s2 & s1, exp)
  384. # True | np.nan => True
  385. exp_or1 = Series([True, True, True, False], index=list("ABCD"), name="x")
  386. tm.assert_series_equal(s1 | s2, exp_or1)
  387. # np.nan | True => np.nan, filled with False
  388. exp_or = Series([True, True, False, False], index=list("ABCD"), name="x")
  389. tm.assert_series_equal(s2 | s1, exp_or)
  390. # DataFrame doesn't fill nan with False
  391. tm.assert_frame_equal(s1.to_frame() & s2.to_frame(), exp.to_frame())
  392. tm.assert_frame_equal(s2.to_frame() & s1.to_frame(), exp.to_frame())
  393. exp = DataFrame({"x": [True, True, np.nan, np.nan]}, index=list("ABCD"))
  394. tm.assert_frame_equal(s1.to_frame() | s2.to_frame(), exp_or1.to_frame())
  395. tm.assert_frame_equal(s2.to_frame() | s1.to_frame(), exp_or.to_frame())
  396. # different length
  397. s3 = Series([True, False, True], index=list("ABC"), name="x")
  398. s4 = Series([True, True, True, True], index=list("ABCD"), name="x")
  399. exp = Series([True, False, True, False], index=list("ABCD"), name="x")
  400. tm.assert_series_equal(s3 & s4, exp)
  401. tm.assert_series_equal(s4 & s3, exp)
  402. # np.nan | True => np.nan, filled with False
  403. exp_or1 = Series([True, True, True, False], index=list("ABCD"), name="x")
  404. tm.assert_series_equal(s3 | s4, exp_or1)
  405. # True | np.nan => True
  406. exp_or = Series([True, True, True, True], index=list("ABCD"), name="x")
  407. tm.assert_series_equal(s4 | s3, exp_or)
  408. tm.assert_frame_equal(s3.to_frame() & s4.to_frame(), exp.to_frame())
  409. tm.assert_frame_equal(s4.to_frame() & s3.to_frame(), exp.to_frame())
  410. tm.assert_frame_equal(s3.to_frame() | s4.to_frame(), exp_or1.to_frame())
  411. tm.assert_frame_equal(s4.to_frame() | s3.to_frame(), exp_or.to_frame())
  412. @pytest.mark.xfail(reason="Will pass once #52839 deprecation is enforced")
  413. def test_int_dtype_different_index_not_bool(self):
  414. # GH 52500
  415. ser1 = Series([1, 2, 3], index=[10, 11, 23], name="a")
  416. ser2 = Series([10, 20, 30], index=[11, 10, 23], name="a")
  417. result = np.bitwise_xor(ser1, ser2)
  418. expected = Series([21, 8, 29], index=[10, 11, 23], name="a")
  419. tm.assert_series_equal(result, expected)
  420. result = ser1 ^ ser2
  421. tm.assert_series_equal(result, expected)
  422. # TODO: this belongs in comparison tests
  423. def test_pyarrow_numpy_string_invalid(self):
  424. # GH#56008
  425. pa = pytest.importorskip("pyarrow")
  426. ser = Series([False, True])
  427. ser2 = Series(["a", "b"], dtype=StringDtype(na_value=np.nan))
  428. result = ser == ser2
  429. expected_eq = Series(False, index=ser.index)
  430. tm.assert_series_equal(result, expected_eq)
  431. result = ser != ser2
  432. expected_ne = Series(True, index=ser.index)
  433. tm.assert_series_equal(result, expected_ne)
  434. with pytest.raises(TypeError, match="Invalid comparison"):
  435. ser > ser2
  436. # GH#59505
  437. ser3 = ser2.astype("string[pyarrow]")
  438. result3_eq = ser3 == ser
  439. tm.assert_series_equal(result3_eq, expected_eq.astype("bool[pyarrow]"))
  440. result3_ne = ser3 != ser
  441. tm.assert_series_equal(result3_ne, expected_ne.astype("bool[pyarrow]"))
  442. with pytest.raises(TypeError, match="Invalid comparison"):
  443. ser > ser3
  444. ser4 = ser2.astype(ArrowDtype(pa.string()))
  445. result4_eq = ser4 == ser
  446. tm.assert_series_equal(result4_eq, expected_eq.astype("bool[pyarrow]"))
  447. result4_ne = ser4 != ser
  448. tm.assert_series_equal(result4_ne, expected_ne.astype("bool[pyarrow]"))
  449. with pytest.raises(TypeError, match="Invalid comparison"):
  450. ser > ser4