# test_logical_ops.py -- tests for DataFrame logical operators (&, |, ^).
  1. import operator
  2. import re
  3. import numpy as np
  4. import pytest
  5. from pandas import (
  6. CategoricalIndex,
  7. DataFrame,
  8. Interval,
  9. Series,
  10. isnull,
  11. )
  12. import pandas._testing as tm
  13. class TestDataFrameLogicalOperators:
  14. # &, |, ^
  15. @pytest.mark.parametrize(
  16. "left, right, op, expected",
  17. [
  18. (
  19. [True, False, np.nan],
  20. [True, False, True],
  21. operator.and_,
  22. [True, False, False],
  23. ),
  24. (
  25. [True, False, True],
  26. [True, False, np.nan],
  27. operator.and_,
  28. [True, False, False],
  29. ),
  30. (
  31. [True, False, np.nan],
  32. [True, False, True],
  33. operator.or_,
  34. [True, False, False],
  35. ),
  36. (
  37. [True, False, True],
  38. [True, False, np.nan],
  39. operator.or_,
  40. [True, False, True],
  41. ),
  42. ],
  43. )
  44. def test_logical_operators_nans(self, left, right, op, expected, frame_or_series):
  45. # GH#13896
  46. result = op(frame_or_series(left), frame_or_series(right))
  47. expected = frame_or_series(expected)
  48. tm.assert_equal(result, expected)
  49. def test_logical_ops_empty_frame(self):
  50. # GH#5808
  51. # empty frames, non-mixed dtype
  52. df = DataFrame(index=[1])
  53. result = df & df
  54. tm.assert_frame_equal(result, df)
  55. result = df | df
  56. tm.assert_frame_equal(result, df)
  57. df2 = DataFrame(index=[1, 2])
  58. result = df & df2
  59. tm.assert_frame_equal(result, df2)
  60. dfa = DataFrame(index=[1], columns=["A"])
  61. result = dfa & dfa
  62. expected = DataFrame(False, index=[1], columns=["A"])
  63. tm.assert_frame_equal(result, expected)
  64. def test_logical_ops_bool_frame(self):
  65. # GH#5808
  66. df1a_bool = DataFrame(True, index=[1], columns=["A"])
  67. result = df1a_bool & df1a_bool
  68. tm.assert_frame_equal(result, df1a_bool)
  69. result = df1a_bool | df1a_bool
  70. tm.assert_frame_equal(result, df1a_bool)
  71. def test_logical_ops_int_frame(self):
  72. # GH#5808
  73. df1a_int = DataFrame(1, index=[1], columns=["A"])
  74. df1a_bool = DataFrame(True, index=[1], columns=["A"])
  75. result = df1a_int | df1a_bool
  76. tm.assert_frame_equal(result, df1a_bool)
  77. # Check that this matches Series behavior
  78. res_ser = df1a_int["A"] | df1a_bool["A"]
  79. tm.assert_series_equal(res_ser, df1a_bool["A"])
  80. def test_logical_ops_invalid(self, using_infer_string):
  81. # GH#5808
  82. df1 = DataFrame(1.0, index=[1], columns=["A"])
  83. df2 = DataFrame(True, index=[1], columns=["A"])
  84. msg = re.escape("unsupported operand type(s) for |: 'float' and 'bool'")
  85. with pytest.raises(TypeError, match=msg):
  86. df1 | df2
  87. df1 = DataFrame("foo", index=[1], columns=["A"])
  88. df2 = DataFrame(True, index=[1], columns=["A"])
  89. if using_infer_string and df1["A"].dtype.storage == "pyarrow":
  90. msg = "operation 'or_' not supported for dtype 'str'"
  91. else:
  92. msg = re.escape("unsupported operand type(s) for |: 'str' and 'bool'")
  93. with pytest.raises(TypeError, match=msg):
  94. df1 | df2
  95. def test_logical_operators(self):
  96. def _check_bin_op(op):
  97. result = op(df1, df2)
  98. expected = DataFrame(
  99. op(df1.values, df2.values), index=df1.index, columns=df1.columns
  100. )
  101. assert result.values.dtype == np.bool_
  102. tm.assert_frame_equal(result, expected)
  103. def _check_unary_op(op):
  104. result = op(df1)
  105. expected = DataFrame(op(df1.values), index=df1.index, columns=df1.columns)
  106. assert result.values.dtype == np.bool_
  107. tm.assert_frame_equal(result, expected)
  108. df1 = {
  109. "a": {"a": True, "b": False, "c": False, "d": True, "e": True},
  110. "b": {"a": False, "b": True, "c": False, "d": False, "e": False},
  111. "c": {"a": False, "b": False, "c": True, "d": False, "e": False},
  112. "d": {"a": True, "b": False, "c": False, "d": True, "e": True},
  113. "e": {"a": True, "b": False, "c": False, "d": True, "e": True},
  114. }
  115. df2 = {
  116. "a": {"a": True, "b": False, "c": True, "d": False, "e": False},
  117. "b": {"a": False, "b": True, "c": False, "d": False, "e": False},
  118. "c": {"a": True, "b": False, "c": True, "d": False, "e": False},
  119. "d": {"a": False, "b": False, "c": False, "d": True, "e": False},
  120. "e": {"a": False, "b": False, "c": False, "d": False, "e": True},
  121. }
  122. df1 = DataFrame(df1)
  123. df2 = DataFrame(df2)
  124. _check_bin_op(operator.and_)
  125. _check_bin_op(operator.or_)
  126. _check_bin_op(operator.xor)
  127. _check_unary_op(operator.inv) # TODO: belongs elsewhere
  128. @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
  129. def test_logical_with_nas(self):
  130. d = DataFrame({"a": [np.nan, False], "b": [True, True]})
  131. # GH4947
  132. # bool comparisons should return bool
  133. result = d["a"] | d["b"]
  134. expected = Series([False, True])
  135. tm.assert_series_equal(result, expected)
  136. # GH4604, automatic casting here
  137. result = d["a"].fillna(False) | d["b"]
  138. expected = Series([True, True])
  139. tm.assert_series_equal(result, expected)
  140. msg = "The 'downcast' keyword in fillna is deprecated"
  141. with tm.assert_produces_warning(FutureWarning, match=msg):
  142. result = d["a"].fillna(False, downcast=False) | d["b"]
  143. expected = Series([True, True])
  144. tm.assert_series_equal(result, expected)
  145. def test_logical_ops_categorical_columns(self):
  146. # GH#38367
  147. intervals = [Interval(1, 2), Interval(3, 4)]
  148. data = DataFrame(
  149. [[1, np.nan], [2, np.nan]],
  150. columns=CategoricalIndex(
  151. intervals, categories=intervals + [Interval(5, 6)]
  152. ),
  153. )
  154. mask = DataFrame(
  155. [[False, False], [False, False]], columns=data.columns, dtype=bool
  156. )
  157. result = mask | isnull(data)
  158. expected = DataFrame(
  159. [[False, True], [False, True]],
  160. columns=CategoricalIndex(
  161. intervals, categories=intervals + [Interval(5, 6)]
  162. ),
  163. )
  164. tm.assert_frame_equal(result, expected)
  165. def test_int_dtype_different_index_not_bool(self):
  166. # GH 52500
  167. df1 = DataFrame([1, 2, 3], index=[10, 11, 23], columns=["a"])
  168. df2 = DataFrame([10, 20, 30], index=[11, 10, 23], columns=["a"])
  169. result = np.bitwise_xor(df1, df2)
  170. expected = DataFrame([21, 8, 29], index=[10, 11, 23], columns=["a"])
  171. tm.assert_frame_equal(result, expected)
  172. result = df1 ^ df2
  173. tm.assert_frame_equal(result, expected)
  174. def test_different_dtypes_different_index_raises(self):
  175. # GH 52538
  176. df1 = DataFrame([1, 2], index=["a", "b"])
  177. df2 = DataFrame([3, 4], index=["b", "c"])
  178. with pytest.raises(TypeError, match="unsupported operand type"):
  179. df1 & df2