# test_clip.py (7.4 KB)
  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. DataFrame,
  5. Series,
  6. )
  7. import pandas._testing as tm
  8. class TestDataFrameClip:
  9. def test_clip(self, float_frame):
  10. median = float_frame.median().median()
  11. original = float_frame.copy()
  12. double = float_frame.clip(upper=median, lower=median)
  13. assert not (double.values != median).any()
  14. # Verify that float_frame was not changed inplace
  15. assert (float_frame.values == original.values).all()
  16. def test_inplace_clip(self, float_frame):
  17. # GH#15388
  18. median = float_frame.median().median()
  19. frame_copy = float_frame.copy()
  20. return_value = frame_copy.clip(upper=median, lower=median, inplace=True)
  21. assert return_value is None
  22. assert not (frame_copy.values != median).any()
  23. def test_dataframe_clip(self):
  24. # GH#2747
  25. df = DataFrame(np.random.default_rng(2).standard_normal((1000, 2)))
  26. for lb, ub in [(-1, 1), (1, -1)]:
  27. clipped_df = df.clip(lb, ub)
  28. lb, ub = min(lb, ub), max(ub, lb)
  29. lb_mask = df.values <= lb
  30. ub_mask = df.values >= ub
  31. mask = ~lb_mask & ~ub_mask
  32. assert (clipped_df.values[lb_mask] == lb).all()
  33. assert (clipped_df.values[ub_mask] == ub).all()
  34. assert (clipped_df.values[mask] == df.values[mask]).all()
  35. def test_clip_mixed_numeric(self):
  36. # clip on mixed integer or floats
  37. # GH#24162, clipping now preserves numeric types per column
  38. df = DataFrame({"A": [1, 2, 3], "B": [1.0, np.nan, 3.0]})
  39. result = df.clip(1, 2)
  40. expected = DataFrame({"A": [1, 2, 2], "B": [1.0, np.nan, 2.0]})
  41. tm.assert_frame_equal(result, expected)
  42. df = DataFrame([[1, 2, 3.4], [3, 4, 5.6]], columns=["foo", "bar", "baz"])
  43. expected = df.dtypes
  44. result = df.clip(upper=3).dtypes
  45. tm.assert_series_equal(result, expected)
  46. @pytest.mark.parametrize("inplace", [True, False])
  47. def test_clip_against_series(self, inplace):
  48. # GH#6966
  49. df = DataFrame(np.random.default_rng(2).standard_normal((1000, 2)))
  50. lb = Series(np.random.default_rng(2).standard_normal(1000))
  51. ub = lb + 1
  52. original = df.copy()
  53. clipped_df = df.clip(lb, ub, axis=0, inplace=inplace)
  54. if inplace:
  55. clipped_df = df
  56. for i in range(2):
  57. lb_mask = original.iloc[:, i] <= lb
  58. ub_mask = original.iloc[:, i] >= ub
  59. mask = ~lb_mask & ~ub_mask
  60. result = clipped_df.loc[lb_mask, i]
  61. tm.assert_series_equal(result, lb[lb_mask], check_names=False)
  62. assert result.name == i
  63. result = clipped_df.loc[ub_mask, i]
  64. tm.assert_series_equal(result, ub[ub_mask], check_names=False)
  65. assert result.name == i
  66. tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i])
  67. @pytest.mark.parametrize("inplace", [True, False])
  68. @pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])])
  69. @pytest.mark.parametrize(
  70. "axis,res",
  71. [
  72. (0, [[2.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 7.0, 7.0]]),
  73. (1, [[2.0, 3.0, 4.0], [4.0, 5.0, 6.0], [5.0, 6.0, 7.0]]),
  74. ],
  75. )
  76. def test_clip_against_list_like(self, inplace, lower, axis, res):
  77. # GH#15390
  78. arr = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
  79. original = DataFrame(
  80. arr, columns=["one", "two", "three"], index=["a", "b", "c"]
  81. )
  82. result = original.clip(lower=lower, upper=[5, 6, 7], axis=axis, inplace=inplace)
  83. expected = DataFrame(res, columns=original.columns, index=original.index)
  84. if inplace:
  85. result = original
  86. tm.assert_frame_equal(result, expected, check_exact=True)
  87. @pytest.mark.parametrize("axis", [0, 1, None])
  88. def test_clip_against_frame(self, axis):
  89. df = DataFrame(np.random.default_rng(2).standard_normal((1000, 2)))
  90. lb = DataFrame(np.random.default_rng(2).standard_normal((1000, 2)))
  91. ub = lb + 1
  92. clipped_df = df.clip(lb, ub, axis=axis)
  93. lb_mask = df <= lb
  94. ub_mask = df >= ub
  95. mask = ~lb_mask & ~ub_mask
  96. tm.assert_frame_equal(clipped_df[lb_mask], lb[lb_mask])
  97. tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask])
  98. tm.assert_frame_equal(clipped_df[mask], df[mask])
  99. def test_clip_against_unordered_columns(self):
  100. # GH#20911
  101. df1 = DataFrame(
  102. np.random.default_rng(2).standard_normal((1000, 4)),
  103. columns=["A", "B", "C", "D"],
  104. )
  105. df2 = DataFrame(
  106. np.random.default_rng(2).standard_normal((1000, 4)),
  107. columns=["D", "A", "B", "C"],
  108. )
  109. df3 = DataFrame(df2.values - 1, columns=["B", "D", "C", "A"])
  110. result_upper = df1.clip(lower=0, upper=df2)
  111. expected_upper = df1.clip(lower=0, upper=df2[df1.columns])
  112. result_lower = df1.clip(lower=df3, upper=3)
  113. expected_lower = df1.clip(lower=df3[df1.columns], upper=3)
  114. result_lower_upper = df1.clip(lower=df3, upper=df2)
  115. expected_lower_upper = df1.clip(lower=df3[df1.columns], upper=df2[df1.columns])
  116. tm.assert_frame_equal(result_upper, expected_upper)
  117. tm.assert_frame_equal(result_lower, expected_lower)
  118. tm.assert_frame_equal(result_lower_upper, expected_lower_upper)
  119. def test_clip_with_na_args(self, float_frame):
  120. """Should process np.nan argument as None"""
  121. # GH#17276
  122. tm.assert_frame_equal(float_frame.clip(np.nan), float_frame)
  123. tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame)
  124. # GH#19992 and adjusted in GH#40420
  125. df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]})
  126. msg = "Downcasting behavior in Series and DataFrame methods 'where'"
  127. # TODO: avoid this warning here? seems like we should never be upcasting
  128. # in the first place?
  129. with tm.assert_produces_warning(FutureWarning, match=msg):
  130. result = df.clip(lower=[4, 5, np.nan], axis=0)
  131. expected = DataFrame(
  132. {"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}
  133. )
  134. tm.assert_frame_equal(result, expected)
  135. result = df.clip(lower=[4, 5, np.nan], axis=1)
  136. expected = DataFrame(
  137. {"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [7, 8, 9]}
  138. )
  139. tm.assert_frame_equal(result, expected)
  140. # GH#40420
  141. data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]}
  142. df = DataFrame(data)
  143. t = Series([2, -4, np.nan, 6, 3])
  144. with tm.assert_produces_warning(FutureWarning, match=msg):
  145. result = df.clip(lower=t, axis=0)
  146. expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]})
  147. tm.assert_frame_equal(result, expected)
  148. def test_clip_int_data_with_float_bound(self):
  149. # GH51472
  150. df = DataFrame({"a": [1, 2, 3]})
  151. result = df.clip(lower=1.5)
  152. expected = DataFrame({"a": [1.5, 2.0, 3.0]})
  153. tm.assert_frame_equal(result, expected)
  154. def test_clip_with_list_bound(self):
  155. # GH#54817
  156. df = DataFrame([1, 5])
  157. expected = DataFrame([3, 5])
  158. result = df.clip([3])
  159. tm.assert_frame_equal(result, expected)
  160. expected = DataFrame([1, 3])
  161. result = df.clip(upper=[3])
  162. tm.assert_frame_equal(result, expected)