test_clip.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. from datetime import datetime
  2. import numpy as np
  3. import pytest
  4. import pandas as pd
  5. from pandas import (
  6. Series,
  7. Timestamp,
  8. isna,
  9. notna,
  10. )
  11. import pandas._testing as tm
  12. class TestSeriesClip:
  13. def test_clip(self, datetime_series):
  14. val = datetime_series.median()
  15. assert datetime_series.clip(lower=val).min() == val
  16. assert datetime_series.clip(upper=val).max() == val
  17. result = datetime_series.clip(-0.5, 0.5)
  18. expected = np.clip(datetime_series, -0.5, 0.5)
  19. tm.assert_series_equal(result, expected)
  20. assert isinstance(expected, Series)
  21. def test_clip_types_and_nulls(self):
  22. sers = [
  23. Series([np.nan, 1.0, 2.0, 3.0]),
  24. Series([None, "a", "b", "c"]),
  25. Series(pd.to_datetime([np.nan, 1, 2, 3], unit="D")),
  26. ]
  27. for s in sers:
  28. thresh = s[2]
  29. lower = s.clip(lower=thresh)
  30. upper = s.clip(upper=thresh)
  31. assert lower[notna(lower)].min() == thresh
  32. assert upper[notna(upper)].max() == thresh
  33. assert list(isna(s)) == list(isna(lower))
  34. assert list(isna(s)) == list(isna(upper))
  35. def test_series_clipping_with_na_values(self, any_numeric_ea_dtype, nulls_fixture):
  36. # Ensure that clipping method can handle NA values with out failing
  37. # GH#40581
  38. if nulls_fixture is pd.NaT:
  39. # constructor will raise, see
  40. # test_constructor_mismatched_null_nullable_dtype
  41. pytest.skip("See test_constructor_mismatched_null_nullable_dtype")
  42. ser = Series([nulls_fixture, 1.0, 3.0], dtype=any_numeric_ea_dtype)
  43. s_clipped_upper = ser.clip(upper=2.0)
  44. s_clipped_lower = ser.clip(lower=2.0)
  45. expected_upper = Series([nulls_fixture, 1.0, 2.0], dtype=any_numeric_ea_dtype)
  46. expected_lower = Series([nulls_fixture, 2.0, 3.0], dtype=any_numeric_ea_dtype)
  47. tm.assert_series_equal(s_clipped_upper, expected_upper)
  48. tm.assert_series_equal(s_clipped_lower, expected_lower)
  49. def test_clip_with_na_args(self):
  50. """Should process np.nan argument as None"""
  51. # GH#17276
  52. s = Series([1, 2, 3])
  53. tm.assert_series_equal(s.clip(np.nan), Series([1, 2, 3]))
  54. tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3]))
  55. # GH#19992
  56. msg = "Downcasting behavior in Series and DataFrame methods 'where'"
  57. # TODO: avoid this warning here? seems like we should never be upcasting
  58. # in the first place?
  59. with tm.assert_produces_warning(FutureWarning, match=msg):
  60. res = s.clip(lower=[0, 4, np.nan])
  61. tm.assert_series_equal(res, Series([1, 4, 3]))
  62. with tm.assert_produces_warning(FutureWarning, match=msg):
  63. res = s.clip(upper=[1, np.nan, 1])
  64. tm.assert_series_equal(res, Series([1, 2, 1]))
  65. # GH#40420
  66. s = Series([1, 2, 3])
  67. result = s.clip(0, [np.nan, np.nan, np.nan])
  68. tm.assert_series_equal(s, result)
  69. def test_clip_against_series(self):
  70. # GH#6966
  71. s = Series([1.0, 1.0, 4.0])
  72. lower = Series([1.0, 2.0, 3.0])
  73. upper = Series([1.5, 2.5, 3.5])
  74. tm.assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5]))
  75. tm.assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5]))
  76. @pytest.mark.parametrize("inplace", [True, False])
  77. @pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])])
  78. def test_clip_against_list_like(self, inplace, upper):
  79. # GH#15390
  80. original = Series([5, 6, 7])
  81. result = original.clip(upper=upper, inplace=inplace)
  82. expected = Series([1, 2, 3])
  83. if inplace:
  84. result = original
  85. tm.assert_series_equal(result, expected, check_exact=True)
  86. def test_clip_with_datetimes(self):
  87. # GH#11838
  88. # naive and tz-aware datetimes
  89. t = Timestamp("2015-12-01 09:30:30")
  90. s = Series([Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:31:00")])
  91. result = s.clip(upper=t)
  92. expected = Series(
  93. [Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:30:30")]
  94. )
  95. tm.assert_series_equal(result, expected)
  96. t = Timestamp("2015-12-01 09:30:30", tz="US/Eastern")
  97. s = Series(
  98. [
  99. Timestamp("2015-12-01 09:30:00", tz="US/Eastern"),
  100. Timestamp("2015-12-01 09:31:00", tz="US/Eastern"),
  101. ]
  102. )
  103. result = s.clip(upper=t)
  104. expected = Series(
  105. [
  106. Timestamp("2015-12-01 09:30:00", tz="US/Eastern"),
  107. Timestamp("2015-12-01 09:30:30", tz="US/Eastern"),
  108. ]
  109. )
  110. tm.assert_series_equal(result, expected)
  111. @pytest.mark.parametrize("dtype", [object, "M8[us]"])
  112. def test_clip_with_timestamps_and_oob_datetimes(self, dtype):
  113. # GH-42794
  114. ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=dtype)
  115. result = ser.clip(lower=Timestamp.min, upper=Timestamp.max)
  116. expected = Series([Timestamp.min, Timestamp.max], dtype=dtype)
  117. tm.assert_series_equal(result, expected)