# test_asof.py -- tests for DataFrame.asof / Series.asof
  1. import numpy as np
  2. import pytest
  3. from pandas._libs.tslibs import IncompatibleFrequency
  4. from pandas import (
  5. DataFrame,
  6. Period,
  7. Series,
  8. Timestamp,
  9. date_range,
  10. period_range,
  11. to_datetime,
  12. )
  13. import pandas._testing as tm
  14. @pytest.fixture
  15. def date_range_frame():
  16. """
  17. Fixture for DataFrame of ints with date_range index
  18. Columns are ['A', 'B'].
  19. """
  20. N = 50
  21. rng = date_range("1/1/1990", periods=N, freq="53s")
  22. return DataFrame({"A": np.arange(N), "B": np.arange(N)}, index=rng)
  23. class TestFrameAsof:
  24. def test_basic(self, date_range_frame):
  25. # Explicitly cast to float to avoid implicit cast when setting np.nan
  26. df = date_range_frame.astype({"A": "float"})
  27. N = 50
  28. df.loc[df.index[15:30], "A"] = np.nan
  29. dates = date_range("1/1/1990", periods=N * 3, freq="25s")
  30. result = df.asof(dates)
  31. assert result.notna().all(1).all()
  32. lb = df.index[14]
  33. ub = df.index[30]
  34. dates = list(dates)
  35. result = df.asof(dates)
  36. assert result.notna().all(1).all()
  37. mask = (result.index >= lb) & (result.index < ub)
  38. rs = result[mask]
  39. assert (rs == 14).all(1).all()
  40. def test_subset(self, date_range_frame):
  41. N = 10
  42. # explicitly cast to float to avoid implicit upcast when setting to np.nan
  43. df = date_range_frame.iloc[:N].copy().astype({"A": "float"})
  44. df.loc[df.index[4:8], "A"] = np.nan
  45. dates = date_range("1/1/1990", periods=N * 3, freq="25s")
  46. # with a subset of A should be the same
  47. result = df.asof(dates, subset="A")
  48. expected = df.asof(dates)
  49. tm.assert_frame_equal(result, expected)
  50. # same with A/B
  51. result = df.asof(dates, subset=["A", "B"])
  52. expected = df.asof(dates)
  53. tm.assert_frame_equal(result, expected)
  54. # B gives df.asof
  55. result = df.asof(dates, subset="B")
  56. expected = df.resample("25s", closed="right").ffill().reindex(dates)
  57. expected.iloc[20:] = 9
  58. # no "missing", so "B" can retain int dtype (df["A"].dtype platform-dependent)
  59. expected["B"] = expected["B"].astype(df["B"].dtype)
  60. tm.assert_frame_equal(result, expected)
  61. def test_missing(self, date_range_frame):
  62. # GH 15118
  63. # no match found - `where` value before earliest date in index
  64. N = 10
  65. # Cast to 'float64' to avoid upcast when introducing nan in df.asof
  66. df = date_range_frame.iloc[:N].copy().astype("float64")
  67. result = df.asof("1989-12-31")
  68. expected = Series(
  69. index=["A", "B"], name=Timestamp("1989-12-31"), dtype=np.float64
  70. )
  71. tm.assert_series_equal(result, expected)
  72. result = df.asof(to_datetime(["1989-12-31"]))
  73. expected = DataFrame(
  74. index=to_datetime(["1989-12-31"]), columns=["A", "B"], dtype="float64"
  75. )
  76. tm.assert_frame_equal(result, expected)
  77. # Check that we handle PeriodIndex correctly, dont end up with
  78. # period.ordinal for series name
  79. df = df.to_period("D")
  80. result = df.asof("1989-12-31")
  81. assert isinstance(result.name, Period)
  82. def test_asof_all_nans(self, frame_or_series):
  83. # GH 15713
  84. # DataFrame/Series is all nans
  85. result = frame_or_series([np.nan]).asof([0])
  86. expected = frame_or_series([np.nan])
  87. tm.assert_equal(result, expected)
  88. def test_all_nans(self, date_range_frame):
  89. # GH 15713
  90. # DataFrame is all nans
  91. # testing non-default indexes, multiple inputs
  92. N = 150
  93. rng = date_range_frame.index
  94. dates = date_range("1/1/1990", periods=N, freq="25s")
  95. result = DataFrame(np.nan, index=rng, columns=["A"]).asof(dates)
  96. expected = DataFrame(np.nan, index=dates, columns=["A"])
  97. tm.assert_frame_equal(result, expected)
  98. # testing multiple columns
  99. dates = date_range("1/1/1990", periods=N, freq="25s")
  100. result = DataFrame(np.nan, index=rng, columns=["A", "B", "C"]).asof(dates)
  101. expected = DataFrame(np.nan, index=dates, columns=["A", "B", "C"])
  102. tm.assert_frame_equal(result, expected)
  103. # testing scalar input
  104. result = DataFrame(np.nan, index=[1, 2], columns=["A", "B"]).asof([3])
  105. expected = DataFrame(np.nan, index=[3], columns=["A", "B"])
  106. tm.assert_frame_equal(result, expected)
  107. result = DataFrame(np.nan, index=[1, 2], columns=["A", "B"]).asof(3)
  108. expected = Series(np.nan, index=["A", "B"], name=3)
  109. tm.assert_series_equal(result, expected)
  110. @pytest.mark.parametrize(
  111. "stamp,expected",
  112. [
  113. (
  114. Timestamp("2018-01-01 23:22:43.325+00:00"),
  115. Series(2, name=Timestamp("2018-01-01 23:22:43.325+00:00")),
  116. ),
  117. (
  118. Timestamp("2018-01-01 22:33:20.682+01:00"),
  119. Series(1, name=Timestamp("2018-01-01 22:33:20.682+01:00")),
  120. ),
  121. ],
  122. )
  123. def test_time_zone_aware_index(self, stamp, expected):
  124. # GH21194
  125. # Testing awareness of DataFrame index considering different
  126. # UTC and timezone
  127. df = DataFrame(
  128. data=[1, 2],
  129. index=[
  130. Timestamp("2018-01-01 21:00:05.001+00:00"),
  131. Timestamp("2018-01-01 22:35:10.550+00:00"),
  132. ],
  133. )
  134. result = df.asof(stamp)
  135. tm.assert_series_equal(result, expected)
  136. def test_is_copy(self, date_range_frame):
  137. # GH-27357, GH-30784: ensure the result of asof is an actual copy and
  138. # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
  139. df = date_range_frame.astype({"A": "float"})
  140. N = 50
  141. df.loc[df.index[15:30], "A"] = np.nan
  142. dates = date_range("1/1/1990", periods=N * 3, freq="25s")
  143. result = df.asof(dates)
  144. with tm.assert_produces_warning(None):
  145. result["C"] = 1
  146. def test_asof_periodindex_mismatched_freq(self):
  147. N = 50
  148. rng = period_range("1/1/1990", periods=N, freq="h")
  149. df = DataFrame(np.random.default_rng(2).standard_normal(N), index=rng)
  150. # Mismatched freq
  151. msg = "Input has different freq"
  152. with pytest.raises(IncompatibleFrequency, match=msg):
  153. df.asof(rng.asfreq("D"))
  154. def test_asof_preserves_bool_dtype(self):
  155. # GH#16063 was casting bools to floats
  156. dti = date_range("2017-01-01", freq="MS", periods=4)
  157. ser = Series([True, False, True], index=dti[:-1])
  158. ts = dti[-1]
  159. res = ser.asof([ts])
  160. expected = Series([True], index=[ts])
  161. tm.assert_series_equal(res, expected)