# test_asof.py (6.2 KB)
  1. import numpy as np
  2. import pytest
  3. from pandas._libs.tslibs import IncompatibleFrequency
  4. from pandas import (
  5. DatetimeIndex,
  6. PeriodIndex,
  7. Series,
  8. Timestamp,
  9. date_range,
  10. isna,
  11. notna,
  12. offsets,
  13. period_range,
  14. )
  15. import pandas._testing as tm
  16. class TestSeriesAsof:
  17. def test_asof_nanosecond_index_access(self):
  18. ts = Timestamp("20130101").as_unit("ns")._value
  19. dti = DatetimeIndex([ts + 50 + i for i in range(100)])
  20. ser = Series(np.random.default_rng(2).standard_normal(100), index=dti)
  21. first_value = ser.asof(ser.index[0])
  22. # GH#46903 previously incorrectly was "day"
  23. assert dti.resolution == "nanosecond"
  24. # this used to not work bc parsing was done by dateutil that didn't
  25. # handle nanoseconds
  26. assert first_value == ser["2013-01-01 00:00:00.000000050"]
  27. expected_ts = np.datetime64("2013-01-01 00:00:00.000000050", "ns")
  28. assert first_value == ser[Timestamp(expected_ts)]
  29. def test_basic(self):
  30. # array or list or dates
  31. N = 50
  32. rng = date_range("1/1/1990", periods=N, freq="53s")
  33. ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
  34. ts.iloc[15:30] = np.nan
  35. dates = date_range("1/1/1990", periods=N * 3, freq="25s")
  36. result = ts.asof(dates)
  37. assert notna(result).all()
  38. lb = ts.index[14]
  39. ub = ts.index[30]
  40. result = ts.asof(list(dates))
  41. assert notna(result).all()
  42. lb = ts.index[14]
  43. ub = ts.index[30]
  44. mask = (result.index >= lb) & (result.index < ub)
  45. rs = result[mask]
  46. assert (rs == ts[lb]).all()
  47. val = result[result.index[result.index >= ub][0]]
  48. assert ts[ub] == val
  49. def test_scalar(self):
  50. N = 30
  51. rng = date_range("1/1/1990", periods=N, freq="53s")
  52. # Explicit cast to float avoid implicit cast when setting nan
  53. ts = Series(np.arange(N), index=rng, dtype="float")
  54. ts.iloc[5:10] = np.nan
  55. ts.iloc[15:20] = np.nan
  56. val1 = ts.asof(ts.index[7])
  57. val2 = ts.asof(ts.index[19])
  58. assert val1 == ts.iloc[4]
  59. assert val2 == ts.iloc[14]
  60. # accepts strings
  61. val1 = ts.asof(str(ts.index[7]))
  62. assert val1 == ts.iloc[4]
  63. # in there
  64. result = ts.asof(ts.index[3])
  65. assert result == ts.iloc[3]
  66. # no as of value
  67. d = ts.index[0] - offsets.BDay()
  68. assert np.isnan(ts.asof(d))
  69. def test_with_nan(self):
  70. # basic asof test
  71. rng = date_range("1/1/2000", "1/2/2000", freq="4h")
  72. s = Series(np.arange(len(rng)), index=rng)
  73. r = s.resample("2h").mean()
  74. result = r.asof(r.index)
  75. expected = Series(
  76. [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.0],
  77. index=date_range("1/1/2000", "1/2/2000", freq="2h"),
  78. )
  79. tm.assert_series_equal(result, expected)
  80. r.iloc[3:5] = np.nan
  81. result = r.asof(r.index)
  82. expected = Series(
  83. [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.0],
  84. index=date_range("1/1/2000", "1/2/2000", freq="2h"),
  85. )
  86. tm.assert_series_equal(result, expected)
  87. r.iloc[-3:] = np.nan
  88. result = r.asof(r.index)
  89. expected = Series(
  90. [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.0],
  91. index=date_range("1/1/2000", "1/2/2000", freq="2h"),
  92. )
  93. tm.assert_series_equal(result, expected)
  94. def test_periodindex(self):
  95. # array or list or dates
  96. N = 50
  97. rng = period_range("1/1/1990", periods=N, freq="h")
  98. ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
  99. ts.iloc[15:30] = np.nan
  100. dates = date_range("1/1/1990", periods=N * 3, freq="37min")
  101. result = ts.asof(dates)
  102. assert notna(result).all()
  103. lb = ts.index[14]
  104. ub = ts.index[30]
  105. result = ts.asof(list(dates))
  106. assert notna(result).all()
  107. lb = ts.index[14]
  108. ub = ts.index[30]
  109. pix = PeriodIndex(result.index.values, freq="h")
  110. mask = (pix >= lb) & (pix < ub)
  111. rs = result[mask]
  112. assert (rs == ts[lb]).all()
  113. ts.iloc[5:10] = np.nan
  114. ts.iloc[15:20] = np.nan
  115. val1 = ts.asof(ts.index[7])
  116. val2 = ts.asof(ts.index[19])
  117. assert val1 == ts.iloc[4]
  118. assert val2 == ts.iloc[14]
  119. # accepts strings
  120. val1 = ts.asof(str(ts.index[7]))
  121. assert val1 == ts.iloc[4]
  122. # in there
  123. assert ts.asof(ts.index[3]) == ts.iloc[3]
  124. # no as of value
  125. d = ts.index[0].to_timestamp() - offsets.BDay()
  126. assert isna(ts.asof(d))
  127. # Mismatched freq
  128. msg = "Input has different freq"
  129. with pytest.raises(IncompatibleFrequency, match=msg):
  130. ts.asof(rng.asfreq("D"))
  131. def test_errors(self):
  132. s = Series(
  133. [1, 2, 3],
  134. index=[Timestamp("20130101"), Timestamp("20130103"), Timestamp("20130102")],
  135. )
  136. # non-monotonic
  137. assert not s.index.is_monotonic_increasing
  138. with pytest.raises(ValueError, match="requires a sorted index"):
  139. s.asof(s.index[0])
  140. # subset with Series
  141. N = 10
  142. rng = date_range("1/1/1990", periods=N, freq="53s")
  143. s = Series(np.random.default_rng(2).standard_normal(N), index=rng)
  144. with pytest.raises(ValueError, match="not valid for Series"):
  145. s.asof(s.index[0], subset="foo")
  146. def test_all_nans(self):
  147. # GH 15713
  148. # series is all nans
  149. # testing non-default indexes
  150. N = 50
  151. rng = date_range("1/1/1990", periods=N, freq="53s")
  152. dates = date_range("1/1/1990", periods=N * 3, freq="25s")
  153. result = Series(np.nan, index=rng).asof(dates)
  154. expected = Series(np.nan, index=dates)
  155. tm.assert_series_equal(result, expected)
  156. # testing scalar input
  157. date = date_range("1/1/1990", periods=N * 3, freq="25s")[0]
  158. result = Series(np.nan, index=rng).asof(date)
  159. assert isna(result)
  160. # test name is propagated
  161. result = Series(np.nan, index=[1, 2, 3, 4], name="test").asof([4, 5])
  162. expected = Series(np.nan, index=[4, 5], name="test")
  163. tm.assert_series_equal(result, expected)