test_timedelta.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. from datetime import timedelta
  2. import numpy as np
  3. import pytest
  4. import pandas.util._test_decorators as td
  5. import pandas as pd
  6. from pandas import (
  7. DataFrame,
  8. Series,
  9. )
  10. import pandas._testing as tm
  11. from pandas.core.indexes.timedeltas import timedelta_range
  12. def test_asfreq_bug():
  13. df = DataFrame(data=[1, 3], index=[timedelta(), timedelta(minutes=3)])
  14. result = df.resample("1min").asfreq()
  15. expected = DataFrame(
  16. data=[1, np.nan, np.nan, 3],
  17. index=timedelta_range("0 day", periods=4, freq="1min"),
  18. )
  19. tm.assert_frame_equal(result, expected)
  20. def test_resample_with_nat():
  21. # GH 13223
  22. index = pd.to_timedelta(["0s", pd.NaT, "2s"])
  23. result = DataFrame({"value": [2, 3, 5]}, index).resample("1s").mean()
  24. expected = DataFrame(
  25. {"value": [2.5, np.nan, 5.0]},
  26. index=timedelta_range("0 day", periods=3, freq="1s"),
  27. )
  28. tm.assert_frame_equal(result, expected)
  29. def test_resample_as_freq_with_subperiod():
  30. # GH 13022
  31. index = timedelta_range("00:00:00", "00:10:00", freq="5min")
  32. df = DataFrame(data={"value": [1, 5, 10]}, index=index)
  33. result = df.resample("2min").asfreq()
  34. expected_data = {"value": [1, np.nan, np.nan, np.nan, np.nan, 10]}
  35. expected = DataFrame(
  36. data=expected_data, index=timedelta_range("00:00:00", "00:10:00", freq="2min")
  37. )
  38. tm.assert_frame_equal(result, expected)
  39. def test_resample_with_timedeltas():
  40. expected = DataFrame({"A": np.arange(1480)})
  41. expected = expected.groupby(expected.index // 30).sum()
  42. expected.index = timedelta_range("0 days", freq="30min", periods=50)
  43. df = DataFrame(
  44. {"A": np.arange(1480)}, index=pd.to_timedelta(np.arange(1480), unit="min")
  45. )
  46. result = df.resample("30min").sum()
  47. tm.assert_frame_equal(result, expected)
  48. s = df["A"]
  49. result = s.resample("30min").sum()
  50. tm.assert_series_equal(result, expected["A"])
  51. def test_resample_single_period_timedelta():
  52. s = Series(list(range(5)), index=timedelta_range("1 day", freq="s", periods=5))
  53. result = s.resample("2s").sum()
  54. expected = Series([1, 5, 4], index=timedelta_range("1 day", freq="2s", periods=3))
  55. tm.assert_series_equal(result, expected)
  56. def test_resample_timedelta_idempotency():
  57. # GH 12072
  58. index = timedelta_range("0", periods=9, freq="10ms")
  59. series = Series(range(9), index=index)
  60. result = series.resample("10ms").mean()
  61. expected = series.astype(float)
  62. tm.assert_series_equal(result, expected)
  63. def test_resample_offset_with_timedeltaindex():
  64. # GH 10530 & 31809
  65. rng = timedelta_range(start="0s", periods=25, freq="s")
  66. ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
  67. with_base = ts.resample("2s", offset="5s").mean()
  68. without_base = ts.resample("2s").mean()
  69. exp_without_base = timedelta_range(start="0s", end="25s", freq="2s")
  70. exp_with_base = timedelta_range(start="5s", end="29s", freq="2s")
  71. tm.assert_index_equal(without_base.index, exp_without_base)
  72. tm.assert_index_equal(with_base.index, exp_with_base)
  73. def test_resample_categorical_data_with_timedeltaindex():
  74. # GH #12169
  75. df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s"))
  76. df["Group"] = df["Group_obj"].astype("category")
  77. result = df.resample("10s").agg(lambda x: (x.value_counts().index[0]))
  78. exp_tdi = pd.TimedeltaIndex(np.array([0, 10], dtype="m8[s]"), freq="10s").as_unit(
  79. "ns"
  80. )
  81. expected = DataFrame(
  82. {"Group_obj": ["A", "A"], "Group": ["A", "A"]},
  83. index=exp_tdi,
  84. )
  85. expected = expected.reindex(["Group_obj", "Group"], axis=1)
  86. expected["Group"] = expected["Group_obj"].astype("category")
  87. tm.assert_frame_equal(result, expected)
  88. def test_resample_timedelta_values():
  89. # GH 13119
  90. # check that timedelta dtype is preserved when NaT values are
  91. # introduced by the resampling
  92. times = timedelta_range("1 day", "6 day", freq="4D")
  93. df = DataFrame({"time": times}, index=times)
  94. times2 = timedelta_range("1 day", "6 day", freq="2D")
  95. exp = Series(times2, index=times2, name="time")
  96. exp.iloc[1] = pd.NaT
  97. res = df.resample("2D").first()["time"]
  98. tm.assert_series_equal(res, exp)
  99. res = df["time"].resample("2D").first()
  100. tm.assert_series_equal(res, exp)
  101. @pytest.mark.parametrize(
  102. "start, end, freq, resample_freq",
  103. [
  104. ("8h", "21h59min50s", "10s", "3h"), # GH 30353 example
  105. ("3h", "22h", "1h", "5h"),
  106. ("527D", "5006D", "3D", "10D"),
  107. ("1D", "10D", "1D", "2D"), # GH 13022 example
  108. # tests that worked before GH 33498:
  109. ("8h", "21h59min50s", "10s", "2h"),
  110. ("0h", "21h59min50s", "10s", "3h"),
  111. ("10D", "85D", "D", "2D"),
  112. ],
  113. )
  114. def test_resample_timedelta_edge_case(start, end, freq, resample_freq):
  115. # GH 33498
  116. # check that the timedelta bins does not contains an extra bin
  117. idx = timedelta_range(start=start, end=end, freq=freq)
  118. s = Series(np.arange(len(idx)), index=idx)
  119. result = s.resample(resample_freq).min()
  120. expected_index = timedelta_range(freq=resample_freq, start=start, end=end)
  121. tm.assert_index_equal(result.index, expected_index)
  122. assert result.index.freq == expected_index.freq
  123. assert not np.isnan(result.iloc[-1])
  124. @pytest.mark.parametrize("duplicates", [True, False])
  125. def test_resample_with_timedelta_yields_no_empty_groups(duplicates):
  126. # GH 10603
  127. df = DataFrame(
  128. np.random.default_rng(2).normal(size=(10000, 4)),
  129. index=timedelta_range(start="0s", periods=10000, freq="3906250ns"),
  130. )
  131. if duplicates:
  132. # case with non-unique columns
  133. df.columns = ["A", "B", "A", "C"]
  134. result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x))
  135. expected = DataFrame(
  136. [[768] * 4] * 12 + [[528] * 4],
  137. index=timedelta_range(start="1s", periods=13, freq="3s"),
  138. )
  139. expected.columns = df.columns
  140. tm.assert_frame_equal(result, expected)
  141. @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
  142. def test_resample_quantile_timedelta(unit):
  143. # GH: 29485
  144. dtype = np.dtype(f"m8[{unit}]")
  145. df = DataFrame(
  146. {"value": pd.to_timedelta(np.arange(4), unit="s").astype(dtype)},
  147. index=pd.date_range("20200101", periods=4, tz="UTC"),
  148. )
  149. result = df.resample("2D").quantile(0.99)
  150. expected = DataFrame(
  151. {
  152. "value": [
  153. pd.Timedelta("0 days 00:00:00.990000"),
  154. pd.Timedelta("0 days 00:00:02.990000"),
  155. ]
  156. },
  157. index=pd.date_range("20200101", periods=2, tz="UTC", freq="2D"),
  158. ).astype(dtype)
  159. tm.assert_frame_equal(result, expected)
  160. def test_resample_closed_right():
  161. # GH#45414
  162. idx = pd.Index([pd.Timedelta(seconds=120 + i * 30) for i in range(10)])
  163. ser = Series(range(10), index=idx)
  164. result = ser.resample("min", closed="right", label="right").sum()
  165. expected = Series(
  166. [0, 3, 7, 11, 15, 9],
  167. index=pd.TimedeltaIndex(
  168. [pd.Timedelta(seconds=120 + i * 60) for i in range(6)], freq="min"
  169. ),
  170. )
  171. tm.assert_series_equal(result, expected)
  172. @td.skip_if_no("pyarrow")
  173. def test_arrow_duration_resample():
  174. # GH 56371
  175. idx = pd.Index(timedelta_range("1 day", periods=5), dtype="duration[ns][pyarrow]")
  176. expected = Series(np.arange(5, dtype=np.float64), index=idx)
  177. result = expected.resample("1D").mean()
  178. tm.assert_series_equal(result, expected)