test_period_index.py 40 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062
  1. from datetime import datetime
  2. import warnings
  3. import dateutil
  4. import numpy as np
  5. import pytest
  6. import pytz
  7. from pandas._libs.tslibs.ccalendar import (
  8. DAYS,
  9. MONTHS,
  10. )
  11. from pandas._libs.tslibs.period import IncompatibleFrequency
  12. from pandas.errors import InvalidIndexError
  13. import pandas as pd
  14. from pandas import (
  15. DataFrame,
  16. Series,
  17. Timestamp,
  18. )
  19. import pandas._testing as tm
  20. from pandas.core.indexes.datetimes import date_range
  21. from pandas.core.indexes.period import (
  22. Period,
  23. PeriodIndex,
  24. period_range,
  25. )
  26. from pandas.core.resample import _get_period_range_edges
  27. from pandas.tseries import offsets
  28. @pytest.fixture()
  29. def _index_factory():
  30. return period_range
  31. @pytest.fixture
  32. def _series_name():
  33. return "pi"
  34. @pytest.fixture
  35. def simple_period_range_series():
  36. """
  37. Series with period range index and random data for test purposes.
  38. """
  39. def _simple_period_range_series(start, end, freq="D"):
  40. with warnings.catch_warnings():
  41. # suppress Period[B] deprecation warning
  42. msg = "|".join(["Period with BDay freq", r"PeriodDtype\[B\] is deprecated"])
  43. warnings.filterwarnings(
  44. "ignore",
  45. msg,
  46. category=FutureWarning,
  47. )
  48. rng = period_range(start, end, freq=freq)
  49. return Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
  50. return _simple_period_range_series
  51. class TestPeriodIndex:
  52. @pytest.mark.parametrize("freq", ["2D", "1h", "2h"])
  53. @pytest.mark.parametrize("kind", ["period", None, "timestamp"])
  54. def test_asfreq(self, series_and_frame, freq, kind):
  55. # GH 12884, 15944
  56. # make sure .asfreq() returns PeriodIndex (except kind='timestamp')
  57. obj = series_and_frame
  58. if kind == "timestamp":
  59. expected = obj.to_timestamp().resample(freq).asfreq()
  60. else:
  61. start = obj.index[0].to_timestamp(how="start")
  62. end = (obj.index[-1] + obj.index.freq).to_timestamp(how="start")
  63. new_index = date_range(start=start, end=end, freq=freq, inclusive="left")
  64. expected = obj.to_timestamp().reindex(new_index).to_period(freq)
  65. msg = "The 'kind' keyword in (Series|DataFrame).resample is deprecated"
  66. with tm.assert_produces_warning(FutureWarning, match=msg):
  67. result = obj.resample(freq, kind=kind).asfreq()
  68. tm.assert_almost_equal(result, expected)
  69. def test_asfreq_fill_value(self, series):
  70. # test for fill value during resampling, issue 3715
  71. s = series
  72. new_index = date_range(
  73. s.index[0].to_timestamp(how="start"),
  74. (s.index[-1]).to_timestamp(how="start"),
  75. freq="1h",
  76. )
  77. expected = s.to_timestamp().reindex(new_index, fill_value=4.0)
  78. msg = "The 'kind' keyword in Series.resample is deprecated"
  79. with tm.assert_produces_warning(FutureWarning, match=msg):
  80. result = s.resample("1h", kind="timestamp").asfreq(fill_value=4.0)
  81. tm.assert_series_equal(result, expected)
  82. frame = s.to_frame("value")
  83. new_index = date_range(
  84. frame.index[0].to_timestamp(how="start"),
  85. (frame.index[-1]).to_timestamp(how="start"),
  86. freq="1h",
  87. )
  88. expected = frame.to_timestamp().reindex(new_index, fill_value=3.0)
  89. msg = "The 'kind' keyword in DataFrame.resample is deprecated"
  90. with tm.assert_produces_warning(FutureWarning, match=msg):
  91. result = frame.resample("1h", kind="timestamp").asfreq(fill_value=3.0)
  92. tm.assert_frame_equal(result, expected)
  93. @pytest.mark.parametrize("freq", ["h", "12h", "2D", "W"])
  94. @pytest.mark.parametrize("kind", [None, "period", "timestamp"])
  95. @pytest.mark.parametrize("kwargs", [{"on": "date"}, {"level": "d"}])
  96. def test_selection(self, index, freq, kind, kwargs):
  97. # This is a bug, these should be implemented
  98. # GH 14008
  99. rng = np.arange(len(index), dtype=np.int64)
  100. df = DataFrame(
  101. {"date": index, "a": rng},
  102. index=pd.MultiIndex.from_arrays([rng, index], names=["v", "d"]),
  103. )
  104. msg = (
  105. "Resampling from level= or on= selection with a PeriodIndex is "
  106. r"not currently supported, use \.set_index\(\.\.\.\) to "
  107. "explicitly set index"
  108. )
  109. depr_msg = "The 'kind' keyword in DataFrame.resample is deprecated"
  110. with pytest.raises(NotImplementedError, match=msg):
  111. with tm.assert_produces_warning(FutureWarning, match=depr_msg):
  112. df.resample(freq, kind=kind, **kwargs)
  113. @pytest.mark.parametrize("month", MONTHS)
  114. @pytest.mark.parametrize("meth", ["ffill", "bfill"])
  115. @pytest.mark.parametrize("conv", ["start", "end"])
  116. @pytest.mark.parametrize(
  117. ("offset", "period"), [("D", "D"), ("B", "B"), ("ME", "M"), ("QE", "Q")]
  118. )
  119. def test_annual_upsample_cases(
  120. self, offset, period, conv, meth, month, simple_period_range_series
  121. ):
  122. ts = simple_period_range_series("1/1/1990", "12/31/1991", freq=f"Y-{month}")
  123. warn = FutureWarning if period == "B" else None
  124. msg = r"PeriodDtype\[B\] is deprecated"
  125. with tm.assert_produces_warning(warn, match=msg):
  126. result = getattr(ts.resample(period, convention=conv), meth)()
  127. expected = result.to_timestamp(period, how=conv)
  128. expected = expected.asfreq(offset, meth).to_period()
  129. tm.assert_series_equal(result, expected)
  130. def test_basic_downsample(self, simple_period_range_series):
  131. ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M")
  132. result = ts.resample("Y-DEC").mean()
  133. expected = ts.groupby(ts.index.year).mean()
  134. expected.index = period_range("1/1/1990", "6/30/1995", freq="Y-DEC")
  135. tm.assert_series_equal(result, expected)
  136. # this is ok
  137. tm.assert_series_equal(ts.resample("Y-DEC").mean(), result)
  138. tm.assert_series_equal(ts.resample("Y").mean(), result)
  139. @pytest.mark.parametrize(
  140. "rule,expected_error_msg",
  141. [
  142. ("Y-DEC", "<YearEnd: month=12>"),
  143. ("Q-MAR", "<QuarterEnd: startingMonth=3>"),
  144. ("M", "<MonthEnd>"),
  145. ("w-thu", "<Week: weekday=3>"),
  146. ],
  147. )
  148. def test_not_subperiod(self, simple_period_range_series, rule, expected_error_msg):
  149. # These are incompatible period rules for resampling
  150. ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="w-wed")
  151. msg = (
  152. "Frequency <Week: weekday=2> cannot be resampled to "
  153. f"{expected_error_msg}, as they are not sub or super periods"
  154. )
  155. with pytest.raises(IncompatibleFrequency, match=msg):
  156. ts.resample(rule).mean()
  157. @pytest.mark.parametrize("freq", ["D", "2D"])
  158. def test_basic_upsample(self, freq, simple_period_range_series):
  159. ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M")
  160. result = ts.resample("Y-DEC").mean()
  161. resampled = result.resample(freq, convention="end").ffill()
  162. expected = result.to_timestamp(freq, how="end")
  163. expected = expected.asfreq(freq, "ffill").to_period(freq)
  164. tm.assert_series_equal(resampled, expected)
  165. def test_upsample_with_limit(self):
  166. rng = period_range("1/1/2000", periods=5, freq="Y")
  167. ts = Series(np.random.default_rng(2).standard_normal(len(rng)), rng)
  168. result = ts.resample("M", convention="end").ffill(limit=2)
  169. expected = ts.asfreq("M").reindex(result.index, method="ffill", limit=2)
  170. tm.assert_series_equal(result, expected)
  171. def test_annual_upsample(self, simple_period_range_series):
  172. ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="Y-DEC")
  173. df = DataFrame({"a": ts})
  174. rdf = df.resample("D").ffill()
  175. exp = df["a"].resample("D").ffill()
  176. tm.assert_series_equal(rdf["a"], exp)
  177. def test_annual_upsample2(self):
  178. rng = period_range("2000", "2003", freq="Y-DEC")
  179. ts = Series([1, 2, 3, 4], index=rng)
  180. result = ts.resample("M").ffill()
  181. ex_index = period_range("2000-01", "2003-12", freq="M")
  182. expected = ts.asfreq("M", how="start").reindex(ex_index, method="ffill")
  183. tm.assert_series_equal(result, expected)
  184. @pytest.mark.parametrize("month", MONTHS)
  185. @pytest.mark.parametrize("convention", ["start", "end"])
  186. @pytest.mark.parametrize(
  187. ("offset", "period"), [("D", "D"), ("B", "B"), ("ME", "M")]
  188. )
  189. def test_quarterly_upsample(
  190. self, month, offset, period, convention, simple_period_range_series
  191. ):
  192. freq = f"Q-{month}"
  193. ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq)
  194. warn = FutureWarning if period == "B" else None
  195. msg = r"PeriodDtype\[B\] is deprecated"
  196. with tm.assert_produces_warning(warn, match=msg):
  197. result = ts.resample(period, convention=convention).ffill()
  198. expected = result.to_timestamp(period, how=convention)
  199. expected = expected.asfreq(offset, "ffill").to_period()
  200. tm.assert_series_equal(result, expected)
  201. @pytest.mark.parametrize("target", ["D", "B"])
  202. @pytest.mark.parametrize("convention", ["start", "end"])
  203. def test_monthly_upsample(self, target, convention, simple_period_range_series):
  204. ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M")
  205. warn = None if target == "D" else FutureWarning
  206. msg = r"PeriodDtype\[B\] is deprecated"
  207. with tm.assert_produces_warning(warn, match=msg):
  208. result = ts.resample(target, convention=convention).ffill()
  209. expected = result.to_timestamp(target, how=convention)
  210. expected = expected.asfreq(target, "ffill").to_period()
  211. tm.assert_series_equal(result, expected)
  212. def test_resample_basic(self):
  213. # GH3609
  214. s = Series(
  215. range(100),
  216. index=date_range("20130101", freq="s", periods=100, name="idx"),
  217. dtype="float",
  218. )
  219. s[10:30] = np.nan
  220. index = PeriodIndex(
  221. [Period("2013-01-01 00:00", "min"), Period("2013-01-01 00:01", "min")],
  222. name="idx",
  223. )
  224. expected = Series([34.5, 79.5], index=index)
  225. msg = "The 'kind' keyword in Series.resample is deprecated"
  226. with tm.assert_produces_warning(FutureWarning, match=msg):
  227. result = s.to_period().resample("min", kind="period").mean()
  228. tm.assert_series_equal(result, expected)
  229. with tm.assert_produces_warning(FutureWarning, match=msg):
  230. result2 = s.resample("min", kind="period").mean()
  231. tm.assert_series_equal(result2, expected)
  232. @pytest.mark.parametrize(
  233. "freq,expected_vals", [("M", [31, 29, 31, 9]), ("2M", [31 + 29, 31 + 9])]
  234. )
  235. def test_resample_count(self, freq, expected_vals):
  236. # GH12774
  237. series = Series(1, index=period_range(start="2000", periods=100))
  238. result = series.resample(freq).count()
  239. expected_index = period_range(
  240. start="2000", freq=freq, periods=len(expected_vals)
  241. )
  242. expected = Series(expected_vals, index=expected_index)
  243. tm.assert_series_equal(result, expected)
  244. def test_resample_same_freq(self, resample_method):
  245. # GH12770
  246. series = Series(range(3), index=period_range(start="2000", periods=3, freq="M"))
  247. expected = series
  248. result = getattr(series.resample("M"), resample_method)()
  249. tm.assert_series_equal(result, expected)
  250. def test_resample_incompat_freq(self):
  251. msg = (
  252. "Frequency <MonthEnd> cannot be resampled to <Week: weekday=6>, "
  253. "as they are not sub or super periods"
  254. )
  255. pi = period_range(start="2000", periods=3, freq="M")
  256. ser = Series(range(3), index=pi)
  257. rs = ser.resample("W")
  258. with pytest.raises(IncompatibleFrequency, match=msg):
  259. # TODO: should this raise at the resample call instead of at the mean call?
  260. rs.mean()
  261. @pytest.mark.parametrize(
  262. "tz",
  263. [
  264. pytz.timezone("America/Los_Angeles"),
  265. dateutil.tz.gettz("America/Los_Angeles"),
  266. ],
  267. )
  268. def test_with_local_timezone(self, tz):
  269. # see gh-5430
  270. local_timezone = tz
  271. start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=pytz.utc)
  272. # 1 day later
  273. end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=pytz.utc)
  274. index = date_range(start, end, freq="h", name="idx")
  275. series = Series(1, index=index)
  276. series = series.tz_convert(local_timezone)
  277. msg = "The 'kind' keyword in Series.resample is deprecated"
  278. with tm.assert_produces_warning(FutureWarning, match=msg):
  279. result = series.resample("D", kind="period").mean()
  280. # Create the expected series
  281. # Index is moved back a day with the timezone conversion from UTC to
  282. # Pacific
  283. expected_index = (
  284. period_range(start=start, end=end, freq="D", name="idx") - offsets.Day()
  285. )
  286. expected = Series(1.0, index=expected_index)
  287. tm.assert_series_equal(result, expected)
  288. @pytest.mark.parametrize(
  289. "tz",
  290. [
  291. pytz.timezone("America/Los_Angeles"),
  292. dateutil.tz.gettz("America/Los_Angeles"),
  293. ],
  294. )
  295. def test_resample_with_tz(self, tz, unit):
  296. # GH 13238
  297. dti = date_range("2017-01-01", periods=48, freq="h", tz=tz, unit=unit)
  298. ser = Series(2, index=dti)
  299. result = ser.resample("D").mean()
  300. exp_dti = pd.DatetimeIndex(
  301. ["2017-01-01", "2017-01-02"], tz=tz, freq="D"
  302. ).as_unit(unit)
  303. expected = Series(
  304. 2.0,
  305. index=exp_dti,
  306. )
  307. tm.assert_series_equal(result, expected)
  308. # Especially assert that the timezone is LMT for pytz
  309. assert result.index.tz == tz
  310. def test_resample_nonexistent_time_bin_edge(self):
  311. # GH 19375
  312. index = date_range("2017-03-12", "2017-03-12 1:45:00", freq="15min")
  313. s = Series(np.zeros(len(index)), index=index)
  314. expected = s.tz_localize("US/Pacific")
  315. expected.index = pd.DatetimeIndex(expected.index, freq="900s")
  316. result = expected.resample("900s").mean()
  317. tm.assert_series_equal(result, expected)
  318. def test_resample_nonexistent_time_bin_edge2(self):
  319. # GH 23742
  320. index = date_range(start="2017-10-10", end="2017-10-20", freq="1h")
  321. index = index.tz_localize("UTC").tz_convert("America/Sao_Paulo")
  322. df = DataFrame(data=list(range(len(index))), index=index)
  323. result = df.groupby(pd.Grouper(freq="1D")).count()
  324. expected = date_range(
  325. start="2017-10-09",
  326. end="2017-10-20",
  327. freq="D",
  328. tz="America/Sao_Paulo",
  329. nonexistent="shift_forward",
  330. inclusive="left",
  331. )
  332. tm.assert_index_equal(result.index, expected)
  333. def test_resample_ambiguous_time_bin_edge(self):
  334. # GH 10117
  335. idx = date_range(
  336. "2014-10-25 22:00:00",
  337. "2014-10-26 00:30:00",
  338. freq="30min",
  339. tz="Europe/London",
  340. )
  341. expected = Series(np.zeros(len(idx)), index=idx)
  342. result = expected.resample("30min").mean()
  343. tm.assert_series_equal(result, expected)
  344. def test_fill_method_and_how_upsample(self):
  345. # GH2073
  346. s = Series(
  347. np.arange(9, dtype="int64"),
  348. index=date_range("2010-01-01", periods=9, freq="QE"),
  349. )
  350. last = s.resample("ME").ffill()
  351. both = s.resample("ME").ffill().resample("ME").last().astype("int64")
  352. tm.assert_series_equal(last, both)
  353. @pytest.mark.parametrize("day", DAYS)
  354. @pytest.mark.parametrize("target", ["D", "B"])
  355. @pytest.mark.parametrize("convention", ["start", "end"])
  356. def test_weekly_upsample(self, day, target, convention, simple_period_range_series):
  357. freq = f"W-{day}"
  358. ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq)
  359. warn = None if target == "D" else FutureWarning
  360. msg = r"PeriodDtype\[B\] is deprecated"
  361. with tm.assert_produces_warning(warn, match=msg):
  362. result = ts.resample(target, convention=convention).ffill()
  363. expected = result.to_timestamp(target, how=convention)
  364. expected = expected.asfreq(target, "ffill").to_period()
  365. tm.assert_series_equal(result, expected)
  366. def test_resample_to_timestamps(self, simple_period_range_series):
  367. ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M")
  368. msg = "The 'kind' keyword in Series.resample is deprecated"
  369. with tm.assert_produces_warning(FutureWarning, match=msg):
  370. result = ts.resample("Y-DEC", kind="timestamp").mean()
  371. expected = ts.to_timestamp(how="start").resample("YE-DEC").mean()
  372. tm.assert_series_equal(result, expected)
  373. @pytest.mark.parametrize("month", MONTHS)
  374. def test_resample_to_quarterly(self, simple_period_range_series, month):
  375. ts = simple_period_range_series("1990", "1992", freq=f"Y-{month}")
  376. quar_ts = ts.resample(f"Q-{month}").ffill()
  377. stamps = ts.to_timestamp("D", how="start")
  378. qdates = period_range(
  379. ts.index[0].asfreq("D", "start"),
  380. ts.index[-1].asfreq("D", "end"),
  381. freq=f"Q-{month}",
  382. )
  383. expected = stamps.reindex(qdates.to_timestamp("D", "s"), method="ffill")
  384. expected.index = qdates
  385. tm.assert_series_equal(quar_ts, expected)
  386. @pytest.mark.parametrize("how", ["start", "end"])
  387. def test_resample_to_quarterly_start_end(self, simple_period_range_series, how):
  388. # conforms, but different month
  389. ts = simple_period_range_series("1990", "1992", freq="Y-JUN")
  390. result = ts.resample("Q-MAR", convention=how).ffill()
  391. expected = ts.asfreq("Q-MAR", how=how)
  392. expected = expected.reindex(result.index, method="ffill")
  393. # FIXME: don't leave commented-out
  394. # .to_timestamp('D')
  395. # expected = expected.resample('Q-MAR').ffill()
  396. tm.assert_series_equal(result, expected)
  397. def test_resample_fill_missing(self):
  398. rng = PeriodIndex([2000, 2005, 2007, 2009], freq="Y")
  399. s = Series(np.random.default_rng(2).standard_normal(4), index=rng)
  400. stamps = s.to_timestamp()
  401. filled = s.resample("Y").ffill()
  402. expected = stamps.resample("YE").ffill().to_period("Y")
  403. tm.assert_series_equal(filled, expected)
  404. def test_cant_fill_missing_dups(self):
  405. rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq="Y")
  406. s = Series(np.random.default_rng(2).standard_normal(5), index=rng)
  407. msg = "Reindexing only valid with uniquely valued Index objects"
  408. with pytest.raises(InvalidIndexError, match=msg):
  409. s.resample("Y").ffill()
  410. @pytest.mark.parametrize("freq", ["5min"])
  411. @pytest.mark.parametrize("kind", ["period", None, "timestamp"])
  412. def test_resample_5minute(self, freq, kind):
  413. rng = period_range("1/1/2000", "1/5/2000", freq="min")
  414. ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
  415. expected = ts.to_timestamp().resample(freq).mean()
  416. if kind != "timestamp":
  417. expected = expected.to_period(freq)
  418. msg = "The 'kind' keyword in Series.resample is deprecated"
  419. with tm.assert_produces_warning(FutureWarning, match=msg):
  420. result = ts.resample(freq, kind=kind).mean()
  421. tm.assert_series_equal(result, expected)
  422. def test_upsample_daily_business_daily(self, simple_period_range_series):
  423. ts = simple_period_range_series("1/1/2000", "2/1/2000", freq="B")
  424. result = ts.resample("D").asfreq()
  425. expected = ts.asfreq("D").reindex(period_range("1/3/2000", "2/1/2000"))
  426. tm.assert_series_equal(result, expected)
  427. ts = simple_period_range_series("1/1/2000", "2/1/2000")
  428. result = ts.resample("h", convention="s").asfreq()
  429. exp_rng = period_range("1/1/2000", "2/1/2000 23:00", freq="h")
  430. expected = ts.asfreq("h", how="s").reindex(exp_rng)
  431. tm.assert_series_equal(result, expected)
  432. def test_resample_irregular_sparse(self):
  433. dr = date_range(start="1/1/2012", freq="5min", periods=1000)
  434. s = Series(np.array(100), index=dr)
  435. # subset the data.
  436. subset = s[:"2012-01-04 06:55"]
  437. result = subset.resample("10min").apply(len)
  438. expected = s.resample("10min").apply(len).loc[result.index]
  439. tm.assert_series_equal(result, expected)
  440. def test_resample_weekly_all_na(self):
  441. rng = date_range("1/1/2000", periods=10, freq="W-WED")
  442. ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
  443. result = ts.resample("W-THU").asfreq()
  444. assert result.isna().all()
  445. result = ts.resample("W-THU").asfreq().ffill()[:-1]
  446. expected = ts.asfreq("W-THU").ffill()
  447. tm.assert_series_equal(result, expected)
  448. def test_resample_tz_localized(self, unit):
  449. dr = date_range(start="2012-4-13", end="2012-5-1", unit=unit)
  450. ts = Series(range(len(dr)), index=dr)
  451. ts_utc = ts.tz_localize("UTC")
  452. ts_local = ts_utc.tz_convert("America/Los_Angeles")
  453. result = ts_local.resample("W").mean()
  454. ts_local_naive = ts_local.copy()
  455. ts_local_naive.index = ts_local_naive.index.tz_localize(None)
  456. exp = ts_local_naive.resample("W").mean().tz_localize("America/Los_Angeles")
  457. exp.index = pd.DatetimeIndex(exp.index, freq="W")
  458. tm.assert_series_equal(result, exp)
  459. # it works
  460. result = ts_local.resample("D").mean()
  461. def test_resample_tz_localized2(self):
  462. # #2245
  463. idx = date_range(
  464. "2001-09-20 15:59", "2001-09-20 16:00", freq="min", tz="Australia/Sydney"
  465. )
  466. s = Series([1, 2], index=idx)
  467. result = s.resample("D", closed="right", label="right").mean()
  468. ex_index = date_range("2001-09-21", periods=1, freq="D", tz="Australia/Sydney")
  469. expected = Series([1.5], index=ex_index)
  470. tm.assert_series_equal(result, expected)
  471. # for good measure
  472. msg = "The 'kind' keyword in Series.resample is deprecated"
  473. with tm.assert_produces_warning(FutureWarning, match=msg):
  474. result = s.resample("D", kind="period").mean()
  475. ex_index = period_range("2001-09-20", periods=1, freq="D")
  476. expected = Series([1.5], index=ex_index)
  477. tm.assert_series_equal(result, expected)
  478. def test_resample_tz_localized3(self):
  479. # GH 6397
  480. # comparing an offset that doesn't propagate tz's
  481. rng = date_range("1/1/2011", periods=20000, freq="h")
  482. rng = rng.tz_localize("EST")
  483. ts = DataFrame(index=rng)
  484. ts["first"] = np.random.default_rng(2).standard_normal(len(rng))
  485. ts["second"] = np.cumsum(np.random.default_rng(2).standard_normal(len(rng)))
  486. expected = DataFrame(
  487. {
  488. "first": ts.resample("YE").sum()["first"],
  489. "second": ts.resample("YE").mean()["second"],
  490. },
  491. columns=["first", "second"],
  492. )
  493. result = (
  494. ts.resample("YE")
  495. .agg({"first": "sum", "second": "mean"})
  496. .reindex(columns=["first", "second"])
  497. )
  498. tm.assert_frame_equal(result, expected)
  499. def test_closed_left_corner(self):
  500. # #1465
  501. s = Series(
  502. np.random.default_rng(2).standard_normal(21),
  503. index=date_range(start="1/1/2012 9:30", freq="1min", periods=21),
  504. )
  505. s.iloc[0] = np.nan
  506. result = s.resample("10min", closed="left", label="right").mean()
  507. exp = s[1:].resample("10min", closed="left", label="right").mean()
  508. tm.assert_series_equal(result, exp)
  509. result = s.resample("10min", closed="left", label="left").mean()
  510. exp = s[1:].resample("10min", closed="left", label="left").mean()
  511. ex_index = date_range(start="1/1/2012 9:30", freq="10min", periods=3)
  512. tm.assert_index_equal(result.index, ex_index)
  513. tm.assert_series_equal(result, exp)
  514. def test_quarterly_resampling(self):
  515. rng = period_range("2000Q1", periods=10, freq="Q-DEC")
  516. ts = Series(np.arange(10), index=rng)
  517. result = ts.resample("Y").mean()
  518. exp = ts.to_timestamp().resample("YE").mean().to_period()
  519. tm.assert_series_equal(result, exp)
  520. def test_resample_weekly_bug_1726(self):
  521. # 8/6/12 is a Monday
  522. ind = date_range(start="8/6/2012", end="8/26/2012", freq="D")
  523. n = len(ind)
  524. data = [[x] * 5 for x in range(n)]
  525. df = DataFrame(data, columns=["open", "high", "low", "close", "vol"], index=ind)
  526. # it works!
  527. df.resample("W-MON", closed="left", label="left").first()
  528. def test_resample_with_dst_time_change(self):
  529. # GH 15549
  530. index = (
  531. pd.DatetimeIndex([1457537600000000000, 1458059600000000000])
  532. .tz_localize("UTC")
  533. .tz_convert("America/Chicago")
  534. )
  535. df = DataFrame([1, 2], index=index)
  536. result = df.resample("12h", closed="right", label="right").last().ffill()
  537. expected_index_values = [
  538. "2016-03-09 12:00:00-06:00",
  539. "2016-03-10 00:00:00-06:00",
  540. "2016-03-10 12:00:00-06:00",
  541. "2016-03-11 00:00:00-06:00",
  542. "2016-03-11 12:00:00-06:00",
  543. "2016-03-12 00:00:00-06:00",
  544. "2016-03-12 12:00:00-06:00",
  545. "2016-03-13 00:00:00-06:00",
  546. "2016-03-13 13:00:00-05:00",
  547. "2016-03-14 01:00:00-05:00",
  548. "2016-03-14 13:00:00-05:00",
  549. "2016-03-15 01:00:00-05:00",
  550. "2016-03-15 13:00:00-05:00",
  551. ]
  552. index = (
  553. pd.to_datetime(expected_index_values, utc=True)
  554. .tz_convert("America/Chicago")
  555. .as_unit(index.unit)
  556. )
  557. index = pd.DatetimeIndex(index, freq="12h")
  558. expected = DataFrame(
  559. [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0],
  560. index=index,
  561. )
  562. tm.assert_frame_equal(result, expected)
  563. def test_resample_bms_2752(self):
  564. # GH2753
  565. timeseries = Series(
  566. index=pd.bdate_range("20000101", "20000201"), dtype=np.float64
  567. )
  568. res1 = timeseries.resample("BMS").mean()
  569. res2 = timeseries.resample("BMS").mean().resample("B").mean()
  570. assert res1.index[0] == Timestamp("20000103")
  571. assert res1.index[0] == res2.index[0]
  572. @pytest.mark.xfail(reason="Commented out for more than 3 years. Should this work?")
  573. def test_monthly_convention_span(self):
  574. rng = period_range("2000-01", periods=3, freq="ME")
  575. ts = Series(np.arange(3), index=rng)
  576. # hacky way to get same thing
  577. exp_index = period_range("2000-01-01", "2000-03-31", freq="D")
  578. expected = ts.asfreq("D", how="end").reindex(exp_index)
  579. expected = expected.fillna(method="bfill")
  580. result = ts.resample("D").mean()
  581. tm.assert_series_equal(result, expected)
  582. @pytest.mark.parametrize(
  583. "from_freq, to_freq", [("D", "ME"), ("QE", "YE"), ("ME", "QE"), ("D", "W")]
  584. )
  585. def test_default_right_closed_label(self, from_freq, to_freq):
  586. idx = date_range(start="8/15/2012", periods=100, freq=from_freq)
  587. df = DataFrame(np.random.default_rng(2).standard_normal((len(idx), 2)), idx)
  588. resampled = df.resample(to_freq).mean()
  589. tm.assert_frame_equal(
  590. resampled, df.resample(to_freq, closed="right", label="right").mean()
  591. )
  592. @pytest.mark.parametrize(
  593. "from_freq, to_freq",
  594. [("D", "MS"), ("QE", "YS"), ("ME", "QS"), ("h", "D"), ("min", "h")],
  595. )
  596. def test_default_left_closed_label(self, from_freq, to_freq):
  597. idx = date_range(start="8/15/2012", periods=100, freq=from_freq)
  598. df = DataFrame(np.random.default_rng(2).standard_normal((len(idx), 2)), idx)
  599. resampled = df.resample(to_freq).mean()
  600. tm.assert_frame_equal(
  601. resampled, df.resample(to_freq, closed="left", label="left").mean()
  602. )
  603. def test_all_values_single_bin(self):
  604. # GH#2070
  605. index = period_range(start="2012-01-01", end="2012-12-31", freq="M")
  606. ser = Series(np.random.default_rng(2).standard_normal(len(index)), index=index)
  607. result = ser.resample("Y").mean()
  608. tm.assert_almost_equal(result.iloc[0], ser.mean())
  609. def test_evenly_divisible_with_no_extra_bins(self):
  610. # GH#4076
  611. # when the frequency is evenly divisible, sometimes extra bins
  612. df = DataFrame(
  613. np.random.default_rng(2).standard_normal((9, 3)),
  614. index=date_range("2000-1-1", periods=9),
  615. )
  616. result = df.resample("5D").mean()
  617. expected = pd.concat([df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T
  618. expected.index = pd.DatetimeIndex(
  619. [Timestamp("2000-1-1"), Timestamp("2000-1-6")], dtype="M8[ns]", freq="5D"
  620. )
  621. tm.assert_frame_equal(result, expected)
  622. def test_evenly_divisible_with_no_extra_bins2(self):
  623. index = date_range(start="2001-5-4", periods=28)
  624. df = DataFrame(
  625. [
  626. {
  627. "REST_KEY": 1,
  628. "DLY_TRN_QT": 80,
  629. "DLY_SLS_AMT": 90,
  630. "COOP_DLY_TRN_QT": 30,
  631. "COOP_DLY_SLS_AMT": 20,
  632. }
  633. ]
  634. * 28
  635. + [
  636. {
  637. "REST_KEY": 2,
  638. "DLY_TRN_QT": 70,
  639. "DLY_SLS_AMT": 10,
  640. "COOP_DLY_TRN_QT": 50,
  641. "COOP_DLY_SLS_AMT": 20,
  642. }
  643. ]
  644. * 28,
  645. index=index.append(index),
  646. ).sort_index()
  647. index = date_range("2001-5-4", periods=4, freq="7D")
  648. expected = DataFrame(
  649. [
  650. {
  651. "REST_KEY": 14,
  652. "DLY_TRN_QT": 14,
  653. "DLY_SLS_AMT": 14,
  654. "COOP_DLY_TRN_QT": 14,
  655. "COOP_DLY_SLS_AMT": 14,
  656. }
  657. ]
  658. * 4,
  659. index=index,
  660. )
  661. result = df.resample("7D").count()
  662. tm.assert_frame_equal(result, expected)
  663. expected = DataFrame(
  664. [
  665. {
  666. "REST_KEY": 21,
  667. "DLY_TRN_QT": 1050,
  668. "DLY_SLS_AMT": 700,
  669. "COOP_DLY_TRN_QT": 560,
  670. "COOP_DLY_SLS_AMT": 280,
  671. }
  672. ]
  673. * 4,
  674. index=index,
  675. )
  676. result = df.resample("7D").sum()
  677. tm.assert_frame_equal(result, expected)
  678. @pytest.mark.parametrize("freq, period_mult", [("h", 24), ("12h", 2)])
  679. @pytest.mark.parametrize("kind", [None, "period"])
  680. def test_upsampling_ohlc(self, freq, period_mult, kind):
  681. # GH 13083
  682. pi = period_range(start="2000", freq="D", periods=10)
  683. s = Series(range(len(pi)), index=pi)
  684. expected = s.to_timestamp().resample(freq).ohlc().to_period(freq)
  685. # timestamp-based resampling doesn't include all sub-periods
  686. # of the last original period, so extend accordingly:
  687. new_index = period_range(start="2000", freq=freq, periods=period_mult * len(pi))
  688. expected = expected.reindex(new_index)
  689. msg = "The 'kind' keyword in Series.resample is deprecated"
  690. with tm.assert_produces_warning(FutureWarning, match=msg):
  691. result = s.resample(freq, kind=kind).ohlc()
  692. tm.assert_frame_equal(result, expected)
  693. @pytest.mark.parametrize(
  694. "periods, values",
  695. [
  696. (
  697. [
  698. pd.NaT,
  699. "1970-01-01 00:00:00",
  700. pd.NaT,
  701. "1970-01-01 00:00:02",
  702. "1970-01-01 00:00:03",
  703. ],
  704. [2, 3, 5, 7, 11],
  705. ),
  706. (
  707. [
  708. pd.NaT,
  709. pd.NaT,
  710. "1970-01-01 00:00:00",
  711. pd.NaT,
  712. pd.NaT,
  713. pd.NaT,
  714. "1970-01-01 00:00:02",
  715. "1970-01-01 00:00:03",
  716. pd.NaT,
  717. pd.NaT,
  718. ],
  719. [1, 2, 3, 5, 6, 8, 7, 11, 12, 13],
  720. ),
  721. ],
  722. )
  723. @pytest.mark.parametrize(
  724. "freq, expected_values",
  725. [
  726. ("1s", [3, np.nan, 7, 11]),
  727. ("2s", [3, (7 + 11) / 2]),
  728. ("3s", [(3 + 7) / 2, 11]),
  729. ],
  730. )
  731. def test_resample_with_nat(self, periods, values, freq, expected_values):
  732. # GH 13224
  733. index = PeriodIndex(periods, freq="s")
  734. frame = DataFrame(values, index=index)
  735. expected_index = period_range(
  736. "1970-01-01 00:00:00", periods=len(expected_values), freq=freq
  737. )
  738. expected = DataFrame(expected_values, index=expected_index)
  739. result = frame.resample(freq).mean()
  740. tm.assert_frame_equal(result, expected)
  741. def test_resample_with_only_nat(self):
  742. # GH 13224
  743. pi = PeriodIndex([pd.NaT] * 3, freq="s")
  744. frame = DataFrame([2, 3, 5], index=pi, columns=["a"])
  745. expected_index = PeriodIndex(data=[], freq=pi.freq)
  746. expected = DataFrame(index=expected_index, columns=["a"], dtype="float64")
  747. result = frame.resample("1s").mean()
  748. tm.assert_frame_equal(result, expected)
  749. @pytest.mark.parametrize(
  750. "start,end,start_freq,end_freq,offset",
  751. [
  752. ("19910905", "19910909 03:00", "h", "24h", "10h"),
  753. ("19910905", "19910909 12:00", "h", "24h", "10h"),
  754. ("19910905", "19910909 23:00", "h", "24h", "10h"),
  755. ("19910905 10:00", "19910909", "h", "24h", "10h"),
  756. ("19910905 10:00", "19910909 10:00", "h", "24h", "10h"),
  757. ("19910905", "19910909 10:00", "h", "24h", "10h"),
  758. ("19910905 12:00", "19910909", "h", "24h", "10h"),
  759. ("19910905 12:00", "19910909 03:00", "h", "24h", "10h"),
  760. ("19910905 12:00", "19910909 12:00", "h", "24h", "10h"),
  761. ("19910905 12:00", "19910909 12:00", "h", "24h", "34h"),
  762. ("19910905 12:00", "19910909 12:00", "h", "17h", "10h"),
  763. ("19910905 12:00", "19910909 12:00", "h", "17h", "3h"),
  764. ("19910905", "19910913 06:00", "2h", "24h", "10h"),
  765. ("19910905", "19910905 01:39", "Min", "5Min", "3Min"),
  766. ("19910905", "19910905 03:18", "2Min", "5Min", "3Min"),
  767. ],
  768. )
  769. def test_resample_with_offset(self, start, end, start_freq, end_freq, offset):
  770. # GH 23882 & 31809
  771. pi = period_range(start, end, freq=start_freq)
  772. ser = Series(np.arange(len(pi)), index=pi)
  773. result = ser.resample(end_freq, offset=offset).mean()
  774. result = result.to_timestamp(end_freq)
  775. expected = ser.to_timestamp().resample(end_freq, offset=offset).mean()
  776. tm.assert_series_equal(result, expected)
  777. def test_resample_with_offset_month(self):
  778. # GH 23882 & 31809
  779. pi = period_range("19910905 12:00", "19910909 1:00", freq="h")
  780. ser = Series(np.arange(len(pi)), index=pi)
  781. result = ser.resample("M", offset="3h").mean()
  782. result = result.to_timestamp("M")
  783. expected = ser.to_timestamp().resample("ME", offset="3h").mean()
  784. # TODO: is non-tick the relevant characteristic? (GH 33815)
  785. expected.index = expected.index._with_freq(None)
  786. tm.assert_series_equal(result, expected)
  787. @pytest.mark.parametrize(
  788. "first,last,freq,freq_to_offset,exp_first,exp_last",
  789. [
  790. ("19910905", "19920406", "D", "D", "19910905", "19920406"),
  791. ("19910905 00:00", "19920406 06:00", "D", "D", "19910905", "19920406"),
  792. (
  793. "19910905 06:00",
  794. "19920406 06:00",
  795. "h",
  796. "h",
  797. "19910905 06:00",
  798. "19920406 06:00",
  799. ),
  800. ("19910906", "19920406", "M", "ME", "1991-09", "1992-04"),
  801. ("19910831", "19920430", "M", "ME", "1991-08", "1992-04"),
  802. ("1991-08", "1992-04", "M", "ME", "1991-08", "1992-04"),
  803. ],
  804. )
  805. def test_get_period_range_edges(
  806. self, first, last, freq, freq_to_offset, exp_first, exp_last
  807. ):
  808. first = Period(first)
  809. last = Period(last)
  810. exp_first = Period(exp_first, freq=freq)
  811. exp_last = Period(exp_last, freq=freq)
  812. freq = pd.tseries.frequencies.to_offset(freq_to_offset)
  813. result = _get_period_range_edges(first, last, freq)
  814. expected = (exp_first, exp_last)
  815. assert result == expected
  816. def test_sum_min_count(self):
  817. # GH 19974
  818. index = date_range(start="2018", freq="ME", periods=6)
  819. data = np.ones(6)
  820. data[3:6] = np.nan
  821. s = Series(data, index).to_period()
  822. result = s.resample("Q").sum(min_count=1)
  823. expected = Series(
  824. [3.0, np.nan], index=PeriodIndex(["2018Q1", "2018Q2"], freq="Q-DEC")
  825. )
  826. tm.assert_series_equal(result, expected)
  827. def test_resample_t_l_deprecated(self):
  828. # GH#52536
  829. msg_t = "'T' is deprecated and will be removed in a future version."
  830. msg_l = "'L' is deprecated and will be removed in a future version."
  831. with tm.assert_produces_warning(FutureWarning, match=msg_l):
  832. rng_l = period_range(
  833. "2020-01-01 00:00:00 00:00", "2020-01-01 00:00:00 00:01", freq="L"
  834. )
  835. ser = Series(np.arange(len(rng_l)), index=rng_l)
  836. rng = period_range(
  837. "2020-01-01 00:00:00 00:00", "2020-01-01 00:00:00 00:01", freq="min"
  838. )
  839. expected = Series([29999.5, 60000.0], index=rng)
  840. with tm.assert_produces_warning(FutureWarning, match=msg_t):
  841. result = ser.resample("T").mean()
  842. tm.assert_series_equal(result, expected)
  843. @pytest.mark.parametrize(
  844. "freq, freq_depr, freq_res, freq_depr_res, data",
  845. [
  846. ("2Q", "2q", "2Y", "2y", [0.5]),
  847. ("2M", "2m", "2Q", "2q", [1.0, 3.0]),
  848. ],
  849. )
  850. def test_resample_lowercase_frequency_deprecated(
  851. self, freq, freq_depr, freq_res, freq_depr_res, data
  852. ):
  853. depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a "
  854. f"future version. Please use '{freq[1:]}' instead."
  855. depr_msg_res = f"'{freq_depr_res[1:]}' is deprecated and will be removed in a "
  856. f"future version. Please use '{freq_res[1:]}' instead."
  857. with tm.assert_produces_warning(FutureWarning, match=depr_msg):
  858. rng_l = period_range("2020-01-01", "2020-08-01", freq=freq_depr)
  859. ser = Series(np.arange(len(rng_l)), index=rng_l)
  860. rng = period_range("2020-01-01", "2020-08-01", freq=freq_res)
  861. expected = Series(data=data, index=rng)
  862. with tm.assert_produces_warning(FutureWarning, match=depr_msg_res):
  863. result = ser.resample(freq_depr_res).mean()
  864. tm.assert_series_equal(result, expected)
  865. @pytest.mark.parametrize(
  866. "offset",
  867. [
  868. offsets.MonthBegin(),
  869. offsets.BYearBegin(2),
  870. offsets.BusinessHour(2),
  871. ],
  872. )
  873. def test_asfreq_invalid_period_offset(self, offset, series_and_frame):
  874. # GH#55785
  875. msg = f"Invalid offset: '{offset.base}' for converting time series "
  876. df = series_and_frame
  877. with pytest.raises(ValueError, match=msg):
  878. df.asfreq(freq=offset)
  879. @pytest.mark.parametrize(
  880. "freq,freq_depr",
  881. [
  882. ("2M", "2ME"),
  883. ("2Q", "2QE"),
  884. ("2Q-FEB", "2QE-FEB"),
  885. ("2Y", "2YE"),
  886. ("2Y-MAR", "2YE-MAR"),
  887. ("2M", "2me"),
  888. ("2Q", "2qe"),
  889. ("2Y-MAR", "2ye-mar"),
  890. ],
  891. )
  892. def test_resample_frequency_ME_QE_YE_error_message(series_and_frame, freq, freq_depr):
  893. # GH#9586
  894. msg = f"for Period, please use '{freq[1:]}' instead of '{freq_depr[1:]}'"
  895. obj = series_and_frame
  896. with pytest.raises(ValueError, match=msg):
  897. obj.resample(freq_depr)
  898. def test_corner_cases_period(simple_period_range_series):
  899. # miscellaneous test coverage
  900. len0pts = simple_period_range_series("2007-01", "2010-05", freq="M")[:0]
  901. # it works
  902. result = len0pts.resample("Y-DEC").mean()
  903. assert len(result) == 0
  904. @pytest.mark.parametrize(
  905. "freq_depr",
  906. [
  907. "2BME",
  908. "2CBME",
  909. "2SME",
  910. "2BQE-FEB",
  911. "2BYE-MAR",
  912. ],
  913. )
  914. def test_resample_frequency_invalid_freq(series_and_frame, freq_depr):
  915. # GH#9586
  916. msg = f"Invalid frequency: {freq_depr[1:]}"
  917. obj = series_and_frame
  918. with pytest.raises(ValueError, match=msg):
  919. obj.resample(freq_depr)