test_resample_api.py 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113
  1. from datetime import datetime
  2. import re
  3. import numpy as np
  4. import pytest
  5. from pandas._libs import lib
  6. from pandas.errors import UnsupportedFunctionCall
  7. import pandas as pd
  8. from pandas import (
  9. DataFrame,
  10. NamedAgg,
  11. Series,
  12. )
  13. import pandas._testing as tm
  14. from pandas.core.indexes.datetimes import date_range
  15. @pytest.fixture
  16. def dti():
  17. return date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="Min")
  18. @pytest.fixture
  19. def _test_series(dti):
  20. return Series(np.random.default_rng(2).random(len(dti)), dti)
  21. @pytest.fixture
  22. def test_frame(dti, _test_series):
  23. return DataFrame({"A": _test_series, "B": _test_series, "C": np.arange(len(dti))})
  24. def test_str(_test_series):
  25. r = _test_series.resample("h")
  26. assert (
  27. "DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, "
  28. "label=left, convention=start, origin=start_day]" in str(r)
  29. )
  30. r = _test_series.resample("h", origin="2000-01-01")
  31. assert (
  32. "DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, "
  33. "label=left, convention=start, origin=2000-01-01 00:00:00]" in str(r)
  34. )
  35. def test_api(_test_series):
  36. r = _test_series.resample("h")
  37. result = r.mean()
  38. assert isinstance(result, Series)
  39. assert len(result) == 217
  40. r = _test_series.to_frame().resample("h")
  41. result = r.mean()
  42. assert isinstance(result, DataFrame)
  43. assert len(result) == 217
  44. def test_groupby_resample_api():
  45. # GH 12448
  46. # .groupby(...).resample(...) hitting warnings
  47. # when appropriate
  48. df = DataFrame(
  49. {
  50. "date": date_range(start="2016-01-01", periods=4, freq="W"),
  51. "group": [1, 1, 2, 2],
  52. "val": [5, 6, 7, 8],
  53. }
  54. ).set_index("date")
  55. # replication step
  56. i = (
  57. date_range("2016-01-03", periods=8).tolist()
  58. + date_range("2016-01-17", periods=8).tolist()
  59. )
  60. index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i], names=["group", "date"])
  61. expected = DataFrame({"val": [5] * 7 + [6] + [7] * 7 + [8]}, index=index)
  62. msg = "DataFrameGroupBy.apply operated on the grouping columns"
  63. with tm.assert_produces_warning(FutureWarning, match=msg):
  64. result = df.groupby("group").apply(lambda x: x.resample("1D").ffill())[["val"]]
  65. tm.assert_frame_equal(result, expected)
  66. def test_groupby_resample_on_api():
  67. # GH 15021
  68. # .groupby(...).resample(on=...) results in an unexpected
  69. # keyword warning.
  70. df = DataFrame(
  71. {
  72. "key": ["A", "B"] * 5,
  73. "dates": date_range("2016-01-01", periods=10),
  74. "values": np.random.default_rng(2).standard_normal(10),
  75. }
  76. )
  77. expected = df.set_index("dates").groupby("key").resample("D").mean()
  78. result = df.groupby("key").resample("D", on="dates").mean()
  79. tm.assert_frame_equal(result, expected)
  80. def test_resample_group_keys():
  81. df = DataFrame({"A": 1, "B": 2}, index=date_range("2000", periods=10))
  82. expected = df.copy()
  83. # group_keys=False
  84. g = df.resample("5D", group_keys=False)
  85. result = g.apply(lambda x: x)
  86. tm.assert_frame_equal(result, expected)
  87. # group_keys defaults to False
  88. g = df.resample("5D")
  89. result = g.apply(lambda x: x)
  90. tm.assert_frame_equal(result, expected)
  91. # group_keys=True
  92. expected.index = pd.MultiIndex.from_arrays(
  93. [
  94. pd.to_datetime(["2000-01-01", "2000-01-06"]).as_unit("ns").repeat(5),
  95. expected.index,
  96. ]
  97. )
  98. g = df.resample("5D", group_keys=True)
  99. result = g.apply(lambda x: x)
  100. tm.assert_frame_equal(result, expected)
  101. def test_pipe(test_frame, _test_series):
  102. # GH17905
  103. # series
  104. r = _test_series.resample("h")
  105. expected = r.max() - r.mean()
  106. result = r.pipe(lambda x: x.max() - x.mean())
  107. tm.assert_series_equal(result, expected)
  108. # dataframe
  109. r = test_frame.resample("h")
  110. expected = r.max() - r.mean()
  111. result = r.pipe(lambda x: x.max() - x.mean())
  112. tm.assert_frame_equal(result, expected)
  113. def test_getitem(test_frame):
  114. r = test_frame.resample("h")
  115. tm.assert_index_equal(r._selected_obj.columns, test_frame.columns)
  116. r = test_frame.resample("h")["B"]
  117. assert r._selected_obj.name == test_frame.columns[1]
  118. # technically this is allowed
  119. r = test_frame.resample("h")["A", "B"]
  120. tm.assert_index_equal(r._selected_obj.columns, test_frame.columns[[0, 1]])
  121. r = test_frame.resample("h")["A", "B"]
  122. tm.assert_index_equal(r._selected_obj.columns, test_frame.columns[[0, 1]])
  123. @pytest.mark.parametrize("key", [["D"], ["A", "D"]])
  124. def test_select_bad_cols(key, test_frame):
  125. g = test_frame.resample("h")
  126. # 'A' should not be referenced as a bad column...
  127. # will have to rethink regex if you change message!
  128. msg = r"^\"Columns not found: 'D'\"$"
  129. with pytest.raises(KeyError, match=msg):
  130. g[key]
  131. def test_attribute_access(test_frame):
  132. r = test_frame.resample("h")
  133. tm.assert_series_equal(r.A.sum(), r["A"].sum())
  134. @pytest.mark.parametrize("attr", ["groups", "ngroups", "indices"])
  135. def test_api_compat_before_use(attr):
  136. # make sure that we are setting the binner
  137. # on these attributes
  138. rng = date_range("1/1/2012", periods=100, freq="s")
  139. ts = Series(np.arange(len(rng)), index=rng)
  140. rs = ts.resample("30s")
  141. # before use
  142. getattr(rs, attr)
  143. # after grouper is initialized is ok
  144. rs.mean()
  145. getattr(rs, attr)
  146. def tests_raises_on_nuisance(test_frame, using_infer_string):
  147. df = test_frame
  148. df["D"] = "foo"
  149. r = df.resample("h")
  150. result = r[["A", "B"]].mean()
  151. expected = pd.concat([r.A.mean(), r.B.mean()], axis=1)
  152. tm.assert_frame_equal(result, expected)
  153. expected = r[["A", "B", "C"]].mean()
  154. msg = re.escape("agg function failed [how->mean,dtype->")
  155. if using_infer_string:
  156. msg = "dtype 'str' does not support operation 'mean'"
  157. with pytest.raises(TypeError, match=msg):
  158. r.mean()
  159. result = r.mean(numeric_only=True)
  160. tm.assert_frame_equal(result, expected)
  161. def test_downsample_but_actually_upsampling():
  162. # this is reindex / asfreq
  163. rng = date_range("1/1/2012", periods=100, freq="s")
  164. ts = Series(np.arange(len(rng), dtype="int64"), index=rng)
  165. result = ts.resample("20s").asfreq()
  166. expected = Series(
  167. [0, 20, 40, 60, 80],
  168. index=date_range("2012-01-01 00:00:00", freq="20s", periods=5),
  169. )
  170. tm.assert_series_equal(result, expected)
  171. def test_combined_up_downsampling_of_irregular():
  172. # since we are really doing an operation like this
  173. # ts2.resample('2s').mean().ffill()
  174. # preserve these semantics
  175. rng = date_range("1/1/2012", periods=100, freq="s")
  176. ts = Series(np.arange(len(rng)), index=rng)
  177. ts2 = ts.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]]
  178. result = ts2.resample("2s").mean().ffill()
  179. expected = Series(
  180. [
  181. 0.5,
  182. 2.5,
  183. 5.0,
  184. 7.0,
  185. 7.0,
  186. 11.0,
  187. 11.0,
  188. 15.0,
  189. 16.0,
  190. 16.0,
  191. 16.0,
  192. 16.0,
  193. 25.0,
  194. 25.0,
  195. 25.0,
  196. 30.0,
  197. ],
  198. index=pd.DatetimeIndex(
  199. [
  200. "2012-01-01 00:00:00",
  201. "2012-01-01 00:00:02",
  202. "2012-01-01 00:00:04",
  203. "2012-01-01 00:00:06",
  204. "2012-01-01 00:00:08",
  205. "2012-01-01 00:00:10",
  206. "2012-01-01 00:00:12",
  207. "2012-01-01 00:00:14",
  208. "2012-01-01 00:00:16",
  209. "2012-01-01 00:00:18",
  210. "2012-01-01 00:00:20",
  211. "2012-01-01 00:00:22",
  212. "2012-01-01 00:00:24",
  213. "2012-01-01 00:00:26",
  214. "2012-01-01 00:00:28",
  215. "2012-01-01 00:00:30",
  216. ],
  217. dtype="datetime64[ns]",
  218. freq="2s",
  219. ),
  220. )
  221. tm.assert_series_equal(result, expected)
  222. def test_transform_series(_test_series):
  223. r = _test_series.resample("20min")
  224. expected = _test_series.groupby(pd.Grouper(freq="20min")).transform("mean")
  225. result = r.transform("mean")
  226. tm.assert_series_equal(result, expected)
  227. @pytest.mark.parametrize("on", [None, "date"])
  228. def test_transform_frame(on):
  229. # GH#47079
  230. index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
  231. index.name = "date"
  232. df = DataFrame(
  233. np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index
  234. )
  235. expected = df.groupby(pd.Grouper(freq="20min")).transform("mean")
  236. if on == "date":
  237. # Move date to being a column; result will then have a RangeIndex
  238. expected = expected.reset_index(drop=True)
  239. df = df.reset_index()
  240. r = df.resample("20min", on=on)
  241. result = r.transform("mean")
  242. tm.assert_frame_equal(result, expected)
  243. def test_fillna():
  244. # need to upsample here
  245. rng = date_range("1/1/2012", periods=10, freq="2s")
  246. ts = Series(np.arange(len(rng), dtype="int64"), index=rng)
  247. r = ts.resample("s")
  248. expected = r.ffill()
  249. msg = "DatetimeIndexResampler.fillna is deprecated"
  250. with tm.assert_produces_warning(FutureWarning, match=msg):
  251. result = r.fillna(method="ffill")
  252. tm.assert_series_equal(result, expected)
  253. expected = r.bfill()
  254. with tm.assert_produces_warning(FutureWarning, match=msg):
  255. result = r.fillna(method="bfill")
  256. tm.assert_series_equal(result, expected)
  257. msg2 = (
  258. r"Invalid fill method\. Expecting pad \(ffill\), backfill "
  259. r"\(bfill\) or nearest\. Got 0"
  260. )
  261. with pytest.raises(ValueError, match=msg2):
  262. with tm.assert_produces_warning(FutureWarning, match=msg):
  263. r.fillna(0)
  264. @pytest.mark.parametrize(
  265. "func",
  266. [
  267. lambda x: x.resample("20min", group_keys=False),
  268. lambda x: x.groupby(pd.Grouper(freq="20min"), group_keys=False),
  269. ],
  270. ids=["resample", "groupby"],
  271. )
  272. def test_apply_without_aggregation(func, _test_series):
  273. # both resample and groupby should work w/o aggregation
  274. t = func(_test_series)
  275. result = t.apply(lambda x: x)
  276. tm.assert_series_equal(result, _test_series)
  277. def test_apply_without_aggregation2(_test_series):
  278. grouped = _test_series.to_frame(name="foo").resample("20min", group_keys=False)
  279. result = grouped["foo"].apply(lambda x: x)
  280. tm.assert_series_equal(result, _test_series.rename("foo"))
  281. def test_agg_consistency():
  282. # make sure that we are consistent across
  283. # similar aggregations with and w/o selection list
  284. df = DataFrame(
  285. np.random.default_rng(2).standard_normal((1000, 3)),
  286. index=date_range("1/1/2012", freq="s", periods=1000),
  287. columns=["A", "B", "C"],
  288. )
  289. r = df.resample("3min")
  290. msg = r"Column\(s\) \['r1', 'r2'\] do not exist"
  291. with pytest.raises(KeyError, match=msg):
  292. r.agg({"r1": "mean", "r2": "sum"})
  293. def test_agg_consistency_int_str_column_mix():
  294. # GH#39025
  295. df = DataFrame(
  296. np.random.default_rng(2).standard_normal((1000, 2)),
  297. index=date_range("1/1/2012", freq="s", periods=1000),
  298. columns=[1, "a"],
  299. )
  300. r = df.resample("3min")
  301. msg = r"Column\(s\) \[2, 'b'\] do not exist"
  302. with pytest.raises(KeyError, match=msg):
  303. r.agg({2: "mean", "b": "sum"})
  304. # TODO(GH#14008): once GH 14008 is fixed, move these tests into
  305. # `Base` test class
  306. @pytest.fixture
  307. def index():
  308. index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
  309. index.name = "date"
  310. return index
  311. @pytest.fixture
  312. def df(index):
  313. frame = DataFrame(
  314. np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index
  315. )
  316. return frame
  317. @pytest.fixture
  318. def df_col(df):
  319. return df.reset_index()
  320. @pytest.fixture
  321. def df_mult(df_col, index):
  322. df_mult = df_col.copy()
  323. df_mult.index = pd.MultiIndex.from_arrays(
  324. [range(10), index], names=["index", "date"]
  325. )
  326. return df_mult
  327. @pytest.fixture
  328. def a_mean(df):
  329. return df.resample("2D")["A"].mean()
  330. @pytest.fixture
  331. def a_std(df):
  332. return df.resample("2D")["A"].std()
  333. @pytest.fixture
  334. def a_sum(df):
  335. return df.resample("2D")["A"].sum()
  336. @pytest.fixture
  337. def b_mean(df):
  338. return df.resample("2D")["B"].mean()
  339. @pytest.fixture
  340. def b_std(df):
  341. return df.resample("2D")["B"].std()
  342. @pytest.fixture
  343. def b_sum(df):
  344. return df.resample("2D")["B"].sum()
  345. @pytest.fixture
  346. def df_resample(df):
  347. return df.resample("2D")
  348. @pytest.fixture
  349. def df_col_resample(df_col):
  350. return df_col.resample("2D", on="date")
  351. @pytest.fixture
  352. def df_mult_resample(df_mult):
  353. return df_mult.resample("2D", level="date")
  354. @pytest.fixture
  355. def df_grouper_resample(df):
  356. return df.groupby(pd.Grouper(freq="2D"))
  357. @pytest.fixture(
  358. params=["df_resample", "df_col_resample", "df_mult_resample", "df_grouper_resample"]
  359. )
  360. def cases(request):
  361. return request.getfixturevalue(request.param)
  362. def test_agg_mixed_column_aggregation(cases, a_mean, a_std, b_mean, b_std, request):
  363. expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
  364. expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
  365. msg = "using SeriesGroupBy.[mean|std]"
  366. # "date" is an index and a column, so get included in the agg
  367. if "df_mult" in request.node.callspec.id:
  368. date_mean = cases["date"].mean()
  369. date_std = cases["date"].std()
  370. expected = pd.concat([date_mean, date_std, expected], axis=1)
  371. expected.columns = pd.MultiIndex.from_product(
  372. [["date", "A", "B"], ["mean", "std"]]
  373. )
  374. with tm.assert_produces_warning(FutureWarning, match=msg):
  375. result = cases.aggregate([np.mean, np.std])
  376. tm.assert_frame_equal(result, expected)
  377. @pytest.mark.parametrize(
  378. "agg",
  379. [
  380. {"func": {"A": np.mean, "B": np.std}},
  381. {"A": ("A", np.mean), "B": ("B", np.std)},
  382. {"A": NamedAgg("A", np.mean), "B": NamedAgg("B", np.std)},
  383. ],
  384. )
  385. def test_agg_both_mean_std_named_result(cases, a_mean, b_std, agg):
  386. msg = "using SeriesGroupBy.[mean|std]"
  387. expected = pd.concat([a_mean, b_std], axis=1)
  388. with tm.assert_produces_warning(FutureWarning, match=msg):
  389. result = cases.aggregate(**agg)
  390. tm.assert_frame_equal(result, expected, check_like=True)
  391. def test_agg_both_mean_std_dict_of_list(cases, a_mean, a_std):
  392. expected = pd.concat([a_mean, a_std], axis=1)
  393. expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")])
  394. result = cases.aggregate({"A": ["mean", "std"]})
  395. tm.assert_frame_equal(result, expected)
  396. @pytest.mark.parametrize(
  397. "agg", [{"func": ["mean", "sum"]}, {"mean": "mean", "sum": "sum"}]
  398. )
  399. def test_agg_both_mean_sum(cases, a_mean, a_sum, agg):
  400. expected = pd.concat([a_mean, a_sum], axis=1)
  401. expected.columns = ["mean", "sum"]
  402. result = cases["A"].aggregate(**agg)
  403. tm.assert_frame_equal(result, expected)
  404. @pytest.mark.parametrize(
  405. "agg",
  406. [
  407. {"A": {"mean": "mean", "sum": "sum"}},
  408. {
  409. "A": {"mean": "mean", "sum": "sum"},
  410. "B": {"mean2": "mean", "sum2": "sum"},
  411. },
  412. ],
  413. )
  414. def test_agg_dict_of_dict_specificationerror(cases, agg):
  415. msg = "nested renamer is not supported"
  416. with pytest.raises(pd.errors.SpecificationError, match=msg):
  417. cases.aggregate(agg)
  418. def test_agg_dict_of_lists(cases, a_mean, a_std, b_mean, b_std):
  419. expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
  420. expected.columns = pd.MultiIndex.from_tuples(
  421. [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")]
  422. )
  423. result = cases.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]})
  424. tm.assert_frame_equal(result, expected, check_like=True)
  425. @pytest.mark.parametrize(
  426. "agg",
  427. [
  428. {"func": {"A": np.sum, "B": lambda x: np.std(x, ddof=1)}},
  429. {"A": ("A", np.sum), "B": ("B", lambda x: np.std(x, ddof=1))},
  430. {"A": NamedAgg("A", np.sum), "B": NamedAgg("B", lambda x: np.std(x, ddof=1))},
  431. ],
  432. )
  433. def test_agg_with_lambda(cases, agg):
  434. # passed lambda
  435. msg = "using SeriesGroupBy.sum"
  436. rcustom = cases["B"].apply(lambda x: np.std(x, ddof=1))
  437. expected = pd.concat([cases["A"].sum(), rcustom], axis=1)
  438. with tm.assert_produces_warning(FutureWarning, match=msg):
  439. result = cases.agg(**agg)
  440. tm.assert_frame_equal(result, expected, check_like=True)
  441. @pytest.mark.parametrize(
  442. "agg",
  443. [
  444. {"func": {"result1": np.sum, "result2": np.mean}},
  445. {"A": ("result1", np.sum), "B": ("result2", np.mean)},
  446. {"A": NamedAgg("result1", np.sum), "B": NamedAgg("result2", np.mean)},
  447. ],
  448. )
  449. def test_agg_no_column(cases, agg):
  450. msg = r"Column\(s\) \['result1', 'result2'\] do not exist"
  451. with pytest.raises(KeyError, match=msg):
  452. cases[["A", "B"]].agg(**agg)
  453. @pytest.mark.parametrize(
  454. "cols, agg",
  455. [
  456. [None, {"A": ["sum", "std"], "B": ["mean", "std"]}],
  457. [
  458. [
  459. "A",
  460. "B",
  461. ],
  462. {"A": ["sum", "std"], "B": ["mean", "std"]},
  463. ],
  464. ],
  465. )
  466. def test_agg_specificationerror_nested(cases, cols, agg, a_sum, a_std, b_mean, b_std):
  467. # agg with different hows
  468. # equivalent of using a selection list / or not
  469. expected = pd.concat([a_sum, a_std, b_mean, b_std], axis=1)
  470. expected.columns = pd.MultiIndex.from_tuples(
  471. [("A", "sum"), ("A", "std"), ("B", "mean"), ("B", "std")]
  472. )
  473. if cols is not None:
  474. obj = cases[cols]
  475. else:
  476. obj = cases
  477. result = obj.agg(agg)
  478. tm.assert_frame_equal(result, expected, check_like=True)
  479. @pytest.mark.parametrize(
  480. "agg", [{"A": ["sum", "std"]}, {"A": ["sum", "std"], "B": ["mean", "std"]}]
  481. )
  482. def test_agg_specificationerror_series(cases, agg):
  483. msg = "nested renamer is not supported"
  484. # series like aggs
  485. with pytest.raises(pd.errors.SpecificationError, match=msg):
  486. cases["A"].agg(agg)
  487. def test_agg_specificationerror_invalid_names(cases):
  488. # errors
  489. # invalid names in the agg specification
  490. msg = r"Column\(s\) \['B'\] do not exist"
  491. with pytest.raises(KeyError, match=msg):
  492. cases[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
  493. @pytest.mark.parametrize(
  494. "func", [["min"], ["mean", "max"], {"A": "sum"}, {"A": "prod", "B": "median"}]
  495. )
  496. def test_multi_agg_axis_1_raises(func):
  497. # GH#46904
  498. index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
  499. index.name = "date"
  500. df = DataFrame(
  501. np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index
  502. ).T
  503. warning_msg = "DataFrame.resample with axis=1 is deprecated."
  504. with tm.assert_produces_warning(FutureWarning, match=warning_msg):
  505. res = df.resample("ME", axis=1)
  506. with pytest.raises(
  507. NotImplementedError, match="axis other than 0 is not supported"
  508. ):
  509. res.agg(func)
  510. def test_agg_nested_dicts():
  511. index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
  512. index.name = "date"
  513. df = DataFrame(
  514. np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index
  515. )
  516. df_col = df.reset_index()
  517. df_mult = df_col.copy()
  518. df_mult.index = pd.MultiIndex.from_arrays(
  519. [range(10), df.index], names=["index", "date"]
  520. )
  521. r = df.resample("2D")
  522. cases = [
  523. r,
  524. df_col.resample("2D", on="date"),
  525. df_mult.resample("2D", level="date"),
  526. df.groupby(pd.Grouper(freq="2D")),
  527. ]
  528. msg = "nested renamer is not supported"
  529. for t in cases:
  530. with pytest.raises(pd.errors.SpecificationError, match=msg):
  531. t.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}})
  532. for t in cases:
  533. with pytest.raises(pd.errors.SpecificationError, match=msg):
  534. t[["A", "B"]].agg(
  535. {"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}
  536. )
  537. with pytest.raises(pd.errors.SpecificationError, match=msg):
  538. t.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})
  539. def test_try_aggregate_non_existing_column():
  540. # GH 16766
  541. data = [
  542. {"dt": datetime(2017, 6, 1, 0), "x": 1.0, "y": 2.0},
  543. {"dt": datetime(2017, 6, 1, 1), "x": 2.0, "y": 2.0},
  544. {"dt": datetime(2017, 6, 1, 2), "x": 3.0, "y": 1.5},
  545. ]
  546. df = DataFrame(data).set_index("dt")
  547. # Error as we don't have 'z' column
  548. msg = r"Column\(s\) \['z'\] do not exist"
  549. with pytest.raises(KeyError, match=msg):
  550. df.resample("30min").agg({"x": ["mean"], "y": ["median"], "z": ["sum"]})
  551. def test_agg_list_like_func_with_args():
  552. # 50624
  553. df = DataFrame(
  554. {"x": [1, 2, 3]}, index=date_range("2020-01-01", periods=3, freq="D")
  555. )
  556. def foo1(x, a=1, c=0):
  557. return x + a + c
  558. def foo2(x, b=2, c=0):
  559. return x + b + c
  560. msg = r"foo1\(\) got an unexpected keyword argument 'b'"
  561. with pytest.raises(TypeError, match=msg):
  562. df.resample("D").agg([foo1, foo2], 3, b=3, c=4)
  563. result = df.resample("D").agg([foo1, foo2], 3, c=4)
  564. expected = DataFrame(
  565. [[8, 8], [9, 9], [10, 10]],
  566. index=date_range("2020-01-01", periods=3, freq="D"),
  567. columns=pd.MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]),
  568. )
  569. tm.assert_frame_equal(result, expected)
  570. def test_selection_api_validation():
  571. # GH 13500
  572. index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
  573. rng = np.arange(len(index), dtype=np.int64)
  574. df = DataFrame(
  575. {"date": index, "a": rng},
  576. index=pd.MultiIndex.from_arrays([rng, index], names=["v", "d"]),
  577. )
  578. df_exp = DataFrame({"a": rng}, index=index)
  579. # non DatetimeIndex
  580. msg = (
  581. "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
  582. "but got an instance of 'Index'"
  583. )
  584. with pytest.raises(TypeError, match=msg):
  585. df.resample("2D", level="v")
  586. msg = "The Grouper cannot specify both a key and a level!"
  587. with pytest.raises(ValueError, match=msg):
  588. df.resample("2D", on="date", level="d")
  589. msg = "unhashable type: 'list'"
  590. with pytest.raises(TypeError, match=msg):
  591. df.resample("2D", on=["a", "date"])
  592. msg = r"\"Level \['a', 'date'\] not found\""
  593. with pytest.raises(KeyError, match=msg):
  594. df.resample("2D", level=["a", "date"])
  595. # upsampling not allowed
  596. msg = (
  597. "Upsampling from level= or on= selection is not supported, use "
  598. r"\.set_index\(\.\.\.\) to explicitly set index to datetime-like"
  599. )
  600. with pytest.raises(ValueError, match=msg):
  601. df.resample("2D", level="d").asfreq()
  602. with pytest.raises(ValueError, match=msg):
  603. df.resample("2D", on="date").asfreq()
  604. exp = df_exp.resample("2D").sum()
  605. exp.index.name = "date"
  606. result = df.resample("2D", on="date").sum()
  607. tm.assert_frame_equal(exp, result)
  608. exp.index.name = "d"
  609. with pytest.raises(TypeError, match="datetime64 type does not support sum"):
  610. df.resample("2D", level="d").sum()
  611. result = df.resample("2D", level="d").sum(numeric_only=True)
  612. tm.assert_frame_equal(exp, result)
  613. @pytest.mark.parametrize(
  614. "col_name", ["t2", "t2x", "t2q", "T_2M", "t2p", "t2m", "t2m1", "T2M"]
  615. )
  616. def test_agg_with_datetime_index_list_agg_func(col_name):
  617. # GH 22660
  618. # The parametrized column names would get converted to dates by our
  619. # date parser. Some would result in OutOfBoundsError (ValueError) while
  620. # others would result in OverflowError when passed into Timestamp.
  621. # We catch these errors and move on to the correct branch.
  622. df = DataFrame(
  623. list(range(200)),
  624. index=date_range(
  625. start="2017-01-01", freq="15min", periods=200, tz="Europe/Berlin"
  626. ),
  627. columns=[col_name],
  628. )
  629. result = df.resample("1d").aggregate(["mean"])
  630. expected = DataFrame(
  631. [47.5, 143.5, 195.5],
  632. index=date_range(start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"),
  633. columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]),
  634. )
  635. tm.assert_frame_equal(result, expected)
  636. def test_resample_agg_readonly():
  637. # GH#31710 cython needs to allow readonly data
  638. index = date_range("2020-01-01", "2020-01-02", freq="1h")
  639. arr = np.zeros_like(index)
  640. arr.setflags(write=False)
  641. ser = Series(arr, index=index)
  642. rs = ser.resample("1D")
  643. expected = Series([pd.Timestamp(0), pd.Timestamp(0)], index=index[::24])
  644. result = rs.agg("last")
  645. tm.assert_series_equal(result, expected)
  646. result = rs.agg("first")
  647. tm.assert_series_equal(result, expected)
  648. result = rs.agg("max")
  649. tm.assert_series_equal(result, expected)
  650. result = rs.agg("min")
  651. tm.assert_series_equal(result, expected)
  652. @pytest.mark.parametrize(
  653. "start,end,freq,data,resample_freq,origin,closed,exp_data,exp_end,exp_periods",
  654. [
  655. (
  656. "2000-10-01 23:30:00",
  657. "2000-10-02 00:26:00",
  658. "7min",
  659. [0, 3, 6, 9, 12, 15, 18, 21, 24],
  660. "17min",
  661. "end",
  662. None,
  663. [0, 18, 27, 63],
  664. "20001002 00:26:00",
  665. 4,
  666. ),
  667. (
  668. "20200101 8:26:35",
  669. "20200101 9:31:58",
  670. "77s",
  671. [1] * 51,
  672. "7min",
  673. "end",
  674. "right",
  675. [1, 6, 5, 6, 5, 6, 5, 6, 5, 6],
  676. "2020-01-01 09:30:45",
  677. 10,
  678. ),
  679. (
  680. "2000-10-01 23:30:00",
  681. "2000-10-02 00:26:00",
  682. "7min",
  683. [0, 3, 6, 9, 12, 15, 18, 21, 24],
  684. "17min",
  685. "end",
  686. "left",
  687. [0, 18, 27, 39, 24],
  688. "20001002 00:43:00",
  689. 5,
  690. ),
  691. (
  692. "2000-10-01 23:30:00",
  693. "2000-10-02 00:26:00",
  694. "7min",
  695. [0, 3, 6, 9, 12, 15, 18, 21, 24],
  696. "17min",
  697. "end_day",
  698. None,
  699. [3, 15, 45, 45],
  700. "2000-10-02 00:29:00",
  701. 4,
  702. ),
  703. ],
  704. )
  705. def test_end_and_end_day_origin(
  706. start,
  707. end,
  708. freq,
  709. data,
  710. resample_freq,
  711. origin,
  712. closed,
  713. exp_data,
  714. exp_end,
  715. exp_periods,
  716. ):
  717. rng = date_range(start, end, freq=freq)
  718. ts = Series(data, index=rng)
  719. res = ts.resample(resample_freq, origin=origin, closed=closed).sum()
  720. expected = Series(
  721. exp_data,
  722. index=date_range(end=exp_end, freq=resample_freq, periods=exp_periods),
  723. )
  724. tm.assert_series_equal(res, expected)
  725. @pytest.mark.parametrize(
  726. # expected_data is a string when op raises a ValueError
  727. "method, numeric_only, expected_data",
  728. [
  729. ("sum", True, {"num": [25]}),
  730. ("sum", False, {"cat": ["cat_1cat_2"], "num": [25]}),
  731. ("sum", lib.no_default, {"cat": ["cat_1cat_2"], "num": [25]}),
  732. ("prod", True, {"num": [100]}),
  733. ("prod", False, "can't multiply sequence"),
  734. ("prod", lib.no_default, "can't multiply sequence"),
  735. ("min", True, {"num": [5]}),
  736. ("min", False, {"cat": ["cat_1"], "num": [5]}),
  737. ("min", lib.no_default, {"cat": ["cat_1"], "num": [5]}),
  738. ("max", True, {"num": [20]}),
  739. ("max", False, {"cat": ["cat_2"], "num": [20]}),
  740. ("max", lib.no_default, {"cat": ["cat_2"], "num": [20]}),
  741. ("first", True, {"num": [5]}),
  742. ("first", False, {"cat": ["cat_1"], "num": [5]}),
  743. ("first", lib.no_default, {"cat": ["cat_1"], "num": [5]}),
  744. ("last", True, {"num": [20]}),
  745. ("last", False, {"cat": ["cat_2"], "num": [20]}),
  746. ("last", lib.no_default, {"cat": ["cat_2"], "num": [20]}),
  747. ("mean", True, {"num": [12.5]}),
  748. ("mean", False, "Could not convert"),
  749. ("mean", lib.no_default, "Could not convert"),
  750. ("median", True, {"num": [12.5]}),
  751. ("median", False, r"Cannot convert \['cat_1' 'cat_2'\] to numeric"),
  752. ("median", lib.no_default, r"Cannot convert \['cat_1' 'cat_2'\] to numeric"),
  753. ("std", True, {"num": [10.606601717798213]}),
  754. ("std", False, "could not convert string to float"),
  755. ("std", lib.no_default, "could not convert string to float"),
  756. ("var", True, {"num": [112.5]}),
  757. ("var", False, "could not convert string to float"),
  758. ("var", lib.no_default, "could not convert string to float"),
  759. ("sem", True, {"num": [7.5]}),
  760. ("sem", False, "could not convert string to float"),
  761. ("sem", lib.no_default, "could not convert string to float"),
  762. ],
  763. )
  764. def test_frame_downsample_method(
  765. method, numeric_only, expected_data, using_infer_string
  766. ):
  767. # GH#46442 test if `numeric_only` behave as expected for DataFrameGroupBy
  768. index = date_range("2018-01-01", periods=2, freq="D")
  769. expected_index = date_range("2018-12-31", periods=1, freq="YE")
  770. df = DataFrame({"cat": ["cat_1", "cat_2"], "num": [5, 20]}, index=index)
  771. resampled = df.resample("YE")
  772. if numeric_only is lib.no_default:
  773. kwargs = {}
  774. else:
  775. kwargs = {"numeric_only": numeric_only}
  776. func = getattr(resampled, method)
  777. if isinstance(expected_data, str):
  778. if method in ("var", "mean", "median", "prod"):
  779. klass = TypeError
  780. msg = re.escape(f"agg function failed [how->{method},dtype->")
  781. if using_infer_string:
  782. msg = f"dtype 'str' does not support operation '{method}'"
  783. elif method in ["sum", "std", "sem"] and using_infer_string:
  784. klass = TypeError
  785. msg = f"dtype 'str' does not support operation '{method}'"
  786. else:
  787. klass = ValueError
  788. msg = expected_data
  789. with pytest.raises(klass, match=msg):
  790. _ = func(**kwargs)
  791. else:
  792. result = func(**kwargs)
  793. expected = DataFrame(expected_data, index=expected_index)
  794. tm.assert_frame_equal(result, expected)
  795. @pytest.mark.parametrize(
  796. "method, numeric_only, expected_data",
  797. [
  798. ("sum", True, ()),
  799. ("sum", False, ["cat_1cat_2"]),
  800. ("sum", lib.no_default, ["cat_1cat_2"]),
  801. ("prod", True, ()),
  802. ("prod", False, ()),
  803. ("prod", lib.no_default, ()),
  804. ("min", True, ()),
  805. ("min", False, ["cat_1"]),
  806. ("min", lib.no_default, ["cat_1"]),
  807. ("max", True, ()),
  808. ("max", False, ["cat_2"]),
  809. ("max", lib.no_default, ["cat_2"]),
  810. ("first", True, ()),
  811. ("first", False, ["cat_1"]),
  812. ("first", lib.no_default, ["cat_1"]),
  813. ("last", True, ()),
  814. ("last", False, ["cat_2"]),
  815. ("last", lib.no_default, ["cat_2"]),
  816. ],
  817. )
  818. def test_series_downsample_method(
  819. method, numeric_only, expected_data, using_infer_string
  820. ):
  821. # GH#46442 test if `numeric_only` behave as expected for SeriesGroupBy
  822. index = date_range("2018-01-01", periods=2, freq="D")
  823. expected_index = date_range("2018-12-31", periods=1, freq="YE")
  824. df = Series(["cat_1", "cat_2"], index=index)
  825. resampled = df.resample("YE")
  826. kwargs = {} if numeric_only is lib.no_default else {"numeric_only": numeric_only}
  827. func = getattr(resampled, method)
  828. if numeric_only and numeric_only is not lib.no_default:
  829. msg = rf"Cannot use numeric_only=True with SeriesGroupBy\.{method}"
  830. with pytest.raises(TypeError, match=msg):
  831. func(**kwargs)
  832. elif method == "prod":
  833. msg = re.escape("agg function failed [how->prod,dtype->")
  834. if using_infer_string:
  835. msg = "dtype 'str' does not support operation 'prod'"
  836. with pytest.raises(TypeError, match=msg):
  837. func(**kwargs)
  838. else:
  839. result = func(**kwargs)
  840. expected = Series(expected_data, index=expected_index)
  841. tm.assert_series_equal(result, expected)
  842. @pytest.mark.parametrize(
  843. "method, raises",
  844. [
  845. ("sum", True),
  846. ("prod", True),
  847. ("min", True),
  848. ("max", True),
  849. ("first", False),
  850. ("last", False),
  851. ("median", False),
  852. ("mean", True),
  853. ("std", True),
  854. ("var", True),
  855. ("sem", False),
  856. ("ohlc", False),
  857. ("nunique", False),
  858. ],
  859. )
  860. def test_args_kwargs_depr(method, raises):
  861. index = date_range("20180101", periods=3, freq="h")
  862. df = Series([2, 4, 6], index=index)
  863. resampled = df.resample("30min")
  864. args = ()
  865. func = getattr(resampled, method)
  866. error_msg = "numpy operations are not valid with resample."
  867. error_msg_type = "too many arguments passed in"
  868. warn_msg = f"Passing additional args to DatetimeIndexResampler.{method}"
  869. if raises:
  870. with tm.assert_produces_warning(FutureWarning, match=warn_msg):
  871. with pytest.raises(UnsupportedFunctionCall, match=error_msg):
  872. func(*args, 1, 2, 3, 4)
  873. else:
  874. with tm.assert_produces_warning(FutureWarning, match=warn_msg):
  875. with pytest.raises(TypeError, match=error_msg_type):
  876. func(*args, 1, 2, 3, 4)
  877. def test_df_axis_param_depr():
  878. index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
  879. index.name = "date"
  880. df = DataFrame(
  881. np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index
  882. ).T
  883. # Deprecation error when axis=1 is explicitly passed
  884. warning_msg = "DataFrame.resample with axis=1 is deprecated."
  885. with tm.assert_produces_warning(FutureWarning, match=warning_msg):
  886. df.resample("ME", axis=1)
  887. # Deprecation error when axis=0 is explicitly passed
  888. df = df.T
  889. warning_msg = (
  890. "The 'axis' keyword in DataFrame.resample is deprecated and "
  891. "will be removed in a future version."
  892. )
  893. with tm.assert_produces_warning(FutureWarning, match=warning_msg):
  894. df.resample("ME", axis=0)
  895. def test_series_axis_param_depr(_test_series):
  896. warning_msg = (
  897. "The 'axis' keyword in Series.resample is "
  898. "deprecated and will be removed in a future version."
  899. )
  900. with tm.assert_produces_warning(FutureWarning, match=warning_msg):
  901. _test_series.resample("h", axis=0)
  902. def test_resample_empty():
  903. # GH#52484
  904. df = DataFrame(
  905. index=pd.to_datetime(
  906. ["2018-01-01 00:00:00", "2018-01-01 12:00:00", "2018-01-02 00:00:00"]
  907. )
  908. )
  909. expected = DataFrame(
  910. index=pd.to_datetime(
  911. [
  912. "2018-01-01 00:00:00",
  913. "2018-01-01 08:00:00",
  914. "2018-01-01 16:00:00",
  915. "2018-01-02 00:00:00",
  916. ]
  917. )
  918. )
  919. result = df.resample("8h").mean()
  920. tm.assert_frame_equal(result, expected)