# test_resampler_grouper.py
  1. from textwrap import dedent
  2. import numpy as np
  3. import pytest
  4. from pandas.compat import is_platform_windows
  5. import pandas as pd
  6. from pandas import (
  7. DataFrame,
  8. Index,
  9. Series,
  10. TimedeltaIndex,
  11. Timestamp,
  12. )
  13. import pandas._testing as tm
  14. from pandas.core.indexes.datetimes import date_range
  15. @pytest.fixture
  16. def test_frame():
  17. return DataFrame(
  18. {"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)},
  19. index=date_range("1/1/2000", freq="s", periods=40),
  20. )
  21. def test_tab_complete_ipython6_warning(ip):
  22. from IPython.core.completer import provisionalcompleter
  23. code = dedent(
  24. """\
  25. import numpy as np
  26. from pandas import Series, date_range
  27. data = np.arange(10, dtype=np.float64)
  28. index = date_range("2020-01-01", periods=len(data))
  29. s = Series(data, index=index)
  30. rs = s.resample("D")
  31. """
  32. )
  33. ip.run_cell(code)
  34. # GH 31324 newer jedi version raises Deprecation warning;
  35. # appears resolved 2021-02-02
  36. with tm.assert_produces_warning(None, raise_on_extra_warnings=False):
  37. with provisionalcompleter("ignore"):
  38. list(ip.Completer.completions("rs.", 1))
  39. def test_deferred_with_groupby():
  40. # GH 12486
  41. # support deferred resample ops with groupby
  42. data = [
  43. ["2010-01-01", "A", 2],
  44. ["2010-01-02", "A", 3],
  45. ["2010-01-05", "A", 8],
  46. ["2010-01-10", "A", 7],
  47. ["2010-01-13", "A", 3],
  48. ["2010-01-01", "B", 5],
  49. ["2010-01-03", "B", 2],
  50. ["2010-01-04", "B", 1],
  51. ["2010-01-11", "B", 7],
  52. ["2010-01-14", "B", 3],
  53. ]
  54. df = DataFrame(data, columns=["date", "id", "score"])
  55. df.date = pd.to_datetime(df.date)
  56. def f_0(x):
  57. return x.set_index("date").resample("D").asfreq()
  58. msg = "DataFrameGroupBy.apply operated on the grouping columns"
  59. with tm.assert_produces_warning(FutureWarning, match=msg):
  60. expected = df.groupby("id").apply(f_0)
  61. msg = "DataFrameGroupBy.resample operated on the grouping columns"
  62. with tm.assert_produces_warning(FutureWarning, match=msg):
  63. result = df.set_index("date").groupby("id").resample("D").asfreq()
  64. tm.assert_frame_equal(result, expected)
  65. df = DataFrame(
  66. {
  67. "date": date_range(start="2016-01-01", periods=4, freq="W"),
  68. "group": [1, 1, 2, 2],
  69. "val": [5, 6, 7, 8],
  70. }
  71. ).set_index("date")
  72. def f_1(x):
  73. return x.resample("1D").ffill()
  74. msg = "DataFrameGroupBy.apply operated on the grouping columns"
  75. with tm.assert_produces_warning(FutureWarning, match=msg):
  76. expected = df.groupby("group").apply(f_1)
  77. msg = "DataFrameGroupBy.resample operated on the grouping columns"
  78. with tm.assert_produces_warning(FutureWarning, match=msg):
  79. result = df.groupby("group").resample("1D").ffill()
  80. tm.assert_frame_equal(result, expected)
  81. def test_getitem(test_frame):
  82. g = test_frame.groupby("A")
  83. expected = g.B.apply(lambda x: x.resample("2s").mean())
  84. result = g.resample("2s").B.mean()
  85. tm.assert_series_equal(result, expected)
  86. result = g.B.resample("2s").mean()
  87. tm.assert_series_equal(result, expected)
  88. msg = "DataFrameGroupBy.resample operated on the grouping columns"
  89. with tm.assert_produces_warning(FutureWarning, match=msg):
  90. result = g.resample("2s").mean().B
  91. tm.assert_series_equal(result, expected)
  92. def test_getitem_multiple():
  93. # GH 13174
  94. # multiple calls after selection causing an issue with aliasing
  95. data = [{"id": 1, "buyer": "A"}, {"id": 2, "buyer": "B"}]
  96. df = DataFrame(data, index=date_range("2016-01-01", periods=2))
  97. r = df.groupby("id").resample("1D")
  98. result = r["buyer"].count()
  99. exp_mi = pd.MultiIndex.from_arrays([[1, 2], df.index], names=("id", None))
  100. expected = Series(
  101. [1, 1],
  102. index=exp_mi,
  103. name="buyer",
  104. )
  105. tm.assert_series_equal(result, expected)
  106. result = r["buyer"].count()
  107. tm.assert_series_equal(result, expected)
  108. def test_groupby_resample_on_api_with_getitem():
  109. # GH 17813
  110. df = DataFrame(
  111. {"id": list("aabbb"), "date": date_range("1-1-2016", periods=5), "data": 1}
  112. )
  113. exp = df.set_index("date").groupby("id").resample("2D")["data"].sum()
  114. result = df.groupby("id").resample("2D", on="date")["data"].sum()
  115. tm.assert_series_equal(result, exp)
  116. def test_groupby_with_origin():
  117. # GH 31809
  118. freq = "1399min" # prime number that is smaller than 24h
  119. start, end = "1/1/2000 00:00:00", "1/31/2000 00:00"
  120. middle = "1/15/2000 00:00:00"
  121. rng = date_range(start, end, freq="1231min") # prime number
  122. ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
  123. ts2 = ts[middle:end]
  124. # proves that grouper without a fixed origin does not work
  125. # when dealing with unusual frequencies
  126. simple_grouper = pd.Grouper(freq=freq)
  127. count_ts = ts.groupby(simple_grouper).agg("count")
  128. count_ts = count_ts[middle:end]
  129. count_ts2 = ts2.groupby(simple_grouper).agg("count")
  130. with pytest.raises(AssertionError, match="Index are different"):
  131. tm.assert_index_equal(count_ts.index, count_ts2.index)
  132. # test origin on 1970-01-01 00:00:00
  133. origin = Timestamp(0)
  134. adjusted_grouper = pd.Grouper(freq=freq, origin=origin)
  135. adjusted_count_ts = ts.groupby(adjusted_grouper).agg("count")
  136. adjusted_count_ts = adjusted_count_ts[middle:end]
  137. adjusted_count_ts2 = ts2.groupby(adjusted_grouper).agg("count")
  138. tm.assert_series_equal(adjusted_count_ts, adjusted_count_ts2)
  139. # test origin on 2049-10-18 20:00:00
  140. origin_future = Timestamp(0) + pd.Timedelta("1399min") * 30_000
  141. adjusted_grouper2 = pd.Grouper(freq=freq, origin=origin_future)
  142. adjusted2_count_ts = ts.groupby(adjusted_grouper2).agg("count")
  143. adjusted2_count_ts = adjusted2_count_ts[middle:end]
  144. adjusted2_count_ts2 = ts2.groupby(adjusted_grouper2).agg("count")
  145. tm.assert_series_equal(adjusted2_count_ts, adjusted2_count_ts2)
  146. # both grouper use an adjusted timestamp that is a multiple of 1399 min
  147. # they should be equals even if the adjusted_timestamp is in the future
  148. tm.assert_series_equal(adjusted_count_ts, adjusted2_count_ts2)
  149. def test_nearest():
  150. # GH 17496
  151. # Resample nearest
  152. index = date_range("1/1/2000", periods=3, freq="min")
  153. result = Series(range(3), index=index).resample("20s").nearest()
  154. expected = Series(
  155. [0, 0, 1, 1, 1, 2, 2],
  156. index=pd.DatetimeIndex(
  157. [
  158. "2000-01-01 00:00:00",
  159. "2000-01-01 00:00:20",
  160. "2000-01-01 00:00:40",
  161. "2000-01-01 00:01:00",
  162. "2000-01-01 00:01:20",
  163. "2000-01-01 00:01:40",
  164. "2000-01-01 00:02:00",
  165. ],
  166. dtype="datetime64[ns]",
  167. freq="20s",
  168. ),
  169. )
  170. tm.assert_series_equal(result, expected)
  171. @pytest.mark.parametrize(
  172. "f",
  173. [
  174. "first",
  175. "last",
  176. "median",
  177. "sem",
  178. "sum",
  179. "mean",
  180. "min",
  181. "max",
  182. "size",
  183. "count",
  184. "nearest",
  185. "bfill",
  186. "ffill",
  187. "asfreq",
  188. "ohlc",
  189. ],
  190. )
  191. def test_methods(f, test_frame):
  192. g = test_frame.groupby("A")
  193. r = g.resample("2s")
  194. msg = "DataFrameGroupBy.resample operated on the grouping columns"
  195. with tm.assert_produces_warning(FutureWarning, match=msg):
  196. result = getattr(r, f)()
  197. msg = "DataFrameGroupBy.apply operated on the grouping columns"
  198. with tm.assert_produces_warning(FutureWarning, match=msg):
  199. expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
  200. tm.assert_equal(result, expected)
  201. def test_methods_nunique(test_frame):
  202. # series only
  203. g = test_frame.groupby("A")
  204. r = g.resample("2s")
  205. result = r.B.nunique()
  206. expected = g.B.apply(lambda x: x.resample("2s").nunique())
  207. tm.assert_series_equal(result, expected)
  208. @pytest.mark.parametrize("f", ["std", "var"])
  209. def test_methods_std_var(f, test_frame):
  210. g = test_frame.groupby("A")
  211. r = g.resample("2s")
  212. msg = "DataFrameGroupBy.resample operated on the grouping columns"
  213. with tm.assert_produces_warning(FutureWarning, match=msg):
  214. result = getattr(r, f)(ddof=1)
  215. msg = "DataFrameGroupBy.apply operated on the grouping columns"
  216. with tm.assert_produces_warning(FutureWarning, match=msg):
  217. expected = g.apply(lambda x: getattr(x.resample("2s"), f)(ddof=1))
  218. tm.assert_frame_equal(result, expected)
  219. def test_apply(test_frame):
  220. g = test_frame.groupby("A")
  221. r = g.resample("2s")
  222. # reduction
  223. msg = "DataFrameGroupBy.resample operated on the grouping columns"
  224. with tm.assert_produces_warning(FutureWarning, match=msg):
  225. expected = g.resample("2s").sum()
  226. def f_0(x):
  227. return x.resample("2s").sum()
  228. msg = "DataFrameGroupBy.resample operated on the grouping columns"
  229. with tm.assert_produces_warning(FutureWarning, match=msg):
  230. result = r.apply(f_0)
  231. tm.assert_frame_equal(result, expected)
  232. def f_1(x):
  233. return x.resample("2s").apply(lambda y: y.sum())
  234. msg = "DataFrameGroupBy.apply operated on the grouping columns"
  235. with tm.assert_produces_warning(FutureWarning, match=msg):
  236. result = g.apply(f_1)
  237. # y.sum() results in int64 instead of int32 on 32-bit architectures
  238. expected = expected.astype("int64")
  239. tm.assert_frame_equal(result, expected)
  240. def test_apply_with_mutated_index():
  241. # GH 15169
  242. index = date_range("1-1-2015", "12-31-15", freq="D")
  243. df = DataFrame(
  244. data={"col1": np.random.default_rng(2).random(len(index))}, index=index
  245. )
  246. def f(x):
  247. s = Series([1, 2], index=["a", "b"])
  248. return s
  249. expected = df.groupby(pd.Grouper(freq="ME")).apply(f)
  250. result = df.resample("ME").apply(f)
  251. tm.assert_frame_equal(result, expected)
  252. # A case for series
  253. expected = df["col1"].groupby(pd.Grouper(freq="ME"), group_keys=False).apply(f)
  254. result = df["col1"].resample("ME").apply(f)
  255. tm.assert_series_equal(result, expected)
  256. def test_apply_columns_multilevel():
  257. # GH 16231
  258. cols = pd.MultiIndex.from_tuples([("A", "a", "", "one"), ("B", "b", "i", "two")])
  259. ind = date_range(start="2017-01-01", freq="15Min", periods=8)
  260. df = DataFrame(np.array([0] * 16).reshape(8, 2), index=ind, columns=cols)
  261. agg_dict = {col: (np.sum if col[3] == "one" else np.mean) for col in df.columns}
  262. result = df.resample("h").apply(lambda x: agg_dict[x.name](x))
  263. expected = DataFrame(
  264. 2 * [[0, 0.0]],
  265. index=date_range(start="2017-01-01", freq="1h", periods=2),
  266. columns=pd.MultiIndex.from_tuples(
  267. [("A", "a", "", "one"), ("B", "b", "i", "two")]
  268. ),
  269. )
  270. tm.assert_frame_equal(result, expected)
  271. def test_apply_non_naive_index():
  272. def weighted_quantile(series, weights, q):
  273. series = series.sort_values()
  274. cumsum = weights.reindex(series.index).fillna(0).cumsum()
  275. cutoff = cumsum.iloc[-1] * q
  276. return series[cumsum >= cutoff].iloc[0]
  277. times = date_range("2017-6-23 18:00", periods=8, freq="15min", tz="UTC")
  278. data = Series([1.0, 1, 1, 1, 1, 2, 2, 0], index=times)
  279. weights = Series([160.0, 91, 65, 43, 24, 10, 1, 0], index=times)
  280. result = data.resample("D").apply(weighted_quantile, weights=weights, q=0.5)
  281. ind = date_range(
  282. "2017-06-23 00:00:00+00:00", "2017-06-23 00:00:00+00:00", freq="D", tz="UTC"
  283. )
  284. expected = Series([1.0], index=ind)
  285. tm.assert_series_equal(result, expected)
  286. def test_resample_groupby_with_label(unit):
  287. # GH 13235
  288. index = date_range("2000-01-01", freq="2D", periods=5, unit=unit)
  289. df = DataFrame(index=index, data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]})
  290. msg = "DataFrameGroupBy.resample operated on the grouping columns"
  291. with tm.assert_produces_warning(FutureWarning, match=msg):
  292. result = df.groupby("col0").resample("1W", label="left").sum()
  293. mi = [
  294. np.array([0, 0, 1, 2], dtype=np.int64),
  295. np.array(
  296. ["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"],
  297. dtype=f"M8[{unit}]",
  298. ),
  299. ]
  300. mindex = pd.MultiIndex.from_arrays(mi, names=["col0", None])
  301. expected = DataFrame(
  302. data={"col0": [0, 0, 2, 2], "col1": [1, 1, 2, 1]}, index=mindex
  303. )
  304. tm.assert_frame_equal(result, expected)
  305. def test_consistency_with_window(test_frame):
  306. # consistent return values with window
  307. df = test_frame
  308. expected = Index([1, 2, 3], name="A")
  309. msg = "DataFrameGroupBy.resample operated on the grouping columns"
  310. with tm.assert_produces_warning(FutureWarning, match=msg):
  311. result = df.groupby("A").resample("2s").mean()
  312. assert result.index.nlevels == 2
  313. tm.assert_index_equal(result.index.levels[0], expected)
  314. result = df.groupby("A").rolling(20).mean()
  315. assert result.index.nlevels == 2
  316. tm.assert_index_equal(result.index.levels[0], expected)
  317. def test_median_duplicate_columns():
  318. # GH 14233
  319. df = DataFrame(
  320. np.random.default_rng(2).standard_normal((20, 3)),
  321. columns=list("aaa"),
  322. index=date_range("2012-01-01", periods=20, freq="s"),
  323. )
  324. df2 = df.copy()
  325. df2.columns = ["a", "b", "c"]
  326. expected = df2.resample("5s").median()
  327. result = df.resample("5s").median()
  328. expected.columns = result.columns
  329. tm.assert_frame_equal(result, expected)
  330. def test_apply_to_one_column_of_df():
  331. # GH: 36951
  332. df = DataFrame(
  333. {"col": range(10), "col1": range(10, 20)},
  334. index=date_range("2012-01-01", periods=10, freq="20min"),
  335. )
  336. # access "col" via getattr -> make sure we handle AttributeError
  337. result = df.resample("h").apply(lambda group: group.col.sum())
  338. expected = Series(
  339. [3, 12, 21, 9], index=date_range("2012-01-01", periods=4, freq="h")
  340. )
  341. tm.assert_series_equal(result, expected)
  342. # access "col" via _getitem__ -> make sure we handle KeyErrpr
  343. result = df.resample("h").apply(lambda group: group["col"].sum())
  344. tm.assert_series_equal(result, expected)
  345. def test_resample_groupby_agg():
  346. # GH: 33548
  347. df = DataFrame(
  348. {
  349. "cat": [
  350. "cat_1",
  351. "cat_1",
  352. "cat_2",
  353. "cat_1",
  354. "cat_2",
  355. "cat_1",
  356. "cat_2",
  357. "cat_1",
  358. ],
  359. "num": [5, 20, 22, 3, 4, 30, 10, 50],
  360. "date": [
  361. "2019-2-1",
  362. "2018-02-03",
  363. "2020-3-11",
  364. "2019-2-2",
  365. "2019-2-2",
  366. "2018-12-4",
  367. "2020-3-11",
  368. "2020-12-12",
  369. ],
  370. }
  371. )
  372. df["date"] = pd.to_datetime(df["date"])
  373. resampled = df.groupby("cat").resample("YE", on="date")
  374. expected = resampled[["num"]].sum()
  375. result = resampled.agg({"num": "sum"})
  376. tm.assert_frame_equal(result, expected)
  377. def test_resample_groupby_agg_listlike():
  378. # GH 42905
  379. ts = Timestamp("2021-02-28 00:00:00")
  380. df = DataFrame({"class": ["beta"], "value": [69]}, index=Index([ts], name="date"))
  381. resampled = df.groupby("class").resample("ME")["value"]
  382. result = resampled.agg(["sum", "size"])
  383. expected = DataFrame(
  384. [[69, 1]],
  385. index=pd.MultiIndex.from_tuples([("beta", ts)], names=["class", "date"]),
  386. columns=["sum", "size"],
  387. )
  388. tm.assert_frame_equal(result, expected)
  389. @pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
  390. def test_empty(keys):
  391. # GH 26411
  392. df = DataFrame([], columns=["a", "b"], index=TimedeltaIndex([]))
  393. msg = "DataFrameGroupBy.resample operated on the grouping columns"
  394. with tm.assert_produces_warning(FutureWarning, match=msg):
  395. result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()
  396. expected = (
  397. DataFrame(columns=["a", "b"])
  398. .set_index(keys, drop=False)
  399. .set_index(TimedeltaIndex([]), append=True)
  400. )
  401. if len(keys) == 1:
  402. expected.index.name = keys[0]
  403. tm.assert_frame_equal(result, expected)
  404. @pytest.mark.parametrize("consolidate", [True, False])
  405. def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
  406. # https://github.com/pandas-dev/pandas/issues/39329
  407. dates = date_range("2020-01-01", periods=15, freq="D")
  408. df1 = DataFrame({"key": "A", "date": dates, "col1": range(15), "col_object": "val"})
  409. df2 = DataFrame({"key": "B", "date": dates, "col1": range(15)})
  410. df = pd.concat([df1, df2], ignore_index=True)
  411. if consolidate:
  412. df = df._consolidate()
  413. msg = "DataFrameGroupBy.resample operated on the grouping columns"
  414. with tm.assert_produces_warning(FutureWarning, match=msg):
  415. result = df.groupby(["key"]).resample("W", on="date").min()
  416. idx = pd.MultiIndex.from_arrays(
  417. [
  418. ["A"] * 3 + ["B"] * 3,
  419. pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2).as_unit(
  420. "ns"
  421. ),
  422. ],
  423. names=["key", "date"],
  424. )
  425. expected = DataFrame(
  426. {
  427. "key": ["A"] * 3 + ["B"] * 3,
  428. "col1": [0, 5, 12] * 2,
  429. "col_object": ["val"] * 3 + [np.nan] * 3,
  430. },
  431. index=idx,
  432. )
  433. tm.assert_frame_equal(result, expected)
  434. @pytest.mark.parametrize("min_count", [0, 1])
  435. def test_groupby_resample_empty_sum_string(
  436. string_dtype_no_object, test_frame, min_count
  437. ):
  438. # https://github.com/pandas-dev/pandas/issues/60229
  439. dtype = string_dtype_no_object
  440. test_frame = test_frame.assign(B=pd.array([pd.NA] * len(test_frame), dtype=dtype))
  441. gbrs = test_frame.groupby("A").resample("40s", include_groups=False)
  442. result = gbrs.sum(min_count=min_count)
  443. index = pd.MultiIndex(
  444. levels=[[1, 2, 3], [pd.to_datetime("2000-01-01", unit="ns")]],
  445. codes=[[0, 1, 2], [0, 0, 0]],
  446. names=["A", None],
  447. )
  448. value = "" if min_count == 0 else pd.NA
  449. expected = DataFrame({"B": value}, index=index, dtype=dtype)
  450. tm.assert_frame_equal(result, expected)
  451. def test_groupby_resample_with_list_of_keys():
  452. # GH 47362
  453. df = DataFrame(
  454. data={
  455. "date": date_range(start="2016-01-01", periods=8),
  456. "group": [0, 0, 0, 0, 1, 1, 1, 1],
  457. "val": [1, 7, 5, 2, 3, 10, 5, 1],
  458. }
  459. )
  460. result = df.groupby("group").resample("2D", on="date")[["val"]].mean()
  461. mi_exp = pd.MultiIndex.from_arrays(
  462. [[0, 0, 1, 1], df["date"]._values[::2]], names=["group", "date"]
  463. )
  464. expected = DataFrame(
  465. data={
  466. "val": [4.0, 3.5, 6.5, 3.0],
  467. },
  468. index=mi_exp,
  469. )
  470. tm.assert_frame_equal(result, expected)
  471. @pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
  472. def test_resample_no_index(keys):
  473. # GH 47705
  474. df = DataFrame([], columns=["a", "b", "date"])
  475. df["date"] = pd.to_datetime(df["date"])
  476. df = df.set_index("date")
  477. msg = "DataFrameGroupBy.resample operated on the grouping columns"
  478. with tm.assert_produces_warning(FutureWarning, match=msg):
  479. result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()
  480. expected = DataFrame(columns=["a", "b", "date"]).set_index(keys, drop=False)
  481. expected["date"] = pd.to_datetime(expected["date"])
  482. expected = expected.set_index("date", append=True, drop=True)
  483. if len(keys) == 1:
  484. expected.index.name = keys[0]
  485. tm.assert_frame_equal(result, expected)
  486. def test_resample_no_columns():
  487. # GH#52484
  488. df = DataFrame(
  489. index=Index(
  490. pd.to_datetime(
  491. ["2018-01-01 00:00:00", "2018-01-01 12:00:00", "2018-01-02 00:00:00"]
  492. ),
  493. name="date",
  494. )
  495. )
  496. result = df.groupby([0, 0, 1]).resample(rule=pd.to_timedelta("06:00:00")).mean()
  497. index = pd.to_datetime(
  498. [
  499. "2018-01-01 00:00:00",
  500. "2018-01-01 06:00:00",
  501. "2018-01-01 12:00:00",
  502. "2018-01-02 00:00:00",
  503. ]
  504. )
  505. expected = DataFrame(
  506. index=pd.MultiIndex(
  507. levels=[np.array([0, 1], dtype=np.intp), index],
  508. codes=[[0, 0, 0, 1], [0, 1, 2, 3]],
  509. names=[None, "date"],
  510. )
  511. )
  512. # GH#52710 - Index comes out as 32-bit on 64-bit Windows
  513. tm.assert_frame_equal(result, expected, check_index_type=not is_platform_windows())
  514. def test_groupby_resample_size_all_index_same():
  515. # GH 46826
  516. df = DataFrame(
  517. {"A": [1] * 3 + [2] * 3 + [1] * 3 + [2] * 3, "B": np.arange(12)},
  518. index=date_range("31/12/2000 18:00", freq="h", periods=12),
  519. )
  520. msg = "DataFrameGroupBy.resample operated on the grouping columns"
  521. with tm.assert_produces_warning(FutureWarning, match=msg):
  522. result = df.groupby("A").resample("D").size()
  523. mi_exp = pd.MultiIndex.from_arrays(
  524. [
  525. [1, 1, 2, 2],
  526. pd.DatetimeIndex(["2000-12-31", "2001-01-01"] * 2, dtype="M8[ns]"),
  527. ],
  528. names=["A", None],
  529. )
  530. expected = Series(
  531. 3,
  532. index=mi_exp,
  533. )
  534. tm.assert_series_equal(result, expected)
  535. def test_groupby_resample_on_index_with_list_of_keys():
  536. # GH 50840
  537. df = DataFrame(
  538. data={
  539. "group": [0, 0, 0, 0, 1, 1, 1, 1],
  540. "val": [3, 1, 4, 1, 5, 9, 2, 6],
  541. },
  542. index=date_range(start="2016-01-01", periods=8, name="date"),
  543. )
  544. result = df.groupby("group").resample("2D")[["val"]].mean()
  545. mi_exp = pd.MultiIndex.from_arrays(
  546. [[0, 0, 1, 1], df.index[::2]], names=["group", "date"]
  547. )
  548. expected = DataFrame(
  549. data={
  550. "val": [2.0, 2.5, 7.0, 4.0],
  551. },
  552. index=mi_exp,
  553. )
  554. tm.assert_frame_equal(result, expected)
  555. def test_groupby_resample_on_index_with_list_of_keys_multi_columns():
  556. # GH 50876
  557. df = DataFrame(
  558. data={
  559. "group": [0, 0, 0, 0, 1, 1, 1, 1],
  560. "first_val": [3, 1, 4, 1, 5, 9, 2, 6],
  561. "second_val": [2, 7, 1, 8, 2, 8, 1, 8],
  562. "third_val": [1, 4, 1, 4, 2, 1, 3, 5],
  563. },
  564. index=date_range(start="2016-01-01", periods=8, name="date"),
  565. )
  566. result = df.groupby("group").resample("2D")[["first_val", "second_val"]].mean()
  567. mi_exp = pd.MultiIndex.from_arrays(
  568. [[0, 0, 1, 1], df.index[::2]], names=["group", "date"]
  569. )
  570. expected = DataFrame(
  571. data={
  572. "first_val": [2.0, 2.5, 7.0, 4.0],
  573. "second_val": [4.5, 4.5, 5.0, 4.5],
  574. },
  575. index=mi_exp,
  576. )
  577. tm.assert_frame_equal(result, expected)
  578. def test_groupby_resample_on_index_with_list_of_keys_missing_column():
  579. # GH 50876
  580. df = DataFrame(
  581. data={
  582. "group": [0, 0, 0, 0, 1, 1, 1, 1],
  583. "val": [3, 1, 4, 1, 5, 9, 2, 6],
  584. },
  585. index=Series(
  586. date_range(start="2016-01-01", periods=8),
  587. name="date",
  588. ),
  589. )
  590. gb = df.groupby("group")
  591. rs = gb.resample("2D")
  592. with pytest.raises(KeyError, match="Columns not found"):
  593. rs[["val_not_in_dataframe"]]
  594. @pytest.mark.parametrize("kind", ["datetime", "period"])
  595. def test_groupby_resample_kind(kind):
  596. # GH 24103
  597. df = DataFrame(
  598. {
  599. "datetime": pd.to_datetime(
  600. ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"]
  601. ),
  602. "group": ["A", "B", "A", "B"],
  603. "value": [1, 2, 3, 4],
  604. }
  605. )
  606. df = df.set_index("datetime")
  607. result = df.groupby("group")["value"].resample("D", kind=kind).last()
  608. dt_level = pd.DatetimeIndex(["2018-11-01", "2018-11-02"])
  609. if kind == "period":
  610. dt_level = dt_level.to_period(freq="D")
  611. expected_index = pd.MultiIndex.from_product(
  612. [["A", "B"], dt_level],
  613. names=["group", "datetime"],
  614. )
  615. expected = Series([1, 3, 2, 4], index=expected_index, name="value")
  616. tm.assert_series_equal(result, expected)