# test_timeseries_window.py
  1. import numpy as np
  2. import pytest
  3. import pandas.util._test_decorators as td
  4. from pandas import (
  5. DataFrame,
  6. DatetimeIndex,
  7. Index,
  8. MultiIndex,
  9. NaT,
  10. Series,
  11. Timestamp,
  12. date_range,
  13. )
  14. import pandas._testing as tm
  15. from pandas.tseries import offsets
  16. @pytest.fixture
  17. def regular():
  18. return DataFrame(
  19. {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
  20. ).set_index("A")
  21. @pytest.fixture
  22. def ragged():
  23. df = DataFrame({"B": range(5)})
  24. df.index = [
  25. Timestamp("20130101 09:00:00"),
  26. Timestamp("20130101 09:00:02"),
  27. Timestamp("20130101 09:00:03"),
  28. Timestamp("20130101 09:00:05"),
  29. Timestamp("20130101 09:00:06"),
  30. ]
  31. return df
  32. class TestRollingTS:
  33. # rolling time-series friendly
  34. # xref GH13327
  35. def test_doc_string(self):
  36. df = DataFrame(
  37. {"B": [0, 1, 2, np.nan, 4]},
  38. index=[
  39. Timestamp("20130101 09:00:00"),
  40. Timestamp("20130101 09:00:02"),
  41. Timestamp("20130101 09:00:03"),
  42. Timestamp("20130101 09:00:05"),
  43. Timestamp("20130101 09:00:06"),
  44. ],
  45. )
  46. df
  47. df.rolling("2s").sum()
  48. def test_invalid_window_non_int(self, regular):
  49. # not a valid freq
  50. msg = "passed window foobar is not compatible with a datetimelike index"
  51. with pytest.raises(ValueError, match=msg):
  52. regular.rolling(window="foobar")
  53. # not a datetimelike index
  54. msg = "window must be an integer"
  55. with pytest.raises(ValueError, match=msg):
  56. regular.reset_index().rolling(window="foobar")
  57. @pytest.mark.parametrize("freq", ["2MS", offsets.MonthBegin(2)])
  58. def test_invalid_window_nonfixed(self, freq, regular):
  59. # non-fixed freqs
  60. msg = "\\<2 \\* MonthBegins\\> is a non-fixed frequency"
  61. with pytest.raises(ValueError, match=msg):
  62. regular.rolling(window=freq)
  63. @pytest.mark.parametrize("freq", ["1D", offsets.Day(2), "2ms"])
  64. def test_valid_window(self, freq, regular):
  65. regular.rolling(window=freq)
  66. @pytest.mark.parametrize("minp", [1.0, "foo", np.array([1, 2, 3])])
  67. def test_invalid_minp(self, minp, regular):
  68. # non-integer min_periods
  69. msg = (
  70. r"local variable 'minp' referenced before assignment|"
  71. "min_periods must be an integer"
  72. )
  73. with pytest.raises(ValueError, match=msg):
  74. regular.rolling(window="1D", min_periods=minp)
  75. def test_on(self, regular):
  76. df = regular
  77. # not a valid column
  78. msg = (
  79. r"invalid on specified as foobar, must be a column "
  80. "\\(of DataFrame\\), an Index or None"
  81. )
  82. with pytest.raises(ValueError, match=msg):
  83. df.rolling(window="2s", on="foobar")
  84. # column is valid
  85. df = df.copy()
  86. df["C"] = date_range("20130101", periods=len(df))
  87. df.rolling(window="2d", on="C").sum()
  88. # invalid columns
  89. msg = "window must be an integer"
  90. with pytest.raises(ValueError, match=msg):
  91. df.rolling(window="2d", on="B")
  92. # ok even though on non-selected
  93. df.rolling(window="2d", on="C").B.sum()
  94. def test_monotonic_on(self):
  95. # on/index must be monotonic
  96. df = DataFrame(
  97. {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
  98. )
  99. assert df.A.is_monotonic_increasing
  100. df.rolling("2s", on="A").sum()
  101. df = df.set_index("A")
  102. assert df.index.is_monotonic_increasing
  103. df.rolling("2s").sum()
  104. def test_non_monotonic_on(self):
  105. # GH 19248
  106. df = DataFrame(
  107. {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
  108. )
  109. df = df.set_index("A")
  110. non_monotonic_index = df.index.to_list()
  111. non_monotonic_index[0] = non_monotonic_index[3]
  112. df.index = non_monotonic_index
  113. assert not df.index.is_monotonic_increasing
  114. msg = "index values must be monotonic"
  115. with pytest.raises(ValueError, match=msg):
  116. df.rolling("2s").sum()
  117. df = df.reset_index()
  118. msg = (
  119. r"invalid on specified as A, must be a column "
  120. "\\(of DataFrame\\), an Index or None"
  121. )
  122. with pytest.raises(ValueError, match=msg):
  123. df.rolling("2s", on="A").sum()
  124. def test_frame_on(self):
  125. df = DataFrame(
  126. {"B": range(5), "C": date_range("20130101 09:00:00", periods=5, freq="3s")}
  127. )
  128. df["A"] = [
  129. Timestamp("20130101 09:00:00"),
  130. Timestamp("20130101 09:00:02"),
  131. Timestamp("20130101 09:00:03"),
  132. Timestamp("20130101 09:00:05"),
  133. Timestamp("20130101 09:00:06"),
  134. ]
  135. # we are doing simulating using 'on'
  136. expected = df.set_index("A").rolling("2s").B.sum().reset_index(drop=True)
  137. result = df.rolling("2s", on="A").B.sum()
  138. tm.assert_series_equal(result, expected)
  139. # test as a frame
  140. # we should be ignoring the 'on' as an aggregation column
  141. # note that the expected is setting, computing, and resetting
  142. # so the columns need to be switched compared
  143. # to the actual result where they are ordered as in the
  144. # original
  145. expected = (
  146. df.set_index("A").rolling("2s")[["B"]].sum().reset_index()[["B", "A"]]
  147. )
  148. result = df.rolling("2s", on="A")[["B"]].sum()
  149. tm.assert_frame_equal(result, expected)
  150. def test_frame_on2(self, unit):
  151. # using multiple aggregation columns
  152. dti = DatetimeIndex(
  153. [
  154. Timestamp("20130101 09:00:00"),
  155. Timestamp("20130101 09:00:02"),
  156. Timestamp("20130101 09:00:03"),
  157. Timestamp("20130101 09:00:05"),
  158. Timestamp("20130101 09:00:06"),
  159. ]
  160. ).as_unit(unit)
  161. df = DataFrame(
  162. {
  163. "A": [0, 1, 2, 3, 4],
  164. "B": [0, 1, 2, np.nan, 4],
  165. "C": dti,
  166. },
  167. columns=["A", "C", "B"],
  168. )
  169. expected1 = DataFrame(
  170. {"A": [0.0, 1, 3, 3, 7], "B": [0, 1, 3, np.nan, 4], "C": df["C"]},
  171. columns=["A", "C", "B"],
  172. )
  173. result = df.rolling("2s", on="C").sum()
  174. expected = expected1
  175. tm.assert_frame_equal(result, expected)
  176. expected = Series([0, 1, 3, np.nan, 4], name="B")
  177. result = df.rolling("2s", on="C").B.sum()
  178. tm.assert_series_equal(result, expected)
  179. expected = expected1[["A", "B", "C"]]
  180. result = df.rolling("2s", on="C")[["A", "B", "C"]].sum()
  181. tm.assert_frame_equal(result, expected)
  182. def test_basic_regular(self, regular):
  183. df = regular.copy()
  184. df.index = date_range("20130101", periods=5, freq="D")
  185. expected = df.rolling(window=1, min_periods=1).sum()
  186. result = df.rolling(window="1D").sum()
  187. tm.assert_frame_equal(result, expected)
  188. df.index = date_range("20130101", periods=5, freq="2D")
  189. expected = df.rolling(window=1, min_periods=1).sum()
  190. result = df.rolling(window="2D", min_periods=1).sum()
  191. tm.assert_frame_equal(result, expected)
  192. expected = df.rolling(window=1, min_periods=1).sum()
  193. result = df.rolling(window="2D", min_periods=1).sum()
  194. tm.assert_frame_equal(result, expected)
  195. expected = df.rolling(window=1).sum()
  196. result = df.rolling(window="2D").sum()
  197. tm.assert_frame_equal(result, expected)
  198. def test_min_periods(self, regular):
  199. # compare for min_periods
  200. df = regular
  201. # these slightly different
  202. expected = df.rolling(2, min_periods=1).sum()
  203. result = df.rolling("2s").sum()
  204. tm.assert_frame_equal(result, expected)
  205. expected = df.rolling(2, min_periods=1).sum()
  206. result = df.rolling("2s", min_periods=1).sum()
  207. tm.assert_frame_equal(result, expected)
  208. def test_closed(self, regular, unit):
  209. # xref GH13965
  210. dti = DatetimeIndex(
  211. [
  212. Timestamp("20130101 09:00:01"),
  213. Timestamp("20130101 09:00:02"),
  214. Timestamp("20130101 09:00:03"),
  215. Timestamp("20130101 09:00:04"),
  216. Timestamp("20130101 09:00:06"),
  217. ]
  218. ).as_unit(unit)
  219. df = DataFrame(
  220. {"A": [1] * 5},
  221. index=dti,
  222. )
  223. # closed must be 'right', 'left', 'both', 'neither'
  224. msg = "closed must be 'right', 'left', 'both' or 'neither'"
  225. with pytest.raises(ValueError, match=msg):
  226. regular.rolling(window="2s", closed="blabla")
  227. expected = df.copy()
  228. expected["A"] = [1.0, 2, 2, 2, 1]
  229. result = df.rolling("2s", closed="right").sum()
  230. tm.assert_frame_equal(result, expected)
  231. # default should be 'right'
  232. result = df.rolling("2s").sum()
  233. tm.assert_frame_equal(result, expected)
  234. expected = df.copy()
  235. expected["A"] = [1.0, 2, 3, 3, 2]
  236. result = df.rolling("2s", closed="both").sum()
  237. tm.assert_frame_equal(result, expected)
  238. expected = df.copy()
  239. expected["A"] = [np.nan, 1.0, 2, 2, 1]
  240. result = df.rolling("2s", closed="left").sum()
  241. tm.assert_frame_equal(result, expected)
  242. expected = df.copy()
  243. expected["A"] = [np.nan, 1.0, 1, 1, np.nan]
  244. result = df.rolling("2s", closed="neither").sum()
  245. tm.assert_frame_equal(result, expected)
  246. def test_ragged_sum(self, ragged):
  247. df = ragged
  248. result = df.rolling(window="1s", min_periods=1).sum()
  249. expected = df.copy()
  250. expected["B"] = [0.0, 1, 2, 3, 4]
  251. tm.assert_frame_equal(result, expected)
  252. result = df.rolling(window="2s", min_periods=1).sum()
  253. expected = df.copy()
  254. expected["B"] = [0.0, 1, 3, 3, 7]
  255. tm.assert_frame_equal(result, expected)
  256. result = df.rolling(window="2s", min_periods=2).sum()
  257. expected = df.copy()
  258. expected["B"] = [np.nan, np.nan, 3, np.nan, 7]
  259. tm.assert_frame_equal(result, expected)
  260. result = df.rolling(window="3s", min_periods=1).sum()
  261. expected = df.copy()
  262. expected["B"] = [0.0, 1, 3, 5, 7]
  263. tm.assert_frame_equal(result, expected)
  264. result = df.rolling(window="3s").sum()
  265. expected = df.copy()
  266. expected["B"] = [0.0, 1, 3, 5, 7]
  267. tm.assert_frame_equal(result, expected)
  268. result = df.rolling(window="4s", min_periods=1).sum()
  269. expected = df.copy()
  270. expected["B"] = [0.0, 1, 3, 6, 9]
  271. tm.assert_frame_equal(result, expected)
  272. result = df.rolling(window="4s", min_periods=3).sum()
  273. expected = df.copy()
  274. expected["B"] = [np.nan, np.nan, 3, 6, 9]
  275. tm.assert_frame_equal(result, expected)
  276. result = df.rolling(window="5s", min_periods=1).sum()
  277. expected = df.copy()
  278. expected["B"] = [0.0, 1, 3, 6, 10]
  279. tm.assert_frame_equal(result, expected)
  280. def test_ragged_mean(self, ragged):
  281. df = ragged
  282. result = df.rolling(window="1s", min_periods=1).mean()
  283. expected = df.copy()
  284. expected["B"] = [0.0, 1, 2, 3, 4]
  285. tm.assert_frame_equal(result, expected)
  286. result = df.rolling(window="2s", min_periods=1).mean()
  287. expected = df.copy()
  288. expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
  289. tm.assert_frame_equal(result, expected)
  290. def test_ragged_median(self, ragged):
  291. df = ragged
  292. result = df.rolling(window="1s", min_periods=1).median()
  293. expected = df.copy()
  294. expected["B"] = [0.0, 1, 2, 3, 4]
  295. tm.assert_frame_equal(result, expected)
  296. result = df.rolling(window="2s", min_periods=1).median()
  297. expected = df.copy()
  298. expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
  299. tm.assert_frame_equal(result, expected)
  300. def test_ragged_quantile(self, ragged):
  301. df = ragged
  302. result = df.rolling(window="1s", min_periods=1).quantile(0.5)
  303. expected = df.copy()
  304. expected["B"] = [0.0, 1, 2, 3, 4]
  305. tm.assert_frame_equal(result, expected)
  306. result = df.rolling(window="2s", min_periods=1).quantile(0.5)
  307. expected = df.copy()
  308. expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
  309. tm.assert_frame_equal(result, expected)
  310. def test_ragged_std(self, ragged):
  311. df = ragged
  312. result = df.rolling(window="1s", min_periods=1).std(ddof=0)
  313. expected = df.copy()
  314. expected["B"] = [0.0] * 5
  315. tm.assert_frame_equal(result, expected)
  316. result = df.rolling(window="1s", min_periods=1).std(ddof=1)
  317. expected = df.copy()
  318. expected["B"] = [np.nan] * 5
  319. tm.assert_frame_equal(result, expected)
  320. result = df.rolling(window="3s", min_periods=1).std(ddof=0)
  321. expected = df.copy()
  322. expected["B"] = [0.0] + [0.5] * 4
  323. tm.assert_frame_equal(result, expected)
  324. result = df.rolling(window="5s", min_periods=1).std(ddof=1)
  325. expected = df.copy()
  326. expected["B"] = [np.nan, 0.707107, 1.0, 1.0, 1.290994]
  327. tm.assert_frame_equal(result, expected)
  328. def test_ragged_var(self, ragged):
  329. df = ragged
  330. result = df.rolling(window="1s", min_periods=1).var(ddof=0)
  331. expected = df.copy()
  332. expected["B"] = [0.0] * 5
  333. tm.assert_frame_equal(result, expected)
  334. result = df.rolling(window="1s", min_periods=1).var(ddof=1)
  335. expected = df.copy()
  336. expected["B"] = [np.nan] * 5
  337. tm.assert_frame_equal(result, expected)
  338. result = df.rolling(window="3s", min_periods=1).var(ddof=0)
  339. expected = df.copy()
  340. expected["B"] = [0.0] + [0.25] * 4
  341. tm.assert_frame_equal(result, expected)
  342. result = df.rolling(window="5s", min_periods=1).var(ddof=1)
  343. expected = df.copy()
  344. expected["B"] = [np.nan, 0.5, 1.0, 1.0, 1 + 2 / 3.0]
  345. tm.assert_frame_equal(result, expected)
  346. def test_ragged_skew(self, ragged):
  347. df = ragged
  348. result = df.rolling(window="3s", min_periods=1).skew()
  349. expected = df.copy()
  350. expected["B"] = [np.nan] * 5
  351. tm.assert_frame_equal(result, expected)
  352. result = df.rolling(window="5s", min_periods=1).skew()
  353. expected = df.copy()
  354. expected["B"] = [np.nan] * 2 + [0.0, 0.0, 0.0]
  355. tm.assert_frame_equal(result, expected)
  356. def test_ragged_kurt(self, ragged):
  357. df = ragged
  358. result = df.rolling(window="3s", min_periods=1).kurt()
  359. expected = df.copy()
  360. expected["B"] = [np.nan] * 5
  361. tm.assert_frame_equal(result, expected)
  362. result = df.rolling(window="5s", min_periods=1).kurt()
  363. expected = df.copy()
  364. expected["B"] = [np.nan] * 4 + [-1.2]
  365. tm.assert_frame_equal(result, expected)
  366. def test_ragged_count(self, ragged):
  367. df = ragged
  368. result = df.rolling(window="1s", min_periods=1).count()
  369. expected = df.copy()
  370. expected["B"] = [1.0, 1, 1, 1, 1]
  371. tm.assert_frame_equal(result, expected)
  372. df = ragged
  373. result = df.rolling(window="1s").count()
  374. tm.assert_frame_equal(result, expected)
  375. result = df.rolling(window="2s", min_periods=1).count()
  376. expected = df.copy()
  377. expected["B"] = [1.0, 1, 2, 1, 2]
  378. tm.assert_frame_equal(result, expected)
  379. result = df.rolling(window="2s", min_periods=2).count()
  380. expected = df.copy()
  381. expected["B"] = [np.nan, np.nan, 2, np.nan, 2]
  382. tm.assert_frame_equal(result, expected)
  383. def test_regular_min(self):
  384. df = DataFrame(
  385. {"A": date_range("20130101", periods=5, freq="s"), "B": [0.0, 1, 2, 3, 4]}
  386. ).set_index("A")
  387. result = df.rolling("1s").min()
  388. expected = df.copy()
  389. expected["B"] = [0.0, 1, 2, 3, 4]
  390. tm.assert_frame_equal(result, expected)
  391. df = DataFrame(
  392. {"A": date_range("20130101", periods=5, freq="s"), "B": [5, 4, 3, 4, 5]}
  393. ).set_index("A")
  394. tm.assert_frame_equal(result, expected)
  395. result = df.rolling("2s").min()
  396. expected = df.copy()
  397. expected["B"] = [5.0, 4, 3, 3, 4]
  398. tm.assert_frame_equal(result, expected)
  399. result = df.rolling("5s").min()
  400. expected = df.copy()
  401. expected["B"] = [5.0, 4, 3, 3, 3]
  402. tm.assert_frame_equal(result, expected)
  403. def test_ragged_min(self, ragged):
  404. df = ragged
  405. result = df.rolling(window="1s", min_periods=1).min()
  406. expected = df.copy()
  407. expected["B"] = [0.0, 1, 2, 3, 4]
  408. tm.assert_frame_equal(result, expected)
  409. result = df.rolling(window="2s", min_periods=1).min()
  410. expected = df.copy()
  411. expected["B"] = [0.0, 1, 1, 3, 3]
  412. tm.assert_frame_equal(result, expected)
  413. result = df.rolling(window="5s", min_periods=1).min()
  414. expected = df.copy()
  415. expected["B"] = [0.0, 0, 0, 1, 1]
  416. tm.assert_frame_equal(result, expected)
  417. def test_perf_min(self):
  418. N = 10000
  419. dfp = DataFrame(
  420. {"B": np.random.default_rng(2).standard_normal(N)},
  421. index=date_range("20130101", periods=N, freq="s"),
  422. )
  423. expected = dfp.rolling(2, min_periods=1).min()
  424. result = dfp.rolling("2s").min()
  425. assert ((result - expected) < 0.01).all().all()
  426. expected = dfp.rolling(200, min_periods=1).min()
  427. result = dfp.rolling("200s").min()
  428. assert ((result - expected) < 0.01).all().all()
  429. def test_ragged_max(self, ragged):
  430. df = ragged
  431. result = df.rolling(window="1s", min_periods=1).max()
  432. expected = df.copy()
  433. expected["B"] = [0.0, 1, 2, 3, 4]
  434. tm.assert_frame_equal(result, expected)
  435. result = df.rolling(window="2s", min_periods=1).max()
  436. expected = df.copy()
  437. expected["B"] = [0.0, 1, 2, 3, 4]
  438. tm.assert_frame_equal(result, expected)
  439. result = df.rolling(window="5s", min_periods=1).max()
  440. expected = df.copy()
  441. expected["B"] = [0.0, 1, 2, 3, 4]
  442. tm.assert_frame_equal(result, expected)
  443. @pytest.mark.parametrize(
  444. "freq, op, result_data",
  445. [
  446. ("ms", "min", [0.0] * 10),
  447. ("ms", "mean", [0.0] * 9 + [2.0 / 9]),
  448. ("ms", "max", [0.0] * 9 + [2.0]),
  449. ("s", "min", [0.0] * 10),
  450. ("s", "mean", [0.0] * 9 + [2.0 / 9]),
  451. ("s", "max", [0.0] * 9 + [2.0]),
  452. ("min", "min", [0.0] * 10),
  453. ("min", "mean", [0.0] * 9 + [2.0 / 9]),
  454. ("min", "max", [0.0] * 9 + [2.0]),
  455. ("h", "min", [0.0] * 10),
  456. ("h", "mean", [0.0] * 9 + [2.0 / 9]),
  457. ("h", "max", [0.0] * 9 + [2.0]),
  458. ("D", "min", [0.0] * 10),
  459. ("D", "mean", [0.0] * 9 + [2.0 / 9]),
  460. ("D", "max", [0.0] * 9 + [2.0]),
  461. ],
  462. )
  463. def test_freqs_ops(self, freq, op, result_data):
  464. # GH 21096
  465. index = date_range(start="2018-1-1 01:00:00", freq=f"1{freq}", periods=10)
  466. # Explicit cast to float to avoid implicit cast when setting nan
  467. s = Series(data=0, index=index, dtype="float")
  468. s.iloc[1] = np.nan
  469. s.iloc[-1] = 2
  470. result = getattr(s.rolling(window=f"10{freq}"), op)()
  471. expected = Series(data=result_data, index=index)
  472. tm.assert_series_equal(result, expected)
  473. @pytest.mark.parametrize(
  474. "f",
  475. [
  476. "sum",
  477. "mean",
  478. "count",
  479. "median",
  480. "std",
  481. "var",
  482. "kurt",
  483. "skew",
  484. "min",
  485. "max",
  486. ],
  487. )
  488. def test_all(self, f, regular):
  489. # simple comparison of integer vs time-based windowing
  490. df = regular * 2
  491. er = df.rolling(window=1)
  492. r = df.rolling(window="1s")
  493. result = getattr(r, f)()
  494. expected = getattr(er, f)()
  495. tm.assert_frame_equal(result, expected)
  496. result = r.quantile(0.5)
  497. expected = er.quantile(0.5)
  498. tm.assert_frame_equal(result, expected)
  499. def test_all2(self, arithmetic_win_operators):
  500. f = arithmetic_win_operators
  501. # more sophisticated comparison of integer vs.
  502. # time-based windowing
  503. df = DataFrame(
  504. {"B": np.arange(50)}, index=date_range("20130101", periods=50, freq="h")
  505. )
  506. # in-range data
  507. dft = df.between_time("09:00", "16:00")
  508. r = dft.rolling(window="5h")
  509. result = getattr(r, f)()
  510. # we need to roll the days separately
  511. # to compare with a time-based roll
  512. # finally groupby-apply will return a multi-index
  513. # so we need to drop the day
  514. def agg_by_day(x):
  515. x = x.between_time("09:00", "16:00")
  516. return getattr(x.rolling(5, min_periods=1), f)()
  517. expected = (
  518. df.groupby(df.index.day).apply(agg_by_day).reset_index(level=0, drop=True)
  519. )
  520. tm.assert_frame_equal(result, expected)
  521. def test_rolling_cov_offset(self):
  522. # GH16058
  523. idx = date_range("2017-01-01", periods=24, freq="1h")
  524. ss = Series(np.arange(len(idx)), index=idx)
  525. result = ss.rolling("2h").cov()
  526. expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx)
  527. tm.assert_series_equal(result, expected)
  528. expected2 = ss.rolling(2, min_periods=1).cov()
  529. tm.assert_series_equal(result, expected2)
  530. result = ss.rolling("3h").cov()
  531. expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx)
  532. tm.assert_series_equal(result, expected)
  533. expected2 = ss.rolling(3, min_periods=1).cov()
  534. tm.assert_series_equal(result, expected2)
  535. def test_rolling_on_decreasing_index(self, unit):
  536. # GH-19248, GH-32385
  537. index = DatetimeIndex(
  538. [
  539. Timestamp("20190101 09:00:30"),
  540. Timestamp("20190101 09:00:27"),
  541. Timestamp("20190101 09:00:20"),
  542. Timestamp("20190101 09:00:18"),
  543. Timestamp("20190101 09:00:10"),
  544. ]
  545. ).as_unit(unit)
  546. df = DataFrame({"column": [3, 4, 4, 5, 6]}, index=index)
  547. result = df.rolling("5s").min()
  548. expected = DataFrame({"column": [3.0, 3.0, 4.0, 4.0, 6.0]}, index=index)
  549. tm.assert_frame_equal(result, expected)
  550. def test_rolling_on_empty(self):
  551. # GH-32385
  552. df = DataFrame({"column": []}, index=[])
  553. result = df.rolling("5s").min()
  554. expected = DataFrame({"column": []}, index=[])
  555. tm.assert_frame_equal(result, expected)
  556. def test_rolling_on_multi_index_level(self):
  557. # GH-15584
  558. df = DataFrame(
  559. {"column": range(6)},
  560. index=MultiIndex.from_product(
  561. [date_range("20190101", periods=3), range(2)], names=["date", "seq"]
  562. ),
  563. )
  564. result = df.rolling("10d", on=df.index.get_level_values("date")).sum()
  565. expected = DataFrame(
  566. {"column": [0.0, 1.0, 3.0, 6.0, 10.0, 15.0]}, index=df.index
  567. )
  568. tm.assert_frame_equal(result, expected)
  569. @pytest.mark.parametrize("msg, axis", [["column", 1], ["index", 0]])
  570. def test_nat_axis_error(msg, axis):
  571. idx = [Timestamp("2020"), NaT]
  572. kwargs = {"columns" if axis == 1 else "index": idx}
  573. df = DataFrame(np.eye(2), **kwargs)
  574. warn_msg = "The 'axis' keyword in DataFrame.rolling is deprecated"
  575. if axis == 1:
  576. warn_msg = "Support for axis=1 in DataFrame.rolling is deprecated"
  577. with pytest.raises(ValueError, match=f"{msg} values must not have NaT"):
  578. with tm.assert_produces_warning(FutureWarning, match=warn_msg):
  579. df.rolling("D", axis=axis).mean()
  580. @td.skip_if_no("pyarrow")
  581. def test_arrow_datetime_axis():
  582. # GH 55849
  583. expected = Series(
  584. np.arange(5, dtype=np.float64),
  585. index=Index(
  586. date_range("2020-01-01", periods=5), dtype="timestamp[ns][pyarrow]"
  587. ),
  588. )
  589. result = expected.rolling("1D").sum()
  590. tm.assert_series_equal(result, expected)