test_rolling.py 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979
  1. from datetime import (
  2. datetime,
  3. timedelta,
  4. )
  5. import numpy as np
  6. import pytest
  7. from pandas.compat import (
  8. IS64,
  9. is_platform_arm,
  10. is_platform_power,
  11. )
  12. from pandas import (
  13. DataFrame,
  14. DatetimeIndex,
  15. MultiIndex,
  16. Series,
  17. Timedelta,
  18. Timestamp,
  19. date_range,
  20. period_range,
  21. to_datetime,
  22. to_timedelta,
  23. )
  24. import pandas._testing as tm
  25. from pandas.api.indexers import BaseIndexer
  26. from pandas.core.indexers.objects import VariableOffsetWindowIndexer
  27. from pandas.tseries.offsets import BusinessDay
  28. def test_doc_string():
  29. df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
  30. df
  31. df.rolling(2).sum()
  32. df.rolling(2, min_periods=1).sum()
  33. def test_constructor(frame_or_series):
  34. # GH 12669
  35. c = frame_or_series(range(5)).rolling
  36. # valid
  37. c(0)
  38. c(window=2)
  39. c(window=2, min_periods=1)
  40. c(window=2, min_periods=1, center=True)
  41. c(window=2, min_periods=1, center=False)
  42. # GH 13383
  43. msg = "window must be an integer 0 or greater"
  44. with pytest.raises(ValueError, match=msg):
  45. c(-1)
  46. @pytest.mark.parametrize("w", [2.0, "foo", np.array([2])])
  47. def test_invalid_constructor(frame_or_series, w):
  48. # not valid
  49. c = frame_or_series(range(5)).rolling
  50. msg = "|".join(
  51. [
  52. "window must be an integer",
  53. "passed window foo is not compatible with a datetimelike index",
  54. ]
  55. )
  56. with pytest.raises(ValueError, match=msg):
  57. c(window=w)
  58. msg = "min_periods must be an integer"
  59. with pytest.raises(ValueError, match=msg):
  60. c(window=2, min_periods=w)
  61. msg = "center must be a boolean"
  62. with pytest.raises(ValueError, match=msg):
  63. c(window=2, min_periods=1, center=w)
  64. @pytest.mark.parametrize(
  65. "window",
  66. [
  67. timedelta(days=3),
  68. Timedelta(days=3),
  69. "3D",
  70. VariableOffsetWindowIndexer(
  71. index=date_range("2015-12-25", periods=5), offset=BusinessDay(1)
  72. ),
  73. ],
  74. )
  75. def test_freq_window_not_implemented(window):
  76. # GH 15354
  77. df = DataFrame(
  78. np.arange(10),
  79. index=date_range("2015-12-24", periods=10, freq="D"),
  80. )
  81. with pytest.raises(
  82. NotImplementedError, match="^step (not implemented|is not supported)"
  83. ):
  84. df.rolling(window, step=3).sum()
  85. @pytest.mark.parametrize("agg", ["cov", "corr"])
  86. def test_step_not_implemented_for_cov_corr(agg):
  87. # GH 15354
  88. roll = DataFrame(range(2)).rolling(1, step=2)
  89. with pytest.raises(NotImplementedError, match="step not implemented"):
  90. getattr(roll, agg)()
  91. @pytest.mark.parametrize("window", [timedelta(days=3), Timedelta(days=3)])
  92. def test_constructor_with_timedelta_window(window):
  93. # GH 15440
  94. n = 10
  95. df = DataFrame(
  96. {"value": np.arange(n)},
  97. index=date_range("2015-12-24", periods=n, freq="D"),
  98. )
  99. expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3))
  100. result = df.rolling(window=window).sum()
  101. expected = DataFrame(
  102. {"value": expected_data},
  103. index=date_range("2015-12-24", periods=n, freq="D"),
  104. )
  105. tm.assert_frame_equal(result, expected)
  106. expected = df.rolling("3D").sum()
  107. tm.assert_frame_equal(result, expected)
  108. @pytest.mark.parametrize("window", [timedelta(days=3), Timedelta(days=3), "3D"])
  109. def test_constructor_timedelta_window_and_minperiods(window, raw):
  110. # GH 15305
  111. n = 10
  112. df = DataFrame(
  113. {"value": np.arange(n)},
  114. index=date_range("2017-08-08", periods=n, freq="D"),
  115. )
  116. expected = DataFrame(
  117. {"value": np.append([np.nan, 1.0], np.arange(3.0, 27.0, 3))},
  118. index=date_range("2017-08-08", periods=n, freq="D"),
  119. )
  120. result_roll_sum = df.rolling(window=window, min_periods=2).sum()
  121. result_roll_generic = df.rolling(window=window, min_periods=2).apply(sum, raw=raw)
  122. tm.assert_frame_equal(result_roll_sum, expected)
  123. tm.assert_frame_equal(result_roll_generic, expected)
  124. def test_closed_fixed(closed, arithmetic_win_operators):
  125. # GH 34315
  126. func_name = arithmetic_win_operators
  127. df_fixed = DataFrame({"A": [0, 1, 2, 3, 4]})
  128. df_time = DataFrame({"A": [0, 1, 2, 3, 4]}, index=date_range("2020", periods=5))
  129. result = getattr(
  130. df_fixed.rolling(2, closed=closed, min_periods=1),
  131. func_name,
  132. )()
  133. expected = getattr(
  134. df_time.rolling("2D", closed=closed, min_periods=1),
  135. func_name,
  136. )().reset_index(drop=True)
  137. tm.assert_frame_equal(result, expected)
  138. @pytest.mark.parametrize(
  139. "closed, window_selections",
  140. [
  141. (
  142. "both",
  143. [
  144. [True, True, False, False, False],
  145. [True, True, True, False, False],
  146. [False, True, True, True, False],
  147. [False, False, True, True, True],
  148. [False, False, False, True, True],
  149. ],
  150. ),
  151. (
  152. "left",
  153. [
  154. [True, False, False, False, False],
  155. [True, True, False, False, False],
  156. [False, True, True, False, False],
  157. [False, False, True, True, False],
  158. [False, False, False, True, True],
  159. ],
  160. ),
  161. (
  162. "right",
  163. [
  164. [True, True, False, False, False],
  165. [False, True, True, False, False],
  166. [False, False, True, True, False],
  167. [False, False, False, True, True],
  168. [False, False, False, False, True],
  169. ],
  170. ),
  171. (
  172. "neither",
  173. [
  174. [True, False, False, False, False],
  175. [False, True, False, False, False],
  176. [False, False, True, False, False],
  177. [False, False, False, True, False],
  178. [False, False, False, False, True],
  179. ],
  180. ),
  181. ],
  182. )
  183. def test_datetimelike_centered_selections(
  184. closed, window_selections, arithmetic_win_operators
  185. ):
  186. # GH 34315
  187. func_name = arithmetic_win_operators
  188. df_time = DataFrame(
  189. {"A": [0.0, 1.0, 2.0, 3.0, 4.0]}, index=date_range("2020", periods=5)
  190. )
  191. expected = DataFrame(
  192. {"A": [getattr(df_time["A"].iloc[s], func_name)() for s in window_selections]},
  193. index=date_range("2020", periods=5),
  194. )
  195. if func_name == "sem":
  196. kwargs = {"ddof": 0}
  197. else:
  198. kwargs = {}
  199. result = getattr(
  200. df_time.rolling("2D", closed=closed, min_periods=1, center=True),
  201. func_name,
  202. )(**kwargs)
  203. tm.assert_frame_equal(result, expected, check_dtype=False)
  204. @pytest.mark.parametrize(
  205. "window,closed,expected",
  206. [
  207. ("3s", "right", [3.0, 3.0, 3.0]),
  208. ("3s", "both", [3.0, 3.0, 3.0]),
  209. ("3s", "left", [3.0, 3.0, 3.0]),
  210. ("3s", "neither", [3.0, 3.0, 3.0]),
  211. ("2s", "right", [3.0, 2.0, 2.0]),
  212. ("2s", "both", [3.0, 3.0, 3.0]),
  213. ("2s", "left", [1.0, 3.0, 3.0]),
  214. ("2s", "neither", [1.0, 2.0, 2.0]),
  215. ],
  216. )
  217. def test_datetimelike_centered_offset_covers_all(
  218. window, closed, expected, frame_or_series
  219. ):
  220. # GH 42753
  221. index = [
  222. Timestamp("20130101 09:00:01"),
  223. Timestamp("20130101 09:00:02"),
  224. Timestamp("20130101 09:00:02"),
  225. ]
  226. df = frame_or_series([1, 1, 1], index=index)
  227. result = df.rolling(window, closed=closed, center=True).sum()
  228. expected = frame_or_series(expected, index=index)
  229. tm.assert_equal(result, expected)
  230. @pytest.mark.parametrize(
  231. "window,closed,expected",
  232. [
  233. ("2D", "right", [4, 4, 4, 4, 4, 4, 2, 2]),
  234. ("2D", "left", [2, 2, 4, 4, 4, 4, 4, 4]),
  235. ("2D", "both", [4, 4, 6, 6, 6, 6, 4, 4]),
  236. ("2D", "neither", [2, 2, 2, 2, 2, 2, 2, 2]),
  237. ],
  238. )
  239. def test_datetimelike_nonunique_index_centering(
  240. window, closed, expected, frame_or_series
  241. ):
  242. index = DatetimeIndex(
  243. [
  244. "2020-01-01",
  245. "2020-01-01",
  246. "2020-01-02",
  247. "2020-01-02",
  248. "2020-01-03",
  249. "2020-01-03",
  250. "2020-01-04",
  251. "2020-01-04",
  252. ]
  253. )
  254. df = frame_or_series([1] * 8, index=index, dtype=float)
  255. expected = frame_or_series(expected, index=index, dtype=float)
  256. result = df.rolling(window, center=True, closed=closed).sum()
  257. tm.assert_equal(result, expected)
  258. @pytest.mark.parametrize(
  259. "closed,expected",
  260. [
  261. ("left", [np.nan, np.nan, 1, 1, 1, 10, 14, 14, 18, 21]),
  262. ("neither", [np.nan, np.nan, 1, 1, 1, 9, 5, 5, 13, 8]),
  263. ("right", [0, 1, 3, 6, 10, 14, 11, 18, 21, 17]),
  264. ("both", [0, 1, 3, 6, 10, 15, 20, 27, 26, 30]),
  265. ],
  266. )
  267. def test_variable_window_nonunique(closed, expected, frame_or_series):
  268. # GH 20712
  269. index = DatetimeIndex(
  270. [
  271. "2011-01-01",
  272. "2011-01-01",
  273. "2011-01-02",
  274. "2011-01-02",
  275. "2011-01-02",
  276. "2011-01-03",
  277. "2011-01-04",
  278. "2011-01-04",
  279. "2011-01-05",
  280. "2011-01-06",
  281. ]
  282. )
  283. df = frame_or_series(range(10), index=index, dtype=float)
  284. expected = frame_or_series(expected, index=index, dtype=float)
  285. result = df.rolling("2D", closed=closed).sum()
  286. tm.assert_equal(result, expected)
  287. @pytest.mark.parametrize(
  288. "closed,expected",
  289. [
  290. ("left", [np.nan, np.nan, 1, 1, 1, 10, 15, 15, 18, 21]),
  291. ("neither", [np.nan, np.nan, 1, 1, 1, 10, 15, 15, 13, 8]),
  292. ("right", [0, 1, 3, 6, 10, 15, 21, 28, 21, 17]),
  293. ("both", [0, 1, 3, 6, 10, 15, 21, 28, 26, 30]),
  294. ],
  295. )
  296. def test_variable_offset_window_nonunique(closed, expected, frame_or_series):
  297. # GH 20712
  298. index = DatetimeIndex(
  299. [
  300. "2011-01-01",
  301. "2011-01-01",
  302. "2011-01-02",
  303. "2011-01-02",
  304. "2011-01-02",
  305. "2011-01-03",
  306. "2011-01-04",
  307. "2011-01-04",
  308. "2011-01-05",
  309. "2011-01-06",
  310. ]
  311. )
  312. df = frame_or_series(range(10), index=index, dtype=float)
  313. expected = frame_or_series(expected, index=index, dtype=float)
  314. offset = BusinessDay(2)
  315. indexer = VariableOffsetWindowIndexer(index=index, offset=offset)
  316. result = df.rolling(indexer, closed=closed, min_periods=1).sum()
  317. tm.assert_equal(result, expected)
  318. def test_even_number_window_alignment():
  319. # see discussion in GH 38780
  320. s = Series(range(3), index=date_range(start="2020-01-01", freq="D", periods=3))
  321. # behavior of index- and datetime-based windows differs here!
  322. # s.rolling(window=2, min_periods=1, center=True).mean()
  323. result = s.rolling(window="2D", min_periods=1, center=True).mean()
  324. expected = Series([0.5, 1.5, 2], index=s.index)
  325. tm.assert_series_equal(result, expected)
  326. def test_closed_fixed_binary_col(center, step):
  327. # GH 34315
  328. data = [0, 1, 1, 0, 0, 1, 0, 1]
  329. df = DataFrame(
  330. {"binary_col": data},
  331. index=date_range(start="2020-01-01", freq="min", periods=len(data)),
  332. )
  333. if center:
  334. expected_data = [2 / 3, 0.5, 0.4, 0.5, 0.428571, 0.5, 0.571429, 0.5]
  335. else:
  336. expected_data = [np.nan, 0, 0.5, 2 / 3, 0.5, 0.4, 0.5, 0.428571]
  337. expected = DataFrame(
  338. expected_data,
  339. columns=["binary_col"],
  340. index=date_range(start="2020-01-01", freq="min", periods=len(expected_data)),
  341. )[::step]
  342. rolling = df.rolling(
  343. window=len(df), closed="left", min_periods=1, center=center, step=step
  344. )
  345. result = rolling.mean()
  346. tm.assert_frame_equal(result, expected)
  347. @pytest.mark.parametrize("closed", ["neither", "left"])
  348. def test_closed_empty(closed, arithmetic_win_operators):
  349. # GH 26005
  350. func_name = arithmetic_win_operators
  351. ser = Series(data=np.arange(5), index=date_range("2000", periods=5, freq="2D"))
  352. roll = ser.rolling("1D", closed=closed)
  353. result = getattr(roll, func_name)()
  354. expected = Series([np.nan] * 5, index=ser.index)
  355. tm.assert_series_equal(result, expected)
  356. @pytest.mark.parametrize("func", ["min", "max"])
  357. def test_closed_one_entry(func):
  358. # GH24718
  359. ser = Series(data=[2], index=date_range("2000", periods=1))
  360. result = getattr(ser.rolling("10D", closed="left"), func)()
  361. tm.assert_series_equal(result, Series([np.nan], index=ser.index))
  362. @pytest.mark.parametrize("func", ["min", "max"])
  363. def test_closed_one_entry_groupby(func):
  364. # GH24718
  365. ser = DataFrame(
  366. data={"A": [1, 1, 2], "B": [3, 2, 1]},
  367. index=date_range("2000", periods=3),
  368. )
  369. result = getattr(
  370. ser.groupby("A", sort=False)["B"].rolling("10D", closed="left"), func
  371. )()
  372. exp_idx = MultiIndex.from_arrays(arrays=[[1, 1, 2], ser.index], names=("A", None))
  373. expected = Series(data=[np.nan, 3, np.nan], index=exp_idx, name="B")
  374. tm.assert_series_equal(result, expected)
  375. @pytest.mark.parametrize("input_dtype", ["int", "float"])
  376. @pytest.mark.parametrize(
  377. "func,closed,expected",
  378. [
  379. ("min", "right", [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]),
  380. ("min", "both", [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]),
  381. ("min", "neither", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]),
  382. ("min", "left", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]),
  383. ("max", "right", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
  384. ("max", "both", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
  385. ("max", "neither", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
  386. ("max", "left", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
  387. ],
  388. )
  389. def test_closed_min_max_datetime(input_dtype, func, closed, expected):
  390. # see gh-21704
  391. ser = Series(
  392. data=np.arange(10).astype(input_dtype),
  393. index=date_range("2000", periods=10),
  394. )
  395. result = getattr(ser.rolling("3D", closed=closed), func)()
  396. expected = Series(expected, index=ser.index)
  397. tm.assert_series_equal(result, expected)
  398. def test_closed_uneven():
  399. # see gh-21704
  400. ser = Series(data=np.arange(10), index=date_range("2000", periods=10))
  401. # uneven
  402. ser = ser.drop(index=ser.index[[1, 5]])
  403. result = ser.rolling("3D", closed="left").min()
  404. expected = Series([np.nan, 0, 0, 2, 3, 4, 6, 6], index=ser.index)
  405. tm.assert_series_equal(result, expected)
  406. @pytest.mark.parametrize(
  407. "func,closed,expected",
  408. [
  409. ("min", "right", [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]),
  410. ("min", "both", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]),
  411. ("min", "neither", [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]),
  412. ("min", "left", [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]),
  413. ("max", "right", [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]),
  414. ("max", "both", [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]),
  415. ("max", "neither", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]),
  416. ("max", "left", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan]),
  417. ],
  418. )
  419. def test_closed_min_max_minp(func, closed, expected):
  420. # see gh-21704
  421. ser = Series(data=np.arange(10), index=date_range("2000", periods=10))
  422. # Explicit cast to float to avoid implicit cast when setting nan
  423. ser = ser.astype("float")
  424. ser[ser.index[-3:]] = np.nan
  425. result = getattr(ser.rolling("3D", min_periods=2, closed=closed), func)()
  426. expected = Series(expected, index=ser.index)
  427. tm.assert_series_equal(result, expected)
  428. @pytest.mark.parametrize(
  429. "closed,expected",
  430. [
  431. ("right", [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]),
  432. ("both", [0, 0.5, 1, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]),
  433. ("neither", [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]),
  434. ("left", [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]),
  435. ],
  436. )
  437. def test_closed_median_quantile(closed, expected):
  438. # GH 26005
  439. ser = Series(data=np.arange(10), index=date_range("2000", periods=10))
  440. roll = ser.rolling("3D", closed=closed)
  441. expected = Series(expected, index=ser.index)
  442. result = roll.median()
  443. tm.assert_series_equal(result, expected)
  444. result = roll.quantile(0.5)
  445. tm.assert_series_equal(result, expected)
  446. @pytest.mark.parametrize("roller", ["1s", 1])
  447. def tests_empty_df_rolling(roller):
  448. # GH 15819 Verifies that datetime and integer rolling windows can be
  449. # applied to empty DataFrames
  450. expected = DataFrame()
  451. result = DataFrame().rolling(roller).sum()
  452. tm.assert_frame_equal(result, expected)
  453. # Verifies that datetime and integer rolling windows can be applied to
  454. # empty DataFrames with datetime index
  455. expected = DataFrame(index=DatetimeIndex([]))
  456. result = DataFrame(index=DatetimeIndex([])).rolling(roller).sum()
  457. tm.assert_frame_equal(result, expected)
  458. def test_empty_window_median_quantile():
  459. # GH 26005
  460. expected = Series([np.nan, np.nan, np.nan])
  461. roll = Series(np.arange(3)).rolling(0)
  462. result = roll.median()
  463. tm.assert_series_equal(result, expected)
  464. result = roll.quantile(0.1)
  465. tm.assert_series_equal(result, expected)
  466. def test_missing_minp_zero():
  467. # https://github.com/pandas-dev/pandas/pull/18921
  468. # minp=0
  469. x = Series([np.nan])
  470. result = x.rolling(1, min_periods=0).sum()
  471. expected = Series([0.0])
  472. tm.assert_series_equal(result, expected)
  473. # minp=1
  474. result = x.rolling(1, min_periods=1).sum()
  475. expected = Series([np.nan])
  476. tm.assert_series_equal(result, expected)
  477. def test_missing_minp_zero_variable():
  478. # https://github.com/pandas-dev/pandas/pull/18921
  479. x = Series(
  480. [np.nan] * 4,
  481. index=DatetimeIndex(["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"]),
  482. )
  483. result = x.rolling(Timedelta("2d"), min_periods=0).sum()
  484. expected = Series(0.0, index=x.index)
  485. tm.assert_series_equal(result, expected)
  486. def test_multi_index_names():
  487. # GH 16789, 16825
  488. cols = MultiIndex.from_product([["A", "B"], ["C", "D", "E"]], names=["1", "2"])
  489. df = DataFrame(np.ones((10, 6)), columns=cols)
  490. result = df.rolling(3).cov()
  491. tm.assert_index_equal(result.columns, df.columns)
  492. assert result.index.names == [None, "1", "2"]
  493. def test_rolling_axis_sum(axis_frame):
  494. # see gh-23372.
  495. df = DataFrame(np.ones((10, 20)))
  496. axis = df._get_axis_number(axis_frame)
  497. if axis == 0:
  498. msg = "The 'axis' keyword in DataFrame.rolling"
  499. expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)})
  500. else:
  501. # axis == 1
  502. msg = "Support for axis=1 in DataFrame.rolling is deprecated"
  503. expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10)
  504. with tm.assert_produces_warning(FutureWarning, match=msg):
  505. result = df.rolling(3, axis=axis_frame).sum()
  506. tm.assert_frame_equal(result, expected)
  507. def test_rolling_axis_count(axis_frame):
  508. # see gh-26055
  509. df = DataFrame({"x": range(3), "y": range(3)})
  510. axis = df._get_axis_number(axis_frame)
  511. if axis in [0, "index"]:
  512. msg = "The 'axis' keyword in DataFrame.rolling"
  513. expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]})
  514. else:
  515. msg = "Support for axis=1 in DataFrame.rolling is deprecated"
  516. expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]})
  517. with tm.assert_produces_warning(FutureWarning, match=msg):
  518. result = df.rolling(2, axis=axis_frame, min_periods=0).count()
  519. tm.assert_frame_equal(result, expected)
  520. def test_readonly_array():
  521. # GH-27766
  522. arr = np.array([1, 3, np.nan, 3, 5])
  523. arr.setflags(write=False)
  524. result = Series(arr).rolling(2).mean()
  525. expected = Series([np.nan, 2, np.nan, np.nan, 4])
  526. tm.assert_series_equal(result, expected)
  527. def test_rolling_datetime(axis_frame, tz_naive_fixture):
  528. # GH-28192
  529. tz = tz_naive_fixture
  530. df = DataFrame(
  531. {i: [1] * 2 for i in date_range("2019-8-01", "2019-08-03", freq="D", tz=tz)}
  532. )
  533. if axis_frame in [0, "index"]:
  534. msg = "The 'axis' keyword in DataFrame.rolling"
  535. with tm.assert_produces_warning(FutureWarning, match=msg):
  536. result = df.T.rolling("2D", axis=axis_frame).sum().T
  537. else:
  538. msg = "Support for axis=1 in DataFrame.rolling"
  539. with tm.assert_produces_warning(FutureWarning, match=msg):
  540. result = df.rolling("2D", axis=axis_frame).sum()
  541. expected = DataFrame(
  542. {
  543. **{
  544. i: [1.0] * 2
  545. for i in date_range("2019-8-01", periods=1, freq="D", tz=tz)
  546. },
  547. **{
  548. i: [2.0] * 2
  549. for i in date_range("2019-8-02", "2019-8-03", freq="D", tz=tz)
  550. },
  551. }
  552. )
  553. tm.assert_frame_equal(result, expected)
  554. @pytest.mark.parametrize("center", [True, False])
  555. def test_rolling_window_as_string(center):
  556. # see gh-22590
  557. date_today = datetime.now()
  558. days = date_range(date_today, date_today + timedelta(365), freq="D")
  559. data = np.ones(len(days))
  560. df = DataFrame({"DateCol": days, "metric": data})
  561. df.set_index("DateCol", inplace=True)
  562. result = df.rolling(window="21D", min_periods=2, closed="left", center=center)[
  563. "metric"
  564. ].agg("max")
  565. index = days.rename("DateCol")
  566. index = index._with_freq(None)
  567. expected_data = np.ones(len(days), dtype=np.float64)
  568. if not center:
  569. expected_data[:2] = np.nan
  570. expected = Series(expected_data, index=index, name="metric")
  571. tm.assert_series_equal(result, expected)
  572. def test_min_periods1():
  573. # GH#6795
  574. df = DataFrame([0, 1, 2, 1, 0], columns=["a"])
  575. result = df["a"].rolling(3, center=True, min_periods=1).max()
  576. expected = Series([1.0, 2.0, 2.0, 2.0, 1.0], name="a")
  577. tm.assert_series_equal(result, expected)
  578. def test_rolling_count_with_min_periods(frame_or_series):
  579. # GH 26996
  580. result = frame_or_series(range(5)).rolling(3, min_periods=3).count()
  581. expected = frame_or_series([np.nan, np.nan, 3.0, 3.0, 3.0])
  582. tm.assert_equal(result, expected)
  583. def test_rolling_count_default_min_periods_with_null_values(frame_or_series):
  584. # GH 26996
  585. values = [1, 2, 3, np.nan, 4, 5, 6]
  586. expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0]
  587. # GH 31302
  588. result = frame_or_series(values).rolling(3, min_periods=0).count()
  589. expected = frame_or_series(expected_counts)
  590. tm.assert_equal(result, expected)
  591. @pytest.mark.parametrize(
  592. "df,expected,window,min_periods",
  593. [
  594. (
  595. DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
  596. [
  597. ({"A": [1], "B": [4]}, [0]),
  598. ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
  599. ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
  600. ],
  601. 3,
  602. None,
  603. ),
  604. (
  605. DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
  606. [
  607. ({"A": [1], "B": [4]}, [0]),
  608. ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
  609. ({"A": [2, 3], "B": [5, 6]}, [1, 2]),
  610. ],
  611. 2,
  612. 1,
  613. ),
  614. (
  615. DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
  616. [
  617. ({"A": [1], "B": [4]}, [0]),
  618. ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
  619. ({"A": [2, 3], "B": [5, 6]}, [1, 2]),
  620. ],
  621. 2,
  622. 2,
  623. ),
  624. (
  625. DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
  626. [
  627. ({"A": [1], "B": [4]}, [0]),
  628. ({"A": [2], "B": [5]}, [1]),
  629. ({"A": [3], "B": [6]}, [2]),
  630. ],
  631. 1,
  632. 1,
  633. ),
  634. (
  635. DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
  636. [
  637. ({"A": [1], "B": [4]}, [0]),
  638. ({"A": [2], "B": [5]}, [1]),
  639. ({"A": [3], "B": [6]}, [2]),
  640. ],
  641. 1,
  642. 0,
  643. ),
  644. (DataFrame({"A": [1], "B": [4]}), [], 2, None),
  645. (DataFrame({"A": [1], "B": [4]}), [], 2, 1),
  646. (DataFrame(), [({}, [])], 2, None),
  647. (
  648. DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
  649. [
  650. ({"A": [1.0], "B": [np.nan]}, [0]),
  651. ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
  652. ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
  653. ],
  654. 3,
  655. 2,
  656. ),
  657. ],
  658. )
  659. def test_iter_rolling_dataframe(df, expected, window, min_periods):
  660. # GH 11704
  661. expected = [DataFrame(values, index=index) for (values, index) in expected]
  662. for expected, actual in zip(expected, df.rolling(window, min_periods=min_periods)):
  663. tm.assert_frame_equal(actual, expected)
  664. @pytest.mark.parametrize(
  665. "expected,window",
  666. [
  667. (
  668. [
  669. ({"A": [1], "B": [4]}, [0]),
  670. ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
  671. ({"A": [2, 3], "B": [5, 6]}, [1, 2]),
  672. ],
  673. "2D",
  674. ),
  675. (
  676. [
  677. ({"A": [1], "B": [4]}, [0]),
  678. ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
  679. ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
  680. ],
  681. "3D",
  682. ),
  683. (
  684. [
  685. ({"A": [1], "B": [4]}, [0]),
  686. ({"A": [2], "B": [5]}, [1]),
  687. ({"A": [3], "B": [6]}, [2]),
  688. ],
  689. "1D",
  690. ),
  691. ],
  692. )
  693. def test_iter_rolling_on_dataframe(expected, window):
  694. # GH 11704, 40373
  695. df = DataFrame(
  696. {
  697. "A": [1, 2, 3, 4, 5],
  698. "B": [4, 5, 6, 7, 8],
  699. "C": date_range(start="2016-01-01", periods=5, freq="D"),
  700. }
  701. )
  702. expected = [
  703. DataFrame(values, index=df.loc[index, "C"]) for (values, index) in expected
  704. ]
  705. for expected, actual in zip(expected, df.rolling(window, on="C")):
  706. tm.assert_frame_equal(actual, expected)
  707. def test_iter_rolling_on_dataframe_unordered():
  708. # GH 43386
  709. df = DataFrame({"a": ["x", "y", "x"], "b": [0, 1, 2]})
  710. results = list(df.groupby("a").rolling(2))
  711. expecteds = [df.iloc[idx, [1]] for idx in [[0], [0, 2], [1]]]
  712. for result, expected in zip(results, expecteds):
  713. tm.assert_frame_equal(result, expected)
  714. @pytest.mark.parametrize(
  715. "ser,expected,window, min_periods",
  716. [
  717. (
  718. Series([1, 2, 3]),
  719. [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])],
  720. 3,
  721. None,
  722. ),
  723. (
  724. Series([1, 2, 3]),
  725. [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])],
  726. 3,
  727. 1,
  728. ),
  729. (
  730. Series([1, 2, 3]),
  731. [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])],
  732. 2,
  733. 1,
  734. ),
  735. (
  736. Series([1, 2, 3]),
  737. [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])],
  738. 2,
  739. 2,
  740. ),
  741. (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 0),
  742. (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 1),
  743. (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2, 0),
  744. (Series([], dtype="int64"), [], 2, 1),
  745. ],
  746. )
  747. def test_iter_rolling_series(ser, expected, window, min_periods):
  748. # GH 11704
  749. expected = [Series(values, index=index) for (values, index) in expected]
  750. for expected, actual in zip(expected, ser.rolling(window, min_periods=min_periods)):
  751. tm.assert_series_equal(actual, expected)
  752. @pytest.mark.parametrize(
  753. "expected,expected_index,window",
  754. [
  755. (
  756. [[0], [1], [2], [3], [4]],
  757. [
  758. date_range("2020-01-01", periods=1, freq="D"),
  759. date_range("2020-01-02", periods=1, freq="D"),
  760. date_range("2020-01-03", periods=1, freq="D"),
  761. date_range("2020-01-04", periods=1, freq="D"),
  762. date_range("2020-01-05", periods=1, freq="D"),
  763. ],
  764. "1D",
  765. ),
  766. (
  767. [[0], [0, 1], [1, 2], [2, 3], [3, 4]],
  768. [
  769. date_range("2020-01-01", periods=1, freq="D"),
  770. date_range("2020-01-01", periods=2, freq="D"),
  771. date_range("2020-01-02", periods=2, freq="D"),
  772. date_range("2020-01-03", periods=2, freq="D"),
  773. date_range("2020-01-04", periods=2, freq="D"),
  774. ],
  775. "2D",
  776. ),
  777. (
  778. [[0], [0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4]],
  779. [
  780. date_range("2020-01-01", periods=1, freq="D"),
  781. date_range("2020-01-01", periods=2, freq="D"),
  782. date_range("2020-01-01", periods=3, freq="D"),
  783. date_range("2020-01-02", periods=3, freq="D"),
  784. date_range("2020-01-03", periods=3, freq="D"),
  785. ],
  786. "3D",
  787. ),
  788. ],
  789. )
  790. def test_iter_rolling_datetime(expected, expected_index, window):
  791. # GH 11704
  792. ser = Series(range(5), index=date_range(start="2020-01-01", periods=5, freq="D"))
  793. expected = [
  794. Series(values, index=idx) for (values, idx) in zip(expected, expected_index)
  795. ]
  796. for expected, actual in zip(expected, ser.rolling(window)):
  797. tm.assert_series_equal(actual, expected)
  798. @pytest.mark.parametrize(
  799. "grouping,_index",
  800. [
  801. (
  802. {"level": 0},
  803. MultiIndex.from_tuples(
  804. [(0, 0), (0, 0), (1, 1), (1, 1), (1, 1)], names=[None, None]
  805. ),
  806. ),
  807. (
  808. {"by": "X"},
  809. MultiIndex.from_tuples(
  810. [(0, 0), (1, 0), (2, 1), (3, 1), (4, 1)], names=["X", None]
  811. ),
  812. ),
  813. ],
  814. )
  815. def test_rolling_positional_argument(grouping, _index, raw):
  816. # GH 34605
  817. def scaled_sum(*args):
  818. if len(args) < 2:
  819. raise ValueError("The function needs two arguments")
  820. array, scale = args
  821. return array.sum() / scale
  822. df = DataFrame(data={"X": range(5)}, index=[0, 0, 1, 1, 1])
  823. expected = DataFrame(data={"X": [0.0, 0.5, 1.0, 1.5, 2.0]}, index=_index)
  824. # GH 40341
  825. if "by" in grouping:
  826. expected = expected.drop(columns="X", errors="ignore")
  827. result = df.groupby(**grouping).rolling(1).apply(scaled_sum, raw=raw, args=(2,))
  828. tm.assert_frame_equal(result, expected)
  829. @pytest.mark.parametrize("add", [0.0, 2.0])
  830. def test_rolling_numerical_accuracy_kahan_mean(add, unit):
  831. # GH: 36031 implementing kahan summation
  832. dti = DatetimeIndex(
  833. [
  834. Timestamp("19700101 09:00:00"),
  835. Timestamp("19700101 09:00:03"),
  836. Timestamp("19700101 09:00:06"),
  837. ]
  838. ).as_unit(unit)
  839. df = DataFrame(
  840. {"A": [3002399751580331.0 + add, -0.0, -0.0]},
  841. index=dti,
  842. )
  843. result = (
  844. df.resample("1s").ffill().rolling("3s", closed="left", min_periods=3).mean()
  845. )
  846. dates = date_range("19700101 09:00:00", periods=7, freq="s", unit=unit)
  847. expected = DataFrame(
  848. {
  849. "A": [
  850. np.nan,
  851. np.nan,
  852. np.nan,
  853. 3002399751580330.5,
  854. 2001599834386887.25,
  855. 1000799917193443.625,
  856. 0.0,
  857. ]
  858. },
  859. index=dates,
  860. )
  861. tm.assert_frame_equal(result, expected)
  862. def test_rolling_numerical_accuracy_kahan_sum():
  863. # GH: 13254
  864. df = DataFrame([2.186, -1.647, 0.0, 0.0, 0.0, 0.0], columns=["x"])
  865. result = df["x"].rolling(3).sum()
  866. expected = Series([np.nan, np.nan, 0.539, -1.647, 0.0, 0.0], name="x")
  867. tm.assert_series_equal(result, expected)
  868. def test_rolling_numerical_accuracy_jump():
  869. # GH: 32761
  870. index = date_range(start="2020-01-01", end="2020-01-02", freq="60s").append(
  871. DatetimeIndex(["2020-01-03"])
  872. )
  873. data = np.random.default_rng(2).random(len(index))
  874. df = DataFrame({"data": data}, index=index)
  875. result = df.rolling("60s").mean()
  876. tm.assert_frame_equal(result, df[["data"]])
  877. def test_rolling_numerical_accuracy_small_values():
  878. # GH: 10319
  879. s = Series(
  880. data=[0.00012456, 0.0003, -0.0, -0.0],
  881. index=date_range("1999-02-03", "1999-02-06"),
  882. )
  883. result = s.rolling(1).mean()
  884. tm.assert_series_equal(result, s)
  885. def test_rolling_numerical_too_large_numbers():
  886. # GH: 11645
  887. dates = date_range("2015-01-01", periods=10, freq="D")
  888. ds = Series(data=range(10), index=dates, dtype=np.float64)
  889. ds.iloc[2] = -9e33
  890. result = ds.rolling(5).mean()
  891. expected = Series(
  892. [
  893. np.nan,
  894. np.nan,
  895. np.nan,
  896. np.nan,
  897. -1.8e33,
  898. -1.8e33,
  899. -1.8e33,
  900. 5.0,
  901. 6.0,
  902. 7.0,
  903. ],
  904. index=dates,
  905. )
  906. tm.assert_series_equal(result, expected)
  907. @pytest.mark.parametrize(
  908. ("func", "value"),
  909. [("sum", 2.0), ("max", 1.0), ("min", 1.0), ("mean", 1.0), ("median", 1.0)],
  910. )
  911. def test_rolling_mixed_dtypes_axis_1(func, value):
  912. # GH: 20649
  913. df = DataFrame(1, index=[1, 2], columns=["a", "b", "c"])
  914. df["c"] = 1.0
  915. msg = "Support for axis=1 in DataFrame.rolling is deprecated"
  916. with tm.assert_produces_warning(FutureWarning, match=msg):
  917. roll = df.rolling(window=2, min_periods=1, axis=1)
  918. result = getattr(roll, func)()
  919. expected = DataFrame(
  920. {"a": [1.0, 1.0], "b": [value, value], "c": [value, value]},
  921. index=[1, 2],
  922. )
  923. tm.assert_frame_equal(result, expected)
  924. def test_rolling_axis_one_with_nan():
  925. # GH: 35596
  926. df = DataFrame(
  927. [
  928. [0, 1, 2, 4, np.nan, np.nan, np.nan],
  929. [0, 1, 2, np.nan, np.nan, np.nan, np.nan],
  930. [0, 2, 2, np.nan, 2, np.nan, 1],
  931. ]
  932. )
  933. msg = "Support for axis=1 in DataFrame.rolling is deprecated"
  934. with tm.assert_produces_warning(FutureWarning, match=msg):
  935. result = df.rolling(window=7, min_periods=1, axis="columns").sum()
  936. expected = DataFrame(
  937. [
  938. [0.0, 1.0, 3.0, 7.0, 7.0, 7.0, 7.0],
  939. [0.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0],
  940. [0.0, 2.0, 4.0, 4.0, 6.0, 6.0, 7.0],
  941. ]
  942. )
  943. tm.assert_frame_equal(result, expected)
  944. @pytest.mark.parametrize(
  945. "value",
  946. ["test", to_datetime("2019-12-31"), to_timedelta("1 days 06:05:01.00003")],
  947. )
  948. def test_rolling_axis_1_non_numeric_dtypes(value):
  949. # GH: 20649
  950. df = DataFrame({"a": [1, 2]})
  951. df["b"] = value
  952. msg = "Support for axis=1 in DataFrame.rolling is deprecated"
  953. with tm.assert_produces_warning(FutureWarning, match=msg):
  954. result = df.rolling(window=2, min_periods=1, axis=1).sum()
  955. expected = DataFrame({"a": [1.0, 2.0]})
  956. tm.assert_frame_equal(result, expected)
  957. def test_rolling_on_df_transposed():
  958. # GH: 32724
  959. df = DataFrame({"A": [1, None], "B": [4, 5], "C": [7, 8]})
  960. expected = DataFrame({"A": [1.0, np.nan], "B": [5.0, 5.0], "C": [11.0, 13.0]})
  961. msg = "Support for axis=1 in DataFrame.rolling is deprecated"
  962. with tm.assert_produces_warning(FutureWarning, match=msg):
  963. result = df.rolling(min_periods=1, window=2, axis=1).sum()
  964. tm.assert_frame_equal(result, expected)
  965. result = df.T.rolling(min_periods=1, window=2).sum().T
  966. tm.assert_frame_equal(result, expected)
  967. @pytest.mark.parametrize(
  968. ("index", "window"),
  969. [
  970. (
  971. period_range(start="2020-01-01 08:00", end="2020-01-01 08:08", freq="min"),
  972. "2min",
  973. ),
  974. (
  975. period_range(
  976. start="2020-01-01 08:00", end="2020-01-01 12:00", freq="30min"
  977. ),
  978. "1h",
  979. ),
  980. ],
  981. )
  982. @pytest.mark.parametrize(
  983. ("func", "values"),
  984. [
  985. ("min", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6]),
  986. ("max", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7]),
  987. ("sum", [np.nan, 0, 1, 3, 5, 7, 9, 11, 13]),
  988. ],
  989. )
  990. def test_rolling_period_index(index, window, func, values):
  991. # GH: 34225
  992. ds = Series([0, 1, 2, 3, 4, 5, 6, 7, 8], index=index)
  993. result = getattr(ds.rolling(window, closed="left"), func)()
  994. expected = Series(values, index=index)
  995. tm.assert_series_equal(result, expected)
  996. def test_rolling_sem(frame_or_series):
  997. # GH: 26476
  998. obj = frame_or_series([0, 1, 2])
  999. result = obj.rolling(2, min_periods=1).sem()
  1000. if isinstance(result, DataFrame):
  1001. result = Series(result[0].values)
  1002. expected = Series([np.nan] + [0.7071067811865476] * 2)
  1003. tm.assert_series_equal(result, expected)
  1004. @pytest.mark.xfail(
  1005. is_platform_arm() or is_platform_power(),
  1006. reason="GH 38921",
  1007. )
  1008. @pytest.mark.parametrize(
  1009. ("func", "third_value", "values"),
  1010. [
  1011. ("var", 1, [5e33, 0, 0.5, 0.5, 2, 0]),
  1012. ("std", 1, [7.071068e16, 0, 0.7071068, 0.7071068, 1.414214, 0]),
  1013. ("var", 2, [5e33, 0.5, 0, 0.5, 2, 0]),
  1014. ("std", 2, [7.071068e16, 0.7071068, 0, 0.7071068, 1.414214, 0]),
  1015. ],
  1016. )
  1017. def test_rolling_var_numerical_issues(func, third_value, values):
  1018. # GH: 37051
  1019. ds = Series([99999999999999999, 1, third_value, 2, 3, 1, 1])
  1020. result = getattr(ds.rolling(2), func)()
  1021. expected = Series([np.nan] + values)
  1022. tm.assert_series_equal(result, expected)
  1023. # GH 42064
  1024. # new `roll_var` will output 0.0 correctly
  1025. tm.assert_series_equal(result == 0, expected == 0)
  1026. def test_timeoffset_as_window_parameter_for_corr(unit):
  1027. # GH: 28266
  1028. dti = DatetimeIndex(
  1029. [
  1030. Timestamp("20130101 09:00:00"),
  1031. Timestamp("20130102 09:00:02"),
  1032. Timestamp("20130103 09:00:03"),
  1033. Timestamp("20130105 09:00:05"),
  1034. Timestamp("20130106 09:00:06"),
  1035. ]
  1036. ).as_unit(unit)
  1037. mi = MultiIndex.from_product([dti, ["B", "A"]])
  1038. exp = DataFrame(
  1039. {
  1040. "B": [
  1041. np.nan,
  1042. np.nan,
  1043. 0.9999999999999998,
  1044. -1.0,
  1045. 1.0,
  1046. -0.3273268353539892,
  1047. 0.9999999999999998,
  1048. 1.0,
  1049. 0.9999999999999998,
  1050. 1.0,
  1051. ],
  1052. "A": [
  1053. np.nan,
  1054. np.nan,
  1055. -1.0,
  1056. 1.0000000000000002,
  1057. -0.3273268353539892,
  1058. 0.9999999999999966,
  1059. 1.0,
  1060. 1.0000000000000002,
  1061. 1.0,
  1062. 1.0000000000000002,
  1063. ],
  1064. },
  1065. index=mi,
  1066. )
  1067. df = DataFrame(
  1068. {"B": [0, 1, 2, 4, 3], "A": [7, 4, 6, 9, 3]},
  1069. index=dti,
  1070. )
  1071. res = df.rolling(window="3d").corr()
  1072. tm.assert_frame_equal(exp, res)
  1073. @pytest.mark.parametrize("method", ["var", "sum", "mean", "skew", "kurt", "min", "max"])
  1074. def test_rolling_decreasing_indices(method):
  1075. """
  1076. Make sure that decreasing indices give the same results as increasing indices.
  1077. GH 36933
  1078. """
  1079. df = DataFrame({"values": np.arange(-15, 10) ** 2})
  1080. df_reverse = DataFrame({"values": df["values"][::-1]}, index=df.index[::-1])
  1081. increasing = getattr(df.rolling(window=5), method)()
  1082. decreasing = getattr(df_reverse.rolling(window=5), method)()
  1083. assert np.abs(decreasing.values[::-1][:-4] - increasing.values[4:]).max() < 1e-12
  1084. @pytest.mark.parametrize(
  1085. "window,closed,expected",
  1086. [
  1087. ("2s", "right", [1.0, 3.0, 5.0, 3.0]),
  1088. ("2s", "left", [0.0, 1.0, 3.0, 5.0]),
  1089. ("2s", "both", [1.0, 3.0, 6.0, 5.0]),
  1090. ("2s", "neither", [0.0, 1.0, 2.0, 3.0]),
  1091. ("3s", "right", [1.0, 3.0, 6.0, 5.0]),
  1092. ("3s", "left", [1.0, 3.0, 6.0, 5.0]),
  1093. ("3s", "both", [1.0, 3.0, 6.0, 5.0]),
  1094. ("3s", "neither", [1.0, 3.0, 6.0, 5.0]),
  1095. ],
  1096. )
  1097. def test_rolling_decreasing_indices_centered(window, closed, expected, frame_or_series):
  1098. """
  1099. Ensure that a symmetrical inverted index return same result as non-inverted.
  1100. """
  1101. # GH 43927
  1102. index = date_range("2020", periods=4, freq="1s")
  1103. df_inc = frame_or_series(range(4), index=index)
  1104. df_dec = frame_or_series(range(4), index=index[::-1])
  1105. expected_inc = frame_or_series(expected, index=index)
  1106. expected_dec = frame_or_series(expected, index=index[::-1])
  1107. result_inc = df_inc.rolling(window, closed=closed, center=True).sum()
  1108. result_dec = df_dec.rolling(window, closed=closed, center=True).sum()
  1109. tm.assert_equal(result_inc, expected_inc)
  1110. tm.assert_equal(result_dec, expected_dec)
  1111. @pytest.mark.parametrize(
  1112. "window,expected",
  1113. [
  1114. ("1ns", [1.0, 1.0, 1.0, 1.0]),
  1115. ("3ns", [2.0, 3.0, 3.0, 2.0]),
  1116. ],
  1117. )
  1118. def test_rolling_center_nanosecond_resolution(
  1119. window, closed, expected, frame_or_series
  1120. ):
  1121. index = date_range("2020", periods=4, freq="1ns")
  1122. df = frame_or_series([1, 1, 1, 1], index=index, dtype=float)
  1123. expected = frame_or_series(expected, index=index, dtype=float)
  1124. result = df.rolling(window, closed=closed, center=True).sum()
  1125. tm.assert_equal(result, expected)
  1126. @pytest.mark.parametrize(
  1127. "method,expected",
  1128. [
  1129. (
  1130. "var",
  1131. [
  1132. float("nan"),
  1133. 43.0,
  1134. float("nan"),
  1135. 136.333333,
  1136. 43.5,
  1137. 94.966667,
  1138. 182.0,
  1139. 318.0,
  1140. ],
  1141. ),
  1142. (
  1143. "mean",
  1144. [float("nan"), 7.5, float("nan"), 21.5, 6.0, 9.166667, 13.0, 17.5],
  1145. ),
  1146. (
  1147. "sum",
  1148. [float("nan"), 30.0, float("nan"), 86.0, 30.0, 55.0, 91.0, 140.0],
  1149. ),
  1150. (
  1151. "skew",
  1152. [
  1153. float("nan"),
  1154. 0.709296,
  1155. float("nan"),
  1156. 0.407073,
  1157. 0.984656,
  1158. 0.919184,
  1159. 0.874674,
  1160. 0.842418,
  1161. ],
  1162. ),
  1163. (
  1164. "kurt",
  1165. [
  1166. float("nan"),
  1167. -0.5916711736073559,
  1168. float("nan"),
  1169. -1.0028993131317954,
  1170. -0.06103844629409494,
  1171. -0.254143227116194,
  1172. -0.37362637362637585,
  1173. -0.45439658241367054,
  1174. ],
  1175. ),
  1176. ],
  1177. )
  1178. def test_rolling_non_monotonic(method, expected):
  1179. """
  1180. Make sure the (rare) branch of non-monotonic indices is covered by a test.
  1181. output from 1.1.3 is assumed to be the expected output. Output of sum/mean has
  1182. manually been verified.
  1183. GH 36933.
  1184. """
  1185. # Based on an example found in computation.rst
  1186. use_expanding = [True, False, True, False, True, True, True, True]
  1187. df = DataFrame({"values": np.arange(len(use_expanding)) ** 2})
  1188. class CustomIndexer(BaseIndexer):
  1189. def get_window_bounds(self, num_values, min_periods, center, closed, step):
  1190. start = np.empty(num_values, dtype=np.int64)
  1191. end = np.empty(num_values, dtype=np.int64)
  1192. for i in range(num_values):
  1193. if self.use_expanding[i]:
  1194. start[i] = 0
  1195. end[i] = i + 1
  1196. else:
  1197. start[i] = i
  1198. end[i] = i + self.window_size
  1199. return start, end
  1200. indexer = CustomIndexer(window_size=4, use_expanding=use_expanding)
  1201. result = getattr(df.rolling(indexer), method)()
  1202. expected = DataFrame({"values": expected})
  1203. tm.assert_frame_equal(result, expected)
  1204. @pytest.mark.parametrize(
  1205. ("index", "window"),
  1206. [
  1207. ([0, 1, 2, 3, 4], 2),
  1208. (date_range("2001-01-01", freq="D", periods=5), "2D"),
  1209. ],
  1210. )
  1211. def test_rolling_corr_timedelta_index(index, window):
  1212. # GH: 31286
  1213. x = Series([1, 2, 3, 4, 5], index=index)
  1214. y = x.copy()
  1215. x.iloc[0:2] = 0.0
  1216. result = x.rolling(window).corr(y)
  1217. expected = Series([np.nan, np.nan, 1, 1, 1], index=index)
  1218. tm.assert_almost_equal(result, expected)
  1219. def test_groupby_rolling_nan_included():
  1220. # GH 35542
  1221. data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]}
  1222. df = DataFrame(data)
  1223. result = df.groupby("group", dropna=False).rolling(1, min_periods=1).mean()
  1224. expected = DataFrame(
  1225. {"B": [0.0, 2.0, 3.0, 1.0, 4.0]},
  1226. # GH-38057 from_tuples puts the NaNs in the codes, result expects them
  1227. # to be in the levels, at the moment
  1228. # index=MultiIndex.from_tuples(
  1229. # [("g1", 0), ("g1", 2), ("g2", 3), (np.nan, 1), (np.nan, 4)],
  1230. # names=["group", None],
  1231. # ),
  1232. index=MultiIndex(
  1233. [["g1", "g2", np.nan], [0, 1, 2, 3, 4]],
  1234. [[0, 0, 1, 2, 2], [0, 2, 3, 1, 4]],
  1235. names=["group", None],
  1236. ),
  1237. )
  1238. tm.assert_frame_equal(result, expected)
  1239. @pytest.mark.parametrize("method", ["skew", "kurt"])
  1240. def test_rolling_skew_kurt_numerical_stability(method):
  1241. # GH#6929
  1242. ser = Series(np.random.default_rng(2).random(10))
  1243. ser_copy = ser.copy()
  1244. expected = getattr(ser.rolling(3), method)()
  1245. tm.assert_series_equal(ser, ser_copy)
  1246. ser = ser + 50000
  1247. result = getattr(ser.rolling(3), method)()
  1248. tm.assert_series_equal(result, expected)
  1249. @pytest.mark.parametrize(
  1250. ("method", "values"),
  1251. [
  1252. ("skew", [2.0, 0.854563, 0.0, 1.999984]),
  1253. ("kurt", [4.0, -1.289256, -1.2, 3.999946]),
  1254. ],
  1255. )
  1256. def test_rolling_skew_kurt_large_value_range(method, values):
  1257. # GH: 37557
  1258. s = Series([3000000, 1, 1, 2, 3, 4, 999])
  1259. result = getattr(s.rolling(4), method)()
  1260. expected = Series([np.nan] * 3 + values)
  1261. tm.assert_series_equal(result, expected)
  1262. def test_invalid_method():
  1263. with pytest.raises(ValueError, match="method must be 'table' or 'single"):
  1264. Series(range(1)).rolling(1, method="foo")
  1265. @pytest.mark.parametrize("window", [1, "1d"])
  1266. def test_rolling_descending_date_order_with_offset(window, frame_or_series):
  1267. # GH#40002
  1268. idx = date_range(start="2020-01-01", end="2020-01-03", freq="1d")
  1269. obj = frame_or_series(range(1, 4), index=idx)
  1270. result = obj.rolling("1d", closed="left").sum()
  1271. expected = frame_or_series([np.nan, 1, 2], index=idx)
  1272. tm.assert_equal(result, expected)
  1273. result = obj.iloc[::-1].rolling("1d", closed="left").sum()
  1274. idx = date_range(start="2020-01-03", end="2020-01-01", freq="-1d")
  1275. expected = frame_or_series([np.nan, 3, 2], index=idx)
  1276. tm.assert_equal(result, expected)
  1277. def test_rolling_var_floating_artifact_precision():
  1278. # GH 37051
  1279. s = Series([7, 5, 5, 5])
  1280. result = s.rolling(3).var()
  1281. expected = Series([np.nan, np.nan, 4 / 3, 0])
  1282. tm.assert_series_equal(result, expected, atol=1.0e-15, rtol=1.0e-15)
  1283. # GH 42064
  1284. # new `roll_var` will output 0.0 correctly
  1285. tm.assert_series_equal(result == 0, expected == 0)
  1286. def test_rolling_std_small_values():
  1287. # GH 37051
  1288. s = Series(
  1289. [
  1290. 0.00000054,
  1291. 0.00000053,
  1292. 0.00000054,
  1293. ]
  1294. )
  1295. result = s.rolling(2).std()
  1296. expected = Series([np.nan, 7.071068e-9, 7.071068e-9])
  1297. tm.assert_series_equal(result, expected, atol=1.0e-15, rtol=1.0e-15)
  1298. @pytest.mark.parametrize(
  1299. "start, exp_values",
  1300. [
  1301. (1, [0.03, 0.0155, 0.0155, 0.011, 0.01025]),
  1302. (2, [0.001, 0.001, 0.0015, 0.00366666]),
  1303. ],
  1304. )
  1305. def test_rolling_mean_all_nan_window_floating_artifacts(start, exp_values):
  1306. # GH#41053
  1307. df = DataFrame(
  1308. [
  1309. 0.03,
  1310. 0.03,
  1311. 0.001,
  1312. np.nan,
  1313. 0.002,
  1314. 0.008,
  1315. np.nan,
  1316. np.nan,
  1317. np.nan,
  1318. np.nan,
  1319. np.nan,
  1320. np.nan,
  1321. 0.005,
  1322. 0.2,
  1323. ]
  1324. )
  1325. values = exp_values + [
  1326. 0.00366666,
  1327. 0.005,
  1328. 0.005,
  1329. 0.008,
  1330. np.nan,
  1331. np.nan,
  1332. 0.005,
  1333. 0.102500,
  1334. ]
  1335. expected = DataFrame(
  1336. values,
  1337. index=list(range(start, len(values) + start)),
  1338. )
  1339. result = df.iloc[start:].rolling(5, min_periods=0).mean()
  1340. tm.assert_frame_equal(result, expected)
  1341. def test_rolling_sum_all_nan_window_floating_artifacts():
  1342. # GH#41053
  1343. df = DataFrame([0.002, 0.008, 0.005, np.nan, np.nan, np.nan])
  1344. result = df.rolling(3, min_periods=0).sum()
  1345. expected = DataFrame([0.002, 0.010, 0.015, 0.013, 0.005, 0.0])
  1346. tm.assert_frame_equal(result, expected)
  1347. def test_rolling_zero_window():
  1348. # GH 22719
  1349. s = Series(range(1))
  1350. result = s.rolling(0).min()
  1351. expected = Series([np.nan])
  1352. tm.assert_series_equal(result, expected)
  1353. def test_rolling_float_dtype(float_numpy_dtype):
  1354. # GH#42452
  1355. df = DataFrame({"A": range(5), "B": range(10, 15)}, dtype=float_numpy_dtype)
  1356. expected = DataFrame(
  1357. {"A": [np.nan] * 5, "B": range(10, 20, 2)},
  1358. dtype=float_numpy_dtype,
  1359. )
  1360. msg = "Support for axis=1 in DataFrame.rolling is deprecated"
  1361. with tm.assert_produces_warning(FutureWarning, match=msg):
  1362. result = df.rolling(2, axis=1).sum()
  1363. tm.assert_frame_equal(result, expected, check_dtype=False)
  1364. def test_rolling_numeric_dtypes():
  1365. # GH#41779
  1366. df = DataFrame(np.arange(40).reshape(4, 10), columns=list("abcdefghij")).astype(
  1367. {
  1368. "a": "float16",
  1369. "b": "float32",
  1370. "c": "float64",
  1371. "d": "int8",
  1372. "e": "int16",
  1373. "f": "int32",
  1374. "g": "uint8",
  1375. "h": "uint16",
  1376. "i": "uint32",
  1377. "j": "uint64",
  1378. }
  1379. )
  1380. msg = "Support for axis=1 in DataFrame.rolling is deprecated"
  1381. with tm.assert_produces_warning(FutureWarning, match=msg):
  1382. result = df.rolling(window=2, min_periods=1, axis=1).min()
  1383. expected = DataFrame(
  1384. {
  1385. "a": range(0, 40, 10),
  1386. "b": range(0, 40, 10),
  1387. "c": range(1, 40, 10),
  1388. "d": range(2, 40, 10),
  1389. "e": range(3, 40, 10),
  1390. "f": range(4, 40, 10),
  1391. "g": range(5, 40, 10),
  1392. "h": range(6, 40, 10),
  1393. "i": range(7, 40, 10),
  1394. "j": range(8, 40, 10),
  1395. },
  1396. dtype="float64",
  1397. )
  1398. tm.assert_frame_equal(result, expected)
  1399. @pytest.mark.parametrize("window", [1, 3, 10, 20])
  1400. @pytest.mark.parametrize("method", ["min", "max", "average"])
  1401. @pytest.mark.parametrize("pct", [True, False])
  1402. @pytest.mark.parametrize("ascending", [True, False])
  1403. @pytest.mark.parametrize("test_data", ["default", "duplicates", "nans"])
  1404. def test_rank(window, method, pct, ascending, test_data):
  1405. length = 20
  1406. if test_data == "default":
  1407. ser = Series(data=np.random.default_rng(2).random(length))
  1408. elif test_data == "duplicates":
  1409. ser = Series(data=np.random.default_rng(2).choice(3, length))
  1410. elif test_data == "nans":
  1411. ser = Series(
  1412. data=np.random.default_rng(2).choice(
  1413. [1.0, 0.25, 0.75, np.nan, np.inf, -np.inf], length
  1414. )
  1415. )
  1416. expected = ser.rolling(window).apply(
  1417. lambda x: x.rank(method=method, pct=pct, ascending=ascending).iloc[-1]
  1418. )
  1419. result = ser.rolling(window).rank(method=method, pct=pct, ascending=ascending)
  1420. tm.assert_series_equal(result, expected)
  1421. def test_rolling_quantile_np_percentile():
  1422. # #9413: Tests that rolling window's quantile default behavior
  1423. # is analogous to Numpy's percentile
  1424. row = 10
  1425. col = 5
  1426. idx = date_range("20100101", periods=row, freq="B")
  1427. df = DataFrame(
  1428. np.random.default_rng(2).random(row * col).reshape((row, -1)), index=idx
  1429. )
  1430. df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0)
  1431. np_percentile = np.percentile(df, [25, 50, 75], axis=0)
  1432. tm.assert_almost_equal(df_quantile.values, np.array(np_percentile))
  1433. @pytest.mark.parametrize("quantile", [0.0, 0.1, 0.45, 0.5, 1])
  1434. @pytest.mark.parametrize(
  1435. "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"]
  1436. )
  1437. @pytest.mark.parametrize(
  1438. "data",
  1439. [
  1440. [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
  1441. [8.0, 1.0, 3.0, 4.0, 5.0, 2.0, 6.0, 7.0],
  1442. [0.0, np.nan, 0.2, np.nan, 0.4],
  1443. [np.nan, np.nan, np.nan, np.nan],
  1444. [np.nan, 0.1, np.nan, 0.3, 0.4, 0.5],
  1445. [0.5],
  1446. [np.nan, 0.7, 0.6],
  1447. ],
  1448. )
  1449. def test_rolling_quantile_interpolation_options(quantile, interpolation, data):
  1450. # Tests that rolling window's quantile behavior is analogous to
  1451. # Series' quantile for each interpolation option
  1452. s = Series(data)
  1453. q1 = s.quantile(quantile, interpolation)
  1454. q2 = s.expanding(min_periods=1).quantile(quantile, interpolation).iloc[-1]
  1455. if np.isnan(q1):
  1456. assert np.isnan(q2)
  1457. else:
  1458. if not IS64:
  1459. # Less precision on 32-bit
  1460. assert np.allclose([q1], [q2], rtol=1e-07, atol=0)
  1461. else:
  1462. assert q1 == q2
  1463. def test_invalid_quantile_value():
  1464. data = np.arange(5)
  1465. s = Series(data)
  1466. msg = "Interpolation 'invalid' is not supported"
  1467. with pytest.raises(ValueError, match=msg):
  1468. s.rolling(len(data), min_periods=1).quantile(0.5, interpolation="invalid")
  1469. def test_rolling_quantile_param():
  1470. ser = Series([0.0, 0.1, 0.5, 0.9, 1.0])
  1471. msg = "quantile value -0.1 not in \\[0, 1\\]"
  1472. with pytest.raises(ValueError, match=msg):
  1473. ser.rolling(3).quantile(-0.1)
  1474. msg = "quantile value 10.0 not in \\[0, 1\\]"
  1475. with pytest.raises(ValueError, match=msg):
  1476. ser.rolling(3).quantile(10.0)
  1477. msg = "must be real number, not str"
  1478. with pytest.raises(TypeError, match=msg):
  1479. ser.rolling(3).quantile("foo")
  1480. def test_rolling_std_1obs():
  1481. vals = Series([1.0, 2.0, 3.0, 4.0, 5.0])
  1482. result = vals.rolling(1, min_periods=1).std()
  1483. expected = Series([np.nan] * 5)
  1484. tm.assert_series_equal(result, expected)
  1485. result = vals.rolling(1, min_periods=1).std(ddof=0)
  1486. expected = Series([0.0] * 5)
  1487. tm.assert_series_equal(result, expected)
  1488. result = Series([np.nan, np.nan, 3, 4, 5]).rolling(3, min_periods=2).std()
  1489. assert np.isnan(result[2])
  1490. def test_rolling_std_neg_sqrt():
  1491. # unit test from Bottleneck
  1492. # Test move_nanstd for neg sqrt.
  1493. a = Series(
  1494. [
  1495. 0.0011448196318903589,
  1496. 0.00028718669878572767,
  1497. 0.00028718669878572767,
  1498. 0.00028718669878572767,
  1499. 0.00028718669878572767,
  1500. ]
  1501. )
  1502. b = a.rolling(window=3).std()
  1503. assert np.isfinite(b[2:]).all()
  1504. b = a.ewm(span=3).std()
  1505. assert np.isfinite(b[2:]).all()
  1506. def test_step_not_integer_raises():
  1507. with pytest.raises(ValueError, match="step must be an integer"):
  1508. DataFrame(range(2)).rolling(1, step="foo")
  1509. def test_step_not_positive_raises():
  1510. with pytest.raises(ValueError, match="step must be >= 0"):
  1511. DataFrame(range(2)).rolling(1, step=-1)
  1512. @pytest.mark.parametrize(
  1513. ["values", "window", "min_periods", "expected"],
  1514. [
  1515. [
  1516. [20, 10, 10, np.inf, 1, 1, 2, 3],
  1517. 3,
  1518. 1,
  1519. [np.nan, 50, 100 / 3, 0, 40.5, 0, 1 / 3, 1],
  1520. ],
  1521. [
  1522. [20, 10, 10, np.nan, 10, 1, 2, 3],
  1523. 3,
  1524. 1,
  1525. [np.nan, 50, 100 / 3, 0, 0, 40.5, 73 / 3, 1],
  1526. ],
  1527. [
  1528. [np.nan, 5, 6, 7, 5, 5, 5],
  1529. 3,
  1530. 3,
  1531. [np.nan] * 3 + [1, 1, 4 / 3, 0],
  1532. ],
  1533. [
  1534. [5, 7, 7, 7, np.nan, np.inf, 4, 3, 3, 3],
  1535. 3,
  1536. 3,
  1537. [np.nan] * 2 + [4 / 3, 0] + [np.nan] * 4 + [1 / 3, 0],
  1538. ],
  1539. [
  1540. [5, 7, 7, 7, np.nan, np.inf, 7, 3, 3, 3],
  1541. 3,
  1542. 3,
  1543. [np.nan] * 2 + [4 / 3, 0] + [np.nan] * 4 + [16 / 3, 0],
  1544. ],
  1545. [
  1546. [5, 7] * 4,
  1547. 3,
  1548. 3,
  1549. [np.nan] * 2 + [4 / 3] * 6,
  1550. ],
  1551. [
  1552. [5, 7, 5, np.nan, 7, 5, 7],
  1553. 3,
  1554. 2,
  1555. [np.nan, 2, 4 / 3] + [2] * 3 + [4 / 3],
  1556. ],
  1557. ],
  1558. )
  1559. def test_rolling_var_same_value_count_logic(values, window, min_periods, expected):
  1560. # GH 42064.
  1561. expected = Series(expected)
  1562. sr = Series(values)
  1563. # With new algo implemented, result will be set to .0 in rolling var
  1564. # if sufficient amount of consecutively same values are found.
  1565. result_var = sr.rolling(window, min_periods=min_periods).var()
  1566. # use `assert_series_equal` twice to check for equality,
  1567. # because `check_exact=True` will fail in 32-bit tests due to
  1568. # precision loss.
  1569. # 1. result should be close to correct value
  1570. # non-zero values can still differ slightly from "truth"
  1571. # as the result of online algorithm
  1572. tm.assert_series_equal(result_var, expected)
  1573. # 2. zeros should be exactly the same since the new algo takes effect here
  1574. tm.assert_series_equal(expected == 0, result_var == 0)
  1575. # std should also pass as it's just a sqrt of var
  1576. result_std = sr.rolling(window, min_periods=min_periods).std()
  1577. tm.assert_series_equal(result_std, np.sqrt(expected))
  1578. tm.assert_series_equal(expected == 0, result_std == 0)
  1579. def test_rolling_mean_sum_floating_artifacts():
  1580. # GH 42064.
  1581. sr = Series([1 / 3, 4, 0, 0, 0, 0, 0])
  1582. r = sr.rolling(3)
  1583. result = r.mean()
  1584. assert (result[-3:] == 0).all()
  1585. result = r.sum()
  1586. assert (result[-3:] == 0).all()
  1587. def test_rolling_skew_kurt_floating_artifacts():
  1588. # GH 42064 46431
  1589. sr = Series([1 / 3, 4, 0, 0, 0, 0, 0])
  1590. r = sr.rolling(4)
  1591. result = r.skew()
  1592. assert (result[-2:] == 0).all()
  1593. result = r.kurt()
  1594. assert (result[-2:] == -3).all()
  1595. def test_numeric_only_frame(arithmetic_win_operators, numeric_only):
  1596. # GH#46560
  1597. kernel = arithmetic_win_operators
  1598. df = DataFrame({"a": [1], "b": 2, "c": 3})
  1599. df["c"] = df["c"].astype(object)
  1600. rolling = df.rolling(2, min_periods=1)
  1601. op = getattr(rolling, kernel)
  1602. result = op(numeric_only=numeric_only)
  1603. columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
  1604. expected = df[columns].agg([kernel]).reset_index(drop=True).astype(float)
  1605. assert list(expected.columns) == columns
  1606. tm.assert_frame_equal(result, expected)
  1607. @pytest.mark.parametrize("kernel", ["corr", "cov"])
  1608. @pytest.mark.parametrize("use_arg", [True, False])
  1609. def test_numeric_only_corr_cov_frame(kernel, numeric_only, use_arg):
  1610. # GH#46560
  1611. df = DataFrame({"a": [1, 2, 3], "b": 2, "c": 3})
  1612. df["c"] = df["c"].astype(object)
  1613. arg = (df,) if use_arg else ()
  1614. rolling = df.rolling(2, min_periods=1)
  1615. op = getattr(rolling, kernel)
  1616. result = op(*arg, numeric_only=numeric_only)
  1617. # Compare result to op using float dtypes, dropping c when numeric_only is True
  1618. columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
  1619. df2 = df[columns].astype(float)
  1620. arg2 = (df2,) if use_arg else ()
  1621. rolling2 = df2.rolling(2, min_periods=1)
  1622. op2 = getattr(rolling2, kernel)
  1623. expected = op2(*arg2, numeric_only=numeric_only)
  1624. tm.assert_frame_equal(result, expected)
  1625. @pytest.mark.parametrize("dtype", [int, object])
  1626. def test_numeric_only_series(arithmetic_win_operators, numeric_only, dtype):
  1627. # GH#46560
  1628. kernel = arithmetic_win_operators
  1629. ser = Series([1], dtype=dtype)
  1630. rolling = ser.rolling(2, min_periods=1)
  1631. op = getattr(rolling, kernel)
  1632. if numeric_only and dtype is object:
  1633. msg = f"Rolling.{kernel} does not implement numeric_only"
  1634. with pytest.raises(NotImplementedError, match=msg):
  1635. op(numeric_only=numeric_only)
  1636. else:
  1637. result = op(numeric_only=numeric_only)
  1638. expected = ser.agg([kernel]).reset_index(drop=True).astype(float)
  1639. tm.assert_series_equal(result, expected)
  1640. @pytest.mark.parametrize("kernel", ["corr", "cov"])
  1641. @pytest.mark.parametrize("use_arg", [True, False])
  1642. @pytest.mark.parametrize("dtype", [int, object])
  1643. def test_numeric_only_corr_cov_series(kernel, use_arg, numeric_only, dtype):
  1644. # GH#46560
  1645. ser = Series([1, 2, 3], dtype=dtype)
  1646. arg = (ser,) if use_arg else ()
  1647. rolling = ser.rolling(2, min_periods=1)
  1648. op = getattr(rolling, kernel)
  1649. if numeric_only and dtype is object:
  1650. msg = f"Rolling.{kernel} does not implement numeric_only"
  1651. with pytest.raises(NotImplementedError, match=msg):
  1652. op(*arg, numeric_only=numeric_only)
  1653. else:
  1654. result = op(*arg, numeric_only=numeric_only)
  1655. ser2 = ser.astype(float)
  1656. arg2 = (ser2,) if use_arg else ()
  1657. rolling2 = ser2.rolling(2, min_periods=1)
  1658. op2 = getattr(rolling2, kernel)
  1659. expected = op2(*arg2, numeric_only=numeric_only)
  1660. tm.assert_series_equal(result, expected)
  1661. @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
  1662. @pytest.mark.parametrize("tz", [None, "UTC", "Europe/Prague"])
  1663. def test_rolling_timedelta_window_non_nanoseconds(unit, tz):
  1664. # Test Sum, GH#55106
  1665. df_time = DataFrame(
  1666. {"A": range(5)}, index=date_range("2013-01-01", freq="1s", periods=5, tz=tz)
  1667. )
  1668. sum_in_nanosecs = df_time.rolling("1s").sum()
  1669. # microseconds / milliseconds should not break the correct rolling
  1670. df_time.index = df_time.index.as_unit(unit)
  1671. sum_in_microsecs = df_time.rolling("1s").sum()
  1672. sum_in_microsecs.index = sum_in_microsecs.index.as_unit("ns")
  1673. tm.assert_frame_equal(sum_in_nanosecs, sum_in_microsecs)
  1674. # Test max, GH#55026
  1675. ref_dates = date_range("2023-01-01", "2023-01-10", unit="ns", tz=tz)
  1676. ref_series = Series(0, index=ref_dates)
  1677. ref_series.iloc[0] = 1
  1678. ref_max_series = ref_series.rolling(Timedelta(days=4)).max()
  1679. dates = date_range("2023-01-01", "2023-01-10", unit=unit, tz=tz)
  1680. series = Series(0, index=dates)
  1681. series.iloc[0] = 1
  1682. max_series = series.rolling(Timedelta(days=4)).max()
  1683. ref_df = DataFrame(ref_max_series)
  1684. df = DataFrame(max_series)
  1685. df.index = df.index.as_unit("ns")
  1686. tm.assert_frame_equal(ref_df, df)