test_expanding.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723
  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. DataFrame,
  5. DatetimeIndex,
  6. Index,
  7. MultiIndex,
  8. Series,
  9. isna,
  10. notna,
  11. )
  12. import pandas._testing as tm
  13. def test_doc_string():
  14. df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
  15. df
  16. df.expanding(2).sum()
  17. def test_constructor(frame_or_series):
  18. # GH 12669
  19. c = frame_or_series(range(5)).expanding
  20. # valid
  21. c(min_periods=1)
  22. @pytest.mark.parametrize("w", [2.0, "foo", np.array([2])])
  23. def test_constructor_invalid(frame_or_series, w):
  24. # not valid
  25. c = frame_or_series(range(5)).expanding
  26. msg = "min_periods must be an integer"
  27. with pytest.raises(ValueError, match=msg):
  28. c(min_periods=w)
  29. @pytest.mark.parametrize(
  30. "expander",
  31. [
  32. 1,
  33. pytest.param(
  34. "ls",
  35. marks=pytest.mark.xfail(
  36. reason="GH#16425 expanding with offset not supported"
  37. ),
  38. ),
  39. ],
  40. )
  41. def test_empty_df_expanding(expander):
  42. # GH 15819 Verifies that datetime and integer expanding windows can be
  43. # applied to empty DataFrames
  44. expected = DataFrame()
  45. result = DataFrame().expanding(expander).sum()
  46. tm.assert_frame_equal(result, expected)
  47. # Verifies that datetime and integer expanding windows can be applied
  48. # to empty DataFrames with datetime index
  49. expected = DataFrame(index=DatetimeIndex([]))
  50. result = DataFrame(index=DatetimeIndex([])).expanding(expander).sum()
  51. tm.assert_frame_equal(result, expected)
  52. def test_missing_minp_zero():
  53. # https://github.com/pandas-dev/pandas/pull/18921
  54. # minp=0
  55. x = Series([np.nan])
  56. result = x.expanding(min_periods=0).sum()
  57. expected = Series([0.0])
  58. tm.assert_series_equal(result, expected)
  59. # minp=1
  60. result = x.expanding(min_periods=1).sum()
  61. expected = Series([np.nan])
  62. tm.assert_series_equal(result, expected)
  63. def test_expanding_axis(axis_frame):
  64. # see gh-23372.
  65. df = DataFrame(np.ones((10, 20)))
  66. axis = df._get_axis_number(axis_frame)
  67. if axis == 0:
  68. msg = "The 'axis' keyword in DataFrame.expanding is deprecated"
  69. expected = DataFrame(
  70. {i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)}
  71. )
  72. else:
  73. # axis == 1
  74. msg = "Support for axis=1 in DataFrame.expanding is deprecated"
  75. expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10)
  76. with tm.assert_produces_warning(FutureWarning, match=msg):
  77. result = df.expanding(3, axis=axis_frame).sum()
  78. tm.assert_frame_equal(result, expected)
  79. def test_expanding_count_with_min_periods(frame_or_series):
  80. # GH 26996
  81. result = frame_or_series(range(5)).expanding(min_periods=3).count()
  82. expected = frame_or_series([np.nan, np.nan, 3.0, 4.0, 5.0])
  83. tm.assert_equal(result, expected)
  84. def test_expanding_count_default_min_periods_with_null_values(frame_or_series):
  85. # GH 26996
  86. values = [1, 2, 3, np.nan, 4, 5, 6]
  87. expected_counts = [1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 6.0]
  88. result = frame_or_series(values).expanding().count()
  89. expected = frame_or_series(expected_counts)
  90. tm.assert_equal(result, expected)
  91. def test_expanding_count_with_min_periods_exceeding_series_length(frame_or_series):
  92. # GH 25857
  93. result = frame_or_series(range(5)).expanding(min_periods=6).count()
  94. expected = frame_or_series([np.nan, np.nan, np.nan, np.nan, np.nan])
  95. tm.assert_equal(result, expected)
  96. @pytest.mark.parametrize(
  97. "df,expected,min_periods",
  98. [
  99. (
  100. DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
  101. [
  102. ({"A": [1], "B": [4]}, [0]),
  103. ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
  104. ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
  105. ],
  106. 3,
  107. ),
  108. (
  109. DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
  110. [
  111. ({"A": [1], "B": [4]}, [0]),
  112. ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
  113. ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
  114. ],
  115. 2,
  116. ),
  117. (
  118. DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
  119. [
  120. ({"A": [1], "B": [4]}, [0]),
  121. ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
  122. ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
  123. ],
  124. 1,
  125. ),
  126. (DataFrame({"A": [1], "B": [4]}), [], 2),
  127. (DataFrame(), [({}, [])], 1),
  128. (
  129. DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
  130. [
  131. ({"A": [1.0], "B": [np.nan]}, [0]),
  132. ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
  133. ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
  134. ],
  135. 3,
  136. ),
  137. (
  138. DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
  139. [
  140. ({"A": [1.0], "B": [np.nan]}, [0]),
  141. ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
  142. ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
  143. ],
  144. 2,
  145. ),
  146. (
  147. DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
  148. [
  149. ({"A": [1.0], "B": [np.nan]}, [0]),
  150. ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
  151. ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
  152. ],
  153. 1,
  154. ),
  155. ],
  156. )
  157. def test_iter_expanding_dataframe(df, expected, min_periods):
  158. # GH 11704
  159. expected = [DataFrame(values, index=index) for (values, index) in expected]
  160. for expected, actual in zip(expected, df.expanding(min_periods)):
  161. tm.assert_frame_equal(actual, expected)
  162. @pytest.mark.parametrize(
  163. "ser,expected,min_periods",
  164. [
  165. (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3),
  166. (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2),
  167. (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 1),
  168. (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2),
  169. (Series([np.nan, 2]), [([np.nan], [0]), ([np.nan, 2], [0, 1])], 2),
  170. (Series([], dtype="int64"), [], 2),
  171. ],
  172. )
  173. def test_iter_expanding_series(ser, expected, min_periods):
  174. # GH 11704
  175. expected = [Series(values, index=index) for (values, index) in expected]
  176. for expected, actual in zip(expected, ser.expanding(min_periods)):
  177. tm.assert_series_equal(actual, expected)
  178. def test_center_invalid():
  179. # GH 20647
  180. df = DataFrame()
  181. with pytest.raises(TypeError, match=".* got an unexpected keyword"):
  182. df.expanding(center=True)
  183. def test_expanding_sem(frame_or_series):
  184. # GH: 26476
  185. obj = frame_or_series([0, 1, 2])
  186. result = obj.expanding().sem()
  187. if isinstance(result, DataFrame):
  188. result = Series(result[0].values)
  189. expected = Series([np.nan] + [0.707107] * 2)
  190. tm.assert_series_equal(result, expected)
  191. @pytest.mark.parametrize("method", ["skew", "kurt"])
  192. def test_expanding_skew_kurt_numerical_stability(method):
  193. # GH: 6929
  194. s = Series(np.random.default_rng(2).random(10))
  195. expected = getattr(s.expanding(3), method)()
  196. s = s + 5000
  197. result = getattr(s.expanding(3), method)()
  198. tm.assert_series_equal(result, expected)
  199. @pytest.mark.parametrize("window", [1, 3, 10, 20])
  200. @pytest.mark.parametrize("method", ["min", "max", "average"])
  201. @pytest.mark.parametrize("pct", [True, False])
  202. @pytest.mark.parametrize("ascending", [True, False])
  203. @pytest.mark.parametrize("test_data", ["default", "duplicates", "nans"])
  204. def test_rank(window, method, pct, ascending, test_data):
  205. length = 20
  206. if test_data == "default":
  207. ser = Series(data=np.random.default_rng(2).random(length))
  208. elif test_data == "duplicates":
  209. ser = Series(data=np.random.default_rng(2).choice(3, length))
  210. elif test_data == "nans":
  211. ser = Series(
  212. data=np.random.default_rng(2).choice(
  213. [1.0, 0.25, 0.75, np.nan, np.inf, -np.inf], length
  214. )
  215. )
  216. expected = ser.expanding(window).apply(
  217. lambda x: x.rank(method=method, pct=pct, ascending=ascending).iloc[-1]
  218. )
  219. result = ser.expanding(window).rank(method=method, pct=pct, ascending=ascending)
  220. tm.assert_series_equal(result, expected)
  221. def test_expanding_corr(series):
  222. A = series.dropna()
  223. B = (A + np.random.default_rng(2).standard_normal(len(A)))[:-5]
  224. result = A.expanding().corr(B)
  225. rolling_result = A.rolling(window=len(A), min_periods=1).corr(B)
  226. tm.assert_almost_equal(rolling_result, result)
  227. def test_expanding_count(series):
  228. result = series.expanding(min_periods=0).count()
  229. tm.assert_almost_equal(
  230. result, series.rolling(window=len(series), min_periods=0).count()
  231. )
  232. def test_expanding_quantile(series):
  233. result = series.expanding().quantile(0.5)
  234. rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5)
  235. tm.assert_almost_equal(result, rolling_result)
  236. def test_expanding_cov(series):
  237. A = series
  238. B = (A + np.random.default_rng(2).standard_normal(len(A)))[:-5]
  239. result = A.expanding().cov(B)
  240. rolling_result = A.rolling(window=len(A), min_periods=1).cov(B)
  241. tm.assert_almost_equal(rolling_result, result)
  242. def test_expanding_cov_pairwise(frame):
  243. result = frame.expanding().cov()
  244. rolling_result = frame.rolling(window=len(frame), min_periods=1).cov()
  245. tm.assert_frame_equal(result, rolling_result)
  246. def test_expanding_corr_pairwise(frame):
  247. result = frame.expanding().corr()
  248. rolling_result = frame.rolling(window=len(frame), min_periods=1).corr()
  249. tm.assert_frame_equal(result, rolling_result)
  250. @pytest.mark.parametrize(
  251. "func,static_comp",
  252. [
  253. ("sum", np.sum),
  254. ("mean", lambda x: np.mean(x, axis=0)),
  255. ("max", lambda x: np.max(x, axis=0)),
  256. ("min", lambda x: np.min(x, axis=0)),
  257. ],
  258. ids=["sum", "mean", "max", "min"],
  259. )
  260. def test_expanding_func(func, static_comp, frame_or_series):
  261. data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
  262. msg = "The 'axis' keyword in (Series|DataFrame).expanding is deprecated"
  263. with tm.assert_produces_warning(FutureWarning, match=msg):
  264. obj = data.expanding(min_periods=1, axis=0)
  265. result = getattr(obj, func)()
  266. assert isinstance(result, frame_or_series)
  267. msg = "The behavior of DataFrame.sum with axis=None is deprecated"
  268. warn = None
  269. if frame_or_series is DataFrame and static_comp is np.sum:
  270. warn = FutureWarning
  271. with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
  272. expected = static_comp(data[:11])
  273. if frame_or_series is Series:
  274. tm.assert_almost_equal(result[10], expected)
  275. else:
  276. tm.assert_series_equal(result.iloc[10], expected, check_names=False)
  277. @pytest.mark.parametrize(
  278. "func,static_comp",
  279. [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)],
  280. ids=["sum", "mean", "max", "min"],
  281. )
  282. def test_expanding_min_periods(func, static_comp):
  283. ser = Series(np.random.default_rng(2).standard_normal(50))
  284. msg = "The 'axis' keyword in Series.expanding is deprecated"
  285. with tm.assert_produces_warning(FutureWarning, match=msg):
  286. result = getattr(ser.expanding(min_periods=30, axis=0), func)()
  287. assert result[:29].isna().all()
  288. tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))
  289. # min_periods is working correctly
  290. with tm.assert_produces_warning(FutureWarning, match=msg):
  291. result = getattr(ser.expanding(min_periods=15, axis=0), func)()
  292. assert isna(result.iloc[13])
  293. assert notna(result.iloc[14])
  294. ser2 = Series(np.random.default_rng(2).standard_normal(20))
  295. with tm.assert_produces_warning(FutureWarning, match=msg):
  296. result = getattr(ser2.expanding(min_periods=5, axis=0), func)()
  297. assert isna(result[3])
  298. assert notna(result[4])
  299. # min_periods=0
  300. with tm.assert_produces_warning(FutureWarning, match=msg):
  301. result0 = getattr(ser.expanding(min_periods=0, axis=0), func)()
  302. with tm.assert_produces_warning(FutureWarning, match=msg):
  303. result1 = getattr(ser.expanding(min_periods=1, axis=0), func)()
  304. tm.assert_almost_equal(result0, result1)
  305. with tm.assert_produces_warning(FutureWarning, match=msg):
  306. result = getattr(ser.expanding(min_periods=1, axis=0), func)()
  307. tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))
  308. def test_expanding_apply(engine_and_raw, frame_or_series):
  309. engine, raw = engine_and_raw
  310. data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
  311. result = data.expanding(min_periods=1).apply(
  312. lambda x: x.mean(), raw=raw, engine=engine
  313. )
  314. assert isinstance(result, frame_or_series)
  315. if frame_or_series is Series:
  316. tm.assert_almost_equal(result[9], np.mean(data[:11], axis=0))
  317. else:
  318. tm.assert_series_equal(
  319. result.iloc[9], np.mean(data[:11], axis=0), check_names=False
  320. )
  321. def test_expanding_min_periods_apply(engine_and_raw):
  322. engine, raw = engine_and_raw
  323. ser = Series(np.random.default_rng(2).standard_normal(50))
  324. result = ser.expanding(min_periods=30).apply(
  325. lambda x: x.mean(), raw=raw, engine=engine
  326. )
  327. assert result[:29].isna().all()
  328. tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50]))
  329. # min_periods is working correctly
  330. result = ser.expanding(min_periods=15).apply(
  331. lambda x: x.mean(), raw=raw, engine=engine
  332. )
  333. assert isna(result.iloc[13])
  334. assert notna(result.iloc[14])
  335. ser2 = Series(np.random.default_rng(2).standard_normal(20))
  336. result = ser2.expanding(min_periods=5).apply(
  337. lambda x: x.mean(), raw=raw, engine=engine
  338. )
  339. assert isna(result[3])
  340. assert notna(result[4])
  341. # min_periods=0
  342. result0 = ser.expanding(min_periods=0).apply(
  343. lambda x: x.mean(), raw=raw, engine=engine
  344. )
  345. result1 = ser.expanding(min_periods=1).apply(
  346. lambda x: x.mean(), raw=raw, engine=engine
  347. )
  348. tm.assert_almost_equal(result0, result1)
  349. result = ser.expanding(min_periods=1).apply(
  350. lambda x: x.mean(), raw=raw, engine=engine
  351. )
  352. tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50]))
  353. @pytest.mark.parametrize(
  354. "f",
  355. [
  356. lambda x: (x.expanding(min_periods=5).cov(x, pairwise=True)),
  357. lambda x: (x.expanding(min_periods=5).corr(x, pairwise=True)),
  358. ],
  359. )
  360. def test_moment_functions_zero_length_pairwise(f):
  361. df1 = DataFrame()
  362. df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar"))
  363. df2["a"] = df2["a"].astype("float64")
  364. df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns]))
  365. df2_expected = DataFrame(
  366. index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]),
  367. columns=Index(["a"], name="foo"),
  368. dtype="float64",
  369. )
  370. df1_result = f(df1)
  371. tm.assert_frame_equal(df1_result, df1_expected)
  372. df2_result = f(df2)
  373. tm.assert_frame_equal(df2_result, df2_expected)
  374. @pytest.mark.parametrize(
  375. "f",
  376. [
  377. lambda x: x.expanding().count(),
  378. lambda x: x.expanding(min_periods=5).cov(x, pairwise=False),
  379. lambda x: x.expanding(min_periods=5).corr(x, pairwise=False),
  380. lambda x: x.expanding(min_periods=5).max(),
  381. lambda x: x.expanding(min_periods=5).min(),
  382. lambda x: x.expanding(min_periods=5).sum(),
  383. lambda x: x.expanding(min_periods=5).mean(),
  384. lambda x: x.expanding(min_periods=5).std(),
  385. lambda x: x.expanding(min_periods=5).var(),
  386. lambda x: x.expanding(min_periods=5).skew(),
  387. lambda x: x.expanding(min_periods=5).kurt(),
  388. lambda x: x.expanding(min_periods=5).quantile(0.5),
  389. lambda x: x.expanding(min_periods=5).median(),
  390. lambda x: x.expanding(min_periods=5).apply(sum, raw=False),
  391. lambda x: x.expanding(min_periods=5).apply(sum, raw=True),
  392. ],
  393. )
  394. def test_moment_functions_zero_length(f):
  395. # GH 8056
  396. s = Series(dtype=np.float64)
  397. s_expected = s
  398. df1 = DataFrame()
  399. df1_expected = df1
  400. df2 = DataFrame(columns=["a"])
  401. df2["a"] = df2["a"].astype("float64")
  402. df2_expected = df2
  403. s_result = f(s)
  404. tm.assert_series_equal(s_result, s_expected)
  405. df1_result = f(df1)
  406. tm.assert_frame_equal(df1_result, df1_expected)
  407. df2_result = f(df2)
  408. tm.assert_frame_equal(df2_result, df2_expected)
  409. def test_expanding_apply_empty_series(engine_and_raw):
  410. engine, raw = engine_and_raw
  411. ser = Series([], dtype=np.float64)
  412. tm.assert_series_equal(
  413. ser, ser.expanding().apply(lambda x: x.mean(), raw=raw, engine=engine)
  414. )
  415. def test_expanding_apply_min_periods_0(engine_and_raw):
  416. # GH 8080
  417. engine, raw = engine_and_raw
  418. s = Series([None, None, None])
  419. result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw, engine=engine)
  420. expected = Series([1.0, 2.0, 3.0])
  421. tm.assert_series_equal(result, expected)
  422. def test_expanding_cov_diff_index():
  423. # GH 7512
  424. s1 = Series([1, 2, 3], index=[0, 1, 2])
  425. s2 = Series([1, 3], index=[0, 2])
  426. result = s1.expanding().cov(s2)
  427. expected = Series([None, None, 2.0])
  428. tm.assert_series_equal(result, expected)
  429. s2a = Series([1, None, 3], index=[0, 1, 2])
  430. result = s1.expanding().cov(s2a)
  431. tm.assert_series_equal(result, expected)
  432. s1 = Series([7, 8, 10], index=[0, 1, 3])
  433. s2 = Series([7, 9, 10], index=[0, 2, 3])
  434. result = s1.expanding().cov(s2)
  435. expected = Series([None, None, None, 4.5])
  436. tm.assert_series_equal(result, expected)
  437. def test_expanding_corr_diff_index():
  438. # GH 7512
  439. s1 = Series([1, 2, 3], index=[0, 1, 2])
  440. s2 = Series([1, 3], index=[0, 2])
  441. result = s1.expanding().corr(s2)
  442. expected = Series([None, None, 1.0])
  443. tm.assert_series_equal(result, expected)
  444. s2a = Series([1, None, 3], index=[0, 1, 2])
  445. result = s1.expanding().corr(s2a)
  446. tm.assert_series_equal(result, expected)
  447. s1 = Series([7, 8, 10], index=[0, 1, 3])
  448. s2 = Series([7, 9, 10], index=[0, 2, 3])
  449. result = s1.expanding().corr(s2)
  450. expected = Series([None, None, None, 1.0])
  451. tm.assert_series_equal(result, expected)
  452. def test_expanding_cov_pairwise_diff_length():
  453. # GH 7512
  454. df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=Index(["A", "B"], name="foo"))
  455. df1a = DataFrame(
  456. [[1, 5], [3, 9]], index=[0, 2], columns=Index(["A", "B"], name="foo")
  457. )
  458. df2 = DataFrame(
  459. [[5, 6], [None, None], [2, 1]], columns=Index(["X", "Y"], name="foo")
  460. )
  461. df2a = DataFrame(
  462. [[5, 6], [2, 1]], index=[0, 2], columns=Index(["X", "Y"], name="foo")
  463. )
  464. # TODO: xref gh-15826
  465. # .loc is not preserving the names
  466. result1 = df1.expanding().cov(df2, pairwise=True).loc[2]
  467. result2 = df1.expanding().cov(df2a, pairwise=True).loc[2]
  468. result3 = df1a.expanding().cov(df2, pairwise=True).loc[2]
  469. result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2]
  470. expected = DataFrame(
  471. [[-3.0, -6.0], [-5.0, -10.0]],
  472. columns=Index(["A", "B"], name="foo"),
  473. index=Index(["X", "Y"], name="foo"),
  474. )
  475. tm.assert_frame_equal(result1, expected)
  476. tm.assert_frame_equal(result2, expected)
  477. tm.assert_frame_equal(result3, expected)
  478. tm.assert_frame_equal(result4, expected)
  479. def test_expanding_corr_pairwise_diff_length():
  480. # GH 7512
  481. df1 = DataFrame(
  482. [[1, 2], [3, 2], [3, 4]], columns=["A", "B"], index=Index(range(3), name="bar")
  483. )
  484. df1a = DataFrame(
  485. [[1, 2], [3, 4]], index=Index([0, 2], name="bar"), columns=["A", "B"]
  486. )
  487. df2 = DataFrame(
  488. [[5, 6], [None, None], [2, 1]],
  489. columns=["X", "Y"],
  490. index=Index(range(3), name="bar"),
  491. )
  492. df2a = DataFrame(
  493. [[5, 6], [2, 1]], index=Index([0, 2], name="bar"), columns=["X", "Y"]
  494. )
  495. result1 = df1.expanding().corr(df2, pairwise=True).loc[2]
  496. result2 = df1.expanding().corr(df2a, pairwise=True).loc[2]
  497. result3 = df1a.expanding().corr(df2, pairwise=True).loc[2]
  498. result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2]
  499. expected = DataFrame(
  500. [[-1.0, -1.0], [-1.0, -1.0]], columns=["A", "B"], index=Index(["X", "Y"])
  501. )
  502. tm.assert_frame_equal(result1, expected)
  503. tm.assert_frame_equal(result2, expected)
  504. tm.assert_frame_equal(result3, expected)
  505. tm.assert_frame_equal(result4, expected)
  506. def test_expanding_apply_args_kwargs(engine_and_raw):
  507. def mean_w_arg(x, const):
  508. return np.mean(x) + const
  509. engine, raw = engine_and_raw
  510. df = DataFrame(np.random.default_rng(2).random((20, 3)))
  511. expected = df.expanding().apply(np.mean, engine=engine, raw=raw) + 20.0
  512. result = df.expanding().apply(mean_w_arg, engine=engine, raw=raw, args=(20,))
  513. tm.assert_frame_equal(result, expected)
  514. result = df.expanding().apply(mean_w_arg, raw=raw, kwargs={"const": 20})
  515. tm.assert_frame_equal(result, expected)
  516. def test_numeric_only_frame(arithmetic_win_operators, numeric_only):
  517. # GH#46560
  518. kernel = arithmetic_win_operators
  519. df = DataFrame({"a": [1], "b": 2, "c": 3})
  520. df["c"] = df["c"].astype(object)
  521. expanding = df.expanding()
  522. op = getattr(expanding, kernel, None)
  523. if op is not None:
  524. result = op(numeric_only=numeric_only)
  525. columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
  526. expected = df[columns].agg([kernel]).reset_index(drop=True).astype(float)
  527. assert list(expected.columns) == columns
  528. tm.assert_frame_equal(result, expected)
  529. @pytest.mark.parametrize("kernel", ["corr", "cov"])
  530. @pytest.mark.parametrize("use_arg", [True, False])
  531. def test_numeric_only_corr_cov_frame(kernel, numeric_only, use_arg):
  532. # GH#46560
  533. df = DataFrame({"a": [1, 2, 3], "b": 2, "c": 3})
  534. df["c"] = df["c"].astype(object)
  535. arg = (df,) if use_arg else ()
  536. expanding = df.expanding()
  537. op = getattr(expanding, kernel)
  538. result = op(*arg, numeric_only=numeric_only)
  539. # Compare result to op using float dtypes, dropping c when numeric_only is True
  540. columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
  541. df2 = df[columns].astype(float)
  542. arg2 = (df2,) if use_arg else ()
  543. expanding2 = df2.expanding()
  544. op2 = getattr(expanding2, kernel)
  545. expected = op2(*arg2, numeric_only=numeric_only)
  546. tm.assert_frame_equal(result, expected)
  547. @pytest.mark.parametrize("dtype", [int, object])
  548. def test_numeric_only_series(arithmetic_win_operators, numeric_only, dtype):
  549. # GH#46560
  550. kernel = arithmetic_win_operators
  551. ser = Series([1], dtype=dtype)
  552. expanding = ser.expanding()
  553. op = getattr(expanding, kernel)
  554. if numeric_only and dtype is object:
  555. msg = f"Expanding.{kernel} does not implement numeric_only"
  556. with pytest.raises(NotImplementedError, match=msg):
  557. op(numeric_only=numeric_only)
  558. else:
  559. result = op(numeric_only=numeric_only)
  560. expected = ser.agg([kernel]).reset_index(drop=True).astype(float)
  561. tm.assert_series_equal(result, expected)
  562. @pytest.mark.parametrize("kernel", ["corr", "cov"])
  563. @pytest.mark.parametrize("use_arg", [True, False])
  564. @pytest.mark.parametrize("dtype", [int, object])
  565. def test_numeric_only_corr_cov_series(kernel, use_arg, numeric_only, dtype):
  566. # GH#46560
  567. ser = Series([1, 2, 3], dtype=dtype)
  568. arg = (ser,) if use_arg else ()
  569. expanding = ser.expanding()
  570. op = getattr(expanding, kernel)
  571. if numeric_only and dtype is object:
  572. msg = f"Expanding.{kernel} does not implement numeric_only"
  573. with pytest.raises(NotImplementedError, match=msg):
  574. op(*arg, numeric_only=numeric_only)
  575. else:
  576. result = op(*arg, numeric_only=numeric_only)
  577. ser2 = ser.astype(float)
  578. arg2 = (ser2,) if use_arg else ()
  579. expanding2 = ser2.expanding()
  580. op2 = getattr(expanding2, kernel)
  581. expected = op2(*arg2, numeric_only=numeric_only)
  582. tm.assert_series_equal(result, expected)
  583. def test_keyword_quantile_deprecated():
  584. # GH #52550
  585. ser = Series([1, 2, 3, 4])
  586. with tm.assert_produces_warning(FutureWarning):
  587. ser.expanding().quantile(quantile=0.5)