# test_interpolate.py
  1. import numpy as np
  2. import pytest
  3. import pandas.util._test_decorators as td
  4. import pandas as pd
  5. from pandas import (
  6. Index,
  7. MultiIndex,
  8. Series,
  9. date_range,
  10. isna,
  11. )
  12. import pandas._testing as tm
  13. @pytest.fixture(
  14. params=[
  15. "linear",
  16. "index",
  17. "values",
  18. "nearest",
  19. "slinear",
  20. "zero",
  21. "quadratic",
  22. "cubic",
  23. "barycentric",
  24. "krogh",
  25. "polynomial",
  26. "spline",
  27. "piecewise_polynomial",
  28. "from_derivatives",
  29. "pchip",
  30. "akima",
  31. "cubicspline",
  32. ]
  33. )
  34. def nontemporal_method(request):
  35. """Fixture that returns an (method name, required kwargs) pair.
  36. This fixture does not include method 'time' as a parameterization; that
  37. method requires a Series with a DatetimeIndex, and is generally tested
  38. separately from these non-temporal methods.
  39. """
  40. method = request.param
  41. kwargs = {"order": 1} if method in ("spline", "polynomial") else {}
  42. return method, kwargs
  43. @pytest.fixture(
  44. params=[
  45. "linear",
  46. "slinear",
  47. "zero",
  48. "quadratic",
  49. "cubic",
  50. "barycentric",
  51. "krogh",
  52. "polynomial",
  53. "spline",
  54. "piecewise_polynomial",
  55. "from_derivatives",
  56. "pchip",
  57. "akima",
  58. "cubicspline",
  59. ]
  60. )
  61. def interp_methods_ind(request):
  62. """Fixture that returns a (method name, required kwargs) pair to
  63. be tested for various Index types.
  64. This fixture does not include methods - 'time', 'index', 'nearest',
  65. 'values' as a parameterization
  66. """
  67. method = request.param
  68. kwargs = {"order": 1} if method in ("spline", "polynomial") else {}
  69. return method, kwargs
class TestSeriesInterpolateData:
    """Tests for ``Series.interpolate``: methods, limits and index types."""
  71. @pytest.mark.xfail(reason="EA.fillna does not handle 'linear' method")
  72. def test_interpolate_period_values(self):
  73. orig = Series(date_range("2012-01-01", periods=5))
  74. ser = orig.copy()
  75. ser[2] = pd.NaT
  76. # period cast
  77. ser_per = ser.dt.to_period("D")
  78. res_per = ser_per.interpolate()
  79. expected_per = orig.dt.to_period("D")
  80. tm.assert_series_equal(res_per, expected_per)
  81. def test_interpolate(self, datetime_series):
  82. ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index)
  83. ts_copy = ts.copy()
  84. ts_copy[5:10] = np.nan
  85. linear_interp = ts_copy.interpolate(method="linear")
  86. tm.assert_series_equal(linear_interp, ts)
  87. ord_ts = Series(
  88. [d.toordinal() for d in datetime_series.index], index=datetime_series.index
  89. ).astype(float)
  90. ord_ts_copy = ord_ts.copy()
  91. ord_ts_copy[5:10] = np.nan
  92. time_interp = ord_ts_copy.interpolate(method="time")
  93. tm.assert_series_equal(time_interp, ord_ts)
  94. def test_interpolate_time_raises_for_non_timeseries(self):
  95. # When method='time' is used on a non-TimeSeries that contains a null
  96. # value, a ValueError should be raised.
  97. non_ts = Series([0, 1, 2, np.nan])
  98. msg = "time-weighted interpolation only works on Series.* with a DatetimeIndex"
  99. with pytest.raises(ValueError, match=msg):
  100. non_ts.interpolate(method="time")
  101. def test_interpolate_cubicspline(self):
  102. pytest.importorskip("scipy")
  103. ser = Series([10, 11, 12, 13])
  104. expected = Series(
  105. [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
  106. index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
  107. )
  108. # interpolate at new_index
  109. new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
  110. float
  111. )
  112. result = ser.reindex(new_index).interpolate(method="cubicspline").loc[1:3]
  113. tm.assert_series_equal(result, expected)
  114. def test_interpolate_pchip(self):
  115. pytest.importorskip("scipy")
  116. ser = Series(np.sort(np.random.default_rng(2).uniform(size=100)))
  117. # interpolate at new_index
  118. new_index = ser.index.union(
  119. Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])
  120. ).astype(float)
  121. interp_s = ser.reindex(new_index).interpolate(method="pchip")
  122. # does not blow up, GH5977
  123. interp_s.loc[49:51]
  124. def test_interpolate_akima(self):
  125. pytest.importorskip("scipy")
  126. ser = Series([10, 11, 12, 13])
  127. # interpolate at new_index where `der` is zero
  128. expected = Series(
  129. [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
  130. index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
  131. )
  132. new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
  133. float
  134. )
  135. interp_s = ser.reindex(new_index).interpolate(method="akima")
  136. tm.assert_series_equal(interp_s.loc[1:3], expected)
  137. # interpolate at new_index where `der` is a non-zero int
  138. expected = Series(
  139. [11.0, 1.0, 1.0, 1.0, 12.0, 1.0, 1.0, 1.0, 13.0],
  140. index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
  141. )
  142. new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
  143. float
  144. )
  145. interp_s = ser.reindex(new_index).interpolate(method="akima", der=1)
  146. tm.assert_series_equal(interp_s.loc[1:3], expected)
  147. def test_interpolate_piecewise_polynomial(self):
  148. pytest.importorskip("scipy")
  149. ser = Series([10, 11, 12, 13])
  150. expected = Series(
  151. [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
  152. index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
  153. )
  154. # interpolate at new_index
  155. new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
  156. float
  157. )
  158. interp_s = ser.reindex(new_index).interpolate(method="piecewise_polynomial")
  159. tm.assert_series_equal(interp_s.loc[1:3], expected)
  160. def test_interpolate_from_derivatives(self):
  161. pytest.importorskip("scipy")
  162. ser = Series([10, 11, 12, 13])
  163. expected = Series(
  164. [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
  165. index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
  166. )
  167. # interpolate at new_index
  168. new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
  169. float
  170. )
  171. interp_s = ser.reindex(new_index).interpolate(method="from_derivatives")
  172. tm.assert_series_equal(interp_s.loc[1:3], expected)
  173. @pytest.mark.parametrize(
  174. "kwargs",
  175. [
  176. {},
  177. pytest.param(
  178. {"method": "polynomial", "order": 1}, marks=td.skip_if_no("scipy")
  179. ),
  180. ],
  181. )
  182. def test_interpolate_corners(self, kwargs):
  183. s = Series([np.nan, np.nan])
  184. tm.assert_series_equal(s.interpolate(**kwargs), s)
  185. s = Series([], dtype=object).interpolate()
  186. tm.assert_series_equal(s.interpolate(**kwargs), s)
  187. def test_interpolate_index_values(self):
  188. s = Series(np.nan, index=np.sort(np.random.default_rng(2).random(30)))
  189. s.loc[::3] = np.random.default_rng(2).standard_normal(10)
  190. vals = s.index.values.astype(float)
  191. result = s.interpolate(method="index")
  192. expected = s.copy()
  193. bad = isna(expected.values)
  194. good = ~bad
  195. expected = Series(
  196. np.interp(vals[bad], vals[good], s.values[good]), index=s.index[bad]
  197. )
  198. tm.assert_series_equal(result[bad], expected)
  199. # 'values' is synonymous with 'index' for the method kwarg
  200. other_result = s.interpolate(method="values")
  201. tm.assert_series_equal(other_result, result)
  202. tm.assert_series_equal(other_result[bad], expected)
  203. def test_interpolate_non_ts(self):
  204. s = Series([1, 3, np.nan, np.nan, np.nan, 11])
  205. msg = (
  206. "time-weighted interpolation only works on Series or DataFrames "
  207. "with a DatetimeIndex"
  208. )
  209. with pytest.raises(ValueError, match=msg):
  210. s.interpolate(method="time")
  211. @pytest.mark.parametrize(
  212. "kwargs",
  213. [
  214. {},
  215. pytest.param(
  216. {"method": "polynomial", "order": 1}, marks=td.skip_if_no("scipy")
  217. ),
  218. ],
  219. )
  220. def test_nan_interpolate(self, kwargs):
  221. s = Series([0, 1, np.nan, 3])
  222. result = s.interpolate(**kwargs)
  223. expected = Series([0.0, 1.0, 2.0, 3.0])
  224. tm.assert_series_equal(result, expected)
  225. def test_nan_irregular_index(self):
  226. s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9])
  227. result = s.interpolate()
  228. expected = Series([1.0, 2.0, 3.0, 4.0], index=[1, 3, 5, 9])
  229. tm.assert_series_equal(result, expected)
  230. def test_nan_str_index(self):
  231. s = Series([0, 1, 2, np.nan], index=list("abcd"))
  232. result = s.interpolate()
  233. expected = Series([0.0, 1.0, 2.0, 2.0], index=list("abcd"))
  234. tm.assert_series_equal(result, expected)
  235. def test_interp_quad(self):
  236. pytest.importorskip("scipy")
  237. sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4])
  238. result = sq.interpolate(method="quadratic")
  239. expected = Series([1.0, 4.0, 9.0, 16.0], index=[1, 2, 3, 4])
  240. tm.assert_series_equal(result, expected)
  241. def test_interp_scipy_basic(self):
  242. pytest.importorskip("scipy")
  243. s = Series([1, 3, np.nan, 12, np.nan, 25])
  244. # slinear
  245. expected = Series([1.0, 3.0, 7.5, 12.0, 18.5, 25.0])
  246. result = s.interpolate(method="slinear")
  247. tm.assert_series_equal(result, expected)
  248. msg = "The 'downcast' keyword in Series.interpolate is deprecated"
  249. with tm.assert_produces_warning(FutureWarning, match=msg):
  250. result = s.interpolate(method="slinear", downcast="infer")
  251. tm.assert_series_equal(result, expected)
  252. # nearest
  253. expected = Series([1, 3, 3, 12, 12, 25])
  254. result = s.interpolate(method="nearest")
  255. tm.assert_series_equal(result, expected.astype("float"))
  256. with tm.assert_produces_warning(FutureWarning, match=msg):
  257. result = s.interpolate(method="nearest", downcast="infer")
  258. tm.assert_series_equal(result, expected)
  259. # zero
  260. expected = Series([1, 3, 3, 12, 12, 25])
  261. result = s.interpolate(method="zero")
  262. tm.assert_series_equal(result, expected.astype("float"))
  263. with tm.assert_produces_warning(FutureWarning, match=msg):
  264. result = s.interpolate(method="zero", downcast="infer")
  265. tm.assert_series_equal(result, expected)
  266. # quadratic
  267. # GH #15662.
  268. expected = Series([1, 3.0, 6.823529, 12.0, 18.058824, 25.0])
  269. result = s.interpolate(method="quadratic")
  270. tm.assert_series_equal(result, expected)
  271. with tm.assert_produces_warning(FutureWarning, match=msg):
  272. result = s.interpolate(method="quadratic", downcast="infer")
  273. tm.assert_series_equal(result, expected)
  274. # cubic
  275. expected = Series([1.0, 3.0, 6.8, 12.0, 18.2, 25.0])
  276. result = s.interpolate(method="cubic")
  277. tm.assert_series_equal(result, expected)
  278. def test_interp_limit(self):
  279. s = Series([1, 3, np.nan, np.nan, np.nan, 11])
  280. expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0])
  281. result = s.interpolate(method="linear", limit=2)
  282. tm.assert_series_equal(result, expected)
  283. @pytest.mark.parametrize("limit", [-1, 0])
  284. def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method, limit):
  285. # GH 9217: make sure limit is greater than zero.
  286. s = Series([1, 2, np.nan, 4])
  287. method, kwargs = nontemporal_method
  288. with pytest.raises(ValueError, match="Limit must be greater than 0"):
  289. s.interpolate(limit=limit, method=method, **kwargs)
  290. def test_interpolate_invalid_float_limit(self, nontemporal_method):
  291. # GH 9217: make sure limit is an integer.
  292. s = Series([1, 2, np.nan, 4])
  293. method, kwargs = nontemporal_method
  294. limit = 2.0
  295. with pytest.raises(ValueError, match="Limit must be an integer"):
  296. s.interpolate(limit=limit, method=method, **kwargs)
  297. @pytest.mark.parametrize("invalid_method", [None, "nonexistent_method"])
  298. def test_interp_invalid_method(self, invalid_method):
  299. s = Series([1, 3, np.nan, 12, np.nan, 25])
  300. msg = f"method must be one of.* Got '{invalid_method}' instead"
  301. if invalid_method is None:
  302. msg = "'method' should be a string, not None"
  303. with pytest.raises(ValueError, match=msg):
  304. s.interpolate(method=invalid_method)
  305. # When an invalid method and invalid limit (such as -1) are
  306. # provided, the error message reflects the invalid method.
  307. with pytest.raises(ValueError, match=msg):
  308. s.interpolate(method=invalid_method, limit=-1)
  309. def test_interp_invalid_method_and_value(self):
  310. # GH#36624
  311. ser = Series([1, 3, np.nan, 12, np.nan, 25])
  312. msg = "'fill_value' is not a valid keyword for Series.interpolate"
  313. msg2 = "Series.interpolate with method=pad"
  314. with pytest.raises(ValueError, match=msg):
  315. with tm.assert_produces_warning(FutureWarning, match=msg2):
  316. ser.interpolate(fill_value=3, method="pad")
  317. def test_interp_limit_forward(self):
  318. s = Series([1, 3, np.nan, np.nan, np.nan, 11])
  319. # Provide 'forward' (the default) explicitly here.
  320. expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0])
  321. result = s.interpolate(method="linear", limit=2, limit_direction="forward")
  322. tm.assert_series_equal(result, expected)
  323. result = s.interpolate(method="linear", limit=2, limit_direction="FORWARD")
  324. tm.assert_series_equal(result, expected)
  325. def test_interp_unlimited(self):
  326. # these test are for issue #16282 default Limit=None is unlimited
  327. s = Series([np.nan, 1.0, 3.0, np.nan, np.nan, np.nan, 11.0, np.nan])
  328. expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0])
  329. result = s.interpolate(method="linear", limit_direction="both")
  330. tm.assert_series_equal(result, expected)
  331. expected = Series([np.nan, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0])
  332. result = s.interpolate(method="linear", limit_direction="forward")
  333. tm.assert_series_equal(result, expected)
  334. expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, np.nan])
  335. result = s.interpolate(method="linear", limit_direction="backward")
  336. tm.assert_series_equal(result, expected)
  337. def test_interp_limit_bad_direction(self):
  338. s = Series([1, 3, np.nan, np.nan, np.nan, 11])
  339. msg = (
  340. r"Invalid limit_direction: expecting one of \['forward', "
  341. r"'backward', 'both'\], got 'abc'"
  342. )
  343. with pytest.raises(ValueError, match=msg):
  344. s.interpolate(method="linear", limit=2, limit_direction="abc")
  345. # raises an error even if no limit is specified.
  346. with pytest.raises(ValueError, match=msg):
  347. s.interpolate(method="linear", limit_direction="abc")
  348. # limit_area introduced GH #16284
  349. def test_interp_limit_area(self):
  350. # These tests are for issue #9218 -- fill NaNs in both directions.
  351. s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan])
  352. expected = Series([np.nan, np.nan, 3.0, 4.0, 5.0, 6.0, 7.0, np.nan, np.nan])
  353. result = s.interpolate(method="linear", limit_area="inside")
  354. tm.assert_series_equal(result, expected)
  355. expected = Series(
  356. [np.nan, np.nan, 3.0, 4.0, np.nan, np.nan, 7.0, np.nan, np.nan]
  357. )
  358. result = s.interpolate(method="linear", limit_area="inside", limit=1)
  359. tm.assert_series_equal(result, expected)
  360. expected = Series([np.nan, np.nan, 3.0, 4.0, np.nan, 6.0, 7.0, np.nan, np.nan])
  361. result = s.interpolate(
  362. method="linear", limit_area="inside", limit_direction="both", limit=1
  363. )
  364. tm.assert_series_equal(result, expected)
  365. expected = Series([np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0])
  366. result = s.interpolate(method="linear", limit_area="outside")
  367. tm.assert_series_equal(result, expected)
  368. expected = Series(
  369. [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan]
  370. )
  371. result = s.interpolate(method="linear", limit_area="outside", limit=1)
  372. tm.assert_series_equal(result, expected)
  373. expected = Series([np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan])
  374. result = s.interpolate(
  375. method="linear", limit_area="outside", limit_direction="both", limit=1
  376. )
  377. tm.assert_series_equal(result, expected)
  378. expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan])
  379. result = s.interpolate(
  380. method="linear", limit_area="outside", limit_direction="backward"
  381. )
  382. tm.assert_series_equal(result, expected)
  383. # raises an error even if limit type is wrong.
  384. msg = r"Invalid limit_area: expecting one of \['inside', 'outside'\], got abc"
  385. with pytest.raises(ValueError, match=msg):
  386. s.interpolate(method="linear", limit_area="abc")
  387. @pytest.mark.parametrize(
  388. "method, limit_direction, expected",
  389. [
  390. ("pad", "backward", "forward"),
  391. ("ffill", "backward", "forward"),
  392. ("backfill", "forward", "backward"),
  393. ("bfill", "forward", "backward"),
  394. ("pad", "both", "forward"),
  395. ("ffill", "both", "forward"),
  396. ("backfill", "both", "backward"),
  397. ("bfill", "both", "backward"),
  398. ],
  399. )
  400. def test_interp_limit_direction_raises(self, method, limit_direction, expected):
  401. # https://github.com/pandas-dev/pandas/pull/34746
  402. s = Series([1, 2, 3])
  403. msg = f"`limit_direction` must be '{expected}' for method `{method}`"
  404. msg2 = "Series.interpolate with method="
  405. with pytest.raises(ValueError, match=msg):
  406. with tm.assert_produces_warning(FutureWarning, match=msg2):
  407. s.interpolate(method=method, limit_direction=limit_direction)
  408. @pytest.mark.parametrize(
  409. "data, expected_data, kwargs",
  410. (
  411. (
  412. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  413. [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
  414. {"method": "pad", "limit_area": "inside"},
  415. ),
  416. (
  417. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  418. [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
  419. {"method": "pad", "limit_area": "inside", "limit": 1},
  420. ),
  421. (
  422. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  423. [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
  424. {"method": "pad", "limit_area": "outside"},
  425. ),
  426. (
  427. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  428. [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
  429. {"method": "pad", "limit_area": "outside", "limit": 1},
  430. ),
  431. (
  432. [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
  433. [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
  434. {"method": "pad", "limit_area": "outside", "limit": 1},
  435. ),
  436. (
  437. range(5),
  438. range(5),
  439. {"method": "pad", "limit_area": "outside", "limit": 1},
  440. ),
  441. ),
  442. )
  443. def test_interp_limit_area_with_pad(self, data, expected_data, kwargs):
  444. # GH26796
  445. s = Series(data)
  446. expected = Series(expected_data)
  447. msg = "Series.interpolate with method=pad"
  448. with tm.assert_produces_warning(FutureWarning, match=msg):
  449. result = s.interpolate(**kwargs)
  450. tm.assert_series_equal(result, expected)
  451. @pytest.mark.parametrize(
  452. "data, expected_data, kwargs",
  453. (
  454. (
  455. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  456. [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
  457. {"method": "bfill", "limit_area": "inside"},
  458. ),
  459. (
  460. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  461. [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
  462. {"method": "bfill", "limit_area": "inside", "limit": 1},
  463. ),
  464. (
  465. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  466. [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
  467. {"method": "bfill", "limit_area": "outside"},
  468. ),
  469. (
  470. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  471. [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
  472. {"method": "bfill", "limit_area": "outside", "limit": 1},
  473. ),
  474. ),
  475. )
  476. def test_interp_limit_area_with_backfill(self, data, expected_data, kwargs):
  477. # GH26796
  478. s = Series(data)
  479. expected = Series(expected_data)
  480. msg = "Series.interpolate with method=bfill"
  481. with tm.assert_produces_warning(FutureWarning, match=msg):
  482. result = s.interpolate(**kwargs)
  483. tm.assert_series_equal(result, expected)
  484. def test_interp_limit_direction(self):
  485. # These tests are for issue #9218 -- fill NaNs in both directions.
  486. s = Series([1, 3, np.nan, np.nan, np.nan, 11])
  487. expected = Series([1.0, 3.0, np.nan, 7.0, 9.0, 11.0])
  488. result = s.interpolate(method="linear", limit=2, limit_direction="backward")
  489. tm.assert_series_equal(result, expected)
  490. expected = Series([1.0, 3.0, 5.0, np.nan, 9.0, 11.0])
  491. result = s.interpolate(method="linear", limit=1, limit_direction="both")
  492. tm.assert_series_equal(result, expected)
  493. # Check that this works on a longer series of nans.
  494. s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, np.nan])
  495. expected = Series([1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0])
  496. result = s.interpolate(method="linear", limit=2, limit_direction="both")
  497. tm.assert_series_equal(result, expected)
  498. expected = Series(
  499. [1.0, 3.0, 4.0, np.nan, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0]
  500. )
  501. result = s.interpolate(method="linear", limit=1, limit_direction="both")
  502. tm.assert_series_equal(result, expected)
  503. def test_interp_limit_to_ends(self):
  504. # These test are for issue #10420 -- flow back to beginning.
  505. s = Series([np.nan, np.nan, 5, 7, 9, np.nan])
  506. expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, np.nan])
  507. result = s.interpolate(method="linear", limit=2, limit_direction="backward")
  508. tm.assert_series_equal(result, expected)
  509. expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, 9.0])
  510. result = s.interpolate(method="linear", limit=2, limit_direction="both")
  511. tm.assert_series_equal(result, expected)
  512. def test_interp_limit_before_ends(self):
  513. # These test are for issue #11115 -- limit ends properly.
  514. s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan])
  515. expected = Series([np.nan, np.nan, 5.0, 7.0, 7.0, np.nan])
  516. result = s.interpolate(method="linear", limit=1, limit_direction="forward")
  517. tm.assert_series_equal(result, expected)
  518. expected = Series([np.nan, 5.0, 5.0, 7.0, np.nan, np.nan])
  519. result = s.interpolate(method="linear", limit=1, limit_direction="backward")
  520. tm.assert_series_equal(result, expected)
  521. expected = Series([np.nan, 5.0, 5.0, 7.0, 7.0, np.nan])
  522. result = s.interpolate(method="linear", limit=1, limit_direction="both")
  523. tm.assert_series_equal(result, expected)
  524. def test_interp_all_good(self):
  525. pytest.importorskip("scipy")
  526. s = Series([1, 2, 3])
  527. result = s.interpolate(method="polynomial", order=1)
  528. tm.assert_series_equal(result, s)
  529. # non-scipy
  530. result = s.interpolate()
  531. tm.assert_series_equal(result, s)
  532. @pytest.mark.parametrize(
  533. "check_scipy", [False, pytest.param(True, marks=td.skip_if_no("scipy"))]
  534. )
  535. def test_interp_multiIndex(self, check_scipy):
  536. idx = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")])
  537. s = Series([1, 2, np.nan], index=idx)
  538. expected = s.copy()
  539. expected.loc[2] = 2
  540. result = s.interpolate()
  541. tm.assert_series_equal(result, expected)
  542. msg = "Only `method=linear` interpolation is supported on MultiIndexes"
  543. if check_scipy:
  544. with pytest.raises(ValueError, match=msg):
  545. s.interpolate(method="polynomial", order=1)
  546. def test_interp_nonmono_raise(self):
  547. pytest.importorskip("scipy")
  548. s = Series([1, np.nan, 3], index=[0, 2, 1])
  549. msg = "krogh interpolation requires that the index be monotonic"
  550. with pytest.raises(ValueError, match=msg):
  551. s.interpolate(method="krogh")
  552. @pytest.mark.parametrize("method", ["nearest", "pad"])
  553. def test_interp_datetime64(self, method, tz_naive_fixture):
  554. pytest.importorskip("scipy")
  555. df = Series(
  556. [1, np.nan, 3], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture)
  557. )
  558. warn = None if method == "nearest" else FutureWarning
  559. msg = "Series.interpolate with method=pad is deprecated"
  560. with tm.assert_produces_warning(warn, match=msg):
  561. result = df.interpolate(method=method)
  562. if warn is not None:
  563. # check the "use ffill instead" is equivalent
  564. alt = df.ffill()
  565. tm.assert_series_equal(result, alt)
  566. expected = Series(
  567. [1.0, 1.0, 3.0],
  568. index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture),
  569. )
  570. tm.assert_series_equal(result, expected)
  571. def test_interp_pad_datetime64tz_values(self):
  572. # GH#27628 missing.interpolate_2d should handle datetimetz values
  573. dti = date_range("2015-04-05", periods=3, tz="US/Central")
  574. ser = Series(dti)
  575. ser[1] = pd.NaT
  576. msg = "Series.interpolate with method=pad is deprecated"
  577. with tm.assert_produces_warning(FutureWarning, match=msg):
  578. result = ser.interpolate(method="pad")
  579. # check the "use ffill instead" is equivalent
  580. alt = ser.ffill()
  581. tm.assert_series_equal(result, alt)
  582. expected = Series(dti)
  583. expected[1] = expected[0]
  584. tm.assert_series_equal(result, expected)
  585. def test_interp_limit_no_nans(self):
  586. # GH 7173
  587. s = Series([1.0, 2.0, 3.0])
  588. result = s.interpolate(limit=1)
  589. expected = s
  590. tm.assert_series_equal(result, expected)
  591. @pytest.mark.parametrize("method", ["polynomial", "spline"])
  592. def test_no_order(self, method):
  593. # see GH-10633, GH-24014
  594. pytest.importorskip("scipy")
  595. s = Series([0, 1, np.nan, 3])
  596. msg = "You must specify the order of the spline or polynomial"
  597. with pytest.raises(ValueError, match=msg):
  598. s.interpolate(method=method)
  599. @pytest.mark.parametrize("order", [-1, -1.0, 0, 0.0, np.nan])
  600. def test_interpolate_spline_invalid_order(self, order):
  601. pytest.importorskip("scipy")
  602. s = Series([0, 1, np.nan, 3])
  603. msg = "order needs to be specified and greater than 0"
  604. with pytest.raises(ValueError, match=msg):
  605. s.interpolate(method="spline", order=order)
  606. def test_spline(self):
  607. pytest.importorskip("scipy")
  608. s = Series([1, 2, np.nan, 4, 5, np.nan, 7])
  609. result = s.interpolate(method="spline", order=1)
  610. expected = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
  611. tm.assert_series_equal(result, expected)
  612. def test_spline_extrapolate(self):
  613. pytest.importorskip("scipy")
  614. s = Series([1, 2, 3, 4, np.nan, 6, np.nan])
  615. result3 = s.interpolate(method="spline", order=1, ext=3)
  616. expected3 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0])
  617. tm.assert_series_equal(result3, expected3)
  618. result1 = s.interpolate(method="spline", order=1, ext=0)
  619. expected1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
  620. tm.assert_series_equal(result1, expected1)
  621. def test_spline_smooth(self):
  622. pytest.importorskip("scipy")
  623. s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7])
  624. assert (
  625. s.interpolate(method="spline", order=3, s=0)[5]
  626. != s.interpolate(method="spline", order=3)[5]
  627. )
  628. def test_spline_interpolation(self):
  629. # Explicit cast to float to avoid implicit cast when setting np.nan
  630. pytest.importorskip("scipy")
  631. s = Series(np.arange(10) ** 2, dtype="float")
  632. s[np.random.default_rng(2).integers(0, 9, 3)] = np.nan
  633. result1 = s.interpolate(method="spline", order=1)
  634. expected1 = s.interpolate(method="spline", order=1)
  635. tm.assert_series_equal(result1, expected1)
  636. def test_interp_timedelta64(self):
  637. # GH 6424
  638. df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 3]))
  639. result = df.interpolate(method="time")
  640. expected = Series([1.0, 2.0, 3.0], index=pd.to_timedelta([1, 2, 3]))
  641. tm.assert_series_equal(result, expected)
  642. # test for non uniform spacing
  643. df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 4]))
  644. result = df.interpolate(method="time")
  645. expected = Series([1.0, 1.666667, 3.0], index=pd.to_timedelta([1, 2, 4]))
  646. tm.assert_series_equal(result, expected)
  647. def test_series_interpolate_method_values(self):
  648. # GH#1646
  649. rng = date_range("1/1/2000", "1/20/2000", freq="D")
  650. ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
  651. ts[::2] = np.nan
  652. result = ts.interpolate(method="values")
  653. exp = ts.interpolate()
  654. tm.assert_series_equal(result, exp)
  655. def test_series_interpolate_intraday(self):
  656. # #1698
  657. index = date_range("1/1/2012", periods=4, freq="12D")
  658. ts = Series([0, 12, 24, 36], index)
  659. new_index = index.append(index + pd.DateOffset(days=1)).sort_values()
  660. exp = ts.reindex(new_index).interpolate(method="time")
  661. index = date_range("1/1/2012", periods=4, freq="12h")
  662. ts = Series([0, 12, 24, 36], index)
  663. new_index = index.append(index + pd.DateOffset(hours=1)).sort_values()
  664. result = ts.reindex(new_index).interpolate(method="time")
  665. tm.assert_numpy_array_equal(result.values, exp.values)
  666. @pytest.mark.parametrize(
  667. "ind",
  668. [
  669. ["a", "b", "c", "d"],
  670. pd.period_range(start="2019-01-01", periods=4),
  671. pd.interval_range(start=0, end=4),
  672. ],
  673. )
  674. def test_interp_non_timedelta_index(self, interp_methods_ind, ind):
  675. # gh 21662
  676. df = pd.DataFrame([0, 1, np.nan, 3], index=ind)
  677. method, kwargs = interp_methods_ind
  678. if method == "pchip":
  679. pytest.importorskip("scipy")
  680. if method == "linear":
  681. result = df[0].interpolate(**kwargs)
  682. expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind)
  683. tm.assert_series_equal(result, expected)
  684. else:
  685. expected_error = (
  686. "Index column must be numeric or datetime type when "
  687. f"using {method} method other than linear. "
  688. "Try setting a numeric or datetime index column before "
  689. "interpolating."
  690. )
  691. with pytest.raises(ValueError, match=expected_error):
  692. df[0].interpolate(method=method, **kwargs)
  693. def test_interpolate_timedelta_index(self, request, interp_methods_ind):
  694. """
  695. Tests for non numerical index types - object, period, timedelta
  696. Note that all methods except time, index, nearest and values
  697. are tested here.
  698. """
  699. # gh 21662
  700. pytest.importorskip("scipy")
  701. ind = pd.timedelta_range(start=1, periods=4)
  702. df = pd.DataFrame([0, 1, np.nan, 3], index=ind)
  703. method, kwargs = interp_methods_ind
  704. if method in {"cubic", "zero"}:
  705. request.applymarker(
  706. pytest.mark.xfail(
  707. reason=f"{method} interpolation is not supported for TimedeltaIndex"
  708. )
  709. )
  710. result = df[0].interpolate(method=method, **kwargs)
  711. expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind)
  712. tm.assert_series_equal(result, expected)
  713. @pytest.mark.parametrize(
  714. "ascending, expected_values",
  715. [(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])],
  716. )
  717. def test_interpolate_unsorted_index(self, ascending, expected_values):
  718. # GH 21037
  719. ts = Series(data=[10, 9, np.nan, 2, 1], index=[10, 9, 3, 2, 1])
  720. result = ts.sort_index(ascending=ascending).interpolate(method="index")
  721. expected = Series(data=expected_values, index=expected_values, dtype=float)
  722. tm.assert_series_equal(result, expected)
  723. def test_interpolate_asfreq_raises(self):
  724. ser = Series(["a", None, "b"], dtype=object)
  725. msg2 = "Series.interpolate with object dtype"
  726. msg = "Invalid fill method"
  727. with pytest.raises(ValueError, match=msg):
  728. with tm.assert_produces_warning(FutureWarning, match=msg2):
  729. ser.interpolate(method="asfreq")
  730. def test_interpolate_fill_value(self):
  731. # GH#54920
  732. pytest.importorskip("scipy")
  733. ser = Series([np.nan, 0, 1, np.nan, 3, np.nan])
  734. result = ser.interpolate(method="nearest", fill_value=0)
  735. expected = Series([np.nan, 0, 1, 1, 3, 0])
  736. tm.assert_series_equal(result, expected)