test_interpolate.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556
  1. import numpy as np
  2. import pytest
  3. from pandas._config import using_string_dtype
  4. from pandas.compat import WARNING_CHECK_DISABLED
  5. from pandas.errors import ChainedAssignmentError
  6. import pandas.util._test_decorators as td
  7. from pandas import (
  8. DataFrame,
  9. NaT,
  10. Series,
  11. date_range,
  12. )
  13. import pandas._testing as tm
  14. class TestDataFrameInterpolate:
  15. def test_interpolate_complex(self):
  16. # GH#53635
  17. ser = Series([complex("1+1j"), float("nan"), complex("2+2j")])
  18. assert ser.dtype.kind == "c"
  19. res = ser.interpolate()
  20. expected = Series([ser[0], ser[0] * 1.5, ser[2]])
  21. tm.assert_series_equal(res, expected)
  22. df = ser.to_frame()
  23. res = df.interpolate()
  24. expected = expected.to_frame()
  25. tm.assert_frame_equal(res, expected)
  26. def test_interpolate_datetimelike_values(self, frame_or_series):
  27. # GH#11312, GH#51005
  28. orig = Series(date_range("2012-01-01", periods=5))
  29. ser = orig.copy()
  30. ser[2] = NaT
  31. res = frame_or_series(ser).interpolate()
  32. expected = frame_or_series(orig)
  33. tm.assert_equal(res, expected)
  34. # datetime64tz cast
  35. ser_tz = ser.dt.tz_localize("US/Pacific")
  36. res_tz = frame_or_series(ser_tz).interpolate()
  37. expected_tz = frame_or_series(orig.dt.tz_localize("US/Pacific"))
  38. tm.assert_equal(res_tz, expected_tz)
  39. # timedelta64 cast
  40. ser_td = ser - ser[0]
  41. res_td = frame_or_series(ser_td).interpolate()
  42. expected_td = frame_or_series(orig - orig[0])
  43. tm.assert_equal(res_td, expected_td)
  44. def test_interpolate_inplace(self, frame_or_series, using_array_manager, request):
  45. # GH#44749
  46. if using_array_manager and frame_or_series is DataFrame:
  47. mark = pytest.mark.xfail(reason=".values-based in-place check is invalid")
  48. request.applymarker(mark)
  49. obj = frame_or_series([1, np.nan, 2])
  50. orig = obj.values
  51. obj.interpolate(inplace=True)
  52. expected = frame_or_series([1, 1.5, 2])
  53. tm.assert_equal(obj, expected)
  54. # check we operated *actually* inplace
  55. assert np.shares_memory(orig, obj.values)
  56. assert orig.squeeze()[1] == 1.5
  57. def test_interp_basic(self, using_copy_on_write, using_infer_string):
  58. df = DataFrame(
  59. {
  60. "A": [1, 2, np.nan, 4],
  61. "B": [1, 4, 9, np.nan],
  62. "C": [1, 2, 3, 5],
  63. "D": list("abcd"),
  64. }
  65. )
  66. expected = DataFrame(
  67. {
  68. "A": [1.0, 2.0, 3.0, 4.0],
  69. "B": [1.0, 4.0, 9.0, 9.0],
  70. "C": [1, 2, 3, 5],
  71. "D": list("abcd"),
  72. }
  73. )
  74. if using_infer_string:
  75. dtype = "str" if using_infer_string else "object"
  76. msg = f"[Cc]annot interpolate with {dtype} dtype"
  77. with pytest.raises(TypeError, match=msg):
  78. df.interpolate()
  79. return
  80. msg = "DataFrame.interpolate with object dtype"
  81. with tm.assert_produces_warning(FutureWarning, match=msg):
  82. result = df.interpolate()
  83. tm.assert_frame_equal(result, expected)
  84. # check we didn't operate inplace GH#45791
  85. cvalues = df["C"]._values
  86. dvalues = df["D"].values
  87. if using_copy_on_write:
  88. assert np.shares_memory(cvalues, result["C"]._values)
  89. assert np.shares_memory(dvalues, result["D"]._values)
  90. else:
  91. assert not np.shares_memory(cvalues, result["C"]._values)
  92. assert not np.shares_memory(dvalues, result["D"]._values)
  93. with tm.assert_produces_warning(FutureWarning, match=msg):
  94. res = df.interpolate(inplace=True)
  95. assert res is None
  96. tm.assert_frame_equal(df, expected)
  97. # check we DID operate inplace
  98. assert tm.shares_memory(df["C"]._values, cvalues)
  99. assert tm.shares_memory(df["D"]._values, dvalues)
  100. @pytest.mark.xfail(
  101. using_string_dtype(), reason="interpolate doesn't work for string"
  102. )
  103. def test_interp_basic_with_non_range_index(self, using_infer_string):
  104. df = DataFrame(
  105. {
  106. "A": [1, 2, np.nan, 4],
  107. "B": [1, 4, 9, np.nan],
  108. "C": [1, 2, 3, 5],
  109. "D": list("abcd"),
  110. }
  111. )
  112. msg = "DataFrame.interpolate with object dtype"
  113. warning = FutureWarning if not using_infer_string else None
  114. with tm.assert_produces_warning(warning, match=msg):
  115. result = df.set_index("C").interpolate()
  116. expected = df.set_index("C")
  117. expected.loc[3, "A"] = 3
  118. expected.loc[5, "B"] = 9
  119. tm.assert_frame_equal(result, expected)
  120. def test_interp_empty(self):
  121. # https://github.com/pandas-dev/pandas/issues/35598
  122. df = DataFrame()
  123. result = df.interpolate()
  124. assert result is not df
  125. expected = df
  126. tm.assert_frame_equal(result, expected)
  127. def test_interp_bad_method(self):
  128. df = DataFrame(
  129. {
  130. "A": [1, 2, np.nan, 4],
  131. "B": [1, 4, 9, np.nan],
  132. "C": [1, 2, 3, 5],
  133. }
  134. )
  135. msg = (
  136. r"method must be one of \['linear', 'time', 'index', 'values', "
  137. r"'nearest', 'zero', 'slinear', 'quadratic', 'cubic', "
  138. r"'barycentric', 'krogh', 'spline', 'polynomial', "
  139. r"'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima', "
  140. r"'cubicspline'\]. Got 'not_a_method' instead."
  141. )
  142. with pytest.raises(ValueError, match=msg):
  143. df.interpolate(method="not_a_method")
  144. def test_interp_combo(self):
  145. df = DataFrame(
  146. {
  147. "A": [1.0, 2.0, np.nan, 4.0],
  148. "B": [1, 4, 9, np.nan],
  149. "C": [1, 2, 3, 5],
  150. "D": list("abcd"),
  151. }
  152. )
  153. result = df["A"].interpolate()
  154. expected = Series([1.0, 2.0, 3.0, 4.0], name="A")
  155. tm.assert_series_equal(result, expected)
  156. msg = "The 'downcast' keyword in Series.interpolate is deprecated"
  157. with tm.assert_produces_warning(FutureWarning, match=msg):
  158. result = df["A"].interpolate(downcast="infer")
  159. expected = Series([1, 2, 3, 4], name="A")
  160. tm.assert_series_equal(result, expected)
  161. def test_inerpolate_invalid_downcast(self):
  162. # GH#53103
  163. df = DataFrame(
  164. {
  165. "A": [1.0, 2.0, np.nan, 4.0],
  166. "B": [1, 4, 9, np.nan],
  167. "C": [1, 2, 3, 5],
  168. "D": list("abcd"),
  169. }
  170. )
  171. msg = "downcast must be either None or 'infer'"
  172. msg2 = "The 'downcast' keyword in DataFrame.interpolate is deprecated"
  173. msg3 = "The 'downcast' keyword in Series.interpolate is deprecated"
  174. with pytest.raises(ValueError, match=msg):
  175. with tm.assert_produces_warning(FutureWarning, match=msg2):
  176. df.interpolate(downcast="int64")
  177. with pytest.raises(ValueError, match=msg):
  178. with tm.assert_produces_warning(FutureWarning, match=msg3):
  179. df["A"].interpolate(downcast="int64")
  180. def test_interp_nan_idx(self):
  181. df = DataFrame({"A": [1, 2, np.nan, 4], "B": [np.nan, 2, 3, 4]})
  182. df = df.set_index("A")
  183. msg = (
  184. "Interpolation with NaNs in the index has not been implemented. "
  185. "Try filling those NaNs before interpolating."
  186. )
  187. with pytest.raises(NotImplementedError, match=msg):
  188. df.interpolate(method="values")
  189. def test_interp_various(self):
  190. pytest.importorskip("scipy")
  191. df = DataFrame(
  192. {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]}
  193. )
  194. df = df.set_index("C")
  195. expected = df.copy()
  196. result = df.interpolate(method="polynomial", order=1)
  197. expected.loc[3, "A"] = 2.66666667
  198. expected.loc[13, "A"] = 5.76923076
  199. tm.assert_frame_equal(result, expected)
  200. result = df.interpolate(method="cubic")
  201. # GH #15662.
  202. expected.loc[3, "A"] = 2.81547781
  203. expected.loc[13, "A"] = 5.52964175
  204. tm.assert_frame_equal(result, expected)
  205. result = df.interpolate(method="nearest")
  206. expected.loc[3, "A"] = 2
  207. expected.loc[13, "A"] = 5
  208. tm.assert_frame_equal(result, expected, check_dtype=False)
  209. result = df.interpolate(method="quadratic")
  210. expected.loc[3, "A"] = 2.82150771
  211. expected.loc[13, "A"] = 6.12648668
  212. tm.assert_frame_equal(result, expected)
  213. result = df.interpolate(method="slinear")
  214. expected.loc[3, "A"] = 2.66666667
  215. expected.loc[13, "A"] = 5.76923077
  216. tm.assert_frame_equal(result, expected)
  217. result = df.interpolate(method="zero")
  218. expected.loc[3, "A"] = 2.0
  219. expected.loc[13, "A"] = 5
  220. tm.assert_frame_equal(result, expected, check_dtype=False)
  221. def test_interp_alt_scipy(self):
  222. pytest.importorskip("scipy")
  223. df = DataFrame(
  224. {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]}
  225. )
  226. result = df.interpolate(method="barycentric")
  227. expected = df.copy()
  228. expected.loc[2, "A"] = 3
  229. expected.loc[5, "A"] = 6
  230. tm.assert_frame_equal(result, expected)
  231. msg = "The 'downcast' keyword in DataFrame.interpolate is deprecated"
  232. with tm.assert_produces_warning(FutureWarning, match=msg):
  233. result = df.interpolate(method="barycentric", downcast="infer")
  234. tm.assert_frame_equal(result, expected.astype(np.int64))
  235. result = df.interpolate(method="krogh")
  236. expectedk = df.copy()
  237. expectedk["A"] = expected["A"]
  238. tm.assert_frame_equal(result, expectedk)
  239. result = df.interpolate(method="pchip")
  240. expected.loc[2, "A"] = 3
  241. expected.loc[5, "A"] = 6.0
  242. tm.assert_frame_equal(result, expected)
  243. def test_interp_rowwise(self):
  244. df = DataFrame(
  245. {
  246. 0: [1, 2, np.nan, 4],
  247. 1: [2, 3, 4, np.nan],
  248. 2: [np.nan, 4, 5, 6],
  249. 3: [4, np.nan, 6, 7],
  250. 4: [1, 2, 3, 4],
  251. }
  252. )
  253. result = df.interpolate(axis=1)
  254. expected = df.copy()
  255. expected.loc[3, 1] = 5
  256. expected.loc[0, 2] = 3
  257. expected.loc[1, 3] = 3
  258. expected[4] = expected[4].astype(np.float64)
  259. tm.assert_frame_equal(result, expected)
  260. result = df.interpolate(axis=1, method="values")
  261. tm.assert_frame_equal(result, expected)
  262. result = df.interpolate(axis=0)
  263. expected = df.interpolate()
  264. tm.assert_frame_equal(result, expected)
  265. @pytest.mark.parametrize(
  266. "axis_name, axis_number",
  267. [
  268. pytest.param("rows", 0, id="rows_0"),
  269. pytest.param("index", 0, id="index_0"),
  270. pytest.param("columns", 1, id="columns_1"),
  271. ],
  272. )
  273. def test_interp_axis_names(self, axis_name, axis_number):
  274. # GH 29132: test axis names
  275. data = {0: [0, np.nan, 6], 1: [1, np.nan, 7], 2: [2, 5, 8]}
  276. df = DataFrame(data, dtype=np.float64)
  277. result = df.interpolate(axis=axis_name, method="linear")
  278. expected = df.interpolate(axis=axis_number, method="linear")
  279. tm.assert_frame_equal(result, expected)
  280. def test_rowwise_alt(self):
  281. df = DataFrame(
  282. {
  283. 0: [0, 0.5, 1.0, np.nan, 4, 8, np.nan, np.nan, 64],
  284. 1: [1, 2, 3, 4, 3, 2, 1, 0, -1],
  285. }
  286. )
  287. df.interpolate(axis=0)
  288. # TODO: assert something?
  289. @pytest.mark.parametrize(
  290. "check_scipy", [False, pytest.param(True, marks=td.skip_if_no("scipy"))]
  291. )
  292. def test_interp_leading_nans(self, check_scipy):
  293. df = DataFrame(
  294. {"A": [np.nan, np.nan, 0.5, 0.25, 0], "B": [np.nan, -3, -3.5, np.nan, -4]}
  295. )
  296. result = df.interpolate()
  297. expected = df.copy()
  298. expected.loc[3, "B"] = -3.75
  299. tm.assert_frame_equal(result, expected)
  300. if check_scipy:
  301. result = df.interpolate(method="polynomial", order=1)
  302. tm.assert_frame_equal(result, expected)
  303. def test_interp_raise_on_only_mixed(self, axis):
  304. df = DataFrame(
  305. {
  306. "A": [1, 2, np.nan, 4],
  307. "B": ["a", "b", "c", "d"],
  308. "C": [np.nan, 2, 5, 7],
  309. "D": [np.nan, np.nan, 9, 9],
  310. "E": [1, 2, 3, 4],
  311. }
  312. )
  313. msg = (
  314. "Cannot interpolate with all object-dtype columns "
  315. "in the DataFrame. Try setting at least one "
  316. "column to a numeric dtype."
  317. )
  318. with pytest.raises(TypeError, match=msg):
  319. df.astype("object").interpolate(axis=axis)
  320. def test_interp_raise_on_all_object_dtype(self):
  321. # GH 22985
  322. df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype="object")
  323. msg = (
  324. "Cannot interpolate with all object-dtype columns "
  325. "in the DataFrame. Try setting at least one "
  326. "column to a numeric dtype."
  327. )
  328. with pytest.raises(TypeError, match=msg):
  329. df.interpolate()
  330. def test_interp_inplace(self, using_copy_on_write):
  331. df = DataFrame({"a": [1.0, 2.0, np.nan, 4.0]})
  332. expected = DataFrame({"a": [1.0, 2.0, 3.0, 4.0]})
  333. expected_cow = df.copy()
  334. result = df.copy()
  335. if using_copy_on_write:
  336. with tm.raises_chained_assignment_error():
  337. return_value = result["a"].interpolate(inplace=True)
  338. assert return_value is None
  339. tm.assert_frame_equal(result, expected_cow)
  340. else:
  341. with tm.assert_produces_warning(
  342. FutureWarning if not WARNING_CHECK_DISABLED else None,
  343. match="inplace method",
  344. ):
  345. return_value = result["a"].interpolate(inplace=True)
  346. assert return_value is None
  347. tm.assert_frame_equal(result, expected)
  348. result = df.copy()
  349. msg = "The 'downcast' keyword in Series.interpolate is deprecated"
  350. if using_copy_on_write:
  351. with tm.assert_produces_warning(
  352. (FutureWarning, ChainedAssignmentError), match=msg
  353. ):
  354. return_value = result["a"].interpolate(inplace=True, downcast="infer")
  355. assert return_value is None
  356. tm.assert_frame_equal(result, expected_cow)
  357. else:
  358. with tm.assert_produces_warning(FutureWarning, match=msg):
  359. return_value = result["a"].interpolate(inplace=True, downcast="infer")
  360. assert return_value is None
  361. tm.assert_frame_equal(result, expected.astype("int64"))
  362. def test_interp_inplace_row(self):
  363. # GH 10395
  364. result = DataFrame(
  365. {"a": [1.0, 2.0, 3.0, 4.0], "b": [np.nan, 2.0, 3.0, 4.0], "c": [3, 2, 2, 2]}
  366. )
  367. expected = result.interpolate(method="linear", axis=1, inplace=False)
  368. return_value = result.interpolate(method="linear", axis=1, inplace=True)
  369. assert return_value is None
  370. tm.assert_frame_equal(result, expected)
  371. def test_interp_ignore_all_good(self):
  372. # GH
  373. df = DataFrame(
  374. {
  375. "A": [1, 2, np.nan, 4],
  376. "B": [1, 2, 3, 4],
  377. "C": [1.0, 2.0, np.nan, 4.0],
  378. "D": [1.0, 2.0, 3.0, 4.0],
  379. }
  380. )
  381. expected = DataFrame(
  382. {
  383. "A": np.array([1, 2, 3, 4], dtype="float64"),
  384. "B": np.array([1, 2, 3, 4], dtype="int64"),
  385. "C": np.array([1.0, 2.0, 3, 4.0], dtype="float64"),
  386. "D": np.array([1.0, 2.0, 3.0, 4.0], dtype="float64"),
  387. }
  388. )
  389. msg = "The 'downcast' keyword in DataFrame.interpolate is deprecated"
  390. with tm.assert_produces_warning(FutureWarning, match=msg):
  391. result = df.interpolate(downcast=None)
  392. tm.assert_frame_equal(result, expected)
  393. # all good
  394. with tm.assert_produces_warning(FutureWarning, match=msg):
  395. result = df[["B", "D"]].interpolate(downcast=None)
  396. tm.assert_frame_equal(result, df[["B", "D"]])
  397. def test_interp_time_inplace_axis(self):
  398. # GH 9687
  399. periods = 5
  400. idx = date_range(start="2014-01-01", periods=periods)
  401. data = np.random.default_rng(2).random((periods, periods))
  402. data[data < 0.5] = np.nan
  403. expected = DataFrame(index=idx, columns=idx, data=data)
  404. result = expected.interpolate(axis=0, method="time")
  405. return_value = expected.interpolate(axis=0, method="time", inplace=True)
  406. assert return_value is None
  407. tm.assert_frame_equal(result, expected)
  408. @pytest.mark.parametrize("axis_name, axis_number", [("index", 0), ("columns", 1)])
  409. def test_interp_string_axis(self, axis_name, axis_number):
  410. # https://github.com/pandas-dev/pandas/issues/25190
  411. x = np.linspace(0, 100, 1000)
  412. y = np.sin(x)
  413. df = DataFrame(
  414. data=np.tile(y, (10, 1)), index=np.arange(10), columns=x
  415. ).reindex(columns=x * 1.005)
  416. result = df.interpolate(method="linear", axis=axis_name)
  417. expected = df.interpolate(method="linear", axis=axis_number)
  418. tm.assert_frame_equal(result, expected)
  419. @pytest.mark.parametrize("multiblock", [True, False])
  420. @pytest.mark.parametrize("method", ["ffill", "bfill", "pad"])
  421. def test_interp_fillna_methods(
  422. self, request, axis, multiblock, method, using_array_manager
  423. ):
  424. # GH 12918
  425. if using_array_manager and axis in (1, "columns"):
  426. # TODO(ArrayManager) support axis=1
  427. td.mark_array_manager_not_yet_implemented(request)
  428. df = DataFrame(
  429. {
  430. "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0],
  431. "B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0],
  432. "C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0],
  433. }
  434. )
  435. if multiblock:
  436. df["D"] = np.nan
  437. df["E"] = 1.0
  438. method2 = method if method != "pad" else "ffill"
  439. expected = getattr(df, method2)(axis=axis)
  440. msg = f"DataFrame.interpolate with method={method} is deprecated"
  441. with tm.assert_produces_warning(FutureWarning, match=msg):
  442. result = df.interpolate(method=method, axis=axis)
  443. tm.assert_frame_equal(result, expected)
  444. def test_interpolate_empty_df(self):
  445. # GH#53199
  446. df = DataFrame()
  447. expected = df.copy()
  448. result = df.interpolate(inplace=True)
  449. assert result is None
  450. tm.assert_frame_equal(df, expected)
  451. def test_interpolate_ea(self, any_int_ea_dtype):
  452. # GH#55347
  453. df = DataFrame({"a": [1, None, None, None, 3]}, dtype=any_int_ea_dtype)
  454. orig = df.copy()
  455. result = df.interpolate(limit=2)
  456. expected = DataFrame({"a": [1, 1.5, 2.0, None, 3]}, dtype="Float64")
  457. tm.assert_frame_equal(result, expected)
  458. tm.assert_frame_equal(df, orig)
  459. @pytest.mark.parametrize(
  460. "dtype",
  461. [
  462. "Float64",
  463. "Float32",
  464. pytest.param("float32[pyarrow]", marks=td.skip_if_no("pyarrow")),
  465. pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow")),
  466. ],
  467. )
  468. def test_interpolate_ea_float(self, dtype):
  469. # GH#55347
  470. df = DataFrame({"a": [1, None, None, None, 3]}, dtype=dtype)
  471. orig = df.copy()
  472. result = df.interpolate(limit=2)
  473. expected = DataFrame({"a": [1, 1.5, 2.0, None, 3]}, dtype=dtype)
  474. tm.assert_frame_equal(result, expected)
  475. tm.assert_frame_equal(df, orig)
  476. @pytest.mark.parametrize(
  477. "dtype",
  478. ["int64", "uint64", "int32", "int16", "int8", "uint32", "uint16", "uint8"],
  479. )
  480. def test_interpolate_arrow(self, dtype):
  481. # GH#55347
  482. pytest.importorskip("pyarrow")
  483. df = DataFrame({"a": [1, None, None, None, 3]}, dtype=dtype + "[pyarrow]")
  484. result = df.interpolate(limit=2)
  485. expected = DataFrame({"a": [1, 1.5, 2.0, None, 3]}, dtype="float64[pyarrow]")
  486. tm.assert_frame_equal(result, expected)