# test_fillna.py
  1. import numpy as np
  2. import pytest
  3. from pandas.compat import WARNING_CHECK_DISABLED
  4. import pandas.util._test_decorators as td
  5. from pandas import (
  6. Categorical,
  7. DataFrame,
  8. DatetimeIndex,
  9. NaT,
  10. PeriodIndex,
  11. Series,
  12. TimedeltaIndex,
  13. Timestamp,
  14. date_range,
  15. to_datetime,
  16. )
  17. import pandas._testing as tm
  18. from pandas.tests.frame.common import _check_mixed_float
  19. class TestFillNA:
  20. def test_fillna_dict_inplace_nonunique_columns(
  21. self, using_copy_on_write, warn_copy_on_write
  22. ):
  23. df = DataFrame(
  24. {"A": [np.nan] * 3, "B": [NaT, Timestamp(1), NaT], "C": [np.nan, "foo", 2]}
  25. )
  26. df.columns = ["A", "A", "A"]
  27. orig = df[:]
  28. # TODO(CoW-warn) better warning message
  29. with tm.assert_cow_warning(warn_copy_on_write):
  30. df.fillna({"A": 2}, inplace=True)
  31. # The first and third columns can be set inplace, while the second cannot.
  32. expected = DataFrame(
  33. {"A": [2.0] * 3, "B": [2, Timestamp(1), 2], "C": [2, "foo", 2]}
  34. )
  35. expected.columns = ["A", "A", "A"]
  36. tm.assert_frame_equal(df, expected)
  37. # TODO: what's the expected/desired behavior with CoW?
  38. if not using_copy_on_write:
  39. assert tm.shares_memory(df.iloc[:, 0], orig.iloc[:, 0])
  40. assert not tm.shares_memory(df.iloc[:, 1], orig.iloc[:, 1])
  41. if not using_copy_on_write:
  42. assert tm.shares_memory(df.iloc[:, 2], orig.iloc[:, 2])
  43. @td.skip_array_manager_not_yet_implemented
  44. def test_fillna_on_column_view(self, using_copy_on_write):
  45. # GH#46149 avoid unnecessary copies
  46. arr = np.full((40, 50), np.nan)
  47. df = DataFrame(arr, copy=False)
  48. if using_copy_on_write:
  49. with tm.raises_chained_assignment_error():
  50. df[0].fillna(-1, inplace=True)
  51. assert np.isnan(arr[:, 0]).all()
  52. else:
  53. with tm.assert_produces_warning(
  54. FutureWarning if not WARNING_CHECK_DISABLED else None,
  55. match="inplace method",
  56. ):
  57. df[0].fillna(-1, inplace=True)
  58. assert (arr[:, 0] == -1).all()
  59. # i.e. we didn't create a new 49-column block
  60. assert len(df._mgr.arrays) == 1
  61. assert np.shares_memory(df.values, arr)
  62. def test_fillna_datetime(self, datetime_frame):
  63. tf = datetime_frame
  64. tf.loc[tf.index[:5], "A"] = np.nan
  65. tf.loc[tf.index[-5:], "A"] = np.nan
  66. zero_filled = datetime_frame.fillna(0)
  67. assert (zero_filled.loc[zero_filled.index[:5], "A"] == 0).all()
  68. msg = "DataFrame.fillna with 'method' is deprecated"
  69. with tm.assert_produces_warning(FutureWarning, match=msg):
  70. padded = datetime_frame.fillna(method="pad")
  71. assert np.isnan(padded.loc[padded.index[:5], "A"]).all()
  72. assert (
  73. padded.loc[padded.index[-5:], "A"] == padded.loc[padded.index[-5], "A"]
  74. ).all()
  75. msg = "Must specify a fill 'value' or 'method'"
  76. with pytest.raises(ValueError, match=msg):
  77. datetime_frame.fillna()
  78. msg = "Cannot specify both 'value' and 'method'"
  79. with pytest.raises(ValueError, match=msg):
  80. datetime_frame.fillna(5, method="ffill")
  81. def test_fillna_mixed_type(self, float_string_frame):
  82. mf = float_string_frame
  83. mf.loc[mf.index[5:20], "foo"] = np.nan
  84. mf.loc[mf.index[-10:], "A"] = np.nan
  85. # TODO: make stronger assertion here, GH 25640
  86. mf.fillna(value=0)
  87. msg = "DataFrame.fillna with 'method' is deprecated"
  88. with tm.assert_produces_warning(FutureWarning, match=msg):
  89. mf.fillna(method="pad")
  90. def test_fillna_mixed_float(self, mixed_float_frame):
  91. # mixed numeric (but no float16)
  92. mf = mixed_float_frame.reindex(columns=["A", "B", "D"])
  93. mf.loc[mf.index[-10:], "A"] = np.nan
  94. result = mf.fillna(value=0)
  95. _check_mixed_float(result, dtype={"C": None})
  96. msg = "DataFrame.fillna with 'method' is deprecated"
  97. with tm.assert_produces_warning(FutureWarning, match=msg):
  98. result = mf.fillna(method="pad")
  99. _check_mixed_float(result, dtype={"C": None})
  100. def test_fillna_empty(self, using_copy_on_write):
  101. if using_copy_on_write:
  102. pytest.skip("condition is unnecessary complex and is deprecated anyway")
  103. # empty frame (GH#2778)
  104. df = DataFrame(columns=["x"])
  105. for m in ["pad", "backfill"]:
  106. msg = "Series.fillna with 'method' is deprecated"
  107. with tm.assert_produces_warning(FutureWarning, match=msg):
  108. df.x.fillna(method=m, inplace=True)
  109. df.x.fillna(method=m)
  110. def test_fillna_different_dtype(self):
  111. # with different dtype (GH#3386)
  112. df = DataFrame(
  113. [["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]]
  114. )
  115. result = df.fillna({2: "foo"})
  116. expected = DataFrame(
  117. [["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]]
  118. )
  119. # column is originally float (all-NaN) -> filling with string gives object dtype
  120. expected[2] = expected[2].astype("object")
  121. tm.assert_frame_equal(result, expected)
  122. return_value = df.fillna({2: "foo"}, inplace=True)
  123. tm.assert_frame_equal(df, expected)
  124. assert return_value is None
  125. def test_fillna_limit_and_value(self):
  126. # limit and value
  127. df = DataFrame(np.random.default_rng(2).standard_normal((10, 3)))
  128. df.iloc[2:7, 0] = np.nan
  129. df.iloc[3:5, 2] = np.nan
  130. expected = df.copy()
  131. expected.iloc[2, 0] = 999
  132. expected.iloc[3, 2] = 999
  133. result = df.fillna(999, limit=1)
  134. tm.assert_frame_equal(result, expected)
  135. def test_fillna_datelike(self):
  136. # with datelike
  137. # GH#6344
  138. df = DataFrame(
  139. {
  140. "Date": [NaT, Timestamp("2014-1-1")],
  141. "Date2": [Timestamp("2013-1-1"), NaT],
  142. }
  143. )
  144. expected = df.copy()
  145. expected["Date"] = expected["Date"].fillna(df.loc[df.index[0], "Date2"])
  146. result = df.fillna(value={"Date": df["Date2"]})
  147. tm.assert_frame_equal(result, expected)
  148. def test_fillna_tzaware(self):
  149. # with timezone
  150. # GH#15855
  151. df = DataFrame({"A": [Timestamp("2012-11-11 00:00:00+01:00"), NaT]})
  152. exp = DataFrame(
  153. {
  154. "A": [
  155. Timestamp("2012-11-11 00:00:00+01:00"),
  156. Timestamp("2012-11-11 00:00:00+01:00"),
  157. ]
  158. }
  159. )
  160. msg = "DataFrame.fillna with 'method' is deprecated"
  161. with tm.assert_produces_warning(FutureWarning, match=msg):
  162. res = df.fillna(method="pad")
  163. tm.assert_frame_equal(res, exp)
  164. df = DataFrame({"A": [NaT, Timestamp("2012-11-11 00:00:00+01:00")]})
  165. exp = DataFrame(
  166. {
  167. "A": [
  168. Timestamp("2012-11-11 00:00:00+01:00"),
  169. Timestamp("2012-11-11 00:00:00+01:00"),
  170. ]
  171. }
  172. )
  173. msg = "DataFrame.fillna with 'method' is deprecated"
  174. with tm.assert_produces_warning(FutureWarning, match=msg):
  175. res = df.fillna(method="bfill")
  176. tm.assert_frame_equal(res, exp)
  177. def test_fillna_tzaware_different_column(self):
  178. # with timezone in another column
  179. # GH#15522
  180. df = DataFrame(
  181. {
  182. "A": date_range("20130101", periods=4, tz="US/Eastern"),
  183. "B": [1, 2, np.nan, np.nan],
  184. }
  185. )
  186. msg = "DataFrame.fillna with 'method' is deprecated"
  187. with tm.assert_produces_warning(FutureWarning, match=msg):
  188. result = df.fillna(method="pad")
  189. expected = DataFrame(
  190. {
  191. "A": date_range("20130101", periods=4, tz="US/Eastern"),
  192. "B": [1.0, 2.0, 2.0, 2.0],
  193. }
  194. )
  195. tm.assert_frame_equal(result, expected)
  196. def test_na_actions_categorical(self):
  197. cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
  198. vals = ["a", "b", np.nan, "d"]
  199. df = DataFrame({"cats": cat, "vals": vals})
  200. cat2 = Categorical([1, 2, 3, 3], categories=[1, 2, 3])
  201. vals2 = ["a", "b", "b", "d"]
  202. df_exp_fill = DataFrame({"cats": cat2, "vals": vals2})
  203. cat3 = Categorical([1, 2, 3], categories=[1, 2, 3])
  204. vals3 = ["a", "b", np.nan]
  205. df_exp_drop_cats = DataFrame({"cats": cat3, "vals": vals3})
  206. cat4 = Categorical([1, 2], categories=[1, 2, 3])
  207. vals4 = ["a", "b"]
  208. df_exp_drop_all = DataFrame({"cats": cat4, "vals": vals4})
  209. # fillna
  210. res = df.fillna(value={"cats": 3, "vals": "b"})
  211. tm.assert_frame_equal(res, df_exp_fill)
  212. msg = "Cannot setitem on a Categorical with a new category"
  213. with pytest.raises(TypeError, match=msg):
  214. df.fillna(value={"cats": 4, "vals": "c"})
  215. msg = "DataFrame.fillna with 'method' is deprecated"
  216. with tm.assert_produces_warning(FutureWarning, match=msg):
  217. res = df.fillna(method="pad")
  218. tm.assert_frame_equal(res, df_exp_fill)
  219. # dropna
  220. res = df.dropna(subset=["cats"])
  221. tm.assert_frame_equal(res, df_exp_drop_cats)
  222. res = df.dropna()
  223. tm.assert_frame_equal(res, df_exp_drop_all)
  224. # make sure that fillna takes missing values into account
  225. c = Categorical([np.nan, "b", np.nan], categories=["a", "b"])
  226. df = DataFrame({"cats": c, "vals": [1, 2, 3]})
  227. cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"])
  228. df_exp = DataFrame({"cats": cat_exp, "vals": [1, 2, 3]})
  229. res = df.fillna("a")
  230. tm.assert_frame_equal(res, df_exp)
  231. def test_fillna_categorical_nan(self):
  232. # GH#14021
  233. # np.nan should always be a valid filler
  234. cat = Categorical([np.nan, 2, np.nan])
  235. val = Categorical([np.nan, np.nan, np.nan])
  236. df = DataFrame({"cats": cat, "vals": val})
  237. # GH#32950 df.median() is poorly behaved because there is no
  238. # Categorical.median
  239. median = Series({"cats": 2.0, "vals": np.nan})
  240. res = df.fillna(median)
  241. v_exp = [np.nan, np.nan, np.nan]
  242. df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category")
  243. tm.assert_frame_equal(res, df_exp)
  244. result = df.cats.fillna(np.nan)
  245. tm.assert_series_equal(result, df.cats)
  246. result = df.vals.fillna(np.nan)
  247. tm.assert_series_equal(result, df.vals)
  248. idx = DatetimeIndex(
  249. ["2011-01-01 09:00", "2016-01-01 23:45", "2011-01-01 09:00", NaT, NaT]
  250. )
  251. df = DataFrame({"a": Categorical(idx)})
  252. tm.assert_frame_equal(df.fillna(value=NaT), df)
  253. idx = PeriodIndex(["2011-01", "2011-01", "2011-01", NaT, NaT], freq="M")
  254. df = DataFrame({"a": Categorical(idx)})
  255. tm.assert_frame_equal(df.fillna(value=NaT), df)
  256. idx = TimedeltaIndex(["1 days", "2 days", "1 days", NaT, NaT])
  257. df = DataFrame({"a": Categorical(idx)})
  258. tm.assert_frame_equal(df.fillna(value=NaT), df)
  259. def test_fillna_downcast(self):
  260. # GH#15277
  261. # infer int64 from float64
  262. df = DataFrame({"a": [1.0, np.nan]})
  263. msg = "The 'downcast' keyword in fillna is deprecated"
  264. with tm.assert_produces_warning(FutureWarning, match=msg):
  265. result = df.fillna(0, downcast="infer")
  266. expected = DataFrame({"a": [1, 0]})
  267. tm.assert_frame_equal(result, expected)
  268. # infer int64 from float64 when fillna value is a dict
  269. df = DataFrame({"a": [1.0, np.nan]})
  270. with tm.assert_produces_warning(FutureWarning, match=msg):
  271. result = df.fillna({"a": 0}, downcast="infer")
  272. expected = DataFrame({"a": [1, 0]})
  273. tm.assert_frame_equal(result, expected)
  274. def test_fillna_downcast_false(self, frame_or_series):
  275. # GH#45603 preserve object dtype with downcast=False
  276. obj = frame_or_series([1, 2, 3], dtype="object")
  277. msg = "The 'downcast' keyword in fillna"
  278. with tm.assert_produces_warning(FutureWarning, match=msg):
  279. result = obj.fillna("", downcast=False)
  280. tm.assert_equal(result, obj)
  281. def test_fillna_downcast_noop(self, frame_or_series):
  282. # GH#45423
  283. # Two relevant paths:
  284. # 1) not _can_hold_na (e.g. integer)
  285. # 2) _can_hold_na + noop + not can_hold_element
  286. obj = frame_or_series([1, 2, 3], dtype=np.int64)
  287. msg = "The 'downcast' keyword in fillna"
  288. with tm.assert_produces_warning(FutureWarning, match=msg):
  289. # GH#40988
  290. res = obj.fillna("foo", downcast=np.dtype(np.int32))
  291. expected = obj.astype(np.int32)
  292. tm.assert_equal(res, expected)
  293. obj2 = obj.astype(np.float64)
  294. with tm.assert_produces_warning(FutureWarning, match=msg):
  295. res2 = obj2.fillna("foo", downcast="infer")
  296. expected2 = obj # get back int64
  297. tm.assert_equal(res2, expected2)
  298. with tm.assert_produces_warning(FutureWarning, match=msg):
  299. # GH#40988
  300. res3 = obj2.fillna("foo", downcast=np.dtype(np.int32))
  301. tm.assert_equal(res3, expected)
  302. @pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]])
  303. def test_fillna_dictlike_value_duplicate_colnames(self, columns):
  304. # GH#43476
  305. df = DataFrame(np.nan, index=[0, 1], columns=columns)
  306. with tm.assert_produces_warning(None):
  307. result = df.fillna({"A": 0})
  308. expected = df.copy()
  309. expected["A"] = 0.0
  310. tm.assert_frame_equal(result, expected)
  311. def test_fillna_dtype_conversion(self, using_infer_string):
  312. # make sure that fillna on an empty frame works
  313. df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
  314. result = df.dtypes
  315. expected = Series([np.dtype("object")] * 5, index=[1, 2, 3, 4, 5])
  316. tm.assert_series_equal(result, expected)
  317. msg = "Downcasting object dtype arrays"
  318. with tm.assert_produces_warning(FutureWarning, match=msg):
  319. result = df.fillna(1)
  320. expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
  321. tm.assert_frame_equal(result, expected)
  322. # empty block
  323. df = DataFrame(index=range(3), columns=["A", "B"], dtype="float64")
  324. result = df.fillna("nan")
  325. expected = DataFrame("nan", index=range(3), columns=["A", "B"], dtype=object)
  326. tm.assert_frame_equal(result, expected)
  327. @pytest.mark.parametrize("val", ["", 1, np.nan, 1.0])
  328. def test_fillna_dtype_conversion_equiv_replace(self, val):
  329. df = DataFrame({"A": [1, np.nan], "B": [1.0, 2.0]})
  330. expected = df.replace(np.nan, val)
  331. result = df.fillna(val)
  332. tm.assert_frame_equal(result, expected)
  333. def test_fillna_datetime_columns(self):
  334. # GH#7095
  335. df = DataFrame(
  336. {
  337. "A": [-1, -2, np.nan],
  338. "B": date_range("20130101", periods=3),
  339. "C": ["foo", "bar", None],
  340. "D": ["foo2", "bar2", None],
  341. },
  342. index=date_range("20130110", periods=3),
  343. )
  344. result = df.fillna("?")
  345. expected = DataFrame(
  346. {
  347. "A": [-1, -2, "?"],
  348. "B": date_range("20130101", periods=3),
  349. "C": ["foo", "bar", "?"],
  350. "D": ["foo2", "bar2", "?"],
  351. },
  352. index=date_range("20130110", periods=3),
  353. )
  354. tm.assert_frame_equal(result, expected)
  355. df = DataFrame(
  356. {
  357. "A": [-1, -2, np.nan],
  358. "B": [Timestamp("2013-01-01"), Timestamp("2013-01-02"), NaT],
  359. "C": ["foo", "bar", None],
  360. "D": ["foo2", "bar2", None],
  361. },
  362. index=date_range("20130110", periods=3),
  363. )
  364. result = df.fillna("?")
  365. expected = DataFrame(
  366. {
  367. "A": [-1, -2, "?"],
  368. "B": [Timestamp("2013-01-01"), Timestamp("2013-01-02"), "?"],
  369. "C": ["foo", "bar", "?"],
  370. "D": ["foo2", "bar2", "?"],
  371. },
  372. index=date_range("20130110", periods=3),
  373. )
  374. tm.assert_frame_equal(result, expected)
  375. def test_ffill(self, datetime_frame):
  376. datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan
  377. datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan
  378. msg = "DataFrame.fillna with 'method' is deprecated"
  379. with tm.assert_produces_warning(FutureWarning, match=msg):
  380. alt = datetime_frame.fillna(method="ffill")
  381. tm.assert_frame_equal(datetime_frame.ffill(), alt)
  382. def test_bfill(self, datetime_frame):
  383. datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan
  384. datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan
  385. msg = "DataFrame.fillna with 'method' is deprecated"
  386. with tm.assert_produces_warning(FutureWarning, match=msg):
  387. alt = datetime_frame.fillna(method="bfill")
  388. tm.assert_frame_equal(datetime_frame.bfill(), alt)
  389. def test_frame_pad_backfill_limit(self):
  390. index = np.arange(10)
  391. df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)), index=index)
  392. result = df[:2].reindex(index, method="pad", limit=5)
  393. msg = "DataFrame.fillna with 'method' is deprecated"
  394. with tm.assert_produces_warning(FutureWarning, match=msg):
  395. expected = df[:2].reindex(index).fillna(method="pad")
  396. expected.iloc[-3:] = np.nan
  397. tm.assert_frame_equal(result, expected)
  398. result = df[-2:].reindex(index, method="backfill", limit=5)
  399. with tm.assert_produces_warning(FutureWarning, match=msg):
  400. expected = df[-2:].reindex(index).fillna(method="backfill")
  401. expected.iloc[:3] = np.nan
  402. tm.assert_frame_equal(result, expected)
  403. def test_frame_fillna_limit(self):
  404. index = np.arange(10)
  405. df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)), index=index)
  406. result = df[:2].reindex(index)
  407. msg = "DataFrame.fillna with 'method' is deprecated"
  408. with tm.assert_produces_warning(FutureWarning, match=msg):
  409. result = result.fillna(method="pad", limit=5)
  410. with tm.assert_produces_warning(FutureWarning, match=msg):
  411. expected = df[:2].reindex(index).fillna(method="pad")
  412. expected.iloc[-3:] = np.nan
  413. tm.assert_frame_equal(result, expected)
  414. result = df[-2:].reindex(index)
  415. with tm.assert_produces_warning(FutureWarning, match=msg):
  416. result = result.fillna(method="backfill", limit=5)
  417. with tm.assert_produces_warning(FutureWarning, match=msg):
  418. expected = df[-2:].reindex(index).fillna(method="backfill")
  419. expected.iloc[:3] = np.nan
  420. tm.assert_frame_equal(result, expected)
  421. def test_fillna_skip_certain_blocks(self):
  422. # don't try to fill boolean, int blocks
  423. df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)).astype(int))
  424. # it works!
  425. df.fillna(np.nan)
  426. @pytest.mark.parametrize("type", [int, float])
  427. def test_fillna_positive_limit(self, type):
  428. df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))).astype(type)
  429. msg = "Limit must be greater than 0"
  430. with pytest.raises(ValueError, match=msg):
  431. df.fillna(0, limit=-5)
  432. @pytest.mark.parametrize("type", [int, float])
  433. def test_fillna_integer_limit(self, type):
  434. df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))).astype(type)
  435. msg = "Limit must be an integer"
  436. with pytest.raises(ValueError, match=msg):
  437. df.fillna(0, limit=0.5)
  438. def test_fillna_inplace(self):
  439. df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
  440. df.loc[:4, 1] = np.nan
  441. df.loc[-4:, 3] = np.nan
  442. expected = df.fillna(value=0)
  443. assert expected is not df
  444. df.fillna(value=0, inplace=True)
  445. tm.assert_frame_equal(df, expected)
  446. expected = df.fillna(value={0: 0}, inplace=True)
  447. assert expected is None
  448. df.loc[:4, 1] = np.nan
  449. df.loc[-4:, 3] = np.nan
  450. msg = "DataFrame.fillna with 'method' is deprecated"
  451. with tm.assert_produces_warning(FutureWarning, match=msg):
  452. expected = df.fillna(method="ffill")
  453. assert expected is not df
  454. with tm.assert_produces_warning(FutureWarning, match=msg):
  455. df.fillna(method="ffill", inplace=True)
  456. tm.assert_frame_equal(df, expected)
  457. def test_fillna_dict_series(self):
  458. df = DataFrame(
  459. {
  460. "a": [np.nan, 1, 2, np.nan, np.nan],
  461. "b": [1, 2, 3, np.nan, np.nan],
  462. "c": [np.nan, 1, 2, 3, 4],
  463. }
  464. )
  465. result = df.fillna({"a": 0, "b": 5})
  466. expected = df.copy()
  467. expected["a"] = expected["a"].fillna(0)
  468. expected["b"] = expected["b"].fillna(5)
  469. tm.assert_frame_equal(result, expected)
  470. # it works
  471. result = df.fillna({"a": 0, "b": 5, "d": 7})
  472. # Series treated same as dict
  473. result = df.fillna(df.max())
  474. expected = df.fillna(df.max().to_dict())
  475. tm.assert_frame_equal(result, expected)
  476. # disable this for now
  477. with pytest.raises(NotImplementedError, match="column by column"):
  478. df.fillna(df.max(1), axis=1)
  479. def test_fillna_dataframe(self):
  480. # GH#8377
  481. df = DataFrame(
  482. {
  483. "a": [np.nan, 1, 2, np.nan, np.nan],
  484. "b": [1, 2, 3, np.nan, np.nan],
  485. "c": [np.nan, 1, 2, 3, 4],
  486. },
  487. index=list("VWXYZ"),
  488. )
  489. # df2 may have different index and columns
  490. df2 = DataFrame(
  491. {
  492. "a": [np.nan, 10, 20, 30, 40],
  493. "b": [50, 60, 70, 80, 90],
  494. "foo": ["bar"] * 5,
  495. },
  496. index=list("VWXuZ"),
  497. )
  498. result = df.fillna(df2)
  499. # only those columns and indices which are shared get filled
  500. expected = DataFrame(
  501. {
  502. "a": [np.nan, 1, 2, np.nan, 40],
  503. "b": [1, 2, 3, np.nan, 90],
  504. "c": [np.nan, 1, 2, 3, 4],
  505. },
  506. index=list("VWXYZ"),
  507. )
  508. tm.assert_frame_equal(result, expected)
  509. def test_fillna_columns(self):
  510. arr = np.random.default_rng(2).standard_normal((10, 10))
  511. arr[:, ::2] = np.nan
  512. df = DataFrame(arr)
  513. msg = "DataFrame.fillna with 'method' is deprecated"
  514. with tm.assert_produces_warning(FutureWarning, match=msg):
  515. result = df.fillna(method="ffill", axis=1)
  516. with tm.assert_produces_warning(FutureWarning, match=msg):
  517. expected = df.T.fillna(method="pad").T
  518. tm.assert_frame_equal(result, expected)
  519. df.insert(6, "foo", 5)
  520. with tm.assert_produces_warning(FutureWarning, match=msg):
  521. result = df.fillna(method="ffill", axis=1)
  522. with tm.assert_produces_warning(FutureWarning, match=msg):
  523. expected = df.astype(float).fillna(method="ffill", axis=1)
  524. tm.assert_frame_equal(result, expected)
  525. def test_fillna_invalid_method(self, float_frame):
  526. with pytest.raises(ValueError, match="ffil"):
  527. float_frame.fillna(method="ffil")
  528. def test_fillna_invalid_value(self, float_frame):
  529. # list
  530. msg = '"value" parameter must be a scalar or dict, but you passed a "{}"'
  531. with pytest.raises(TypeError, match=msg.format("list")):
  532. float_frame.fillna([1, 2])
  533. # tuple
  534. with pytest.raises(TypeError, match=msg.format("tuple")):
  535. float_frame.fillna((1, 2))
  536. # frame with series
  537. msg = (
  538. '"value" parameter must be a scalar, dict or Series, but you '
  539. 'passed a "DataFrame"'
  540. )
  541. with pytest.raises(TypeError, match=msg):
  542. float_frame.iloc[:, 0].fillna(float_frame)
  543. def test_fillna_col_reordering(self):
  544. cols = ["COL." + str(i) for i in range(5, 0, -1)]
  545. data = np.random.default_rng(2).random((20, 5))
  546. df = DataFrame(index=range(20), columns=cols, data=data)
  547. msg = "DataFrame.fillna with 'method' is deprecated"
  548. with tm.assert_produces_warning(FutureWarning, match=msg):
  549. filled = df.fillna(method="ffill")
  550. assert df.columns.tolist() == filled.columns.tolist()
  551. def test_fill_empty(self, float_frame):
  552. df = float_frame.reindex(columns=[])
  553. result = df.fillna(value=0)
  554. tm.assert_frame_equal(result, df)
  555. def test_fillna_downcast_dict(self):
  556. # GH#40809
  557. df = DataFrame({"col1": [1, np.nan]})
  558. msg = "The 'downcast' keyword in fillna"
  559. with tm.assert_produces_warning(FutureWarning, match=msg):
  560. result = df.fillna({"col1": 2}, downcast={"col1": "int64"})
  561. expected = DataFrame({"col1": [1, 2]})
  562. tm.assert_frame_equal(result, expected)
  563. def test_fillna_with_columns_and_limit(self):
  564. # GH40989
  565. df = DataFrame(
  566. [
  567. [np.nan, 2, np.nan, 0],
  568. [3, 4, np.nan, 1],
  569. [np.nan, np.nan, np.nan, 5],
  570. [np.nan, 3, np.nan, 4],
  571. ],
  572. columns=list("ABCD"),
  573. )
  574. result = df.fillna(axis=1, value=100, limit=1)
  575. result2 = df.fillna(axis=1, value=100, limit=2)
  576. expected = DataFrame(
  577. {
  578. "A": Series([100, 3, 100, 100], dtype="float64"),
  579. "B": [2, 4, np.nan, 3],
  580. "C": [np.nan, 100, np.nan, np.nan],
  581. "D": Series([0, 1, 5, 4], dtype="float64"),
  582. },
  583. index=[0, 1, 2, 3],
  584. )
  585. expected2 = DataFrame(
  586. {
  587. "A": Series([100, 3, 100, 100], dtype="float64"),
  588. "B": Series([2, 4, 100, 3], dtype="float64"),
  589. "C": [100, 100, np.nan, 100],
  590. "D": Series([0, 1, 5, 4], dtype="float64"),
  591. },
  592. index=[0, 1, 2, 3],
  593. )
  594. tm.assert_frame_equal(result, expected)
  595. tm.assert_frame_equal(result2, expected2)
  596. def test_fillna_datetime_inplace(self):
  597. # GH#48863
  598. df = DataFrame(
  599. {
  600. "date1": to_datetime(["2018-05-30", None]),
  601. "date2": to_datetime(["2018-09-30", None]),
  602. }
  603. )
  604. expected = df.copy()
  605. df.fillna(np.nan, inplace=True)
  606. tm.assert_frame_equal(df, expected)
  607. def test_fillna_inplace_with_columns_limit_and_value(self):
  608. # GH40989
  609. df = DataFrame(
  610. [
  611. [np.nan, 2, np.nan, 0],
  612. [3, 4, np.nan, 1],
  613. [np.nan, np.nan, np.nan, 5],
  614. [np.nan, 3, np.nan, 4],
  615. ],
  616. columns=list("ABCD"),
  617. )
  618. expected = df.fillna(axis=1, value=100, limit=1)
  619. assert expected is not df
  620. df.fillna(axis=1, value=100, limit=1, inplace=True)
  621. tm.assert_frame_equal(df, expected)
  622. @td.skip_array_manager_invalid_test
  623. @pytest.mark.parametrize("val", [-1, {"x": -1, "y": -1}])
  624. def test_inplace_dict_update_view(
  625. self, val, using_copy_on_write, warn_copy_on_write
  626. ):
  627. # GH#47188
  628. df = DataFrame({"x": [np.nan, 2], "y": [np.nan, 2]})
  629. df_orig = df.copy()
  630. result_view = df[:]
  631. with tm.assert_cow_warning(warn_copy_on_write):
  632. df.fillna(val, inplace=True)
  633. expected = DataFrame({"x": [-1, 2.0], "y": [-1.0, 2]})
  634. tm.assert_frame_equal(df, expected)
  635. if using_copy_on_write:
  636. tm.assert_frame_equal(result_view, df_orig)
  637. else:
  638. tm.assert_frame_equal(result_view, expected)
  639. def test_single_block_df_with_horizontal_axis(self):
  640. # GH 47713
  641. df = DataFrame(
  642. {
  643. "col1": [5, 0, np.nan, 10, np.nan],
  644. "col2": [7, np.nan, np.nan, 5, 3],
  645. "col3": [12, np.nan, 1, 2, 0],
  646. "col4": [np.nan, 1, 1, np.nan, 18],
  647. }
  648. )
  649. result = df.fillna(50, limit=1, axis=1)
  650. expected = DataFrame(
  651. [
  652. [5.0, 7.0, 12.0, 50.0],
  653. [0.0, 50.0, np.nan, 1.0],
  654. [50.0, np.nan, 1.0, 1.0],
  655. [10.0, 5.0, 2.0, 50.0],
  656. [50.0, 3.0, 0.0, 18.0],
  657. ],
  658. columns=["col1", "col2", "col3", "col4"],
  659. )
  660. tm.assert_frame_equal(result, expected)
  661. def test_fillna_with_multi_index_frame(self):
  662. # GH 47649
  663. pdf = DataFrame(
  664. {
  665. ("x", "a"): [np.nan, 2.0, 3.0],
  666. ("x", "b"): [1.0, 2.0, np.nan],
  667. ("y", "c"): [1.0, 2.0, np.nan],
  668. }
  669. )
  670. expected = DataFrame(
  671. {
  672. ("x", "a"): [-1.0, 2.0, 3.0],
  673. ("x", "b"): [1.0, 2.0, -1.0],
  674. ("y", "c"): [1.0, 2.0, np.nan],
  675. }
  676. )
  677. tm.assert_frame_equal(pdf.fillna({"x": -1}), expected)
  678. tm.assert_frame_equal(pdf.fillna({"x": -1, ("x", "b"): -2}), expected)
  679. expected = DataFrame(
  680. {
  681. ("x", "a"): [-1.0, 2.0, 3.0],
  682. ("x", "b"): [1.0, 2.0, -2.0],
  683. ("y", "c"): [1.0, 2.0, np.nan],
  684. }
  685. )
  686. tm.assert_frame_equal(pdf.fillna({("x", "b"): -2, "x": -1}), expected)
  687. def test_fillna_nonconsolidated_frame():
  688. # https://github.com/pandas-dev/pandas/issues/36495
  689. df = DataFrame(
  690. [
  691. [1, 1, 1, 1.0],
  692. [2, 2, 2, 2.0],
  693. [3, 3, 3, 3.0],
  694. ],
  695. columns=["i1", "i2", "i3", "f1"],
  696. )
  697. df_nonconsol = df.pivot(index="i1", columns="i2")
  698. result = df_nonconsol.fillna(0)
  699. assert result.isna().sum().sum() == 0
  700. def test_fillna_nones_inplace():
  701. # GH 48480
  702. df = DataFrame(
  703. [[None, None], [None, None]],
  704. columns=["A", "B"],
  705. )
  706. msg = "Downcasting object dtype arrays"
  707. with tm.assert_produces_warning(FutureWarning, match=msg):
  708. df.fillna(value={"A": 1, "B": 2}, inplace=True)
  709. expected = DataFrame([[1, 2], [1, 2]], columns=["A", "B"])
  710. tm.assert_frame_equal(df, expected)
  711. @pytest.mark.parametrize("func", ["pad", "backfill"])
  712. def test_pad_backfill_deprecated(func):
  713. # GH#33396
  714. df = DataFrame({"a": [1, 2, 3]})
  715. with tm.assert_produces_warning(FutureWarning):
  716. getattr(df, func)()
  717. @pytest.mark.parametrize(
  718. "data, expected_data, method, kwargs",
  719. (
  720. (
  721. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  722. [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
  723. "ffill",
  724. {"limit_area": "inside"},
  725. ),
  726. (
  727. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  728. [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
  729. "ffill",
  730. {"limit_area": "inside", "limit": 1},
  731. ),
  732. (
  733. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  734. [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
  735. "ffill",
  736. {"limit_area": "outside"},
  737. ),
  738. (
  739. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  740. [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
  741. "ffill",
  742. {"limit_area": "outside", "limit": 1},
  743. ),
  744. (
  745. [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
  746. [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
  747. "ffill",
  748. {"limit_area": "outside", "limit": 1},
  749. ),
  750. (
  751. range(5),
  752. range(5),
  753. "ffill",
  754. {"limit_area": "outside", "limit": 1},
  755. ),
  756. (
  757. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  758. [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
  759. "bfill",
  760. {"limit_area": "inside"},
  761. ),
  762. (
  763. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  764. [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
  765. "bfill",
  766. {"limit_area": "inside", "limit": 1},
  767. ),
  768. (
  769. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  770. [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
  771. "bfill",
  772. {"limit_area": "outside"},
  773. ),
  774. (
  775. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  776. [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
  777. "bfill",
  778. {"limit_area": "outside", "limit": 1},
  779. ),
  780. ),
  781. )
  782. def test_ffill_bfill_limit_area(data, expected_data, method, kwargs):
  783. # GH#56492
  784. df = DataFrame(data)
  785. expected = DataFrame(expected_data)
  786. result = getattr(df, method)(**kwargs)
  787. tm.assert_frame_equal(result, expected)