# test_fillna.py (39 KB)
  1. from datetime import (
  2. datetime,
  3. timedelta,
  4. timezone,
  5. )
  6. import numpy as np
  7. import pytest
  8. import pytz
  9. from pandas import (
  10. Categorical,
  11. DataFrame,
  12. DatetimeIndex,
  13. NaT,
  14. Period,
  15. Series,
  16. Timedelta,
  17. Timestamp,
  18. date_range,
  19. isna,
  20. timedelta_range,
  21. )
  22. import pandas._testing as tm
  23. from pandas.core.arrays import period_array
  24. @pytest.mark.filterwarnings(
  25. "ignore:(Series|DataFrame).fillna with 'method' is deprecated:FutureWarning"
  26. )
  27. class TestSeriesFillNA:
  28. def test_fillna_nat(self):
  29. series = Series([0, 1, 2, NaT._value], dtype="M8[ns]")
  30. filled = series.fillna(method="pad")
  31. filled2 = series.fillna(value=series.values[2])
  32. expected = series.copy()
  33. expected.iloc[3] = expected.iloc[2]
  34. tm.assert_series_equal(filled, expected)
  35. tm.assert_series_equal(filled2, expected)
  36. df = DataFrame({"A": series})
  37. filled = df.fillna(method="pad")
  38. filled2 = df.fillna(value=series.values[2])
  39. expected = DataFrame({"A": expected})
  40. tm.assert_frame_equal(filled, expected)
  41. tm.assert_frame_equal(filled2, expected)
  42. series = Series([NaT._value, 0, 1, 2], dtype="M8[ns]")
  43. filled = series.fillna(method="bfill")
  44. filled2 = series.fillna(value=series[1])
  45. expected = series.copy()
  46. expected[0] = expected[1]
  47. tm.assert_series_equal(filled, expected)
  48. tm.assert_series_equal(filled2, expected)
  49. df = DataFrame({"A": series})
  50. filled = df.fillna(method="bfill")
  51. filled2 = df.fillna(value=series[1])
  52. expected = DataFrame({"A": expected})
  53. tm.assert_frame_equal(filled, expected)
  54. tm.assert_frame_equal(filled2, expected)
  55. def test_fillna_value_or_method(self, datetime_series):
  56. msg = "Cannot specify both 'value' and 'method'"
  57. with pytest.raises(ValueError, match=msg):
  58. datetime_series.fillna(value=0, method="ffill")
  59. def test_fillna(self):
  60. ts = Series(
  61. [0.0, 1.0, 2.0, 3.0, 4.0], index=date_range("2020-01-01", periods=5)
  62. )
  63. tm.assert_series_equal(ts, ts.fillna(method="ffill"))
  64. ts.iloc[2] = np.nan
  65. exp = Series([0.0, 1.0, 1.0, 3.0, 4.0], index=ts.index)
  66. tm.assert_series_equal(ts.fillna(method="ffill"), exp)
  67. exp = Series([0.0, 1.0, 3.0, 3.0, 4.0], index=ts.index)
  68. tm.assert_series_equal(ts.fillna(method="backfill"), exp)
  69. exp = Series([0.0, 1.0, 5.0, 3.0, 4.0], index=ts.index)
  70. tm.assert_series_equal(ts.fillna(value=5), exp)
  71. msg = "Must specify a fill 'value' or 'method'"
  72. with pytest.raises(ValueError, match=msg):
  73. ts.fillna()
  74. def test_fillna_nonscalar(self):
  75. # GH#5703
  76. s1 = Series([np.nan])
  77. s2 = Series([1])
  78. result = s1.fillna(s2)
  79. expected = Series([1.0])
  80. tm.assert_series_equal(result, expected)
  81. result = s1.fillna({})
  82. tm.assert_series_equal(result, s1)
  83. result = s1.fillna(Series((), dtype=object))
  84. tm.assert_series_equal(result, s1)
  85. result = s2.fillna(s1)
  86. tm.assert_series_equal(result, s2)
  87. result = s1.fillna({0: 1})
  88. tm.assert_series_equal(result, expected)
  89. result = s1.fillna({1: 1})
  90. tm.assert_series_equal(result, Series([np.nan]))
  91. result = s1.fillna({0: 1, 1: 1})
  92. tm.assert_series_equal(result, expected)
  93. result = s1.fillna(Series({0: 1, 1: 1}))
  94. tm.assert_series_equal(result, expected)
  95. result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5]))
  96. tm.assert_series_equal(result, s1)
  97. def test_fillna_aligns(self):
  98. s1 = Series([0, 1, 2], list("abc"))
  99. s2 = Series([0, np.nan, 2], list("bac"))
  100. result = s2.fillna(s1)
  101. expected = Series([0, 0, 2.0], list("bac"))
  102. tm.assert_series_equal(result, expected)
  103. def test_fillna_limit(self):
  104. ser = Series(np.nan, index=[0, 1, 2])
  105. result = ser.fillna(999, limit=1)
  106. expected = Series([999, np.nan, np.nan], index=[0, 1, 2])
  107. tm.assert_series_equal(result, expected)
  108. result = ser.fillna(999, limit=2)
  109. expected = Series([999, 999, np.nan], index=[0, 1, 2])
  110. tm.assert_series_equal(result, expected)
  111. def test_fillna_dont_cast_strings(self):
  112. # GH#9043
  113. # make sure a string representation of int/float values can be filled
  114. # correctly without raising errors or being converted
  115. vals = ["0", "1.5", "-0.3"]
  116. for val in vals:
  117. ser = Series([0, 1, np.nan, np.nan, 4], dtype="float64")
  118. result = ser.fillna(val)
  119. expected = Series([0, 1, val, val, 4], dtype="object")
  120. tm.assert_series_equal(result, expected)
  121. def test_fillna_consistency(self):
  122. # GH#16402
  123. # fillna with a tz aware to a tz-naive, should result in object
  124. ser = Series([Timestamp("20130101"), NaT])
  125. result = ser.fillna(Timestamp("20130101", tz="US/Eastern"))
  126. expected = Series(
  127. [Timestamp("20130101"), Timestamp("2013-01-01", tz="US/Eastern")],
  128. dtype="object",
  129. )
  130. tm.assert_series_equal(result, expected)
  131. result = ser.where([True, False], Timestamp("20130101", tz="US/Eastern"))
  132. tm.assert_series_equal(result, expected)
  133. result = ser.where([True, False], Timestamp("20130101", tz="US/Eastern"))
  134. tm.assert_series_equal(result, expected)
  135. # with a non-datetime
  136. result = ser.fillna("foo")
  137. expected = Series([Timestamp("20130101"), "foo"])
  138. tm.assert_series_equal(result, expected)
  139. # assignment
  140. ser2 = ser.copy()
  141. with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
  142. ser2[1] = "foo"
  143. tm.assert_series_equal(ser2, expected)
  144. def test_fillna_downcast(self):
  145. # GH#15277
  146. # infer int64 from float64
  147. ser = Series([1.0, np.nan])
  148. msg = "The 'downcast' keyword in fillna is deprecated"
  149. with tm.assert_produces_warning(FutureWarning, match=msg):
  150. result = ser.fillna(0, downcast="infer")
  151. expected = Series([1, 0])
  152. tm.assert_series_equal(result, expected)
  153. # infer int64 from float64 when fillna value is a dict
  154. ser = Series([1.0, np.nan])
  155. with tm.assert_produces_warning(FutureWarning, match=msg):
  156. result = ser.fillna({1: 0}, downcast="infer")
  157. expected = Series([1, 0])
  158. tm.assert_series_equal(result, expected)
  159. def test_fillna_downcast_infer_objects_to_numeric(self):
  160. # GH#44241 if we have object-dtype, 'downcast="infer"' should
  161. # _actually_ infer
  162. arr = np.arange(5).astype(object)
  163. arr[3] = np.nan
  164. ser = Series(arr)
  165. msg = "The 'downcast' keyword in fillna is deprecated"
  166. with tm.assert_produces_warning(FutureWarning, match=msg):
  167. res = ser.fillna(3, downcast="infer")
  168. expected = Series(np.arange(5), dtype=np.int64)
  169. tm.assert_series_equal(res, expected)
  170. msg = "The 'downcast' keyword in ffill is deprecated"
  171. with tm.assert_produces_warning(FutureWarning, match=msg):
  172. res = ser.ffill(downcast="infer")
  173. expected = Series([0, 1, 2, 2, 4], dtype=np.int64)
  174. tm.assert_series_equal(res, expected)
  175. msg = "The 'downcast' keyword in bfill is deprecated"
  176. with tm.assert_produces_warning(FutureWarning, match=msg):
  177. res = ser.bfill(downcast="infer")
  178. expected = Series([0, 1, 2, 4, 4], dtype=np.int64)
  179. tm.assert_series_equal(res, expected)
  180. # with a non-round float present, we will downcast to float64
  181. ser[2] = 2.5
  182. expected = Series([0, 1, 2.5, 3, 4], dtype=np.float64)
  183. msg = "The 'downcast' keyword in fillna is deprecated"
  184. with tm.assert_produces_warning(FutureWarning, match=msg):
  185. res = ser.fillna(3, downcast="infer")
  186. tm.assert_series_equal(res, expected)
  187. msg = "The 'downcast' keyword in ffill is deprecated"
  188. with tm.assert_produces_warning(FutureWarning, match=msg):
  189. res = ser.ffill(downcast="infer")
  190. expected = Series([0, 1, 2.5, 2.5, 4], dtype=np.float64)
  191. tm.assert_series_equal(res, expected)
  192. msg = "The 'downcast' keyword in bfill is deprecated"
  193. with tm.assert_produces_warning(FutureWarning, match=msg):
  194. res = ser.bfill(downcast="infer")
  195. expected = Series([0, 1, 2.5, 4, 4], dtype=np.float64)
  196. tm.assert_series_equal(res, expected)
  197. def test_timedelta_fillna(self, frame_or_series, unit):
  198. # GH#3371
  199. ser = Series(
  200. [
  201. Timestamp("20130101"),
  202. Timestamp("20130101"),
  203. Timestamp("20130102"),
  204. Timestamp("20130103 9:01:01"),
  205. ],
  206. dtype=f"M8[{unit}]",
  207. )
  208. td = ser.diff()
  209. obj = frame_or_series(td).copy()
  210. # reg fillna
  211. result = obj.fillna(Timedelta(seconds=0))
  212. expected = Series(
  213. [
  214. timedelta(0),
  215. timedelta(0),
  216. timedelta(1),
  217. timedelta(days=1, seconds=9 * 3600 + 60 + 1),
  218. ],
  219. dtype=f"m8[{unit}]",
  220. )
  221. expected = frame_or_series(expected)
  222. tm.assert_equal(result, expected)
  223. # GH#45746 pre-1.? ints were interpreted as seconds. then that was
  224. # deprecated and changed to raise. In 2.0 it casts to common dtype,
  225. # consistent with every other dtype's behavior
  226. res = obj.fillna(1)
  227. expected = obj.astype(object).fillna(1)
  228. tm.assert_equal(res, expected)
  229. result = obj.fillna(Timedelta(seconds=1))
  230. expected = Series(
  231. [
  232. timedelta(seconds=1),
  233. timedelta(0),
  234. timedelta(1),
  235. timedelta(days=1, seconds=9 * 3600 + 60 + 1),
  236. ],
  237. dtype=f"m8[{unit}]",
  238. )
  239. expected = frame_or_series(expected)
  240. tm.assert_equal(result, expected)
  241. result = obj.fillna(timedelta(days=1, seconds=1))
  242. expected = Series(
  243. [
  244. timedelta(days=1, seconds=1),
  245. timedelta(0),
  246. timedelta(1),
  247. timedelta(days=1, seconds=9 * 3600 + 60 + 1),
  248. ],
  249. dtype=f"m8[{unit}]",
  250. )
  251. expected = frame_or_series(expected)
  252. tm.assert_equal(result, expected)
  253. result = obj.fillna(np.timedelta64(10**9))
  254. expected = Series(
  255. [
  256. timedelta(seconds=1),
  257. timedelta(0),
  258. timedelta(1),
  259. timedelta(days=1, seconds=9 * 3600 + 60 + 1),
  260. ],
  261. dtype=f"m8[{unit}]",
  262. )
  263. expected = frame_or_series(expected)
  264. tm.assert_equal(result, expected)
  265. result = obj.fillna(NaT)
  266. expected = Series(
  267. [
  268. NaT,
  269. timedelta(0),
  270. timedelta(1),
  271. timedelta(days=1, seconds=9 * 3600 + 60 + 1),
  272. ],
  273. dtype=f"m8[{unit}]",
  274. )
  275. expected = frame_or_series(expected)
  276. tm.assert_equal(result, expected)
  277. # ffill
  278. td[2] = np.nan
  279. obj = frame_or_series(td).copy()
  280. result = obj.ffill()
  281. expected = td.fillna(Timedelta(seconds=0))
  282. expected[0] = np.nan
  283. expected = frame_or_series(expected)
  284. tm.assert_equal(result, expected)
  285. # bfill
  286. td[2] = np.nan
  287. obj = frame_or_series(td)
  288. result = obj.bfill()
  289. expected = td.fillna(Timedelta(seconds=0))
  290. expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1)
  291. expected = frame_or_series(expected)
  292. tm.assert_equal(result, expected)
  293. def test_datetime64_fillna(self):
  294. ser = Series(
  295. [
  296. Timestamp("20130101"),
  297. Timestamp("20130101"),
  298. Timestamp("20130102"),
  299. Timestamp("20130103 9:01:01"),
  300. ]
  301. )
  302. ser[2] = np.nan
  303. # ffill
  304. result = ser.ffill()
  305. expected = Series(
  306. [
  307. Timestamp("20130101"),
  308. Timestamp("20130101"),
  309. Timestamp("20130101"),
  310. Timestamp("20130103 9:01:01"),
  311. ]
  312. )
  313. tm.assert_series_equal(result, expected)
  314. # bfill
  315. result = ser.bfill()
  316. expected = Series(
  317. [
  318. Timestamp("20130101"),
  319. Timestamp("20130101"),
  320. Timestamp("20130103 9:01:01"),
  321. Timestamp("20130103 9:01:01"),
  322. ]
  323. )
  324. tm.assert_series_equal(result, expected)
  325. @pytest.mark.parametrize(
  326. "scalar",
  327. [
  328. False,
  329. pytest.param(
  330. True,
  331. marks=pytest.mark.xfail(
  332. reason="GH#56410 scalar case not yet addressed"
  333. ),
  334. ),
  335. ],
  336. )
  337. @pytest.mark.parametrize("tz", [None, "UTC"])
  338. def test_datetime64_fillna_mismatched_reso_no_rounding(self, tz, scalar):
  339. # GH#56410
  340. dti = date_range("2016-01-01", periods=3, unit="s", tz=tz)
  341. item = Timestamp("2016-02-03 04:05:06.789", tz=tz)
  342. vec = date_range(item, periods=3, unit="ms")
  343. exp_dtype = "M8[ms]" if tz is None else "M8[ms, UTC]"
  344. expected = Series([item, dti[1], dti[2]], dtype=exp_dtype)
  345. ser = Series(dti)
  346. ser[0] = NaT
  347. ser2 = ser.copy()
  348. res = ser.fillna(item)
  349. res2 = ser2.fillna(Series(vec))
  350. if scalar:
  351. tm.assert_series_equal(res, expected)
  352. else:
  353. tm.assert_series_equal(res2, expected)
  354. @pytest.mark.parametrize(
  355. "scalar",
  356. [
  357. False,
  358. pytest.param(
  359. True,
  360. marks=pytest.mark.xfail(
  361. reason="GH#56410 scalar case not yet addressed"
  362. ),
  363. ),
  364. ],
  365. )
  366. def test_timedelta64_fillna_mismatched_reso_no_rounding(self, scalar):
  367. # GH#56410
  368. tdi = date_range("2016-01-01", periods=3, unit="s") - Timestamp("1970-01-01")
  369. item = Timestamp("2016-02-03 04:05:06.789") - Timestamp("1970-01-01")
  370. vec = timedelta_range(item, periods=3, unit="ms")
  371. expected = Series([item, tdi[1], tdi[2]], dtype="m8[ms]")
  372. ser = Series(tdi)
  373. ser[0] = NaT
  374. ser2 = ser.copy()
  375. res = ser.fillna(item)
  376. res2 = ser2.fillna(Series(vec))
  377. if scalar:
  378. tm.assert_series_equal(res, expected)
  379. else:
  380. tm.assert_series_equal(res2, expected)
  381. def test_datetime64_fillna_backfill(self):
  382. # GH#6587
  383. # make sure that we are treating as integer when filling
  384. ser = Series([NaT, NaT, "2013-08-05 15:30:00.000001"], dtype="M8[ns]")
  385. expected = Series(
  386. [
  387. "2013-08-05 15:30:00.000001",
  388. "2013-08-05 15:30:00.000001",
  389. "2013-08-05 15:30:00.000001",
  390. ],
  391. dtype="M8[ns]",
  392. )
  393. result = ser.fillna(method="backfill")
  394. tm.assert_series_equal(result, expected)
  395. @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"])
  396. def test_datetime64_tz_fillna(self, tz, unit):
  397. # DatetimeLikeBlock
  398. ser = Series(
  399. [
  400. Timestamp("2011-01-01 10:00"),
  401. NaT,
  402. Timestamp("2011-01-03 10:00"),
  403. NaT,
  404. ],
  405. dtype=f"M8[{unit}]",
  406. )
  407. null_loc = Series([False, True, False, True])
  408. result = ser.fillna(Timestamp("2011-01-02 10:00"))
  409. expected = Series(
  410. [
  411. Timestamp("2011-01-01 10:00"),
  412. Timestamp("2011-01-02 10:00"),
  413. Timestamp("2011-01-03 10:00"),
  414. Timestamp("2011-01-02 10:00"),
  415. ],
  416. dtype=f"M8[{unit}]",
  417. )
  418. tm.assert_series_equal(expected, result)
  419. # check s is not changed
  420. tm.assert_series_equal(isna(ser), null_loc)
  421. result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz))
  422. expected = Series(
  423. [
  424. Timestamp("2011-01-01 10:00"),
  425. Timestamp("2011-01-02 10:00", tz=tz),
  426. Timestamp("2011-01-03 10:00"),
  427. Timestamp("2011-01-02 10:00", tz=tz),
  428. ]
  429. )
  430. tm.assert_series_equal(expected, result)
  431. tm.assert_series_equal(isna(ser), null_loc)
  432. result = ser.fillna("AAA")
  433. expected = Series(
  434. [
  435. Timestamp("2011-01-01 10:00"),
  436. "AAA",
  437. Timestamp("2011-01-03 10:00"),
  438. "AAA",
  439. ],
  440. dtype=object,
  441. )
  442. tm.assert_series_equal(expected, result)
  443. tm.assert_series_equal(isna(ser), null_loc)
  444. result = ser.fillna(
  445. {
  446. 1: Timestamp("2011-01-02 10:00", tz=tz),
  447. 3: Timestamp("2011-01-04 10:00"),
  448. }
  449. )
  450. expected = Series(
  451. [
  452. Timestamp("2011-01-01 10:00"),
  453. Timestamp("2011-01-02 10:00", tz=tz),
  454. Timestamp("2011-01-03 10:00"),
  455. Timestamp("2011-01-04 10:00"),
  456. ]
  457. )
  458. tm.assert_series_equal(expected, result)
  459. tm.assert_series_equal(isna(ser), null_loc)
  460. result = ser.fillna(
  461. {1: Timestamp("2011-01-02 10:00"), 3: Timestamp("2011-01-04 10:00")}
  462. )
  463. expected = Series(
  464. [
  465. Timestamp("2011-01-01 10:00"),
  466. Timestamp("2011-01-02 10:00"),
  467. Timestamp("2011-01-03 10:00"),
  468. Timestamp("2011-01-04 10:00"),
  469. ],
  470. dtype=f"M8[{unit}]",
  471. )
  472. tm.assert_series_equal(expected, result)
  473. tm.assert_series_equal(isna(ser), null_loc)
  474. # DatetimeTZBlock
  475. idx = DatetimeIndex(
  476. ["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz=tz
  477. ).as_unit(unit)
  478. ser = Series(idx)
  479. assert ser.dtype == f"datetime64[{unit}, {tz}]"
  480. tm.assert_series_equal(isna(ser), null_loc)
  481. result = ser.fillna(Timestamp("2011-01-02 10:00"))
  482. expected = Series(
  483. [
  484. Timestamp("2011-01-01 10:00", tz=tz),
  485. Timestamp("2011-01-02 10:00"),
  486. Timestamp("2011-01-03 10:00", tz=tz),
  487. Timestamp("2011-01-02 10:00"),
  488. ]
  489. )
  490. tm.assert_series_equal(expected, result)
  491. tm.assert_series_equal(isna(ser), null_loc)
  492. result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz))
  493. idx = DatetimeIndex(
  494. [
  495. "2011-01-01 10:00",
  496. "2011-01-02 10:00",
  497. "2011-01-03 10:00",
  498. "2011-01-02 10:00",
  499. ],
  500. tz=tz,
  501. ).as_unit(unit)
  502. expected = Series(idx)
  503. tm.assert_series_equal(expected, result)
  504. tm.assert_series_equal(isna(ser), null_loc)
  505. result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz).to_pydatetime())
  506. idx = DatetimeIndex(
  507. [
  508. "2011-01-01 10:00",
  509. "2011-01-02 10:00",
  510. "2011-01-03 10:00",
  511. "2011-01-02 10:00",
  512. ],
  513. tz=tz,
  514. ).as_unit(unit)
  515. expected = Series(idx)
  516. tm.assert_series_equal(expected, result)
  517. tm.assert_series_equal(isna(ser), null_loc)
  518. result = ser.fillna("AAA")
  519. expected = Series(
  520. [
  521. Timestamp("2011-01-01 10:00", tz=tz),
  522. "AAA",
  523. Timestamp("2011-01-03 10:00", tz=tz),
  524. "AAA",
  525. ],
  526. dtype=object,
  527. )
  528. tm.assert_series_equal(expected, result)
  529. tm.assert_series_equal(isna(ser), null_loc)
  530. result = ser.fillna(
  531. {
  532. 1: Timestamp("2011-01-02 10:00", tz=tz),
  533. 3: Timestamp("2011-01-04 10:00"),
  534. }
  535. )
  536. expected = Series(
  537. [
  538. Timestamp("2011-01-01 10:00", tz=tz),
  539. Timestamp("2011-01-02 10:00", tz=tz),
  540. Timestamp("2011-01-03 10:00", tz=tz),
  541. Timestamp("2011-01-04 10:00"),
  542. ]
  543. )
  544. tm.assert_series_equal(expected, result)
  545. tm.assert_series_equal(isna(ser), null_loc)
  546. result = ser.fillna(
  547. {
  548. 1: Timestamp("2011-01-02 10:00", tz=tz),
  549. 3: Timestamp("2011-01-04 10:00", tz=tz),
  550. }
  551. )
  552. expected = Series(
  553. [
  554. Timestamp("2011-01-01 10:00", tz=tz),
  555. Timestamp("2011-01-02 10:00", tz=tz),
  556. Timestamp("2011-01-03 10:00", tz=tz),
  557. Timestamp("2011-01-04 10:00", tz=tz),
  558. ]
  559. ).dt.as_unit(unit)
  560. tm.assert_series_equal(expected, result)
  561. tm.assert_series_equal(isna(ser), null_loc)
  562. # filling with a naive/other zone, coerce to object
  563. result = ser.fillna(Timestamp("20130101"))
  564. expected = Series(
  565. [
  566. Timestamp("2011-01-01 10:00", tz=tz),
  567. Timestamp("2013-01-01"),
  568. Timestamp("2011-01-03 10:00", tz=tz),
  569. Timestamp("2013-01-01"),
  570. ]
  571. )
  572. tm.assert_series_equal(expected, result)
  573. tm.assert_series_equal(isna(ser), null_loc)
  574. # pre-2.0 fillna with mixed tzs would cast to object, in 2.0
  575. # it retains dtype.
  576. result = ser.fillna(Timestamp("20130101", tz="US/Pacific"))
  577. expected = Series(
  578. [
  579. Timestamp("2011-01-01 10:00", tz=tz),
  580. Timestamp("2013-01-01", tz="US/Pacific").tz_convert(tz),
  581. Timestamp("2011-01-03 10:00", tz=tz),
  582. Timestamp("2013-01-01", tz="US/Pacific").tz_convert(tz),
  583. ]
  584. ).dt.as_unit(unit)
  585. tm.assert_series_equal(expected, result)
  586. tm.assert_series_equal(isna(ser), null_loc)
  587. def test_fillna_dt64tz_with_method(self):
  588. # with timezone
  589. # GH#15855
  590. ser = Series([Timestamp("2012-11-11 00:00:00+01:00"), NaT])
  591. exp = Series(
  592. [
  593. Timestamp("2012-11-11 00:00:00+01:00"),
  594. Timestamp("2012-11-11 00:00:00+01:00"),
  595. ]
  596. )
  597. tm.assert_series_equal(ser.fillna(method="pad"), exp)
  598. ser = Series([NaT, Timestamp("2012-11-11 00:00:00+01:00")])
  599. exp = Series(
  600. [
  601. Timestamp("2012-11-11 00:00:00+01:00"),
  602. Timestamp("2012-11-11 00:00:00+01:00"),
  603. ]
  604. )
  605. tm.assert_series_equal(ser.fillna(method="bfill"), exp)
  606. def test_fillna_pytimedelta(self):
  607. # GH#8209
  608. ser = Series([np.nan, Timedelta("1 days")], index=["A", "B"])
  609. result = ser.fillna(timedelta(1))
  610. expected = Series(Timedelta("1 days"), index=["A", "B"])
  611. tm.assert_series_equal(result, expected)
  612. def test_fillna_period(self):
  613. # GH#13737
  614. ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")])
  615. res = ser.fillna(Period("2012-01", freq="M"))
  616. exp = Series([Period("2011-01", freq="M"), Period("2012-01", freq="M")])
  617. tm.assert_series_equal(res, exp)
  618. assert res.dtype == "Period[M]"
  619. def test_fillna_dt64_timestamp(self, frame_or_series):
  620. ser = Series(
  621. [
  622. Timestamp("20130101"),
  623. Timestamp("20130101"),
  624. Timestamp("20130102"),
  625. Timestamp("20130103 9:01:01"),
  626. ]
  627. )
  628. ser[2] = np.nan
  629. obj = frame_or_series(ser)
  630. # reg fillna
  631. result = obj.fillna(Timestamp("20130104"))
  632. expected = Series(
  633. [
  634. Timestamp("20130101"),
  635. Timestamp("20130101"),
  636. Timestamp("20130104"),
  637. Timestamp("20130103 9:01:01"),
  638. ]
  639. )
  640. expected = frame_or_series(expected)
  641. tm.assert_equal(result, expected)
  642. result = obj.fillna(NaT)
  643. expected = obj
  644. tm.assert_equal(result, expected)
  645. def test_fillna_dt64_non_nao(self):
  646. # GH#27419
  647. ser = Series([Timestamp("2010-01-01"), NaT, Timestamp("2000-01-01")])
  648. val = np.datetime64("1975-04-05", "ms")
  649. result = ser.fillna(val)
  650. expected = Series(
  651. [Timestamp("2010-01-01"), Timestamp("1975-04-05"), Timestamp("2000-01-01")]
  652. )
  653. tm.assert_series_equal(result, expected)
  654. def test_fillna_numeric_inplace(self):
  655. x = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"])
  656. y = x.copy()
  657. return_value = y.fillna(value=0, inplace=True)
  658. assert return_value is None
  659. expected = x.fillna(value=0)
  660. tm.assert_series_equal(y, expected)
# ---------------------------------------------------------------
# CategoricalDtype
  663. @pytest.mark.parametrize(
  664. "fill_value, expected_output",
  665. [
  666. ("a", ["a", "a", "b", "a", "a"]),
  667. ({1: "a", 3: "b", 4: "b"}, ["a", "a", "b", "b", "b"]),
  668. ({1: "a"}, ["a", "a", "b", np.nan, np.nan]),
  669. ({1: "a", 3: "b"}, ["a", "a", "b", "b", np.nan]),
  670. (Series("a"), ["a", np.nan, "b", np.nan, np.nan]),
  671. (Series("a", index=[1]), ["a", "a", "b", np.nan, np.nan]),
  672. (Series({1: "a", 3: "b"}), ["a", "a", "b", "b", np.nan]),
  673. (Series(["a", "b"], index=[3, 4]), ["a", np.nan, "b", "a", "b"]),
  674. ],
  675. )
  676. def test_fillna_categorical(self, fill_value, expected_output):
  677. # GH#17033
  678. # Test fillna for a Categorical series
  679. data = ["a", np.nan, "b", np.nan, np.nan]
  680. ser = Series(Categorical(data, categories=["a", "b"]))
  681. exp = Series(Categorical(expected_output, categories=["a", "b"]))
  682. result = ser.fillna(fill_value)
  683. tm.assert_series_equal(result, exp)
  684. @pytest.mark.parametrize(
  685. "fill_value, expected_output",
  686. [
  687. (Series(["a", "b", "c", "d", "e"]), ["a", "b", "b", "d", "e"]),
  688. (Series(["b", "d", "a", "d", "a"]), ["a", "d", "b", "d", "a"]),
  689. (
  690. Series(
  691. Categorical(
  692. ["b", "d", "a", "d", "a"], categories=["b", "c", "d", "e", "a"]
  693. )
  694. ),
  695. ["a", "d", "b", "d", "a"],
  696. ),
  697. ],
  698. )
  699. def test_fillna_categorical_with_new_categories(self, fill_value, expected_output):
  700. # GH#26215
  701. data = ["a", np.nan, "b", np.nan, np.nan]
  702. ser = Series(Categorical(data, categories=["a", "b", "c", "d", "e"]))
  703. exp = Series(Categorical(expected_output, categories=["a", "b", "c", "d", "e"]))
  704. result = ser.fillna(fill_value)
  705. tm.assert_series_equal(result, exp)
  706. def test_fillna_categorical_raises(self):
  707. data = ["a", np.nan, "b", np.nan, np.nan]
  708. ser = Series(Categorical(data, categories=["a", "b"]))
  709. cat = ser._values
  710. msg = "Cannot setitem on a Categorical with a new category"
  711. with pytest.raises(TypeError, match=msg):
  712. ser.fillna("d")
  713. msg2 = "Length of 'value' does not match."
  714. with pytest.raises(ValueError, match=msg2):
  715. cat.fillna(Series("d"))
  716. with pytest.raises(TypeError, match=msg):
  717. ser.fillna({1: "d", 3: "a"})
  718. msg = '"value" parameter must be a scalar or dict, but you passed a "list"'
  719. with pytest.raises(TypeError, match=msg):
  720. ser.fillna(["a", "b"])
  721. msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"'
  722. with pytest.raises(TypeError, match=msg):
  723. ser.fillna(("a", "b"))
  724. msg = (
  725. '"value" parameter must be a scalar, dict '
  726. 'or Series, but you passed a "DataFrame"'
  727. )
  728. with pytest.raises(TypeError, match=msg):
  729. ser.fillna(DataFrame({1: ["a"], 3: ["b"]}))
  730. @pytest.mark.parametrize("dtype", [float, "float32", "float64"])
  731. @pytest.mark.parametrize("fill_type", tm.ALL_REAL_NUMPY_DTYPES)
  732. @pytest.mark.parametrize("scalar", [True, False])
  733. def test_fillna_float_casting(self, dtype, fill_type, scalar):
  734. # GH-43424
  735. ser = Series([np.nan, 1.2], dtype=dtype)
  736. fill_values = Series([2, 2], dtype=fill_type)
  737. if scalar:
  738. fill_values = fill_values.dtype.type(2)
  739. result = ser.fillna(fill_values)
  740. expected = Series([2.0, 1.2], dtype=dtype)
  741. tm.assert_series_equal(result, expected)
  742. ser = Series([np.nan, 1.2], dtype=dtype)
  743. mask = ser.isna().to_numpy()
  744. ser[mask] = fill_values
  745. tm.assert_series_equal(ser, expected)
  746. ser = Series([np.nan, 1.2], dtype=dtype)
  747. ser.mask(mask, fill_values, inplace=True)
  748. tm.assert_series_equal(ser, expected)
  749. ser = Series([np.nan, 1.2], dtype=dtype)
  750. res = ser.where(~mask, fill_values)
  751. tm.assert_series_equal(res, expected)
  752. def test_fillna_f32_upcast_with_dict(self):
  753. # GH-43424
  754. ser = Series([np.nan, 1.2], dtype=np.float32)
  755. result = ser.fillna({0: 1})
  756. expected = Series([1.0, 1.2], dtype=np.float32)
  757. tm.assert_series_equal(result, expected)
# ---------------------------------------------------------------
# Invalid Usages
  760. def test_fillna_invalid_method(self, datetime_series):
  761. try:
  762. datetime_series.fillna(method="ffil")
  763. except ValueError as inst:
  764. assert "ffil" in str(inst)
  765. def test_fillna_listlike_invalid(self):
  766. ser = Series(np.random.default_rng(2).integers(-100, 100, 50))
  767. msg = '"value" parameter must be a scalar or dict, but you passed a "list"'
  768. with pytest.raises(TypeError, match=msg):
  769. ser.fillna([1, 2])
  770. msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"'
  771. with pytest.raises(TypeError, match=msg):
  772. ser.fillna((1, 2))
  773. def test_fillna_method_and_limit_invalid(self):
  774. # related GH#9217, make sure limit is an int and greater than 0
  775. ser = Series([1, 2, 3, None])
  776. msg = "|".join(
  777. [
  778. r"Cannot specify both 'value' and 'method'\.",
  779. "Limit must be greater than 0",
  780. "Limit must be an integer",
  781. ]
  782. )
  783. for limit in [-1, 0, 1.0, 2.0]:
  784. for method in ["backfill", "bfill", "pad", "ffill", None]:
  785. with pytest.raises(ValueError, match=msg):
  786. ser.fillna(1, limit=limit, method=method)
  787. def test_fillna_datetime64_with_timezone_tzinfo(self):
  788. # https://github.com/pandas-dev/pandas/issues/38851
  789. # different tzinfos representing UTC treated as equal
  790. ser = Series(date_range("2020", periods=3, tz="UTC"))
  791. expected = ser.copy()
  792. ser[1] = NaT
  793. result = ser.fillna(datetime(2020, 1, 2, tzinfo=timezone.utc))
  794. tm.assert_series_equal(result, expected)
  795. # pre-2.0 we cast to object with mixed tzs, in 2.0 we retain dtype
  796. ts = Timestamp("2000-01-01", tz="US/Pacific")
  797. ser2 = Series(ser._values.tz_convert("dateutil/US/Pacific"))
  798. assert ser2.dtype.kind == "M"
  799. result = ser2.fillna(ts)
  800. expected = Series(
  801. [ser2[0], ts.tz_convert(ser2.dtype.tz), ser2[2]],
  802. dtype=ser2.dtype,
  803. )
  804. tm.assert_series_equal(result, expected)
  805. @pytest.mark.parametrize(
  806. "input, input_fillna, expected_data, expected_categories",
  807. [
  808. (["A", "B", None, "A"], "B", ["A", "B", "B", "A"], ["A", "B"]),
  809. (["A", "B", np.nan, "A"], "B", ["A", "B", "B", "A"], ["A", "B"]),
  810. ],
  811. )
  812. def test_fillna_categorical_accept_same_type(
  813. self, input, input_fillna, expected_data, expected_categories
  814. ):
  815. # GH32414
  816. cat = Categorical(input)
  817. ser = Series(cat).fillna(input_fillna)
  818. filled = cat.fillna(ser)
  819. result = cat.fillna(filled)
  820. expected = Categorical(expected_data, categories=expected_categories)
  821. tm.assert_categorical_equal(result, expected)
  822. @pytest.mark.filterwarnings(
  823. "ignore:Series.fillna with 'method' is deprecated:FutureWarning"
  824. )
  825. class TestFillnaPad:
  826. def test_fillna_bug(self):
  827. ser = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"])
  828. filled = ser.fillna(method="ffill")
  829. expected = Series([np.nan, 1.0, 1.0, 3.0, 3.0], ser.index)
  830. tm.assert_series_equal(filled, expected)
  831. filled = ser.fillna(method="bfill")
  832. expected = Series([1.0, 1.0, 3.0, 3.0, np.nan], ser.index)
  833. tm.assert_series_equal(filled, expected)
  834. def test_ffill(self):
  835. ts = Series(
  836. [0.0, 1.0, 2.0, 3.0, 4.0], index=date_range("2020-01-01", periods=5)
  837. )
  838. ts.iloc[2] = np.nan
  839. tm.assert_series_equal(ts.ffill(), ts.fillna(method="ffill"))
  840. def test_ffill_mixed_dtypes_without_missing_data(self):
  841. # GH#14956
  842. series = Series([datetime(2015, 1, 1, tzinfo=pytz.utc), 1])
  843. result = series.ffill()
  844. tm.assert_series_equal(series, result)
  845. def test_bfill(self):
  846. ts = Series(
  847. [0.0, 1.0, 2.0, 3.0, 4.0], index=date_range("2020-01-01", periods=5)
  848. )
  849. ts.iloc[2] = np.nan
  850. tm.assert_series_equal(ts.bfill(), ts.fillna(method="bfill"))
  851. def test_pad_nan(self):
  852. x = Series(
  853. [np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"], dtype=float
  854. )
  855. return_value = x.fillna(method="pad", inplace=True)
  856. assert return_value is None
  857. expected = Series(
  858. [np.nan, 1.0, 1.0, 3.0, 3.0], ["z", "a", "b", "c", "d"], dtype=float
  859. )
  860. tm.assert_series_equal(x[1:], expected[1:])
  861. assert np.isnan(x.iloc[0]), np.isnan(expected.iloc[0])
  862. def test_series_fillna_limit(self):
  863. index = np.arange(10)
  864. s = Series(np.random.default_rng(2).standard_normal(10), index=index)
  865. result = s[:2].reindex(index)
  866. result = result.fillna(method="pad", limit=5)
  867. expected = s[:2].reindex(index).fillna(method="pad")
  868. expected[-3:] = np.nan
  869. tm.assert_series_equal(result, expected)
  870. result = s[-2:].reindex(index)
  871. result = result.fillna(method="bfill", limit=5)
  872. expected = s[-2:].reindex(index).fillna(method="backfill")
  873. expected[:3] = np.nan
  874. tm.assert_series_equal(result, expected)
  875. def test_series_pad_backfill_limit(self):
  876. index = np.arange(10)
  877. s = Series(np.random.default_rng(2).standard_normal(10), index=index)
  878. result = s[:2].reindex(index, method="pad", limit=5)
  879. expected = s[:2].reindex(index).fillna(method="pad")
  880. expected[-3:] = np.nan
  881. tm.assert_series_equal(result, expected)
  882. result = s[-2:].reindex(index, method="backfill", limit=5)
  883. expected = s[-2:].reindex(index).fillna(method="backfill")
  884. expected[:3] = np.nan
  885. tm.assert_series_equal(result, expected)
  886. def test_fillna_int(self):
  887. ser = Series(np.random.default_rng(2).integers(-100, 100, 50))
  888. return_value = ser.fillna(method="ffill", inplace=True)
  889. assert return_value is None
  890. tm.assert_series_equal(ser.fillna(method="ffill", inplace=False), ser)
  891. def test_datetime64tz_fillna_round_issue(self):
  892. # GH#14872
  893. data = Series(
  894. [NaT, NaT, datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc)]
  895. )
  896. filled = data.bfill()
  897. expected = Series(
  898. [
  899. datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc),
  900. datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc),
  901. datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc),
  902. ]
  903. )
  904. tm.assert_series_equal(filled, expected)
  905. def test_fillna_parr(self):
  906. # GH-24537
  907. dti = date_range(
  908. Timestamp.max - Timedelta(nanoseconds=10), periods=5, freq="ns"
  909. )
  910. ser = Series(dti.to_period("ns"))
  911. ser[2] = NaT
  912. arr = period_array(
  913. [
  914. Timestamp("2262-04-11 23:47:16.854775797"),
  915. Timestamp("2262-04-11 23:47:16.854775798"),
  916. Timestamp("2262-04-11 23:47:16.854775798"),
  917. Timestamp("2262-04-11 23:47:16.854775800"),
  918. Timestamp("2262-04-11 23:47:16.854775801"),
  919. ],
  920. freq="ns",
  921. )
  922. expected = Series(arr)
  923. filled = ser.ffill()
  924. tm.assert_series_equal(filled, expected)
  925. @pytest.mark.parametrize("func", ["pad", "backfill"])
  926. def test_pad_backfill_deprecated(self, func):
  927. # GH#33396
  928. ser = Series([1, 2, 3])
  929. with tm.assert_produces_warning(FutureWarning):
  930. getattr(ser, func)()
  931. @pytest.mark.parametrize(
  932. "data, expected_data, method, kwargs",
  933. (
  934. (
  935. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  936. [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
  937. "ffill",
  938. {"limit_area": "inside"},
  939. ),
  940. (
  941. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  942. [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
  943. "ffill",
  944. {"limit_area": "inside", "limit": 1},
  945. ),
  946. (
  947. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  948. [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
  949. "ffill",
  950. {"limit_area": "outside"},
  951. ),
  952. (
  953. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  954. [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
  955. "ffill",
  956. {"limit_area": "outside", "limit": 1},
  957. ),
  958. (
  959. [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
  960. [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
  961. "ffill",
  962. {"limit_area": "outside", "limit": 1},
  963. ),
  964. (
  965. range(5),
  966. range(5),
  967. "ffill",
  968. {"limit_area": "outside", "limit": 1},
  969. ),
  970. (
  971. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  972. [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
  973. "bfill",
  974. {"limit_area": "inside"},
  975. ),
  976. (
  977. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  978. [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
  979. "bfill",
  980. {"limit_area": "inside", "limit": 1},
  981. ),
  982. (
  983. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  984. [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
  985. "bfill",
  986. {"limit_area": "outside"},
  987. ),
  988. (
  989. [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
  990. [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
  991. "bfill",
  992. {"limit_area": "outside", "limit": 1},
  993. ),
  994. ),
  995. )
  996. def test_ffill_bfill_limit_area(data, expected_data, method, kwargs):
  997. # GH#56492
  998. s = Series(data)
  999. expected = Series(expected_data)
  1000. result = getattr(s, method)(**kwargs)
  1001. tm.assert_series_equal(result, expected)