# test_replace.py (31 KB)
  1. import re
  2. import numpy as np
  3. import pytest
  4. import pandas as pd
  5. import pandas._testing as tm
  6. from pandas.core.arrays import IntervalArray
  7. class TestSeriesReplace:
  8. def test_replace_explicit_none(self):
  9. # GH#36984 if the user explicitly passes value=None, give it to them
  10. ser = pd.Series([0, 0, ""], dtype=object)
  11. result = ser.replace("", None)
  12. expected = pd.Series([0, 0, None], dtype=object)
  13. tm.assert_series_equal(result, expected)
  14. # Cast column 2 to object to avoid implicit cast when setting entry to ""
  15. df = pd.DataFrame(np.zeros((3, 3))).astype({2: object})
  16. df.iloc[2, 2] = ""
  17. result = df.replace("", None)
  18. expected = pd.DataFrame(
  19. {
  20. 0: np.zeros(3),
  21. 1: np.zeros(3),
  22. 2: np.array([0.0, 0.0, None], dtype=object),
  23. }
  24. )
  25. assert expected.iloc[2, 2] is None
  26. tm.assert_frame_equal(result, expected)
  27. # GH#19998 same thing with object dtype
  28. ser = pd.Series([10, 20, 30, "a", "a", "b", "a"])
  29. result = ser.replace("a", None)
  30. expected = pd.Series([10, 20, 30, None, None, "b", None])
  31. assert expected.iloc[-1] is None
  32. tm.assert_series_equal(result, expected)
  33. def test_replace_noop_doesnt_downcast(self):
  34. # GH#44498
  35. ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object)
  36. res = ser.replace({np.nan: None}) # should be a no-op
  37. tm.assert_series_equal(res, ser)
  38. assert res.dtype == object
  39. # same thing but different calling convention
  40. res = ser.replace(np.nan, None)
  41. tm.assert_series_equal(res, ser)
  42. assert res.dtype == object
  43. def test_replace(self):
  44. N = 50
  45. ser = pd.Series(np.random.default_rng(2).standard_normal(N))
  46. ser[0:4] = np.nan
  47. ser[6:10] = 0
  48. # replace list with a single value
  49. return_value = ser.replace([np.nan], -1, inplace=True)
  50. assert return_value is None
  51. exp = ser.fillna(-1)
  52. tm.assert_series_equal(ser, exp)
  53. rs = ser.replace(0.0, np.nan)
  54. ser[ser == 0.0] = np.nan
  55. tm.assert_series_equal(rs, ser)
  56. ser = pd.Series(
  57. np.fabs(np.random.default_rng(2).standard_normal(N)),
  58. pd.date_range("2020-01-01", periods=N),
  59. dtype=object,
  60. )
  61. ser[:5] = np.nan
  62. ser[6:10] = "foo"
  63. ser[20:30] = "bar"
  64. # replace list with a single value
  65. msg = "Downcasting behavior in `replace`"
  66. with tm.assert_produces_warning(FutureWarning, match=msg):
  67. rs = ser.replace([np.nan, "foo", "bar"], -1)
  68. assert (rs[:5] == -1).all()
  69. assert (rs[6:10] == -1).all()
  70. assert (rs[20:30] == -1).all()
  71. assert (pd.isna(ser[:5])).all()
  72. # replace with different values
  73. with tm.assert_produces_warning(FutureWarning, match=msg):
  74. rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})
  75. assert (rs[:5] == -1).all()
  76. assert (rs[6:10] == -2).all()
  77. assert (rs[20:30] == -3).all()
  78. assert (pd.isna(ser[:5])).all()
  79. # replace with different values with 2 lists
  80. with tm.assert_produces_warning(FutureWarning, match=msg):
  81. rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
  82. tm.assert_series_equal(rs, rs2)
  83. # replace inplace
  84. with tm.assert_produces_warning(FutureWarning, match=msg):
  85. return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
  86. assert return_value is None
  87. assert (ser[:5] == -1).all()
  88. assert (ser[6:10] == -1).all()
  89. assert (ser[20:30] == -1).all()
  90. def test_replace_nan_with_inf(self):
  91. ser = pd.Series([np.nan, 0, np.inf])
  92. tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
  93. ser = pd.Series([np.nan, 0, "foo", "bar", np.inf, None, pd.NaT])
  94. tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
  95. filled = ser.copy()
  96. filled[4] = 0
  97. tm.assert_series_equal(ser.replace(np.inf, 0), filled)
  def test_replace_listlike_value_listlike_target(self, datetime_series):
      """Check list-to-list replacement, including the length-mismatch error.

      ``datetime_series`` is a fixture; only its index is used here.
      """
      dt_ser = pd.Series(datetime_series.index)
      tm.assert_series_equal(dt_ser.replace(np.nan, 0), dt_ser.fillna(0))

      # malformed
      length_msg = r"Replacement lists must match in length\. Expecting 3 got 2"
      with pytest.raises(ValueError, match=length_msg):
          dt_ser.replace([1, 2, 3], [np.nan, 0])

      # ser is dt64 so can't hold 1 or 2, so this replace is a no-op
      untouched = dt_ser.replace([1, 2], [np.nan, 0])
      tm.assert_series_equal(untouched, dt_ser)

      # Reversing permutation: each value maps to its mirror image.
      ints = pd.Series([0, 1, 2, 3, 4])
      reversed_ints = ints.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
      tm.assert_series_equal(reversed_ints, pd.Series([4, 3, 2, 1, 0]))
  111. def test_replace_gh5319(self):
  112. # API change from 0.12?
  113. # GH 5319
  114. ser = pd.Series([0, np.nan, 2, 3, 4])
  115. expected = ser.ffill()
  116. msg = (
  117. "Series.replace without 'value' and with non-dict-like "
  118. "'to_replace' is deprecated"
  119. )
  120. with tm.assert_produces_warning(FutureWarning, match=msg):
  121. result = ser.replace([np.nan])
  122. tm.assert_series_equal(result, expected)
  123. ser = pd.Series([0, np.nan, 2, 3, 4])
  124. expected = ser.ffill()
  125. with tm.assert_produces_warning(FutureWarning, match=msg):
  126. result = ser.replace(np.nan)
  127. tm.assert_series_equal(result, expected)
  128. def test_replace_datetime64(self):
  129. # GH 5797
  130. ser = pd.Series(pd.date_range("20130101", periods=5))
  131. expected = ser.copy()
  132. expected.loc[2] = pd.Timestamp("20120101")
  133. result = ser.replace({pd.Timestamp("20130103"): pd.Timestamp("20120101")})
  134. tm.assert_series_equal(result, expected)
  135. result = ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101"))
  136. tm.assert_series_equal(result, expected)
  137. def test_replace_nat_with_tz(self):
  138. # GH 11792: Test with replacing NaT in a list with tz data
  139. ts = pd.Timestamp("2015/01/01", tz="UTC")
  140. s = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")])
  141. result = s.replace([np.nan, pd.NaT], pd.Timestamp.min)
  142. expected = pd.Series([pd.Timestamp.min, ts], dtype=object)
  143. tm.assert_series_equal(expected, result)
  144. def test_replace_timedelta_td64(self):
  145. tdi = pd.timedelta_range(0, periods=5)
  146. ser = pd.Series(tdi)
  147. # Using a single dict argument means we go through replace_list
  148. result = ser.replace({ser[1]: ser[3]})
  149. expected = pd.Series([ser[0], ser[3], ser[2], ser[3], ser[4]])
  150. tm.assert_series_equal(result, expected)
  def test_replace_with_single_list(self):
      """Check the deprecated ``replace(list)`` form that has no ``value``.

      Covers the copying call, the inplace call, and an inplace call with an
      invalid ``method`` that must raise ``ValueError`` and leave the Series
      unchanged.
      """
      ser = pd.Series([0, 1, 2, 3, 4])
      msg2 = (
          "Series.replace without 'value' and with non-dict-like "
          "'to_replace' is deprecated"
      )
      with tm.assert_produces_warning(FutureWarning, match=msg2):
          result = ser.replace([1, 2, 3])
      tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))

      s = ser.copy()
      with tm.assert_produces_warning(FutureWarning, match=msg2):
          return_value = s.replace([1, 2, 3], inplace=True)
      assert return_value is None
      tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))

      # make sure things don't get corrupted when fillna call fails
      s = ser.copy()
      msg = (
          r"Invalid fill method\. Expecting pad \(ffill\) or backfill "
          r"\(bfill\)\. Got crash_cymbal"
      )
      msg3 = "The 'method' keyword in Series.replace is deprecated"
      with pytest.raises(ValueError, match=msg):
          with tm.assert_produces_warning(FutureWarning, match=msg3):
              # The call raises, so there is no return value to inspect; the
              # former ``assert return_value is None`` after it could never
              # test this call and has been dropped.
              s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
      tm.assert_series_equal(s, ser)
  177. def test_replace_mixed_types(self):
  178. ser = pd.Series(np.arange(5), dtype="int64")
  179. def check_replace(to_rep, val, expected):
  180. sc = ser.copy()
  181. result = ser.replace(to_rep, val)
  182. return_value = sc.replace(to_rep, val, inplace=True)
  183. assert return_value is None
  184. tm.assert_series_equal(expected, result)
  185. tm.assert_series_equal(expected, sc)
  186. # 3.0 can still be held in our int64 series, so we do not upcast GH#44940
  187. tr, v = [3], [3.0]
  188. check_replace(tr, v, ser)
  189. # Note this matches what we get with the scalars 3 and 3.0
  190. check_replace(tr[0], v[0], ser)
  191. # MUST upcast to float
  192. e = pd.Series([0, 1, 2, 3.5, 4])
  193. tr, v = [3], [3.5]
  194. check_replace(tr, v, e)
  195. # casts to object
  196. e = pd.Series([0, 1, 2, 3.5, "a"])
  197. tr, v = [3, 4], [3.5, "a"]
  198. check_replace(tr, v, e)
  199. # again casts to object
  200. e = pd.Series([0, 1, 2, 3.5, pd.Timestamp("20130101")])
  201. tr, v = [3, 4], [3.5, pd.Timestamp("20130101")]
  202. check_replace(tr, v, e)
  203. # casts to object
  204. e = pd.Series([0, 1, 2, 3.5, True], dtype="object")
  205. tr, v = [3, 4], [3.5, True]
  206. check_replace(tr, v, e)
  207. # test an object with dates + floats + integers + strings
  208. dr = pd.Series(pd.date_range("1/1/2001", "1/10/2001", freq="D"))
  209. result = dr.astype(object).replace([dr[0], dr[1], dr[2]], [1.0, 2, "a"])
  210. expected = pd.Series([1.0, 2, "a"] + dr[3:].tolist(), dtype=object)
  211. tm.assert_series_equal(result, expected)
  212. def test_replace_bool_with_string_no_op(self):
  213. s = pd.Series([True, False, True])
  214. result = s.replace("fun", "in-the-sun")
  215. tm.assert_series_equal(s, result)
  216. def test_replace_bool_with_string(self):
  217. # nonexistent elements
  218. s = pd.Series([True, False, True])
  219. result = s.replace(True, "2u")
  220. expected = pd.Series(["2u", False, "2u"])
  221. tm.assert_series_equal(expected, result)
  222. def test_replace_bool_with_bool(self):
  223. s = pd.Series([True, False, True])
  224. result = s.replace(True, False)
  225. expected = pd.Series([False] * len(s))
  226. tm.assert_series_equal(expected, result)
  227. def test_replace_with_dict_with_bool_keys(self):
  228. s = pd.Series([True, False, True])
  229. result = s.replace({"asdf": "asdb", True: "yes"})
  230. expected = pd.Series(["yes", False, "yes"])
  231. tm.assert_series_equal(result, expected)
  232. def test_replace_Int_with_na(self, any_int_ea_dtype):
  233. # GH 38267
  234. result = pd.Series([0, None], dtype=any_int_ea_dtype).replace(0, pd.NA)
  235. expected = pd.Series([pd.NA, pd.NA], dtype=any_int_ea_dtype)
  236. tm.assert_series_equal(result, expected)
  237. result = pd.Series([0, 1], dtype=any_int_ea_dtype).replace(0, pd.NA)
  238. result.replace(1, pd.NA, inplace=True)
  239. tm.assert_series_equal(result, expected)
  240. def test_replace2(self):
  241. N = 50
  242. ser = pd.Series(
  243. np.fabs(np.random.default_rng(2).standard_normal(N)),
  244. pd.date_range("2020-01-01", periods=N),
  245. dtype=object,
  246. )
  247. ser[:5] = np.nan
  248. ser[6:10] = "foo"
  249. ser[20:30] = "bar"
  250. # replace list with a single value
  251. msg = "Downcasting behavior in `replace`"
  252. with tm.assert_produces_warning(FutureWarning, match=msg):
  253. rs = ser.replace([np.nan, "foo", "bar"], -1)
  254. assert (rs[:5] == -1).all()
  255. assert (rs[6:10] == -1).all()
  256. assert (rs[20:30] == -1).all()
  257. assert (pd.isna(ser[:5])).all()
  258. # replace with different values
  259. with tm.assert_produces_warning(FutureWarning, match=msg):
  260. rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})
  261. assert (rs[:5] == -1).all()
  262. assert (rs[6:10] == -2).all()
  263. assert (rs[20:30] == -3).all()
  264. assert (pd.isna(ser[:5])).all()
  265. # replace with different values with 2 lists
  266. with tm.assert_produces_warning(FutureWarning, match=msg):
  267. rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
  268. tm.assert_series_equal(rs, rs2)
  269. # replace inplace
  270. with tm.assert_produces_warning(FutureWarning, match=msg):
  271. return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
  272. assert return_value is None
  273. assert (ser[:5] == -1).all()
  274. assert (ser[6:10] == -1).all()
  275. assert (ser[20:30] == -1).all()
  @pytest.mark.parametrize("inplace", [True, False])
  def test_replace_cascade(self, inplace):
      """Check that replaced values are not replaced again (GH #50778).

      With the mapping 1->2, 2->3, 3->4 a cascading implementation would turn
      every value into 4; the expected result is [2, 3, 4].
      """
      ser = pd.Series([1, 2, 3])
      expected = pd.Series([2, 3, 4])

      res = ser.replace([1, 2, 3], [2, 3, 4], inplace=inplace)

      # The inplace call mutates ``ser``; otherwise the result is returned.
      observed = ser if inplace else res
      tm.assert_series_equal(observed, expected)
  287. def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype):
  288. # GH 32621, GH#44940
  289. ser = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype)
  290. expected = pd.Series(["1", "2", np.nan], dtype=nullable_string_dtype)
  291. result = ser.replace({"one": "1", "two": "2"})
  292. tm.assert_series_equal(expected, result)
  293. def test_replace_with_empty_dictlike(self):
  294. # GH 15289
  295. s = pd.Series(list("abcd"))
  296. tm.assert_series_equal(s, s.replace({}))
  297. empty_series = pd.Series([])
  298. tm.assert_series_equal(s, s.replace(empty_series))
  299. def test_replace_string_with_number(self):
  300. # GH 15743
  301. s = pd.Series([1, 2, 3])
  302. result = s.replace("2", np.nan)
  303. expected = pd.Series([1, 2, 3])
  304. tm.assert_series_equal(expected, result)
  305. def test_replace_replacer_equals_replacement(self):
  306. # GH 20656
  307. # make sure all replacers are matching against original values
  308. s = pd.Series(["a", "b"])
  309. expected = pd.Series(["b", "a"])
  310. result = s.replace({"a": "b", "b": "a"})
  311. tm.assert_series_equal(expected, result)
  312. def test_replace_unicode_with_number(self):
  313. # GH 15743
  314. s = pd.Series([1, 2, 3])
  315. result = s.replace("2", np.nan)
  316. expected = pd.Series([1, 2, 3])
  317. tm.assert_series_equal(expected, result)
  318. def test_replace_mixed_types_with_string(self):
  319. # Testing mixed
  320. s = pd.Series([1, 2, 3, "4", 4, 5])
  321. msg = "Downcasting behavior in `replace`"
  322. with tm.assert_produces_warning(FutureWarning, match=msg):
  323. result = s.replace([2, "4"], np.nan)
  324. expected = pd.Series([1, np.nan, 3, np.nan, 4, 5])
  325. tm.assert_series_equal(expected, result)
  @pytest.mark.parametrize(
      "categorical, numeric",
      [
          (pd.Categorical(["A"], categories=["A", "B"]), [1]),
          (pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]),
      ],
  )
  def test_replace_categorical(self, categorical, numeric, using_infer_string):
      """Check dict replacement on a categorical Series (GH 24971, GH#23305).

      The call is expected to emit the "with CategoricalDtype is deprecated"
      FutureWarning. The ``using_infer_string`` fixture is requested by the
      signature but not read in the body.
      """
      ser = pd.Series(categorical)
      # Only this message is matched; an earlier assignment of the
      # "Downcasting behavior" text was overwritten before use and is removed.
      msg = "with CategoricalDtype is deprecated"
      with tm.assert_produces_warning(FutureWarning, match=msg):
          result = ser.replace({"A": 1, "B": 2})
      expected = pd.Series(numeric).astype("category")
      if 2 not in expected.cat.categories:
          # i.e. categories should be [1, 2] even if there are no "B"s present
          # GH#44940
          expected = expected.cat.add_categories(2)
      tm.assert_series_equal(expected, result)
  @pytest.mark.parametrize(
      "data, data_exp", [(["a", "b", "c"], ["b", "b", "c"]), (["a"], ["b"])]
  )
  def test_replace_categorical_inplace(self, data, data_exp):
      """Check inplace replacement on a categorical Series (GH 53358).

      The call is expected to emit the categorical deprecation FutureWarning.
      """
      cat_ser = pd.Series(data, dtype="category")
      deprecation = "with CategoricalDtype is deprecated"
      with tm.assert_produces_warning(FutureWarning, match=deprecation):
          cat_ser.replace(to_replace="a", value="b", inplace=True)
      tm.assert_series_equal(cat_ser, pd.Series(data_exp, dtype="category"))
  357. def test_replace_categorical_single(self):
  358. # GH 26988
  359. dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
  360. s = pd.Series(dti)
  361. c = s.astype("category")
  362. expected = c.copy()
  363. expected = expected.cat.add_categories("foo")
  364. expected[2] = "foo"
  365. expected = expected.cat.remove_unused_categories()
  366. assert c[2] != "foo"
  367. msg = "with CategoricalDtype is deprecated"
  368. with tm.assert_produces_warning(FutureWarning, match=msg):
  369. result = c.replace(c[2], "foo")
  370. tm.assert_series_equal(expected, result)
  371. assert c[2] != "foo" # ensure non-inplace call does not alter original
  372. msg = "with CategoricalDtype is deprecated"
  373. with tm.assert_produces_warning(FutureWarning, match=msg):
  374. return_value = c.replace(c[2], "foo", inplace=True)
  375. assert return_value is None
  376. tm.assert_series_equal(expected, c)
  377. first_value = c[0]
  378. msg = "with CategoricalDtype is deprecated"
  379. with tm.assert_produces_warning(FutureWarning, match=msg):
  380. return_value = c.replace(c[1], c[0], inplace=True)
  381. assert return_value is None
  382. assert c[0] == c[1] == first_value # test replacing with existing value
  383. def test_replace_with_no_overflowerror(self):
  384. # GH 25616
  385. # casts to object without Exception from OverflowError
  386. s = pd.Series([0, 1, 2, 3, 4])
  387. result = s.replace([3], ["100000000000000000000"])
  388. expected = pd.Series([0, 1, 2, "100000000000000000000", 4])
  389. tm.assert_series_equal(result, expected)
  390. s = pd.Series([0, "100000000000000000000", "100000000000000000001"])
  391. result = s.replace(["100000000000000000000"], [1])
  392. expected = pd.Series([0, 1, "100000000000000000001"])
  393. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize(
      "ser, to_replace, exp",
      [
          ([1, 2, 3], {1: 2, 2: 3, 3: 4}, [2, 3, 4]),
          (["1", "2", "3"], {"1": "2", "2": "3", "3": "4"}, ["2", "3", "4"]),
      ],
  )
  def test_replace_commutative(self, ser, to_replace, exp):
      """Check a chained mapping on numeric and on string data (GH 16051)."""
      # DataFrame.replace() overwrites when values are non-numeric
      replaced = pd.Series(ser).replace(to_replace)
      tm.assert_series_equal(replaced, pd.Series(exp))
  @pytest.mark.parametrize(
      "ser, exp", [([1, 2, 3], [1, True, 3]), (["x", 2, 3], ["x", True, 3])]
  )
  def test_replace_no_cast(self, ser, exp):
      """Check that a bool replacement value is kept as a bool (GH 9113)."""
      # BUG: replace int64 dtype with bool coerces to int64
      with_bool = pd.Series(ser).replace(2, True)
      tm.assert_series_equal(with_bool, pd.Series(exp))
  def test_replace_invalid_to_replace(self):
      """Check that a callable ``to_replace`` raises ``TypeError`` (GH 18634)."""
      # API: replace() should raise an exception if invalid argument is given
      series = pd.Series(["a", "b", "c "])
      type_msg = (
          r"Expecting 'to_replace' to be either a scalar, array-like, "
          r"dict or None, got invalid type.*"
      )
      deprecation_msg = (
          "Series.replace without 'value' and with non-dict-like "
          "'to_replace' is deprecated"
      )
      # Both the error and the deprecation warning are expected from one call.
      with pytest.raises(TypeError, match=type_msg), tm.assert_produces_warning(
          FutureWarning, match=deprecation_msg
      ):
          series.replace(lambda x: x.strip())
  @pytest.mark.parametrize("frame", [False, True])
  def test_replace_nonbool_regex(self, frame):
      """Check that a non-bool ``regex`` combined with ``to_replace`` raises.

      Runs against a Series, or its one-column frame when ``frame`` is true.
      """
      base = pd.Series(["a", "b", "c "])
      obj = base.to_frame() if frame else base

      expected_msg = "'to_replace' must be 'None' if 'regex' is not a bool"
      with pytest.raises(ValueError, match=expected_msg):
          obj.replace(to_replace=["a"], regex="foo")
  @pytest.mark.parametrize("frame", [False, True])
  def test_replace_empty_copy(self, frame):
      """Check ``replace`` on an empty object, inplace and copying.

      The inplace call must return ``None``; the copying call must return an
      equal but distinct object.
      """
      empty = pd.Series([], dtype=np.float64)
      obj = empty.to_frame() if frame else empty

      assert obj.replace(4, 5, inplace=True) is None

      copied = obj.replace(4, 5, inplace=False)
      tm.assert_equal(copied, obj)
      assert copied is not obj
  def test_replace_only_one_dictlike_arg(self, fixed_now_ts):
      """Check that mixing a dict argument with a non-None partner raises (GH#33340).

      ``fixed_now_ts`` is a fixture value placed in the mixed Series.
      """
      ser = pd.Series([1, 2, "A", fixed_now_ts, True])

      # dict-like ``to_replace`` together with a scalar ``value``
      dict_target_msg = (
          "Series.replace cannot use dict-like to_replace and non-None value"
      )
      with pytest.raises(ValueError, match=dict_target_msg):
          ser.replace({0: 1, 2: "A"}, "foo")

      # scalar ``to_replace`` together with a dict ``value``
      dict_value_msg = (
          "Series.replace cannot use dict-value and non-None to_replace"
      )
      with pytest.raises(ValueError, match=dict_value_msg):
          ser.replace(1, {0: "foo", 2: "bar"})
  464. def test_replace_extension_other(self, frame_or_series):
  465. # https://github.com/pandas-dev/pandas/issues/34530
  466. obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64"))
  467. result = obj.replace("", "") # no exception
  468. # should not have changed dtype
  469. tm.assert_equal(obj, result)
  470. def _check_replace_with_method(self, ser: pd.Series):
  471. df = ser.to_frame()
  472. msg1 = "The 'method' keyword in Series.replace is deprecated"
  473. with tm.assert_produces_warning(FutureWarning, match=msg1):
  474. res = ser.replace(ser[1], method="pad")
  475. expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype)
  476. tm.assert_series_equal(res, expected)
  477. msg2 = "The 'method' keyword in DataFrame.replace is deprecated"
  478. with tm.assert_produces_warning(FutureWarning, match=msg2):
  479. res_df = df.replace(ser[1], method="pad")
  480. tm.assert_frame_equal(res_df, expected.to_frame())
  481. ser2 = ser.copy()
  482. with tm.assert_produces_warning(FutureWarning, match=msg1):
  483. res2 = ser2.replace(ser[1], method="pad", inplace=True)
  484. assert res2 is None
  485. tm.assert_series_equal(ser2, expected)
  486. with tm.assert_produces_warning(FutureWarning, match=msg2):
  487. res_df2 = df.replace(ser[1], method="pad", inplace=True)
  488. assert res_df2 is None
  489. tm.assert_frame_equal(df, expected.to_frame())
  490. def test_replace_ea_dtype_with_method(self, any_numeric_ea_dtype):
  491. arr = pd.array([1, 2, pd.NA, 4], dtype=any_numeric_ea_dtype)
  492. ser = pd.Series(arr)
  493. self._check_replace_with_method(ser)
  @pytest.mark.parametrize("as_categorical", [True, False])
  def test_replace_interval_with_method(self, as_categorical):
      """Run the shared ``method="pad"`` checks on interval data.

      The data is optionally converted to a categorical first.
      """
      # in particular interval that can't hold NA
      breaks = range(4)
      intervals = pd.Series(pd.IntervalIndex.from_breaks(breaks))
      ser = intervals.astype("category") if as_categorical else intervals
      self._check_replace_with_method(ser)
  @pytest.mark.parametrize("as_period", [True, False])
  @pytest.mark.parametrize("as_categorical", [True, False])
  def test_replace_datetimelike_with_method(self, as_period, as_categorical):
      """Run the shared ``method="pad"`` checks on datetime-like data.

      The tz-aware dates are optionally converted to daily periods, and the
      resulting Series is optionally converted to a categorical.
      """
      dates = pd.date_range("2016-01-01", periods=5, tz="US/Pacific")
      idx = dates.tz_localize(None).to_period("D") if as_period else dates

      ser = pd.Series(idx)
      # Plant a missing value in the second-to-last slot.
      ser.iloc[-2] = pd.NaT
      if as_categorical:
          ser = ser.astype("category")

      self._check_replace_with_method(ser)
  513. def test_replace_with_compiled_regex(self):
  514. # https://github.com/pandas-dev/pandas/issues/35680
  515. s = pd.Series(["a", "b", "c"])
  516. regex = re.compile("^a$")
  517. result = s.replace({regex: "z"}, regex=True)
  518. expected = pd.Series(["z", "b", "c"])
  519. tm.assert_series_equal(result, expected)
  520. def test_pandas_replace_na(self):
  521. # GH#43344
  522. # GH#56599
  523. ser = pd.Series(["AA", "BB", "CC", "DD", "EE", "", pd.NA, "AA"], dtype="string")
  524. regex_mapping = {
  525. "AA": "CC",
  526. "BB": "CC",
  527. "EE": "CC",
  528. "CC": "CC-REPL",
  529. }
  530. result = ser.replace(regex_mapping, regex=True)
  531. exp = pd.Series(
  532. ["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA, "CC"], dtype="string"
  533. )
  534. tm.assert_series_equal(result, exp)
  @pytest.mark.parametrize(
      "dtype, input_data, to_replace, expected_data",
      [
          ("bool", [True, False], {True: False}, [False, False]),
          ("int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
          ("Int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
          ("float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
          ("Float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
          ("string", ["one", "two"], {"one": "1", "two": "2"}, ["1", "2"]),
          (
              pd.IntervalDtype("int64"),
              IntervalArray([pd.Interval(1, 2), pd.Interval(2, 3)]),
              {pd.Interval(1, 2): pd.Interval(10, 20)},
              IntervalArray([pd.Interval(10, 20), pd.Interval(2, 3)]),
          ),
          (
              pd.IntervalDtype("float64"),
              IntervalArray([pd.Interval(1.0, 2.7), pd.Interval(2.8, 3.1)]),
              {pd.Interval(1.0, 2.7): pd.Interval(10.6, 20.8)},
              IntervalArray([pd.Interval(10.6, 20.8), pd.Interval(2.8, 3.1)]),
          ),
          (
              pd.PeriodDtype("M"),
              [pd.Period("2020-05", freq="M")],
              {pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")},
              [pd.Period("2020-06", freq="M")],
          ),
      ],
  )
  def test_replace_dtype(self, dtype, input_data, to_replace, expected_data):
      """Check that dict replacement keeps the original dtype (GH#33484).

      Both the input and the expectation are built with the same ``dtype``.
      """
      replaced = pd.Series(input_data, dtype=dtype).replace(to_replace)
      tm.assert_series_equal(replaced, pd.Series(expected_data, dtype=dtype))
  570. def test_replace_string_dtype(self):
  571. # GH#40732, GH#44940
  572. ser = pd.Series(["one", "two", np.nan], dtype="string")
  573. res = ser.replace({"one": "1", "two": "2"})
  574. expected = pd.Series(["1", "2", np.nan], dtype="string")
  575. tm.assert_series_equal(res, expected)
  576. # GH#31644
  577. ser2 = pd.Series(["A", np.nan], dtype="string")
  578. res2 = ser2.replace("A", "B")
  579. expected2 = pd.Series(["B", np.nan], dtype="string")
  580. tm.assert_series_equal(res2, expected2)
  581. ser3 = pd.Series(["A", "B"], dtype="string")
  582. res3 = ser3.replace("A", pd.NA)
  583. expected3 = pd.Series([pd.NA, "B"], dtype="string")
  584. tm.assert_series_equal(res3, expected3)
  585. def test_replace_string_dtype_list_to_replace(self):
  586. # GH#41215, GH#44940
  587. ser = pd.Series(["abc", "def"], dtype="string")
  588. res = ser.replace(["abc", "any other string"], "xyz")
  589. expected = pd.Series(["xyz", "def"], dtype="string")
  590. tm.assert_series_equal(res, expected)
  591. def test_replace_string_dtype_regex(self):
  592. # GH#31644
  593. ser = pd.Series(["A", "B"], dtype="string")
  594. res = ser.replace(r".", "C", regex=True)
  595. expected = pd.Series(["C", "C"], dtype="string")
  596. tm.assert_series_equal(res, expected)
  def test_replace_nullable_numeric(self):
      """Check dtype retention for Float64/Int64 replacements (GH#40732, GH#44940).

      Replacements that fit must keep the dtype; a fractional replacement
      into Int64 is expected to raise ``TypeError``.
      """
      floats = pd.Series([1.0, 2.0, 3.999, 4.4], dtype=pd.Float64Dtype())
      for new_value in (9, 9.0):
          assert floats.replace({1.0: new_value}).dtype == floats.dtype
          assert floats.replace(1.0, new_value).dtype == floats.dtype

      list_result = floats.replace(to_replace=[1.0, 2.0], value=[9.0, 10.0])
      assert list_result.dtype == floats.dtype

      ints = pd.Series([1, 2, 3, 4], dtype=pd.Int64Dtype())
      for new_value in (9, 9.0):
          assert ints.replace({1: new_value}).dtype == ints.dtype
          assert ints.replace(1, new_value).dtype == ints.dtype

      # nullable (for now) raises instead of casting
      with pytest.raises(TypeError, match="Invalid value"):
          ints.replace({1: 9.5})
      with pytest.raises(TypeError, match="Invalid value"):
          ints.replace(1, 9.5)
  @pytest.mark.parametrize("regex", [False, True])
  def test_replace_regex_dtype_series(self, regex):
      """Check replacing "0" with 1 in an object Series (GH-48644).

      The call is expected to emit the "Downcasting behavior" FutureWarning,
      with or without ``regex``.
      """
      obj_ser = pd.Series(["0"], dtype=object)
      downcast_msg = "Downcasting behavior in `replace`"
      with tm.assert_produces_warning(FutureWarning, match=downcast_msg):
          replaced = obj_ser.replace(to_replace="0", value=1, regex=regex)
      tm.assert_series_equal(replaced, pd.Series([1]))
  @pytest.mark.parametrize("regex", [False, True])
  def test_replace_regex_dtype_series_string(self, regex):
      """Check replacing "0" with 1 in a ``dtype="str"`` Series.

      The expected result is int64, and the call is expected to emit the
      "Downcasting behavior" FutureWarning, with or without ``regex``.
      """
      str_ser = pd.Series(["0"], dtype="str")
      downcast_msg = "Downcasting behavior in `replace`"
      with tm.assert_produces_warning(FutureWarning, match=downcast_msg):
          replaced = str_ser.replace(to_replace="0", value=1, regex=regex)
      tm.assert_series_equal(replaced, pd.Series([1], dtype="int64"))
  633. def test_replace_different_int_types(self, any_int_numpy_dtype):
  634. # GH#45311
  635. labs = pd.Series([1, 1, 1, 0, 0, 2, 2, 2], dtype=any_int_numpy_dtype)
  636. maps = pd.Series([0, 2, 1], dtype=any_int_numpy_dtype)
  637. map_dict = dict(zip(maps.values, maps.index))
  638. result = labs.replace(map_dict)
  639. expected = labs.replace({0: 0, 2: 1, 1: 2})
  640. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize("val", [2, np.nan, 2.0])
  def test_replace_value_none_dtype_numeric(self, val):
      """Check replacing a numeric value with ``None`` (GH#48231).

      The expected result is an object Series holding ``[1, None]``.
      """
      numeric = pd.Series([1, val])
      with_none = numeric.replace(val, None)
      tm.assert_series_equal(with_none, pd.Series([1, None], dtype=object))
  648. def test_replace_change_dtype_series(self):
  649. # GH#25797
  650. df = pd.DataFrame({"Test": ["0.5", True, "0.6"]}, dtype=object)
  651. df["Test"] = df["Test"].replace([True], [np.nan])
  652. expected = pd.DataFrame({"Test": ["0.5", np.nan, "0.6"]}, dtype=object)
  653. tm.assert_frame_equal(df, expected)
  654. df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
  655. df["Test"] = df["Test"].replace([None], [np.nan])
  656. tm.assert_frame_equal(df, expected)
  657. df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
  658. df["Test"] = df["Test"].fillna(np.nan)
  659. tm.assert_frame_equal(df, expected)
  @pytest.mark.parametrize("dtype", ["object", "Int64"])
  def test_replace_na_in_obj_column(self, dtype):
      """Check replacing 1 with 2 next to a ``pd.NA`` entry (GH#47480).

      Both the copying call and the inplace call are checked.
      """
      ser = pd.Series([0, 1, pd.NA], dtype=dtype)
      expected = pd.Series([0, 2, pd.NA], dtype=dtype)

      tm.assert_series_equal(ser.replace(to_replace=1, value=2), expected)

      ser.replace(to_replace=1, value=2, inplace=True)
      tm.assert_series_equal(ser, expected)
  @pytest.mark.parametrize("val", [0, 0.5])
  def test_replace_numeric_column_with_na(self, val):
      """Check replacing 1 with ``pd.NA`` in a numeric Series (GH#50758).

      Both the copying call and the inplace call are checked.
      """
      ser = pd.Series([val, 1])
      expected = pd.Series([val, pd.NA])

      tm.assert_series_equal(ser.replace(to_replace=1, value=pd.NA), expected)

      ser.replace(to_replace=1, value=pd.NA, inplace=True)
      tm.assert_series_equal(ser, expected)
  678. def test_replace_ea_float_with_bool(self):
  679. # GH#55398
  680. ser = pd.Series([0.0], dtype="Float64")
  681. expected = ser.copy()
  682. result = ser.replace(False, 1.0)
  683. tm.assert_series_equal(result, expected)
  684. ser = pd.Series([False], dtype="boolean")
  685. expected = ser.copy()
  686. result = ser.replace(0.0, True)
  687. tm.assert_series_equal(result, expected)