test_replace.py 61 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665
  1. from __future__ import annotations
  2. from datetime import datetime
  3. import re
  4. import numpy as np
  5. import pytest
  6. import pandas as pd
  7. from pandas import (
  8. DataFrame,
  9. Index,
  10. Series,
  11. Timestamp,
  12. date_range,
  13. )
  14. import pandas._testing as tm
  @pytest.fixture
  def mix_ab() -> dict[str, list[int | str]]:
      """Column data with integers under "a" and single-character strings under "b"."""
      int_col = list(range(4))
      str_col = list("ab..")
      return {"a": int_col, "b": str_col}
  @pytest.fixture
  def mix_abc() -> dict[str, list[float | str]]:
      """Column data: integers in "a", strings in "b", strings plus one NaN in "c"."""
      columns = {"a": list(range(4)), "b": list("ab..")}
      columns["c"] = ["a", "b", np.nan, "d"]
      return columns
  21. class TestDataFrameReplace:
  22. def test_replace_inplace(self, datetime_frame, float_string_frame):
  23. datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan
  24. datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan
  25. tsframe = datetime_frame.copy()
  26. return_value = tsframe.replace(np.nan, 0, inplace=True)
  27. assert return_value is None
  28. tm.assert_frame_equal(tsframe, datetime_frame.fillna(0))
  29. # mixed type
  30. mf = float_string_frame
  31. mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan
  32. mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan
  33. result = float_string_frame.replace(np.nan, 0)
  34. expected = float_string_frame.copy()
  35. expected["foo"] = expected["foo"].astype(object)
  36. expected = expected.fillna(value=0)
  37. tm.assert_frame_equal(result, expected)
  38. tsframe = datetime_frame.copy()
  39. return_value = tsframe.replace([np.nan], [0], inplace=True)
  40. assert return_value is None
  41. tm.assert_frame_equal(tsframe, datetime_frame.fillna(0))
  42. @pytest.mark.parametrize(
  43. "to_replace,values,expected",
  44. [
  45. # lists of regexes and values
  46. # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
  47. (
  48. [r"\s*\.\s*", r"e|f|g"],
  49. [np.nan, "crap"],
  50. {
  51. "a": ["a", "b", np.nan, np.nan],
  52. "b": ["crap"] * 3 + ["h"],
  53. "c": ["h", "crap", "l", "o"],
  54. },
  55. ),
  56. # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
  57. (
  58. [r"\s*(\.)\s*", r"(e|f|g)"],
  59. [r"\1\1", r"\1_crap"],
  60. {
  61. "a": ["a", "b", "..", ".."],
  62. "b": ["e_crap", "f_crap", "g_crap", "h"],
  63. "c": ["h", "e_crap", "l", "o"],
  64. },
  65. ),
  66. # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
  67. # or vN)]
  68. (
  69. [r"\s*(\.)\s*", r"e"],
  70. [r"\1\1", r"crap"],
  71. {
  72. "a": ["a", "b", "..", ".."],
  73. "b": ["crap", "f", "g", "h"],
  74. "c": ["h", "crap", "l", "o"],
  75. },
  76. ),
  77. ],
  78. )
  79. @pytest.mark.parametrize("inplace", [True, False])
  80. @pytest.mark.parametrize("use_value_regex_args", [True, False])
  81. def test_regex_replace_list_obj(
  82. self, to_replace, values, expected, inplace, use_value_regex_args
  83. ):
  84. df = DataFrame({"a": list("ab.."), "b": list("efgh"), "c": list("helo")})
  85. if use_value_regex_args:
  86. result = df.replace(value=values, regex=to_replace, inplace=inplace)
  87. else:
  88. result = df.replace(to_replace, values, regex=True, inplace=inplace)
  89. if inplace:
  90. assert result is None
  91. result = df
  92. expected = DataFrame(expected)
  93. tm.assert_frame_equal(result, expected)
  94. def test_regex_replace_list_mixed(self, mix_ab):
  95. # mixed frame to make sure this doesn't break things
  96. dfmix = DataFrame(mix_ab)
  97. # lists of regexes and values
  98. # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
  99. to_replace_res = [r"\s*\.\s*", r"a"]
  100. values = [np.nan, "crap"]
  101. mix2 = {"a": list(range(4)), "b": list("ab.."), "c": list("halo")}
  102. dfmix2 = DataFrame(mix2)
  103. res = dfmix2.replace(to_replace_res, values, regex=True)
  104. expec = DataFrame(
  105. {
  106. "a": mix2["a"],
  107. "b": ["crap", "b", np.nan, np.nan],
  108. "c": ["h", "crap", "l", "o"],
  109. }
  110. )
  111. tm.assert_frame_equal(res, expec)
  112. # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
  113. to_replace_res = [r"\s*(\.)\s*", r"(a|b)"]
  114. values = [r"\1\1", r"\1_crap"]
  115. res = dfmix.replace(to_replace_res, values, regex=True)
  116. expec = DataFrame({"a": mix_ab["a"], "b": ["a_crap", "b_crap", "..", ".."]})
  117. tm.assert_frame_equal(res, expec)
  118. # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
  119. # or vN)]
  120. to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"]
  121. values = [r"\1\1", r"crap", r"\1_crap"]
  122. res = dfmix.replace(to_replace_res, values, regex=True)
  123. expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]})
  124. tm.assert_frame_equal(res, expec)
  125. to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"]
  126. values = [r"\1\1", r"crap", r"\1_crap"]
  127. res = dfmix.replace(regex=to_replace_res, value=values)
  128. expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]})
  129. tm.assert_frame_equal(res, expec)
  130. def test_regex_replace_list_mixed_inplace(self, mix_ab):
  131. dfmix = DataFrame(mix_ab)
  132. # the same inplace
  133. # lists of regexes and values
  134. # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
  135. to_replace_res = [r"\s*\.\s*", r"a"]
  136. values = [np.nan, "crap"]
  137. res = dfmix.copy()
  138. return_value = res.replace(to_replace_res, values, inplace=True, regex=True)
  139. assert return_value is None
  140. expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b", np.nan, np.nan]})
  141. tm.assert_frame_equal(res, expec)
  142. # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
  143. to_replace_res = [r"\s*(\.)\s*", r"(a|b)"]
  144. values = [r"\1\1", r"\1_crap"]
  145. res = dfmix.copy()
  146. return_value = res.replace(to_replace_res, values, inplace=True, regex=True)
  147. assert return_value is None
  148. expec = DataFrame({"a": mix_ab["a"], "b": ["a_crap", "b_crap", "..", ".."]})
  149. tm.assert_frame_equal(res, expec)
  150. # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
  151. # or vN)]
  152. to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"]
  153. values = [r"\1\1", r"crap", r"\1_crap"]
  154. res = dfmix.copy()
  155. return_value = res.replace(to_replace_res, values, inplace=True, regex=True)
  156. assert return_value is None
  157. expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]})
  158. tm.assert_frame_equal(res, expec)
  159. to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"]
  160. values = [r"\1\1", r"crap", r"\1_crap"]
  161. res = dfmix.copy()
  162. return_value = res.replace(regex=to_replace_res, value=values, inplace=True)
  163. assert return_value is None
  164. expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]})
  165. tm.assert_frame_equal(res, expec)
  166. def test_regex_replace_dict_mixed(self, mix_abc):
  167. dfmix = DataFrame(mix_abc)
  168. # dicts
  169. # single dict {re1: v1}, search the whole frame
  170. # need test for this...
  171. # list of dicts {re1: v1, re2: v2, ..., re3: v3}, search the whole
  172. # frame
  173. res = dfmix.replace({"b": r"\s*\.\s*"}, {"b": np.nan}, regex=True)
  174. res2 = dfmix.copy()
  175. return_value = res2.replace(
  176. {"b": r"\s*\.\s*"}, {"b": np.nan}, inplace=True, regex=True
  177. )
  178. assert return_value is None
  179. expec = DataFrame(
  180. {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]}
  181. )
  182. tm.assert_frame_equal(res, expec)
  183. tm.assert_frame_equal(res2, expec)
  184. # list of dicts {re1: re11, re2: re12, ..., reN: re1N}, search the
  185. # whole frame
  186. res = dfmix.replace({"b": r"\s*(\.)\s*"}, {"b": r"\1ty"}, regex=True)
  187. res2 = dfmix.copy()
  188. return_value = res2.replace(
  189. {"b": r"\s*(\.)\s*"}, {"b": r"\1ty"}, inplace=True, regex=True
  190. )
  191. assert return_value is None
  192. expec = DataFrame(
  193. {"a": mix_abc["a"], "b": ["a", "b", ".ty", ".ty"], "c": mix_abc["c"]}
  194. )
  195. tm.assert_frame_equal(res, expec)
  196. tm.assert_frame_equal(res2, expec)
  197. res = dfmix.replace(regex={"b": r"\s*(\.)\s*"}, value={"b": r"\1ty"})
  198. res2 = dfmix.copy()
  199. return_value = res2.replace(
  200. regex={"b": r"\s*(\.)\s*"}, value={"b": r"\1ty"}, inplace=True
  201. )
  202. assert return_value is None
  203. expec = DataFrame(
  204. {"a": mix_abc["a"], "b": ["a", "b", ".ty", ".ty"], "c": mix_abc["c"]}
  205. )
  206. tm.assert_frame_equal(res, expec)
  207. tm.assert_frame_equal(res2, expec)
  208. # scalar -> dict
  209. # to_replace regex, {value: value}
  210. expec = DataFrame(
  211. {"a": mix_abc["a"], "b": [np.nan, "b", ".", "."], "c": mix_abc["c"]}
  212. )
  213. res = dfmix.replace("a", {"b": np.nan}, regex=True)
  214. res2 = dfmix.copy()
  215. return_value = res2.replace("a", {"b": np.nan}, regex=True, inplace=True)
  216. assert return_value is None
  217. tm.assert_frame_equal(res, expec)
  218. tm.assert_frame_equal(res2, expec)
  219. res = dfmix.replace("a", {"b": np.nan}, regex=True)
  220. res2 = dfmix.copy()
  221. return_value = res2.replace(regex="a", value={"b": np.nan}, inplace=True)
  222. assert return_value is None
  223. expec = DataFrame(
  224. {"a": mix_abc["a"], "b": [np.nan, "b", ".", "."], "c": mix_abc["c"]}
  225. )
  226. tm.assert_frame_equal(res, expec)
  227. tm.assert_frame_equal(res2, expec)
  228. def test_regex_replace_dict_nested(self, mix_abc):
  229. # nested dicts will not work until this is implemented for Series
  230. dfmix = DataFrame(mix_abc)
  231. res = dfmix.replace({"b": {r"\s*\.\s*": np.nan}}, regex=True)
  232. res2 = dfmix.copy()
  233. res4 = dfmix.copy()
  234. return_value = res2.replace(
  235. {"b": {r"\s*\.\s*": np.nan}}, inplace=True, regex=True
  236. )
  237. assert return_value is None
  238. res3 = dfmix.replace(regex={"b": {r"\s*\.\s*": np.nan}})
  239. return_value = res4.replace(regex={"b": {r"\s*\.\s*": np.nan}}, inplace=True)
  240. assert return_value is None
  241. expec = DataFrame(
  242. {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]}
  243. )
  244. tm.assert_frame_equal(res, expec)
  245. tm.assert_frame_equal(res2, expec)
  246. tm.assert_frame_equal(res3, expec)
  247. tm.assert_frame_equal(res4, expec)
  248. def test_regex_replace_dict_nested_non_first_character(self, any_string_dtype):
  249. # GH 25259
  250. dtype = any_string_dtype
  251. df = DataFrame({"first": ["abc", "bca", "cab"]}, dtype=dtype)
  252. result = df.replace({"a": "."}, regex=True)
  253. expected = DataFrame({"first": [".bc", "bc.", "c.b"]}, dtype=dtype)
  254. tm.assert_frame_equal(result, expected)
  255. def test_regex_replace_dict_nested_gh4115(self):
  256. df = DataFrame(
  257. {"Type": Series(["Q", "T", "Q", "Q", "T"], dtype=object), "tmp": 2}
  258. )
  259. expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2})
  260. msg = "Downcasting behavior in `replace`"
  261. with tm.assert_produces_warning(FutureWarning, match=msg):
  262. result = df.replace({"Type": {"Q": 0, "T": 1}})
  263. tm.assert_frame_equal(result, expected)
  264. def test_regex_replace_list_to_scalar(self, mix_abc, using_infer_string):
  265. df = DataFrame(mix_abc)
  266. expec = DataFrame(
  267. {
  268. "a": mix_abc["a"],
  269. "b": [np.nan] * 4,
  270. "c": [np.nan, np.nan, np.nan, "d"],
  271. }
  272. )
  273. if using_infer_string:
  274. expec["b"] = expec["b"].astype("str")
  275. msg = "Downcasting behavior in `replace`"
  276. warn = None if using_infer_string else FutureWarning
  277. with tm.assert_produces_warning(warn, match=msg):
  278. res = df.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True)
  279. res2 = df.copy()
  280. res3 = df.copy()
  281. with tm.assert_produces_warning(warn, match=msg):
  282. return_value = res2.replace(
  283. [r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True
  284. )
  285. assert return_value is None
  286. with tm.assert_produces_warning(warn, match=msg):
  287. return_value = res3.replace(
  288. regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True
  289. )
  290. assert return_value is None
  291. tm.assert_frame_equal(res, expec)
  292. tm.assert_frame_equal(res2, expec)
  293. tm.assert_frame_equal(res3, expec)
  294. def test_regex_replace_str_to_numeric(self, mix_abc):
  295. # what happens when you try to replace a numeric value with a regex?
  296. df = DataFrame(mix_abc)
  297. res = df.replace(r"\s*\.\s*", 0, regex=True)
  298. res2 = df.copy()
  299. return_value = res2.replace(r"\s*\.\s*", 0, inplace=True, regex=True)
  300. assert return_value is None
  301. res3 = df.copy()
  302. return_value = res3.replace(regex=r"\s*\.\s*", value=0, inplace=True)
  303. assert return_value is None
  304. expec = DataFrame({"a": mix_abc["a"], "b": ["a", "b", 0, 0], "c": mix_abc["c"]})
  305. tm.assert_frame_equal(res, expec)
  306. tm.assert_frame_equal(res2, expec)
  307. tm.assert_frame_equal(res3, expec)
  308. def test_regex_replace_regex_list_to_numeric(self, mix_abc):
  309. df = DataFrame(mix_abc)
  310. res = df.replace([r"\s*\.\s*", "b"], 0, regex=True)
  311. res2 = df.copy()
  312. return_value = res2.replace([r"\s*\.\s*", "b"], 0, regex=True, inplace=True)
  313. assert return_value is None
  314. res3 = df.copy()
  315. return_value = res3.replace(regex=[r"\s*\.\s*", "b"], value=0, inplace=True)
  316. assert return_value is None
  317. expec = DataFrame(
  318. {"a": mix_abc["a"], "b": ["a", 0, 0, 0], "c": ["a", 0, np.nan, "d"]}
  319. )
  320. tm.assert_frame_equal(res, expec)
  321. tm.assert_frame_equal(res2, expec)
  322. tm.assert_frame_equal(res3, expec)
  323. def test_regex_replace_series_of_regexes(self, mix_abc):
  324. df = DataFrame(mix_abc)
  325. s1 = Series({"b": r"\s*\.\s*"})
  326. s2 = Series({"b": np.nan})
  327. res = df.replace(s1, s2, regex=True)
  328. res2 = df.copy()
  329. return_value = res2.replace(s1, s2, inplace=True, regex=True)
  330. assert return_value is None
  331. res3 = df.copy()
  332. return_value = res3.replace(regex=s1, value=s2, inplace=True)
  333. assert return_value is None
  334. expec = DataFrame(
  335. {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]}
  336. )
  337. tm.assert_frame_equal(res, expec)
  338. tm.assert_frame_equal(res2, expec)
  339. tm.assert_frame_equal(res3, expec)
  340. def test_regex_replace_numeric_to_object_conversion(self, mix_abc):
  341. df = DataFrame(mix_abc)
  342. expec = DataFrame({"a": ["a", 1, 2, 3], "b": mix_abc["b"], "c": mix_abc["c"]})
  343. res = df.replace(0, "a")
  344. tm.assert_frame_equal(res, expec)
  345. assert res.a.dtype == np.object_
  346. @pytest.mark.parametrize(
  347. "to_replace", [{"": np.nan, ",": ""}, {",": "", "": np.nan}]
  348. )
  349. def test_joint_simple_replace_and_regex_replace(self, to_replace):
  350. # GH-39338
  351. df = DataFrame(
  352. {
  353. "col1": ["1,000", "a", "3"],
  354. "col2": ["a", "", "b"],
  355. "col3": ["a", "b", "c"],
  356. }
  357. )
  358. result = df.replace(regex=to_replace)
  359. expected = DataFrame(
  360. {
  361. "col1": ["1000", "a", "3"],
  362. "col2": ["a", np.nan, "b"],
  363. "col3": ["a", "b", "c"],
  364. }
  365. )
  366. tm.assert_frame_equal(result, expected)
  367. @pytest.mark.parametrize("metachar", ["[]", "()", r"\d", r"\w", r"\s"])
  368. def test_replace_regex_metachar(self, metachar):
  369. df = DataFrame({"a": [metachar, "else"]})
  370. result = df.replace({"a": {metachar: "paren"}})
  371. expected = DataFrame({"a": ["paren", "else"]})
  372. tm.assert_frame_equal(result, expected)
  373. @pytest.mark.parametrize(
  374. "data,to_replace,expected",
  375. [
  376. (["xax", "xbx"], {"a": "c", "b": "d"}, ["xcx", "xdx"]),
  377. (["d", "", ""], {r"^\s*$": pd.NA}, ["d", pd.NA, pd.NA]),
  378. ],
  379. )
  380. def test_regex_replace_string_types(
  381. self, data, to_replace, expected, frame_or_series, any_string_dtype
  382. ):
  383. # GH-41333, GH-35977
  384. dtype = any_string_dtype
  385. obj = frame_or_series(data, dtype=dtype)
  386. result = obj.replace(to_replace, regex=True)
  387. expected = frame_or_series(expected, dtype=dtype)
  388. tm.assert_equal(result, expected)
  389. def test_replace(self, datetime_frame):
  390. datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan
  391. datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan
  392. zero_filled = datetime_frame.replace(np.nan, -1e8)
  393. tm.assert_frame_equal(zero_filled, datetime_frame.fillna(-1e8))
  394. tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), datetime_frame)
  395. datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan
  396. datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan
  397. datetime_frame.loc[datetime_frame.index[:5], "B"] = -1e8
  398. # empty
  399. df = DataFrame(index=["a", "b"])
  400. tm.assert_frame_equal(df, df.replace(5, 7))
  401. # GH 11698
  402. # test for mixed data types.
  403. df = DataFrame(
  404. [("-", pd.to_datetime("20150101")), ("a", pd.to_datetime("20150102"))]
  405. )
  406. df1 = df.replace("-", np.nan)
  407. expected_df = DataFrame(
  408. [(np.nan, pd.to_datetime("20150101")), ("a", pd.to_datetime("20150102"))]
  409. )
  410. tm.assert_frame_equal(df1, expected_df)
  411. def test_replace_list(self):
  412. obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")}
  413. dfobj = DataFrame(obj)
  414. # lists of regexes and values
  415. # list of [v1, v2, ..., vN] -> [v1, v2, ..., vN]
  416. to_replace_res = [r".", r"e"]
  417. values = [np.nan, "crap"]
  418. res = dfobj.replace(to_replace_res, values)
  419. expec = DataFrame(
  420. {
  421. "a": ["a", "b", np.nan, np.nan],
  422. "b": ["crap", "f", "g", "h"],
  423. "c": ["h", "crap", "l", "o"],
  424. }
  425. )
  426. tm.assert_frame_equal(res, expec)
  427. # list of [v1, v2, ..., vN] -> [v1, v2, .., vN]
  428. to_replace_res = [r".", r"f"]
  429. values = [r"..", r"crap"]
  430. res = dfobj.replace(to_replace_res, values)
  431. expec = DataFrame(
  432. {
  433. "a": ["a", "b", "..", ".."],
  434. "b": ["e", "crap", "g", "h"],
  435. "c": ["h", "e", "l", "o"],
  436. }
  437. )
  438. tm.assert_frame_equal(res, expec)
  439. def test_replace_with_empty_list(self, frame_or_series):
  440. # GH 21977
  441. ser = Series([["a", "b"], [], np.nan, [1]])
  442. obj = DataFrame({"col": ser})
  443. obj = tm.get_obj(obj, frame_or_series)
  444. expected = obj
  445. result = obj.replace([], np.nan)
  446. tm.assert_equal(result, expected)
  447. # GH 19266
  448. msg = (
  449. "NumPy boolean array indexing assignment cannot assign {size} "
  450. "input values to the 1 output values where the mask is true"
  451. )
  452. with pytest.raises(ValueError, match=msg.format(size=0)):
  453. obj.replace({np.nan: []})
  454. with pytest.raises(ValueError, match=msg.format(size=2)):
  455. obj.replace({np.nan: ["dummy", "alt"]})
  456. def test_replace_series_dict(self):
  457. # from GH 3064
  458. df = DataFrame({"zero": {"a": 0.0, "b": 1}, "one": {"a": 2.0, "b": 0}})
  459. result = df.replace(0, {"zero": 0.5, "one": 1.0})
  460. expected = DataFrame({"zero": {"a": 0.5, "b": 1}, "one": {"a": 2.0, "b": 1.0}})
  461. tm.assert_frame_equal(result, expected)
  462. result = df.replace(0, df.mean())
  463. tm.assert_frame_equal(result, expected)
  464. # series to series/dict
  465. df = DataFrame({"zero": {"a": 0.0, "b": 1}, "one": {"a": 2.0, "b": 0}})
  466. s = Series({"zero": 0.0, "one": 2.0})
  467. result = df.replace(s, {"zero": 0.5, "one": 1.0})
  468. expected = DataFrame({"zero": {"a": 0.5, "b": 1}, "one": {"a": 1.0, "b": 0.0}})
  469. tm.assert_frame_equal(result, expected)
  470. result = df.replace(s, df.mean())
  471. tm.assert_frame_equal(result, expected)
  472. def test_replace_convert(self):
  473. # gh 3907
  474. df = DataFrame([["foo", "bar", "bah"], ["bar", "foo", "bah"]])
  475. m = {"foo": 1, "bar": 2, "bah": 3}
  476. msg = "Downcasting behavior in `replace` "
  477. with tm.assert_produces_warning(FutureWarning, match=msg):
  478. rep = df.replace(m)
  479. expec = Series([np.int64] * 3)
  480. res = rep.dtypes
  481. tm.assert_series_equal(expec, res)
  482. def test_replace_mixed(self, float_string_frame):
  483. mf = float_string_frame
  484. mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan
  485. mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan
  486. result = float_string_frame.replace(np.nan, -18)
  487. expected = float_string_frame.copy()
  488. expected["foo"] = expected["foo"].astype(object)
  489. expected = expected.fillna(value=-18)
  490. tm.assert_frame_equal(result, expected)
  491. expected2 = float_string_frame.copy()
  492. expected2["foo"] = expected2["foo"].astype(object)
  493. tm.assert_frame_equal(result.replace(-18, np.nan), expected2)
  494. result = float_string_frame.replace(np.nan, -1e8)
  495. expected = float_string_frame.copy()
  496. expected["foo"] = expected["foo"].astype(object)
  497. expected = expected.fillna(value=-1e8)
  498. tm.assert_frame_equal(result, expected)
  499. expected2 = float_string_frame.copy()
  500. expected2["foo"] = expected2["foo"].astype(object)
  501. tm.assert_frame_equal(result.replace(-1e8, np.nan), expected2)
  502. def test_replace_mixed_int_block_upcasting(self):
  503. # int block upcasting
  504. df = DataFrame(
  505. {
  506. "A": Series([1.0, 2.0], dtype="float64"),
  507. "B": Series([0, 1], dtype="int64"),
  508. }
  509. )
  510. expected = DataFrame(
  511. {
  512. "A": Series([1.0, 2.0], dtype="float64"),
  513. "B": Series([0.5, 1], dtype="float64"),
  514. }
  515. )
  516. result = df.replace(0, 0.5)
  517. tm.assert_frame_equal(result, expected)
  518. return_value = df.replace(0, 0.5, inplace=True)
  519. assert return_value is None
  520. tm.assert_frame_equal(df, expected)
  521. def test_replace_mixed_int_block_splitting(self):
  522. # int block splitting
  523. df = DataFrame(
  524. {
  525. "A": Series([1.0, 2.0], dtype="float64"),
  526. "B": Series([0, 1], dtype="int64"),
  527. "C": Series([1, 2], dtype="int64"),
  528. }
  529. )
  530. expected = DataFrame(
  531. {
  532. "A": Series([1.0, 2.0], dtype="float64"),
  533. "B": Series([0.5, 1], dtype="float64"),
  534. "C": Series([1, 2], dtype="int64"),
  535. }
  536. )
  537. result = df.replace(0, 0.5)
  538. tm.assert_frame_equal(result, expected)
  539. def test_replace_mixed2(self, using_infer_string):
  540. # to object block upcasting
  541. df = DataFrame(
  542. {
  543. "A": Series([1.0, 2.0], dtype="float64"),
  544. "B": Series([0, 1], dtype="int64"),
  545. }
  546. )
  547. expected = DataFrame(
  548. {
  549. "A": Series([1, "foo"], dtype="object"),
  550. "B": Series([0, 1], dtype="int64"),
  551. }
  552. )
  553. result = df.replace(2, "foo")
  554. tm.assert_frame_equal(result, expected)
  555. expected = DataFrame(
  556. {
  557. "A": Series(["foo", "bar"], dtype="object"),
  558. "B": Series([0, "foo"], dtype="object"),
  559. }
  560. )
  561. result = df.replace([1, 2], ["foo", "bar"])
  562. tm.assert_frame_equal(result, expected)
  563. def test_replace_mixed3(self):
  564. # test case from
  565. df = DataFrame(
  566. {"A": Series([3, 0], dtype="int64"), "B": Series([0, 3], dtype="int64")}
  567. )
  568. result = df.replace(3, df.mean().to_dict())
  569. expected = df.copy().astype("float64")
  570. m = df.mean()
  571. expected.iloc[0, 0] = m.iloc[0]
  572. expected.iloc[1, 1] = m.iloc[1]
  573. tm.assert_frame_equal(result, expected)
  574. def test_replace_nullable_int_with_string_doesnt_cast(self):
  575. # GH#25438 don't cast df['a'] to float64
  576. df = DataFrame({"a": [1, 2, 3, np.nan], "b": ["some", "strings", "here", "he"]})
  577. df["a"] = df["a"].astype("Int64")
  578. res = df.replace("", np.nan)
  579. tm.assert_series_equal(res["a"], df["a"])
  580. @pytest.mark.parametrize("dtype", ["boolean", "Int64", "Float64"])
  581. def test_replace_with_nullable_column(self, dtype):
  582. # GH-44499
  583. nullable_ser = Series([1, 0, 1], dtype=dtype)
  584. df = DataFrame({"A": ["A", "B", "x"], "B": nullable_ser})
  585. result = df.replace("x", "X")
  586. expected = DataFrame({"A": ["A", "B", "X"], "B": nullable_ser})
  587. tm.assert_frame_equal(result, expected)
  588. def test_replace_simple_nested_dict(self):
  589. df = DataFrame({"col": range(1, 5)})
  590. expected = DataFrame({"col": ["a", 2, 3, "b"]})
  591. result = df.replace({"col": {1: "a", 4: "b"}})
  592. tm.assert_frame_equal(expected, result)
  593. # in this case, should be the same as the not nested version
  594. result = df.replace({1: "a", 4: "b"})
  595. tm.assert_frame_equal(expected, result)
  596. def test_replace_simple_nested_dict_with_nonexistent_value(self):
  597. df = DataFrame({"col": range(1, 5)})
  598. expected = DataFrame({"col": ["a", 2, 3, "b"]})
  599. result = df.replace({-1: "-", 1: "a", 4: "b"})
  600. tm.assert_frame_equal(expected, result)
  601. result = df.replace({"col": {-1: "-", 1: "a", 4: "b"}})
  602. tm.assert_frame_equal(expected, result)
  603. def test_replace_NA_with_None(self):
  604. # gh-45601
  605. df = DataFrame({"value": [42, None]}).astype({"value": "Int64"})
  606. result = df.replace({pd.NA: None})
  607. expected = DataFrame({"value": [42, None]}, dtype=object)
  608. tm.assert_frame_equal(result, expected)
  609. def test_replace_NAT_with_None(self):
  610. # gh-45836
  611. df = DataFrame([pd.NaT, pd.NaT])
  612. result = df.replace({pd.NaT: None, np.nan: None})
  613. expected = DataFrame([None, None])
  614. tm.assert_frame_equal(result, expected)
  615. def test_replace_with_None_keeps_categorical(self):
  616. # gh-46634
  617. cat_series = Series(["b", "b", "b", "d"], dtype="category")
  618. df = DataFrame(
  619. {
  620. "id": Series([5, 4, 3, 2], dtype="float64"),
  621. "col": cat_series,
  622. }
  623. )
  624. result = df.replace({3: None})
  625. expected = DataFrame(
  626. {
  627. "id": Series([5.0, 4.0, None, 2.0], dtype="object"),
  628. "col": cat_series,
  629. }
  630. )
  631. tm.assert_frame_equal(result, expected)
  632. def test_replace_value_is_none(self, datetime_frame):
  633. orig_value = datetime_frame.iloc[0, 0]
  634. orig2 = datetime_frame.iloc[1, 0]
  635. datetime_frame.iloc[0, 0] = np.nan
  636. datetime_frame.iloc[1, 0] = 1
  637. result = datetime_frame.replace(to_replace={np.nan: 0})
  638. expected = datetime_frame.T.replace(to_replace={np.nan: 0}).T
  639. tm.assert_frame_equal(result, expected)
  640. result = datetime_frame.replace(to_replace={np.nan: 0, 1: -1e8})
  641. tsframe = datetime_frame.copy()
  642. tsframe.iloc[0, 0] = 0
  643. tsframe.iloc[1, 0] = -1e8
  644. expected = tsframe
  645. tm.assert_frame_equal(expected, result)
  646. datetime_frame.iloc[0, 0] = orig_value
  647. datetime_frame.iloc[1, 0] = orig2
  648. def test_replace_for_new_dtypes(self, datetime_frame):
  649. # dtypes
  650. tsframe = datetime_frame.copy().astype(np.float32)
  651. tsframe.loc[tsframe.index[:5], "A"] = np.nan
  652. tsframe.loc[tsframe.index[-5:], "A"] = np.nan
  653. zero_filled = tsframe.replace(np.nan, -1e8)
  654. tm.assert_frame_equal(zero_filled, tsframe.fillna(-1e8))
  655. tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), tsframe)
  656. tsframe.loc[tsframe.index[:5], "A"] = np.nan
  657. tsframe.loc[tsframe.index[-5:], "A"] = np.nan
  658. tsframe.loc[tsframe.index[:5], "B"] = np.nan
  659. msg = "DataFrame.fillna with 'method' is deprecated"
  660. with tm.assert_produces_warning(FutureWarning, match=msg):
  661. # TODO: what is this even testing?
  662. result = tsframe.fillna(method="bfill")
  663. tm.assert_frame_equal(result, tsframe.fillna(method="bfill"))
  @pytest.mark.parametrize(
      "frame, to_replace, value, expected",
      [
          (DataFrame({"ints": [1, 2, 3]}), 1, 0, DataFrame({"ints": [0, 2, 3]})),
          (
              DataFrame({"ints": [1, 2, 3]}, dtype=np.int32),
              1,
              0,
              DataFrame({"ints": [0, 2, 3]}, dtype=np.int32),
          ),
          (
              DataFrame({"ints": [1, 2, 3]}, dtype=np.int16),
              1,
              0,
              DataFrame({"ints": [0, 2, 3]}, dtype=np.int16),
          ),
          (
              DataFrame({"bools": [True, False, True]}),
              False,
              True,
              DataFrame({"bools": [True, True, True]}),
          ),
          (
              DataFrame({"complex": [1j, 2j, 3j]}),
              1j,
              0,
              DataFrame({"complex": [0j, 2j, 3j]}),
          ),
          (
              DataFrame(
                  {
                      "datetime64": Index(
                          [
                              datetime(2018, 5, 28),
                              datetime(2018, 7, 28),
                              datetime(2018, 5, 28),
                          ]
                      )
                  }
              ),
              datetime(2018, 5, 28),
              datetime(2018, 7, 28),
              DataFrame({"datetime64": Index([datetime(2018, 7, 28)] * 3)}),
          ),
          # GH 20380
          (
              DataFrame({"dt": [datetime(3017, 12, 20)], "str": ["foo"]}),
              "foo",
              "bar",
              DataFrame({"dt": [datetime(3017, 12, 20)], "str": ["bar"]}),
          ),
          # GH 36782
          (
              DataFrame({"dt": [datetime(2920, 10, 1)]}),
              datetime(2920, 10, 1),
              datetime(2020, 10, 1),
              DataFrame({"dt": [datetime(2020, 10, 1)]}),
          ),
          (
              DataFrame(
                  {
                      "A": date_range("20130101", periods=3, tz="US/Eastern"),
                      "B": [0, np.nan, 2],
                  }
              ),
              Timestamp("20130102", tz="US/Eastern"),
              Timestamp("20130104", tz="US/Eastern"),
              DataFrame(
                  {
                      "A": pd.DatetimeIndex(
                          [
                              Timestamp("20130101", tz="US/Eastern"),
                              Timestamp("20130104", tz="US/Eastern"),
                              Timestamp("20130103", tz="US/Eastern"),
                          ]
                      ).as_unit("ns"),
                      "B": [0, np.nan, 2],
                  }
              ),
          ),
          # GH 35376
          (
              DataFrame([[1, 1.0], [2, 2.0]]),
              1.0,
              5,
              DataFrame([[5, 5.0], [2, 2.0]]),
          ),
          (
              DataFrame([[1, 1.0], [2, 2.0]]),
              1,
              5,
              DataFrame([[5, 5.0], [2, 2.0]]),
          ),
          (
              DataFrame([[1, 1.0], [2, 2.0]]),
              1.0,
              5.0,
              DataFrame([[5, 5.0], [2, 2.0]]),
          ),
          (
              DataFrame([[1, 1.0], [2, 2.0]]),
              1,
              5.0,
              DataFrame([[5, 5.0], [2, 2.0]]),
          ),
      ],
  )
  def test_replace_dtypes(self, frame, to_replace, value, expected):
      """Scalar replacement yields ``expected`` for each parametrized case."""
      # Only the case whose to_replace is a datetime in year 2920 is
      # asserted to emit the downcasting FutureWarning.
      is_2920_datetime = isinstance(to_replace, datetime) and to_replace.year == 2920
      expected_warning = FutureWarning if is_2920_datetime else None
      with tm.assert_produces_warning(
          expected_warning, match="Downcasting behavior in `replace` "
      ):
          result = frame.replace(to_replace, value)
      tm.assert_frame_equal(result, expected)
  def test_replace_input_formats_listlike(self):
      """Dict/dict, list/list and scalar/dict argument pairs, plus the length check."""
      # both dicts
      targets = {"A": np.nan, "B": 0, "C": ""}
      fillers = {"A": 0, "B": -1, "C": "missing"}
      df = DataFrame(
          {"A": [np.nan, 0, np.inf], "B": [0, 2, 5], "C": ["", "asdf", "fd"]}
      )
      filled = df.replace(targets, fillers)
      per_column = {
          label: column.replace(targets[label], fillers[label])
          for label, column in df.items()
      }
      tm.assert_frame_equal(filled, DataFrame(per_column))

      result = df.replace([0, 2, 5], [5, 2, 0])
      expected = DataFrame(
          {"A": [np.nan, 5, np.inf], "B": [5, 2, 0], "C": ["", "asdf", "fd"]}
      )
      tm.assert_frame_equal(result, expected)

      # scalar to dict
      fillers = {"A": 0, "B": -1, "C": "missing"}
      df = DataFrame(
          {"A": [np.nan, 0, np.nan], "B": [0, 2, 5], "C": ["", "asdf", "fd"]}
      )
      filled = df.replace(np.nan, fillers)
      per_column = {
          label: column.replace(np.nan, fillers[label])
          for label, column in df.items()
      }
      tm.assert_frame_equal(filled, DataFrame(per_column))

      # list to list
      targets = [np.nan, 0, ""]
      fillers = [-2, -1, "missing"]
      result = df.replace(targets, fillers)
      expected = df.copy()
      # apply the pairs one at a time, in place, as the reference
      for old, new in zip(targets, fillers):
          returned = expected.replace(old, new, inplace=True)
          assert returned is None
      tm.assert_frame_equal(result, expected)

      msg = r"Replacement lists must match in length\. Expecting 3 got 2"
      with pytest.raises(ValueError, match=msg):
          df.replace(targets, fillers[1:])
  def test_replace_input_formats_scalar(self):
      """Dict-to-scalar and list-to-scalar replacement; a list ``value`` with a dict raises."""
      df = DataFrame(
          {"A": [np.nan, 0, np.inf], "B": [0, 2, 5], "C": ["", "asdf", "fd"]}
      )

      # dict to scalar
      targets = {"A": np.nan, "B": 0, "C": ""}
      filled = df.replace(targets, 0)
      per_column = {
          label: column.replace(targets[label], 0) for label, column in df.items()
      }
      tm.assert_frame_equal(filled, DataFrame(per_column))

      with pytest.raises(
          TypeError, match="value argument must be scalar, dict, or Series"
      ):
          df.replace(targets, [np.nan, 0, ""])

      # list to scalar
      targets = [np.nan, 0, ""]
      result = df.replace(targets, -1)
      expected = df.copy()
      for old in targets:
          returned = expected.replace(old, -1, inplace=True)
          assert returned is None
      tm.assert_frame_equal(result, expected)
  834. def test_replace_limit(self):
  835. # TODO
  836. pass
  837. def test_replace_dict_no_regex(self, any_string_dtype):
  838. answer = Series(
  839. {
  840. 0: "Strongly Agree",
  841. 1: "Agree",
  842. 2: "Neutral",
  843. 3: "Disagree",
  844. 4: "Strongly Disagree",
  845. },
  846. dtype=any_string_dtype,
  847. )
  848. weights = {
  849. "Agree": 4,
  850. "Disagree": 2,
  851. "Neutral": 3,
  852. "Strongly Agree": 5,
  853. "Strongly Disagree": 1,
  854. }
  855. expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1})
  856. msg = "Downcasting behavior in `replace` "
  857. with tm.assert_produces_warning(FutureWarning, match=msg):
  858. result = answer.replace(weights)
  859. tm.assert_series_equal(result, expected)
  860. def test_replace_series_no_regex(self, any_string_dtype):
  861. answer = Series(
  862. {
  863. 0: "Strongly Agree",
  864. 1: "Agree",
  865. 2: "Neutral",
  866. 3: "Disagree",
  867. 4: "Strongly Disagree",
  868. },
  869. dtype=any_string_dtype,
  870. )
  871. weights = Series(
  872. {
  873. "Agree": 4,
  874. "Disagree": 2,
  875. "Neutral": 3,
  876. "Strongly Agree": 5,
  877. "Strongly Disagree": 1,
  878. }
  879. )
  880. expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1})
  881. msg = "Downcasting behavior in `replace` "
  882. with tm.assert_produces_warning(FutureWarning, match=msg):
  883. result = answer.replace(weights)
  884. tm.assert_series_equal(result, expected)
  885. def test_replace_dict_tuple_list_ordering_remains_the_same(self):
  886. df = DataFrame({"A": [np.nan, 1]})
  887. res1 = df.replace(to_replace={np.nan: 0, 1: -1e8})
  888. res2 = df.replace(to_replace=(1, np.nan), value=[-1e8, 0])
  889. res3 = df.replace(to_replace=[1, np.nan], value=[-1e8, 0])
  890. expected = DataFrame({"A": [0, -1e8]})
  891. tm.assert_frame_equal(res1, res2)
  892. tm.assert_frame_equal(res2, res3)
  893. tm.assert_frame_equal(res3, expected)
  894. def test_replace_doesnt_replace_without_regex(self):
  895. df = DataFrame(
  896. {
  897. "fol": [1, 2, 2, 3],
  898. "T_opp": ["0", "vr", "0", "0"],
  899. "T_Dir": ["0", "0", "0", "bt"],
  900. "T_Enh": ["vo", "0", "0", "0"],
  901. }
  902. )
  903. res = df.replace({r"\D": 1})
  904. tm.assert_frame_equal(df, res)
  905. def test_replace_bool_with_string(self):
  906. df = DataFrame({"a": [True, False], "b": list("ab")})
  907. result = df.replace(True, "a")
  908. expected = DataFrame({"a": ["a", False], "b": df.b})
  909. tm.assert_frame_equal(result, expected)
  910. def test_replace_pure_bool_with_string_no_op(self):
  911. df = DataFrame(np.random.default_rng(2).random((2, 2)) > 0.5)
  912. result = df.replace("asdf", "fdsa")
  913. tm.assert_frame_equal(df, result)
  914. def test_replace_bool_with_bool(self):
  915. df = DataFrame(np.random.default_rng(2).random((2, 2)) > 0.5)
  916. result = df.replace(False, True)
  917. expected = DataFrame(np.ones((2, 2), dtype=bool))
  918. tm.assert_frame_equal(result, expected)
  919. def test_replace_with_dict_with_bool_keys(self):
  920. df = DataFrame({0: [True, False], 1: [False, True]})
  921. result = df.replace({"asdf": "asdb", True: "yes"})
  922. expected = DataFrame({0: ["yes", False], 1: [False, "yes"]})
  923. tm.assert_frame_equal(result, expected)
  924. def test_replace_dict_strings_vs_ints(self):
  925. # GH#34789
  926. df = DataFrame({"Y0": [1, 2], "Y1": [3, 4]})
  927. result = df.replace({"replace_string": "test"})
  928. tm.assert_frame_equal(result, df)
  929. result = df["Y0"].replace({"replace_string": "test"})
  930. tm.assert_series_equal(result, df["Y0"])
  931. def test_replace_truthy(self):
  932. df = DataFrame({"a": [True, True]})
  933. r = df.replace([np.inf, -np.inf], np.nan)
  934. e = df
  935. tm.assert_frame_equal(r, e)
  936. def test_nested_dict_overlapping_keys_replace_int(self):
  937. # GH 27660 keep behaviour consistent for simple dictionary and
  938. # nested dictionary replacement
  939. df = DataFrame({"a": list(range(1, 5))})
  940. result = df.replace({"a": dict(zip(range(1, 5), range(2, 6)))})
  941. expected = df.replace(dict(zip(range(1, 5), range(2, 6))))
  942. tm.assert_frame_equal(result, expected)
  943. def test_nested_dict_overlapping_keys_replace_str(self):
  944. # GH 27660
  945. a = np.arange(1, 5)
  946. astr = a.astype(str)
  947. bstr = np.arange(2, 6).astype(str)
  948. df = DataFrame({"a": astr})
  949. result = df.replace(dict(zip(astr, bstr)))
  950. expected = df.replace({"a": dict(zip(astr, bstr))})
  951. tm.assert_frame_equal(result, expected)
  952. def test_replace_swapping_bug(self):
  953. df = DataFrame({"a": [True, False, True]})
  954. res = df.replace({"a": {True: "Y", False: "N"}})
  955. expect = DataFrame({"a": ["Y", "N", "Y"]}, dtype=object)
  956. tm.assert_frame_equal(res, expect)
  957. df = DataFrame({"a": [0, 1, 0]})
  958. res = df.replace({"a": {0: "Y", 1: "N"}})
  959. expect = DataFrame({"a": ["Y", "N", "Y"]}, dtype=object)
  960. tm.assert_frame_equal(res, expect)
  961. def test_replace_period(self):
  962. d = {
  963. "fname": {
  964. "out_augmented_AUG_2011.json": pd.Period(year=2011, month=8, freq="M"),
  965. "out_augmented_JAN_2011.json": pd.Period(year=2011, month=1, freq="M"),
  966. "out_augmented_MAY_2012.json": pd.Period(year=2012, month=5, freq="M"),
  967. "out_augmented_SUBSIDY_WEEK.json": pd.Period(
  968. year=2011, month=4, freq="M"
  969. ),
  970. "out_augmented_AUG_2012.json": pd.Period(year=2012, month=8, freq="M"),
  971. "out_augmented_MAY_2011.json": pd.Period(year=2011, month=5, freq="M"),
  972. "out_augmented_SEP_2013.json": pd.Period(year=2013, month=9, freq="M"),
  973. }
  974. }
  975. df = DataFrame(
  976. [
  977. "out_augmented_AUG_2012.json",
  978. "out_augmented_SEP_2013.json",
  979. "out_augmented_SUBSIDY_WEEK.json",
  980. "out_augmented_MAY_2012.json",
  981. "out_augmented_MAY_2011.json",
  982. "out_augmented_AUG_2011.json",
  983. "out_augmented_JAN_2011.json",
  984. ],
  985. columns=["fname"],
  986. )
  987. assert set(df.fname.values) == set(d["fname"].keys())
  988. expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]})
  989. assert expected.dtypes.iloc[0] == "Period[M]"
  990. msg = "Downcasting behavior in `replace` "
  991. with tm.assert_produces_warning(FutureWarning, match=msg):
  992. result = df.replace(d)
  993. tm.assert_frame_equal(result, expected)
  994. def test_replace_datetime(self):
  995. d = {
  996. "fname": {
  997. "out_augmented_AUG_2011.json": Timestamp("2011-08"),
  998. "out_augmented_JAN_2011.json": Timestamp("2011-01"),
  999. "out_augmented_MAY_2012.json": Timestamp("2012-05"),
  1000. "out_augmented_SUBSIDY_WEEK.json": Timestamp("2011-04"),
  1001. "out_augmented_AUG_2012.json": Timestamp("2012-08"),
  1002. "out_augmented_MAY_2011.json": Timestamp("2011-05"),
  1003. "out_augmented_SEP_2013.json": Timestamp("2013-09"),
  1004. }
  1005. }
  1006. df = DataFrame(
  1007. [
  1008. "out_augmented_AUG_2012.json",
  1009. "out_augmented_SEP_2013.json",
  1010. "out_augmented_SUBSIDY_WEEK.json",
  1011. "out_augmented_MAY_2012.json",
  1012. "out_augmented_MAY_2011.json",
  1013. "out_augmented_AUG_2011.json",
  1014. "out_augmented_JAN_2011.json",
  1015. ],
  1016. columns=["fname"],
  1017. )
  1018. assert set(df.fname.values) == set(d["fname"].keys())
  1019. expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]})
  1020. msg = "Downcasting behavior in `replace` "
  1021. with tm.assert_produces_warning(FutureWarning, match=msg):
  1022. result = df.replace(d)
  1023. tm.assert_frame_equal(result, expected)
  1024. def test_replace_datetimetz(self):
  1025. # GH 11326
  1026. # behaving poorly when presented with a datetime64[ns, tz]
  1027. df = DataFrame(
  1028. {
  1029. "A": date_range("20130101", periods=3, tz="US/Eastern"),
  1030. "B": [0, np.nan, 2],
  1031. }
  1032. )
  1033. result = df.replace(np.nan, 1)
  1034. expected = DataFrame(
  1035. {
  1036. "A": date_range("20130101", periods=3, tz="US/Eastern"),
  1037. "B": Series([0, 1, 2], dtype="float64"),
  1038. }
  1039. )
  1040. tm.assert_frame_equal(result, expected)
  1041. result = df.fillna(1)
  1042. tm.assert_frame_equal(result, expected)
  1043. result = df.replace(0, np.nan)
  1044. expected = DataFrame(
  1045. {
  1046. "A": date_range("20130101", periods=3, tz="US/Eastern"),
  1047. "B": [np.nan, np.nan, 2],
  1048. }
  1049. )
  1050. tm.assert_frame_equal(result, expected)
  1051. result = df.replace(
  1052. Timestamp("20130102", tz="US/Eastern"),
  1053. Timestamp("20130104", tz="US/Eastern"),
  1054. )
  1055. expected = DataFrame(
  1056. {
  1057. "A": [
  1058. Timestamp("20130101", tz="US/Eastern"),
  1059. Timestamp("20130104", tz="US/Eastern"),
  1060. Timestamp("20130103", tz="US/Eastern"),
  1061. ],
  1062. "B": [0, np.nan, 2],
  1063. }
  1064. )
  1065. expected["A"] = expected["A"].dt.as_unit("ns")
  1066. tm.assert_frame_equal(result, expected)
  1067. result = df.copy()
  1068. result.iloc[1, 0] = np.nan
  1069. result = result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Eastern"))
  1070. tm.assert_frame_equal(result, expected)
  1071. # pre-2.0 this would coerce to object with mismatched tzs
  1072. result = df.copy()
  1073. result.iloc[1, 0] = np.nan
  1074. result = result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Pacific"))
  1075. expected = DataFrame(
  1076. {
  1077. "A": [
  1078. Timestamp("20130101", tz="US/Eastern"),
  1079. Timestamp("20130104", tz="US/Pacific").tz_convert("US/Eastern"),
  1080. Timestamp("20130103", tz="US/Eastern"),
  1081. ],
  1082. "B": [0, np.nan, 2],
  1083. }
  1084. )
  1085. expected["A"] = expected["A"].dt.as_unit("ns")
  1086. tm.assert_frame_equal(result, expected)
  1087. result = df.copy()
  1088. result.iloc[1, 0] = np.nan
  1089. result = result.replace({"A": np.nan}, Timestamp("20130104"))
  1090. expected = DataFrame(
  1091. {
  1092. "A": [
  1093. Timestamp("20130101", tz="US/Eastern"),
  1094. Timestamp("20130104"),
  1095. Timestamp("20130103", tz="US/Eastern"),
  1096. ],
  1097. "B": [0, np.nan, 2],
  1098. }
  1099. )
  1100. tm.assert_frame_equal(result, expected)
  1101. def test_replace_with_empty_dictlike(self, mix_abc):
  1102. # GH 15289
  1103. df = DataFrame(mix_abc)
  1104. tm.assert_frame_equal(df, df.replace({}))
  1105. tm.assert_frame_equal(df, df.replace(Series([], dtype=object)))
  1106. tm.assert_frame_equal(df, df.replace({"b": {}}))
  1107. tm.assert_frame_equal(df, df.replace(Series({"b": {}})))
  @pytest.mark.parametrize(
      "to_replace, method, expected",
      [
          (0, "bfill", {"A": [1, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}),
          (
              np.nan,
              "bfill",
              {"A": [0, 1, 2], "B": [5.0, 7.0, 7.0], "C": ["a", "b", "c"]},
          ),
          ("d", "ffill", {"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}),
          (
              [0, 2],
              "bfill",
              {"A": [1, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]},
          ),
          (
              [1, 2],
              "pad",
              {"A": [0, 0, 0], "B": [5, np.nan, 7], "C": ["a", "b", "c"]},
          ),
          (
              (1, 2),
              "bfill",
              {"A": [0, 2, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]},
          ),
          (
              ["b", "c"],
              "ffill",
              {"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "a", "a"]},
          ),
      ],
  )
  def test_replace_method(self, to_replace, method, expected):
      """The deprecated ``method`` keyword warns and gives the parametrized result."""
      # GH 19632
      df = DataFrame({"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]})

      with tm.assert_produces_warning(
          FutureWarning,
          match="The 'method' keyword in DataFrame.replace is deprecated",
      ):
          result = df.replace(to_replace=to_replace, value=None, method=method)

      tm.assert_frame_equal(result, DataFrame(expected))
  @pytest.mark.parametrize(
      "replace_dict, final_data",
      [({"a": 1, "b": 1}, [[3, 3], [2, 2]]), ({"a": 1, "b": 2}, [[3, 1], [2, 3]])],
  )
  def test_categorical_replace_with_dict(self, replace_dict, final_data):
      """Per-column dict replacement on categorical columns, non-inplace then inplace."""
      # GH 26988
      df = DataFrame([[1, 1], [2, 2]], columns=["a", "b"], dtype="category")

      final_data = np.array(final_data)
      if replace_dict["b"] == 1:
          b_categories = [3, 2]
      else:
          b_categories = [1, 3]
      col_a = pd.Categorical(final_data[:, 0], categories=[3, 2])
      col_b = pd.Categorical(final_data[:, 1], categories=b_categories)
      expected = DataFrame({"a": col_a, "b": col_b})

      deprecation_msg = "with CategoricalDtype is deprecated"
      with tm.assert_produces_warning(FutureWarning, match=deprecation_msg):
          result = df.replace(replace_dict, 3)
      tm.assert_frame_equal(result, expected)

      mismatch_msg = (
          r"Attributes of DataFrame.iloc\[:, 0\] \(column name=\"a\"\) are "
          "different"
      )
      with pytest.raises(AssertionError, match=mismatch_msg):
          # ensure non-inplace call does not affect original
          tm.assert_frame_equal(df, expected)

      with tm.assert_produces_warning(FutureWarning, match=deprecation_msg):
          returned = df.replace(replace_dict, 3, inplace=True)
      assert returned is None
      tm.assert_frame_equal(df, expected)
  @pytest.mark.parametrize(
      "df, to_replace, exp",
      [
          (
              {"col1": [1, 2, 3], "col2": [4, 5, 6]},
              {4: 5, 5: 6, 6: 7},
              {"col1": [1, 2, 3], "col2": [5, 6, 7]},
          ),
          (
              {"col1": [1, 2, 3], "col2": ["4", "5", "6"]},
              {"4": "5", "5": "6", "6": "7"},
              {"col1": [1, 2, 3], "col2": ["5", "6", "7"]},
          ),
      ],
  )
  def test_replace_commutative(self, df, to_replace, exp):
      """A chained mapping (4->5, 5->6, 6->7) maps each original value once."""
      # GH 16051
      # DataFrame.replace() overwrites when values are non-numeric
      # also added to data frame whilst issue was for series
      frame = DataFrame(df)
      result = frame.replace(to_replace)
      tm.assert_frame_equal(result, DataFrame(exp))
  @pytest.mark.parametrize(
      "replacer",
      [
          Timestamp("20170827"),
          np.int8(1),
          np.int16(1),
          np.float32(1),
          np.float64(1),
      ],
  )
  def test_replace_replacer_dtype(self, replacer):
      """The result equals a frame built directly from ``replacer``."""
      # GH26632
      df = DataFrame(["a"], dtype=object)
      mapping = {"a": replacer, "b": replacer}
      with tm.assert_produces_warning(
          FutureWarning, match="Downcasting behavior in `replace` "
      ):
          result = df.replace(mapping)
      tm.assert_frame_equal(result, DataFrame([replacer]))
  1216. def test_replace_after_convert_dtypes(self):
  1217. # GH31517
  1218. df = DataFrame({"grp": [1, 2, 3, 4, 5]}, dtype="Int64")
  1219. result = df.replace(1, 10)
  1220. expected = DataFrame({"grp": [10, 2, 3, 4, 5]}, dtype="Int64")
  1221. tm.assert_frame_equal(result, expected)
  def test_replace_invalid_to_replace(self):
      """Passing a callable as ``to_replace`` raises ``TypeError``."""
      # GH 18634
      # API: replace() should raise an exception if invalid argument is given
      df = DataFrame({"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]})
      type_error_msg = (
          r"Expecting 'to_replace' to be either a scalar, array-like, "
          r"dict or None, got invalid type.*"
      )
      deprecation_msg = (
          "DataFrame.replace without 'value' and with non-dict-like "
          "'to_replace' is deprecated"
      )
      # the deprecation warning is asserted inside the raises context
      with pytest.raises(TypeError, match=type_error_msg):
          with tm.assert_produces_warning(FutureWarning, match=deprecation_msg):
              df.replace(lambda x: x.strip())
  @pytest.mark.parametrize("dtype", ["float", "float64", "int64", "Int64", "boolean"])
  @pytest.mark.parametrize("value", [np.nan, pd.NA])
  def test_replace_no_replacement_dtypes(self, dtype, value):
      """When nothing matches ``to_replace`` the frame comes back unchanged."""
      # https://github.com/pandas-dev/pandas/issues/32988
      df = DataFrame(np.eye(2), dtype=dtype)
      absent_values = [None, -np.inf, np.inf]
      result = df.replace(to_replace=absent_values, value=value)
      tm.assert_frame_equal(result, df)
  @pytest.mark.parametrize("replacement", [np.nan, 5])
  def test_replace_with_duplicate_columns(self, replacement):
      """Assigning a replaced column back works when other labels are duplicated."""
      # GH 24798
      duplicated_labels = list("AAB")

      result = DataFrame({"A": [1, 2, 3], "A1": [4, 5, 6], "B": [7, 8, 9]})
      result.columns = duplicated_labels

      expected = DataFrame(
          {"A": [1, 2, 3], "A1": [4, 5, 6], "B": [replacement, 8, 9]}
      )
      expected.columns = list("AAB")

      result["B"] = result["B"].replace(7, replacement)

      tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)])
  def test_replace_ea_ignore_float(self, frame_or_series, value):
      """Float-for-float replacement leaves Period/Interval data unchanged."""
      # GH#34871
      frame = DataFrame({"Per": [value] * 3})
      obj = tm.get_obj(frame, frame_or_series)
      untouched = obj.copy()
      result = obj.replace(1.0, 0.0)
      tm.assert_equal(untouched, result)
  1263. def test_replace_value_category_type(self):
  1264. """
  1265. Test for #23305: to ensure category dtypes are maintained
  1266. after replace with direct values
  1267. """
  1268. # create input data
  1269. input_dict = {
  1270. "col1": [1, 2, 3, 4],
  1271. "col2": ["a", "b", "c", "d"],
  1272. "col3": [1.5, 2.5, 3.5, 4.5],
  1273. "col4": ["cat1", "cat2", "cat3", "cat4"],
  1274. "col5": ["obj1", "obj2", "obj3", "obj4"],
  1275. }
  1276. # explicitly cast columns as category and order them
  1277. input_df = DataFrame(data=input_dict).astype(
  1278. {"col2": "category", "col4": "category"}
  1279. )
  1280. input_df["col2"] = input_df["col2"].cat.reorder_categories(
  1281. ["a", "b", "c", "d"], ordered=True
  1282. )
  1283. input_df["col4"] = input_df["col4"].cat.reorder_categories(
  1284. ["cat1", "cat2", "cat3", "cat4"], ordered=True
  1285. )
  1286. # create expected dataframe
  1287. expected_dict = {
  1288. "col1": [1, 2, 3, 4],
  1289. "col2": ["a", "b", "c", "z"],
  1290. "col3": [1.5, 2.5, 3.5, 4.5],
  1291. "col4": ["cat1", "catX", "cat3", "cat4"],
  1292. "col5": ["obj9", "obj2", "obj3", "obj4"],
  1293. }
  1294. # explicitly cast columns as category and order them
  1295. expected = DataFrame(data=expected_dict).astype(
  1296. {"col2": "category", "col4": "category"}
  1297. )
  1298. expected["col2"] = expected["col2"].cat.reorder_categories(
  1299. ["a", "b", "c", "z"], ordered=True
  1300. )
  1301. expected["col4"] = expected["col4"].cat.reorder_categories(
  1302. ["cat1", "catX", "cat3", "cat4"], ordered=True
  1303. )
  1304. # replace values in input dataframe
  1305. msg = (
  1306. r"The behavior of Series\.replace \(and DataFrame.replace\) "
  1307. "with CategoricalDtype"
  1308. )
  1309. with tm.assert_produces_warning(FutureWarning, match=msg):
  1310. input_df = input_df.replace("d", "z")
  1311. input_df = input_df.replace("obj1", "obj9")
  1312. result = input_df.replace("cat2", "catX")
  1313. result = result.astype({"col1": "int64", "col3": "float64", "col5": "str"})
  1314. tm.assert_frame_equal(result, expected)
  1315. def test_replace_dict_category_type(self):
  1316. """
  1317. Test to ensure category dtypes are maintained
  1318. after replace with dict values
  1319. """
  1320. # GH#35268, GH#44940
  1321. # create input dataframe
  1322. input_dict = {"col1": ["a"], "col2": ["obj1"], "col3": ["cat1"]}
  1323. # explicitly cast columns as category
  1324. input_df = DataFrame(data=input_dict).astype(
  1325. {"col1": "category", "col2": "category", "col3": "category"}
  1326. )
  1327. # create expected dataframe
  1328. expected_dict = {"col1": ["z"], "col2": ["obj9"], "col3": ["catX"]}
  1329. # explicitly cast columns as category
  1330. expected = DataFrame(data=expected_dict).astype(
  1331. {"col1": "category", "col2": "category", "col3": "category"}
  1332. )
  1333. # replace values in input dataframe using a dict
  1334. msg = (
  1335. r"The behavior of Series\.replace \(and DataFrame.replace\) "
  1336. "with CategoricalDtype"
  1337. )
  1338. with tm.assert_produces_warning(FutureWarning, match=msg):
  1339. result = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"})
  1340. tm.assert_frame_equal(result, expected)
  1341. def test_replace_with_compiled_regex(self):
  1342. # https://github.com/pandas-dev/pandas/issues/35680
  1343. df = DataFrame(["a", "b", "c"])
  1344. regex = re.compile("^a$")
  1345. result = df.replace({regex: "z"}, regex=True)
  1346. expected = DataFrame(["z", "b", "c"])
  1347. tm.assert_frame_equal(result, expected)
  1348. def test_replace_intervals(self):
  1349. # https://github.com/pandas-dev/pandas/issues/35931
  1350. df = DataFrame({"a": [pd.Interval(0, 1), pd.Interval(0, 1)]})
  1351. result = df.replace({"a": {pd.Interval(0, 1): "x"}})
  1352. expected = DataFrame({"a": ["x", "x"]}, dtype=object)
  1353. tm.assert_frame_equal(result, expected)
  1354. def test_replace_unicode(self):
  1355. # GH: 16784
  1356. columns_values_map = {"positive": {"正面": 1, "中立": 1, "负面": 0}}
  1357. df1 = DataFrame({"positive": np.ones(3)})
  1358. result = df1.replace(columns_values_map)
  1359. expected = DataFrame({"positive": np.ones(3)})
  1360. tm.assert_frame_equal(result, expected)
  1361. def test_replace_bytes(self, frame_or_series):
  1362. # GH#38900
  1363. obj = frame_or_series(["o"]).astype("|S")
  1364. expected = obj.copy()
  1365. obj = obj.replace({None: np.nan})
  1366. tm.assert_equal(obj, expected)
  @pytest.mark.parametrize(
      "data, to_replace, value, expected",
      [
          ([1], [1.0], [0], [0]),
          ([1], [1], [0], [0]),
          ([1.0], [1.0], [0], [0.0]),
          ([1.0], [1], [0], [0.0]),
      ],
  )
  @pytest.mark.parametrize("box", [list, tuple, np.array])
  def test_replace_list_with_mixed_type(
      self, data, to_replace, value, expected, box, frame_or_series
  ):
      """List-like ``to_replace`` in several containers, with int/float mixes."""
      # GH#40371
      obj = frame_or_series(data)
      wanted = frame_or_series(expected)
      boxed = box(to_replace)
      result = obj.replace(boxed, value)
      tm.assert_equal(result, wanted)
  @pytest.mark.parametrize("val", [2, np.nan, 2.0])
  def test_replace_value_none_dtype_numeric(self, val):
      """Replacing a numeric value with ``None`` gives an object column."""
      # GH#48231
      expected = DataFrame({"a": [1, None]}, dtype=object)

      # positional (to_replace, value) form
      positional = DataFrame({"a": [1, val]}).replace(val, None)
      tm.assert_frame_equal(positional, expected)

      # dict form
      mapped = DataFrame({"a": [1, val]}).replace({val: None})
      tm.assert_frame_equal(mapped, expected)
  1395. def test_replace_with_nil_na(self):
  1396. # GH 32075
  1397. ser = DataFrame({"a": ["nil", pd.NA]})
  1398. expected = DataFrame({"a": ["anything else", pd.NA]}, index=[0, 1])
  1399. result = ser.replace("nil", "anything else")
  1400. tm.assert_frame_equal(expected, result)
  1401. class TestDataFrameReplaceRegex:
  @pytest.mark.parametrize(
      "data",
      [
          {"a": list("ab.."), "b": list("efgh")},
          {"a": list("ab.."), "b": list(range(4))},
      ],
  )
  @pytest.mark.parametrize(
      "to_replace,value", [(r"\s*\.\s*", np.nan), (r"\s*(\.)\s*", r"\1\1\1")]
  )
  @pytest.mark.parametrize("compile_regex", [True, False])
  @pytest.mark.parametrize("regex_kwarg", [True, False])
  @pytest.mark.parametrize("inplace", [True, False])
  def test_regex_replace_scalar(
      self, data, to_replace, value, compile_regex, regex_kwarg, inplace
  ):
      """Scalar regex replacement, given positionally or through ``regex=``."""
      df = DataFrame(data)
      expected = df.copy()

      pattern = re.compile(to_replace) if compile_regex else to_replace
      if regex_kwarg:
          # hand the pattern over via ``regex=`` and leave ``to_replace`` unset
          first_arg, regex = None, pattern
      else:
          first_arg, regex = pattern, True

      result = df.replace(first_arg, value, inplace=inplace, regex=regex)
      if inplace:
          assert result is None
          result = df

      expected_replace_val = np.nan if value is np.nan else "..."
      expected.loc[expected["a"] == ".", "a"] = expected_replace_val
      tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize("regex", [False, True])
  def test_replace_regex_dtype_frame(self, regex):
      """Replacing the string "0" with int 1, with and without ``regex``."""
      # GH-48644
      msg = "Downcasting behavior in `replace`"

      # both columns match
      both_match = DataFrame({"A": ["0"], "B": ["0"]})
      expected_both = DataFrame({"A": [1], "B": [1]})
      with tm.assert_produces_warning(FutureWarning, match=msg):
          result_both = both_match.replace(to_replace="0", value=1, regex=regex)
      tm.assert_frame_equal(result_both, expected_both)

      # only column "A" matches
      one_match = DataFrame({"A": ["0"], "B": ["1"]})
      expected_one = DataFrame({"A": [1], "B": ["1"]})
      with tm.assert_produces_warning(FutureWarning, match=msg):
          result_one = one_match.replace(to_replace="0", value=1, regex=regex)
      tm.assert_frame_equal(result_one, expected_one)
  1451. def test_replace_with_value_also_being_replaced(self):
  1452. # GH46306
  1453. df = DataFrame({"A": [0, 1, 2], "B": [1, 0, 2]})
  1454. result = df.replace({0: 1, 1: np.nan})
  1455. expected = DataFrame({"A": [1, np.nan, 2], "B": [np.nan, 1, 2]})
  1456. tm.assert_frame_equal(result, expected)
  1457. def test_replace_categorical_no_replacement(self):
  1458. # GH#46672
  1459. df = DataFrame(
  1460. {
  1461. "a": ["one", "two", None, "three"],
  1462. "b": ["one", None, "two", "three"],
  1463. },
  1464. dtype="category",
  1465. )
  1466. expected = df.copy()
  1467. result = df.replace(to_replace=[".", "def"], value=["_", None])
  1468. tm.assert_frame_equal(result, expected)
  1469. def test_replace_object_splitting(self, using_infer_string):
  1470. # GH#53977
  1471. df = DataFrame({"a": ["a"], "b": "b"})
  1472. if using_infer_string:
  1473. assert len(df._mgr.blocks) == 2
  1474. else:
  1475. assert len(df._mgr.blocks) == 1
  1476. df.replace(to_replace=r"^\s*$", value="", inplace=True, regex=True)
  1477. if using_infer_string:
  1478. assert len(df._mgr.blocks) == 2
  1479. else:
  1480. assert len(df._mgr.blocks) == 1