# test_methods.py
import numpy as np
import pytest

from pandas.compat import (
    HAS_PYARROW,
    WARNING_CHECK_DISABLED,
)
from pandas.errors import SettingWithCopyWarning

import pandas as pd
from pandas import (
    DataFrame,
    Index,
    MultiIndex,
    Period,
    Series,
    Timestamp,
    date_range,
    option_context,
    period_range,
)
import pandas._testing as tm
from pandas.tests.copy_view.util import get_array
from pandas.util.version import Version
  23. def test_copy(using_copy_on_write):
  24. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  25. df_copy = df.copy()
  26. # the deep copy by defaults takes a shallow copy of the Index
  27. assert df_copy.index is not df.index
  28. assert df_copy.columns is not df.columns
  29. assert df_copy.index.is_(df.index)
  30. assert df_copy.columns.is_(df.columns)
  31. # the deep copy doesn't share memory
  32. assert not np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
  33. if using_copy_on_write:
  34. assert not df_copy._mgr.blocks[0].refs.has_reference()
  35. assert not df_copy._mgr.blocks[1].refs.has_reference()
  36. # mutating copy doesn't mutate original
  37. df_copy.iloc[0, 0] = 0
  38. assert df.iloc[0, 0] == 1
  39. def test_copy_shallow(using_copy_on_write, warn_copy_on_write):
  40. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  41. df_copy = df.copy(deep=False)
  42. # the shallow copy also makes a shallow copy of the index
  43. if using_copy_on_write:
  44. assert df_copy.index is not df.index
  45. assert df_copy.columns is not df.columns
  46. assert df_copy.index.is_(df.index)
  47. assert df_copy.columns.is_(df.columns)
  48. else:
  49. assert df_copy.index is df.index
  50. assert df_copy.columns is df.columns
  51. # the shallow copy still shares memory
  52. assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
  53. if using_copy_on_write:
  54. assert df_copy._mgr.blocks[0].refs.has_reference()
  55. assert df_copy._mgr.blocks[1].refs.has_reference()
  56. if using_copy_on_write:
  57. # mutating shallow copy doesn't mutate original
  58. df_copy.iloc[0, 0] = 0
  59. assert df.iloc[0, 0] == 1
  60. # mutating triggered a copy-on-write -> no longer shares memory
  61. assert not np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
  62. # but still shares memory for the other columns/blocks
  63. assert np.shares_memory(get_array(df_copy, "c"), get_array(df, "c"))
  64. else:
  65. # mutating shallow copy does mutate original
  66. with tm.assert_cow_warning(warn_copy_on_write):
  67. df_copy.iloc[0, 0] = 0
  68. assert df.iloc[0, 0] == 0
  69. # and still shares memory
  70. assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
  71. @pytest.mark.parametrize("copy", [True, None, False])
  72. @pytest.mark.parametrize(
  73. "method",
  74. [
  75. lambda df, copy: df.rename(columns=str.lower, copy=copy),
  76. lambda df, copy: df.reindex(columns=["a", "c"], copy=copy),
  77. lambda df, copy: df.reindex_like(df, copy=copy),
  78. lambda df, copy: df.align(df, copy=copy)[0],
  79. lambda df, copy: df.set_axis(["a", "b", "c"], axis="index", copy=copy),
  80. lambda df, copy: df.rename_axis(index="test", copy=copy),
  81. lambda df, copy: df.rename_axis(columns="test", copy=copy),
  82. lambda df, copy: df.astype({"b": "int64"}, copy=copy),
  83. # lambda df, copy: df.swaplevel(0, 0, copy=copy),
  84. lambda df, copy: df.swapaxes(0, 0, copy=copy),
  85. lambda df, copy: df.truncate(0, 5, copy=copy),
  86. lambda df, copy: df.infer_objects(copy=copy),
  87. lambda df, copy: df.to_timestamp(copy=copy),
  88. lambda df, copy: df.to_period(freq="D", copy=copy),
  89. lambda df, copy: df.tz_localize("US/Central", copy=copy),
  90. lambda df, copy: df.tz_convert("US/Central", copy=copy),
  91. lambda df, copy: df.set_flags(allows_duplicate_labels=False, copy=copy),
  92. ],
  93. ids=[
  94. "rename",
  95. "reindex",
  96. "reindex_like",
  97. "align",
  98. "set_axis",
  99. "rename_axis0",
  100. "rename_axis1",
  101. "astype",
  102. # "swaplevel", # only series
  103. "swapaxes",
  104. "truncate",
  105. "infer_objects",
  106. "to_timestamp",
  107. "to_period",
  108. "tz_localize",
  109. "tz_convert",
  110. "set_flags",
  111. ],
  112. )
  113. def test_methods_copy_keyword(
  114. request, method, copy, using_copy_on_write, using_array_manager
  115. ):
  116. index = None
  117. if "to_timestamp" in request.node.callspec.id:
  118. index = period_range("2012-01-01", freq="D", periods=3)
  119. elif "to_period" in request.node.callspec.id:
  120. index = date_range("2012-01-01", freq="D", periods=3)
  121. elif "tz_localize" in request.node.callspec.id:
  122. index = date_range("2012-01-01", freq="D", periods=3)
  123. elif "tz_convert" in request.node.callspec.id:
  124. index = date_range("2012-01-01", freq="D", periods=3, tz="Europe/Brussels")
  125. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=index)
  126. if "swapaxes" in request.node.callspec.id:
  127. msg = "'DataFrame.swapaxes' is deprecated"
  128. with tm.assert_produces_warning(FutureWarning, match=msg):
  129. df2 = method(df, copy=copy)
  130. else:
  131. df2 = method(df, copy=copy)
  132. share_memory = using_copy_on_write or copy is False
  133. if request.node.callspec.id.startswith("reindex-"):
  134. # TODO copy=False without CoW still returns a copy in this case
  135. if not using_copy_on_write and not using_array_manager and copy is False:
  136. share_memory = False
  137. if share_memory:
  138. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  139. else:
  140. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  141. @pytest.mark.parametrize("copy", [True, None, False])
  142. @pytest.mark.parametrize(
  143. "method",
  144. [
  145. lambda ser, copy: ser.rename(index={0: 100}, copy=copy),
  146. lambda ser, copy: ser.rename(None, copy=copy),
  147. lambda ser, copy: ser.reindex(index=ser.index, copy=copy),
  148. lambda ser, copy: ser.reindex_like(ser, copy=copy),
  149. lambda ser, copy: ser.align(ser, copy=copy)[0],
  150. lambda ser, copy: ser.set_axis(["a", "b", "c"], axis="index", copy=copy),
  151. lambda ser, copy: ser.rename_axis(index="test", copy=copy),
  152. lambda ser, copy: ser.astype("int64", copy=copy),
  153. lambda ser, copy: ser.swaplevel(0, 1, copy=copy),
  154. lambda ser, copy: ser.swapaxes(0, 0, copy=copy),
  155. lambda ser, copy: ser.truncate(0, 5, copy=copy),
  156. lambda ser, copy: ser.infer_objects(copy=copy),
  157. lambda ser, copy: ser.to_timestamp(copy=copy),
  158. lambda ser, copy: ser.to_period(freq="D", copy=copy),
  159. lambda ser, copy: ser.tz_localize("US/Central", copy=copy),
  160. lambda ser, copy: ser.tz_convert("US/Central", copy=copy),
  161. lambda ser, copy: ser.set_flags(allows_duplicate_labels=False, copy=copy),
  162. ],
  163. ids=[
  164. "rename (dict)",
  165. "rename",
  166. "reindex",
  167. "reindex_like",
  168. "align",
  169. "set_axis",
  170. "rename_axis0",
  171. "astype",
  172. "swaplevel",
  173. "swapaxes",
  174. "truncate",
  175. "infer_objects",
  176. "to_timestamp",
  177. "to_period",
  178. "tz_localize",
  179. "tz_convert",
  180. "set_flags",
  181. ],
  182. )
  183. def test_methods_series_copy_keyword(request, method, copy, using_copy_on_write):
  184. index = None
  185. if "to_timestamp" in request.node.callspec.id:
  186. index = period_range("2012-01-01", freq="D", periods=3)
  187. elif "to_period" in request.node.callspec.id:
  188. index = date_range("2012-01-01", freq="D", periods=3)
  189. elif "tz_localize" in request.node.callspec.id:
  190. index = date_range("2012-01-01", freq="D", periods=3)
  191. elif "tz_convert" in request.node.callspec.id:
  192. index = date_range("2012-01-01", freq="D", periods=3, tz="Europe/Brussels")
  193. elif "swaplevel" in request.node.callspec.id:
  194. index = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]])
  195. ser = Series([1, 2, 3], index=index)
  196. if "swapaxes" in request.node.callspec.id:
  197. msg = "'Series.swapaxes' is deprecated"
  198. with tm.assert_produces_warning(FutureWarning, match=msg):
  199. ser2 = method(ser, copy=copy)
  200. else:
  201. ser2 = method(ser, copy=copy)
  202. share_memory = using_copy_on_write or copy is False
  203. if share_memory:
  204. assert np.shares_memory(get_array(ser2), get_array(ser))
  205. else:
  206. assert not np.shares_memory(get_array(ser2), get_array(ser))
  207. @pytest.mark.parametrize("copy", [True, None, False])
  208. def test_transpose_copy_keyword(using_copy_on_write, copy, using_array_manager):
  209. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  210. result = df.transpose(copy=copy)
  211. share_memory = using_copy_on_write or copy is False or copy is None
  212. share_memory = share_memory and not using_array_manager
  213. if share_memory:
  214. assert np.shares_memory(get_array(df, "a"), get_array(result, 0))
  215. else:
  216. assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))
# -----------------------------------------------------------------------------
# DataFrame methods returning new DataFrame using shallow copy
  219. def test_reset_index(using_copy_on_write):
  220. # Case: resetting the index (i.e. adding a new column) + mutating the
  221. # resulting dataframe
  222. df = DataFrame(
  223. {"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=[10, 11, 12]
  224. )
  225. df_orig = df.copy()
  226. df2 = df.reset_index()
  227. df2._mgr._verify_integrity()
  228. if using_copy_on_write:
  229. # still shares memory (df2 is a shallow copy)
  230. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  231. assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  232. # mutating df2 triggers a copy-on-write for that column / block
  233. df2.iloc[0, 2] = 0
  234. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  235. if using_copy_on_write:
  236. assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  237. tm.assert_frame_equal(df, df_orig)
  238. @pytest.mark.parametrize("index", [pd.RangeIndex(0, 2), Index([1, 2])])
  239. def test_reset_index_series_drop(using_copy_on_write, index):
  240. ser = Series([1, 2], index=index)
  241. ser_orig = ser.copy()
  242. ser2 = ser.reset_index(drop=True)
  243. if using_copy_on_write:
  244. assert np.shares_memory(get_array(ser), get_array(ser2))
  245. assert not ser._mgr._has_no_reference(0)
  246. else:
  247. assert not np.shares_memory(get_array(ser), get_array(ser2))
  248. ser2.iloc[0] = 100
  249. tm.assert_series_equal(ser, ser_orig)
  250. def test_groupby_column_index_in_references():
  251. df = DataFrame(
  252. {"A": ["a", "b", "c", "d"], "B": [1, 2, 3, 4], "C": ["a", "a", "b", "b"]}
  253. )
  254. df = df.set_index("A")
  255. key = df["C"]
  256. result = df.groupby(key, observed=True).sum()
  257. expected = df.groupby("C", observed=True).sum()
  258. tm.assert_frame_equal(result, expected)
  259. def test_rename_columns(using_copy_on_write):
  260. # Case: renaming columns returns a new dataframe
  261. # + afterwards modifying the result
  262. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  263. df_orig = df.copy()
  264. df2 = df.rename(columns=str.upper)
  265. if using_copy_on_write:
  266. assert np.shares_memory(get_array(df2, "A"), get_array(df, "a"))
  267. df2.iloc[0, 0] = 0
  268. assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a"))
  269. if using_copy_on_write:
  270. assert np.shares_memory(get_array(df2, "C"), get_array(df, "c"))
  271. expected = DataFrame({"A": [0, 2, 3], "B": [4, 5, 6], "C": [0.1, 0.2, 0.3]})
  272. tm.assert_frame_equal(df2, expected)
  273. tm.assert_frame_equal(df, df_orig)
  274. def test_rename_columns_modify_parent(using_copy_on_write):
  275. # Case: renaming columns returns a new dataframe
  276. # + afterwards modifying the original (parent) dataframe
  277. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  278. df2 = df.rename(columns=str.upper)
  279. df2_orig = df2.copy()
  280. if using_copy_on_write:
  281. assert np.shares_memory(get_array(df2, "A"), get_array(df, "a"))
  282. else:
  283. assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a"))
  284. df.iloc[0, 0] = 0
  285. assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a"))
  286. if using_copy_on_write:
  287. assert np.shares_memory(get_array(df2, "C"), get_array(df, "c"))
  288. expected = DataFrame({"a": [0, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  289. tm.assert_frame_equal(df, expected)
  290. tm.assert_frame_equal(df2, df2_orig)
  291. def test_pipe(using_copy_on_write):
  292. df = DataFrame({"a": [1, 2, 3], "b": 1.5})
  293. df_orig = df.copy()
  294. def testfunc(df):
  295. return df
  296. df2 = df.pipe(testfunc)
  297. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  298. # mutating df2 triggers a copy-on-write for that column
  299. df2.iloc[0, 0] = 0
  300. if using_copy_on_write:
  301. tm.assert_frame_equal(df, df_orig)
  302. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  303. else:
  304. expected = DataFrame({"a": [0, 2, 3], "b": 1.5})
  305. tm.assert_frame_equal(df, expected)
  306. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  307. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  308. def test_pipe_modify_df(using_copy_on_write):
  309. df = DataFrame({"a": [1, 2, 3], "b": 1.5})
  310. df_orig = df.copy()
  311. def testfunc(df):
  312. df.iloc[0, 0] = 100
  313. return df
  314. df2 = df.pipe(testfunc)
  315. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  316. if using_copy_on_write:
  317. tm.assert_frame_equal(df, df_orig)
  318. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  319. else:
  320. expected = DataFrame({"a": [100, 2, 3], "b": 1.5})
  321. tm.assert_frame_equal(df, expected)
  322. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  323. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  324. def test_reindex_columns(using_copy_on_write):
  325. # Case: reindexing the column returns a new dataframe
  326. # + afterwards modifying the result
  327. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  328. df_orig = df.copy()
  329. df2 = df.reindex(columns=["a", "c"])
  330. if using_copy_on_write:
  331. # still shares memory (df2 is a shallow copy)
  332. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  333. else:
  334. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  335. # mutating df2 triggers a copy-on-write for that column
  336. df2.iloc[0, 0] = 0
  337. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  338. if using_copy_on_write:
  339. assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  340. tm.assert_frame_equal(df, df_orig)
  341. @pytest.mark.parametrize(
  342. "index",
  343. [
  344. lambda idx: idx,
  345. lambda idx: idx.view(),
  346. lambda idx: idx.copy(),
  347. lambda idx: list(idx),
  348. ],
  349. ids=["identical", "view", "copy", "values"],
  350. )
  351. def test_reindex_rows(index, using_copy_on_write):
  352. # Case: reindexing the rows with an index that matches the current index
  353. # can use a shallow copy
  354. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  355. df_orig = df.copy()
  356. df2 = df.reindex(index=index(df.index))
  357. if using_copy_on_write:
  358. # still shares memory (df2 is a shallow copy)
  359. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  360. else:
  361. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  362. # mutating df2 triggers a copy-on-write for that column
  363. df2.iloc[0, 0] = 0
  364. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  365. if using_copy_on_write:
  366. assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  367. tm.assert_frame_equal(df, df_orig)
  368. def test_drop_on_column(using_copy_on_write):
  369. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  370. df_orig = df.copy()
  371. df2 = df.drop(columns="a")
  372. df2._mgr._verify_integrity()
  373. if using_copy_on_write:
  374. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  375. assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  376. else:
  377. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  378. assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  379. df2.iloc[0, 0] = 0
  380. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  381. if using_copy_on_write:
  382. assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  383. tm.assert_frame_equal(df, df_orig)
  384. def test_select_dtypes(using_copy_on_write):
  385. # Case: selecting columns using `select_dtypes()` returns a new dataframe
  386. # + afterwards modifying the result
  387. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  388. df_orig = df.copy()
  389. df2 = df.select_dtypes("int64")
  390. df2._mgr._verify_integrity()
  391. if using_copy_on_write:
  392. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  393. else:
  394. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  395. # mutating df2 triggers a copy-on-write for that column/block
  396. df2.iloc[0, 0] = 0
  397. if using_copy_on_write:
  398. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  399. tm.assert_frame_equal(df, df_orig)
  400. @pytest.mark.parametrize(
  401. "filter_kwargs", [{"items": ["a"]}, {"like": "a"}, {"regex": "a"}]
  402. )
  403. def test_filter(using_copy_on_write, filter_kwargs):
  404. # Case: selecting columns using `filter()` returns a new dataframe
  405. # + afterwards modifying the result
  406. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  407. df_orig = df.copy()
  408. df2 = df.filter(**filter_kwargs)
  409. if using_copy_on_write:
  410. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  411. else:
  412. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  413. # mutating df2 triggers a copy-on-write for that column/block
  414. if using_copy_on_write:
  415. df2.iloc[0, 0] = 0
  416. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  417. tm.assert_frame_equal(df, df_orig)
  418. def test_shift_no_op(using_copy_on_write):
  419. df = DataFrame(
  420. [[1, 2], [3, 4], [5, 6]],
  421. index=date_range("2020-01-01", "2020-01-03"),
  422. columns=["a", "b"],
  423. )
  424. df_orig = df.copy()
  425. df2 = df.shift(periods=0)
  426. if using_copy_on_write:
  427. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  428. else:
  429. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  430. df.iloc[0, 0] = 0
  431. if using_copy_on_write:
  432. assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  433. assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
  434. tm.assert_frame_equal(df2, df_orig)
  435. def test_shift_index(using_copy_on_write):
  436. df = DataFrame(
  437. [[1, 2], [3, 4], [5, 6]],
  438. index=date_range("2020-01-01", "2020-01-03"),
  439. columns=["a", "b"],
  440. )
  441. df2 = df.shift(periods=1, axis=0)
  442. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  443. def test_shift_rows_freq(using_copy_on_write):
  444. df = DataFrame(
  445. [[1, 2], [3, 4], [5, 6]],
  446. index=date_range("2020-01-01", "2020-01-03"),
  447. columns=["a", "b"],
  448. )
  449. df_orig = df.copy()
  450. df_orig.index = date_range("2020-01-02", "2020-01-04")
  451. df2 = df.shift(periods=1, freq="1D")
  452. if using_copy_on_write:
  453. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  454. else:
  455. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  456. df.iloc[0, 0] = 0
  457. if using_copy_on_write:
  458. assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  459. tm.assert_frame_equal(df2, df_orig)
  460. def test_shift_columns(using_copy_on_write, warn_copy_on_write):
  461. df = DataFrame(
  462. [[1, 2], [3, 4], [5, 6]], columns=date_range("2020-01-01", "2020-01-02")
  463. )
  464. df2 = df.shift(periods=1, axis=1)
  465. assert np.shares_memory(get_array(df2, "2020-01-02"), get_array(df, "2020-01-01"))
  466. with tm.assert_cow_warning(warn_copy_on_write):
  467. df.iloc[0, 0] = 0
  468. if using_copy_on_write:
  469. assert not np.shares_memory(
  470. get_array(df2, "2020-01-02"), get_array(df, "2020-01-01")
  471. )
  472. expected = DataFrame(
  473. [[np.nan, 1], [np.nan, 3], [np.nan, 5]],
  474. columns=date_range("2020-01-01", "2020-01-02"),
  475. )
  476. tm.assert_frame_equal(df2, expected)
  477. def test_pop(using_copy_on_write, warn_copy_on_write):
  478. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  479. df_orig = df.copy()
  480. view_original = df[:]
  481. result = df.pop("a")
  482. assert np.shares_memory(result.values, get_array(view_original, "a"))
  483. assert np.shares_memory(get_array(df, "b"), get_array(view_original, "b"))
  484. if using_copy_on_write:
  485. result.iloc[0] = 0
  486. assert not np.shares_memory(result.values, get_array(view_original, "a"))
  487. with tm.assert_cow_warning(warn_copy_on_write):
  488. df.iloc[0, 0] = 0
  489. if using_copy_on_write:
  490. assert not np.shares_memory(get_array(df, "b"), get_array(view_original, "b"))
  491. tm.assert_frame_equal(view_original, df_orig)
  492. else:
  493. expected = DataFrame({"a": [1, 2, 3], "b": [0, 5, 6], "c": [0.1, 0.2, 0.3]})
  494. tm.assert_frame_equal(view_original, expected)
  495. @pytest.mark.parametrize(
  496. "func",
  497. [
  498. lambda x, y: x.align(y),
  499. lambda x, y: x.align(y.a, axis=0),
  500. lambda x, y: x.align(y.a.iloc[slice(0, 1)], axis=1),
  501. ],
  502. )
  503. def test_align_frame(using_copy_on_write, func):
  504. df = DataFrame({"a": [1, 2, 3], "b": "a"})
  505. df_orig = df.copy()
  506. df_changed = df[["b", "a"]].copy()
  507. df2, _ = func(df, df_changed)
  508. if using_copy_on_write:
  509. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  510. else:
  511. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  512. df2.iloc[0, 0] = 0
  513. if using_copy_on_write:
  514. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  515. tm.assert_frame_equal(df, df_orig)
  516. def test_align_series(using_copy_on_write):
  517. ser = Series([1, 2])
  518. ser_orig = ser.copy()
  519. ser_other = ser.copy()
  520. ser2, ser_other_result = ser.align(ser_other)
  521. if using_copy_on_write:
  522. assert np.shares_memory(ser2.values, ser.values)
  523. assert np.shares_memory(ser_other_result.values, ser_other.values)
  524. else:
  525. assert not np.shares_memory(ser2.values, ser.values)
  526. assert not np.shares_memory(ser_other_result.values, ser_other.values)
  527. ser2.iloc[0] = 0
  528. ser_other_result.iloc[0] = 0
  529. if using_copy_on_write:
  530. assert not np.shares_memory(ser2.values, ser.values)
  531. assert not np.shares_memory(ser_other_result.values, ser_other.values)
  532. tm.assert_series_equal(ser, ser_orig)
  533. tm.assert_series_equal(ser_other, ser_orig)
  534. def test_align_copy_false(using_copy_on_write):
  535. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  536. df_orig = df.copy()
  537. df2, df3 = df.align(df, copy=False)
  538. assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
  539. assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  540. if using_copy_on_write:
  541. df2.loc[0, "a"] = 0
  542. tm.assert_frame_equal(df, df_orig) # Original is unchanged
  543. df3.loc[0, "a"] = 0
  544. tm.assert_frame_equal(df, df_orig) # Original is unchanged
  545. def test_align_with_series_copy_false(using_copy_on_write):
  546. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  547. ser = Series([1, 2, 3], name="x")
  548. ser_orig = ser.copy()
  549. df_orig = df.copy()
  550. df2, ser2 = df.align(ser, copy=False, axis=0)
  551. assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
  552. assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  553. assert np.shares_memory(get_array(ser, "x"), get_array(ser2, "x"))
  554. if using_copy_on_write:
  555. df2.loc[0, "a"] = 0
  556. tm.assert_frame_equal(df, df_orig) # Original is unchanged
  557. ser2.loc[0] = 0
  558. tm.assert_series_equal(ser, ser_orig) # Original is unchanged
  559. def test_to_frame(using_copy_on_write, warn_copy_on_write):
  560. # Case: converting a Series to a DataFrame with to_frame
  561. ser = Series([1, 2, 3])
  562. ser_orig = ser.copy()
  563. df = ser[:].to_frame()
  564. # currently this always returns a "view"
  565. assert np.shares_memory(ser.values, get_array(df, 0))
  566. with tm.assert_cow_warning(warn_copy_on_write):
  567. df.iloc[0, 0] = 0
  568. if using_copy_on_write:
  569. # mutating df triggers a copy-on-write for that column
  570. assert not np.shares_memory(ser.values, get_array(df, 0))
  571. tm.assert_series_equal(ser, ser_orig)
  572. else:
  573. # but currently select_dtypes() actually returns a view -> mutates parent
  574. expected = ser_orig.copy()
  575. expected.iloc[0] = 0
  576. tm.assert_series_equal(ser, expected)
  577. # modify original series -> don't modify dataframe
  578. df = ser[:].to_frame()
  579. with tm.assert_cow_warning(warn_copy_on_write):
  580. ser.iloc[0] = 0
  581. if using_copy_on_write:
  582. tm.assert_frame_equal(df, ser_orig.to_frame())
  583. else:
  584. expected = ser_orig.copy().to_frame()
  585. expected.iloc[0, 0] = 0
  586. tm.assert_frame_equal(df, expected)
  587. @pytest.mark.parametrize("ax", ["index", "columns"])
  588. def test_swapaxes_noop(using_copy_on_write, ax):
  589. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  590. df_orig = df.copy()
  591. msg = "'DataFrame.swapaxes' is deprecated"
  592. with tm.assert_produces_warning(FutureWarning, match=msg):
  593. df2 = df.swapaxes(ax, ax)
  594. if using_copy_on_write:
  595. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  596. else:
  597. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  598. # mutating df2 triggers a copy-on-write for that column/block
  599. df2.iloc[0, 0] = 0
  600. if using_copy_on_write:
  601. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  602. tm.assert_frame_equal(df, df_orig)
  603. def test_swapaxes_single_block(using_copy_on_write):
  604. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["x", "y", "z"])
  605. df_orig = df.copy()
  606. msg = "'DataFrame.swapaxes' is deprecated"
  607. with tm.assert_produces_warning(FutureWarning, match=msg):
  608. df2 = df.swapaxes("index", "columns")
  609. if using_copy_on_write:
  610. assert np.shares_memory(get_array(df2, "x"), get_array(df, "a"))
  611. else:
  612. assert not np.shares_memory(get_array(df2, "x"), get_array(df, "a"))
  613. # mutating df2 triggers a copy-on-write for that column/block
  614. df2.iloc[0, 0] = 0
  615. if using_copy_on_write:
  616. assert not np.shares_memory(get_array(df2, "x"), get_array(df, "a"))
  617. tm.assert_frame_equal(df, df_orig)
  618. def test_swapaxes_read_only_array():
  619. df = DataFrame({"a": [1, 2], "b": 3})
  620. msg = "'DataFrame.swapaxes' is deprecated"
  621. with tm.assert_produces_warning(FutureWarning, match=msg):
  622. df = df.swapaxes(axis1="index", axis2="columns")
  623. df.iloc[0, 0] = 100
  624. expected = DataFrame({0: [100, 3], 1: [2, 3]}, index=["a", "b"])
  625. tm.assert_frame_equal(df, expected)
  626. @pytest.mark.parametrize(
  627. "method, idx",
  628. [
  629. (lambda df: df.copy(deep=False).copy(deep=False), 0),
  630. (lambda df: df.reset_index().reset_index(), 2),
  631. (lambda df: df.rename(columns=str.upper).rename(columns=str.lower), 0),
  632. (lambda df: df.copy(deep=False).select_dtypes(include="number"), 0),
  633. ],
  634. ids=["shallow-copy", "reset_index", "rename", "select_dtypes"],
  635. )
  636. def test_chained_methods(request, method, idx, using_copy_on_write, warn_copy_on_write):
  637. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  638. df_orig = df.copy()
  639. # when not using CoW, only the copy() variant actually gives a view
  640. df2_is_view = not using_copy_on_write and request.node.callspec.id == "shallow-copy"
  641. # modify df2 -> don't modify df
  642. df2 = method(df)
  643. with tm.assert_cow_warning(warn_copy_on_write and df2_is_view):
  644. df2.iloc[0, idx] = 0
  645. if not df2_is_view:
  646. tm.assert_frame_equal(df, df_orig)
  647. # modify df -> don't modify df2
  648. df2 = method(df)
  649. with tm.assert_cow_warning(warn_copy_on_write and df2_is_view):
  650. df.iloc[0, 0] = 0
  651. if not df2_is_view:
  652. tm.assert_frame_equal(df2.iloc[:, idx:], df_orig)
  653. @pytest.mark.parametrize("obj", [Series([1, 2], name="a"), DataFrame({"a": [1, 2]})])
  654. def test_to_timestamp(using_copy_on_write, obj):
  655. obj.index = Index([Period("2012-1-1", freq="D"), Period("2012-1-2", freq="D")])
  656. obj_orig = obj.copy()
  657. obj2 = obj.to_timestamp()
  658. if using_copy_on_write:
  659. assert np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  660. else:
  661. assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  662. # mutating obj2 triggers a copy-on-write for that column / block
  663. obj2.iloc[0] = 0
  664. assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  665. tm.assert_equal(obj, obj_orig)
  666. @pytest.mark.parametrize("obj", [Series([1, 2], name="a"), DataFrame({"a": [1, 2]})])
  667. def test_to_period(using_copy_on_write, obj):
  668. obj.index = Index([Timestamp("2019-12-31"), Timestamp("2020-12-31")])
  669. obj_orig = obj.copy()
  670. obj2 = obj.to_period(freq="Y")
  671. if using_copy_on_write:
  672. assert np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  673. else:
  674. assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  675. # mutating obj2 triggers a copy-on-write for that column / block
  676. obj2.iloc[0] = 0
  677. assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  678. tm.assert_equal(obj, obj_orig)
  679. def test_set_index(using_copy_on_write):
  680. # GH 49473
  681. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  682. df_orig = df.copy()
  683. df2 = df.set_index("a")
  684. if using_copy_on_write:
  685. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  686. else:
  687. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  688. # mutating df2 triggers a copy-on-write for that column / block
  689. df2.iloc[0, 1] = 0
  690. assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  691. tm.assert_frame_equal(df, df_orig)
  692. def test_set_index_mutating_parent_does_not_mutate_index():
  693. df = DataFrame({"a": [1, 2, 3], "b": 1})
  694. result = df.set_index("a")
  695. expected = result.copy()
  696. df.iloc[0, 0] = 100
  697. tm.assert_frame_equal(result, expected)
  698. def test_add_prefix(using_copy_on_write):
  699. # GH 49473
  700. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  701. df_orig = df.copy()
  702. df2 = df.add_prefix("CoW_")
  703. if using_copy_on_write:
  704. assert np.shares_memory(get_array(df2, "CoW_a"), get_array(df, "a"))
  705. df2.iloc[0, 0] = 0
  706. assert not np.shares_memory(get_array(df2, "CoW_a"), get_array(df, "a"))
  707. if using_copy_on_write:
  708. assert np.shares_memory(get_array(df2, "CoW_c"), get_array(df, "c"))
  709. expected = DataFrame(
  710. {"CoW_a": [0, 2, 3], "CoW_b": [4, 5, 6], "CoW_c": [0.1, 0.2, 0.3]}
  711. )
  712. tm.assert_frame_equal(df2, expected)
  713. tm.assert_frame_equal(df, df_orig)
  714. def test_add_suffix(using_copy_on_write):
  715. # GH 49473
  716. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  717. df_orig = df.copy()
  718. df2 = df.add_suffix("_CoW")
  719. if using_copy_on_write:
  720. assert np.shares_memory(get_array(df2, "a_CoW"), get_array(df, "a"))
  721. df2.iloc[0, 0] = 0
  722. assert not np.shares_memory(get_array(df2, "a_CoW"), get_array(df, "a"))
  723. if using_copy_on_write:
  724. assert np.shares_memory(get_array(df2, "c_CoW"), get_array(df, "c"))
  725. expected = DataFrame(
  726. {"a_CoW": [0, 2, 3], "b_CoW": [4, 5, 6], "c_CoW": [0.1, 0.2, 0.3]}
  727. )
  728. tm.assert_frame_equal(df2, expected)
  729. tm.assert_frame_equal(df, df_orig)
  730. @pytest.mark.parametrize("axis, val", [(0, 5.5), (1, np.nan)])
  731. def test_dropna(using_copy_on_write, axis, val):
  732. df = DataFrame({"a": [1, 2, 3], "b": [4, val, 6], "c": "d"})
  733. df_orig = df.copy()
  734. df2 = df.dropna(axis=axis)
  735. if using_copy_on_write:
  736. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  737. else:
  738. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  739. df2.iloc[0, 0] = 0
  740. if using_copy_on_write:
  741. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  742. tm.assert_frame_equal(df, df_orig)
  743. @pytest.mark.parametrize("val", [5, 5.5])
  744. def test_dropna_series(using_copy_on_write, val):
  745. ser = Series([1, val, 4])
  746. ser_orig = ser.copy()
  747. ser2 = ser.dropna()
  748. if using_copy_on_write:
  749. assert np.shares_memory(ser2.values, ser.values)
  750. else:
  751. assert not np.shares_memory(ser2.values, ser.values)
  752. ser2.iloc[0] = 0
  753. if using_copy_on_write:
  754. assert not np.shares_memory(ser2.values, ser.values)
  755. tm.assert_series_equal(ser, ser_orig)
  756. @pytest.mark.parametrize(
  757. "method",
  758. [
  759. lambda df: df.head(),
  760. lambda df: df.head(2),
  761. lambda df: df.tail(),
  762. lambda df: df.tail(3),
  763. ],
  764. )
  765. def test_head_tail(method, using_copy_on_write, warn_copy_on_write):
  766. df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
  767. df_orig = df.copy()
  768. df2 = method(df)
  769. df2._mgr._verify_integrity()
  770. if using_copy_on_write:
  771. # We are explicitly deviating for CoW here to make an eager copy (avoids
  772. # tracking references for very cheap ops)
  773. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  774. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  775. # modify df2 to trigger CoW for that block
  776. with tm.assert_cow_warning(warn_copy_on_write):
  777. df2.iloc[0, 0] = 0
  778. if using_copy_on_write:
  779. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  780. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  781. else:
  782. # without CoW enabled, head and tail return views. Mutating df2 also mutates df.
  783. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  784. with tm.assert_cow_warning(warn_copy_on_write):
  785. df2.iloc[0, 0] = 1
  786. tm.assert_frame_equal(df, df_orig)
  787. def test_infer_objects(using_copy_on_write, using_infer_string):
  788. df = DataFrame(
  789. {"a": [1, 2], "b": Series(["x", "y"], dtype=object), "c": 1, "d": "x"}
  790. )
  791. df_orig = df.copy()
  792. df2 = df.infer_objects()
  793. if using_copy_on_write:
  794. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  795. if using_infer_string:
  796. assert not tm.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  797. else:
  798. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  799. else:
  800. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  801. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  802. df2.iloc[0, 0] = 0
  803. df2.iloc[0, 1] = "d"
  804. if using_copy_on_write:
  805. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  806. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  807. tm.assert_frame_equal(df, df_orig)
  808. def test_infer_objects_no_reference(using_copy_on_write, using_infer_string):
  809. df = DataFrame(
  810. {
  811. "a": [1, 2],
  812. "b": Series(["x", "y"], dtype=object),
  813. "c": 1,
  814. "d": Series(
  815. [Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype="object"
  816. ),
  817. "e": Series(["z", "w"], dtype=object),
  818. }
  819. )
  820. df = df.infer_objects()
  821. arr_a = get_array(df, "a")
  822. arr_b = get_array(df, "b")
  823. arr_d = get_array(df, "d")
  824. df.iloc[0, 0] = 0
  825. df.iloc[0, 1] = "d"
  826. df.iloc[0, 3] = Timestamp("2018-12-31")
  827. if using_copy_on_write:
  828. assert np.shares_memory(arr_a, get_array(df, "a"))
  829. if using_infer_string:
  830. # note that the underlying memory of arr_b has been copied anyway
  831. # because of the assignment, but the EA is updated inplace so still
  832. # appears the share memory
  833. assert tm.shares_memory(arr_b, get_array(df, "b"))
  834. else:
  835. # TODO(CoW): Block splitting causes references here
  836. assert not np.shares_memory(arr_b, get_array(df, "b"))
  837. assert np.shares_memory(arr_d, get_array(df, "d"))
  838. def test_infer_objects_reference(using_copy_on_write, using_infer_string):
  839. df = DataFrame(
  840. {
  841. "a": [1, 2],
  842. "b": Series(["x", "y"], dtype=object),
  843. "c": 1,
  844. "d": Series(
  845. [Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype="object"
  846. ),
  847. }
  848. )
  849. view = df[:] # noqa: F841
  850. df = df.infer_objects()
  851. arr_a = get_array(df, "a")
  852. arr_b = get_array(df, "b")
  853. arr_d = get_array(df, "d")
  854. df.iloc[0, 0] = 0
  855. df.iloc[0, 1] = "d"
  856. df.iloc[0, 3] = Timestamp("2018-12-31")
  857. if using_copy_on_write:
  858. assert not np.shares_memory(arr_a, get_array(df, "a"))
  859. if not using_infer_string or HAS_PYARROW:
  860. assert not np.shares_memory(arr_b, get_array(df, "b"))
  861. assert np.shares_memory(arr_d, get_array(df, "d"))
  862. @pytest.mark.parametrize(
  863. "kwargs",
  864. [
  865. {"before": "a", "after": "b", "axis": 1},
  866. {"before": 0, "after": 1, "axis": 0},
  867. ],
  868. )
  869. def test_truncate(using_copy_on_write, kwargs):
  870. df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 2})
  871. df_orig = df.copy()
  872. df2 = df.truncate(**kwargs)
  873. df2._mgr._verify_integrity()
  874. if using_copy_on_write:
  875. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  876. else:
  877. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  878. df2.iloc[0, 0] = 0
  879. if using_copy_on_write:
  880. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  881. tm.assert_frame_equal(df, df_orig)
  882. @pytest.mark.parametrize("method", ["assign", "drop_duplicates"])
  883. def test_assign_drop_duplicates(using_copy_on_write, method):
  884. df = DataFrame({"a": [1, 2, 3]})
  885. df_orig = df.copy()
  886. df2 = getattr(df, method)()
  887. df2._mgr._verify_integrity()
  888. if using_copy_on_write:
  889. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  890. else:
  891. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  892. df2.iloc[0, 0] = 0
  893. if using_copy_on_write:
  894. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  895. tm.assert_frame_equal(df, df_orig)
  896. @pytest.mark.parametrize("obj", [Series([1, 2]), DataFrame({"a": [1, 2]})])
  897. def test_take(using_copy_on_write, obj):
  898. # Check that no copy is made when we take all rows in original order
  899. obj_orig = obj.copy()
  900. obj2 = obj.take([0, 1])
  901. if using_copy_on_write:
  902. assert np.shares_memory(obj2.values, obj.values)
  903. else:
  904. assert not np.shares_memory(obj2.values, obj.values)
  905. obj2.iloc[0] = 0
  906. if using_copy_on_write:
  907. assert not np.shares_memory(obj2.values, obj.values)
  908. tm.assert_equal(obj, obj_orig)
  909. @pytest.mark.parametrize("obj", [Series([1, 2]), DataFrame({"a": [1, 2]})])
  910. def test_between_time(using_copy_on_write, obj):
  911. obj.index = date_range("2018-04-09", periods=2, freq="1D20min")
  912. obj_orig = obj.copy()
  913. obj2 = obj.between_time("0:00", "1:00")
  914. if using_copy_on_write:
  915. assert np.shares_memory(obj2.values, obj.values)
  916. else:
  917. assert not np.shares_memory(obj2.values, obj.values)
  918. obj2.iloc[0] = 0
  919. if using_copy_on_write:
  920. assert not np.shares_memory(obj2.values, obj.values)
  921. tm.assert_equal(obj, obj_orig)
  922. def test_reindex_like(using_copy_on_write):
  923. df = DataFrame({"a": [1, 2], "b": "a"})
  924. other = DataFrame({"b": "a", "a": [1, 2]})
  925. df_orig = df.copy()
  926. df2 = df.reindex_like(other)
  927. if using_copy_on_write:
  928. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  929. else:
  930. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  931. df2.iloc[0, 1] = 0
  932. if using_copy_on_write:
  933. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  934. tm.assert_frame_equal(df, df_orig)
  935. def test_sort_index(using_copy_on_write):
  936. # GH 49473
  937. ser = Series([1, 2, 3])
  938. ser_orig = ser.copy()
  939. ser2 = ser.sort_index()
  940. if using_copy_on_write:
  941. assert np.shares_memory(ser.values, ser2.values)
  942. else:
  943. assert not np.shares_memory(ser.values, ser2.values)
  944. # mutating ser triggers a copy-on-write for the column / block
  945. ser2.iloc[0] = 0
  946. assert not np.shares_memory(ser2.values, ser.values)
  947. tm.assert_series_equal(ser, ser_orig)
  948. @pytest.mark.parametrize(
  949. "obj, kwargs",
  950. [(Series([1, 2, 3], name="a"), {}), (DataFrame({"a": [1, 2, 3]}), {"by": "a"})],
  951. )
  952. def test_sort_values(using_copy_on_write, obj, kwargs):
  953. obj_orig = obj.copy()
  954. obj2 = obj.sort_values(**kwargs)
  955. if using_copy_on_write:
  956. assert np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  957. else:
  958. assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  959. # mutating df triggers a copy-on-write for the column / block
  960. obj2.iloc[0] = 0
  961. assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  962. tm.assert_equal(obj, obj_orig)
  963. @pytest.mark.parametrize(
  964. "obj, kwargs",
  965. [(Series([1, 2, 3], name="a"), {}), (DataFrame({"a": [1, 2, 3]}), {"by": "a"})],
  966. )
  967. def test_sort_values_inplace(using_copy_on_write, obj, kwargs, warn_copy_on_write):
  968. obj_orig = obj.copy()
  969. view = obj[:]
  970. obj.sort_values(inplace=True, **kwargs)
  971. assert np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
  972. # mutating obj triggers a copy-on-write for the column / block
  973. with tm.assert_cow_warning(warn_copy_on_write):
  974. obj.iloc[0] = 0
  975. if using_copy_on_write:
  976. assert not np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
  977. tm.assert_equal(view, obj_orig)
  978. else:
  979. assert np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
  980. @pytest.mark.parametrize("decimals", [-1, 0, 1])
  981. def test_round(using_copy_on_write, warn_copy_on_write, decimals):
  982. df = DataFrame({"a": [1, 2], "b": "c"})
  983. df_orig = df.copy()
  984. df2 = df.round(decimals=decimals)
  985. if using_copy_on_write:
  986. assert tm.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  987. # TODO: Make inplace by using out parameter of ndarray.round?
  988. if decimals >= 0 and Version(np.__version__) < Version("2.4.0.dev0"):
  989. # Ensure lazy copy if no-op
  990. # TODO: Cannot rely on Numpy returning view after version 2.3
  991. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  992. else:
  993. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  994. else:
  995. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  996. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  997. df2.iloc[0, 1] = "d"
  998. df2.iloc[0, 0] = 4
  999. if using_copy_on_write:
  1000. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  1001. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1002. tm.assert_frame_equal(df, df_orig)
  1003. def test_reorder_levels(using_copy_on_write):
  1004. index = MultiIndex.from_tuples(
  1005. [(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"]
  1006. )
  1007. df = DataFrame({"a": [1, 2, 3, 4]}, index=index)
  1008. df_orig = df.copy()
  1009. df2 = df.reorder_levels(order=["two", "one"])
  1010. if using_copy_on_write:
  1011. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1012. else:
  1013. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1014. df2.iloc[0, 0] = 0
  1015. if using_copy_on_write:
  1016. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1017. tm.assert_frame_equal(df, df_orig)
  1018. def test_series_reorder_levels(using_copy_on_write):
  1019. index = MultiIndex.from_tuples(
  1020. [(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"]
  1021. )
  1022. ser = Series([1, 2, 3, 4], index=index)
  1023. ser_orig = ser.copy()
  1024. ser2 = ser.reorder_levels(order=["two", "one"])
  1025. if using_copy_on_write:
  1026. assert np.shares_memory(ser2.values, ser.values)
  1027. else:
  1028. assert not np.shares_memory(ser2.values, ser.values)
  1029. ser2.iloc[0] = 0
  1030. if using_copy_on_write:
  1031. assert not np.shares_memory(ser2.values, ser.values)
  1032. tm.assert_series_equal(ser, ser_orig)
  1033. @pytest.mark.parametrize("obj", [Series([1, 2, 3]), DataFrame({"a": [1, 2, 3]})])
  1034. def test_swaplevel(using_copy_on_write, obj):
  1035. index = MultiIndex.from_tuples([(1, 1), (1, 2), (2, 1)], names=["one", "two"])
  1036. obj.index = index
  1037. obj_orig = obj.copy()
  1038. obj2 = obj.swaplevel()
  1039. if using_copy_on_write:
  1040. assert np.shares_memory(obj2.values, obj.values)
  1041. else:
  1042. assert not np.shares_memory(obj2.values, obj.values)
  1043. obj2.iloc[0] = 0
  1044. if using_copy_on_write:
  1045. assert not np.shares_memory(obj2.values, obj.values)
  1046. tm.assert_equal(obj, obj_orig)
  1047. def test_frame_set_axis(using_copy_on_write):
  1048. # GH 49473
  1049. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  1050. df_orig = df.copy()
  1051. df2 = df.set_axis(["a", "b", "c"], axis="index")
  1052. if using_copy_on_write:
  1053. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1054. else:
  1055. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1056. # mutating df2 triggers a copy-on-write for that column / block
  1057. df2.iloc[0, 0] = 0
  1058. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1059. tm.assert_frame_equal(df, df_orig)
  1060. def test_series_set_axis(using_copy_on_write):
  1061. # GH 49473
  1062. ser = Series([1, 2, 3])
  1063. ser_orig = ser.copy()
  1064. ser2 = ser.set_axis(["a", "b", "c"], axis="index")
  1065. if using_copy_on_write:
  1066. assert np.shares_memory(ser, ser2)
  1067. else:
  1068. assert not np.shares_memory(ser, ser2)
  1069. # mutating ser triggers a copy-on-write for the column / block
  1070. ser2.iloc[0] = 0
  1071. assert not np.shares_memory(ser2, ser)
  1072. tm.assert_series_equal(ser, ser_orig)
  1073. def test_set_flags(using_copy_on_write, warn_copy_on_write):
  1074. ser = Series([1, 2, 3])
  1075. ser_orig = ser.copy()
  1076. ser2 = ser.set_flags(allows_duplicate_labels=False)
  1077. assert np.shares_memory(ser, ser2)
  1078. # mutating ser triggers a copy-on-write for the column / block
  1079. with tm.assert_cow_warning(warn_copy_on_write):
  1080. ser2.iloc[0] = 0
  1081. if using_copy_on_write:
  1082. assert not np.shares_memory(ser2, ser)
  1083. tm.assert_series_equal(ser, ser_orig)
  1084. else:
  1085. assert np.shares_memory(ser2, ser)
  1086. expected = Series([0, 2, 3])
  1087. tm.assert_series_equal(ser, expected)
  1088. @pytest.mark.parametrize("kwargs", [{"mapper": "test"}, {"index": "test"}])
  1089. def test_rename_axis(using_copy_on_write, kwargs):
  1090. df = DataFrame({"a": [1, 2, 3, 4]}, index=Index([1, 2, 3, 4], name="a"))
  1091. df_orig = df.copy()
  1092. df2 = df.rename_axis(**kwargs)
  1093. if using_copy_on_write:
  1094. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1095. else:
  1096. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1097. df2.iloc[0, 0] = 0
  1098. if using_copy_on_write:
  1099. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1100. tm.assert_frame_equal(df, df_orig)
  1101. @pytest.mark.parametrize(
  1102. "func, tz", [("tz_convert", "Europe/Berlin"), ("tz_localize", None)]
  1103. )
  1104. def test_tz_convert_localize(using_copy_on_write, func, tz):
  1105. # GH 49473
  1106. ser = Series(
  1107. [1, 2], index=date_range(start="2014-08-01 09:00", freq="h", periods=2, tz=tz)
  1108. )
  1109. ser_orig = ser.copy()
  1110. ser2 = getattr(ser, func)("US/Central")
  1111. if using_copy_on_write:
  1112. assert np.shares_memory(ser.values, ser2.values)
  1113. else:
  1114. assert not np.shares_memory(ser.values, ser2.values)
  1115. # mutating ser triggers a copy-on-write for the column / block
  1116. ser2.iloc[0] = 0
  1117. assert not np.shares_memory(ser2.values, ser.values)
  1118. tm.assert_series_equal(ser, ser_orig)
  1119. def test_droplevel(using_copy_on_write):
  1120. # GH 49473
  1121. index = MultiIndex.from_tuples([(1, 1), (1, 2), (2, 1)], names=["one", "two"])
  1122. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=index)
  1123. df_orig = df.copy()
  1124. df2 = df.droplevel(0)
  1125. if using_copy_on_write:
  1126. assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  1127. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1128. else:
  1129. assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  1130. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1131. # mutating df2 triggers a copy-on-write for that column / block
  1132. df2.iloc[0, 0] = 0
  1133. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1134. if using_copy_on_write:
  1135. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  1136. tm.assert_frame_equal(df, df_orig)
  1137. def test_squeeze(using_copy_on_write, warn_copy_on_write):
  1138. df = DataFrame({"a": [1, 2, 3]})
  1139. df_orig = df.copy()
  1140. series = df.squeeze()
  1141. # Should share memory regardless of CoW since squeeze is just an iloc
  1142. assert np.shares_memory(series.values, get_array(df, "a"))
  1143. # mutating squeezed df triggers a copy-on-write for that column/block
  1144. with tm.assert_cow_warning(warn_copy_on_write):
  1145. series.iloc[0] = 0
  1146. if using_copy_on_write:
  1147. assert not np.shares_memory(series.values, get_array(df, "a"))
  1148. tm.assert_frame_equal(df, df_orig)
  1149. else:
  1150. # Without CoW the original will be modified
  1151. assert np.shares_memory(series.values, get_array(df, "a"))
  1152. assert df.loc[0, "a"] == 0
  1153. def test_items(using_copy_on_write, warn_copy_on_write):
  1154. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
  1155. df_orig = df.copy()
  1156. # Test this twice, since the second time, the item cache will be
  1157. # triggered, and we want to make sure it still works then.
  1158. for i in range(2):
  1159. for name, ser in df.items():
  1160. assert np.shares_memory(get_array(ser, name), get_array(df, name))
  1161. # mutating df triggers a copy-on-write for that column / block
  1162. with tm.assert_cow_warning(warn_copy_on_write):
  1163. ser.iloc[0] = 0
  1164. if using_copy_on_write:
  1165. assert not np.shares_memory(get_array(ser, name), get_array(df, name))
  1166. tm.assert_frame_equal(df, df_orig)
  1167. else:
  1168. # Original frame will be modified
  1169. assert df.loc[0, name] == 0
  1170. @pytest.mark.parametrize("dtype", ["int64", "Int64"])
  1171. def test_putmask(using_copy_on_write, dtype, warn_copy_on_write):
  1172. df = DataFrame({"a": [1, 2], "b": 1, "c": 2}, dtype=dtype)
  1173. view = df[:]
  1174. df_orig = df.copy()
  1175. with tm.assert_cow_warning(warn_copy_on_write):
  1176. df[df == df] = 5
  1177. if using_copy_on_write:
  1178. assert not np.shares_memory(get_array(view, "a"), get_array(df, "a"))
  1179. tm.assert_frame_equal(view, df_orig)
  1180. else:
  1181. # Without CoW the original will be modified
  1182. assert np.shares_memory(get_array(view, "a"), get_array(df, "a"))
  1183. assert view.iloc[0, 0] == 5
  1184. @pytest.mark.parametrize("dtype", ["int64", "Int64"])
  1185. def test_putmask_no_reference(using_copy_on_write, dtype):
  1186. df = DataFrame({"a": [1, 2], "b": 1, "c": 2}, dtype=dtype)
  1187. arr_a = get_array(df, "a")
  1188. df[df == df] = 5
  1189. if using_copy_on_write:
  1190. assert np.shares_memory(arr_a, get_array(df, "a"))
  1191. @pytest.mark.parametrize("dtype", ["float64", "Float64"])
  1192. def test_putmask_aligns_rhs_no_reference(using_copy_on_write, dtype):
  1193. df = DataFrame({"a": [1.5, 2], "b": 1.5}, dtype=dtype)
  1194. arr_a = get_array(df, "a")
  1195. df[df == df] = DataFrame({"a": [5.5, 5]})
  1196. if using_copy_on_write:
  1197. assert np.shares_memory(arr_a, get_array(df, "a"))
  1198. @pytest.mark.parametrize(
  1199. "val, exp, warn", [(5.5, True, FutureWarning), (5, False, None)]
  1200. )
  1201. def test_putmask_dont_copy_some_blocks(
  1202. using_copy_on_write, val, exp, warn, warn_copy_on_write
  1203. ):
  1204. df = DataFrame({"a": [1, 2], "b": 1, "c": 1.5})
  1205. view = df[:]
  1206. df_orig = df.copy()
  1207. indexer = DataFrame(
  1208. [[True, False, False], [True, False, False]], columns=list("abc")
  1209. )
  1210. if warn_copy_on_write:
  1211. with tm.assert_cow_warning():
  1212. df[indexer] = val
  1213. else:
  1214. with tm.assert_produces_warning(warn, match="incompatible dtype"):
  1215. df[indexer] = val
  1216. if using_copy_on_write:
  1217. assert not np.shares_memory(get_array(view, "a"), get_array(df, "a"))
  1218. # TODO(CoW): Could split blocks to avoid copying the whole block
  1219. assert np.shares_memory(get_array(view, "b"), get_array(df, "b")) is exp
  1220. assert np.shares_memory(get_array(view, "c"), get_array(df, "c"))
  1221. assert df._mgr._has_no_reference(1) is not exp
  1222. assert not df._mgr._has_no_reference(2)
  1223. tm.assert_frame_equal(view, df_orig)
  1224. elif val == 5:
  1225. # Without CoW the original will be modified, the other case upcasts, e.g. copy
  1226. assert np.shares_memory(get_array(view, "a"), get_array(df, "a"))
  1227. assert np.shares_memory(get_array(view, "c"), get_array(df, "c"))
  1228. assert view.iloc[0, 0] == 5
  1229. @pytest.mark.parametrize("dtype", ["int64", "Int64"])
  1230. @pytest.mark.parametrize(
  1231. "func",
  1232. [
  1233. lambda ser: ser.where(ser > 0, 10),
  1234. lambda ser: ser.mask(ser <= 0, 10),
  1235. ],
  1236. )
  1237. def test_where_mask_noop(using_copy_on_write, dtype, func):
  1238. ser = Series([1, 2, 3], dtype=dtype)
  1239. ser_orig = ser.copy()
  1240. result = func(ser)
  1241. if using_copy_on_write:
  1242. assert np.shares_memory(get_array(ser), get_array(result))
  1243. else:
  1244. assert not np.shares_memory(get_array(ser), get_array(result))
  1245. result.iloc[0] = 10
  1246. if using_copy_on_write:
  1247. assert not np.shares_memory(get_array(ser), get_array(result))
  1248. tm.assert_series_equal(ser, ser_orig)
  1249. @pytest.mark.parametrize("dtype", ["int64", "Int64"])
  1250. @pytest.mark.parametrize(
  1251. "func",
  1252. [
  1253. lambda ser: ser.where(ser < 0, 10),
  1254. lambda ser: ser.mask(ser >= 0, 10),
  1255. ],
  1256. )
  1257. def test_where_mask(using_copy_on_write, dtype, func):
  1258. ser = Series([1, 2, 3], dtype=dtype)
  1259. ser_orig = ser.copy()
  1260. result = func(ser)
  1261. assert not np.shares_memory(get_array(ser), get_array(result))
  1262. tm.assert_series_equal(ser, ser_orig)
  1263. @pytest.mark.parametrize("dtype, val", [("int64", 10.5), ("Int64", 10)])
  1264. @pytest.mark.parametrize(
  1265. "func",
  1266. [
  1267. lambda df, val: df.where(df < 0, val),
  1268. lambda df, val: df.mask(df >= 0, val),
  1269. ],
  1270. )
  1271. def test_where_mask_noop_on_single_column(using_copy_on_write, dtype, val, func):
  1272. df = DataFrame({"a": [1, 2, 3], "b": [-4, -5, -6]}, dtype=dtype)
  1273. df_orig = df.copy()
  1274. result = func(df, val)
  1275. if using_copy_on_write:
  1276. assert np.shares_memory(get_array(df, "b"), get_array(result, "b"))
  1277. assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
  1278. else:
  1279. assert not np.shares_memory(get_array(df, "b"), get_array(result, "b"))
  1280. result.iloc[0, 1] = 10
  1281. if using_copy_on_write:
  1282. assert not np.shares_memory(get_array(df, "b"), get_array(result, "b"))
  1283. tm.assert_frame_equal(df, df_orig)
@pytest.mark.parametrize("func", ["mask", "where"])
def test_chained_where_mask(using_copy_on_write, func):
    # Calls ``mask``/``where`` with ``inplace=True`` on a temporary selection of
    # ``df`` (a chained call) and checks how each mode reacts.
    df = DataFrame({"a": [1, 4, 2], "b": 1})
    df_orig = df.copy()
    if using_copy_on_write:
        # Both the Series and the DataFrame selection must be flagged by the
        # chained-assignment helper, and the parent must stay unchanged.
        with tm.raises_chained_assignment_error():
            getattr(df["a"], func)(df["a"] > 2, 5, inplace=True)
        tm.assert_frame_equal(df, df_orig)

        with tm.raises_chained_assignment_error():
            getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True)
        tm.assert_frame_equal(df, df_orig)
    else:
        # Series selection: a FutureWarning matching "inplace method" is
        # expected unless WARNING_CHECK_DISABLED is set.
        with tm.assert_produces_warning(
            FutureWarning if not WARNING_CHECK_DISABLED else None,
            match="inplace method",
        ):
            getattr(df["a"], func)(df["a"] > 2, 5, inplace=True)

        # DataFrame selections (column list, boolean mask) must not warn once
        # the chained_assignment option is set to None.
        with tm.assert_produces_warning(None):
            with option_context("mode.chained_assignment", None):
                getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True)

        with tm.assert_produces_warning(None):
            with option_context("mode.chained_assignment", None):
                getattr(df[df["a"] > 1], func)(df["a"] > 2, 5, inplace=True)
  1307. def test_asfreq_noop(using_copy_on_write):
  1308. df = DataFrame(
  1309. {"a": [0.0, None, 2.0, 3.0]},
  1310. index=date_range("1/1/2000", periods=4, freq="min"),
  1311. )
  1312. df_orig = df.copy()
  1313. df2 = df.asfreq(freq="min")
  1314. if using_copy_on_write:
  1315. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1316. else:
  1317. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1318. # mutating df2 triggers a copy-on-write for that column / block
  1319. df2.iloc[0, 0] = 0
  1320. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1321. tm.assert_frame_equal(df, df_orig)
  1322. def test_iterrows(using_copy_on_write):
  1323. df = DataFrame({"a": 0, "b": 1}, index=[1, 2, 3])
  1324. df_orig = df.copy()
  1325. for _, sub in df.iterrows():
  1326. sub.iloc[0] = 100
  1327. if using_copy_on_write:
  1328. tm.assert_frame_equal(df, df_orig)
  1329. def test_interpolate_creates_copy(using_copy_on_write, warn_copy_on_write):
  1330. # GH#51126
  1331. df = DataFrame({"a": [1.5, np.nan, 3]})
  1332. view = df[:]
  1333. expected = df.copy()
  1334. with tm.assert_cow_warning(warn_copy_on_write):
  1335. df.ffill(inplace=True)
  1336. with tm.assert_cow_warning(warn_copy_on_write):
  1337. df.iloc[0, 0] = 100.5
  1338. if using_copy_on_write:
  1339. tm.assert_frame_equal(view, expected)
  1340. else:
  1341. expected = DataFrame({"a": [100.5, 1.5, 3]})
  1342. tm.assert_frame_equal(view, expected)
  1343. def test_isetitem(using_copy_on_write):
  1344. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
  1345. df_orig = df.copy()
  1346. df2 = df.copy(deep=None) # Trigger a CoW
  1347. df2.isetitem(1, np.array([-1, -2, -3])) # This is inplace
  1348. if using_copy_on_write:
  1349. assert np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
  1350. assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  1351. else:
  1352. assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
  1353. assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  1354. df2.loc[0, "a"] = 0
  1355. tm.assert_frame_equal(df, df_orig) # Original is unchanged
  1356. if using_copy_on_write:
  1357. assert np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
  1358. else:
  1359. assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
  1360. @pytest.mark.parametrize(
  1361. "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
  1362. )
  1363. def test_isetitem_series(using_copy_on_write, dtype):
  1364. df = DataFrame({"a": [1, 2, 3], "b": np.array([4, 5, 6], dtype=dtype)})
  1365. ser = Series([7, 8, 9])
  1366. ser_orig = ser.copy()
  1367. df.isetitem(0, ser)
  1368. if using_copy_on_write:
  1369. assert np.shares_memory(get_array(df, "a"), get_array(ser))
  1370. assert not df._mgr._has_no_reference(0)
  1371. # mutating dataframe doesn't update series
  1372. df.loc[0, "a"] = 0
  1373. tm.assert_series_equal(ser, ser_orig)
  1374. # mutating series doesn't update dataframe
  1375. df = DataFrame({"a": [1, 2, 3], "b": np.array([4, 5, 6], dtype=dtype)})
  1376. ser = Series([7, 8, 9])
  1377. df.isetitem(0, ser)
  1378. ser.loc[0] = 0
  1379. expected = DataFrame({"a": [7, 8, 9], "b": np.array([4, 5, 6], dtype=dtype)})
  1380. tm.assert_frame_equal(df, expected)
  1381. def test_isetitem_frame(using_copy_on_write):
  1382. df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 2})
  1383. rhs = DataFrame({"a": [4, 5, 6], "b": 2})
  1384. df.isetitem([0, 1], rhs)
  1385. if using_copy_on_write:
  1386. assert np.shares_memory(get_array(df, "a"), get_array(rhs, "a"))
  1387. assert np.shares_memory(get_array(df, "b"), get_array(rhs, "b"))
  1388. assert not df._mgr._has_no_reference(0)
  1389. else:
  1390. assert not np.shares_memory(get_array(df, "a"), get_array(rhs, "a"))
  1391. assert not np.shares_memory(get_array(df, "b"), get_array(rhs, "b"))
  1392. expected = df.copy()
  1393. rhs.iloc[0, 0] = 100
  1394. rhs.iloc[0, 1] = 100
  1395. tm.assert_frame_equal(df, expected)
  1396. @pytest.mark.parametrize("key", ["a", ["a"]])
  1397. def test_get(using_copy_on_write, warn_copy_on_write, key):
  1398. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1399. df_orig = df.copy()
  1400. result = df.get(key)
  1401. if using_copy_on_write:
  1402. assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  1403. result.iloc[0] = 0
  1404. assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  1405. tm.assert_frame_equal(df, df_orig)
  1406. else:
  1407. # for non-CoW it depends on whether we got a Series or DataFrame if it
  1408. # is a view or copy or triggers a warning or not
  1409. if warn_copy_on_write:
  1410. warn = FutureWarning if isinstance(key, str) else None
  1411. else:
  1412. warn = SettingWithCopyWarning if isinstance(key, list) else None
  1413. with option_context("chained_assignment", "warn"):
  1414. with tm.assert_produces_warning(warn):
  1415. result.iloc[0] = 0
  1416. if isinstance(key, list):
  1417. tm.assert_frame_equal(df, df_orig)
  1418. else:
  1419. assert df.iloc[0, 0] == 0
@pytest.mark.parametrize("axis, key", [(0, 0), (1, "a")])
@pytest.mark.parametrize(
    "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
)
def test_xs(
    using_copy_on_write, warn_copy_on_write, using_array_manager, axis, key, dtype
):
    # Take a cross-section (row 0 or column "a") and write to its first
    # element, then check whether the parent frame was affected.
    #
    # ``single_block``: all three columns are int64 and the array manager is not
    # in use. ``is_view``: the cases in which the write below is performed
    # without expecting any warning outside of CoW-warning mode.
    single_block = (dtype == "int64") and not using_array_manager
    is_view = single_block or (using_array_manager and axis == 1)
    df = DataFrame(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
    )
    df_orig = df.copy()

    result = df.xs(key, axis=axis)

    if axis == 1 or single_block:
        # column selection, or a row of a single-block frame, shares memory
        assert np.shares_memory(get_array(df, "a"), get_array(result))
    elif using_copy_on_write:
        # a row of a mixed-block frame carries no reference to the parent
        assert result._mgr._has_no_reference(0)

    if using_copy_on_write or (is_view and not warn_copy_on_write):
        # no warning is asserted in these cases
        result.iloc[0] = 0
    elif warn_copy_on_write:
        # a CoW warning is expected only when the result shares memory
        with tm.assert_cow_warning(single_block or axis == 1):
            result.iloc[0] = 0
    else:
        # remaining non-CoW case: SettingWithCopyWarning is expected
        with option_context("chained_assignment", "warn"):
            with tm.assert_produces_warning(SettingWithCopyWarning):
                result.iloc[0] = 0

    if using_copy_on_write or (not single_block and axis == 0):
        # the parent is untouched under CoW or when a mixed-block row was taken
        tm.assert_frame_equal(df, df_orig)
    else:
        # otherwise the write propagated to the parent
        assert df.iloc[0, 0] == 0
  1451. @pytest.mark.parametrize("axis", [0, 1])
  1452. @pytest.mark.parametrize("key, level", [("l1", 0), (2, 1)])
  1453. def test_xs_multiindex(
  1454. using_copy_on_write, warn_copy_on_write, using_array_manager, key, level, axis
  1455. ):
  1456. arr = np.arange(18).reshape(6, 3)
  1457. index = MultiIndex.from_product([["l1", "l2"], [1, 2, 3]], names=["lev1", "lev2"])
  1458. df = DataFrame(arr, index=index, columns=list("abc"))
  1459. if axis == 1:
  1460. df = df.transpose().copy()
  1461. df_orig = df.copy()
  1462. result = df.xs(key, level=level, axis=axis)
  1463. if level == 0:
  1464. assert np.shares_memory(
  1465. get_array(df, df.columns[0]), get_array(result, result.columns[0])
  1466. )
  1467. if warn_copy_on_write:
  1468. warn = FutureWarning if level == 0 else None
  1469. elif not using_copy_on_write and not using_array_manager:
  1470. warn = SettingWithCopyWarning
  1471. else:
  1472. warn = None
  1473. with option_context("chained_assignment", "warn"):
  1474. with tm.assert_produces_warning(warn):
  1475. result.iloc[0, 0] = 0
  1476. tm.assert_frame_equal(df, df_orig)
  1477. def test_update_frame(using_copy_on_write, warn_copy_on_write):
  1478. df1 = DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]})
  1479. df2 = DataFrame({"b": [100.0]}, index=[1])
  1480. df1_orig = df1.copy()
  1481. view = df1[:]
  1482. # TODO(CoW) better warning message?
  1483. with tm.assert_cow_warning(warn_copy_on_write):
  1484. df1.update(df2)
  1485. expected = DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 100.0, 6.0]})
  1486. tm.assert_frame_equal(df1, expected)
  1487. if using_copy_on_write:
  1488. # df1 is updated, but its view not
  1489. tm.assert_frame_equal(view, df1_orig)
  1490. assert np.shares_memory(get_array(df1, "a"), get_array(view, "a"))
  1491. assert not np.shares_memory(get_array(df1, "b"), get_array(view, "b"))
  1492. else:
  1493. tm.assert_frame_equal(view, expected)
  1494. def test_update_series(using_copy_on_write, warn_copy_on_write):
  1495. ser1 = Series([1.0, 2.0, 3.0])
  1496. ser2 = Series([100.0], index=[1])
  1497. ser1_orig = ser1.copy()
  1498. view = ser1[:]
  1499. if warn_copy_on_write:
  1500. with tm.assert_cow_warning():
  1501. ser1.update(ser2)
  1502. else:
  1503. ser1.update(ser2)
  1504. expected = Series([1.0, 100.0, 3.0])
  1505. tm.assert_series_equal(ser1, expected)
  1506. if using_copy_on_write:
  1507. # ser1 is updated, but its view not
  1508. tm.assert_series_equal(view, ser1_orig)
  1509. else:
  1510. tm.assert_series_equal(view, expected)
def test_update_chained_assignment(using_copy_on_write):
    # Calls ``update`` on a temporary selection of ``df`` (a chained call) and
    # checks how each mode reacts.
    df = DataFrame({"a": [1, 2, 3]})
    ser2 = Series([100.0], index=[1])
    df_orig = df.copy()
    if using_copy_on_write:
        # Both the Series and the DataFrame selection must be flagged by the
        # chained-assignment helper, and the parent must stay unchanged.
        with tm.raises_chained_assignment_error():
            df["a"].update(ser2)
        tm.assert_frame_equal(df, df_orig)

        with tm.raises_chained_assignment_error():
            df[["a"]].update(ser2.to_frame())
        tm.assert_frame_equal(df, df_orig)
    else:
        # Series selection: a FutureWarning matching "inplace method" is
        # expected unless WARNING_CHECK_DISABLED is set.
        with tm.assert_produces_warning(
            FutureWarning if not WARNING_CHECK_DISABLED else None,
            match="inplace method",
        ):
            df["a"].update(ser2)

        # DataFrame selections (column list, boolean mask) must not warn once
        # the chained_assignment option is set to None.
        with tm.assert_produces_warning(None):
            with option_context("mode.chained_assignment", None):
                df[["a"]].update(ser2.to_frame())

        with tm.assert_produces_warning(None):
            with option_context("mode.chained_assignment", None):
                df[df["a"] > 1].update(ser2.to_frame())
  1534. def test_inplace_arithmetic_series(using_copy_on_write):
  1535. ser = Series([1, 2, 3])
  1536. ser_orig = ser.copy()
  1537. data = get_array(ser)
  1538. ser *= 2
  1539. if using_copy_on_write:
  1540. # https://github.com/pandas-dev/pandas/pull/55745
  1541. # changed to NOT update inplace because there is no benefit (actual
  1542. # operation already done non-inplace). This was only for the optics
  1543. # of updating the backing array inplace, but we no longer want to make
  1544. # that guarantee
  1545. assert not np.shares_memory(get_array(ser), data)
  1546. tm.assert_numpy_array_equal(data, get_array(ser_orig))
  1547. else:
  1548. assert np.shares_memory(get_array(ser), data)
  1549. tm.assert_numpy_array_equal(data, get_array(ser))
  1550. def test_inplace_arithmetic_series_with_reference(
  1551. using_copy_on_write, warn_copy_on_write
  1552. ):
  1553. ser = Series([1, 2, 3])
  1554. ser_orig = ser.copy()
  1555. view = ser[:]
  1556. with tm.assert_cow_warning(warn_copy_on_write):
  1557. ser *= 2
  1558. if using_copy_on_write:
  1559. assert not np.shares_memory(get_array(ser), get_array(view))
  1560. tm.assert_series_equal(ser_orig, view)
  1561. else:
  1562. assert np.shares_memory(get_array(ser), get_array(view))
  1563. @pytest.mark.parametrize("copy", [True, False])
  1564. def test_transpose(using_copy_on_write, copy, using_array_manager):
  1565. df = DataFrame({"a": [1, 2, 3], "b": 1})
  1566. df_orig = df.copy()
  1567. result = df.transpose(copy=copy)
  1568. if not copy and not using_array_manager or using_copy_on_write:
  1569. assert np.shares_memory(get_array(df, "a"), get_array(result, 0))
  1570. else:
  1571. assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))
  1572. result.iloc[0, 0] = 100
  1573. if using_copy_on_write:
  1574. tm.assert_frame_equal(df, df_orig)
  1575. def test_transpose_different_dtypes(using_copy_on_write):
  1576. df = DataFrame({"a": [1, 2, 3], "b": 1.5})
  1577. df_orig = df.copy()
  1578. result = df.T
  1579. assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))
  1580. result.iloc[0, 0] = 100
  1581. if using_copy_on_write:
  1582. tm.assert_frame_equal(df, df_orig)
  1583. def test_transpose_ea_single_column(using_copy_on_write):
  1584. df = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
  1585. result = df.T
  1586. assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))
  1587. def test_transform_frame(using_copy_on_write, warn_copy_on_write):
  1588. df = DataFrame({"a": [1, 2, 3], "b": 1})
  1589. df_orig = df.copy()
  1590. def func(ser):
  1591. ser.iloc[0] = 100
  1592. return ser
  1593. with tm.assert_cow_warning(warn_copy_on_write):
  1594. df.transform(func)
  1595. if using_copy_on_write:
  1596. tm.assert_frame_equal(df, df_orig)
  1597. def test_transform_series(using_copy_on_write, warn_copy_on_write):
  1598. ser = Series([1, 2, 3])
  1599. ser_orig = ser.copy()
  1600. def func(ser):
  1601. ser.iloc[0] = 100
  1602. return ser
  1603. with tm.assert_cow_warning(warn_copy_on_write):
  1604. ser.transform(func)
  1605. if using_copy_on_write:
  1606. tm.assert_series_equal(ser, ser_orig)
  1607. def test_count_read_only_array():
  1608. df = DataFrame({"a": [1, 2], "b": 3})
  1609. result = df.count()
  1610. result.iloc[0] = 100
  1611. expected = Series([100, 2], index=["a", "b"])
  1612. tm.assert_series_equal(result, expected)
  1613. def test_series_view(using_copy_on_write, warn_copy_on_write):
  1614. ser = Series([1, 2, 3])
  1615. ser_orig = ser.copy()
  1616. with tm.assert_produces_warning(FutureWarning, match="is deprecated"):
  1617. ser2 = ser.view()
  1618. assert np.shares_memory(get_array(ser), get_array(ser2))
  1619. if using_copy_on_write:
  1620. assert not ser2._mgr._has_no_reference(0)
  1621. with tm.assert_cow_warning(warn_copy_on_write):
  1622. ser2.iloc[0] = 100
  1623. if using_copy_on_write:
  1624. tm.assert_series_equal(ser_orig, ser)
  1625. else:
  1626. expected = Series([100, 2, 3])
  1627. tm.assert_series_equal(ser, expected)
  1628. def test_insert_series(using_copy_on_write):
  1629. df = DataFrame({"a": [1, 2, 3]})
  1630. ser = Series([1, 2, 3])
  1631. ser_orig = ser.copy()
  1632. df.insert(loc=1, value=ser, column="b")
  1633. if using_copy_on_write:
  1634. assert np.shares_memory(get_array(ser), get_array(df, "b"))
  1635. assert not df._mgr._has_no_reference(1)
  1636. else:
  1637. assert not np.shares_memory(get_array(ser), get_array(df, "b"))
  1638. df.iloc[0, 1] = 100
  1639. tm.assert_series_equal(ser, ser_orig)
  1640. def test_eval(using_copy_on_write):
  1641. df = DataFrame({"a": [1, 2, 3], "b": 1})
  1642. df_orig = df.copy()
  1643. result = df.eval("c = a+b")
  1644. if using_copy_on_write:
  1645. assert np.shares_memory(get_array(df, "a"), get_array(result, "a"))
  1646. else:
  1647. assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
  1648. result.iloc[0, 0] = 100
  1649. tm.assert_frame_equal(df, df_orig)
  1650. def test_eval_inplace(using_copy_on_write, warn_copy_on_write):
  1651. df = DataFrame({"a": [1, 2, 3], "b": 1})
  1652. df_orig = df.copy()
  1653. df_view = df[:]
  1654. df.eval("c = a+b", inplace=True)
  1655. assert np.shares_memory(get_array(df, "a"), get_array(df_view, "a"))
  1656. with tm.assert_cow_warning(warn_copy_on_write):
  1657. df.iloc[0, 0] = 100
  1658. if using_copy_on_write:
  1659. tm.assert_frame_equal(df_view, df_orig)
  1660. def test_apply_modify_row(using_copy_on_write, warn_copy_on_write):
  1661. # Case: applying a function on each row as a Series object, where the
  1662. # function mutates the row object (which needs to trigger CoW if row is a view)
  1663. df = DataFrame({"A": [1, 2], "B": [3, 4]})
  1664. df_orig = df.copy()
  1665. def transform(row):
  1666. row["B"] = 100
  1667. return row
  1668. with tm.assert_cow_warning(warn_copy_on_write):
  1669. df.apply(transform, axis=1)
  1670. if using_copy_on_write:
  1671. tm.assert_frame_equal(df, df_orig)
  1672. else:
  1673. assert df.loc[0, "B"] == 100
  1674. # row Series is a copy
  1675. df = DataFrame({"A": [1, 2], "B": ["b", "c"]})
  1676. df_orig = df.copy()
  1677. with tm.assert_produces_warning(None):
  1678. df.apply(transform, axis=1)
  1679. tm.assert_frame_equal(df, df_orig)