# test_indexing.py (42 KB)
  1. import numpy as np
  2. import pytest
  3. from pandas.errors import SettingWithCopyWarning
  4. from pandas.core.dtypes.common import is_float_dtype
  5. import pandas as pd
  6. from pandas import (
  7. DataFrame,
  8. Series,
  9. )
  10. import pandas._testing as tm
  11. from pandas.tests.copy_view.util import get_array
  12. @pytest.fixture(params=["numpy", "nullable"])
  13. def backend(request):
  14. if request.param == "numpy":
  15. def make_dataframe(*args, **kwargs):
  16. return DataFrame(*args, **kwargs)
  17. def make_series(*args, **kwargs):
  18. return Series(*args, **kwargs)
  19. elif request.param == "nullable":
  20. def make_dataframe(*args, **kwargs):
  21. df = DataFrame(*args, **kwargs)
  22. df_nullable = df.convert_dtypes()
  23. # convert_dtypes will try to cast float to int if there is no loss in
  24. # precision -> undo that change
  25. for col in df.columns:
  26. if is_float_dtype(df[col].dtype) and not is_float_dtype(
  27. df_nullable[col].dtype
  28. ):
  29. df_nullable[col] = df_nullable[col].astype("Float64")
  30. # copy final result to ensure we start with a fully self-owning DataFrame
  31. return df_nullable.copy()
  32. def make_series(*args, **kwargs):
  33. ser = Series(*args, **kwargs)
  34. return ser.convert_dtypes().copy()
  35. return request.param, make_dataframe, make_series
# -----------------------------------------------------------------------------
# Indexing operations taking subset + modifying the subset/parent
  38. def test_subset_column_selection(backend, using_copy_on_write):
  39. # Case: taking a subset of the columns of a DataFrame
  40. # + afterwards modifying the subset
  41. _, DataFrame, _ = backend
  42. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  43. df_orig = df.copy()
  44. subset = df[["a", "c"]]
  45. if using_copy_on_write:
  46. # the subset shares memory ...
  47. assert np.shares_memory(get_array(subset, "a"), get_array(df, "a"))
  48. # ... but uses CoW when being modified
  49. subset.iloc[0, 0] = 0
  50. else:
  51. assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a"))
  52. # INFO this no longer raise warning since pandas 1.4
  53. # with pd.option_context("chained_assignment", "warn"):
  54. # with tm.assert_produces_warning(SettingWithCopyWarning):
  55. subset.iloc[0, 0] = 0
  56. assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a"))
  57. expected = DataFrame({"a": [0, 2, 3], "c": [0.1, 0.2, 0.3]})
  58. tm.assert_frame_equal(subset, expected)
  59. tm.assert_frame_equal(df, df_orig)
  60. def test_subset_column_selection_modify_parent(backend, using_copy_on_write):
  61. # Case: taking a subset of the columns of a DataFrame
  62. # + afterwards modifying the parent
  63. _, DataFrame, _ = backend
  64. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  65. subset = df[["a", "c"]]
  66. if using_copy_on_write:
  67. # the subset shares memory ...
  68. assert np.shares_memory(get_array(subset, "a"), get_array(df, "a"))
  69. # ... but parent uses CoW parent when it is modified
  70. df.iloc[0, 0] = 0
  71. assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a"))
  72. if using_copy_on_write:
  73. # different column/block still shares memory
  74. assert np.shares_memory(get_array(subset, "c"), get_array(df, "c"))
  75. expected = DataFrame({"a": [1, 2, 3], "c": [0.1, 0.2, 0.3]})
  76. tm.assert_frame_equal(subset, expected)
  77. def test_subset_row_slice(backend, using_copy_on_write, warn_copy_on_write):
  78. # Case: taking a subset of the rows of a DataFrame using a slice
  79. # + afterwards modifying the subset
  80. _, DataFrame, _ = backend
  81. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  82. df_orig = df.copy()
  83. subset = df[1:3]
  84. subset._mgr._verify_integrity()
  85. assert np.shares_memory(get_array(subset, "a"), get_array(df, "a"))
  86. if using_copy_on_write:
  87. subset.iloc[0, 0] = 0
  88. assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a"))
  89. else:
  90. # INFO this no longer raise warning since pandas 1.4
  91. # with pd.option_context("chained_assignment", "warn"):
  92. # with tm.assert_produces_warning(SettingWithCopyWarning):
  93. with tm.assert_cow_warning(warn_copy_on_write):
  94. subset.iloc[0, 0] = 0
  95. subset._mgr._verify_integrity()
  96. expected = DataFrame({"a": [0, 3], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3))
  97. tm.assert_frame_equal(subset, expected)
  98. if using_copy_on_write:
  99. # original parent dataframe is not modified (CoW)
  100. tm.assert_frame_equal(df, df_orig)
  101. else:
  102. # original parent dataframe is actually updated
  103. df_orig.iloc[1, 0] = 0
  104. tm.assert_frame_equal(df, df_orig)
  105. @pytest.mark.parametrize(
  106. "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
  107. )
  108. def test_subset_column_slice(
  109. backend, using_copy_on_write, warn_copy_on_write, using_array_manager, dtype
  110. ):
  111. # Case: taking a subset of the columns of a DataFrame using a slice
  112. # + afterwards modifying the subset
  113. dtype_backend, DataFrame, _ = backend
  114. single_block = (
  115. dtype == "int64" and dtype_backend == "numpy"
  116. ) and not using_array_manager
  117. df = DataFrame(
  118. {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
  119. )
  120. df_orig = df.copy()
  121. subset = df.iloc[:, 1:]
  122. subset._mgr._verify_integrity()
  123. if using_copy_on_write:
  124. assert np.shares_memory(get_array(subset, "b"), get_array(df, "b"))
  125. subset.iloc[0, 0] = 0
  126. assert not np.shares_memory(get_array(subset, "b"), get_array(df, "b"))
  127. elif warn_copy_on_write:
  128. with tm.assert_cow_warning(single_block):
  129. subset.iloc[0, 0] = 0
  130. else:
  131. # we only get a warning in case of a single block
  132. warn = SettingWithCopyWarning if single_block else None
  133. with pd.option_context("chained_assignment", "warn"):
  134. with tm.assert_produces_warning(warn):
  135. subset.iloc[0, 0] = 0
  136. expected = DataFrame({"b": [0, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)})
  137. tm.assert_frame_equal(subset, expected)
  138. # original parent dataframe is not modified (also not for BlockManager case,
  139. # except for single block)
  140. if not using_copy_on_write and (using_array_manager or single_block):
  141. df_orig.iloc[0, 1] = 0
  142. tm.assert_frame_equal(df, df_orig)
  143. else:
  144. tm.assert_frame_equal(df, df_orig)
  145. @pytest.mark.parametrize(
  146. "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
  147. )
  148. @pytest.mark.parametrize(
  149. "row_indexer",
  150. [slice(1, 2), np.array([False, True, True]), np.array([1, 2])],
  151. ids=["slice", "mask", "array"],
  152. )
  153. @pytest.mark.parametrize(
  154. "column_indexer",
  155. [slice("b", "c"), np.array([False, True, True]), ["b", "c"]],
  156. ids=["slice", "mask", "array"],
  157. )
  158. def test_subset_loc_rows_columns(
  159. backend,
  160. dtype,
  161. row_indexer,
  162. column_indexer,
  163. using_array_manager,
  164. using_copy_on_write,
  165. warn_copy_on_write,
  166. ):
  167. # Case: taking a subset of the rows+columns of a DataFrame using .loc
  168. # + afterwards modifying the subset
  169. # Generic test for several combinations of row/column indexers, not all
  170. # of those could actually return a view / need CoW (so this test is not
  171. # checking memory sharing, only ensuring subsequent mutation doesn't
  172. # affect the parent dataframe)
  173. dtype_backend, DataFrame, _ = backend
  174. df = DataFrame(
  175. {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
  176. )
  177. df_orig = df.copy()
  178. subset = df.loc[row_indexer, column_indexer]
  179. # a few corner cases _do_ actually modify the parent (with both row and column
  180. # slice, and in case of ArrayManager or BlockManager with single block)
  181. mutate_parent = (
  182. isinstance(row_indexer, slice)
  183. and isinstance(column_indexer, slice)
  184. and (
  185. using_array_manager
  186. or (
  187. dtype == "int64"
  188. and dtype_backend == "numpy"
  189. and not using_copy_on_write
  190. )
  191. )
  192. )
  193. # modifying the subset never modifies the parent
  194. with tm.assert_cow_warning(warn_copy_on_write and mutate_parent):
  195. subset.iloc[0, 0] = 0
  196. expected = DataFrame(
  197. {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3)
  198. )
  199. tm.assert_frame_equal(subset, expected)
  200. if mutate_parent:
  201. df_orig.iloc[1, 1] = 0
  202. tm.assert_frame_equal(df, df_orig)
  203. @pytest.mark.parametrize(
  204. "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
  205. )
  206. @pytest.mark.parametrize(
  207. "row_indexer",
  208. [slice(1, 3), np.array([False, True, True]), np.array([1, 2])],
  209. ids=["slice", "mask", "array"],
  210. )
  211. @pytest.mark.parametrize(
  212. "column_indexer",
  213. [slice(1, 3), np.array([False, True, True]), [1, 2]],
  214. ids=["slice", "mask", "array"],
  215. )
  216. def test_subset_iloc_rows_columns(
  217. backend,
  218. dtype,
  219. row_indexer,
  220. column_indexer,
  221. using_array_manager,
  222. using_copy_on_write,
  223. warn_copy_on_write,
  224. ):
  225. # Case: taking a subset of the rows+columns of a DataFrame using .iloc
  226. # + afterwards modifying the subset
  227. # Generic test for several combinations of row/column indexers, not all
  228. # of those could actually return a view / need CoW (so this test is not
  229. # checking memory sharing, only ensuring subsequent mutation doesn't
  230. # affect the parent dataframe)
  231. dtype_backend, DataFrame, _ = backend
  232. df = DataFrame(
  233. {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
  234. )
  235. df_orig = df.copy()
  236. subset = df.iloc[row_indexer, column_indexer]
  237. # a few corner cases _do_ actually modify the parent (with both row and column
  238. # slice, and in case of ArrayManager or BlockManager with single block)
  239. mutate_parent = (
  240. isinstance(row_indexer, slice)
  241. and isinstance(column_indexer, slice)
  242. and (
  243. using_array_manager
  244. or (
  245. dtype == "int64"
  246. and dtype_backend == "numpy"
  247. and not using_copy_on_write
  248. )
  249. )
  250. )
  251. # modifying the subset never modifies the parent
  252. with tm.assert_cow_warning(warn_copy_on_write and mutate_parent):
  253. subset.iloc[0, 0] = 0
  254. expected = DataFrame(
  255. {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3)
  256. )
  257. tm.assert_frame_equal(subset, expected)
  258. if mutate_parent:
  259. df_orig.iloc[1, 1] = 0
  260. tm.assert_frame_equal(df, df_orig)
  261. @pytest.mark.parametrize(
  262. "indexer",
  263. [slice(0, 2), np.array([True, True, False]), np.array([0, 1])],
  264. ids=["slice", "mask", "array"],
  265. )
  266. def test_subset_set_with_row_indexer(
  267. backend, indexer_si, indexer, using_copy_on_write, warn_copy_on_write
  268. ):
  269. # Case: setting values with a row indexer on a viewing subset
  270. # subset[indexer] = value and subset.iloc[indexer] = value
  271. _, DataFrame, _ = backend
  272. df = DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]})
  273. df_orig = df.copy()
  274. subset = df[1:4]
  275. if (
  276. indexer_si is tm.setitem
  277. and isinstance(indexer, np.ndarray)
  278. and indexer.dtype == "int"
  279. ):
  280. pytest.skip("setitem with labels selects on columns")
  281. if using_copy_on_write:
  282. indexer_si(subset)[indexer] = 0
  283. elif warn_copy_on_write:
  284. with tm.assert_cow_warning():
  285. indexer_si(subset)[indexer] = 0
  286. else:
  287. # INFO iloc no longer raises warning since pandas 1.4
  288. warn = SettingWithCopyWarning if indexer_si is tm.setitem else None
  289. with pd.option_context("chained_assignment", "warn"):
  290. with tm.assert_produces_warning(warn):
  291. indexer_si(subset)[indexer] = 0
  292. expected = DataFrame(
  293. {"a": [0, 0, 4], "b": [0, 0, 7], "c": [0.0, 0.0, 0.4]}, index=range(1, 4)
  294. )
  295. tm.assert_frame_equal(subset, expected)
  296. if using_copy_on_write:
  297. # original parent dataframe is not modified (CoW)
  298. tm.assert_frame_equal(df, df_orig)
  299. else:
  300. # original parent dataframe is actually updated
  301. df_orig[1:3] = 0
  302. tm.assert_frame_equal(df, df_orig)
  303. def test_subset_set_with_mask(backend, using_copy_on_write, warn_copy_on_write):
  304. # Case: setting values with a mask on a viewing subset: subset[mask] = value
  305. _, DataFrame, _ = backend
  306. df = DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]})
  307. df_orig = df.copy()
  308. subset = df[1:4]
  309. mask = subset > 3
  310. if using_copy_on_write:
  311. subset[mask] = 0
  312. elif warn_copy_on_write:
  313. with tm.assert_cow_warning():
  314. subset[mask] = 0
  315. else:
  316. with pd.option_context("chained_assignment", "warn"):
  317. with tm.assert_produces_warning(SettingWithCopyWarning):
  318. subset[mask] = 0
  319. expected = DataFrame(
  320. {"a": [2, 3, 0], "b": [0, 0, 0], "c": [0.20, 0.3, 0.4]}, index=range(1, 4)
  321. )
  322. tm.assert_frame_equal(subset, expected)
  323. if using_copy_on_write:
  324. # original parent dataframe is not modified (CoW)
  325. tm.assert_frame_equal(df, df_orig)
  326. else:
  327. # original parent dataframe is actually updated
  328. df_orig.loc[3, "a"] = 0
  329. df_orig.loc[1:3, "b"] = 0
  330. tm.assert_frame_equal(df, df_orig)
  331. def test_subset_set_column(backend, using_copy_on_write, warn_copy_on_write):
  332. # Case: setting a single column on a viewing subset -> subset[col] = value
  333. dtype_backend, DataFrame, _ = backend
  334. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  335. df_orig = df.copy()
  336. subset = df[1:3]
  337. if dtype_backend == "numpy":
  338. arr = np.array([10, 11], dtype="int64")
  339. else:
  340. arr = pd.array([10, 11], dtype="Int64")
  341. if using_copy_on_write or warn_copy_on_write:
  342. subset["a"] = arr
  343. else:
  344. with pd.option_context("chained_assignment", "warn"):
  345. with tm.assert_produces_warning(SettingWithCopyWarning):
  346. subset["a"] = arr
  347. subset._mgr._verify_integrity()
  348. expected = DataFrame(
  349. {"a": [10, 11], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3)
  350. )
  351. tm.assert_frame_equal(subset, expected)
  352. tm.assert_frame_equal(df, df_orig)
  353. @pytest.mark.parametrize(
  354. "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
  355. )
  356. def test_subset_set_column_with_loc(
  357. backend, using_copy_on_write, warn_copy_on_write, using_array_manager, dtype
  358. ):
  359. # Case: setting a single column with loc on a viewing subset
  360. # -> subset.loc[:, col] = value
  361. _, DataFrame, _ = backend
  362. df = DataFrame(
  363. {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
  364. )
  365. df_orig = df.copy()
  366. subset = df[1:3]
  367. if using_copy_on_write:
  368. subset.loc[:, "a"] = np.array([10, 11], dtype="int64")
  369. elif warn_copy_on_write:
  370. with tm.assert_cow_warning():
  371. subset.loc[:, "a"] = np.array([10, 11], dtype="int64")
  372. else:
  373. with pd.option_context("chained_assignment", "warn"):
  374. with tm.assert_produces_warning(
  375. None,
  376. raise_on_extra_warnings=not using_array_manager,
  377. ):
  378. subset.loc[:, "a"] = np.array([10, 11], dtype="int64")
  379. subset._mgr._verify_integrity()
  380. expected = DataFrame(
  381. {"a": [10, 11], "b": [5, 6], "c": np.array([8, 9], dtype=dtype)},
  382. index=range(1, 3),
  383. )
  384. tm.assert_frame_equal(subset, expected)
  385. if using_copy_on_write:
  386. # original parent dataframe is not modified (CoW)
  387. tm.assert_frame_equal(df, df_orig)
  388. else:
  389. # original parent dataframe is actually updated
  390. df_orig.loc[1:3, "a"] = np.array([10, 11], dtype="int64")
  391. tm.assert_frame_equal(df, df_orig)
  392. def test_subset_set_column_with_loc2(
  393. backend, using_copy_on_write, warn_copy_on_write, using_array_manager
  394. ):
  395. # Case: setting a single column with loc on a viewing subset
  396. # -> subset.loc[:, col] = value
  397. # separate test for case of DataFrame of a single column -> takes a separate
  398. # code path
  399. _, DataFrame, _ = backend
  400. df = DataFrame({"a": [1, 2, 3]})
  401. df_orig = df.copy()
  402. subset = df[1:3]
  403. if using_copy_on_write:
  404. subset.loc[:, "a"] = 0
  405. elif warn_copy_on_write:
  406. with tm.assert_cow_warning():
  407. subset.loc[:, "a"] = 0
  408. else:
  409. with pd.option_context("chained_assignment", "warn"):
  410. with tm.assert_produces_warning(
  411. None,
  412. raise_on_extra_warnings=not using_array_manager,
  413. ):
  414. subset.loc[:, "a"] = 0
  415. subset._mgr._verify_integrity()
  416. expected = DataFrame({"a": [0, 0]}, index=range(1, 3))
  417. tm.assert_frame_equal(subset, expected)
  418. if using_copy_on_write:
  419. # original parent dataframe is not modified (CoW)
  420. tm.assert_frame_equal(df, df_orig)
  421. else:
  422. # original parent dataframe is actually updated
  423. df_orig.loc[1:3, "a"] = 0
  424. tm.assert_frame_equal(df, df_orig)
  425. @pytest.mark.parametrize(
  426. "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
  427. )
  428. def test_subset_set_columns(backend, using_copy_on_write, warn_copy_on_write, dtype):
  429. # Case: setting multiple columns on a viewing subset
  430. # -> subset[[col1, col2]] = value
  431. dtype_backend, DataFrame, _ = backend
  432. df = DataFrame(
  433. {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
  434. )
  435. df_orig = df.copy()
  436. subset = df[1:3]
  437. if using_copy_on_write or warn_copy_on_write:
  438. subset[["a", "c"]] = 0
  439. else:
  440. with pd.option_context("chained_assignment", "warn"):
  441. with tm.assert_produces_warning(SettingWithCopyWarning):
  442. subset[["a", "c"]] = 0
  443. subset._mgr._verify_integrity()
  444. if using_copy_on_write:
  445. # first and third column should certainly have no references anymore
  446. assert all(subset._mgr._has_no_reference(i) for i in [0, 2])
  447. expected = DataFrame({"a": [0, 0], "b": [5, 6], "c": [0, 0]}, index=range(1, 3))
  448. if dtype_backend == "nullable":
  449. # there is not yet a global option, so overriding a column by setting a scalar
  450. # defaults to numpy dtype even if original column was nullable
  451. expected["a"] = expected["a"].astype("int64")
  452. expected["c"] = expected["c"].astype("int64")
  453. tm.assert_frame_equal(subset, expected)
  454. tm.assert_frame_equal(df, df_orig)
  455. @pytest.mark.parametrize(
  456. "indexer",
  457. [slice("a", "b"), np.array([True, True, False]), ["a", "b"]],
  458. ids=["slice", "mask", "array"],
  459. )
  460. def test_subset_set_with_column_indexer(
  461. backend, indexer, using_copy_on_write, warn_copy_on_write
  462. ):
  463. # Case: setting multiple columns with a column indexer on a viewing subset
  464. # -> subset.loc[:, [col1, col2]] = value
  465. _, DataFrame, _ = backend
  466. df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]})
  467. df_orig = df.copy()
  468. subset = df[1:3]
  469. if using_copy_on_write:
  470. subset.loc[:, indexer] = 0
  471. elif warn_copy_on_write:
  472. with tm.assert_cow_warning():
  473. subset.loc[:, indexer] = 0
  474. else:
  475. with pd.option_context("chained_assignment", "warn"):
  476. # As of 2.0, this setitem attempts (successfully) to set values
  477. # inplace, so the assignment is not chained.
  478. subset.loc[:, indexer] = 0
  479. subset._mgr._verify_integrity()
  480. expected = DataFrame({"a": [0, 0], "b": [0.0, 0.0], "c": [5, 6]}, index=range(1, 3))
  481. tm.assert_frame_equal(subset, expected)
  482. if using_copy_on_write:
  483. tm.assert_frame_equal(df, df_orig)
  484. else:
  485. # pre-2.0, in the mixed case with BlockManager, only column "a"
  486. # would be mutated in the parent frame. this changed with the
  487. # enforcement of GH#45333
  488. df_orig.loc[1:2, ["a", "b"]] = 0
  489. tm.assert_frame_equal(df, df_orig)
  490. @pytest.mark.parametrize(
  491. "method",
  492. [
  493. lambda df: df[["a", "b"]][0:2],
  494. lambda df: df[0:2][["a", "b"]],
  495. lambda df: df[["a", "b"]].iloc[0:2],
  496. lambda df: df[["a", "b"]].loc[0:1],
  497. lambda df: df[0:2].iloc[:, 0:2],
  498. lambda df: df[0:2].loc[:, "a":"b"], # type: ignore[misc]
  499. ],
  500. ids=[
  501. "row-getitem-slice",
  502. "column-getitem",
  503. "row-iloc-slice",
  504. "row-loc-slice",
  505. "column-iloc-slice",
  506. "column-loc-slice",
  507. ],
  508. )
  509. @pytest.mark.parametrize(
  510. "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
  511. )
  512. def test_subset_chained_getitem(
  513. request,
  514. backend,
  515. method,
  516. dtype,
  517. using_copy_on_write,
  518. using_array_manager,
  519. warn_copy_on_write,
  520. ):
  521. # Case: creating a subset using multiple, chained getitem calls using views
  522. # still needs to guarantee proper CoW behaviour
  523. _, DataFrame, _ = backend
  524. df = DataFrame(
  525. {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
  526. )
  527. df_orig = df.copy()
  528. # when not using CoW, it depends on whether we have a single block or not
  529. # and whether we are slicing the columns -> in that case we have a view
  530. test_callspec = request.node.callspec.id
  531. if not using_array_manager:
  532. subset_is_view = test_callspec in (
  533. "numpy-single-block-column-iloc-slice",
  534. "numpy-single-block-column-loc-slice",
  535. )
  536. else:
  537. # with ArrayManager, it doesn't matter whether we have
  538. # single vs mixed block or numpy vs nullable dtypes
  539. subset_is_view = test_callspec.endswith(
  540. ("column-iloc-slice", "column-loc-slice")
  541. )
  542. # modify subset -> don't modify parent
  543. subset = method(df)
  544. with tm.assert_cow_warning(warn_copy_on_write and subset_is_view):
  545. subset.iloc[0, 0] = 0
  546. if using_copy_on_write or (not subset_is_view):
  547. tm.assert_frame_equal(df, df_orig)
  548. else:
  549. assert df.iloc[0, 0] == 0
  550. # modify parent -> don't modify subset
  551. subset = method(df)
  552. with tm.assert_cow_warning(warn_copy_on_write and subset_is_view):
  553. df.iloc[0, 0] = 0
  554. expected = DataFrame({"a": [1, 2], "b": [4, 5]})
  555. if using_copy_on_write or not subset_is_view:
  556. tm.assert_frame_equal(subset, expected)
  557. else:
  558. assert subset.iloc[0, 0] == 0
  559. @pytest.mark.parametrize(
  560. "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
  561. )
  562. def test_subset_chained_getitem_column(
  563. backend, dtype, using_copy_on_write, warn_copy_on_write
  564. ):
  565. # Case: creating a subset using multiple, chained getitem calls using views
  566. # still needs to guarantee proper CoW behaviour
  567. dtype_backend, DataFrame, Series = backend
  568. df = DataFrame(
  569. {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
  570. )
  571. df_orig = df.copy()
  572. # modify subset -> don't modify parent
  573. subset = df[:]["a"][0:2]
  574. df._clear_item_cache()
  575. with tm.assert_cow_warning(warn_copy_on_write):
  576. subset.iloc[0] = 0
  577. if using_copy_on_write:
  578. tm.assert_frame_equal(df, df_orig)
  579. else:
  580. assert df.iloc[0, 0] == 0
  581. # modify parent -> don't modify subset
  582. subset = df[:]["a"][0:2]
  583. df._clear_item_cache()
  584. with tm.assert_cow_warning(warn_copy_on_write):
  585. df.iloc[0, 0] = 0
  586. expected = Series([1, 2], name="a")
  587. if using_copy_on_write:
  588. tm.assert_series_equal(subset, expected)
  589. else:
  590. assert subset.iloc[0] == 0
  591. @pytest.mark.parametrize(
  592. "method",
  593. [
  594. lambda s: s["a":"c"]["a":"b"], # type: ignore[misc]
  595. lambda s: s.iloc[0:3].iloc[0:2],
  596. lambda s: s.loc["a":"c"].loc["a":"b"], # type: ignore[misc]
  597. lambda s: s.loc["a":"c"] # type: ignore[misc]
  598. .iloc[0:3]
  599. .iloc[0:2]
  600. .loc["a":"b"] # type: ignore[misc]
  601. .iloc[0:1],
  602. ],
  603. ids=["getitem", "iloc", "loc", "long-chain"],
  604. )
  605. def test_subset_chained_getitem_series(
  606. backend, method, using_copy_on_write, warn_copy_on_write
  607. ):
  608. # Case: creating a subset using multiple, chained getitem calls using views
  609. # still needs to guarantee proper CoW behaviour
  610. _, _, Series = backend
  611. s = Series([1, 2, 3], index=["a", "b", "c"])
  612. s_orig = s.copy()
  613. # modify subset -> don't modify parent
  614. subset = method(s)
  615. with tm.assert_cow_warning(warn_copy_on_write):
  616. subset.iloc[0] = 0
  617. if using_copy_on_write:
  618. tm.assert_series_equal(s, s_orig)
  619. else:
  620. assert s.iloc[0] == 0
  621. # modify parent -> don't modify subset
  622. subset = s.iloc[0:3].iloc[0:2]
  623. with tm.assert_cow_warning(warn_copy_on_write):
  624. s.iloc[0] = 0
  625. expected = Series([1, 2], index=["a", "b"])
  626. if using_copy_on_write:
  627. tm.assert_series_equal(subset, expected)
  628. else:
  629. assert subset.iloc[0] == 0
  630. def test_subset_chained_single_block_row(
  631. using_copy_on_write, using_array_manager, warn_copy_on_write
  632. ):
  633. # not parametrizing this for dtype backend, since this explicitly tests single block
  634. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
  635. df_orig = df.copy()
  636. # modify subset -> don't modify parent
  637. subset = df[:].iloc[0].iloc[0:2]
  638. with tm.assert_cow_warning(warn_copy_on_write):
  639. subset.iloc[0] = 0
  640. if using_copy_on_write or using_array_manager:
  641. tm.assert_frame_equal(df, df_orig)
  642. else:
  643. assert df.iloc[0, 0] == 0
  644. # modify parent -> don't modify subset
  645. subset = df[:].iloc[0].iloc[0:2]
  646. with tm.assert_cow_warning(warn_copy_on_write):
  647. df.iloc[0, 0] = 0
  648. expected = Series([1, 4], index=["a", "b"], name=0)
  649. if using_copy_on_write or using_array_manager:
  650. tm.assert_series_equal(subset, expected)
  651. else:
  652. assert subset.iloc[0] == 0
  653. @pytest.mark.parametrize(
  654. "method",
  655. [
  656. lambda df: df[:],
  657. lambda df: df.loc[:, :],
  658. lambda df: df.loc[:],
  659. lambda df: df.iloc[:, :],
  660. lambda df: df.iloc[:],
  661. ],
  662. ids=["getitem", "loc", "loc-rows", "iloc", "iloc-rows"],
  663. )
  664. def test_null_slice(backend, method, using_copy_on_write, warn_copy_on_write):
  665. # Case: also all variants of indexing with a null slice (:) should return
  666. # new objects to ensure we correctly use CoW for the results
  667. dtype_backend, DataFrame, _ = backend
  668. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
  669. df_orig = df.copy()
  670. df2 = method(df)
  671. # we always return new objects (shallow copy), regardless of CoW or not
  672. assert df2 is not df
  673. # and those trigger CoW when mutated
  674. with tm.assert_cow_warning(warn_copy_on_write):
  675. df2.iloc[0, 0] = 0
  676. if using_copy_on_write:
  677. tm.assert_frame_equal(df, df_orig)
  678. else:
  679. assert df.iloc[0, 0] == 0
  680. @pytest.mark.parametrize(
  681. "method",
  682. [
  683. lambda s: s[:],
  684. lambda s: s.loc[:],
  685. lambda s: s.iloc[:],
  686. ],
  687. ids=["getitem", "loc", "iloc"],
  688. )
  689. def test_null_slice_series(backend, method, using_copy_on_write, warn_copy_on_write):
  690. _, _, Series = backend
  691. s = Series([1, 2, 3], index=["a", "b", "c"])
  692. s_orig = s.copy()
  693. s2 = method(s)
  694. # we always return new objects, regardless of CoW or not
  695. assert s2 is not s
  696. # and those trigger CoW when mutated
  697. with tm.assert_cow_warning(warn_copy_on_write):
  698. s2.iloc[0] = 0
  699. if using_copy_on_write:
  700. tm.assert_series_equal(s, s_orig)
  701. else:
  702. assert s.iloc[0] == 0
# TODO add more tests modifying the parent


# -----------------------------------------------------------------------------
# Series -- Indexing operations taking subset + modifying the subset/parent
  706. def test_series_getitem_slice(backend, using_copy_on_write, warn_copy_on_write):
  707. # Case: taking a slice of a Series + afterwards modifying the subset
  708. _, _, Series = backend
  709. s = Series([1, 2, 3], index=["a", "b", "c"])
  710. s_orig = s.copy()
  711. subset = s[:]
  712. assert np.shares_memory(get_array(subset), get_array(s))
  713. with tm.assert_cow_warning(warn_copy_on_write):
  714. subset.iloc[0] = 0
  715. if using_copy_on_write:
  716. assert not np.shares_memory(get_array(subset), get_array(s))
  717. expected = Series([0, 2, 3], index=["a", "b", "c"])
  718. tm.assert_series_equal(subset, expected)
  719. if using_copy_on_write:
  720. # original parent series is not modified (CoW)
  721. tm.assert_series_equal(s, s_orig)
  722. else:
  723. # original parent series is actually updated
  724. assert s.iloc[0] == 0
  725. def test_series_getitem_ellipsis(using_copy_on_write, warn_copy_on_write):
  726. # Case: taking a view of a Series using Ellipsis + afterwards modifying the subset
  727. s = Series([1, 2, 3])
  728. s_orig = s.copy()
  729. subset = s[...]
  730. assert np.shares_memory(get_array(subset), get_array(s))
  731. with tm.assert_cow_warning(warn_copy_on_write):
  732. subset.iloc[0] = 0
  733. if using_copy_on_write:
  734. assert not np.shares_memory(get_array(subset), get_array(s))
  735. expected = Series([0, 2, 3])
  736. tm.assert_series_equal(subset, expected)
  737. if using_copy_on_write:
  738. # original parent series is not modified (CoW)
  739. tm.assert_series_equal(s, s_orig)
  740. else:
  741. # original parent series is actually updated
  742. assert s.iloc[0] == 0
  743. @pytest.mark.parametrize(
  744. "indexer",
  745. [slice(0, 2), np.array([True, True, False]), np.array([0, 1])],
  746. ids=["slice", "mask", "array"],
  747. )
  748. def test_series_subset_set_with_indexer(
  749. backend, indexer_si, indexer, using_copy_on_write, warn_copy_on_write
  750. ):
  751. # Case: setting values in a viewing Series with an indexer
  752. _, _, Series = backend
  753. s = Series([1, 2, 3], index=["a", "b", "c"])
  754. s_orig = s.copy()
  755. subset = s[:]
  756. warn = None
  757. msg = "Series.__setitem__ treating keys as positions is deprecated"
  758. if (
  759. indexer_si is tm.setitem
  760. and isinstance(indexer, np.ndarray)
  761. and indexer.dtype.kind == "i"
  762. ):
  763. warn = FutureWarning
  764. if warn_copy_on_write:
  765. with tm.assert_cow_warning(raise_on_extra_warnings=warn is not None):
  766. indexer_si(subset)[indexer] = 0
  767. else:
  768. with tm.assert_produces_warning(warn, match=msg):
  769. indexer_si(subset)[indexer] = 0
  770. expected = Series([0, 0, 3], index=["a", "b", "c"])
  771. tm.assert_series_equal(subset, expected)
  772. if using_copy_on_write:
  773. tm.assert_series_equal(s, s_orig)
  774. else:
  775. tm.assert_series_equal(s, expected)
# -----------------------------------------------------------------------------
# del operator
  778. def test_del_frame(backend, using_copy_on_write, warn_copy_on_write):
  779. # Case: deleting a column with `del` on a viewing child dataframe should
  780. # not modify parent + update the references
  781. dtype_backend, DataFrame, _ = backend
  782. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  783. df_orig = df.copy()
  784. df2 = df[:]
  785. assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  786. del df2["b"]
  787. assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  788. tm.assert_frame_equal(df, df_orig)
  789. tm.assert_frame_equal(df2, df_orig[["a", "c"]])
  790. df2._mgr._verify_integrity()
  791. with tm.assert_cow_warning(warn_copy_on_write and dtype_backend == "numpy"):
  792. df.loc[0, "b"] = 200
  793. assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  794. df_orig = df.copy()
  795. with tm.assert_cow_warning(warn_copy_on_write):
  796. df2.loc[0, "a"] = 100
  797. if using_copy_on_write:
  798. # modifying child after deleting a column still doesn't update parent
  799. tm.assert_frame_equal(df, df_orig)
  800. else:
  801. assert df.loc[0, "a"] == 100
  802. def test_del_series(backend):
  803. _, _, Series = backend
  804. s = Series([1, 2, 3], index=["a", "b", "c"])
  805. s_orig = s.copy()
  806. s2 = s[:]
  807. assert np.shares_memory(get_array(s), get_array(s2))
  808. del s2["a"]
  809. assert not np.shares_memory(get_array(s), get_array(s2))
  810. tm.assert_series_equal(s, s_orig)
  811. tm.assert_series_equal(s2, s_orig[["b", "c"]])
  812. # modifying s2 doesn't need copy on write (due to `del`, s2 is backed by new array)
  813. values = s2.values
  814. s2.loc["b"] = 100
  815. assert values[0] == 100
# -----------------------------------------------------------------------------
# Accessing column as Series
  818. def test_column_as_series(
  819. backend, using_copy_on_write, warn_copy_on_write, using_array_manager
  820. ):
  821. # Case: selecting a single column now also uses Copy-on-Write
  822. dtype_backend, DataFrame, Series = backend
  823. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  824. df_orig = df.copy()
  825. s = df["a"]
  826. assert np.shares_memory(get_array(s, "a"), get_array(df, "a"))
  827. if using_copy_on_write or using_array_manager:
  828. s[0] = 0
  829. else:
  830. if warn_copy_on_write:
  831. with tm.assert_cow_warning():
  832. s[0] = 0
  833. else:
  834. warn = SettingWithCopyWarning if dtype_backend == "numpy" else None
  835. with pd.option_context("chained_assignment", "warn"):
  836. with tm.assert_produces_warning(warn):
  837. s[0] = 0
  838. expected = Series([0, 2, 3], name="a")
  839. tm.assert_series_equal(s, expected)
  840. if using_copy_on_write:
  841. # assert not np.shares_memory(s.values, get_array(df, "a"))
  842. tm.assert_frame_equal(df, df_orig)
  843. # ensure cached series on getitem is not the changed series
  844. tm.assert_series_equal(df["a"], df_orig["a"])
  845. else:
  846. df_orig.iloc[0, 0] = 0
  847. tm.assert_frame_equal(df, df_orig)
  848. def test_column_as_series_set_with_upcast(
  849. backend, using_copy_on_write, using_array_manager, warn_copy_on_write
  850. ):
  851. # Case: selecting a single column now also uses Copy-on-Write -> when
  852. # setting a value causes an upcast, we don't need to update the parent
  853. # DataFrame through the cache mechanism
  854. dtype_backend, DataFrame, Series = backend
  855. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  856. df_orig = df.copy()
  857. s = df["a"]
  858. if dtype_backend == "nullable":
  859. with tm.assert_cow_warning(warn_copy_on_write):
  860. with pytest.raises(TypeError, match="Invalid value"):
  861. s[0] = "foo"
  862. expected = Series([1, 2, 3], name="a")
  863. elif using_copy_on_write or warn_copy_on_write or using_array_manager:
  864. # TODO(CoW-warn) assert the FutureWarning for CoW is also raised
  865. with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
  866. s[0] = "foo"
  867. expected = Series(["foo", 2, 3], dtype=object, name="a")
  868. else:
  869. with pd.option_context("chained_assignment", "warn"):
  870. msg = "|".join(
  871. [
  872. "A value is trying to be set on a copy of a slice from a DataFrame",
  873. "Setting an item of incompatible dtype is deprecated",
  874. ]
  875. )
  876. with tm.assert_produces_warning(
  877. (SettingWithCopyWarning, FutureWarning), match=msg
  878. ):
  879. s[0] = "foo"
  880. expected = Series(["foo", 2, 3], dtype=object, name="a")
  881. tm.assert_series_equal(s, expected)
  882. if using_copy_on_write:
  883. tm.assert_frame_equal(df, df_orig)
  884. # ensure cached series on getitem is not the changed series
  885. tm.assert_series_equal(df["a"], df_orig["a"])
  886. else:
  887. df_orig["a"] = expected
  888. tm.assert_frame_equal(df, df_orig)
  889. @pytest.mark.parametrize(
  890. "method",
  891. [
  892. lambda df: df["a"],
  893. lambda df: df.loc[:, "a"],
  894. lambda df: df.iloc[:, 0],
  895. ],
  896. ids=["getitem", "loc", "iloc"],
  897. )
  898. def test_column_as_series_no_item_cache(
  899. request,
  900. backend,
  901. method,
  902. using_copy_on_write,
  903. warn_copy_on_write,
  904. using_array_manager,
  905. ):
  906. # Case: selecting a single column (which now also uses Copy-on-Write to protect
  907. # the view) should always give a new object (i.e. not make use of a cache)
  908. dtype_backend, DataFrame, _ = backend
  909. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  910. df_orig = df.copy()
  911. s1 = method(df)
  912. s2 = method(df)
  913. is_iloc = "iloc" in request.node.name
  914. if using_copy_on_write or warn_copy_on_write or is_iloc:
  915. assert s1 is not s2
  916. else:
  917. assert s1 is s2
  918. if using_copy_on_write or using_array_manager:
  919. s1.iloc[0] = 0
  920. elif warn_copy_on_write:
  921. with tm.assert_cow_warning():
  922. s1.iloc[0] = 0
  923. else:
  924. warn = SettingWithCopyWarning if dtype_backend == "numpy" else None
  925. with pd.option_context("chained_assignment", "warn"):
  926. with tm.assert_produces_warning(warn):
  927. s1.iloc[0] = 0
  928. if using_copy_on_write:
  929. tm.assert_series_equal(s2, df_orig["a"])
  930. tm.assert_frame_equal(df, df_orig)
  931. else:
  932. assert s2.iloc[0] == 0
# TODO add tests for other indexing methods on the Series
  934. def test_dataframe_add_column_from_series(backend, using_copy_on_write):
  935. # Case: adding a new column to a DataFrame from an existing column/series
  936. # -> delays copy under CoW
  937. _, DataFrame, Series = backend
  938. df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
  939. s = Series([10, 11, 12])
  940. df["new"] = s
  941. if using_copy_on_write:
  942. assert np.shares_memory(get_array(df, "new"), get_array(s))
  943. else:
  944. assert not np.shares_memory(get_array(df, "new"), get_array(s))
  945. # editing series -> doesn't modify column in frame
  946. s[0] = 0
  947. expected = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "new": [10, 11, 12]})
  948. tm.assert_frame_equal(df, expected)
  949. @pytest.mark.parametrize("val", [100, "a"])
  950. @pytest.mark.parametrize(
  951. "indexer_func, indexer",
  952. [
  953. (tm.loc, (0, "a")),
  954. (tm.iloc, (0, 0)),
  955. (tm.loc, ([0], "a")),
  956. (tm.iloc, ([0], 0)),
  957. (tm.loc, (slice(None), "a")),
  958. (tm.iloc, (slice(None), 0)),
  959. ],
  960. )
  961. @pytest.mark.parametrize(
  962. "col", [[0.1, 0.2, 0.3], [7, 8, 9]], ids=["mixed-block", "single-block"]
  963. )
  964. def test_set_value_copy_only_necessary_column(
  965. using_copy_on_write, warn_copy_on_write, indexer_func, indexer, val, col
  966. ):
  967. # When setting inplace, only copy column that is modified instead of the whole
  968. # block (by splitting the block)
  969. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": col})
  970. df_orig = df.copy()
  971. view = df[:]
  972. if val == "a" and not warn_copy_on_write:
  973. with tm.assert_produces_warning(
  974. FutureWarning, match="Setting an item of incompatible dtype is deprecated"
  975. ):
  976. indexer_func(df)[indexer] = val
  977. if val == "a" and warn_copy_on_write:
  978. with tm.assert_produces_warning(
  979. FutureWarning, match="incompatible dtype|Setting a value on a view"
  980. ):
  981. indexer_func(df)[indexer] = val
  982. else:
  983. with tm.assert_cow_warning(warn_copy_on_write and val == 100):
  984. indexer_func(df)[indexer] = val
  985. if using_copy_on_write:
  986. assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
  987. assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
  988. tm.assert_frame_equal(view, df_orig)
  989. else:
  990. assert np.shares_memory(get_array(df, "c"), get_array(view, "c"))
  991. if val == "a":
  992. assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
  993. else:
  994. assert np.shares_memory(get_array(df, "a"), get_array(view, "a"))
  995. def test_series_midx_slice(using_copy_on_write, warn_copy_on_write):
  996. ser = Series([1, 2, 3], index=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 5]]))
  997. ser_orig = ser.copy()
  998. result = ser[1]
  999. assert np.shares_memory(get_array(ser), get_array(result))
  1000. with tm.assert_cow_warning(warn_copy_on_write):
  1001. result.iloc[0] = 100
  1002. if using_copy_on_write:
  1003. tm.assert_series_equal(ser, ser_orig)
  1004. else:
  1005. expected = Series(
  1006. [100, 2, 3], index=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 5]])
  1007. )
  1008. tm.assert_series_equal(ser, expected)
  1009. def test_getitem_midx_slice(
  1010. using_copy_on_write, warn_copy_on_write, using_array_manager
  1011. ):
  1012. df = DataFrame({("a", "x"): [1, 2], ("a", "y"): 1, ("b", "x"): 2})
  1013. df_orig = df.copy()
  1014. new_df = df[("a",)]
  1015. if using_copy_on_write:
  1016. assert not new_df._mgr._has_no_reference(0)
  1017. if not using_array_manager:
  1018. assert np.shares_memory(get_array(df, ("a", "x")), get_array(new_df, "x"))
  1019. if using_copy_on_write:
  1020. new_df.iloc[0, 0] = 100
  1021. tm.assert_frame_equal(df_orig, df)
  1022. else:
  1023. if warn_copy_on_write:
  1024. with tm.assert_cow_warning():
  1025. new_df.iloc[0, 0] = 100
  1026. else:
  1027. with pd.option_context("chained_assignment", "warn"):
  1028. with tm.assert_produces_warning(SettingWithCopyWarning):
  1029. new_df.iloc[0, 0] = 100
  1030. assert df.iloc[0, 0] == 100
  1031. def test_series_midx_tuples_slice(using_copy_on_write, warn_copy_on_write):
  1032. ser = Series(
  1033. [1, 2, 3],
  1034. index=pd.MultiIndex.from_tuples([((1, 2), 3), ((1, 2), 4), ((2, 3), 4)]),
  1035. )
  1036. result = ser[(1, 2)]
  1037. assert np.shares_memory(get_array(ser), get_array(result))
  1038. with tm.assert_cow_warning(warn_copy_on_write):
  1039. result.iloc[0] = 100
  1040. if using_copy_on_write:
  1041. expected = Series(
  1042. [1, 2, 3],
  1043. index=pd.MultiIndex.from_tuples([((1, 2), 3), ((1, 2), 4), ((2, 3), 4)]),
  1044. )
  1045. tm.assert_series_equal(ser, expected)
  1046. def test_midx_read_only_bool_indexer():
  1047. # GH#56635
  1048. def mklbl(prefix, n):
  1049. return [f"{prefix}{i}" for i in range(n)]
  1050. idx = pd.MultiIndex.from_product(
  1051. [mklbl("A", 4), mklbl("B", 2), mklbl("C", 4), mklbl("D", 2)]
  1052. )
  1053. cols = pd.MultiIndex.from_tuples(
  1054. [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], names=["lvl0", "lvl1"]
  1055. )
  1056. df = DataFrame(1, index=idx, columns=cols).sort_index().sort_index(axis=1)
  1057. mask = df[("a", "foo")] == 1
  1058. expected_mask = mask.copy()
  1059. result = df.loc[pd.IndexSlice[mask, :, ["C1", "C3"]], :]
  1060. expected = df.loc[pd.IndexSlice[:, :, ["C1", "C3"]], :]
  1061. tm.assert_frame_equal(result, expected)
  1062. tm.assert_series_equal(mask, expected_mask)
  1063. def test_loc_enlarging_with_dataframe(using_copy_on_write):
  1064. df = DataFrame({"a": [1, 2, 3]})
  1065. rhs = DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]})
  1066. rhs_orig = rhs.copy()
  1067. df.loc[:, ["b", "c"]] = rhs
  1068. if using_copy_on_write:
  1069. assert np.shares_memory(get_array(df, "b"), get_array(rhs, "b"))
  1070. assert np.shares_memory(get_array(df, "c"), get_array(rhs, "c"))
  1071. assert not df._mgr._has_no_reference(1)
  1072. else:
  1073. assert not np.shares_memory(get_array(df, "b"), get_array(rhs, "b"))
  1074. df.iloc[0, 1] = 100
  1075. tm.assert_frame_equal(rhs, rhs_orig)