test_setitem.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589
  1. import numpy as np
  2. import pytest
  3. from pandas.errors import SettingWithCopyError
  4. import pandas.util._test_decorators as td
  5. import pandas as pd
  6. from pandas import (
  7. DataFrame,
  8. MultiIndex,
  9. Series,
  10. date_range,
  11. isna,
  12. notna,
  13. )
  14. import pandas._testing as tm
  15. def assert_equal(a, b):
  16. assert a == b
  17. class TestMultiIndexSetItem:
  18. def check(self, target, indexers, value, compare_fn=assert_equal, expected=None):
  19. target.loc[indexers] = value
  20. result = target.loc[indexers]
  21. if expected is None:
  22. expected = value
  23. compare_fn(result, expected)
  24. def test_setitem_multiindex(self):
  25. # GH#7190
  26. cols = ["A", "w", "l", "a", "x", "X", "d", "profit"]
  27. index = MultiIndex.from_product(
  28. [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"]
  29. )
  30. t, n = 0, 2
  31. df = DataFrame(
  32. np.nan,
  33. columns=cols,
  34. index=index,
  35. )
  36. self.check(target=df, indexers=((t, n), "X"), value=0)
  37. df = DataFrame(-999, columns=cols, index=index)
  38. self.check(target=df, indexers=((t, n), "X"), value=1)
  39. df = DataFrame(columns=cols, index=index)
  40. self.check(target=df, indexers=((t, n), "X"), value=2)
  41. # gh-7218: assigning with 0-dim arrays
  42. df = DataFrame(-999, columns=cols, index=index)
  43. self.check(
  44. target=df,
  45. indexers=((t, n), "X"),
  46. value=np.array(3),
  47. expected=3,
  48. )
  49. def test_setitem_multiindex2(self):
  50. # GH#5206
  51. df = DataFrame(
  52. np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float
  53. )
  54. df["F"] = 99
  55. row_selection = df["A"] % 2 == 0
  56. col_selection = ["B", "C"]
  57. df.loc[row_selection, col_selection] = df["F"]
  58. output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"])
  59. tm.assert_frame_equal(df.loc[row_selection, col_selection], output)
  60. self.check(
  61. target=df,
  62. indexers=(row_selection, col_selection),
  63. value=df["F"],
  64. compare_fn=tm.assert_frame_equal,
  65. expected=output,
  66. )
  67. def test_setitem_multiindex3(self):
  68. # GH#11372
  69. idx = MultiIndex.from_product(
  70. [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")]
  71. )
  72. cols = MultiIndex.from_product(
  73. [["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")]
  74. )
  75. df = DataFrame(
  76. np.random.default_rng(2).random((12, 4)), index=idx, columns=cols
  77. )
  78. subidx = MultiIndex.from_arrays(
  79. [["A", "A"], date_range("2015-01-01", "2015-02-01", freq="MS")]
  80. )
  81. subcols = MultiIndex.from_arrays(
  82. [["foo", "foo"], date_range("2016-01-01", "2016-02-01", freq="MS")]
  83. )
  84. vals = DataFrame(
  85. np.random.default_rng(2).random((2, 2)), index=subidx, columns=subcols
  86. )
  87. self.check(
  88. target=df,
  89. indexers=(subidx, subcols),
  90. value=vals,
  91. compare_fn=tm.assert_frame_equal,
  92. )
  93. # set all columns
  94. vals = DataFrame(
  95. np.random.default_rng(2).random((2, 4)), index=subidx, columns=cols
  96. )
  97. self.check(
  98. target=df,
  99. indexers=(subidx, slice(None, None, None)),
  100. value=vals,
  101. compare_fn=tm.assert_frame_equal,
  102. )
  103. # identity
  104. copy = df.copy()
  105. self.check(
  106. target=df,
  107. indexers=(df.index, df.columns),
  108. value=df,
  109. compare_fn=tm.assert_frame_equal,
  110. expected=copy,
  111. )
  112. # TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in
  113. # all NaNs -> doesn't work in the "split" path (also for BlockManager actually)
  114. @td.skip_array_manager_not_yet_implemented
  115. def test_multiindex_setitem(self):
  116. # GH 3738
  117. # setting with a multi-index right hand side
  118. arrays = [
  119. np.array(["bar", "bar", "baz", "qux", "qux", "bar"]),
  120. np.array(["one", "two", "one", "one", "two", "one"]),
  121. np.arange(0, 6, 1),
  122. ]
  123. df_orig = DataFrame(
  124. np.random.default_rng(2).standard_normal((6, 3)),
  125. index=arrays,
  126. columns=["A", "B", "C"],
  127. ).sort_index()
  128. expected = df_orig.loc[["bar"]] * 2
  129. df = df_orig.copy()
  130. df.loc[["bar"]] *= 2
  131. tm.assert_frame_equal(df.loc[["bar"]], expected)
  132. # raise because these have differing levels
  133. msg = "cannot align on a multi-index with out specifying the join levels"
  134. with pytest.raises(TypeError, match=msg):
  135. df.loc["bar"] *= 2
  136. def test_multiindex_setitem2(self):
  137. # from SO
  138. # https://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
  139. df_orig = DataFrame.from_dict(
  140. {
  141. "price": {
  142. ("DE", "Coal", "Stock"): 2,
  143. ("DE", "Gas", "Stock"): 4,
  144. ("DE", "Elec", "Demand"): 1,
  145. ("FR", "Gas", "Stock"): 5,
  146. ("FR", "Solar", "SupIm"): 0,
  147. ("FR", "Wind", "SupIm"): 0,
  148. }
  149. }
  150. )
  151. df_orig.index = MultiIndex.from_tuples(
  152. df_orig.index, names=["Sit", "Com", "Type"]
  153. )
  154. expected = df_orig.copy()
  155. expected.iloc[[0, 1, 3]] *= 2
  156. idx = pd.IndexSlice
  157. df = df_orig.copy()
  158. df.loc[idx[:, :, "Stock"], :] *= 2
  159. tm.assert_frame_equal(df, expected)
  160. df = df_orig.copy()
  161. df.loc[idx[:, :, "Stock"], "price"] *= 2
  162. tm.assert_frame_equal(df, expected)
  163. def test_multiindex_assignment(self):
  164. # GH3777 part 2
  165. # mixed dtype
  166. df = DataFrame(
  167. np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3),
  168. columns=list("abc"),
  169. index=[[4, 4, 8], [8, 10, 12]],
  170. )
  171. df["d"] = np.nan
  172. arr = np.array([0.0, 1.0])
  173. df.loc[4, "d"] = arr
  174. tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d"))
  175. def test_multiindex_assignment_single_dtype(
  176. self, using_copy_on_write, warn_copy_on_write
  177. ):
  178. # GH3777 part 2b
  179. # single dtype
  180. arr = np.array([0.0, 1.0])
  181. df = DataFrame(
  182. np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3),
  183. columns=list("abc"),
  184. index=[[4, 4, 8], [8, 10, 12]],
  185. dtype=np.int64,
  186. )
  187. view = df["c"].iloc[:2].values
  188. # arr can be losslessly cast to int, so this setitem is inplace
  189. # INFO(CoW-warn) this does not warn because we directly took .values
  190. # above, so no reference to a pandas object is alive for `view`
  191. df.loc[4, "c"] = arr
  192. exp = Series(arr, index=[8, 10], name="c", dtype="int64")
  193. result = df.loc[4, "c"]
  194. tm.assert_series_equal(result, exp)
  195. # extra check for inplace-ness
  196. if not using_copy_on_write:
  197. tm.assert_numpy_array_equal(view, exp.values)
  198. # arr + 0.5 cannot be cast losslessly to int, so we upcast
  199. with tm.assert_produces_warning(
  200. FutureWarning, match="item of incompatible dtype"
  201. ):
  202. df.loc[4, "c"] = arr + 0.5
  203. result = df.loc[4, "c"]
  204. exp = exp + 0.5
  205. tm.assert_series_equal(result, exp)
  206. # scalar ok
  207. with tm.assert_cow_warning(warn_copy_on_write):
  208. df.loc[4, "c"] = 10
  209. exp = Series(10, index=[8, 10], name="c", dtype="float64")
  210. tm.assert_series_equal(df.loc[4, "c"], exp)
  211. # invalid assignments
  212. msg = "Must have equal len keys and value when setting with an iterable"
  213. with pytest.raises(ValueError, match=msg):
  214. df.loc[4, "c"] = [0, 1, 2, 3]
  215. with pytest.raises(ValueError, match=msg):
  216. df.loc[4, "c"] = [0]
  217. # But with a length-1 listlike column indexer this behaves like
  218. # `df.loc[4, "c"] = 0
  219. with tm.assert_cow_warning(warn_copy_on_write):
  220. df.loc[4, ["c"]] = [0]
  221. assert (df.loc[4, "c"] == 0).all()
  222. def test_groupby_example(self):
  223. # groupby example
  224. NUM_ROWS = 100
  225. NUM_COLS = 10
  226. col_names = ["A" + num for num in map(str, np.arange(NUM_COLS).tolist())]
  227. index_cols = col_names[:5]
  228. df = DataFrame(
  229. np.random.default_rng(2).integers(5, size=(NUM_ROWS, NUM_COLS)),
  230. dtype=np.int64,
  231. columns=col_names,
  232. )
  233. df = df.set_index(index_cols).sort_index()
  234. grp = df.groupby(level=index_cols[:4])
  235. df["new_col"] = np.nan
  236. # we are actually operating on a copy here
  237. # but in this case, that's ok
  238. for name, df2 in grp:
  239. new_vals = np.arange(df2.shape[0])
  240. df.loc[name, "new_col"] = new_vals
  241. def test_series_setitem(
  242. self, multiindex_year_month_day_dataframe_random_data, warn_copy_on_write
  243. ):
  244. ymd = multiindex_year_month_day_dataframe_random_data
  245. s = ymd["A"]
  246. with tm.assert_cow_warning(warn_copy_on_write):
  247. s[2000, 3] = np.nan
  248. assert isna(s.values[42:65]).all()
  249. assert notna(s.values[:42]).all()
  250. assert notna(s.values[65:]).all()
  251. with tm.assert_cow_warning(warn_copy_on_write):
  252. s[2000, 3, 10] = np.nan
  253. assert isna(s.iloc[49])
  254. with pytest.raises(KeyError, match="49"):
  255. # GH#33355 dont fall-back to positional when leading level is int
  256. s[49]
  257. def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data):
  258. frame = multiindex_dataframe_random_data
  259. df = frame.T.copy()
  260. values = df.values.copy()
  261. result = df[df > 0]
  262. expected = df.where(df > 0)
  263. tm.assert_frame_equal(result, expected)
  264. df[df > 0] = 5
  265. values[values > 0] = 5
  266. tm.assert_almost_equal(df.values, values)
  267. df[df == 5] = 0
  268. values[values == 5] = 0
  269. tm.assert_almost_equal(df.values, values)
  270. # a df that needs alignment first
  271. df[df[:-1] < 0] = 2
  272. np.putmask(values[:-1], values[:-1] < 0, 2)
  273. tm.assert_almost_equal(df.values, values)
  274. with pytest.raises(TypeError, match="boolean values only"):
  275. df[df * 0] = 2
  276. def test_frame_getitem_setitem_multislice(self):
  277. levels = [["t1", "t2"], ["a", "b", "c"]]
  278. codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
  279. midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"])
  280. df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx)
  281. result = df.loc[:, "value"]
  282. tm.assert_series_equal(df["value"], result)
  283. result = df.loc[df.index[1:3], "value"]
  284. tm.assert_series_equal(df["value"][1:3], result)
  285. result = df.loc[:, :]
  286. tm.assert_frame_equal(df, result)
  287. result = df
  288. df.loc[:, "value"] = 10
  289. result["value"] = 10
  290. tm.assert_frame_equal(df, result)
  291. df.loc[:, :] = 10
  292. tm.assert_frame_equal(df, result)
  293. def test_frame_setitem_multi_column(self):
  294. df = DataFrame(
  295. np.random.default_rng(2).standard_normal((10, 4)),
  296. columns=[["a", "a", "b", "b"], [0, 1, 0, 1]],
  297. )
  298. cp = df.copy()
  299. cp["a"] = cp["b"]
  300. tm.assert_frame_equal(cp["a"], cp["b"])
  301. # set with ndarray
  302. cp = df.copy()
  303. cp["a"] = cp["b"].values
  304. tm.assert_frame_equal(cp["a"], cp["b"])
  305. def test_frame_setitem_multi_column2(self):
  306. # ---------------------------------------
  307. # GH#1803
  308. columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")])
  309. df = DataFrame(index=[1, 3, 5], columns=columns)
  310. # Works, but adds a column instead of updating the two existing ones
  311. df["A"] = 0.0 # Doesn't work
  312. assert (df["A"].values == 0).all()
  313. # it broadcasts
  314. df["B", "1"] = [1, 2, 3]
  315. df["A"] = df["B", "1"]
  316. sliced_a1 = df["A", "1"]
  317. sliced_a2 = df["A", "2"]
  318. sliced_b1 = df["B", "1"]
  319. tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
  320. tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
  321. assert sliced_a1.name == ("A", "1")
  322. assert sliced_a2.name == ("A", "2")
  323. assert sliced_b1.name == ("B", "1")
  324. def test_loc_getitem_tuple_plus_columns(
  325. self, multiindex_year_month_day_dataframe_random_data
  326. ):
  327. # GH #1013
  328. ymd = multiindex_year_month_day_dataframe_random_data
  329. df = ymd[:5]
  330. result = df.loc[(2000, 1, 6), ["A", "B", "C"]]
  331. expected = df.loc[2000, 1, 6][["A", "B", "C"]]
  332. tm.assert_series_equal(result, expected)
  333. @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  334. def test_loc_getitem_setitem_slice_integers(self, frame_or_series):
  335. index = MultiIndex(
  336. levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
  337. )
  338. obj = DataFrame(
  339. np.random.default_rng(2).standard_normal((len(index), 4)),
  340. index=index,
  341. columns=["a", "b", "c", "d"],
  342. )
  343. obj = tm.get_obj(obj, frame_or_series)
  344. res = obj.loc[1:2]
  345. exp = obj.reindex(obj.index[2:])
  346. tm.assert_equal(res, exp)
  347. obj.loc[1:2] = 7
  348. assert (obj.loc[1:2] == 7).values.all()
  349. def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
  350. frame = multiindex_dataframe_random_data
  351. dft = frame.T
  352. s = dft["foo", "two"]
  353. dft["foo", "two"] = s > s.median()
  354. tm.assert_series_equal(dft["foo", "two"], s > s.median())
  355. # assert isinstance(dft._data.blocks[1].items, MultiIndex)
  356. reindexed = dft.reindex(columns=[("foo", "two")])
  357. tm.assert_series_equal(reindexed["foo", "two"], s > s.median())
  358. def test_set_column_scalar_with_loc(
  359. self, multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
  360. ):
  361. frame = multiindex_dataframe_random_data
  362. subset = frame.index[[1, 4, 5]]
  363. frame.loc[subset] = 99
  364. assert (frame.loc[subset].values == 99).all()
  365. frame_original = frame.copy()
  366. col = frame["B"]
  367. with tm.assert_cow_warning(warn_copy_on_write):
  368. col[subset] = 97
  369. if using_copy_on_write:
  370. # chained setitem doesn't work with CoW
  371. tm.assert_frame_equal(frame, frame_original)
  372. else:
  373. assert (frame.loc[subset, "B"] == 97).all()
  374. def test_nonunique_assignment_1750(self):
  375. df = DataFrame(
  376. [[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], columns=list("ABCD")
  377. )
  378. df = df.set_index(["A", "B"])
  379. mi = MultiIndex.from_tuples([(1, 1)])
  380. df.loc[mi, "C"] = "_"
  381. assert (df.xs((1, 1))["C"] == "_").all()
  382. def test_astype_assignment_with_dups(self):
  383. # GH 4686
  384. # assignment with dups that has a dtype change
  385. cols = MultiIndex.from_tuples([("A", "1"), ("B", "1"), ("A", "2")])
  386. df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object)
  387. index = df.index.copy()
  388. df["A"] = df["A"].astype(np.float64)
  389. tm.assert_index_equal(df.index, index)
  390. def test_setitem_nonmonotonic(self):
  391. # https://github.com/pandas-dev/pandas/issues/31449
  392. index = MultiIndex.from_tuples(
  393. [("a", "c"), ("b", "x"), ("a", "d")], names=["l1", "l2"]
  394. )
  395. df = DataFrame(data=[0, 1, 2], index=index, columns=["e"])
  396. df.loc["a", "e"] = np.arange(99, 101, dtype="int64")
  397. expected = DataFrame({"e": [99, 1, 100]}, index=index)
  398. tm.assert_frame_equal(df, expected)
  399. class TestSetitemWithExpansionMultiIndex:
  400. def test_setitem_new_column_mixed_depth(self):
  401. arrays = [
  402. ["a", "top", "top", "routine1", "routine1", "routine2"],
  403. ["", "OD", "OD", "result1", "result2", "result1"],
  404. ["", "wx", "wy", "", "", ""],
  405. ]
  406. tuples = sorted(zip(*arrays))
  407. index = MultiIndex.from_tuples(tuples)
  408. df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=index)
  409. result = df.copy()
  410. expected = df.copy()
  411. result["b"] = [1, 2, 3, 4]
  412. expected["b", "", ""] = [1, 2, 3, 4]
  413. tm.assert_frame_equal(result, expected)
  414. def test_setitem_new_column_all_na(self):
  415. # GH#1534
  416. mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")])
  417. df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
  418. s = Series({(1, 1): 1, (1, 2): 2})
  419. df["new"] = s
  420. assert df["new"].isna().all()
  421. def test_setitem_enlargement_keep_index_names(self):
  422. # GH#53053
  423. mi = MultiIndex.from_tuples([(1, 2, 3)], names=["i1", "i2", "i3"])
  424. df = DataFrame(data=[[10, 20, 30]], index=mi, columns=["A", "B", "C"])
  425. df.loc[(0, 0, 0)] = df.loc[(1, 2, 3)]
  426. mi_expected = MultiIndex.from_tuples(
  427. [(1, 2, 3), (0, 0, 0)], names=["i1", "i2", "i3"]
  428. )
  429. expected = DataFrame(
  430. data=[[10, 20, 30], [10, 20, 30]],
  431. index=mi_expected,
  432. columns=["A", "B", "C"],
  433. )
  434. tm.assert_frame_equal(df, expected)
  435. @td.skip_array_manager_invalid_test # df["foo"] select multiple columns -> .values
  436. # is not a view
  437. def test_frame_setitem_view_direct(
  438. multiindex_dataframe_random_data, using_copy_on_write
  439. ):
  440. # this works because we are modifying the underlying array
  441. # really a no-no
  442. df = multiindex_dataframe_random_data.T
  443. if using_copy_on_write:
  444. with pytest.raises(ValueError, match="read-only"):
  445. df["foo"].values[:] = 0
  446. assert (df["foo"].values != 0).all()
  447. else:
  448. df["foo"].values[:] = 0
  449. assert (df["foo"].values == 0).all()
  450. def test_frame_setitem_copy_raises(
  451. multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
  452. ):
  453. # will raise/warn as its chained assignment
  454. df = multiindex_dataframe_random_data.T
  455. if using_copy_on_write or warn_copy_on_write:
  456. with tm.raises_chained_assignment_error():
  457. df["foo"]["one"] = 2
  458. else:
  459. msg = "A value is trying to be set on a copy of a slice from a DataFrame"
  460. with pytest.raises(SettingWithCopyError, match=msg):
  461. with tm.raises_chained_assignment_error():
  462. df["foo"]["one"] = 2
  463. def test_frame_setitem_copy_no_write(
  464. multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
  465. ):
  466. frame = multiindex_dataframe_random_data.T
  467. expected = frame
  468. df = frame.copy()
  469. if using_copy_on_write or warn_copy_on_write:
  470. with tm.raises_chained_assignment_error():
  471. df["foo"]["one"] = 2
  472. else:
  473. msg = "A value is trying to be set on a copy of a slice from a DataFrame"
  474. with pytest.raises(SettingWithCopyError, match=msg):
  475. with tm.raises_chained_assignment_error():
  476. df["foo"]["one"] = 2
  477. result = df
  478. tm.assert_frame_equal(result, expected)
  479. def test_frame_setitem_partial_multiindex():
  480. # GH 54875
  481. df = DataFrame(
  482. {
  483. "a": [1, 2, 3],
  484. "b": [3, 4, 5],
  485. "c": 6,
  486. "d": 7,
  487. }
  488. ).set_index(["a", "b", "c"])
  489. ser = Series(8, index=df.index.droplevel("c"))
  490. result = df.copy()
  491. result["d"] = ser
  492. expected = df.copy()
  493. expected["d"] = 8
  494. tm.assert_frame_equal(result, expected)