# setitem.py
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. import pandas._testing as tm
  5. class BaseSetitemTests:
  6. @pytest.fixture(
  7. params=[
  8. lambda x: x.index,
  9. lambda x: list(x.index),
  10. lambda x: slice(None),
  11. lambda x: slice(0, len(x)),
  12. lambda x: range(len(x)),
  13. lambda x: list(range(len(x))),
  14. lambda x: np.ones(len(x), dtype=bool),
  15. ],
  16. ids=[
  17. "index",
  18. "list[index]",
  19. "null_slice",
  20. "full_slice",
  21. "range",
  22. "list(range)",
  23. "mask",
  24. ],
  25. )
  26. def full_indexer(self, request):
  27. """
  28. Fixture for an indexer to pass to obj.loc to get/set the full length of the
  29. object.
  30. In some cases, assumes that obj.index is the default RangeIndex.
  31. """
  32. return request.param
  33. @pytest.fixture(autouse=True)
  34. def skip_if_immutable(self, dtype, request):
  35. if dtype._is_immutable:
  36. node = request.node
  37. if node.name.split("[")[0] == "test_is_immutable":
  38. # This fixture is auto-used, but we want to not-skip
  39. # test_is_immutable.
  40. return
  41. # When BaseSetitemTests is mixed into ExtensionTests, we only
  42. # want this fixture to operate on the tests defined in this
  43. # class/file.
  44. defined_in = node.function.__qualname__.split(".")[0]
  45. if defined_in == "BaseSetitemTests":
  46. pytest.skip("__setitem__ test not applicable with immutable dtype")
  47. def test_is_immutable(self, data):
  48. if data.dtype._is_immutable:
  49. with pytest.raises(TypeError):
  50. data[0] = data[0]
  51. else:
  52. data[0] = data[1]
  53. assert data[0] == data[1]
  54. def test_setitem_scalar_series(self, data, box_in_series):
  55. if box_in_series:
  56. data = pd.Series(data)
  57. data[0] = data[1]
  58. assert data[0] == data[1]
  59. def test_setitem_sequence(self, data, box_in_series):
  60. if box_in_series:
  61. data = pd.Series(data)
  62. original = data.copy()
  63. data[[0, 1]] = [data[1], data[0]]
  64. assert data[0] == original[1]
  65. assert data[1] == original[0]
  66. def test_setitem_sequence_mismatched_length_raises(self, data, as_array):
  67. ser = pd.Series(data)
  68. original = ser.copy()
  69. value = [data[0]]
  70. if as_array:
  71. value = data._from_sequence(value, dtype=data.dtype)
  72. xpr = "cannot set using a {} indexer with a different length"
  73. with pytest.raises(ValueError, match=xpr.format("list-like")):
  74. ser[[0, 1]] = value
  75. # Ensure no modifications made before the exception
  76. tm.assert_series_equal(ser, original)
  77. with pytest.raises(ValueError, match=xpr.format("slice")):
  78. ser[slice(3)] = value
  79. tm.assert_series_equal(ser, original)
  80. def test_setitem_empty_indexer(self, data, box_in_series):
  81. if box_in_series:
  82. data = pd.Series(data)
  83. original = data.copy()
  84. data[np.array([], dtype=int)] = []
  85. tm.assert_equal(data, original)
  86. def test_setitem_sequence_broadcasts(self, data, box_in_series):
  87. if box_in_series:
  88. data = pd.Series(data)
  89. data[[0, 1]] = data[2]
  90. assert data[0] == data[2]
  91. assert data[1] == data[2]
  92. @pytest.mark.parametrize("setter", ["loc", "iloc"])
  93. def test_setitem_scalar(self, data, setter):
  94. arr = pd.Series(data)
  95. setter = getattr(arr, setter)
  96. setter[0] = data[1]
  97. assert arr[0] == data[1]
  98. def test_setitem_loc_scalar_mixed(self, data):
  99. df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
  100. df.loc[0, "B"] = data[1]
  101. assert df.loc[0, "B"] == data[1]
  102. def test_setitem_loc_scalar_single(self, data):
  103. df = pd.DataFrame({"B": data})
  104. df.loc[10, "B"] = data[1]
  105. assert df.loc[10, "B"] == data[1]
  106. def test_setitem_loc_scalar_multiple_homogoneous(self, data):
  107. df = pd.DataFrame({"A": data, "B": data})
  108. df.loc[10, "B"] = data[1]
  109. assert df.loc[10, "B"] == data[1]
  110. def test_setitem_iloc_scalar_mixed(self, data):
  111. df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
  112. df.iloc[0, 1] = data[1]
  113. assert df.loc[0, "B"] == data[1]
  114. def test_setitem_iloc_scalar_single(self, data):
  115. df = pd.DataFrame({"B": data})
  116. df.iloc[10, 0] = data[1]
  117. assert df.loc[10, "B"] == data[1]
  118. def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
  119. df = pd.DataFrame({"A": data, "B": data})
  120. df.iloc[10, 1] = data[1]
  121. assert df.loc[10, "B"] == data[1]
  122. @pytest.mark.parametrize(
  123. "mask",
  124. [
  125. np.array([True, True, True, False, False]),
  126. pd.array([True, True, True, False, False], dtype="boolean"),
  127. pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"),
  128. ],
  129. ids=["numpy-array", "boolean-array", "boolean-array-na"],
  130. )
  131. def test_setitem_mask(self, data, mask, box_in_series):
  132. arr = data[:5].copy()
  133. expected = arr.take([0, 0, 0, 3, 4])
  134. if box_in_series:
  135. arr = pd.Series(arr)
  136. expected = pd.Series(expected)
  137. arr[mask] = data[0]
  138. tm.assert_equal(expected, arr)
  139. def test_setitem_mask_raises(self, data, box_in_series):
  140. # wrong length
  141. mask = np.array([True, False])
  142. if box_in_series:
  143. data = pd.Series(data)
  144. with pytest.raises(IndexError, match="wrong length"):
  145. data[mask] = data[0]
  146. mask = pd.array(mask, dtype="boolean")
  147. with pytest.raises(IndexError, match="wrong length"):
  148. data[mask] = data[0]
  149. def test_setitem_mask_boolean_array_with_na(self, data, box_in_series):
  150. mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
  151. mask[:3] = True
  152. mask[3:5] = pd.NA
  153. if box_in_series:
  154. data = pd.Series(data)
  155. data[mask] = data[0]
  156. assert (data[:3] == data[0]).all()
  157. @pytest.mark.parametrize(
  158. "idx",
  159. [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
  160. ids=["list", "integer-array", "numpy-array"],
  161. )
  162. def test_setitem_integer_array(self, data, idx, box_in_series):
  163. arr = data[:5].copy()
  164. expected = data.take([0, 0, 0, 3, 4])
  165. if box_in_series:
  166. arr = pd.Series(arr)
  167. expected = pd.Series(expected)
  168. arr[idx] = arr[0]
  169. tm.assert_equal(arr, expected)
  170. @pytest.mark.parametrize(
  171. "idx, box_in_series",
  172. [
  173. ([0, 1, 2, pd.NA], False),
  174. pytest.param(
  175. [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948")
  176. ),
  177. (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
  178. (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
  179. ],
  180. ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
  181. )
  182. def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
  183. arr = data.copy()
  184. # TODO(xfail) this raises KeyError about labels not found (it tries label-based)
  185. # for list of labels with Series
  186. if box_in_series:
  187. arr = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])
  188. msg = "Cannot index with an integer indexer containing NA values"
  189. with pytest.raises(ValueError, match=msg):
  190. arr[idx] = arr[0]
  191. @pytest.mark.parametrize("as_callable", [True, False])
  192. @pytest.mark.parametrize("setter", ["loc", None])
  193. def test_setitem_mask_aligned(self, data, as_callable, setter):
  194. ser = pd.Series(data)
  195. mask = np.zeros(len(data), dtype=bool)
  196. mask[:2] = True
  197. if as_callable:
  198. mask2 = lambda x: mask
  199. else:
  200. mask2 = mask
  201. if setter:
  202. # loc
  203. target = getattr(ser, setter)
  204. else:
  205. # Series.__setitem__
  206. target = ser
  207. target[mask2] = data[5:7]
  208. ser[mask2] = data[5:7]
  209. assert ser[0] == data[5]
  210. assert ser[1] == data[6]
  211. @pytest.mark.parametrize("setter", ["loc", None])
  212. def test_setitem_mask_broadcast(self, data, setter):
  213. ser = pd.Series(data)
  214. mask = np.zeros(len(data), dtype=bool)
  215. mask[:2] = True
  216. if setter: # loc
  217. target = getattr(ser, setter)
  218. else: # __setitem__
  219. target = ser
  220. target[mask] = data[10]
  221. assert ser[0] == data[10]
  222. assert ser[1] == data[10]
  223. def test_setitem_expand_columns(self, data):
  224. df = pd.DataFrame({"A": data})
  225. result = df.copy()
  226. result["B"] = 1
  227. expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
  228. tm.assert_frame_equal(result, expected)
  229. result = df.copy()
  230. result.loc[:, "B"] = 1
  231. tm.assert_frame_equal(result, expected)
  232. # overwrite with new type
  233. result["B"] = data
  234. expected = pd.DataFrame({"A": data, "B": data})
  235. tm.assert_frame_equal(result, expected)
  236. def test_setitem_expand_with_extension(self, data):
  237. df = pd.DataFrame({"A": [1] * len(data)})
  238. result = df.copy()
  239. result["B"] = data
  240. expected = pd.DataFrame({"A": [1] * len(data), "B": data})
  241. tm.assert_frame_equal(result, expected)
  242. result = df.copy()
  243. result.loc[:, "B"] = data
  244. tm.assert_frame_equal(result, expected)
  245. def test_setitem_frame_invalid_length(self, data):
  246. df = pd.DataFrame({"A": [1] * len(data)})
  247. xpr = (
  248. rf"Length of values \({len(data[:5])}\) "
  249. rf"does not match length of index \({len(df)}\)"
  250. )
  251. with pytest.raises(ValueError, match=xpr):
  252. df["B"] = data[:5]
  253. def test_setitem_tuple_index(self, data):
  254. ser = pd.Series(data[:2], index=[(0, 0), (0, 1)])
  255. expected = pd.Series(data.take([1, 1]), index=ser.index)
  256. ser[(0, 0)] = data[1]
  257. tm.assert_series_equal(ser, expected)
  258. def test_setitem_slice(self, data, box_in_series):
  259. arr = data[:5].copy()
  260. expected = data.take([0, 0, 0, 3, 4])
  261. if box_in_series:
  262. arr = pd.Series(arr)
  263. expected = pd.Series(expected)
  264. arr[:3] = data[0]
  265. tm.assert_equal(arr, expected)
  266. def test_setitem_loc_iloc_slice(self, data):
  267. arr = data[:5].copy()
  268. s = pd.Series(arr, index=["a", "b", "c", "d", "e"])
  269. expected = pd.Series(data.take([0, 0, 0, 3, 4]), index=s.index)
  270. result = s.copy()
  271. result.iloc[:3] = data[0]
  272. tm.assert_equal(result, expected)
  273. result = s.copy()
  274. result.loc[:"c"] = data[0]
  275. tm.assert_equal(result, expected)
  276. def test_setitem_slice_mismatch_length_raises(self, data):
  277. arr = data[:5]
  278. with pytest.raises(ValueError):
  279. arr[:1] = arr[:2]
  280. def test_setitem_slice_array(self, data):
  281. arr = data[:5].copy()
  282. arr[:5] = data[-5:]
  283. tm.assert_extension_array_equal(arr, data[-5:])
  284. def test_setitem_scalar_key_sequence_raise(self, data):
  285. arr = data[:5].copy()
  286. with pytest.raises(ValueError):
  287. arr[0] = arr[[0, 1]]
  288. def test_setitem_preserves_views(self, data):
  289. # GH#28150 setitem shouldn't swap the underlying data
  290. view1 = data.view()
  291. view2 = data[:]
  292. data[0] = data[1]
  293. assert view1[0] == data[1]
  294. assert view2[0] == data[1]
  295. def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
  296. # https://github.com/pandas-dev/pandas/issues/32395
  297. df = expected = pd.DataFrame({0: pd.Series(data)})
  298. result = pd.DataFrame(index=df.index)
  299. key = full_indexer(df)
  300. result.loc[key, 0] = df[0]
  301. tm.assert_frame_equal(result, expected)
  302. def test_setitem_with_expansion_row(self, data, na_value):
  303. df = pd.DataFrame({"data": data[:1]})
  304. df.loc[1, "data"] = data[1]
  305. expected = pd.DataFrame({"data": data[:2]})
  306. tm.assert_frame_equal(df, expected)
  307. # https://github.com/pandas-dev/pandas/issues/47284
  308. df.loc[2, "data"] = na_value
  309. expected = pd.DataFrame(
  310. {"data": pd.Series([data[0], data[1], na_value], dtype=data.dtype)}
  311. )
  312. tm.assert_frame_equal(df, expected)
  313. def test_setitem_series(self, data, full_indexer):
  314. # https://github.com/pandas-dev/pandas/issues/32395
  315. ser = pd.Series(data, name="data")
  316. result = pd.Series(index=ser.index, dtype=object, name="data")
  317. # because result has object dtype, the attempt to do setting inplace
  318. # is successful, and object dtype is retained
  319. key = full_indexer(ser)
  320. result.loc[key] = ser
  321. expected = pd.Series(
  322. data.astype(object), index=ser.index, name="data", dtype=object
  323. )
  324. tm.assert_series_equal(result, expected)
  325. def test_setitem_frame_2d_values(self, data):
  326. # GH#44514
  327. df = pd.DataFrame({"A": data})
  328. # Avoiding using_array_manager fixture
  329. # https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
  330. using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
  331. using_copy_on_write = pd.options.mode.copy_on_write
  332. blk_data = df._mgr.arrays[0]
  333. orig = df.copy()
  334. df.iloc[:] = df.copy()
  335. tm.assert_frame_equal(df, orig)
  336. df.iloc[:-1] = df.iloc[:-1].copy()
  337. tm.assert_frame_equal(df, orig)
  338. df.iloc[:] = df.values
  339. tm.assert_frame_equal(df, orig)
  340. if not using_array_manager and not using_copy_on_write:
  341. # GH#33457 Check that this setting occurred in-place
  342. # FIXME(ArrayManager): this should work there too
  343. assert df._mgr.arrays[0] is blk_data
  344. df.iloc[:-1] = df.values[:-1]
  345. tm.assert_frame_equal(df, orig)
  346. def test_delitem_series(self, data):
  347. # GH#40763
  348. ser = pd.Series(data, name="data")
  349. taker = np.arange(len(ser))
  350. taker = np.delete(taker, 1)
  351. expected = ser[taker]
  352. del ser[1]
  353. tm.assert_series_equal(ser, expected)
  354. def test_setitem_invalid(self, data, invalid_scalar):
  355. msg = "" # messages vary by subclass, so we do not test it
  356. with pytest.raises((ValueError, TypeError), match=msg):
  357. data[0] = invalid_scalar
  358. with pytest.raises((ValueError, TypeError), match=msg):
  359. data[:] = invalid_scalar
  360. def test_setitem_2d_values(self, data):
  361. # GH50085
  362. original = data.copy()
  363. df = pd.DataFrame({"a": data, "b": data})
  364. df.loc[[0, 1], :] = df.loc[[1, 0], :].values
  365. assert (df.loc[0, :] == original[1]).all()
  366. assert (df.loc[1, :] == original[0]).all()