test_iloc.py 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533
  1. """ test positional based indexing with iloc """
  2. from datetime import datetime
  3. import re
  4. import numpy as np
  5. import pytest
  6. from pandas.errors import IndexingError
  7. import pandas.util._test_decorators as td
  8. from pandas import (
  9. NA,
  10. Categorical,
  11. CategoricalDtype,
  12. DataFrame,
  13. Index,
  14. Interval,
  15. NaT,
  16. Series,
  17. Timestamp,
  18. array,
  19. concat,
  20. date_range,
  21. interval_range,
  22. isna,
  23. to_datetime,
  24. )
  25. import pandas._testing as tm
  26. from pandas.api.types import is_scalar
  27. from pandas.tests.indexing.common import check_indexing_smoketest_or_raises
  28. # We pass through the error message from numpy
  29. _slice_iloc_msg = re.escape(
  30. "only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) "
  31. "and integer or boolean arrays are valid indices"
  32. )
  33. class TestiLoc:
  34. @pytest.mark.parametrize("key", [2, -1, [0, 1, 2]])
  35. @pytest.mark.parametrize("kind", ["series", "frame"])
  36. @pytest.mark.parametrize(
  37. "col",
  38. ["labels", "mixed", "ts", "floats", "empty"],
  39. )
  40. def test_iloc_getitem_int_and_list_int(self, key, kind, col, request):
  41. obj = request.getfixturevalue(f"{kind}_{col}")
  42. check_indexing_smoketest_or_raises(
  43. obj,
  44. "iloc",
  45. key,
  46. fails=IndexError,
  47. )
  48. # array of ints (GH5006), make sure that a single indexer is returning
  49. # the correct type
  50. class TestiLocBaseIndependent:
  51. """Tests Independent Of Base Class"""
  52. @pytest.mark.parametrize(
  53. "key",
  54. [
  55. slice(None),
  56. slice(3),
  57. range(3),
  58. [0, 1, 2],
  59. Index(range(3)),
  60. np.asarray([0, 1, 2]),
  61. ],
  62. )
  63. @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
  64. def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manager):
  65. frame = DataFrame({0: range(3)}, dtype=object)
  66. cat = Categorical(["alpha", "beta", "gamma"])
  67. if not using_array_manager:
  68. assert frame._mgr.blocks[0]._can_hold_element(cat)
  69. df = frame.copy()
  70. orig_vals = df.values
  71. indexer(df)[key, 0] = cat
  72. expected = DataFrame({0: cat}).astype(object)
  73. if not using_array_manager:
  74. assert np.shares_memory(df[0].values, orig_vals)
  75. tm.assert_frame_equal(df, expected)
  76. # check we dont have a view on cat (may be undesired GH#39986)
  77. df.iloc[0, 0] = "gamma"
  78. assert cat[0] != "gamma"
  79. # pre-2.0 with mixed dataframe ("split" path) we always overwrote the
  80. # column. as of 2.0 we correctly write "into" the column, so
  81. # we retain the object dtype.
  82. frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)})
  83. df = frame.copy()
  84. indexer(df)[key, 0] = cat
  85. expected = DataFrame({0: Series(cat.astype(object), dtype=object), 1: range(3)})
  86. tm.assert_frame_equal(df, expected)
  87. @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  88. @pytest.mark.parametrize("has_ref", [True, False])
  89. @pytest.mark.parametrize("box", [array, Series])
  90. def test_iloc_setitem_ea_inplace(
  91. self, frame_or_series, box, has_ref, using_copy_on_write
  92. ):
  93. # GH#38952 Case with not setting a full column
  94. # IntegerArray without NAs
  95. arr = array([1, 2, 3, 4])
  96. obj = frame_or_series(arr.to_numpy("i8"))
  97. if has_ref:
  98. view = obj[:] # noqa: F841
  99. if frame_or_series is Series:
  100. values = obj.values
  101. else:
  102. values = obj._mgr.arrays[0]
  103. if frame_or_series is Series:
  104. obj.iloc[:2] = box(arr[2:])
  105. else:
  106. obj.iloc[:2, 0] = box(arr[2:])
  107. expected = frame_or_series(np.array([3, 4, 3, 4], dtype="i8"))
  108. tm.assert_equal(obj, expected)
  109. # Check that we are actually in-place
  110. if not has_ref:
  111. if frame_or_series is Series:
  112. if using_copy_on_write:
  113. assert obj.values is not values
  114. assert np.shares_memory(obj.values, values)
  115. else:
  116. assert obj.values is values
  117. else:
  118. assert np.shares_memory(obj[0].values, values)
  119. def test_is_scalar_access(self):
  120. # GH#32085 index with duplicates doesn't matter for _is_scalar_access
  121. index = Index([1, 2, 1])
  122. ser = Series(range(3), index=index)
  123. assert ser.iloc._is_scalar_access((1,))
  124. df = ser.to_frame()
  125. assert df.iloc._is_scalar_access((1, 0))
  126. def test_iloc_exceeds_bounds(self):
  127. # GH6296
  128. # iloc should allow indexers that exceed the bounds
  129. df = DataFrame(np.random.default_rng(2).random((20, 5)), columns=list("ABCDE"))
  130. # lists of positions should raise IndexError!
  131. msg = "positional indexers are out-of-bounds"
  132. with pytest.raises(IndexError, match=msg):
  133. df.iloc[:, [0, 1, 2, 3, 4, 5]]
  134. with pytest.raises(IndexError, match=msg):
  135. df.iloc[[1, 30]]
  136. with pytest.raises(IndexError, match=msg):
  137. df.iloc[[1, -30]]
  138. with pytest.raises(IndexError, match=msg):
  139. df.iloc[[100]]
  140. s = df["A"]
  141. with pytest.raises(IndexError, match=msg):
  142. s.iloc[[100]]
  143. with pytest.raises(IndexError, match=msg):
  144. s.iloc[[-100]]
  145. # still raise on a single indexer
  146. msg = "single positional indexer is out-of-bounds"
  147. with pytest.raises(IndexError, match=msg):
  148. df.iloc[30]
  149. with pytest.raises(IndexError, match=msg):
  150. df.iloc[-30]
  151. # GH10779
  152. # single positive/negative indexer exceeding Series bounds should raise
  153. # an IndexError
  154. with pytest.raises(IndexError, match=msg):
  155. s.iloc[30]
  156. with pytest.raises(IndexError, match=msg):
  157. s.iloc[-30]
  158. # slices are ok
  159. result = df.iloc[:, 4:10] # 0 < start < len < stop
  160. expected = df.iloc[:, 4:]
  161. tm.assert_frame_equal(result, expected)
  162. result = df.iloc[:, -4:-10] # stop < 0 < start < len
  163. expected = df.iloc[:, :0]
  164. tm.assert_frame_equal(result, expected)
  165. result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down)
  166. expected = df.iloc[:, :4:-1]
  167. tm.assert_frame_equal(result, expected)
  168. result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down)
  169. expected = df.iloc[:, 4::-1]
  170. tm.assert_frame_equal(result, expected)
  171. result = df.iloc[:, -10:4] # start < 0 < stop < len
  172. expected = df.iloc[:, :4]
  173. tm.assert_frame_equal(result, expected)
  174. result = df.iloc[:, 10:4] # 0 < stop < len < start
  175. expected = df.iloc[:, :0]
  176. tm.assert_frame_equal(result, expected)
  177. result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down)
  178. expected = df.iloc[:, :0]
  179. tm.assert_frame_equal(result, expected)
  180. result = df.iloc[:, 10:11] # 0 < len < start < stop
  181. expected = df.iloc[:, :0]
  182. tm.assert_frame_equal(result, expected)
  183. # slice bounds exceeding is ok
  184. result = s.iloc[18:30]
  185. expected = s.iloc[18:]
  186. tm.assert_series_equal(result, expected)
  187. result = s.iloc[30:]
  188. expected = s.iloc[:0]
  189. tm.assert_series_equal(result, expected)
  190. result = s.iloc[30::-1]
  191. expected = s.iloc[::-1]
  192. tm.assert_series_equal(result, expected)
  193. # doc example
  194. dfl = DataFrame(
  195. np.random.default_rng(2).standard_normal((5, 2)), columns=list("AB")
  196. )
  197. tm.assert_frame_equal(
  198. dfl.iloc[:, 2:3],
  199. DataFrame(index=dfl.index, columns=Index([], dtype=dfl.columns.dtype)),
  200. )
  201. tm.assert_frame_equal(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
  202. tm.assert_frame_equal(dfl.iloc[4:6], dfl.iloc[[4]])
  203. msg = "positional indexers are out-of-bounds"
  204. with pytest.raises(IndexError, match=msg):
  205. dfl.iloc[[4, 5, 6]]
  206. msg = "single positional indexer is out-of-bounds"
  207. with pytest.raises(IndexError, match=msg):
  208. dfl.iloc[:, 4]
  209. @pytest.mark.parametrize("index,columns", [(np.arange(20), list("ABCDE"))])
  210. @pytest.mark.parametrize(
  211. "index_vals,column_vals",
  212. [
  213. ([slice(None), ["A", "D"]]),
  214. (["1", "2"], slice(None)),
  215. ([datetime(2019, 1, 1)], slice(None)),
  216. ],
  217. )
  218. def test_iloc_non_integer_raises(self, index, columns, index_vals, column_vals):
  219. # GH 25753
  220. df = DataFrame(
  221. np.random.default_rng(2).standard_normal((len(index), len(columns))),
  222. index=index,
  223. columns=columns,
  224. )
  225. msg = ".iloc requires numeric indexers, got"
  226. with pytest.raises(IndexError, match=msg):
  227. df.iloc[index_vals, column_vals]
  228. def test_iloc_getitem_invalid_scalar(self, frame_or_series):
  229. # GH 21982
  230. obj = DataFrame(np.arange(100).reshape(10, 10))
  231. obj = tm.get_obj(obj, frame_or_series)
  232. with pytest.raises(TypeError, match="Cannot index by location index"):
  233. obj.iloc["a"]
  234. def test_iloc_array_not_mutating_negative_indices(self):
  235. # GH 21867
  236. array_with_neg_numbers = np.array([1, 2, -1])
  237. array_copy = array_with_neg_numbers.copy()
  238. df = DataFrame(
  239. {"A": [100, 101, 102], "B": [103, 104, 105], "C": [106, 107, 108]},
  240. index=[1, 2, 3],
  241. )
  242. df.iloc[array_with_neg_numbers]
  243. tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy)
  244. df.iloc[:, array_with_neg_numbers]
  245. tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy)
  246. def test_iloc_getitem_neg_int_can_reach_first_index(self):
  247. # GH10547 and GH10779
  248. # negative integers should be able to reach index 0
  249. df = DataFrame({"A": [2, 3, 5], "B": [7, 11, 13]})
  250. s = df["A"]
  251. expected = df.iloc[0]
  252. result = df.iloc[-3]
  253. tm.assert_series_equal(result, expected)
  254. expected = df.iloc[[0]]
  255. result = df.iloc[[-3]]
  256. tm.assert_frame_equal(result, expected)
  257. expected = s.iloc[0]
  258. result = s.iloc[-3]
  259. assert result == expected
  260. expected = s.iloc[[0]]
  261. result = s.iloc[[-3]]
  262. tm.assert_series_equal(result, expected)
  263. # check the length 1 Series case highlighted in GH10547
  264. expected = Series(["a"], index=["A"])
  265. result = expected.iloc[[-1]]
  266. tm.assert_series_equal(result, expected)
  267. def test_iloc_getitem_dups(self):
  268. # GH 6766
  269. df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
  270. df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
  271. df = concat([df1, df2], axis=1)
  272. # cross-sectional indexing
  273. result = df.iloc[0, 0]
  274. assert isna(result)
  275. result = df.iloc[0, :]
  276. expected = Series([np.nan, 1, 3, 3], index=["A", "B", "A", "B"], name=0)
  277. tm.assert_series_equal(result, expected)
  278. def test_iloc_getitem_array(self):
  279. df = DataFrame(
  280. [
  281. {"A": 1, "B": 2, "C": 3},
  282. {"A": 100, "B": 200, "C": 300},
  283. {"A": 1000, "B": 2000, "C": 3000},
  284. ]
  285. )
  286. expected = DataFrame([{"A": 1, "B": 2, "C": 3}])
  287. tm.assert_frame_equal(df.iloc[[0]], expected)
  288. expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}])
  289. tm.assert_frame_equal(df.iloc[[0, 1]], expected)
  290. expected = DataFrame([{"B": 2, "C": 3}, {"B": 2000, "C": 3000}], index=[0, 2])
  291. result = df.iloc[[0, 2], [1, 2]]
  292. tm.assert_frame_equal(result, expected)
  293. def test_iloc_getitem_bool(self):
  294. df = DataFrame(
  295. [
  296. {"A": 1, "B": 2, "C": 3},
  297. {"A": 100, "B": 200, "C": 300},
  298. {"A": 1000, "B": 2000, "C": 3000},
  299. ]
  300. )
  301. expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}])
  302. result = df.iloc[[True, True, False]]
  303. tm.assert_frame_equal(result, expected)
  304. expected = DataFrame(
  305. [{"A": 1, "B": 2, "C": 3}, {"A": 1000, "B": 2000, "C": 3000}], index=[0, 2]
  306. )
  307. result = df.iloc[lambda x: x.index % 2 == 0]
  308. tm.assert_frame_equal(result, expected)
  309. @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]])
  310. def test_iloc_getitem_bool_diff_len(self, index):
  311. # GH26658
  312. s = Series([1, 2, 3])
  313. msg = f"Boolean index has wrong length: {len(index)} instead of {len(s)}"
  314. with pytest.raises(IndexError, match=msg):
  315. s.iloc[index]
  316. def test_iloc_getitem_slice(self):
  317. df = DataFrame(
  318. [
  319. {"A": 1, "B": 2, "C": 3},
  320. {"A": 100, "B": 200, "C": 300},
  321. {"A": 1000, "B": 2000, "C": 3000},
  322. ]
  323. )
  324. expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}])
  325. result = df.iloc[:2]
  326. tm.assert_frame_equal(result, expected)
  327. expected = DataFrame([{"A": 100, "B": 200}], index=[1])
  328. result = df.iloc[1:2, 0:2]
  329. tm.assert_frame_equal(result, expected)
  330. expected = DataFrame(
  331. [{"A": 1, "C": 3}, {"A": 100, "C": 300}, {"A": 1000, "C": 3000}]
  332. )
  333. result = df.iloc[:, lambda df: [0, 2]]
  334. tm.assert_frame_equal(result, expected)
  335. def test_iloc_getitem_slice_dups(self):
  336. df1 = DataFrame(
  337. np.random.default_rng(2).standard_normal((10, 4)),
  338. columns=["A", "A", "B", "B"],
  339. )
  340. df2 = DataFrame(
  341. np.random.default_rng(2).integers(0, 10, size=20).reshape(10, 2),
  342. columns=["A", "C"],
  343. )
  344. # axis=1
  345. df = concat([df1, df2], axis=1)
  346. tm.assert_frame_equal(df.iloc[:, :4], df1)
  347. tm.assert_frame_equal(df.iloc[:, 4:], df2)
  348. df = concat([df2, df1], axis=1)
  349. tm.assert_frame_equal(df.iloc[:, :2], df2)
  350. tm.assert_frame_equal(df.iloc[:, 2:], df1)
  351. exp = concat([df2, df1.iloc[:, [0]]], axis=1)
  352. tm.assert_frame_equal(df.iloc[:, 0:3], exp)
  353. # axis=0
  354. df = concat([df, df], axis=0)
  355. tm.assert_frame_equal(df.iloc[0:10, :2], df2)
  356. tm.assert_frame_equal(df.iloc[0:10, 2:], df1)
  357. tm.assert_frame_equal(df.iloc[10:, :2], df2)
  358. tm.assert_frame_equal(df.iloc[10:, 2:], df1)
  359. @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  360. @pytest.mark.parametrize("has_ref", [True, False])
  361. def test_iloc_setitem(self, warn_copy_on_write, has_ref):
  362. df = DataFrame(
  363. np.random.default_rng(2).standard_normal((4, 4)),
  364. index=np.arange(0, 8, 2),
  365. columns=np.arange(0, 12, 3),
  366. )
  367. if has_ref:
  368. view = df[:] # noqa: F841
  369. df.iloc[1, 1] = 1
  370. result = df.iloc[1, 1]
  371. assert result == 1
  372. df.iloc[:, 2:3] = 0
  373. expected = df.iloc[:, 2:3]
  374. result = df.iloc[:, 2:3]
  375. tm.assert_frame_equal(result, expected)
  376. # GH5771
  377. s = Series(0, index=[4, 5, 6])
  378. s.iloc[1:2] += 1
  379. expected = Series([0, 1, 0], index=[4, 5, 6])
  380. tm.assert_series_equal(s, expected)
  381. @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  382. @pytest.mark.parametrize("has_ref", [True, False])
  383. def test_iloc_setitem_axis_argument(self, has_ref):
  384. # GH45032
  385. df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
  386. df[1] = df[1].astype(object)
  387. if has_ref:
  388. view = df[:]
  389. expected = DataFrame([[6, "c", 10], [7, "d", 11], [5, 5, 5]])
  390. expected[1] = expected[1].astype(object)
  391. df.iloc(axis=0)[2] = 5
  392. tm.assert_frame_equal(df, expected)
  393. df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
  394. df[1] = df[1].astype(object)
  395. if has_ref:
  396. view = df[:] # noqa: F841
  397. expected = DataFrame([[6, "c", 5], [7, "d", 5], [8, "e", 5]])
  398. expected[1] = expected[1].astype(object)
  399. df.iloc(axis=1)[2] = 5
  400. tm.assert_frame_equal(df, expected)
  401. @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  402. @pytest.mark.parametrize("has_ref", [True, False])
  403. def test_iloc_setitem_list(self, has_ref):
  404. # setitem with an iloc list
  405. df = DataFrame(
  406. np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"]
  407. )
  408. if has_ref:
  409. view = df[:] # noqa: F841
  410. df.iloc[[0, 1], [1, 2]]
  411. df.iloc[[0, 1], [1, 2]] += 100
  412. expected = DataFrame(
  413. np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)),
  414. index=["A", "B", "C"],
  415. columns=["A", "B", "C"],
  416. )
  417. tm.assert_frame_equal(df, expected)
  418. def test_iloc_setitem_pandas_object(self):
  419. # GH 17193
  420. s_orig = Series([0, 1, 2, 3])
  421. expected = Series([0, -1, -2, 3])
  422. s = s_orig.copy()
  423. s.iloc[Series([1, 2])] = [-1, -2]
  424. tm.assert_series_equal(s, expected)
  425. s = s_orig.copy()
  426. s.iloc[Index([1, 2])] = [-1, -2]
  427. tm.assert_series_equal(s, expected)
  428. def test_iloc_setitem_dups(self):
  429. # GH 6766
  430. # iloc with a mask aligning from another iloc
  431. df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
  432. df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
  433. df = concat([df1, df2], axis=1)
  434. expected = df.fillna(3)
  435. inds = np.isnan(df.iloc[:, 0])
  436. mask = inds[inds].index
  437. df.iloc[mask, 0] = df.iloc[mask, 2]
  438. tm.assert_frame_equal(df, expected)
  439. # del a dup column across blocks
  440. expected = DataFrame({0: [1, 2], 1: [3, 4]})
  441. expected.columns = ["B", "B"]
  442. del df["A"]
  443. tm.assert_frame_equal(df, expected)
  444. # assign back to self
  445. df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]]
  446. tm.assert_frame_equal(df, expected)
  447. # reversed x 2
  448. df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True)
  449. df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True)
  450. tm.assert_frame_equal(df, expected)
  451. def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(
  452. self, using_array_manager
  453. ):
  454. # Same as the "assign back to self" check in test_iloc_setitem_dups
  455. # but on a DataFrame with multiple blocks
  456. df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"])
  457. # setting float values that can be held by existing integer arrays
  458. # is inplace
  459. df.iloc[:, 0] = df.iloc[:, 0].astype("f8")
  460. if not using_array_manager:
  461. assert len(df._mgr.blocks) == 1
  462. # if the assigned values cannot be held by existing integer arrays,
  463. # we cast
  464. with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
  465. df.iloc[:, 0] = df.iloc[:, 0] + 0.5
  466. if not using_array_manager:
  467. assert len(df._mgr.blocks) == 2
  468. expected = df.copy()
  469. # assign back to self
  470. df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]]
  471. tm.assert_frame_equal(df, expected)
  472. # TODO: GH#27620 this test used to compare iloc against ix; check if this
  473. # is redundant with another test comparing iloc against loc
  474. def test_iloc_getitem_frame(self):
  475. df = DataFrame(
  476. np.random.default_rng(2).standard_normal((10, 4)),
  477. index=range(0, 20, 2),
  478. columns=range(0, 8, 2),
  479. )
  480. result = df.iloc[2]
  481. exp = df.loc[4]
  482. tm.assert_series_equal(result, exp)
  483. result = df.iloc[2, 2]
  484. exp = df.loc[4, 4]
  485. assert result == exp
  486. # slice
  487. result = df.iloc[4:8]
  488. expected = df.loc[8:14]
  489. tm.assert_frame_equal(result, expected)
  490. result = df.iloc[:, 2:3]
  491. expected = df.loc[:, 4:5]
  492. tm.assert_frame_equal(result, expected)
  493. # list of integers
  494. result = df.iloc[[0, 1, 3]]
  495. expected = df.loc[[0, 2, 6]]
  496. tm.assert_frame_equal(result, expected)
  497. result = df.iloc[[0, 1, 3], [0, 1]]
  498. expected = df.loc[[0, 2, 6], [0, 2]]
  499. tm.assert_frame_equal(result, expected)
  500. # neg indices
  501. result = df.iloc[[-1, 1, 3], [-1, 1]]
  502. expected = df.loc[[18, 2, 6], [6, 2]]
  503. tm.assert_frame_equal(result, expected)
  504. # dups indices
  505. result = df.iloc[[-1, -1, 1, 3], [-1, 1]]
  506. expected = df.loc[[18, 18, 2, 6], [6, 2]]
  507. tm.assert_frame_equal(result, expected)
  508. # with index-like
  509. s = Series(index=range(1, 5), dtype=object)
  510. result = df.iloc[s.index]
  511. expected = df.loc[[2, 4, 6, 8]]
  512. tm.assert_frame_equal(result, expected)
  513. def test_iloc_getitem_labelled_frame(self):
  514. # try with labelled frame
  515. df = DataFrame(
  516. np.random.default_rng(2).standard_normal((10, 4)),
  517. index=list("abcdefghij"),
  518. columns=list("ABCD"),
  519. )
  520. result = df.iloc[1, 1]
  521. exp = df.loc["b", "B"]
  522. assert result == exp
  523. result = df.iloc[:, 2:3]
  524. expected = df.loc[:, ["C"]]
  525. tm.assert_frame_equal(result, expected)
  526. # negative indexing
  527. result = df.iloc[-1, -1]
  528. exp = df.loc["j", "D"]
  529. assert result == exp
  530. # out-of-bounds exception
  531. msg = "index 5 is out of bounds for axis 0 with size 4|index out of bounds"
  532. with pytest.raises(IndexError, match=msg):
  533. df.iloc[10, 5]
  534. # trying to use a label
  535. msg = (
  536. r"Location based indexing can only have \[integer, integer "
  537. r"slice \(START point is INCLUDED, END point is EXCLUDED\), "
  538. r"listlike of integers, boolean array\] types"
  539. )
  540. with pytest.raises(ValueError, match=msg):
  541. df.iloc["j", "D"]
  542. def test_iloc_getitem_doc_issue(self, using_array_manager):
  543. # multi axis slicing issue with single block
  544. # surfaced in GH 6059
  545. arr = np.random.default_rng(2).standard_normal((6, 4))
  546. index = date_range("20130101", periods=6)
  547. columns = list("ABCD")
  548. df = DataFrame(arr, index=index, columns=columns)
  549. # defines ref_locs
  550. df.describe()
  551. result = df.iloc[3:5, 0:2]
  552. expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=columns[0:2])
  553. tm.assert_frame_equal(result, expected)
  554. # for dups
  555. df.columns = list("aaaa")
  556. result = df.iloc[3:5, 0:2]
  557. expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=list("aa"))
  558. tm.assert_frame_equal(result, expected)
  559. # related
  560. arr = np.random.default_rng(2).standard_normal((6, 4))
  561. index = list(range(0, 12, 2))
  562. columns = list(range(0, 8, 2))
  563. df = DataFrame(arr, index=index, columns=columns)
  564. if not using_array_manager:
  565. df._mgr.blocks[0].mgr_locs
  566. result = df.iloc[1:5, 2:4]
  567. expected = DataFrame(arr[1:5, 2:4], index=index[1:5], columns=columns[2:4])
  568. tm.assert_frame_equal(result, expected)
  569. @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  570. @pytest.mark.parametrize("has_ref", [True, False])
  571. def test_iloc_setitem_series(self, has_ref):
  572. df = DataFrame(
  573. np.random.default_rng(2).standard_normal((10, 4)),
  574. index=list("abcdefghij"),
  575. columns=list("ABCD"),
  576. )
  577. if has_ref:
  578. view = df[:] # noqa: F841
  579. df.iloc[1, 1] = 1
  580. result = df.iloc[1, 1]
  581. assert result == 1
  582. df.iloc[:, 2:3] = 0
  583. expected = df.iloc[:, 2:3]
  584. result = df.iloc[:, 2:3]
  585. tm.assert_frame_equal(result, expected)
  586. s = Series(np.random.default_rng(2).standard_normal(10), index=range(0, 20, 2))
  587. s.iloc[1] = 1
  588. result = s.iloc[1]
  589. assert result == 1
  590. s.iloc[:4] = 0
  591. expected = s.iloc[:4]
  592. result = s.iloc[:4]
  593. tm.assert_series_equal(result, expected)
  594. s = Series([-1] * 6)
  595. s.iloc[0::2] = [0, 2, 4]
  596. s.iloc[1::2] = [1, 3, 5]
  597. result = s
  598. expected = Series([0, 1, 2, 3, 4, 5])
  599. tm.assert_series_equal(result, expected)
  600. @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  601. @pytest.mark.parametrize("has_ref", [True, False])
  602. def test_iloc_setitem_list_of_lists(self, has_ref):
  603. # GH 7551
  604. # list-of-list is set incorrectly in mixed vs. single dtyped frames
  605. df = DataFrame(
  606. {"A": np.arange(5, dtype="int64"), "B": np.arange(5, 10, dtype="int64")}
  607. )
  608. if has_ref:
  609. view = df[:]
  610. df.iloc[2:4] = [[10, 11], [12, 13]]
  611. expected = DataFrame({"A": [0, 1, 10, 12, 4], "B": [5, 6, 11, 13, 9]})
  612. tm.assert_frame_equal(df, expected)
  613. df = DataFrame(
  614. {"A": ["a", "b", "c", "d", "e"], "B": np.arange(5, 10, dtype="int64")}
  615. )
  616. if has_ref:
  617. view = df[:] # noqa: F841
  618. df.iloc[2:4] = [["x", 11], ["y", 13]]
  619. expected = DataFrame({"A": ["a", "b", "x", "y", "e"], "B": [5, 6, 11, 13, 9]})
  620. tm.assert_frame_equal(df, expected)
  621. @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  622. @pytest.mark.parametrize("has_ref", [True, False])
  623. @pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])])
  624. @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])])
  625. def test_iloc_setitem_with_scalar_index(self, has_ref, indexer, value):
  626. # GH #19474
  627. # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
  628. # elementwisely, not using "setter('A', ['Z'])".
  629. # Set object type to avoid upcast when setting "Z"
  630. df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]).astype({"A": object})
  631. if has_ref:
  632. view = df[:] # noqa: F841
  633. df.iloc[0, indexer] = value
  634. result = df.iloc[0, 0]
  635. assert is_scalar(result) and result == "Z"
  @pytest.mark.filterwarnings("ignore::UserWarning")
  def test_iloc_mask(self):
      """Check boolean-mask indexing through ``[]``, ``.loc`` and ``.iloc``.

      First part: a boolean Series used as an ``.iloc`` key must raise
      (``ValueError`` while it still carries the frame's string labels,
      ``NotImplementedError`` once re-labelled with a range), whereas a plain
      boolean ndarray is accepted.

      Second part: every combination of mask index (none, the frame's index
      reversed, the ``locs`` column reversed) and accessor is run, and either
      the binary representation of the selected ``nums`` sum or the error
      message is compared with the ``expected`` table.

      Raises
      ------
      AssertionError
          If any combination produces something other than its table entry.
      """
      # GH 3631, iloc with a mask (of a series) should raise
      df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"])
      mask = df.a % 2 == 0
      msg = "iLocation based boolean indexing cannot use an indexable as a mask"
      with pytest.raises(ValueError, match=msg):
          df.iloc[mask]

      mask.index = range(len(mask))
      msg = "iLocation based boolean indexing on an integer type is not available"
      with pytest.raises(NotImplementedError, match=msg):
          df.iloc[mask]

      # ndarray ok
      result = df.iloc[np.array([True] * len(mask), dtype=bool)]
      tm.assert_frame_equal(result, df)

      # the possibilities
      locs = np.arange(4)
      nums = 2**locs
      reps = [bin(num) for num in nums]
      df = DataFrame({"locs": locs, "nums": nums}, reps)

      # (mask index source, accessor) -> binary sum of selected nums, or the
      # message of the exception that the lookup raises
      expected = {
          (None, ""): "0b1100",
          (None, ".loc"): "0b1100",
          (None, ".iloc"): "0b1100",
          ("index", ""): "0b11",
          ("index", ".loc"): "0b11",
          ("index", ".iloc"): (
              "iLocation based boolean indexing cannot use an indexable as a mask"
          ),
          ("locs", ""): "Unalignable boolean Series provided as indexer "
          "(index of the boolean Series and of the indexed "
          "object do not match).",
          ("locs", ".loc"): "Unalignable boolean Series provided as indexer "
          "(index of the boolean Series and of the "
          "indexed object do not match).",
          ("locs", ".iloc"): (
              "iLocation based boolean indexing on an "
              "integer type is not available"
          ),
      }

      # UserWarnings from reindex of a boolean mask
      for idx in [None, "index", "locs"]:
          mask = (df.nums > 2).values
          if idx:
              # wrap the raw mask in a Series labelled by the reversed attribute
              mask_index = getattr(df, idx)[::-1]
              mask = Series(mask, list(mask_index))
          for method in ["", ".loc", ".iloc"]:
              try:
                  # "" means plain ``df[...]``; otherwise strip the leading dot
                  accessor = getattr(df, method[1:]) if method else df
                  answer = bin(accessor[mask]["nums"].sum())
              except (ValueError, IndexingError, NotImplementedError) as err:
                  answer = str(err)

              key = (idx, method)
              wanted = expected.get(key)
              if wanted != answer:
                  # report the table entry as expected and the outcome as received
                  raise AssertionError(
                      f"[{key}] expected [{wanted}], received [{answer}]"
                  )
  def test_iloc_non_unique_indexing(self):
      """Positional lookup on a concatenated (non-unique) axis, and ``.loc`` raising."""
      # GH 4017, non-unique indexing (on the axis)
      base = DataFrame({"A": [0.1] * 3000, "B": [1] * 3000})
      positions = np.arange(30) * 99
      expected = base.iloc[positions]

      stacked = concat([base, 2 * base, 3 * base])
      result = stacked.iloc[positions]
      tm.assert_frame_equal(result, expected)

      small = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000})
      small_stacked = concat([small, 2 * small, 3 * small])
      with pytest.raises(KeyError, match="not in index"):
          small_stacked.loc[positions]
  712. def test_iloc_empty_list_indexer_is_ok(self):
  713. df = DataFrame(
  714. np.ones((5, 2)),
  715. index=Index([f"i-{i}" for i in range(5)], name="a"),
  716. columns=Index([f"i-{i}" for i in range(2)], name="a"),
  717. )
  718. # vertical empty
  719. tm.assert_frame_equal(
  720. df.iloc[:, []],
  721. df.iloc[:, :0],
  722. check_index_type=True,
  723. check_column_type=True,
  724. )
  725. # horizontal empty
  726. tm.assert_frame_equal(
  727. df.iloc[[], :],
  728. df.iloc[:0, :],
  729. check_index_type=True,
  730. check_column_type=True,
  731. )
  732. # horizontal empty
  733. tm.assert_frame_equal(
  734. df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True
  735. )
  def test_identity_slice_returns_new_object(
      self, using_copy_on_write, warn_copy_on_write
  ):
      """``iloc[:]`` returns a distinct object; later writes show through only without CoW."""
      # GH13873
      frame = DataFrame({"a": [1, 2, 3]})
      frame_slice = frame.iloc[:]
      assert frame_slice is not frame

      # should be a shallow copy
      assert np.shares_memory(frame["a"], frame_slice["a"])

      # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig
      # depending on CoW
      with tm.assert_cow_warning(warn_copy_on_write):
          frame.loc[:, "a"] = [4, 4, 4]
      frame_expected = [1, 2, 3] if using_copy_on_write else 4
      assert (frame_slice["a"] == frame_expected).all()

      ser = Series([1, 2, 3, 4, 5, 6])
      ser_slice = ser.iloc[:]
      assert ser_slice is not ser

      # should also be a shallow copy
      with tm.assert_cow_warning(warn_copy_on_write):
          ser[:3] = [7, 8, 9]
      # with CoW the shallow copy is not updated
      ser_expected = [1, 2, 3] if using_copy_on_write else [7, 8, 9]
      assert all(ser_slice[:3] == ser_expected)
  764. def test_indexing_zerodim_np_array(self):
  765. # GH24919
  766. df = DataFrame([[1, 2], [3, 4]])
  767. result = df.iloc[np.array(0)]
  768. s = Series([1, 2], name=0)
  769. tm.assert_series_equal(result, s)
  770. def test_series_indexing_zerodim_np_array(self):
  771. # GH24919
  772. s = Series([1, 2])
  773. result = s.iloc[np.array(0)]
  774. assert result == 1
  775. def test_iloc_setitem_categorical_updates_inplace(self):
  776. # Mixed dtype ensures we go through take_split_path in setitem_with_indexer
  777. cat = Categorical(["A", "B", "C"])
  778. df = DataFrame({1: cat, 2: [1, 2, 3]}, copy=False)
  779. assert tm.shares_memory(df[1], cat)
  780. # With the enforcement of GH#45333 in 2.0, this modifies original
  781. # values inplace
  782. df.iloc[:, 0] = cat[::-1]
  783. assert tm.shares_memory(df[1], cat)
  784. expected = Categorical(["C", "B", "A"], categories=["A", "B", "C"])
  785. tm.assert_categorical_equal(cat, expected)
  786. def test_iloc_with_boolean_operation(self):
  787. # GH 20627
  788. result = DataFrame([[0, 1], [2, 3], [4, 5], [6, np.nan]])
  789. result.iloc[result.index <= 2] *= 2
  790. expected = DataFrame([[0, 2], [4, 6], [8, 10], [6, np.nan]])
  791. tm.assert_frame_equal(result, expected)
  792. result.iloc[result.index > 2] *= 2
  793. expected = DataFrame([[0, 2], [4, 6], [8, 10], [12, np.nan]])
  794. tm.assert_frame_equal(result, expected)
  795. result.iloc[[True, True, False, False]] *= 2
  796. expected = DataFrame([[0, 4], [8, 12], [8, 10], [12, np.nan]])
  797. tm.assert_frame_equal(result, expected)
  798. result.iloc[[False, False, True, True]] /= 2
  799. expected = DataFrame([[0, 4.0], [8, 12.0], [4, 5.0], [6, np.nan]])
  800. tm.assert_frame_equal(result, expected)
  801. def test_iloc_getitem_singlerow_slice_categoricaldtype_gives_series(self):
  802. # GH#29521
  803. df = DataFrame({"x": Categorical("a b c d e".split())})
  804. result = df.iloc[0]
  805. raw_cat = Categorical(["a"], categories=["a", "b", "c", "d", "e"])
  806. expected = Series(raw_cat, index=["x"], name=0, dtype="category")
  807. tm.assert_series_equal(result, expected)
  808. def test_iloc_getitem_categorical_values(self):
  809. # GH#14580
  810. # test iloc() on Series with Categorical data
  811. ser = Series([1, 2, 3]).astype("category")
  812. # get slice
  813. result = ser.iloc[0:2]
  814. expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
  815. tm.assert_series_equal(result, expected)
  816. # get list of indexes
  817. result = ser.iloc[[0, 1]]
  818. expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
  819. tm.assert_series_equal(result, expected)
  820. # get boolean array
  821. result = ser.iloc[[True, False, False]]
  822. expected = Series([1]).astype(CategoricalDtype([1, 2, 3]))
  823. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize("value", [None, NaT, np.nan])
  def test_iloc_setitem_td64_values_cast_na(self, value):
      """Each NA-like ``value`` set into a timedelta64 Series is expected to read back as NaT."""
      # GH#18586
      td_dtype = "timedelta64[ns]"
      ser = Series([0, 1, 2], dtype=td_dtype)
      ser.iloc[0] = value
      want = Series([NaT, 1, 2], dtype=td_dtype)
      tm.assert_series_equal(ser, want)
  @pytest.mark.parametrize("not_na", [Interval(0, 1), "a", 1.0])
  def test_setitem_mix_of_nan_and_interval(self, not_na, nulls_fixture):
      """Set a mix of nulls and one category value into an all-null categorical Series."""
      # GH#27937
      cat_dtype = CategoricalDtype(categories=[not_na])
      ser = Series([nulls_fixture] * 4, dtype=cat_dtype)

      ser.iloc[:3] = [nulls_fixture, not_na, nulls_fixture]

      want_values = [nulls_fixture, not_na, nulls_fixture, nulls_fixture]
      want = Series(want_values, dtype=cat_dtype)
      tm.assert_series_equal(ser, want)
  def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self):
      """A 3-dimensional ndarray key on an empty frame must raise ``ValueError``."""
      empty_labels = Index([])
      side = len(empty_labels)
      values = np.random.default_rng(2).standard_normal((side, side))
      frame = DataFrame(values, index=empty_labels, columns=empty_labels)

      key_3d = np.random.default_rng(2).integers(5, size=(2, 2, 2))

      msg = f"Cannot set values with ndim > {frame.ndim}"
      with pytest.raises(ValueError, match=msg):
          frame.iloc[key_3d] = 0
  @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
  def test_iloc_getitem_read_only_values(self, indexer):
      """Frames built on writable and read-only arrays must index identically."""
      # GH#10043 this is fundamentally a test for iloc, but test loc while
      # we're here
      writable = DataFrame(np.eye(10))

      frozen_values = np.eye(10)
      frozen_values.setflags(write=False)
      frozen = DataFrame(frozen_values)

      # list keys give frames, a scalar key gives a Series, a slice gives a frame
      tm.assert_frame_equal(indexer(writable)[[1, 2, 3]], indexer(frozen)[[1, 2, 3]])
      tm.assert_frame_equal(indexer(writable)[[1]], indexer(frozen)[[1]])
      tm.assert_series_equal(indexer(writable)[1], indexer(frozen)[1])
      tm.assert_frame_equal(indexer(writable)[1:3], indexer(frozen)[1:3])
  865. def test_iloc_getitem_readonly_key(self):
  866. # GH#17192 iloc with read-only array raising TypeError
  867. df = DataFrame({"data": np.ones(100, dtype="float64")})
  868. indices = np.array([1, 3, 6])
  869. indices.flags.writeable = False
  870. result = df.iloc[indices]
  871. expected = df.loc[[1, 3, 6]]
  872. tm.assert_frame_equal(result, expected)
  873. result = df["data"].iloc[indices]
  874. expected = df["data"].loc[[1, 3, 6]]
  875. tm.assert_series_equal(result, expected)
  876. def test_iloc_assign_series_to_df_cell(self):
  877. # GH 37593
  878. df = DataFrame(columns=["a"], index=[0])
  879. df.iloc[0, 0] = Series([1, 2, 3])
  880. expected = DataFrame({"a": [Series([1, 2, 3])]}, columns=["a"], index=[0])
  881. tm.assert_frame_equal(df, expected)
  @pytest.mark.parametrize("klass", [list, np.array])
  def test_iloc_setitem_bool_indexer(self, klass):
      """Double the ``value`` column on the rows picked by a boolean row indexer."""
      # GH#36741
      frame = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]})
      row_mask = klass([True, False, False])

      doubled = frame.iloc[row_mask, 1] * 2
      frame.iloc[row_mask, 1] = doubled

      want = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
      tm.assert_frame_equal(frame, want)
  @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  @pytest.mark.parametrize("has_ref", [True, False])
  @pytest.mark.parametrize("indexer", [[1], slice(1, 2)])
  def test_iloc_setitem_pure_position_based(self, indexer, has_ref):
      """Column 0 of one frame lands in column 1 of another despite differing labels."""
      # GH#22046
      source = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]})
      target = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
      if has_ref:
          # keep a second object derived from the target alive during the set
          view = target[:]  # noqa: F841

      target.iloc[:, indexer] = source.iloc[:, [0]]

      want = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]})
      tm.assert_frame_equal(target, want)
  @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  @pytest.mark.parametrize("has_ref", [True, False])
  def test_iloc_setitem_dictionary_value(self, has_ref):
      """Assign a dict to one row, for an all-int frame and for a mixed int/float frame."""
      rhs = {"x": 9, "y": 99}

      # GH#37728
      frame = DataFrame({"x": [1, 2], "y": [2, 2]})
      if has_ref:
          view = frame[:]  # noqa: F841
      frame.iloc[1] = rhs
      want = DataFrame({"x": [1, 9], "y": [2, 99]})
      tm.assert_frame_equal(frame, want)

      # GH#38335 same thing, mixed dtypes
      frame = DataFrame({"x": [1, 2], "y": [2.0, 2.0]})
      if has_ref:
          view = frame[:]  # noqa: F841
      frame.iloc[1] = rhs
      want = DataFrame({"x": [1, 9], "y": [2.0, 99.0]})
      tm.assert_frame_equal(frame, want)
  920. def test_iloc_getitem_float_duplicates(self):
  921. df = DataFrame(
  922. np.random.default_rng(2).standard_normal((3, 3)),
  923. index=[0.1, 0.2, 0.2],
  924. columns=list("abc"),
  925. )
  926. expect = df.iloc[1:]
  927. tm.assert_frame_equal(df.loc[0.2], expect)
  928. expect = df.iloc[1:, 0]
  929. tm.assert_series_equal(df.loc[0.2, "a"], expect)
  930. df.index = [1, 0.2, 0.2]
  931. expect = df.iloc[1:]
  932. tm.assert_frame_equal(df.loc[0.2], expect)
  933. expect = df.iloc[1:, 0]
  934. tm.assert_series_equal(df.loc[0.2, "a"], expect)
  935. df = DataFrame(
  936. np.random.default_rng(2).standard_normal((4, 3)),
  937. index=[1, 0.2, 0.2, 1],
  938. columns=list("abc"),
  939. )
  940. expect = df.iloc[1:-1]
  941. tm.assert_frame_equal(df.loc[0.2], expect)
  942. expect = df.iloc[1:-1, 0]
  943. tm.assert_series_equal(df.loc[0.2, "a"], expect)
  944. df.index = [0.1, 0.2, 2, 0.2]
  945. expect = df.iloc[[1, -1]]
  946. tm.assert_frame_equal(df.loc[0.2], expect)
  947. expect = df.iloc[[1, -1], 0]
  948. tm.assert_series_equal(df.loc[0.2, "a"], expect)
  949. def test_iloc_setitem_custom_object(self):
  950. # iloc with an object
  951. class TO:
  952. def __init__(self, value) -> None:
  953. self.value = value
  954. def __str__(self) -> str:
  955. return f"[{self.value}]"
  956. __repr__ = __str__
  957. def __eq__(self, other) -> bool:
  958. return self.value == other.value
  959. def view(self):
  960. return self
  961. df = DataFrame(index=[0, 1], columns=[0])
  962. df.iloc[1, 0] = TO(1)
  963. df.iloc[1, 0] = TO(2)
  964. result = DataFrame(index=[0, 1], columns=[0])
  965. result.iloc[1, 0] = TO(2)
  966. tm.assert_frame_equal(result, df)
  967. # remains object dtype even after setting it back
  968. df = DataFrame(index=[0, 1], columns=[0])
  969. df.iloc[1, 0] = TO(1)
  970. df.iloc[1, 0] = np.nan
  971. result = DataFrame(index=[0, 1], columns=[0])
  972. tm.assert_frame_equal(result, df)
  973. def test_iloc_getitem_with_duplicates(self):
  974. df = DataFrame(
  975. np.random.default_rng(2).random((3, 3)),
  976. columns=list("ABC"),
  977. index=list("aab"),
  978. )
  979. result = df.iloc[0]
  980. assert isinstance(result, Series)
  981. tm.assert_almost_equal(result.values, df.values[0])
  982. result = df.T.iloc[:, 0]
  983. assert isinstance(result, Series)
  984. tm.assert_almost_equal(result.values, df.values[0])
  985. def test_iloc_getitem_with_duplicates2(self):
  986. # GH#2259
  987. df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1, 1, 2])
  988. result = df.iloc[:, [0]]
  989. expected = df.take([0], axis=1)
  990. tm.assert_frame_equal(result, expected)
  991. def test_iloc_interval(self):
  992. # GH#17130
  993. df = DataFrame({Interval(1, 2): [1, 2]})
  994. result = df.iloc[0]
  995. expected = Series({Interval(1, 2): 1}, name=0)
  996. tm.assert_series_equal(result, expected)
  997. result = df.iloc[:, 0]
  998. expected = Series([1, 2], name=Interval(1, 2))
  999. tm.assert_series_equal(result, expected)
  1000. result = df.copy()
  1001. result.iloc[:, 0] += 1
  1002. expected = DataFrame({Interval(1, 2): [2, 3]})
  1003. tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize("indexing_func", [list, np.array])
  @pytest.mark.parametrize("rhs_func", [list, np.array])
  def test_loc_setitem_boolean_list(self, rhs_func, indexing_func):
      """Set through ``iloc`` with a boolean key, on a Series and on a one-column frame."""
      # GH#20438 testing specifically list key, not arraylike
      # NOTE(review): the test name says "loc" but the body exercises ``iloc``.
      ser = Series([0, 1, 2])
      ser_key = indexing_func([True, False, True])
      ser.iloc[ser_key] = rhs_func([5, 10])
      tm.assert_series_equal(ser, Series([5, 1, 10]))

      frame = DataFrame({"a": [0, 1, 2]})
      frame_key = indexing_func([True, False, True])
      frame.iloc[frame_key] = rhs_func([[5], [10]])
      tm.assert_frame_equal(frame, DataFrame({"a": [5, 1, 10]}))
  1016. def test_iloc_getitem_slice_negative_step_ea_block(self):
  1017. # GH#44551
  1018. df = DataFrame({"A": [1, 2, 3]}, dtype="Int64")
  1019. res = df.iloc[:, ::-1]
  1020. tm.assert_frame_equal(res, df)
  1021. df["B"] = "foo"
  1022. res = df.iloc[:, ::-1]
  1023. expected = DataFrame({"B": df["B"], "A": df["A"]})
  1024. tm.assert_frame_equal(res, expected)
  1025. def test_iloc_setitem_2d_ndarray_into_ea_block(self):
  1026. # GH#44703
  1027. df = DataFrame({"status": ["a", "b", "c"]}, dtype="category")
  1028. df.iloc[np.array([0, 1]), np.array([0])] = np.array([["a"], ["a"]])
  1029. expected = DataFrame({"status": ["a", "a", "c"]}, dtype=df["status"].dtype)
  1030. tm.assert_frame_equal(df, expected)
  @td.skip_array_manager_not_yet_implemented
  def test_iloc_getitem_int_single_ea_block_view(self):
      """A row taken with an integer key is expected to reflect later writes to the array."""
      # GH#45241
      # TODO: make an extension interface test for this?
      values = interval_range(1, 10.0)._values
      frame = DataFrame(values)

      # row should be a *view* on the DataFrame data
      row = frame.iloc[2]

      # if we have a view, then changing values[2] should also change row[0]
      last = values[-1]
      assert values[2] != last  # otherwise the rest isn't meaningful
      values[2] = last
      assert row[0] == values[-1]
  def test_iloc_setitem_multicolumn_to_datetime(self, using_infer_string):
      """Replace a string column with datetimes through a list-of-columns ``iloc`` key."""
      # GH#20511
      frame = DataFrame({"A": ["2022-01-01", "2022-01-02"], "B": ["2021", "2022"]})

      def assign_datetimes():
          frame.iloc[:, [0]] = DataFrame({"A": to_datetime(["2021", "2022"])})

      if not using_infer_string:
          assign_datetimes()
      else:
          # with inferred string dtype the assignment is expected to warn
          with tm.assert_produces_warning(
              FutureWarning, match="Setting an item of incompatible dtype"
          ):
              assign_datetimes()

      stamps = [
          Timestamp("2021-01-01 00:00:00"),
          Timestamp("2022-01-01 00:00:00"),
      ]
      want = DataFrame({"A": stamps, "B": ["2021", "2022"]})
      tm.assert_frame_equal(frame, want, check_dtype=False)
  class TestILocErrors:
      """Exceptions raised by ``.iloc`` for unsupported keys."""

      # NB: this test should work for _any_ Series we can pass as
      # series_with_simple_index
      def test_iloc_float_raises(
          self, series_with_simple_index, frame_or_series, warn_copy_on_write
      ):
          """A float key raises ``TypeError`` on get and ``IndexError`` on set."""
          # GH#4892
          # float_indexers should raise exceptions
          # on appropriate Index types & accessors
          # this duplicates the code below
          # but is specifically testing for the error
          # message
          target = series_with_simple_index
          if frame_or_series is DataFrame:
              target = target.to_frame()

          getitem_msg = "Cannot index by location index with a non-integer key"
          with pytest.raises(TypeError, match=getitem_msg):
              target.iloc[3.0]

          expect_cow_warning = warn_copy_on_write and frame_or_series is DataFrame
          with pytest.raises(IndexError, match=_slice_iloc_msg):
              with tm.assert_cow_warning(expect_cow_warning):
                  target.iloc[3.0] = 0

      @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
      @pytest.mark.parametrize("has_ref", [True, False])
      def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame, has_ref):
          """Three indexers on a 2-D frame raise on both get and set."""
          with pytest.raises(IndexingError, match="Too many indexers"):
              float_frame.iloc[:, :, :]

          if has_ref:
              view = float_frame[:]  # noqa: F841

          # GH#32257 we let numpy do validation, get their exception
          with pytest.raises(IndexError, match="too many indices for array"):
              float_frame.iloc[:, :, :] = 1

      def test_iloc_frame_indexer(self):
          """A DataFrame used as an ``iloc`` key is rejected for set and for get."""
          # GH#39004
          frame = DataFrame({"a": [1, 2, 3]})
          frame_key = DataFrame({"a": [True, False, True]})

          setitem_msg = (
              "DataFrame indexer for .iloc is not supported. Consider using .loc"
          )
          with pytest.raises(TypeError, match=setitem_msg):
              frame.iloc[frame_key] = 1

          getitem_msg = (
              "DataFrame indexer is not allowed for .iloc\n"
              "Consider using .loc for automatic alignment."
          )
          with pytest.raises(IndexError, match=getitem_msg):
              frame.iloc[frame_key]
  class TestILocSetItemDuplicateColumns:
      """Setting through ``.iloc`` on frames whose column labels repeat."""

      def test_iloc_setitem_scalar_duplicate_columns(self):
          """A scalar set at (0, 0) must leave the third column's value and dtype alone."""
          # GH#15686, duplicate columns and mixed dtype
          left = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
          right = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
          frame = concat([left, right], axis=1)

          frame.iloc[0, 0] = -1

          assert frame.iloc[0, 0] == -1
          assert frame.iloc[0, 2] == 3
          assert frame.dtypes.iloc[2] == np.int64

      def test_iloc_setitem_list_duplicate_columns(self):
          """A one-element list replaces only the positionally selected duplicate column."""
          # GH#22036 setting with same-sized list
          labels = ["a", "b", "b"]
          frame = DataFrame([[0, "str", "str2"]], columns=labels)
          frame.iloc[:, 2] = ["str3"]
          want = DataFrame([[0, "str", "str3"]], columns=["a", "b", "b"])
          tm.assert_frame_equal(frame, want)

      def test_iloc_setitem_series_duplicate_columns(self):
          """Setting column 0 from a float Series keeps column 2 as int64."""
          values = np.arange(8, dtype=np.int64).reshape(2, 4)
          frame = DataFrame(values, columns=["A", "B", "A", "B"])
          as_float = frame.iloc[:, 0].astype(np.float64)
          frame.iloc[:, 0] = as_float
          assert frame.dtypes.iloc[2] == np.int64

      @pytest.mark.parametrize(
          ["dtypes", "init_value", "expected_value"],
          [("int64", "0", 0), ("float", "1.2", 1.2)],
      )
      def test_iloc_setitem_dtypes_duplicate_columns(
          self, dtypes, init_value, expected_value
      ):
          """Set column 0 of an object frame from its own values cast to ``dtypes``."""
          # GH#22035
          frame = DataFrame(
              [[init_value, "str", "str2"]], columns=["a", "b", "b"], dtype=object
          )

          # with the enforcement of GH#45333 in 2.0, this sets values inplace,
          # so we retain object dtype
          converted = frame.iloc[:, 0].astype(dtypes)
          frame.iloc[:, 0] = converted

          want = DataFrame(
              [[expected_value, "str", "str2"]],
              columns=["a", "b", "b"],
              dtype=object,
          )
          tm.assert_frame_equal(frame, want)
  class TestILocCallable:
      """Callables used as ``.iloc`` keys must act like the values they return."""

      def test_frame_iloc_getitem_callable(self):
          """Compare each callable-based key with the equivalent literal key on get."""
          # GH#11485
          df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))

          def rows(obj):
              return [1, 3]

          def col(obj):
              return 0

          def col_list(obj):
              return [0]

          frame_eq = tm.assert_frame_equal
          series_eq = tm.assert_series_equal
          # (key containing callables, equivalent literal key, comparison)
          cases = [
              # return location
              (rows, [1, 3], frame_eq),
              ((rows, slice(None)), ([1, 3], slice(None)), frame_eq),
              ((rows, col), ([1, 3], 0), series_eq),
              ((rows, col_list), ([1, 3], [0]), frame_eq),
              # mixture
              (([1, 3], col), ([1, 3], 0), series_eq),
              (([1, 3], col_list), ([1, 3], [0]), frame_eq),
              ((rows, 0), ([1, 3], 0), series_eq),
              ((rows, [0]), ([1, 3], [0]), frame_eq),
          ]
          for callable_key, literal_key, check in cases:
              check(df.iloc[callable_key], df.iloc[literal_key])

      def test_frame_iloc_setitem_callable(self):
          """Compare each callable-based key with the equivalent literal key on set."""
          # GH#11485
          df = DataFrame(
              {"X": [1, 2, 3, 4], "Y": Series(list("aabb"), dtype=object)},
              index=list("ABCD"),
          )

          def rows(obj):
              return [1, 3]

          def col(obj):
              return 0

          def col_list(obj):
              return [0]

          # (key containing callables, equivalent literal key, value to set)
          cases = [
              # return location
              (rows, [1, 3], 0),
              ((rows, slice(None)), ([1, 3], slice(None)), -1),
              ((rows, col), ([1, 3], 0), 5),
              ((rows, col_list), ([1, 3], [0]), 25),
              # mixture
              (([1, 3], col), ([1, 3], 0), -3),
              (([1, 3], col_list), ([1, 3], [0]), -5),
              ((rows, 0), ([1, 3], 0), 10),
              ((rows, [0]), ([1, 3], [0]), [-5, -5]),
          ]
          for callable_key, literal_key, value in cases:
              res = df.copy()
              res.iloc[callable_key] = value
              exp = df.copy()
              exp.iloc[literal_key] = value
              tm.assert_frame_equal(res, exp)
  1221. class TestILocSeries:
  def test_iloc(self, using_copy_on_write, warn_copy_on_write):
      """Scalar, slice and integer-list ``iloc`` access on a Series with an even-integer index."""
      values = np.random.default_rng(2).standard_normal(10)
      ser = Series(values, index=list(range(0, 20, 2)))
      ser_original = ser.copy()

      # each position must give the same value as a lookup by that position's label
      for pos in range(len(ser)):
          by_position = ser.iloc[pos]
          by_label = ser[ser.index[pos]]
          tm.assert_almost_equal(by_position, by_label)

      # pass a slice
      sliced = ser.iloc[slice(1, 3)]
      tm.assert_series_equal(sliced, ser.loc[2:4])

      # test slice is a view
      with tm.assert_produces_warning(None):
          # GH#45324 make sure we aren't giving a spurious FutureWarning
          with tm.assert_cow_warning(warn_copy_on_write):
              sliced[:] = 0

      if not using_copy_on_write:
          assert (ser.iloc[1:3] == 0).all()
      else:
          tm.assert_series_equal(ser, ser_original)

      # list of integers
      positions = [0, 2, 3, 4, 5]
      picked = ser.iloc[positions]
      want = ser.reindex(ser.index[[0, 2, 3, 4, 5]])
      tm.assert_series_equal(picked, want)
  1248. def test_iloc_getitem_nonunique(self):
  1249. ser = Series([0, 1, 2], index=[0, 1, 0])
  1250. assert ser.iloc[2] == 2
  1251. def test_iloc_setitem_pure_position_based(self):
  1252. # GH#22046
  1253. ser1 = Series([1, 2, 3])
  1254. ser2 = Series([4, 5, 6], index=[1, 0, 2])
  1255. ser1.iloc[1:3] = ser2.iloc[1:3]
  1256. expected = Series([1, 5, 6])
  1257. tm.assert_series_equal(ser1, expected)
  def test_iloc_nullable_int64_size_1_nan(self):
      """Set a one-row NaN column to its Int64 cast; a FutureWarning is expected."""
      # GH 31861
      frame = DataFrame({"a": ["test"], "b": [np.nan]})

      with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
          frame.loc[:, "b"] = frame.loc[:, "b"].astype("Int64")

      want = DataFrame({"a": ["test"], "b": array([NA], dtype="Int64")})
      tm.assert_frame_equal(frame, want)