indexing.py 97 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821
  1. from __future__ import annotations
  2. from contextlib import suppress
  3. import sys
  4. from typing import (
  5. TYPE_CHECKING,
  6. Any,
  7. TypeVar,
  8. cast,
  9. final,
  10. )
  11. import warnings
  12. import numpy as np
  13. from pandas._config import (
  14. using_copy_on_write,
  15. warn_copy_on_write,
  16. )
  17. from pandas._libs.indexing import NDFrameIndexerBase
  18. from pandas._libs.lib import item_from_zerodim
  19. from pandas.compat import PYPY
  20. from pandas.compat._constants import WARNING_CHECK_DISABLED
  21. from pandas.errors import (
  22. AbstractMethodError,
  23. ChainedAssignmentError,
  24. IndexingError,
  25. InvalidIndexError,
  26. LossySetitemError,
  27. _chained_assignment_msg,
  28. _chained_assignment_warning_msg,
  29. _check_cacher,
  30. )
  31. from pandas.util._decorators import doc
  32. from pandas.util._exceptions import find_stack_level
  33. from pandas.core.dtypes.cast import (
  34. can_hold_element,
  35. maybe_promote,
  36. )
  37. from pandas.core.dtypes.common import (
  38. is_array_like,
  39. is_bool_dtype,
  40. is_hashable,
  41. is_integer,
  42. is_iterator,
  43. is_list_like,
  44. is_numeric_dtype,
  45. is_object_dtype,
  46. is_scalar,
  47. is_sequence,
  48. )
  49. from pandas.core.dtypes.concat import concat_compat
  50. from pandas.core.dtypes.dtypes import ExtensionDtype
  51. from pandas.core.dtypes.generic import (
  52. ABCDataFrame,
  53. ABCSeries,
  54. )
  55. from pandas.core.dtypes.missing import (
  56. construct_1d_array_from_inferred_fill_value,
  57. infer_fill_value,
  58. is_valid_na_for_dtype,
  59. isna,
  60. na_value_for_dtype,
  61. )
  62. from pandas.core import algorithms as algos
  63. import pandas.core.common as com
  64. from pandas.core.construction import (
  65. array as pd_array,
  66. extract_array,
  67. )
  68. from pandas.core.indexers import (
  69. check_array_indexer,
  70. is_list_like_indexer,
  71. is_scalar_indexer,
  72. length_of_indexer,
  73. )
  74. from pandas.core.indexes.api import (
  75. Index,
  76. MultiIndex,
  77. )
  78. if TYPE_CHECKING:
  79. from collections.abc import (
  80. Hashable,
  81. Sequence,
  82. )
  83. from pandas._typing import (
  84. Axis,
  85. AxisInt,
  86. Self,
  87. npt,
  88. )
  89. from pandas import (
  90. DataFrame,
  91. Series,
  92. )
# Generic type variable (its uses lie outside this excerpt).
T = TypeVar("T")

# "null slice": the slice object produced by a bare ``:``.
_NS = slice(None, None)

# Message text for an indexer holding more than one Ellipsis (``...``);
# presumably used when raising -- the raising code is not in this excerpt.
_one_ellipsis_message = "indexer may only contain one '...' entry"
# the public IndexSlicerMaker
class _IndexSlice:
    """
    Create an object to more easily perform multi-index slicing.

    Indexing an instance returns the key unchanged, so ``idx[:, 'B0':'B1']``
    evaluates to ``(slice(None), slice('B0', 'B1'))``. This allows slice
    syntax to be used where ``slice`` objects would otherwise have to be
    written out by hand.

    See Also
    --------
    MultiIndex.remove_unused_levels : New MultiIndex with no unused levels.

    Notes
    -----
    See :ref:`Defined Levels <advanced.shown_levels>`
    for further info on slicing a MultiIndex.

    Examples
    --------
    >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']])
    >>> columns = ['foo', 'bar']
    >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))),
    ...                     index=midx, columns=columns)

    Using the default slice command:

    >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :]
           foo  bar
    A0 B0    0    1
       B1    2    3
    A1 B0    8    9
       B1   10   11

    Using the IndexSlice class for a more intuitive command:

    >>> idx = pd.IndexSlice
    >>> dfmi.loc[idx[:, 'B0':'B1'], :]
           foo  bar
    A0 B0    0    1
       B1    2    3
    A1 B0    8    9
       B1   10   11
    """

    def __getitem__(self, arg):
        # Identity: the subscript syntax has already been turned into
        # slice objects / a tuple by Python, so it is handed back as-is.
        return arg


# Module-level instance; the docstring above shows it used as ``pd.IndexSlice``.
IndexSlice = _IndexSlice()
class IndexingMixin:
    """
    Mixin for adding .loc/.iloc/.at/.iat to Dataframes and Series.
    """

    @property
    def iloc(self) -> _iLocIndexer:
        """
        Purely integer-location based indexing for selection by position.

        .. deprecated:: 2.2.0

           Returning a tuple from a callable is deprecated.

        ``.iloc[]`` is primarily integer position based (from ``0`` to
        ``length-1`` of the axis), but may also be used with a boolean
        array.

        Allowed inputs are:

        - An integer, e.g. ``5``.
        - A list or array of integers, e.g. ``[4, 3, 0]``.
        - A slice object with ints, e.g. ``1:7``.
        - A boolean array.
        - A ``callable`` function with one argument (the calling Series or
          DataFrame) and that returns valid output for indexing (one of the above).
          This is useful in method chains, when you don't have a reference to the
          calling object, but would like to base your selection on
          some value.
        - A tuple of row and column indexes. The tuple elements consist of one of the
          above inputs, e.g. ``(0, 1)``.

        ``.iloc`` will raise ``IndexError`` if a requested indexer is
        out-of-bounds, except *slice* indexers which allow out-of-bounds
        indexing (this conforms with python/numpy *slice* semantics).

        See more at :ref:`Selection by Position <indexing.integer>`.

        See Also
        --------
        DataFrame.iat : Fast integer location scalar accessor.
        DataFrame.loc : Purely label-location based indexer for selection by label.
        Series.iloc : Purely integer-location based indexing for
                       selection by position.

        Examples
        --------
        >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
        ...           {'a': 100, 'b': 200, 'c': 300, 'd': 400},
        ...           {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000}]
        >>> df = pd.DataFrame(mydict)
        >>> df
              a     b     c     d
        0     1     2     3     4
        1   100   200   300   400
        2  1000  2000  3000  4000

        **Indexing just the rows**

        With a scalar integer.

        >>> type(df.iloc[0])
        <class 'pandas.core.series.Series'>
        >>> df.iloc[0]
        a    1
        b    2
        c    3
        d    4
        Name: 0, dtype: int64

        With a list of integers.

        >>> df.iloc[[0]]
           a  b  c  d
        0  1  2  3  4
        >>> type(df.iloc[[0]])
        <class 'pandas.core.frame.DataFrame'>

        >>> df.iloc[[0, 1]]
             a    b    c    d
        0    1    2    3    4
        1  100  200  300  400

        With a `slice` object.

        >>> df.iloc[:3]
              a     b     c     d
        0     1     2     3     4
        1   100   200   300   400
        2  1000  2000  3000  4000

        With a boolean mask the same length as the index.

        >>> df.iloc[[True, False, True]]
              a     b     c     d
        0     1     2     3     4
        2  1000  2000  3000  4000

        With a callable, useful in method chains. The `x` passed
        to the ``lambda`` is the DataFrame being sliced. This selects
        the rows whose index label is even.

        >>> df.iloc[lambda x: x.index % 2 == 0]
              a     b     c     d
        0     1     2     3     4
        2  1000  2000  3000  4000

        **Indexing both axes**

        You can mix the indexer types for the index and columns. Use ``:`` to
        select the entire axis.

        With scalar integers.

        >>> df.iloc[0, 1]
        2

        With lists of integers.

        >>> df.iloc[[0, 2], [1, 3]]
              b     d
        0     2     4
        2  2000  4000

        With `slice` objects.

        >>> df.iloc[1:3, 0:3]
              a     b     c
        1   100   200   300
        2  1000  2000  3000

        With a boolean array whose length matches the columns.

        >>> df.iloc[:, [True, False, True, False]]
              a     c
        0     1     3
        1   100   300
        2  1000  3000

        With a callable function that expects the Series or DataFrame.

        >>> df.iloc[:, lambda df: [0, 2]]
              a     c
        0     1     3
        1   100   300
        2  1000  3000
        """
        return _iLocIndexer("iloc", self)

    @property
    def loc(self) -> _LocIndexer:
        """
        Access a group of rows and columns by label(s) or a boolean array.

        ``.loc[]`` is primarily label based, but may also be used with a
        boolean array.

        Allowed inputs are:

        - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
          interpreted as a *label* of the index, and **never** as an
          integer position along the index).
        - A list or array of labels, e.g. ``['a', 'b', 'c']``.
        - A slice object with labels, e.g. ``'a':'f'``.

          .. warning:: Note that contrary to usual python slices, **both** the
              start and the stop are included

        - A boolean array of the same length as the axis being sliced,
          e.g. ``[True, False, True]``.
        - An alignable boolean Series. The index of the key will be aligned before
          masking.
        - An alignable Index. The Index of the returned selection will be the input.
        - A ``callable`` function with one argument (the calling Series or
          DataFrame) and that returns valid output for indexing (one of the above)

        See more at :ref:`Selection by Label <indexing.label>`.

        Raises
        ------
        KeyError
            If any items are not found.
        IndexingError
            If an indexed key is passed and its index is unalignable to the frame index.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column label pair.
        DataFrame.iloc : Access group of rows and columns by integer position(s).
        DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
                       Series/DataFrame.
        Series.loc : Access group of values using labels.

        Examples
        --------
        **Getting values**

        >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
        ...                   index=['cobra', 'viper', 'sidewinder'],
        ...                   columns=['max_speed', 'shield'])
        >>> df
                    max_speed  shield
        cobra               1       2
        viper               4       5
        sidewinder          7       8

        Single label. Note this returns the row as a Series.

        >>> df.loc['viper']
        max_speed    4
        shield       5
        Name: viper, dtype: int64

        List of labels. Note using ``[[]]`` returns a DataFrame.

        >>> df.loc[['viper', 'sidewinder']]
                    max_speed  shield
        viper               4       5
        sidewinder          7       8

        Single label for row and column

        >>> df.loc['cobra', 'shield']
        2

        Slice with labels for row and single label for column. As mentioned
        above, note that both the start and stop of the slice are included.

        >>> df.loc['cobra':'viper', 'max_speed']
        cobra    1
        viper    4
        Name: max_speed, dtype: int64

        Boolean list with the same length as the row axis

        >>> df.loc[[False, False, True]]
                    max_speed  shield
        sidewinder          7       8

        Alignable boolean Series:

        >>> df.loc[pd.Series([False, True, False],
        ...                  index=['viper', 'sidewinder', 'cobra'])]
                    max_speed  shield
        sidewinder          7       8

        Index (same behavior as ``df.reindex``)

        >>> df.loc[pd.Index(["cobra", "viper"], name="foo")]
               max_speed  shield
        foo
        cobra          1       2
        viper          4       5

        Conditional that returns a boolean Series

        >>> df.loc[df['shield'] > 6]
                    max_speed  shield
        sidewinder          7       8

        Conditional that returns a boolean Series with column labels specified

        >>> df.loc[df['shield'] > 6, ['max_speed']]
                    max_speed
        sidewinder          7

        Multiple conditional using ``&`` that returns a boolean Series

        >>> df.loc[(df['max_speed'] > 1) & (df['shield'] < 8)]
               max_speed  shield
        viper          4       5

        Multiple conditional using ``|`` that returns a boolean Series

        >>> df.loc[(df['max_speed'] > 4) | (df['shield'] < 5)]
                    max_speed  shield
        cobra               1       2
        sidewinder          7       8

        Please ensure that each condition is wrapped in parentheses ``()``.
        See the :ref:`user guide<indexing.boolean>`
        for more details and explanations of Boolean indexing.

        .. note::
            If you find yourself using 3 or more conditionals in ``.loc[]``,
            consider using :ref:`advanced indexing<advanced.advanced_hierarchical>`.

            See below for using ``.loc[]`` on MultiIndex DataFrames.

        Callable that returns a boolean Series

        >>> df.loc[lambda df: df['shield'] == 8]
                    max_speed  shield
        sidewinder          7       8

        **Setting values**

        Set value for all items matching the list of labels

        >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
        >>> df
                    max_speed  shield
        cobra               1       2
        viper               4      50
        sidewinder          7      50

        Set value for an entire row

        >>> df.loc['cobra'] = 10
        >>> df
                    max_speed  shield
        cobra              10      10
        viper               4      50
        sidewinder          7      50

        Set value for an entire column

        >>> df.loc[:, 'max_speed'] = 30
        >>> df
                    max_speed  shield
        cobra              30      10
        viper              30      50
        sidewinder         30      50

        Set value for rows matching boolean condition

        >>> df.loc[df['shield'] > 35] = 0
        >>> df
                    max_speed  shield
        cobra              30      10
        viper               0       0
        sidewinder          0       0

        Add value matching location

        >>> df.loc["viper", "shield"] += 5
        >>> df
                    max_speed  shield
        cobra              30      10
        viper               0       5
        sidewinder          0       0

        Setting using a ``Series`` or a ``DataFrame`` sets the values matching the
        index labels, not the index positions.

        >>> shuffled_df = df.loc[["viper", "cobra", "sidewinder"]]
        >>> df.loc[:] += shuffled_df
        >>> df
                    max_speed  shield
        cobra              60      20
        viper               0      10
        sidewinder          0       0

        **Getting values on a DataFrame with an index that has integer labels**

        Another example using integers for the index

        >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
        ...                   index=[7, 8, 9], columns=['max_speed', 'shield'])
        >>> df
           max_speed  shield
        7          1       2
        8          4       5
        9          7       8

        Slice with integer labels for rows. As mentioned above, note that both
        the start and stop of the slice are included.

        >>> df.loc[7:9]
           max_speed  shield
        7          1       2
        8          4       5
        9          7       8

        **Getting values with a MultiIndex**

        A number of examples using a DataFrame with a MultiIndex

        >>> tuples = [
        ...     ('cobra', 'mark i'), ('cobra', 'mark ii'),
        ...     ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
        ...     ('viper', 'mark ii'), ('viper', 'mark iii')
        ... ]
        >>> index = pd.MultiIndex.from_tuples(tuples)
        >>> values = [[12, 2], [0, 4], [10, 20],
        ...           [1, 4], [7, 1], [16, 36]]
        >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
        >>> df
                             max_speed  shield
        cobra      mark i           12       2
                   mark ii           0       4
        sidewinder mark i           10      20
                   mark ii           1       4
        viper      mark ii           7       1
                   mark iii         16      36

        Single label. Note this returns a DataFrame with a single index.

        >>> df.loc['cobra']
                 max_speed  shield
        mark i          12       2
        mark ii          0       4

        Single index tuple. Note this returns a Series.

        >>> df.loc[('cobra', 'mark ii')]
        max_speed    0
        shield       4
        Name: (cobra, mark ii), dtype: int64

        Single label for row and column. Similar to passing in a tuple, this
        returns a Series.

        >>> df.loc['cobra', 'mark i']
        max_speed    12
        shield        2
        Name: (cobra, mark i), dtype: int64

        Single tuple. Note using ``[[]]`` returns a DataFrame.

        >>> df.loc[[('cobra', 'mark ii')]]
                       max_speed  shield
        cobra mark ii          0       4

        Single tuple for the index with a single label for the column

        >>> df.loc[('cobra', 'mark i'), 'shield']
        2

        Slice from index tuple to single label

        >>> df.loc[('cobra', 'mark i'):'viper']
                             max_speed  shield
        cobra      mark i           12       2
                   mark ii           0       4
        sidewinder mark i           10      20
                   mark ii           1       4
        viper      mark ii           7       1
                   mark iii         16      36

        Slice from index tuple to index tuple

        >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')]
                            max_speed  shield
        cobra      mark i          12       2
                   mark ii          0       4
        sidewinder mark i          10      20
                   mark ii          1       4
        viper      mark ii          7       1

        Please see the :ref:`user guide<advanced.advanced_hierarchical>`
        for more details and explanations of advanced indexing.
        """
        return _LocIndexer("loc", self)

    @property
    def at(self) -> _AtIndexer:
        """
        Access a single value for a row/column label pair.

        Similar to ``loc``, in that both provide label-based lookups. Use
        ``at`` if you only need to get or set a single value in a DataFrame
        or Series.

        Raises
        ------
        KeyError
            If getting a value and 'label' does not exist in a DataFrame or Series.

        ValueError
            If row/column label pair is not a tuple or if any label
            from the pair is not a scalar for DataFrame.
            If label is list-like (*excluding* NamedTuple) for Series.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column pair by label.
        DataFrame.iat : Access a single value for a row/column pair by integer
            position.
        DataFrame.loc : Access a group of rows and columns by label(s).
        DataFrame.iloc : Access a group of rows and columns by integer
            position(s).
        Series.at : Access a single value by label.
        Series.iat : Access a single value by integer position.
        Series.loc : Access a group of rows by label(s).
        Series.iloc : Access a group of rows by integer position(s).

        Notes
        -----
        See :ref:`Fast scalar value getting and setting <indexing.basics.get_value>`
        for more details.

        Examples
        --------
        >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
        ...                   index=[4, 5, 6], columns=['A', 'B', 'C'])
        >>> df
            A   B   C
        4   0   2   3
        5   0   4   1
        6  10  20  30

        Get value at specified row/column pair

        >>> df.at[4, 'B']
        2

        Set value at specified row/column pair

        >>> df.at[4, 'B'] = 10
        >>> df.at[4, 'B']
        10

        Get value within a Series

        >>> df.loc[5].at['B']
        4
        """
        return _AtIndexer("at", self)

    @property
    def iat(self) -> _iAtIndexer:
        """
        Access a single value for a row/column pair by integer position.

        Similar to ``iloc``, in that both provide integer-based lookups. Use
        ``iat`` if you only need to get or set a single value in a DataFrame
        or Series.

        Raises
        ------
        IndexError
            When integer position is out of bounds.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column label pair.
        DataFrame.loc : Access a group of rows and columns by label(s).
        DataFrame.iloc : Access a group of rows and columns by integer position(s).

        Examples
        --------
        >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
        ...                   columns=['A', 'B', 'C'])
        >>> df
            A   B   C
        0   0   2   3
        1   0   4   1
        2  10  20  30

        Get value at specified row/column pair

        >>> df.iat[1, 2]
        1

        Set value at specified row/column pair

        >>> df.iat[1, 2] = 10
        >>> df.iat[1, 2]
        10

        Get value within a series

        >>> df.loc[0].iat[1]
        2
        """
        return _iAtIndexer("iat", self)
  568. class _LocationIndexer(NDFrameIndexerBase):
  569. _valid_types: str
  570. axis: AxisInt | None = None
  571. # sub-classes need to set _takeable
  572. _takeable: bool
  573. @final
  574. def __call__(self, axis: Axis | None = None) -> Self:
  575. # we need to return a copy of ourselves
  576. new_self = type(self)(self.name, self.obj)
  577. if axis is not None:
  578. axis_int_none = self.obj._get_axis_number(axis)
  579. else:
  580. axis_int_none = axis
  581. new_self.axis = axis_int_none
  582. return new_self
  583. def _get_setitem_indexer(self, key):
  584. """
  585. Convert a potentially-label-based key into a positional indexer.
  586. """
  587. if self.name == "loc":
  588. # always holds here bc iloc overrides _get_setitem_indexer
  589. self._ensure_listlike_indexer(key)
  590. if isinstance(key, tuple):
  591. for x in key:
  592. check_dict_or_set_indexers(x)
  593. if self.axis is not None:
  594. key = _tupleize_axis_indexer(self.ndim, self.axis, key)
  595. ax = self.obj._get_axis(0)
  596. if (
  597. isinstance(ax, MultiIndex)
  598. and self.name != "iloc"
  599. and is_hashable(key)
  600. and not isinstance(key, slice)
  601. ):
  602. with suppress(KeyError, InvalidIndexError):
  603. # TypeError e.g. passed a bool
  604. return ax.get_loc(key)
  605. if isinstance(key, tuple):
  606. with suppress(IndexingError):
  607. # suppress "Too many indexers"
  608. return self._convert_tuple(key)
  609. if isinstance(key, range):
  610. # GH#45479 test_loc_setitem_range_key
  611. key = list(key)
  612. return self._convert_to_indexer(key, axis=0)
    @final
    def _maybe_mask_setitem_value(self, indexer, value):
        """
        If we have obj.loc[mask] = series_or_frame and series_or_frame has the
        same length as the mask, we treat this as
        obj.loc[mask] = series_or_frame[mask], similar to Series.__setitem__.

        Note this is only for loc, not iloc.

        Parameters
        ----------
        indexer : tuple or array-like
            Either a 2-tuple ``(row_indexer, column_indexer)`` or a single
            indexer.
        value : object
            The value being set. Alignment is only attempted when it is a
            Series or DataFrame.

        Returns
        -------
        tuple
            ``(indexer, value)``. When a boolean mask was handled, the mask is
            replaced by the integer positions of its True entries and, in the
            2-tuple case, ``value`` is replaced by its aligned counterpart.
            Otherwise both are returned unchanged.
        """
        if (
            isinstance(indexer, tuple)
            and len(indexer) == 2
            and isinstance(value, (ABCSeries, ABCDataFrame))
        ):
            pi, icols = indexer
            ndim = value.ndim
            if com.is_bool_indexer(pi) and len(value) == len(pi):
                # positions of the True entries of the row mask
                newkey = pi.nonzero()[0]

                if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1:
                    # e.g. test_loc_setitem_boolean_mask_allfalse
                    #  test_loc_setitem_ndframe_values_alignment
                    # align against the original (mask-based) indexer, then
                    # switch to the positional form
                    value = self.obj.iloc._align_series(indexer, value)
                    indexer = (newkey, icols)

                elif (
                    isinstance(icols, np.ndarray)
                    and icols.dtype.kind == "i"
                    and len(icols) == 1
                ):
                    # column indexer is a length-1 integer array
                    if ndim == 1:
                        # We implicitly broadcast, though numpy does not, see
                        # github.com/pandas-dev/pandas/pull/45501#discussion_r789071825
                        #  test_loc_setitem_ndframe_values_alignment
                        value = self.obj.iloc._align_series(indexer, value)
                        indexer = (newkey, icols)

                    elif ndim == 2 and value.shape[1] == 1:
                        #  test_loc_setitem_ndframe_values_alignment
                        value = self.obj.iloc._align_frame(indexer, value)
                        indexer = (newkey, icols)
        elif com.is_bool_indexer(indexer):
            # a lone boolean mask: convert to integer positions, value untouched
            indexer = indexer.nonzero()[0]

        return indexer, value
  653. @final
  654. def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
  655. """
  656. Ensure that a list-like of column labels are all present by adding them if
  657. they do not already exist.
  658. Parameters
  659. ----------
  660. key : list-like of column labels
  661. Target labels.
  662. axis : key axis if known
  663. """
  664. column_axis = 1
  665. # column only exists in 2-dimensional DataFrame
  666. if self.ndim != 2:
  667. return
  668. if isinstance(key, tuple) and len(key) > 1:
  669. # key may be a tuple if we are .loc
  670. # if length of key is > 1 set key to column part
  671. key = key[column_axis]
  672. axis = column_axis
  673. if (
  674. axis == column_axis
  675. and not isinstance(self.obj.columns, MultiIndex)
  676. and is_list_like_indexer(key)
  677. and not com.is_bool_indexer(key)
  678. and all(is_hashable(k) for k in key)
  679. ):
  680. # GH#38148
  681. keys = self.obj.columns.union(key, sort=False)
  682. diff = Index(key).difference(self.obj.columns, sort=False)
  683. if len(diff):
  684. # e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B"
  685. # is a new column, add the new columns with dtype=np.void
  686. # so that later when we go through setitem_single_column
  687. # we will use isetitem. Without this, the reindex_axis
  688. # below would create float64 columns in this example, which
  689. # would successfully hold 7, so we would end up with the wrong
  690. # dtype.
  691. indexer = np.arange(len(keys), dtype=np.intp)
  692. indexer[len(self.obj.columns) :] = -1
  693. new_mgr = self.obj._mgr.reindex_indexer(
  694. keys, indexer=indexer, axis=0, only_slice=True, use_na_proxy=True
  695. )
  696. self.obj._mgr = new_mgr
  697. return
  698. self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True)
  @final
  def __setitem__(self, key, value) -> None:
      """
      Set ``value`` at ``key``, after the chained-assignment refcount check.

      The key is normalised (iterators materialised, callables applied to
      ``self.obj``), converted with ``_get_setitem_indexer`` and the actual
      write is delegated to ``iloc._setitem_with_indexer``.
      """
      # NOTE: ``self.obj`` is deliberately never bound to a local name before
      # the ``sys.getrefcount`` calls; doing so would change the count that
      # the chained-assignment detection compares against.
      if not PYPY and not WARNING_CHECK_DISABLED:
          if using_copy_on_write():
              if sys.getrefcount(self.obj) <= 2:
                  warnings.warn(
                      _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
                  )
          else:
              ctr = sys.getrefcount(self.obj)
              ref_count = 2
              if not warn_copy_on_write() and _check_cacher(self.obj):
                  # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
                  ref_count += 1
              if ctr <= ref_count:
                  warnings.warn(
                      _chained_assignment_warning_msg, FutureWarning, stacklevel=2
                  )

      check_dict_or_set_indexers(key)
      if not isinstance(key, tuple):
          resolved = com.apply_if_callable(key, self.obj)
          key = self._check_deprecated_callable_usage(key, resolved)
      else:
          # First materialise every iterator, then resolve every callable.
          key = tuple(list(part) if is_iterator(part) else part for part in key)
          key = tuple(com.apply_if_callable(part, self.obj) for part in key)

      indexer = self._get_setitem_indexer(key)
      self._has_valid_setitem_indexer(key)

      if self.name == "iloc":
          positional = self
      else:
          positional = self.obj.iloc
      positional._setitem_with_indexer(indexer, value, self.name)
  727. def _validate_key(self, key, axis: AxisInt):
  728. """
  729. Ensure that key is valid for current indexer.
  730. Parameters
  731. ----------
  732. key : scalar, slice or list-like
  733. Key requested.
  734. axis : int
  735. Dimension on which the indexing is being made.
  736. Raises
  737. ------
  738. TypeError
  739. If the key (or some element of it) has wrong type.
  740. IndexError
  741. If the key (or some element of it) is out of bounds.
  742. KeyError
  743. If the key was not found.
  744. """
  745. raise AbstractMethodError(self)
  746. @final
  747. def _expand_ellipsis(self, tup: tuple) -> tuple:
  748. """
  749. If a tuple key includes an Ellipsis, replace it with an appropriate
  750. number of null slices.
  751. """
  752. if any(x is Ellipsis for x in tup):
  753. if tup.count(Ellipsis) > 1:
  754. raise IndexingError(_one_ellipsis_message)
  755. if len(tup) == self.ndim:
  756. # It is unambiguous what axis this Ellipsis is indexing,
  757. # treat as a single null slice.
  758. i = tup.index(Ellipsis)
  759. # FIXME: this assumes only one Ellipsis
  760. new_key = tup[:i] + (_NS,) + tup[i + 1 :]
  761. return new_key
  762. # TODO: other cases? only one test gets here, and that is covered
  763. # by _validate_key_length
  764. return tup
  765. @final
  766. def _validate_tuple_indexer(self, key: tuple) -> tuple:
  767. """
  768. Check the key for valid keys across my indexer.
  769. """
  770. key = self._validate_key_length(key)
  771. key = self._expand_ellipsis(key)
  772. for i, k in enumerate(key):
  773. try:
  774. self._validate_key(k, i)
  775. except ValueError as err:
  776. raise ValueError(
  777. "Location based indexing can only have "
  778. f"[{self._valid_types}] types"
  779. ) from err
  780. return key
  781. @final
  782. def _is_nested_tuple_indexer(self, tup: tuple) -> bool:
  783. """
  784. Returns
  785. -------
  786. bool
  787. """
  788. if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):
  789. return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)
  790. return False
  791. @final
  792. def _convert_tuple(self, key: tuple) -> tuple:
  793. # Note: we assume _tupleize_axis_indexer has been called, if necessary.
  794. self._validate_key_length(key)
  795. keyidx = [self._convert_to_indexer(k, axis=i) for i, k in enumerate(key)]
  796. return tuple(keyidx)
  797. @final
  798. def _validate_key_length(self, key: tuple) -> tuple:
  799. if len(key) > self.ndim:
  800. if key[0] is Ellipsis:
  801. # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3]
  802. key = key[1:]
  803. if Ellipsis in key:
  804. raise IndexingError(_one_ellipsis_message)
  805. return self._validate_key_length(key)
  806. raise IndexingError("Too many indexers")
  807. return key
  808. @final
  809. def _getitem_tuple_same_dim(self, tup: tuple):
  810. """
  811. Index with indexers that should return an object of the same dimension
  812. as self.obj.
  813. This is only called after a failed call to _getitem_lowerdim.
  814. """
  815. retval = self.obj
  816. # Selecting columns before rows is significantly faster
  817. start_val = (self.ndim - len(tup)) + 1
  818. for i, key in enumerate(reversed(tup)):
  819. i = self.ndim - i - start_val
  820. if com.is_null_slice(key):
  821. continue
  822. retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
  823. # We should never have retval.ndim < self.ndim, as that should
  824. # be handled by the _getitem_lowerdim call above.
  825. assert retval.ndim == self.ndim
  826. if retval is self.obj:
  827. # if all axes were a null slice (`df.loc[:, :]`), ensure we still
  828. # return a new object (https://github.com/pandas-dev/pandas/pull/49469)
  829. retval = retval.copy(deep=False)
  830. return retval
  831. @final
  832. def _getitem_lowerdim(self, tup: tuple):
  833. # we can directly get the axis result since the axis is specified
  834. if self.axis is not None:
  835. axis = self.obj._get_axis_number(self.axis)
  836. return self._getitem_axis(tup, axis=axis)
  837. # we may have a nested tuples indexer here
  838. if self._is_nested_tuple_indexer(tup):
  839. return self._getitem_nested_tuple(tup)
  840. # we maybe be using a tuple to represent multiple dimensions here
  841. ax0 = self.obj._get_axis(0)
  842. # ...but iloc should handle the tuple as simple integer-location
  843. # instead of checking it as multiindex representation (GH 13797)
  844. if (
  845. isinstance(ax0, MultiIndex)
  846. and self.name != "iloc"
  847. and not any(isinstance(x, slice) for x in tup)
  848. ):
  849. # Note: in all extant test cases, replacing the slice condition with
  850. # `all(is_hashable(x) or com.is_null_slice(x) for x in tup)`
  851. # is equivalent.
  852. # (see the other place where we call _handle_lowerdim_multi_index_axis0)
  853. with suppress(IndexingError):
  854. return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(tup)
  855. tup = self._validate_key_length(tup)
  856. for i, key in enumerate(tup):
  857. if is_label_like(key):
  858. # We don't need to check for tuples here because those are
  859. # caught by the _is_nested_tuple_indexer check above.
  860. section = self._getitem_axis(key, axis=i)
  861. # We should never have a scalar section here, because
  862. # _getitem_lowerdim is only called after a check for
  863. # is_scalar_access, which that would be.
  864. if section.ndim == self.ndim:
  865. # we're in the middle of slicing through a MultiIndex
  866. # revise the key wrt to `section` by inserting an _NS
  867. new_key = tup[:i] + (_NS,) + tup[i + 1 :]
  868. else:
  869. # Note: the section.ndim == self.ndim check above
  870. # rules out having DataFrame here, so we dont need to worry
  871. # about transposing.
  872. new_key = tup[:i] + tup[i + 1 :]
  873. if len(new_key) == 1:
  874. new_key = new_key[0]
  875. # Slices should return views, but calling iloc/loc with a null
  876. # slice returns a new object.
  877. if com.is_null_slice(new_key):
  878. return section
  879. # This is an elided recursive call to iloc/loc
  880. return getattr(section, self.name)[new_key]
  881. raise IndexingError("not applicable")
  882. @final
  883. def _getitem_nested_tuple(self, tup: tuple):
  884. # we have a nested tuple so have at least 1 multi-index level
  885. # we should be able to match up the dimensionality here
  886. def _contains_slice(x: object) -> bool:
  887. # Check if object is a slice or a tuple containing a slice
  888. if isinstance(x, tuple):
  889. return any(isinstance(v, slice) for v in x)
  890. elif isinstance(x, slice):
  891. return True
  892. return False
  893. for key in tup:
  894. check_dict_or_set_indexers(key)
  895. # we have too many indexers for our dim, but have at least 1
  896. # multi-index dimension, try to see if we have something like
  897. # a tuple passed to a series with a multi-index
  898. if len(tup) > self.ndim:
  899. if self.name != "loc":
  900. # This should never be reached, but let's be explicit about it
  901. raise ValueError("Too many indices") # pragma: no cover
  902. if all(
  903. (is_hashable(x) and not _contains_slice(x)) or com.is_null_slice(x)
  904. for x in tup
  905. ):
  906. # GH#10521 Series should reduce MultiIndex dimensions instead of
  907. # DataFrame, IndexingError is not raised when slice(None,None,None)
  908. # with one row.
  909. with suppress(IndexingError):
  910. return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(
  911. tup
  912. )
  913. elif isinstance(self.obj, ABCSeries) and any(
  914. isinstance(k, tuple) for k in tup
  915. ):
  916. # GH#35349 Raise if tuple in tuple for series
  917. # Do this after the all-hashable-or-null-slice check so that
  918. # we are only getting non-hashable tuples, in particular ones
  919. # that themselves contain a slice entry
  920. # See test_loc_series_getitem_too_many_dimensions
  921. raise IndexingError("Too many indexers")
  922. # this is a series with a multi-index specified a tuple of
  923. # selectors
  924. axis = self.axis or 0
  925. return self._getitem_axis(tup, axis=axis)
  926. # handle the multi-axis by taking sections and reducing
  927. # this is iterative
  928. obj = self.obj
  929. # GH#41369 Loop in reverse order ensures indexing along columns before rows
  930. # which selects only necessary blocks which avoids dtype conversion if possible
  931. axis = len(tup) - 1
  932. for key in tup[::-1]:
  933. if com.is_null_slice(key):
  934. axis -= 1
  935. continue
  936. obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
  937. axis -= 1
  938. # if we have a scalar, we are done
  939. if is_scalar(obj) or not hasattr(obj, "ndim"):
  940. break
  941. return obj
  942. def _convert_to_indexer(self, key, axis: AxisInt):
  943. raise AbstractMethodError(self)
  944. def _check_deprecated_callable_usage(self, key: Any, maybe_callable: T) -> T:
  945. # GH53533
  946. if self.name == "iloc" and callable(key) and isinstance(maybe_callable, tuple):
  947. warnings.warn(
  948. "Returning a tuple from a callable with iloc "
  949. "is deprecated and will be removed in a future version",
  950. FutureWarning,
  951. stacklevel=find_stack_level(),
  952. )
  953. return maybe_callable
  954. @final
  955. def __getitem__(self, key):
  956. check_dict_or_set_indexers(key)
  957. if type(key) is tuple:
  958. key = tuple(list(x) if is_iterator(x) else x for x in key)
  959. key = tuple(com.apply_if_callable(x, self.obj) for x in key)
  960. if self._is_scalar_access(key):
  961. return self.obj._get_value(*key, takeable=self._takeable)
  962. return self._getitem_tuple(key)
  963. else:
  964. # we by definition only have the 0th axis
  965. axis = self.axis or 0
  966. maybe_callable = com.apply_if_callable(key, self.obj)
  967. maybe_callable = self._check_deprecated_callable_usage(key, maybe_callable)
  968. return self._getitem_axis(maybe_callable, axis=axis)
  969. def _is_scalar_access(self, key: tuple):
  970. raise NotImplementedError()
  971. def _getitem_tuple(self, tup: tuple):
  972. raise AbstractMethodError(self)
  973. def _getitem_axis(self, key, axis: AxisInt):
  974. raise NotImplementedError()
  975. def _has_valid_setitem_indexer(self, indexer) -> bool:
  976. raise AbstractMethodError(self)
  977. @final
  978. def _getbool_axis(self, key, axis: AxisInt):
  979. # caller is responsible for ensuring non-None axis
  980. labels = self.obj._get_axis(axis)
  981. key = check_bool_indexer(labels, key)
  982. inds = key.nonzero()[0]
  983. return self.obj._take_with_is_copy(inds, axis=axis)
  984. @doc(IndexingMixin.loc)
  985. class _LocIndexer(_LocationIndexer):
  986. _takeable: bool = False
  987. _valid_types = (
  988. "labels (MUST BE IN THE INDEX), slices of labels (BOTH "
  989. "endpoints included! Can be slices of integers if the "
  990. "index is integers), listlike of labels, boolean"
  991. )
  992. # -------------------------------------------------------------------
  993. # Key Checks
  994. @doc(_LocationIndexer._validate_key)
  995. def _validate_key(self, key, axis: Axis):
  996. # valid for a collection of labels (we check their presence later)
  997. # slice of labels (where start-end in labels)
  998. # slice of integers (only if in the labels)
  999. # boolean not in slice and with boolean index
  1000. ax = self.obj._get_axis(axis)
  1001. if isinstance(key, bool) and not (
  1002. is_bool_dtype(ax.dtype)
  1003. or ax.dtype.name == "boolean"
  1004. or isinstance(ax, MultiIndex)
  1005. and is_bool_dtype(ax.get_level_values(0).dtype)
  1006. ):
  1007. raise KeyError(
  1008. f"{key}: boolean label can not be used without a boolean index"
  1009. )
  1010. if isinstance(key, slice) and (
  1011. isinstance(key.start, bool) or isinstance(key.stop, bool)
  1012. ):
  1013. raise TypeError(f"{key}: boolean values can not be used in a slice")
  1014. def _has_valid_setitem_indexer(self, indexer) -> bool:
  1015. return True
  1016. def _is_scalar_access(self, key: tuple) -> bool:
  1017. """
  1018. Returns
  1019. -------
  1020. bool
  1021. """
  1022. # this is a shortcut accessor to both .loc and .iloc
  1023. # that provide the equivalent access of .at and .iat
  1024. # a) avoid getting things via sections and (to minimize dtype changes)
  1025. # b) provide a performant path
  1026. if len(key) != self.ndim:
  1027. return False
  1028. for i, k in enumerate(key):
  1029. if not is_scalar(k):
  1030. return False
  1031. ax = self.obj.axes[i]
  1032. if isinstance(ax, MultiIndex):
  1033. return False
  1034. if isinstance(k, str) and ax._supports_partial_string_indexing:
  1035. # partial string indexing, df.loc['2000', 'A']
  1036. # should not be considered scalar
  1037. return False
  1038. if not ax._index_as_unique:
  1039. return False
  1040. return True
  1041. # -------------------------------------------------------------------
  1042. # MultiIndex Handling
  1043. def _multi_take_opportunity(self, tup: tuple) -> bool:
  1044. """
  1045. Check whether there is the possibility to use ``_multi_take``.
  1046. Currently the limit is that all axes being indexed, must be indexed with
  1047. list-likes.
  1048. Parameters
  1049. ----------
  1050. tup : tuple
  1051. Tuple of indexers, one per axis.
  1052. Returns
  1053. -------
  1054. bool
  1055. Whether the current indexing,
  1056. can be passed through `_multi_take`.
  1057. """
  1058. if not all(is_list_like_indexer(x) for x in tup):
  1059. return False
  1060. # just too complicated
  1061. return not any(com.is_bool_indexer(x) for x in tup)
  1062. def _multi_take(self, tup: tuple):
  1063. """
  1064. Create the indexers for the passed tuple of keys, and
  1065. executes the take operation. This allows the take operation to be
  1066. executed all at once, rather than once for each dimension.
  1067. Improving efficiency.
  1068. Parameters
  1069. ----------
  1070. tup : tuple
  1071. Tuple of indexers, one per axis.
  1072. Returns
  1073. -------
  1074. values: same type as the object being indexed
  1075. """
  1076. # GH 836
  1077. d = {
  1078. axis: self._get_listlike_indexer(key, axis)
  1079. for (key, axis) in zip(tup, self.obj._AXIS_ORDERS)
  1080. }
  1081. return self.obj._reindex_with_indexers(d, copy=True, allow_dups=True)
  1082. # -------------------------------------------------------------------
  1083. def _getitem_iterable(self, key, axis: AxisInt):
  1084. """
  1085. Index current object with an iterable collection of keys.
  1086. Parameters
  1087. ----------
  1088. key : iterable
  1089. Targeted labels.
  1090. axis : int
  1091. Dimension on which the indexing is being made.
  1092. Raises
  1093. ------
  1094. KeyError
  1095. If no key was found. Will change in the future to raise if not all
  1096. keys were found.
  1097. Returns
  1098. -------
  1099. scalar, DataFrame, or Series: indexed value(s).
  1100. """
  1101. # we assume that not com.is_bool_indexer(key), as that is
  1102. # handled before we get here.
  1103. self._validate_key(key, axis)
  1104. # A collection of keys
  1105. keyarr, indexer = self._get_listlike_indexer(key, axis)
  1106. return self.obj._reindex_with_indexers(
  1107. {axis: [keyarr, indexer]}, copy=True, allow_dups=True
  1108. )
  1109. def _getitem_tuple(self, tup: tuple):
  1110. with suppress(IndexingError):
  1111. tup = self._expand_ellipsis(tup)
  1112. return self._getitem_lowerdim(tup)
  1113. # no multi-index, so validate all of the indexers
  1114. tup = self._validate_tuple_indexer(tup)
  1115. # ugly hack for GH #836
  1116. if self._multi_take_opportunity(tup):
  1117. return self._multi_take(tup)
  1118. return self._getitem_tuple_same_dim(tup)
  1119. def _get_label(self, label, axis: AxisInt):
  1120. # GH#5567 this will fail if the label is not present in the axis.
  1121. return self.obj.xs(label, axis=axis)
  1122. def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
  1123. # we have an axis0 multi-index, handle or raise
  1124. axis = self.axis or 0
  1125. try:
  1126. # fast path for series or for tup devoid of slices
  1127. return self._get_label(tup, axis=axis)
  1128. except KeyError as ek:
  1129. # raise KeyError if number of indexers match
  1130. # else IndexingError will be raised
  1131. if self.ndim < len(tup) <= self.obj.index.nlevels:
  1132. raise ek
  1133. raise IndexingError("No label returned") from ek
  1134. def _getitem_axis(self, key, axis: AxisInt):
  1135. key = item_from_zerodim(key)
  1136. if is_iterator(key):
  1137. key = list(key)
  1138. if key is Ellipsis:
  1139. key = slice(None)
  1140. labels = self.obj._get_axis(axis)
  1141. if isinstance(key, tuple) and isinstance(labels, MultiIndex):
  1142. key = tuple(key)
  1143. if isinstance(key, slice):
  1144. self._validate_key(key, axis)
  1145. return self._get_slice_axis(key, axis=axis)
  1146. elif com.is_bool_indexer(key):
  1147. return self._getbool_axis(key, axis=axis)
  1148. elif is_list_like_indexer(key):
  1149. # an iterable multi-selection
  1150. if not (isinstance(key, tuple) and isinstance(labels, MultiIndex)):
  1151. if hasattr(key, "ndim") and key.ndim > 1:
  1152. raise ValueError("Cannot index with multidimensional key")
  1153. return self._getitem_iterable(key, axis=axis)
  1154. # nested tuple slicing
  1155. if is_nested_tuple(key, labels):
  1156. locs = labels.get_locs(key)
  1157. indexer: list[slice | npt.NDArray[np.intp]] = [slice(None)] * self.ndim
  1158. indexer[axis] = locs
  1159. return self.obj.iloc[tuple(indexer)]
  1160. # fall thru to straight lookup
  1161. self._validate_key(key, axis)
  1162. return self._get_label(key, axis=axis)
  1163. def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
  1164. """
  1165. This is pretty simple as we just have to deal with labels.
  1166. """
  1167. # caller is responsible for ensuring non-None axis
  1168. obj = self.obj
  1169. if not need_slice(slice_obj):
  1170. return obj.copy(deep=False)
  1171. labels = obj._get_axis(axis)
  1172. indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step)
  1173. if isinstance(indexer, slice):
  1174. return self.obj._slice(indexer, axis=axis)
  1175. else:
  1176. # DatetimeIndex overrides Index.slice_indexer and may
  1177. # return a DatetimeIndex instead of a slice object.
  1178. return self.obj.take(indexer, axis=axis)
  1179. def _convert_to_indexer(self, key, axis: AxisInt):
  1180. """
  1181. Convert indexing key into something we can use to do actual fancy
  1182. indexing on a ndarray.
  1183. Examples
  1184. ix[:5] -> slice(0, 5)
  1185. ix[[1,2,3]] -> [1,2,3]
  1186. ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)
  1187. Going by Zen of Python?
  1188. 'In the face of ambiguity, refuse the temptation to guess.'
  1189. raise AmbiguousIndexError with integer labels?
  1190. - No, prefer label-based indexing
  1191. """
  1192. labels = self.obj._get_axis(axis)
  1193. if isinstance(key, slice):
  1194. return labels._convert_slice_indexer(key, kind="loc")
  1195. if (
  1196. isinstance(key, tuple)
  1197. and not isinstance(labels, MultiIndex)
  1198. and self.ndim < 2
  1199. and len(key) > 1
  1200. ):
  1201. raise IndexingError("Too many indexers")
  1202. # Slices are not valid keys passed in by the user,
  1203. # even though they are hashable in Python 3.12
  1204. contains_slice = False
  1205. if isinstance(key, tuple):
  1206. contains_slice = any(isinstance(v, slice) for v in key)
  1207. if is_scalar(key) or (
  1208. isinstance(labels, MultiIndex) and is_hashable(key) and not contains_slice
  1209. ):
  1210. # Otherwise get_loc will raise InvalidIndexError
  1211. # if we are a label return me
  1212. try:
  1213. return labels.get_loc(key)
  1214. except LookupError:
  1215. if isinstance(key, tuple) and isinstance(labels, MultiIndex):
  1216. if len(key) == labels.nlevels:
  1217. return {"key": key}
  1218. raise
  1219. except InvalidIndexError:
  1220. # GH35015, using datetime as column indices raises exception
  1221. if not isinstance(labels, MultiIndex):
  1222. raise
  1223. except ValueError:
  1224. if not is_integer(key):
  1225. raise
  1226. return {"key": key}
  1227. if is_nested_tuple(key, labels):
  1228. if self.ndim == 1 and any(isinstance(k, tuple) for k in key):
  1229. # GH#35349 Raise if tuple in tuple for series
  1230. raise IndexingError("Too many indexers")
  1231. return labels.get_locs(key)
  1232. elif is_list_like_indexer(key):
  1233. if is_iterator(key):
  1234. key = list(key)
  1235. if com.is_bool_indexer(key):
  1236. key = check_bool_indexer(labels, key)
  1237. return key
  1238. else:
  1239. return self._get_listlike_indexer(key, axis)[1]
  1240. else:
  1241. try:
  1242. return labels.get_loc(key)
  1243. except LookupError:
  1244. # allow a not found key only if we are a setter
  1245. if not is_list_like_indexer(key):
  1246. return {"key": key}
  1247. raise
  1248. def _get_listlike_indexer(self, key, axis: AxisInt):
  1249. """
  1250. Transform a list-like of keys into a new index and an indexer.
  1251. Parameters
  1252. ----------
  1253. key : list-like
  1254. Targeted labels.
  1255. axis: int
  1256. Dimension on which the indexing is being made.
  1257. Raises
  1258. ------
  1259. KeyError
  1260. If at least one key was requested but none was found.
  1261. Returns
  1262. -------
  1263. keyarr: Index
  1264. New index (coinciding with 'key' if the axis is unique).
  1265. values : array-like
  1266. Indexer for the return object, -1 denotes keys not found.
  1267. """
  1268. ax = self.obj._get_axis(axis)
  1269. axis_name = self.obj._get_axis_name(axis)
  1270. keyarr, indexer = ax._get_indexer_strict(key, axis_name)
  1271. return keyarr, indexer
  1272. @doc(IndexingMixin.iloc)
  1273. class _iLocIndexer(_LocationIndexer):
  1274. _valid_types = (
  1275. "integer, integer slice (START point is INCLUDED, END "
  1276. "point is EXCLUDED), listlike of integers, boolean array"
  1277. )
  1278. _takeable = True
  1279. # -------------------------------------------------------------------
  1280. # Key Checks
  1281. def _validate_key(self, key, axis: AxisInt):
  1282. if com.is_bool_indexer(key):
  1283. if hasattr(key, "index") and isinstance(key.index, Index):
  1284. if key.index.inferred_type == "integer":
  1285. raise NotImplementedError(
  1286. "iLocation based boolean "
  1287. "indexing on an integer type "
  1288. "is not available"
  1289. )
  1290. raise ValueError(
  1291. "iLocation based boolean indexing cannot use "
  1292. "an indexable as a mask"
  1293. )
  1294. return
  1295. if isinstance(key, slice):
  1296. return
  1297. elif is_integer(key):
  1298. self._validate_integer(key, axis)
  1299. elif isinstance(key, tuple):
  1300. # a tuple should already have been caught by this point
  1301. # so don't treat a tuple as a valid indexer
  1302. raise IndexingError("Too many indexers")
  1303. elif is_list_like_indexer(key):
  1304. if isinstance(key, ABCSeries):
  1305. arr = key._values
  1306. elif is_array_like(key):
  1307. arr = key
  1308. else:
  1309. arr = np.array(key)
  1310. len_axis = len(self.obj._get_axis(axis))
  1311. # check that the key has a numeric dtype
  1312. if not is_numeric_dtype(arr.dtype):
  1313. raise IndexError(f".iloc requires numeric indexers, got {arr}")
  1314. # check that the key does not exceed the maximum size of the index
  1315. if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis):
  1316. raise IndexError("positional indexers are out-of-bounds")
  1317. else:
  1318. raise ValueError(f"Can only index by location with a [{self._valid_types}]")
  1319. def _has_valid_setitem_indexer(self, indexer) -> bool:
  1320. """
  1321. Validate that a positional indexer cannot enlarge its target
  1322. will raise if needed, does not modify the indexer externally.
  1323. Returns
  1324. -------
  1325. bool
  1326. """
  1327. if isinstance(indexer, dict):
  1328. raise IndexError("iloc cannot enlarge its target object")
  1329. if isinstance(indexer, ABCDataFrame):
  1330. raise TypeError(
  1331. "DataFrame indexer for .iloc is not supported. "
  1332. "Consider using .loc with a DataFrame indexer for automatic alignment.",
  1333. )
  1334. if not isinstance(indexer, tuple):
  1335. indexer = _tuplify(self.ndim, indexer)
  1336. for ax, i in zip(self.obj.axes, indexer):
  1337. if isinstance(i, slice):
  1338. # should check the stop slice?
  1339. pass
  1340. elif is_list_like_indexer(i):
  1341. # should check the elements?
  1342. pass
  1343. elif is_integer(i):
  1344. if i >= len(ax):
  1345. raise IndexError("iloc cannot enlarge its target object")
  1346. elif isinstance(i, dict):
  1347. raise IndexError("iloc cannot enlarge its target object")
  1348. return True
  1349. def _is_scalar_access(self, key: tuple) -> bool:
  1350. """
  1351. Returns
  1352. -------
  1353. bool
  1354. """
  1355. # this is a shortcut accessor to both .loc and .iloc
  1356. # that provide the equivalent access of .at and .iat
  1357. # a) avoid getting things via sections and (to minimize dtype changes)
  1358. # b) provide a performant path
  1359. if len(key) != self.ndim:
  1360. return False
  1361. return all(is_integer(k) for k in key)
  1362. def _validate_integer(self, key: int | np.integer, axis: AxisInt) -> None:
  1363. """
  1364. Check that 'key' is a valid position in the desired axis.
  1365. Parameters
  1366. ----------
  1367. key : int
  1368. Requested position.
  1369. axis : int
  1370. Desired axis.
  1371. Raises
  1372. ------
  1373. IndexError
  1374. If 'key' is not a valid position in axis 'axis'.
  1375. """
  1376. len_axis = len(self.obj._get_axis(axis))
  1377. if key >= len_axis or key < -len_axis:
  1378. raise IndexError("single positional indexer is out-of-bounds")
  1379. # -------------------------------------------------------------------
  1380. def _getitem_tuple(self, tup: tuple):
  1381. tup = self._validate_tuple_indexer(tup)
  1382. with suppress(IndexingError):
  1383. return self._getitem_lowerdim(tup)
  1384. return self._getitem_tuple_same_dim(tup)
  1385. def _get_list_axis(self, key, axis: AxisInt):
  1386. """
  1387. Return Series values by list or array of integers.
  1388. Parameters
  1389. ----------
  1390. key : list-like positional indexer
  1391. axis : int
  1392. Returns
  1393. -------
  1394. Series object
  1395. Notes
  1396. -----
  1397. `axis` can only be zero.
  1398. """
  1399. try:
  1400. return self.obj._take_with_is_copy(key, axis=axis)
  1401. except IndexError as err:
  1402. # re-raise with different error message, e.g. test_getitem_ndarray_3d
  1403. raise IndexError("positional indexers are out-of-bounds") from err
  1404. def _getitem_axis(self, key, axis: AxisInt):
  1405. if key is Ellipsis:
  1406. key = slice(None)
  1407. elif isinstance(key, ABCDataFrame):
  1408. raise IndexError(
  1409. "DataFrame indexer is not allowed for .iloc\n"
  1410. "Consider using .loc for automatic alignment."
  1411. )
  1412. if isinstance(key, slice):
  1413. return self._get_slice_axis(key, axis=axis)
  1414. if is_iterator(key):
  1415. key = list(key)
  1416. if isinstance(key, list):
  1417. key = np.asarray(key)
  1418. if com.is_bool_indexer(key):
  1419. self._validate_key(key, axis)
  1420. return self._getbool_axis(key, axis=axis)
  1421. # a list of integers
  1422. elif is_list_like_indexer(key):
  1423. return self._get_list_axis(key, axis=axis)
  1424. # a single integer
  1425. else:
  1426. key = item_from_zerodim(key)
  1427. if not is_integer(key):
  1428. raise TypeError("Cannot index by location index with a non-integer key")
  1429. # validate the location
  1430. self._validate_integer(key, axis)
  1431. return self.obj._ixs(key, axis=axis)
  1432. def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
  1433. # caller is responsible for ensuring non-None axis
  1434. obj = self.obj
  1435. if not need_slice(slice_obj):
  1436. return obj.copy(deep=False)
  1437. labels = obj._get_axis(axis)
  1438. labels._validate_positional_slice(slice_obj)
  1439. return self.obj._slice(slice_obj, axis=axis)
  1440. def _convert_to_indexer(self, key, axis: AxisInt):
  1441. """
  1442. Much simpler as we only have to deal with our valid types.
  1443. """
  1444. return key
  1445. def _get_setitem_indexer(self, key):
  1446. # GH#32257 Fall through to let numpy do validation
  1447. if is_iterator(key):
  1448. key = list(key)
  1449. if self.axis is not None:
  1450. key = _tupleize_axis_indexer(self.ndim, self.axis, key)
  1451. return key
  1452. # -------------------------------------------------------------------
def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
    """
    _setitem_with_indexer is for setting values on a Series/DataFrame
    using positional indexers.

    If the relevant keys are not present, the Series/DataFrame may be
    expanded.

    This method is currently broken when dealing with non-unique Indexes,
    since it goes from positional indexers back to labels when calling
    BlockManager methods, see GH#12991, GH#22046, GH#15686.

    Parameters
    ----------
    indexer : object
        The indexer to set at. May be a tuple with one entry per axis.
        A ``dict`` entry (or a ``dict`` indexer) marks a missing key and
        triggers expansion of the object before the value is set.
    value : object
        The value to set.
    name : str, default "iloc"
        Name of the calling indexer. For "loc" the indexer and value are
        passed through ``_maybe_mask_setitem_value`` before setting.
    """
    info_axis = self.obj._info_axis_number

    # maybe partial set
    take_split_path = not self.obj._mgr.is_single_block

    if not take_split_path and isinstance(value, ABCDataFrame):
        # Avoid cast of values
        take_split_path = not value._mgr.is_single_block

    # if there is only one block/type, still have to take split path
    # unless the block is one-dimensional or it can hold the value
    if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
        # in case of dict, keys are indices
        val = list(value.values()) if isinstance(value, dict) else value
        arr = self.obj._mgr.arrays[0]
        take_split_path = not can_hold_element(
            arr, extract_array(val, extract_numpy=True)
        )

    # if we have any multi-indexes that have non-trivial slices
    # (not null slices) then we must take the split path, xref
    # GH 10360, GH 27841
    if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
        for i, ax in zip(indexer, self.obj.axes):
            if isinstance(ax, MultiIndex) and not (
                is_integer(i) or com.is_null_slice(i)
            ):
                take_split_path = True
                break

    if isinstance(indexer, tuple):
        # rebuild the indexer entry by entry, replacing each missing-key
        # dict with the location of the freshly inserted label
        nindexer = []
        for i, idx in enumerate(indexer):
            if isinstance(idx, dict):
                # reindex the axis to the new value
                # and set inplace
                key, _ = convert_missing_indexer(idx)

                # if this is the items axes, then take the main missing
                # path first
                # this correctly sets the dtype and avoids cache issues
                # essentially this separates out the block that is needed
                # to possibly be modified
                if self.ndim > 1 and i == info_axis:
                    # add the new item, and set the value
                    # must have all defined axes if we have a scalar
                    # or a list-like on the non-info axes if we have a
                    # list-like
                    if not len(self.obj):
                        if not is_list_like_indexer(value):
                            raise ValueError(
                                "cannot set a frame with no "
                                "defined index and a scalar"
                            )
                        self.obj[key] = value
                        return

                    # add a new item with the dtype setup
                    if com.is_null_slice(indexer[0]):
                        # We are setting an entire column
                        self.obj[key] = value
                        return
                    elif is_array_like(value):
                        # GH#42099
                        arr = extract_array(value, extract_numpy=True)
                        # taking with all -1 positions yields an all-fill
                        # array of len(self.obj)
                        taker = -1 * np.ones(len(self.obj), dtype=np.intp)
                        empty_value = algos.take_nd(arr, taker)
                        if not isinstance(value, ABCSeries):
                            # if not Series (in which case we need to align),
                            #  we can short-circuit
                            if (
                                isinstance(arr, np.ndarray)
                                and arr.ndim == 1
                                and len(arr) == 1
                            ):
                                # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
                                arr = arr[0, ...]
                            empty_value[indexer[0]] = arr
                            self.obj[key] = empty_value
                            return

                        self.obj[key] = empty_value
                    elif not is_list_like(value):
                        self.obj[key] = construct_1d_array_from_inferred_fill_value(
                            value, len(self.obj)
                        )
                    else:
                        # FIXME: GH#42099#issuecomment-864326014
                        self.obj[key] = infer_fill_value(value)

                    # the column now exists; translate the missing keys into
                    # positions and redo the set through the regular path
                    new_indexer = convert_from_missing_indexer_tuple(
                        indexer, self.obj.axes
                    )
                    self._setitem_with_indexer(new_indexer, value, name)

                    return

                # reindex the axis
                # make sure to clear the cache because we are
                # just replacing the block manager here
                # so the object is the same
                index = self.obj._get_axis(i)
                with warnings.catch_warnings():
                    # TODO: re-issue this with setitem-specific message?
                    warnings.filterwarnings(
                        "ignore",
                        "The behavior of Index.insert with object-dtype "
                        "is deprecated",
                        category=FutureWarning,
                    )
                    labels = index.insert(len(index), key)

                # We are expanding the Series/DataFrame values to match
                #  the length of the new index `labels`.  GH#40096 ensure
                #  this is valid even if the index has duplicates.
                taker = np.arange(len(index) + 1, dtype=np.intp)
                taker[-1] = -1
                reindexers = {i: (labels, taker)}
                new_obj = self.obj._reindex_with_indexers(
                    reindexers, allow_dups=True
                )
                self.obj._mgr = new_obj._mgr
                self.obj._maybe_update_cacher(clear=True)
                self.obj._is_copy = None

                nindexer.append(labels.get_loc(key))

            else:
                nindexer.append(idx)

        indexer = tuple(nindexer)
    else:
        indexer, missing = convert_missing_indexer(indexer)

        if missing:
            self._setitem_with_indexer_missing(indexer, value)
            return

    if name == "loc":
        # must come after setting of missing
        indexer, value = self._maybe_mask_setitem_value(indexer, value)

    # align and set the values
    if take_split_path:
        # We have to operate column-wise
        self._setitem_with_indexer_split_path(indexer, value, name)
    else:
        self._setitem_single_block(indexer, value, name)
def _setitem_with_indexer_split_path(self, indexer, value, name: str):
    """
    Setitem column-wise.

    Parameters
    ----------
    indexer : object
        Row indexer, or a ``(row_indexer, column_indexer)`` tuple. A
        non-tuple is expanded to a tuple via ``_tuplify``.
    value : object
        The value to set. Series (except for "iloc") and dict values are
        aligned through ``_align_series`` first.
    name : str
        Name of the calling indexer, e.g. "iloc".

    Raises
    ------
    IndexError
        If ``indexer`` has more entries than ``self.ndim``.
    ValueError
        If the row indexer is an ndarray with more than 2 dimensions, or
        if the lengths of the keys and of ``value`` do not match.
    """
    # Above we only set take_split_path to True for 2D cases
    assert self.ndim == 2

    if not isinstance(indexer, tuple):
        indexer = _tuplify(self.ndim, indexer)
    if len(indexer) > self.ndim:
        raise IndexError("too many indices for array")
    if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
        raise ValueError(r"Cannot set values with ndim > 2")

    if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
        from pandas import Series

        value = self._align_series(indexer, Series(value))

    # Ensure we have something we can iterate over
    info_axis = indexer[1]
    ilocs = self._ensure_iterable_column_indexer(info_axis)

    pi = indexer[0]
    lplane_indexer = length_of_indexer(pi, self.obj.index)
    # lplane_indexer gives the expected length of obj[indexer[0]]

    # we need an iterable, with a ndim of at least 1
    # eg. don't pass through np.array(0)
    if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0:
        # NOTE: the order of the branches below matters; the first
        # matching shape of (ilocs, pi, value) decides how we set
        if isinstance(value, ABCDataFrame):
            self._setitem_with_indexer_frame_value(indexer, value, name)

        elif np.ndim(value) == 2:
            # TODO: avoid np.ndim call in case it isn't an ndarray, since
            #  that will construct an ndarray, which will be wasteful
            self._setitem_with_indexer_2d_value(indexer, value)

        elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi):
            # We are setting multiple rows in a single column.
            self._setitem_single_column(ilocs[0], value, pi)

        elif len(ilocs) == 1 and 0 != lplane_indexer != len(value):
            # We are trying to set N values into M entries of a single
            #  column, which is invalid for N != M
            # Exclude zero-len for e.g. boolean masking that is all-false

            if len(value) == 1 and not is_integer(info_axis):
                # This is a case like df.iloc[:3, [1]] = [0]
                #  where we treat as df.iloc[:3, 1] = 0
                return self._setitem_with_indexer((pi, info_axis[0]), value[0])

            raise ValueError(
                "Must have equal len keys and value "
                "when setting with an iterable"
            )

        elif lplane_indexer == 0 and len(value) == len(self.obj.index):
            # We get here in one case via .loc with a all-False mask
            pass

        elif self._is_scalar_access(indexer) and is_object_dtype(
            self.obj.dtypes._values[ilocs[0]]
        ):
            # We are setting nested data, only possible for object dtype data
            self._setitem_single_column(indexer[1], value, pi)

        elif len(ilocs) == len(value):
            # We are setting multiple columns in a single row.
            for loc, v in zip(ilocs, value):
                self._setitem_single_column(loc, v, pi)

        elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:
            # This is a setitem-with-expansion, see
            #  test_loc_setitem_empty_append_expands_rows_mixed_dtype
            # e.g. df = DataFrame(columns=["x", "y"])
            #  df["x"] = df["x"].astype(np.int64)
            #  df.loc[:, "x"] = [1, 2, 3]
            self._setitem_single_column(ilocs[0], value, pi)

        else:
            raise ValueError(
                "Must have equal len keys and value "
                "when setting with an iterable"
            )

    else:
        # scalar value
        for loc in ilocs:
            self._setitem_single_column(loc, value, pi)
  1666. def _setitem_with_indexer_2d_value(self, indexer, value):
  1667. # We get here with np.ndim(value) == 2, excluding DataFrame,
  1668. # which goes through _setitem_with_indexer_frame_value
  1669. pi = indexer[0]
  1670. ilocs = self._ensure_iterable_column_indexer(indexer[1])
  1671. if not is_array_like(value):
  1672. # cast lists to array
  1673. value = np.array(value, dtype=object)
  1674. if len(ilocs) != value.shape[1]:
  1675. raise ValueError(
  1676. "Must have equal len keys and value when setting with an ndarray"
  1677. )
  1678. for i, loc in enumerate(ilocs):
  1679. value_col = value[:, i]
  1680. if is_object_dtype(value_col.dtype):
  1681. # casting to list so that we do type inference in setitem_single_column
  1682. value_col = value_col.tolist()
  1683. self._setitem_single_column(loc, value_col, pi)
  1684. def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str):
  1685. ilocs = self._ensure_iterable_column_indexer(indexer[1])
  1686. sub_indexer = list(indexer)
  1687. pi = indexer[0]
  1688. multiindex_indexer = isinstance(self.obj.columns, MultiIndex)
  1689. unique_cols = value.columns.is_unique
  1690. # We do not want to align the value in case of iloc GH#37728
  1691. if name == "iloc":
  1692. for i, loc in enumerate(ilocs):
  1693. val = value.iloc[:, i]
  1694. self._setitem_single_column(loc, val, pi)
  1695. elif not unique_cols and value.columns.equals(self.obj.columns):
  1696. # We assume we are already aligned, see
  1697. # test_iloc_setitem_frame_duplicate_columns_multiple_blocks
  1698. for loc in ilocs:
  1699. item = self.obj.columns[loc]
  1700. if item in value:
  1701. sub_indexer[1] = item
  1702. val = self._align_series(
  1703. tuple(sub_indexer),
  1704. value.iloc[:, loc],
  1705. multiindex_indexer,
  1706. )
  1707. else:
  1708. val = np.nan
  1709. self._setitem_single_column(loc, val, pi)
  1710. elif not unique_cols:
  1711. raise ValueError("Setting with non-unique columns is not allowed.")
  1712. else:
  1713. for loc in ilocs:
  1714. item = self.obj.columns[loc]
  1715. if item in value:
  1716. sub_indexer[1] = item
  1717. val = self._align_series(
  1718. tuple(sub_indexer),
  1719. value[item],
  1720. multiindex_indexer,
  1721. using_cow=using_copy_on_write(),
  1722. )
  1723. else:
  1724. val = np.nan
  1725. self._setitem_single_column(loc, val, pi)
def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
    """
    Set ``value`` into the column at position ``loc`` for the rows
    selected by ``plane_indexer``.

    Parameters
    ----------
    loc : int
        Indexer for column position
    value : object
        The value to set.
    plane_indexer : int, slice, listlike[int]
        The indexer we use for setitem along axis=0.
    """
    pi = plane_indexer

    is_full_setter = com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj))

    # `and` binds tighter than `or`: an empty slice, or a zero-length array-like
    is_null_setter = com.is_empty_slice(pi) or is_array_like(pi) and len(pi) == 0

    if is_null_setter:
        # no-op, don't cast dtype later
        return

    elif is_full_setter:
        try:
            self.obj._mgr.column_setitem(
                loc, plane_indexer, value, inplace_only=True
            )
        except (ValueError, TypeError, LossySetitemError):
            # If we're setting an entire column and we can't do it inplace,
            #  then we can use value's dtype (or inferred dtype)
            #  instead of object
            dtype = self.obj.dtypes.iloc[loc]
            if dtype not in (np.void, object) and not self.obj.empty:
                # - Exclude np.void, as that is a special case for expansion.
                #   We want to warn for
                #       df = pd.DataFrame({'a': [1, 2]})
                #       df.loc[:, 'a'] = .3
                #   but not for
                #       df = pd.DataFrame({'a': [1, 2]})
                #       df.loc[:, 'b'] = .3
                # - Exclude `object`, as then no upcasting happens.
                # - Exclude empty initial object with enlargement,
                #   as then there's nothing to be inconsistent with.
                warnings.warn(
                    f"Setting an item of incompatible dtype is deprecated "
                    "and will raise in a future error of pandas. "
                    f"Value '{value}' has dtype incompatible with {dtype}, "
                    "please explicitly cast to a compatible dtype first.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            # replace the whole column rather than writing into it
            self.obj.isetitem(loc, value)
    else:
        # set value into the column (first attempting to operate inplace, then
        #  falling back to casting if necessary)
        dtype = self.obj.dtypes.iloc[loc]
        if dtype == np.void:
            # This means we're expanding, with multiple columns, e.g.
            #     df = pd.DataFrame({'A': [1,2,3], 'B': [4,5,6]})
            #     df.loc[df.index <= 2, ['F', 'G']] = (1, 'abc')
            # Columns F and G will initially be set to np.void.
            # Here, we replace those temporary `np.void` columns with
            # columns of the appropriate dtype, based on `value`.
            self.obj.iloc[:, loc] = construct_1d_array_from_inferred_fill_value(
                value, len(self.obj)
            )
        self.obj._mgr.column_setitem(loc, plane_indexer, value)

    # reached on every path except the early no-op return above
    self.obj._clear_item_cache()
def _setitem_single_block(self, indexer, value, name: str) -> None:
    """
    _setitem_with_indexer for the case when we have a single Block.

    Parameters
    ----------
    indexer : object
        The indexer to set at; may be a tuple with one entry per axis.
    value : object
        The value to set. Series and dict values are aligned via
        ``_align_series`` and DataFrame values via ``_align_frame``,
        except that Series/DataFrame values are not aligned for "iloc".
    name : str
        Name of the calling indexer, e.g. "iloc".
    """
    from pandas import Series

    if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
        # TODO(EA): ExtensionBlock.setitem this causes issues with
        # setting for extensionarrays that store dicts. Need to decide
        # if it's worth supporting that.
        value = self._align_series(indexer, Series(value))

    info_axis = self.obj._info_axis_number
    item_labels = self.obj._get_axis(info_axis)
    if isinstance(indexer, tuple):
        # if we are setting on the info axis ONLY
        # set using those methods to avoid block-splitting
        # logic here
        if (
            self.ndim == len(indexer) == 2
            and is_integer(indexer[1])
            and com.is_null_slice(indexer[0])
        ):
            col = item_labels[indexer[info_axis]]
            # only take the single-column path when the label is not duplicated
            if len(item_labels.get_indexer_for([col])) == 1:
                # e.g. test_loc_setitem_empty_append_expands_rows
                loc = item_labels.get_loc(col)
                self._setitem_single_column(loc, value, indexer[0])
                return

        indexer = maybe_convert_ix(*indexer)  # e.g. test_setitem_frame_align

    if isinstance(value, ABCDataFrame) and name != "iloc":
        value = self._align_frame(indexer, value)._values

    # check for chained assignment
    self.obj._check_is_chained_assignment_possible()

    # actually do the set
    self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)
    self.obj._maybe_update_cacher(clear=True, inplace=True)
def _setitem_with_indexer_missing(self, indexer, value):
    """
    Insert new row(s) or column(s) into the Series or DataFrame.

    Parameters
    ----------
    indexer : object
        The new label; it is inserted at the end of the index.
    value : object
        The value to store under the new label.

    Raises
    ------
    ValueError
        For a 2-dimensional object, if it has no columns or if a
        list-like ``value`` does not have one entry per column.
    """
    from pandas import Series

    # reindex the axis to the new value
    # and set inplace
    if self.ndim == 1:
        index = self.obj.index
        with warnings.catch_warnings():
            # TODO: re-issue this with setitem-specific message?
            warnings.filterwarnings(
                "ignore",
                "The behavior of Index.insert with object-dtype is deprecated",
                category=FutureWarning,
            )
            new_index = index.insert(len(index), indexer)

        # we have a coerced indexer, e.g. a float
        # that matches in an int64 Index, so
        # we will not create a duplicate index, rather
        # index to that element
        # e.g. 0.0 -> 0
        # GH#12246
        if index.is_unique:
            # pass new_index[-1:] instead of [new_index[-1]]
            #  so that we retain dtype
            new_indexer = index.get_indexer(new_index[-1:])
            if (new_indexer != -1).any():
                # We get only here with loc, so can hard code
                return self._setitem_with_indexer(new_indexer, value, "loc")

        # this preserves dtype of the value and of the object
        if not is_scalar(value):
            new_dtype = None

        elif is_valid_na_for_dtype(value, self.obj.dtype):
            if not is_object_dtype(self.obj.dtype):
                # Every NA value is suitable for object, no conversion needed
                value = na_value_for_dtype(self.obj.dtype, compat=False)

            new_dtype = maybe_promote(self.obj.dtype, value)[0]

        elif isna(value):
            new_dtype = None
        elif not self.obj.empty and not is_object_dtype(self.obj.dtype):
            # We should not cast, if we have object dtype because we can
            # set timedeltas into object series
            curr_dtype = self.obj.dtype
            # use the dtype's `numpy_dtype` attribute when it has one
            curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)
            new_dtype = maybe_promote(curr_dtype, value)[0]
        else:
            new_dtype = None

        new_values = Series([value], dtype=new_dtype)._values

        if len(self.obj._values):
            # GH#22717 handle casting compatibility that np.concatenate
            #  does incorrectly
            new_values = concat_compat([self.obj._values, new_values])

        # swap in the manager of a freshly built object; self.obj stays
        # the same Python object
        self.obj._mgr = self.obj._constructor(
            new_values, index=new_index, name=self.obj.name
        )._mgr
        self.obj._maybe_update_cacher(clear=True)

    elif self.ndim == 2:
        if not len(self.obj.columns):
            # no columns and scalar
            raise ValueError("cannot set a frame with no defined columns")

        # remembered before `value` is wrapped in a Series below
        has_dtype = hasattr(value, "dtype")
        if isinstance(value, ABCSeries):
            # append a Series
            value = value.reindex(index=self.obj.columns, copy=True)
            value.name = indexer
        elif isinstance(value, dict):
            value = Series(
                value, index=self.obj.columns, name=indexer, dtype=object
            )
        else:
            # a list-list
            if is_list_like_indexer(value):
                # must have conforming columns
                if len(value) != len(self.obj.columns):
                    raise ValueError("cannot set a row with mismatched columns")

            value = Series(value, index=self.obj.columns, name=indexer)

        if not len(self.obj):
            # We will ignore the existing dtypes instead of using
            #  internals.concat logic
            df = value.to_frame().T

            idx = self.obj.index
            if isinstance(idx, MultiIndex):
                name = idx.names
            else:
                name = idx.name

            df.index = Index([indexer], name=name)
            if not has_dtype:
                # i.e. if we already had a Series or ndarray, keep that
                #  dtype.  But if we had a list or dict, then do inference
                df = df.infer_objects(copy=False)
            self.obj._mgr = df._mgr
        else:
            self.obj._mgr = self.obj._append(value)._mgr
        self.obj._maybe_update_cacher(clear=True)
  1917. def _ensure_iterable_column_indexer(self, column_indexer):
  1918. """
  1919. Ensure that our column indexer is something that can be iterated over.
  1920. """
  1921. ilocs: Sequence[int | np.integer] | np.ndarray
  1922. if is_integer(column_indexer):
  1923. ilocs = [column_indexer]
  1924. elif isinstance(column_indexer, slice):
  1925. ilocs = np.arange(len(self.obj.columns))[column_indexer]
  1926. elif (
  1927. isinstance(column_indexer, np.ndarray) and column_indexer.dtype.kind == "b"
  1928. ):
  1929. ilocs = np.arange(len(column_indexer))[column_indexer]
  1930. else:
  1931. ilocs = column_indexer
  1932. return ilocs
def _align_series(
    self,
    indexer,
    ser: Series,
    multiindex_indexer: bool = False,
    using_cow: bool = False,
):
    """
    Parameters
    ----------
    indexer : tuple, slice, scalar
        Indexer used to get the locations that will be set to `ser`.
    ser : pd.Series
        Values to assign to the locations specified by `indexer`.
    multiindex_indexer : bool, optional
        Defaults to False. Should be set to True if `indexer` was from
        a `pd.MultiIndex`, to avoid unnecessary broadcasting.
    using_cow : bool, optional
        Defaults to False. When True and the index of `ser` already equals
        the labels selected along an axis, `ser` itself is returned rather
        than a copy of its values.

    Returns
    -------
    `np.array` of `ser` broadcast to the appropriate shape for assignment
    to the locations selected by `indexer`

    Raises
    ------
    ValueError
        If no branch below is able to align `ser` for `indexer`.
    """
    # normalize a single non-scalar indexer into a 1-tuple
    if isinstance(indexer, (slice, np.ndarray, list, Index)):
        indexer = (indexer,)

    if isinstance(indexer, tuple):
        # flatten np.ndarray indexers
        def ravel(i):
            return i.ravel() if isinstance(i, np.ndarray) else i

        indexer = tuple(map(ravel, indexer))

        # one flag per entry: True where the entry is not a null slice
        aligners = [not com.is_null_slice(idx) for idx in indexer]
        sum_aligners = sum(aligners)
        single_aligner = sum_aligners == 1
        is_frame = self.ndim == 2
        obj = self.obj

        # are we a single alignable value on a non-primary
        # dim (e.g. panel: 1,2, or frame: 0) ?
        # hence need to align to a single axis dimension
        # rather than find all valid dims

        # frame
        if is_frame:
            single_aligner = single_aligner and aligners[0]

        # we have a frame, with multiple indexers on both axes; and a
        # series, so need to broadcast (see GH5206)
        if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer):
            ser_values = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values

            # single indexer
            if len(indexer) > 1 and not multiindex_indexer:
                len_indexer = len(indexer[1])
                # repeat the values once per entry of indexer[1], as columns
                ser_values = (
                    np.tile(ser_values, len_indexer).reshape(len_indexer, -1).T
                )

            return ser_values

        for i, idx in enumerate(indexer):
            ax = obj.axes[i]

            # multiple aligners (or null slices)
            if is_sequence(idx) or isinstance(idx, slice):
                if single_aligner and com.is_null_slice(idx):
                    continue
                new_ix = ax[idx]
                if not is_list_like_indexer(new_ix):
                    new_ix = Index([new_ix])
                else:
                    new_ix = Index(new_ix)
                if ser.index.equals(new_ix):
                    if using_cow:
                        return ser
                    return ser._values.copy()

                return ser.reindex(new_ix)._values

            # 2 dims
            elif single_aligner:
                # reindex along index
                ax = self.obj.axes[1]
                if ser.index.equals(ax) or not len(ax):
                    return ser._values.copy()
                return ser.reindex(ax)._values

    elif is_integer(indexer) and self.ndim == 1:
        if is_object_dtype(self.obj.dtype):
            return ser
        ax = self.obj._get_axis(0)

        if ser.index.equals(ax):
            return ser._values.copy()

        return ser.reindex(ax)._values[indexer]

    elif is_integer(indexer):
        ax = self.obj._get_axis(1)

        if ser.index.equals(ax):
            return ser._values.copy()

        return ser.reindex(ax)._values

    raise ValueError("Incompatible indexer with Series")
  2021. def _align_frame(self, indexer, df: DataFrame) -> DataFrame:
  2022. is_frame = self.ndim == 2
  2023. if isinstance(indexer, tuple):
  2024. idx, cols = None, None
  2025. sindexers = []
  2026. for i, ix in enumerate(indexer):
  2027. ax = self.obj.axes[i]
  2028. if is_sequence(ix) or isinstance(ix, slice):
  2029. if isinstance(ix, np.ndarray):
  2030. ix = ix.ravel()
  2031. if idx is None:
  2032. idx = ax[ix]
  2033. elif cols is None:
  2034. cols = ax[ix]
  2035. else:
  2036. break
  2037. else:
  2038. sindexers.append(i)
  2039. if idx is not None and cols is not None:
  2040. if df.index.equals(idx) and df.columns.equals(cols):
  2041. val = df.copy()
  2042. else:
  2043. val = df.reindex(idx, columns=cols)
  2044. return val
  2045. elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame:
  2046. ax = self.obj.index[indexer]
  2047. if df.index.equals(ax):
  2048. val = df.copy()
  2049. else:
  2050. # we have a multi-index and are trying to align
  2051. # with a particular, level GH3738
  2052. if (
  2053. isinstance(ax, MultiIndex)
  2054. and isinstance(df.index, MultiIndex)
  2055. and ax.nlevels != df.index.nlevels
  2056. ):
  2057. raise TypeError(
  2058. "cannot align on a multi-index with out "
  2059. "specifying the join levels"
  2060. )
  2061. val = df.reindex(index=ax)
  2062. return val
  2063. raise ValueError("Incompatible indexer with DataFrame")
  2064. class _ScalarAccessIndexer(NDFrameIndexerBase):
  2065. """
  2066. Access scalars quickly.
  2067. """
  2068. # sub-classes need to set _takeable
  2069. _takeable: bool
  2070. def _convert_key(self, key):
  2071. raise AbstractMethodError(self)
  2072. def __getitem__(self, key):
  2073. if not isinstance(key, tuple):
  2074. # we could have a convertible item here (e.g. Timestamp)
  2075. if not is_list_like_indexer(key):
  2076. key = (key,)
  2077. else:
  2078. raise ValueError("Invalid call for scalar access (getting)!")
  2079. key = self._convert_key(key)
  2080. return self.obj._get_value(*key, takeable=self._takeable)
  2081. def __setitem__(self, key, value) -> None:
  2082. if isinstance(key, tuple):
  2083. key = tuple(com.apply_if_callable(x, self.obj) for x in key)
  2084. else:
  2085. # scalar callable may return tuple
  2086. key = com.apply_if_callable(key, self.obj)
  2087. if not isinstance(key, tuple):
  2088. key = _tuplify(self.ndim, key)
  2089. key = list(self._convert_key(key))
  2090. if len(key) != self.ndim:
  2091. raise ValueError("Not enough indexers for scalar access (setting)!")
  2092. self.obj._set_value(*key, value=value, takeable=self._takeable)
# NOTE: the class docstring is supplied through the @doc decorator from
# IndexingMixin.at, so class-level notes are kept in comments.
@doc(IndexingMixin.at)
class _AtIndexer(_ScalarAccessIndexer):
    _takeable = False

    def _convert_key(self, key):
        """
        Adapt ``key`` so that unpacking it yields the lookup arguments.

        For a 1-dimensional object a key with more than one element is
        wrapped in a 1-tuple, so the whole key is treated as a single
        label. Any other key is returned unchanged.
        """
        # GH 26989
        # For series, unpacking key needs to result in the label.
        # This is already the case for len(key) == 1; e.g. (1,)
        if self.ndim == 1 and len(key) > 1:
            key = (key,)

        return key

    @property
    def _axes_are_unique(self) -> bool:
        # Only relevant for self.ndim == 2
        assert self.ndim == 2
        return self.obj.index.is_unique and self.obj.columns.is_unique

    def __getitem__(self, key):
        if self.ndim == 2 and not self._axes_are_unique:
            # GH#33041 fall back to .loc
            if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
                raise ValueError("Invalid call for scalar access (getting)!")
            return self.obj.loc[key]

        return super().__getitem__(key)

    def __setitem__(self, key, value) -> None:
        # Chained-assignment detection: a low reference count on self.obj
        # triggers a warning. The checks read sys.getrefcount(self.obj)
        # directly and warn with stacklevel=2, so this block is sensitive
        # to extra local references or added call frames.
        if not PYPY and not WARNING_CHECK_DISABLED and using_copy_on_write():
            if sys.getrefcount(self.obj) <= 2:
                warnings.warn(
                    _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
                )
        elif not PYPY and not WARNING_CHECK_DISABLED and not using_copy_on_write():
            ctr = sys.getrefcount(self.obj)
            ref_count = 2
            if not warn_copy_on_write() and _check_cacher(self.obj):
                # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
                ref_count += 1
            if ctr <= ref_count:
                warnings.warn(
                    _chained_assignment_warning_msg, FutureWarning, stacklevel=2
                )
        if self.ndim == 2 and not self._axes_are_unique:
            # GH#33041 fall back to .loc
            if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
                raise ValueError("Invalid call for scalar access (setting)!")

            self.obj.loc[key] = value
            return

        return super().__setitem__(key, value)
# NOTE: the class docstring is supplied through the @doc decorator from
# IndexingMixin.iat, so class-level notes are kept in comments.
@doc(IndexingMixin.iat)
class _iAtIndexer(_ScalarAccessIndexer):
    _takeable = True

    def _convert_key(self, key):
        """
        Require every element of ``key`` to be an integer.

        The key is returned unchanged when the check passes.

        Raises
        ------
        ValueError
            If any element of ``key`` is not an integer.
        """
        for i in key:
            if not is_integer(i):
                raise ValueError("iAt based indexing can only have integer indexers")
        return key

    def __setitem__(self, key, value) -> None:
        # Chained-assignment detection: a low reference count on self.obj
        # triggers a warning. The checks read sys.getrefcount(self.obj)
        # directly and warn with stacklevel=2, so this block is sensitive
        # to extra local references or added call frames.
        if not PYPY and not WARNING_CHECK_DISABLED and using_copy_on_write():
            if sys.getrefcount(self.obj) <= 2:
                warnings.warn(
                    _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
                )
        elif not PYPY and not WARNING_CHECK_DISABLED and not using_copy_on_write():
            ctr = sys.getrefcount(self.obj)
            ref_count = 2
            if not warn_copy_on_write() and _check_cacher(self.obj):
                # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
                ref_count += 1
            if ctr <= ref_count:
                warnings.warn(
                    _chained_assignment_warning_msg, FutureWarning, stacklevel=2
                )

        return super().__setitem__(key, value)
  2170. def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]:
  2171. """
  2172. Given an indexer for the first dimension, create an equivalent tuple
  2173. for indexing over all dimensions.
  2174. Parameters
  2175. ----------
  2176. ndim : int
  2177. loc : object
  2178. Returns
  2179. -------
  2180. tuple
  2181. """
  2182. _tup: list[Hashable | slice]
  2183. _tup = [slice(None, None) for _ in range(ndim)]
  2184. _tup[0] = loc
  2185. return tuple(_tup)
  2186. def _tupleize_axis_indexer(ndim: int, axis: AxisInt, key) -> tuple:
  2187. """
  2188. If we have an axis, adapt the given key to be axis-independent.
  2189. """
  2190. new_key = [slice(None)] * ndim
  2191. new_key[axis] = key
  2192. return tuple(new_key)
  2193. def check_bool_indexer(index: Index, key) -> np.ndarray:
  2194. """
  2195. Check if key is a valid boolean indexer for an object with such index and
  2196. perform reindexing or conversion if needed.
  2197. This function assumes that is_bool_indexer(key) == True.
  2198. Parameters
  2199. ----------
  2200. index : Index
  2201. Index of the object on which the indexing is done.
  2202. key : list-like
  2203. Boolean indexer to check.
  2204. Returns
  2205. -------
  2206. np.array
  2207. Resulting key.
  2208. Raises
  2209. ------
  2210. IndexError
  2211. If the key does not have the same length as index.
  2212. IndexingError
  2213. If the index of the key is unalignable to index.
  2214. """
  2215. result = key
  2216. if isinstance(key, ABCSeries) and not key.index.equals(index):
  2217. indexer = result.index.get_indexer_for(index)
  2218. if -1 in indexer:
  2219. raise IndexingError(
  2220. "Unalignable boolean Series provided as "
  2221. "indexer (index of the boolean Series and of "
  2222. "the indexed object do not match)."
  2223. )
  2224. result = result.take(indexer)
  2225. # fall through for boolean
  2226. if not isinstance(result.dtype, ExtensionDtype):
  2227. return result.astype(bool)._values
  2228. if is_object_dtype(key):
  2229. # key might be object-dtype bool, check_array_indexer needs bool array
  2230. result = np.asarray(result, dtype=bool)
  2231. elif not is_array_like(result):
  2232. # GH 33924
  2233. # key may contain nan elements, check_array_indexer needs bool array
  2234. result = pd_array(result, dtype=bool)
  2235. return check_array_indexer(index, result)
  2236. def convert_missing_indexer(indexer):
  2237. """
  2238. Reverse convert a missing indexer, which is a dict
  2239. return the scalar indexer and a boolean indicating if we converted
  2240. """
  2241. if isinstance(indexer, dict):
  2242. # a missing key (but not a tuple indexer)
  2243. indexer = indexer["key"]
  2244. if isinstance(indexer, bool):
  2245. raise KeyError("cannot use a single bool to index into setitem")
  2246. return indexer, True
  2247. return indexer, False
  2248. def convert_from_missing_indexer_tuple(indexer, axes):
  2249. """
  2250. Create a filtered indexer that doesn't have any missing indexers.
  2251. """
  2252. def get_indexer(_i, _idx):
  2253. return axes[_i].get_loc(_idx["key"]) if isinstance(_idx, dict) else _idx
  2254. return tuple(get_indexer(_i, _idx) for _i, _idx in enumerate(indexer))
  2255. def maybe_convert_ix(*args):
  2256. """
  2257. We likely want to take the cross-product.
  2258. """
  2259. for arg in args:
  2260. if not isinstance(arg, (np.ndarray, list, ABCSeries, Index)):
  2261. return args
  2262. return np.ix_(*args)
  2263. def is_nested_tuple(tup, labels) -> bool:
  2264. """
  2265. Returns
  2266. -------
  2267. bool
  2268. """
  2269. # check for a compatible nested tuple and multiindexes among the axes
  2270. if not isinstance(tup, tuple):
  2271. return False
  2272. for k in tup:
  2273. if is_list_like(k) or isinstance(k, slice):
  2274. return isinstance(labels, MultiIndex)
  2275. return False
  2276. def is_label_like(key) -> bool:
  2277. """
  2278. Returns
  2279. -------
  2280. bool
  2281. """
  2282. # select a label or row
  2283. return (
  2284. not isinstance(key, slice)
  2285. and not is_list_like_indexer(key)
  2286. and key is not Ellipsis
  2287. )
  2288. def need_slice(obj: slice) -> bool:
  2289. """
  2290. Returns
  2291. -------
  2292. bool
  2293. """
  2294. return (
  2295. obj.start is not None
  2296. or obj.stop is not None
  2297. or (obj.step is not None and obj.step != 1)
  2298. )
  2299. def check_dict_or_set_indexers(key) -> None:
  2300. """
  2301. Check if the indexer is or contains a dict or set, which is no longer allowed.
  2302. """
  2303. if (
  2304. isinstance(key, set)
  2305. or isinstance(key, tuple)
  2306. and any(isinstance(x, set) for x in key)
  2307. ):
  2308. raise TypeError(
  2309. "Passing a set as an indexer is not supported. Use a list instead."
  2310. )
  2311. if (
  2312. isinstance(key, dict)
  2313. or isinstance(key, tuple)
  2314. and any(isinstance(x, dict) for x in key)
  2315. ):
  2316. raise TypeError(
  2317. "Passing a dict as an indexer is not supported. Use a list instead."
  2318. )