test_loc.py 118 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411
  1. """ test label based indexing with loc """
  2. from collections import namedtuple
  3. import contextlib
  4. from datetime import (
  5. date,
  6. datetime,
  7. time,
  8. timedelta,
  9. )
  10. import re
  11. from dateutil.tz import gettz
  12. import numpy as np
  13. import pytest
  14. from pandas._config import using_string_dtype
  15. from pandas._libs import index as libindex
  16. from pandas.compat.numpy import np_version_gt2
  17. from pandas.errors import IndexingError
  18. import pandas.util._test_decorators as td
  19. import pandas as pd
  20. from pandas import (
  21. Categorical,
  22. CategoricalDtype,
  23. CategoricalIndex,
  24. DataFrame,
  25. DatetimeIndex,
  26. Index,
  27. IndexSlice,
  28. MultiIndex,
  29. Period,
  30. PeriodIndex,
  31. Series,
  32. SparseDtype,
  33. Timedelta,
  34. Timestamp,
  35. date_range,
  36. timedelta_range,
  37. to_datetime,
  38. to_timedelta,
  39. )
  40. import pandas._testing as tm
  41. from pandas.api.types import is_scalar
  42. from pandas.core.indexing import _one_ellipsis_message
  43. from pandas.tests.indexing.common import check_indexing_smoketest_or_raises
  44. @pytest.mark.parametrize(
  45. "series, new_series, expected_ser",
  46. [
  47. [[np.nan, np.nan, "b"], ["a", np.nan, np.nan], [False, True, True]],
  48. [[np.nan, "b"], ["a", np.nan], [False, True]],
  49. ],
  50. )
  51. def test_not_change_nan_loc(series, new_series, expected_ser):
  52. # GH 28403
  53. df = DataFrame({"A": series})
  54. df.loc[:, "A"] = new_series
  55. expected = DataFrame({"A": expected_ser})
  56. tm.assert_frame_equal(df.isna(), expected)
  57. tm.assert_frame_equal(df.notna(), ~expected)
  58. class TestLoc:
  59. def test_none_values_on_string_columns(self, using_infer_string):
  60. # Issue #32218
  61. df = DataFrame(["1", "2", None], columns=["a"], dtype=object)
  62. assert df.loc[2, "a"] is None
  63. df = DataFrame(["1", "2", None], columns=["a"], dtype="str")
  64. if using_infer_string:
  65. assert np.isnan(df.loc[2, "a"])
  66. else:
  67. assert df.loc[2, "a"] is None
  68. @pytest.mark.parametrize("kind", ["series", "frame"])
  69. def test_loc_getitem_int(self, kind, request):
  70. # int label
  71. obj = request.getfixturevalue(f"{kind}_labels")
  72. check_indexing_smoketest_or_raises(obj, "loc", 2, fails=KeyError)
  73. @pytest.mark.parametrize("kind", ["series", "frame"])
  74. def test_loc_getitem_label(self, kind, request):
  75. # label
  76. obj = request.getfixturevalue(f"{kind}_empty")
  77. check_indexing_smoketest_or_raises(obj, "loc", "c", fails=KeyError)
  78. @pytest.mark.parametrize(
  79. "key, typs, axes",
  80. [
  81. ["f", ["ints", "uints", "labels", "mixed", "ts"], None],
  82. ["f", ["floats"], None],
  83. [20, ["ints", "uints", "mixed"], None],
  84. [20, ["labels"], None],
  85. [20, ["ts"], 0],
  86. [20, ["floats"], 0],
  87. ],
  88. )
  89. @pytest.mark.parametrize("kind", ["series", "frame"])
  90. def test_loc_getitem_label_out_of_range(self, key, typs, axes, kind, request):
  91. for typ in typs:
  92. obj = request.getfixturevalue(f"{kind}_{typ}")
  93. # out of range label
  94. check_indexing_smoketest_or_raises(
  95. obj, "loc", key, axes=axes, fails=KeyError
  96. )
  97. @pytest.mark.parametrize(
  98. "key, typs",
  99. [
  100. [[0, 1, 2], ["ints", "uints", "floats"]],
  101. [[1, 3.0, "A"], ["ints", "uints", "floats"]],
  102. ],
  103. )
  104. @pytest.mark.parametrize("kind", ["series", "frame"])
  105. def test_loc_getitem_label_list(self, key, typs, kind, request):
  106. for typ in typs:
  107. obj = request.getfixturevalue(f"{kind}_{typ}")
  108. # list of labels
  109. check_indexing_smoketest_or_raises(obj, "loc", key, fails=KeyError)
  110. @pytest.mark.parametrize(
  111. "key, typs, axes",
  112. [
  113. [[0, 1, 2], ["empty"], None],
  114. [[0, 2, 10], ["ints", "uints", "floats"], 0],
  115. [[3, 6, 7], ["ints", "uints", "floats"], 1],
  116. # GH 17758 - MultiIndex and missing keys
  117. [[(1, 3), (1, 4), (2, 5)], ["multi"], 0],
  118. ],
  119. )
  120. @pytest.mark.parametrize("kind", ["series", "frame"])
  121. def test_loc_getitem_label_list_with_missing(self, key, typs, axes, kind, request):
  122. for typ in typs:
  123. obj = request.getfixturevalue(f"{kind}_{typ}")
  124. check_indexing_smoketest_or_raises(
  125. obj, "loc", key, axes=axes, fails=KeyError
  126. )
  127. @pytest.mark.parametrize("typs", ["ints", "uints"])
  128. @pytest.mark.parametrize("kind", ["series", "frame"])
  129. def test_loc_getitem_label_list_fails(self, typs, kind, request):
  130. # fails
  131. obj = request.getfixturevalue(f"{kind}_{typs}")
  132. check_indexing_smoketest_or_raises(
  133. obj, "loc", [20, 30, 40], axes=1, fails=KeyError
  134. )
  135. def test_loc_getitem_label_array_like(self):
  136. # TODO: test something?
  137. # array like
  138. pass
  139. @pytest.mark.parametrize("kind", ["series", "frame"])
  140. def test_loc_getitem_bool(self, kind, request):
  141. obj = request.getfixturevalue(f"{kind}_empty")
  142. # boolean indexers
  143. b = [True, False, True, False]
  144. check_indexing_smoketest_or_raises(obj, "loc", b, fails=IndexError)
  145. @pytest.mark.parametrize(
  146. "slc, typs, axes, fails",
  147. [
  148. [
  149. slice(1, 3),
  150. ["labels", "mixed", "empty", "ts", "floats"],
  151. None,
  152. TypeError,
  153. ],
  154. [slice("20130102", "20130104"), ["ts"], 1, TypeError],
  155. [slice(2, 8), ["mixed"], 0, TypeError],
  156. [slice(2, 8), ["mixed"], 1, KeyError],
  157. [slice(2, 4, 2), ["mixed"], 0, TypeError],
  158. ],
  159. )
  160. @pytest.mark.parametrize("kind", ["series", "frame"])
  161. def test_loc_getitem_label_slice(self, slc, typs, axes, fails, kind, request):
  162. # label slices (with ints)
  163. # real label slices
  164. # GH 14316
  165. for typ in typs:
  166. obj = request.getfixturevalue(f"{kind}_{typ}")
  167. check_indexing_smoketest_or_raises(
  168. obj,
  169. "loc",
  170. slc,
  171. axes=axes,
  172. fails=fails,
  173. )
  174. def test_setitem_from_duplicate_axis(self):
  175. # GH#34034
  176. df = DataFrame(
  177. [[20, "a"], [200, "a"], [200, "a"]],
  178. columns=["col1", "col2"],
  179. index=[10, 1, 1],
  180. )
  181. df.loc[1, "col1"] = np.arange(2)
  182. expected = DataFrame(
  183. [[20, "a"], [0, "a"], [1, "a"]], columns=["col1", "col2"], index=[10, 1, 1]
  184. )
  185. tm.assert_frame_equal(df, expected)
  186. def test_column_types_consistent(self):
  187. # GH 26779
  188. df = DataFrame(
  189. data={
  190. "channel": [1, 2, 3],
  191. "A": ["String 1", np.nan, "String 2"],
  192. "B": [
  193. Timestamp("2019-06-11 11:00:00"),
  194. pd.NaT,
  195. Timestamp("2019-06-11 12:00:00"),
  196. ],
  197. }
  198. )
  199. df2 = DataFrame(
  200. data={"A": ["String 3"], "B": [Timestamp("2019-06-11 12:00:00")]}
  201. )
  202. # Change Columns A and B to df2.values wherever Column A is NaN
  203. df.loc[df["A"].isna(), ["A", "B"]] = df2.values
  204. expected = DataFrame(
  205. data={
  206. "channel": [1, 2, 3],
  207. "A": ["String 1", "String 3", "String 2"],
  208. "B": [
  209. Timestamp("2019-06-11 11:00:00"),
  210. Timestamp("2019-06-11 12:00:00"),
  211. Timestamp("2019-06-11 12:00:00"),
  212. ],
  213. }
  214. )
  215. tm.assert_frame_equal(df, expected)
  216. @pytest.mark.parametrize(
  217. "obj, key, exp",
  218. [
  219. (
  220. DataFrame([[1]], columns=Index([False])),
  221. IndexSlice[:, False],
  222. Series([1], name=False),
  223. ),
  224. (Series([1], index=Index([False])), False, [1]),
  225. (DataFrame([[1]], index=Index([False])), False, Series([1], name=False)),
  226. ],
  227. )
  228. def test_loc_getitem_single_boolean_arg(self, obj, key, exp):
  229. # GH 44322
  230. res = obj.loc[key]
  231. if isinstance(exp, (DataFrame, Series)):
  232. tm.assert_equal(res, exp)
  233. else:
  234. assert res == exp
  235. class TestLocBaseIndependent:
  236. # Tests for loc that do not depend on subclassing Base
  237. def test_loc_npstr(self):
  238. # GH#45580
  239. df = DataFrame(index=date_range("2021", "2022"))
  240. result = df.loc[np.array(["2021/6/1"])[0] :]
  241. expected = df.iloc[151:]
  242. tm.assert_frame_equal(result, expected)
  243. @pytest.mark.parametrize(
  244. "msg, key",
  245. [
  246. (r"Period\('2019', 'Y-DEC'\), 'foo', 'bar'", (Period(2019), "foo", "bar")),
  247. (r"Period\('2019', 'Y-DEC'\), 'y1', 'bar'", (Period(2019), "y1", "bar")),
  248. (r"Period\('2019', 'Y-DEC'\), 'foo', 'z1'", (Period(2019), "foo", "z1")),
  249. (
  250. r"Period\('2018', 'Y-DEC'\), Period\('2016', 'Y-DEC'\), 'bar'",
  251. (Period(2018), Period(2016), "bar"),
  252. ),
  253. (r"Period\('2018', 'Y-DEC'\), 'foo', 'y1'", (Period(2018), "foo", "y1")),
  254. (
  255. r"Period\('2017', 'Y-DEC'\), 'foo', Period\('2015', 'Y-DEC'\)",
  256. (Period(2017), "foo", Period(2015)),
  257. ),
  258. (r"Period\('2017', 'Y-DEC'\), 'z1', 'bar'", (Period(2017), "z1", "bar")),
  259. ],
  260. )
  261. def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key):
  262. # GH#20684
  263. """
  264. parse_datetime_string_with_reso return parameter if type not matched.
  265. PeriodIndex.get_loc takes returned value from parse_datetime_string_with_reso
  266. as a tuple.
  267. If first argument is Period and a tuple has 3 items,
  268. process go on not raise exception
  269. """
  270. df = DataFrame(
  271. {
  272. "A": [Period(2019), "x1", "x2"],
  273. "B": [Period(2018), Period(2016), "y1"],
  274. "C": [Period(2017), "z1", Period(2015)],
  275. "V1": [1, 2, 3],
  276. "V2": [10, 20, 30],
  277. }
  278. ).set_index(["A", "B", "C"])
  279. with pytest.raises(KeyError, match=msg):
  280. df.loc[key]
  281. def test_loc_getitem_missing_unicode_key(self):
  282. df = DataFrame({"a": [1]})
  283. with pytest.raises(KeyError, match="\u05d0"):
  284. df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError
  285. def test_loc_getitem_dups(self):
  286. # GH 5678
  287. # repeated getitems on a dup index returning a ndarray
  288. df = DataFrame(
  289. np.random.default_rng(2).random((20, 5)),
  290. index=["ABCDE"[x % 5] for x in range(20)],
  291. )
  292. expected = df.loc["A", 0]
  293. result = df.loc[:, 0].loc["A"]
  294. tm.assert_series_equal(result, expected)
  295. def test_loc_getitem_dups2(self):
  296. # GH4726
  297. # dup indexing with iloc/loc
  298. df = DataFrame(
  299. [[1, 2, "foo", "bar", Timestamp("20130101")]],
  300. columns=["a", "a", "a", "a", "a"],
  301. index=[1],
  302. )
  303. expected = Series(
  304. [1, 2, "foo", "bar", Timestamp("20130101")],
  305. index=["a", "a", "a", "a", "a"],
  306. name=1,
  307. )
  308. result = df.iloc[0]
  309. tm.assert_series_equal(result, expected)
  310. result = df.loc[1]
  311. tm.assert_series_equal(result, expected)
  312. def test_loc_setitem_dups(self):
  313. # GH 6541
  314. df_orig = DataFrame(
  315. {
  316. "me": list("rttti"),
  317. "foo": list("aaade"),
  318. "bar": np.arange(5, dtype="float64") * 1.34 + 2,
  319. "bar2": np.arange(5, dtype="float64") * -0.34 + 2,
  320. }
  321. ).set_index("me")
  322. indexer = (
  323. "r",
  324. ["bar", "bar2"],
  325. )
  326. df = df_orig.copy()
  327. df.loc[indexer] *= 2.0
  328. tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
  329. indexer = (
  330. "r",
  331. "bar",
  332. )
  333. df = df_orig.copy()
  334. df.loc[indexer] *= 2.0
  335. assert df.loc[indexer] == 2.0 * df_orig.loc[indexer]
  336. indexer = (
  337. "t",
  338. ["bar", "bar2"],
  339. )
  340. df = df_orig.copy()
  341. df.loc[indexer] *= 2.0
  342. tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
  343. def test_loc_setitem_slice(self):
  344. # GH10503
  345. # assigning the same type should not change the type
  346. df1 = DataFrame({"a": [0, 1, 1], "b": Series([100, 200, 300], dtype="uint32")})
  347. ix = df1["a"] == 1
  348. newb1 = df1.loc[ix, "b"] + 1
  349. df1.loc[ix, "b"] = newb1
  350. expected = DataFrame(
  351. {"a": [0, 1, 1], "b": Series([100, 201, 301], dtype="uint32")}
  352. )
  353. tm.assert_frame_equal(df1, expected)
  354. # assigning a new type should get the inferred type
  355. df2 = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64")
  356. ix = df1["a"] == 1
  357. newb2 = df2.loc[ix, "b"]
  358. with tm.assert_produces_warning(
  359. FutureWarning, match="item of incompatible dtype"
  360. ):
  361. df1.loc[ix, "b"] = newb2
  362. expected = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64")
  363. tm.assert_frame_equal(df2, expected)
  364. def test_loc_setitem_dtype(self):
  365. # GH31340
  366. df = DataFrame({"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]})
  367. cols = ["a", "b", "c"]
  368. df.loc[:, cols] = df.loc[:, cols].astype("float32")
  369. # pre-2.0 this setting would swap in new arrays, in 2.0 it is correctly
  370. # in-place, consistent with non-split-path
  371. expected = DataFrame(
  372. {
  373. "id": ["A"],
  374. "a": np.array([1.2], dtype="float64"),
  375. "b": np.array([0.0], dtype="float64"),
  376. "c": np.array([-2.5], dtype="float64"),
  377. }
  378. ) # id is inferred as object
  379. tm.assert_frame_equal(df, expected)
  380. def test_getitem_label_list_with_missing(self):
  381. s = Series(range(3), index=["a", "b", "c"])
  382. # consistency
  383. with pytest.raises(KeyError, match="not in index"):
  384. s[["a", "d"]]
  385. s = Series(range(3))
  386. with pytest.raises(KeyError, match="not in index"):
  387. s[[0, 3]]
  388. @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]])
  389. def test_loc_getitem_bool_diff_len(self, index):
  390. # GH26658
  391. s = Series([1, 2, 3])
  392. msg = f"Boolean index has wrong length: {len(index)} instead of {len(s)}"
  393. with pytest.raises(IndexError, match=msg):
  394. s.loc[index]
  395. def test_loc_getitem_int_slice(self):
  396. # TODO: test something here?
  397. pass
  398. def test_loc_to_fail(self):
  399. # GH3449
  400. df = DataFrame(
  401. np.random.default_rng(2).random((3, 3)),
  402. index=["a", "b", "c"],
  403. columns=["e", "f", "g"],
  404. )
  405. msg = (
  406. rf"\"None of \[Index\(\[1, 2\], dtype='{np.dtype(int)}'\)\] are "
  407. r"in the \[index\]\""
  408. )
  409. with pytest.raises(KeyError, match=msg):
  410. df.loc[[1, 2], [1, 2]]
  411. def test_loc_to_fail2(self):
  412. # GH 7496
  413. # loc should not fallback
  414. s = Series(dtype=object)
  415. s.loc[1] = 1
  416. s.loc["a"] = 2
  417. with pytest.raises(KeyError, match=r"^-1$"):
  418. s.loc[-1]
  419. msg = (
  420. rf"\"None of \[Index\(\[-1, -2\], dtype='{np.dtype(int)}'\)\] are "
  421. r"in the \[index\]\""
  422. )
  423. with pytest.raises(KeyError, match=msg):
  424. s.loc[[-1, -2]]
  425. msg = r"\"None of \[Index\(\['4'\], dtype='object'\)\] are in the \[index\]\""
  426. with pytest.raises(KeyError, match=msg):
  427. s.loc[Index(["4"], dtype=object)]
  428. s.loc[-1] = 3
  429. with pytest.raises(KeyError, match="not in index"):
  430. s.loc[[-1, -2]]
  431. s["a"] = 2
  432. msg = (
  433. rf"\"None of \[Index\(\[-2\], dtype='{np.dtype(int)}'\)\] are "
  434. r"in the \[index\]\""
  435. )
  436. with pytest.raises(KeyError, match=msg):
  437. s.loc[[-2]]
  438. del s["a"]
  439. with pytest.raises(KeyError, match=msg):
  440. s.loc[[-2]] = 0
  441. def test_loc_to_fail3(self):
  442. # inconsistency between .loc[values] and .loc[values,:]
  443. # GH 7999
  444. df = DataFrame([["a"], ["b"]], index=[1, 2], columns=["value"])
  445. msg = (
  446. rf"\"None of \[Index\(\[3\], dtype='{np.dtype(int)}'\)\] are "
  447. r"in the \[index\]\""
  448. )
  449. with pytest.raises(KeyError, match=msg):
  450. df.loc[[3], :]
  451. with pytest.raises(KeyError, match=msg):
  452. df.loc[[3]]
  453. def test_loc_getitem_list_with_fail(self):
  454. # 15747
  455. # should KeyError if *any* missing labels
  456. s = Series([1, 2, 3])
  457. s.loc[[2]]
  458. msg = f"\"None of [Index([3], dtype='{np.dtype(int)}')] are in the [index]"
  459. with pytest.raises(KeyError, match=re.escape(msg)):
  460. s.loc[[3]]
  461. # a non-match and a match
  462. with pytest.raises(KeyError, match="not in index"):
  463. s.loc[[2, 3]]
  464. def test_loc_index(self):
  465. # gh-17131
  466. # a boolean index should index like a boolean numpy array
  467. df = DataFrame(
  468. np.random.default_rng(2).random(size=(5, 10)),
  469. index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"],
  470. )
  471. mask = df.index.map(lambda x: "alpha" in x)
  472. expected = df.loc[np.array(mask)]
  473. result = df.loc[mask]
  474. tm.assert_frame_equal(result, expected)
  475. result = df.loc[mask.values]
  476. tm.assert_frame_equal(result, expected)
  477. result = df.loc[pd.array(mask, dtype="boolean")]
  478. tm.assert_frame_equal(result, expected)
  479. def test_loc_general(self):
  480. df = DataFrame(
  481. np.random.default_rng(2).random((4, 4)),
  482. columns=["A", "B", "C", "D"],
  483. index=["A", "B", "C", "D"],
  484. )
  485. # want this to work
  486. result = df.loc[:, "A":"B"].iloc[0:2, :]
  487. assert (result.columns == ["A", "B"]).all()
  488. assert (result.index == ["A", "B"]).all()
  489. # mixed type
  490. result = DataFrame({"a": [Timestamp("20130101")], "b": [1]}).iloc[0]
  491. expected = Series([Timestamp("20130101"), 1], index=["a", "b"], name=0)
  492. tm.assert_series_equal(result, expected)
  493. assert result.dtype == object
  494. @pytest.fixture
  495. def frame_for_consistency(self):
  496. return DataFrame(
  497. {
  498. "date": date_range("2000-01-01", "2000-01-5"),
  499. "val": Series(range(5), dtype=np.int64),
  500. }
  501. )
  502. @pytest.mark.parametrize(
  503. "val",
  504. [0, np.array(0, dtype=np.int64), np.array([0, 0, 0, 0, 0], dtype=np.int64)],
  505. )
  506. def test_loc_setitem_consistency(self, frame_for_consistency, val):
  507. # GH 6149
  508. # coerce similarly for setitem and loc when rows have a null-slice
  509. expected = DataFrame(
  510. {
  511. "date": Series(0, index=range(5), dtype=np.int64),
  512. "val": Series(range(5), dtype=np.int64),
  513. }
  514. )
  515. df = frame_for_consistency.copy()
  516. with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
  517. df.loc[:, "date"] = val
  518. tm.assert_frame_equal(df, expected)
  519. def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency):
  520. # GH 6149
  521. # coerce similarly for setitem and loc when rows have a null-slice
  522. expected = DataFrame(
  523. {
  524. "date": Series("foo", index=range(5)),
  525. "val": Series(range(5), dtype=np.int64),
  526. }
  527. )
  528. df = frame_for_consistency.copy()
  529. with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
  530. df.loc[:, "date"] = "foo"
  531. tm.assert_frame_equal(df, expected)
  532. def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency):
  533. # GH 6149
  534. # coerce similarly for setitem and loc when rows have a null-slice
  535. expected = DataFrame(
  536. {
  537. "date": Series(1.0, index=range(5)),
  538. "val": Series(range(5), dtype=np.int64),
  539. }
  540. )
  541. df = frame_for_consistency.copy()
  542. with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
  543. df.loc[:, "date"] = 1.0
  544. tm.assert_frame_equal(df, expected)
  545. def test_loc_setitem_consistency_single_row(self):
  546. # GH 15494
  547. # setting on frame with single row
  548. df = DataFrame({"date": Series([Timestamp("20180101")])})
  549. with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
  550. df.loc[:, "date"] = "string"
  551. expected = DataFrame({"date": Series(["string"])})
  552. tm.assert_frame_equal(df, expected)
  553. def test_loc_setitem_consistency_empty(self):
  554. # empty (essentially noops)
  555. # before the enforcement of #45333 in 2.0, the loc.setitem here would
  556. # change the dtype of df.x to int64
  557. expected = DataFrame(columns=["x", "y"])
  558. df = DataFrame(columns=["x", "y"])
  559. with tm.assert_produces_warning(None):
  560. df.loc[:, "x"] = 1
  561. tm.assert_frame_equal(df, expected)
  562. # setting with setitem swaps in a new array, so changes the dtype
  563. df = DataFrame(columns=["x", "y"])
  564. df["x"] = 1
  565. expected["x"] = expected["x"].astype(np.int64)
  566. tm.assert_frame_equal(df, expected)
  567. # incompatible dtype warning
  568. @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
  569. def test_loc_setitem_consistency_slice_column_len(self, using_infer_string):
  570. # .loc[:,column] setting with slice == len of the column
  571. # GH10408
  572. levels = [
  573. ["Region_1"] * 4,
  574. ["Site_1", "Site_1", "Site_2", "Site_2"],
  575. [3987227376, 3980680971, 3977723249, 3977723089],
  576. ]
  577. mi = MultiIndex.from_arrays(levels, names=["Region", "Site", "RespondentID"])
  578. clevels = [
  579. ["Respondent", "Respondent", "Respondent", "OtherCat", "OtherCat"],
  580. ["Something", "StartDate", "EndDate", "Yes/No", "SomethingElse"],
  581. ]
  582. cols = MultiIndex.from_arrays(clevels, names=["Level_0", "Level_1"])
  583. values = [
  584. ["A", "5/25/2015 10:59", "5/25/2015 11:22", "Yes", np.nan],
  585. ["A", "5/21/2015 9:40", "5/21/2015 9:52", "Yes", "Yes"],
  586. ["A", "5/20/2015 8:27", "5/20/2015 8:41", "Yes", np.nan],
  587. ["A", "5/20/2015 8:33", "5/20/2015 9:09", "Yes", "No"],
  588. ]
  589. df = DataFrame(values, index=mi, columns=cols)
  590. ctx = contextlib.nullcontext()
  591. if using_infer_string:
  592. ctx = pytest.raises(TypeError, match="Invalid value")
  593. with ctx:
  594. df.loc[:, ("Respondent", "StartDate")] = to_datetime(
  595. df.loc[:, ("Respondent", "StartDate")]
  596. )
  597. with ctx:
  598. df.loc[:, ("Respondent", "EndDate")] = to_datetime(
  599. df.loc[:, ("Respondent", "EndDate")]
  600. )
  601. if using_infer_string:
  602. # infer-objects won't infer stuff anymore
  603. return
  604. df = df.infer_objects()
  605. # Adding a new key
  606. df.loc[:, ("Respondent", "Duration")] = (
  607. df.loc[:, ("Respondent", "EndDate")]
  608. - df.loc[:, ("Respondent", "StartDate")]
  609. )
  610. # timedelta64[m] -> float, so this cannot be done inplace, so
  611. # no warning
  612. with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
  613. df.loc[:, ("Respondent", "Duration")] = df.loc[
  614. :, ("Respondent", "Duration")
  615. ] / Timedelta(60_000_000_000)
  616. expected = Series(
  617. [23.0, 12.0, 14.0, 36.0], index=df.index, name=("Respondent", "Duration")
  618. )
  619. tm.assert_series_equal(df[("Respondent", "Duration")], expected)
  620. @pytest.mark.parametrize("unit", ["Y", "M", "D", "h", "m", "s", "ms", "us"])
  621. def test_loc_assign_non_ns_datetime(self, unit):
  622. # GH 27395, non-ns dtype assignment via .loc should work
  623. # and return the same result when using simple assignment
  624. df = DataFrame(
  625. {
  626. "timestamp": [
  627. np.datetime64("2017-02-11 12:41:29"),
  628. np.datetime64("1991-11-07 04:22:37"),
  629. ]
  630. }
  631. )
  632. df.loc[:, unit] = df.loc[:, "timestamp"].values.astype(f"datetime64[{unit}]")
  633. df["expected"] = df.loc[:, "timestamp"].values.astype(f"datetime64[{unit}]")
  634. expected = Series(df.loc[:, "expected"], name=unit)
  635. tm.assert_series_equal(df.loc[:, unit], expected)
  636. def test_loc_modify_datetime(self):
  637. # see gh-28837
  638. df = DataFrame.from_dict(
  639. {"date": [1485264372711, 1485265925110, 1540215845888, 1540282121025]}
  640. )
  641. df["date_dt"] = to_datetime(df["date"], unit="ms", cache=True)
  642. df.loc[:, "date_dt_cp"] = df.loc[:, "date_dt"]
  643. df.loc[[2, 3], "date_dt_cp"] = df.loc[[2, 3], "date_dt"]
  644. expected = DataFrame(
  645. [
  646. [1485264372711, "2017-01-24 13:26:12.711", "2017-01-24 13:26:12.711"],
  647. [1485265925110, "2017-01-24 13:52:05.110", "2017-01-24 13:52:05.110"],
  648. [1540215845888, "2018-10-22 13:44:05.888", "2018-10-22 13:44:05.888"],
  649. [1540282121025, "2018-10-23 08:08:41.025", "2018-10-23 08:08:41.025"],
  650. ],
  651. columns=["date", "date_dt", "date_dt_cp"],
  652. )
  653. columns = ["date_dt", "date_dt_cp"]
  654. expected[columns] = expected[columns].apply(to_datetime)
  655. tm.assert_frame_equal(df, expected)
  656. @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  657. @pytest.mark.parametrize("has_ref", [True, False])
  658. def test_loc_setitem_frame_with_reindex(self, has_ref):
  659. # GH#6254 setting issue
  660. df = DataFrame(index=[3, 5, 4], columns=["A"], dtype=float)
  661. if has_ref:
  662. view = df[:] # noqa: F841
  663. df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64")
  664. # setting integer values into a float dataframe with loc is inplace,
  665. # so we retain float dtype
  666. ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float)
  667. expected = DataFrame({"A": ser})
  668. tm.assert_frame_equal(df, expected)
  669. def test_loc_setitem_frame_with_reindex_mixed(self):
  670. # GH#40480
  671. df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float)
  672. df["B"] = "string"
  673. df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64")
  674. ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64")
  675. # pre-2.0 this setting swapped in a new array, now it is inplace
  676. # consistent with non-split-path
  677. expected = DataFrame({"A": ser.astype(float)})
  678. expected["B"] = "string"
  679. tm.assert_frame_equal(df, expected)
  680. def test_loc_setitem_frame_with_inverted_slice(self):
  681. # GH#40480
  682. df = DataFrame(index=[1, 2, 3], columns=["A", "B"], dtype=float)
  683. df["B"] = "string"
  684. df.loc[slice(3, 0, -1), "A"] = np.array([1, 2, 3], dtype="int64")
  685. # pre-2.0 this setting swapped in a new array, now it is inplace
  686. # consistent with non-split-path
  687. expected = DataFrame({"A": [3.0, 2.0, 1.0], "B": "string"}, index=[1, 2, 3])
  688. tm.assert_frame_equal(df, expected)
  689. @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  690. @pytest.mark.parametrize("has_ref", [True, False])
  691. def test_loc_setitem_empty_frame(self, has_ref):
  692. # GH#6252 setting with an empty frame
  693. keys1 = ["@" + str(i) for i in range(5)]
  694. val1 = np.arange(5, dtype="int64")
  695. keys2 = ["@" + str(i) for i in range(4)]
  696. val2 = np.arange(4, dtype="int64")
  697. index = list(set(keys1).union(keys2))
  698. df = DataFrame(index=index)
  699. df["A"] = np.nan
  700. if has_ref:
  701. view = df[:] # noqa: F841
  702. df.loc[keys1, "A"] = val1
  703. df["B"] = np.nan
  704. df.loc[keys2, "B"] = val2
  705. # Because df["A"] was initialized as float64, setting values into it
  706. # is inplace, so that dtype is retained
  707. sera = Series(val1, index=keys1, dtype=np.float64)
  708. serb = Series(val2, index=keys2)
  709. expected = DataFrame({"A": sera, "B": serb}, columns=Index(["A", "B"])).reindex(
  710. index=index
  711. )
  712. tm.assert_frame_equal(df, expected)
  713. @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  714. @pytest.mark.parametrize("has_ref", [True, False])
  715. def test_loc_setitem_frame(self, has_ref):
  716. df = DataFrame(
  717. np.random.default_rng(2).standard_normal((4, 4)),
  718. index=list("abcd"),
  719. columns=list("ABCD"),
  720. )
  721. if has_ref:
  722. view = df[:] # noqa: F841
  723. result = df.iloc[0, 0]
  724. df.loc["a", "A"] = 1
  725. result = df.loc["a", "A"]
  726. assert result == 1
  727. result = df.iloc[0, 0]
  728. assert result == 1
  729. df.loc[:, "B":"D"] = 0
  730. expected = df.loc[:, "B":"D"]
  731. result = df.iloc[:, 1:]
  732. tm.assert_frame_equal(result, expected)
  733. def test_loc_setitem_frame_nan_int_coercion_invalid(self):
  734. # GH 8669
  735. # invalid coercion of nan -> int
  736. df = DataFrame({"A": [1, 2, 3], "B": np.nan})
  737. df.loc[df.B > df.A, "B"] = df.A
  738. expected = DataFrame({"A": [1, 2, 3], "B": np.nan})
  739. tm.assert_frame_equal(df, expected)
  740. def test_loc_setitem_frame_mixed_labels(self):
  741. # GH 6546
  742. # setting with mixed labels
  743. df = DataFrame({1: [1, 2], 2: [3, 4], "a": ["a", "b"]})
  744. result = df.loc[0, [1, 2]]
  745. expected = Series(
  746. [1, 3], index=Index([1, 2], dtype=object), dtype=object, name=0
  747. )
  748. tm.assert_series_equal(result, expected)
  749. expected = DataFrame({1: [5, 2], 2: [6, 4], "a": ["a", "b"]})
  750. df.loc[0, [1, 2]] = [5, 6]
  751. tm.assert_frame_equal(df, expected)
  752. @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
  753. def test_loc_setitem_frame_multiples(self, warn_copy_on_write):
  754. # multiple setting
  755. df = DataFrame(
  756. {"A": ["foo", "bar", "baz"], "B": Series(range(3), dtype=np.int64)}
  757. )
  758. rhs = df.loc[1:2]
  759. rhs.index = df.index[0:2]
  760. df.loc[0:1] = rhs
  761. expected = DataFrame(
  762. {"A": ["bar", "baz", "baz"], "B": Series([1, 2, 2], dtype=np.int64)}
  763. )
  764. tm.assert_frame_equal(df, expected)
  765. # multiple setting with frame on rhs (with M8)
  766. df = DataFrame(
  767. {
  768. "date": date_range("2000-01-01", "2000-01-5"),
  769. "val": Series(range(5), dtype=np.int64),
  770. }
  771. )
  772. expected = DataFrame(
  773. {
  774. "date": [
  775. Timestamp("20000101"),
  776. Timestamp("20000102"),
  777. Timestamp("20000101"),
  778. Timestamp("20000102"),
  779. Timestamp("20000103"),
  780. ],
  781. "val": Series([0, 1, 0, 1, 2], dtype=np.int64),
  782. }
  783. )
  784. rhs = df.loc[0:2]
  785. rhs.index = df.index[2:5]
  786. df.loc[2:4] = rhs
  787. tm.assert_frame_equal(df, expected)
  788. @pytest.mark.parametrize(
  789. "indexer", [["A"], slice(None, "A", None), np.array(["A"])]
  790. )
  791. @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])])
  792. def test_loc_setitem_with_scalar_index(self, indexer, value):
  793. # GH #19474
  794. # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated
  795. # elementwisely, not using "setter('A', ['Z'])".
  796. # Set object dtype to avoid upcast when setting 'Z'
  797. df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]).astype({"A": object})
  798. df.loc[0, indexer] = value
  799. result = df.loc[0, "A"]
  800. assert is_scalar(result) and result == "Z"
  801. @pytest.mark.parametrize(
  802. "index,box,expected",
  803. [
  804. (
  805. ([0, 2], ["A", "B", "C", "D"]),
  806. 7,
  807. DataFrame(
  808. [[7, 7, 7, 7], [3, 4, np.nan, np.nan], [7, 7, 7, 7]],
  809. columns=["A", "B", "C", "D"],
  810. ),
  811. ),
  812. (
  813. (1, ["C", "D"]),
  814. [7, 8],
  815. DataFrame(
  816. [[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]],
  817. columns=["A", "B", "C", "D"],
  818. ),
  819. ),
  820. (
  821. (1, ["A", "B", "C"]),
  822. np.array([7, 8, 9], dtype=np.int64),
  823. DataFrame(
  824. [[1, 2, np.nan], [7, 8, 9], [5, 6, np.nan]], columns=["A", "B", "C"]
  825. ),
  826. ),
  827. (
  828. (slice(1, 3, None), ["B", "C", "D"]),
  829. [[7, 8, 9], [10, 11, 12]],
  830. DataFrame(
  831. [[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]],
  832. columns=["A", "B", "C", "D"],
  833. ),
  834. ),
  835. (
  836. (slice(1, 3, None), ["C", "A", "D"]),
  837. np.array([[7, 8, 9], [10, 11, 12]], dtype=np.int64),
  838. DataFrame(
  839. [[1, 2, np.nan, np.nan], [8, 4, 7, 9], [11, 6, 10, 12]],
  840. columns=["A", "B", "C", "D"],
  841. ),
  842. ),
  843. (
  844. (slice(None, None, None), ["A", "C"]),
  845. DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]),
  846. DataFrame(
  847. [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"]
  848. ),
  849. ),
  850. ],
  851. )
  852. def test_loc_setitem_missing_columns(self, index, box, expected):
  853. # GH 29334
  854. df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
  855. df.loc[index] = box
  856. tm.assert_frame_equal(df, expected)
  857. def test_loc_coercion(self):
  858. # GH#12411
  859. df = DataFrame({"date": [Timestamp("20130101").tz_localize("UTC"), pd.NaT]})
  860. expected = df.dtypes
  861. result = df.iloc[[0]]
  862. tm.assert_series_equal(result.dtypes, expected)
  863. result = df.iloc[[1]]
  864. tm.assert_series_equal(result.dtypes, expected)
  865. def test_loc_coercion2(self):
  866. # GH#12045
  867. df = DataFrame({"date": [datetime(2012, 1, 1), datetime(1012, 1, 2)]})
  868. expected = df.dtypes
  869. result = df.iloc[[0]]
  870. tm.assert_series_equal(result.dtypes, expected)
  871. result = df.iloc[[1]]
  872. tm.assert_series_equal(result.dtypes, expected)
  873. def test_loc_coercion3(self):
  874. # GH#11594
  875. df = DataFrame({"text": ["some words"] + [None] * 9})
  876. expected = df.dtypes
  877. result = df.iloc[0:2]
  878. tm.assert_series_equal(result.dtypes, expected)
  879. result = df.iloc[3:]
  880. tm.assert_series_equal(result.dtypes, expected)
  881. def test_setitem_new_key_tz(self, indexer_sl):
  882. # GH#12862 should not raise on assigning the second value
  883. vals = [
  884. to_datetime(42).tz_localize("UTC"),
  885. to_datetime(666).tz_localize("UTC"),
  886. ]
  887. expected = Series(vals, index=Index(["foo", "bar"]))
  888. ser = Series(dtype=object)
  889. indexer_sl(ser)["foo"] = vals[0]
  890. indexer_sl(ser)["bar"] = vals[1]
  891. tm.assert_series_equal(ser, expected)
  892. def test_loc_non_unique(self):
  893. # GH3659
  894. # non-unique indexer with loc slice
  895. # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs
  896. # these are going to raise because the we are non monotonic
  897. df = DataFrame(
  898. {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3]
  899. )
  900. msg = "'Cannot get left slice bound for non-unique label: 1'"
  901. with pytest.raises(KeyError, match=msg):
  902. df.loc[1:]
  903. msg = "'Cannot get left slice bound for non-unique label: 0'"
  904. with pytest.raises(KeyError, match=msg):
  905. df.loc[0:]
  906. msg = "'Cannot get left slice bound for non-unique label: 1'"
  907. with pytest.raises(KeyError, match=msg):
  908. df.loc[1:2]
  909. # monotonic are ok
  910. df = DataFrame(
  911. {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3]
  912. ).sort_index(axis=0)
  913. result = df.loc[1:]
  914. expected = DataFrame({"A": [2, 4, 5, 6], "B": [4, 6, 7, 8]}, index=[1, 1, 2, 3])
  915. tm.assert_frame_equal(result, expected)
  916. result = df.loc[0:]
  917. tm.assert_frame_equal(result, df)
  918. result = df.loc[1:2]
  919. expected = DataFrame({"A": [2, 4, 5], "B": [4, 6, 7]}, index=[1, 1, 2])
  920. tm.assert_frame_equal(result, expected)
  921. @pytest.mark.arm_slow
  922. @pytest.mark.parametrize("length, l2", [[900, 100], [900000, 100000]])
  923. def test_loc_non_unique_memory_error(self, length, l2):
  924. # GH 4280
  925. # non_unique index with a large selection triggers a memory error
  926. columns = list("ABCDEFG")
  927. df = pd.concat(
  928. [
  929. DataFrame(
  930. np.random.default_rng(2).standard_normal((length, len(columns))),
  931. index=np.arange(length),
  932. columns=columns,
  933. ),
  934. DataFrame(np.ones((l2, len(columns))), index=[0] * l2, columns=columns),
  935. ]
  936. )
  937. assert df.index.is_unique is False
  938. mask = np.arange(l2)
  939. result = df.loc[mask]
  940. expected = pd.concat(
  941. [
  942. df.take([0]),
  943. DataFrame(
  944. np.ones((len(mask), len(columns))),
  945. index=[0] * len(mask),
  946. columns=columns,
  947. ),
  948. df.take(mask[1:]),
  949. ]
  950. )
  951. tm.assert_frame_equal(result, expected)
  952. def test_loc_name(self):
  953. # GH 3880
  954. df = DataFrame([[1, 1], [1, 1]])
  955. df.index.name = "index_name"
  956. result = df.iloc[[0, 1]].index.name
  957. assert result == "index_name"
  958. result = df.loc[[0, 1]].index.name
  959. assert result == "index_name"
  960. def test_loc_empty_list_indexer_is_ok(self):
  961. df = DataFrame(
  962. np.ones((5, 2)),
  963. index=Index([f"i-{i}" for i in range(5)], name="a"),
  964. columns=Index([f"i-{i}" for i in range(2)], name="a"),
  965. )
  966. # vertical empty
  967. tm.assert_frame_equal(
  968. df.loc[:, []], df.iloc[:, :0], check_index_type=True, check_column_type=True
  969. )
  970. # horizontal empty
  971. tm.assert_frame_equal(
  972. df.loc[[], :], df.iloc[:0, :], check_index_type=True, check_column_type=True
  973. )
  974. # horizontal empty
  975. tm.assert_frame_equal(
  976. df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True
  977. )
  978. def test_identity_slice_returns_new_object(
  979. self, using_copy_on_write, warn_copy_on_write
  980. ):
  981. # GH13873
  982. original_df = DataFrame({"a": [1, 2, 3]})
  983. sliced_df = original_df.loc[:]
  984. assert sliced_df is not original_df
  985. assert original_df[:] is not original_df
  986. assert original_df.loc[:, :] is not original_df
  987. # should be a shallow copy
  988. assert np.shares_memory(original_df["a"]._values, sliced_df["a"]._values)
  989. # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig
  990. # depending on CoW
  991. with tm.assert_cow_warning(warn_copy_on_write):
  992. original_df.loc[:, "a"] = [4, 4, 4]
  993. if using_copy_on_write:
  994. assert (sliced_df["a"] == [1, 2, 3]).all()
  995. else:
  996. assert (sliced_df["a"] == 4).all()
  997. # These should not return copies
  998. df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
  999. if using_copy_on_write or warn_copy_on_write:
  1000. assert df[0] is not df.loc[:, 0]
  1001. else:
  1002. assert df[0] is df.loc[:, 0]
  1003. # Same tests for Series
  1004. original_series = Series([1, 2, 3, 4, 5, 6])
  1005. sliced_series = original_series.loc[:]
  1006. assert sliced_series is not original_series
  1007. assert original_series[:] is not original_series
  1008. with tm.assert_cow_warning(warn_copy_on_write):
  1009. original_series[:3] = [7, 8, 9]
  1010. if using_copy_on_write:
  1011. assert all(sliced_series[:3] == [1, 2, 3])
  1012. else:
  1013. assert all(sliced_series[:3] == [7, 8, 9])
  1014. def test_loc_copy_vs_view(self, request, using_copy_on_write):
  1015. # GH 15631
  1016. if not using_copy_on_write:
  1017. mark = pytest.mark.xfail(reason="accidental fix reverted - GH37497")
  1018. request.applymarker(mark)
  1019. x = DataFrame(zip(range(3), range(3)), columns=["a", "b"])
  1020. y = x.copy()
  1021. q = y.loc[:, "a"]
  1022. q += 2
  1023. tm.assert_frame_equal(x, y)
  1024. z = x.copy()
  1025. q = z.loc[x.index, "a"]
  1026. q += 2
  1027. tm.assert_frame_equal(x, z)
  1028. def test_loc_uint64(self):
  1029. # GH20722
  1030. # Test whether loc accept uint64 max value as index.
  1031. umax = np.iinfo("uint64").max
  1032. ser = Series([1, 2], index=[umax - 1, umax])
  1033. result = ser.loc[umax - 1]
  1034. expected = ser.iloc[0]
  1035. assert result == expected
  1036. result = ser.loc[[umax - 1]]
  1037. expected = ser.iloc[[0]]
  1038. tm.assert_series_equal(result, expected)
  1039. result = ser.loc[[umax - 1, umax]]
  1040. tm.assert_series_equal(result, ser)
  1041. def test_loc_uint64_disallow_negative(self):
  1042. # GH#41775
  1043. umax = np.iinfo("uint64").max
  1044. ser = Series([1, 2], index=[umax - 1, umax])
  1045. with pytest.raises(KeyError, match="-1"):
  1046. # don't wrap around
  1047. ser.loc[-1]
  1048. with pytest.raises(KeyError, match="-1"):
  1049. # don't wrap around
  1050. ser.loc[[-1]]
  1051. def test_loc_setitem_empty_append_expands_rows(self):
  1052. # GH6173, various appends to an empty dataframe
  1053. data = [1, 2, 3]
  1054. expected = DataFrame(
  1055. {"x": data, "y": np.array([np.nan] * len(data), dtype=object)}
  1056. )
  1057. # appends to fit length of data
  1058. df = DataFrame(columns=["x", "y"])
  1059. df.loc[:, "x"] = data
  1060. tm.assert_frame_equal(df, expected)
  1061. def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self):
  1062. # GH#37932 same as test_loc_setitem_empty_append_expands_rows
  1063. # but with mixed dtype so we go through take_split_path
  1064. data = [1, 2, 3]
  1065. expected = DataFrame(
  1066. {"x": data, "y": np.array([np.nan] * len(data), dtype=object)}
  1067. )
  1068. df = DataFrame(columns=["x", "y"])
  1069. df["x"] = df["x"].astype(np.int64)
  1070. df.loc[:, "x"] = data
  1071. tm.assert_frame_equal(df, expected)
  1072. def test_loc_setitem_empty_append_single_value(self):
  1073. # only appends one value
  1074. expected = DataFrame({"x": [1.0], "y": [np.nan]})
  1075. df = DataFrame(columns=["x", "y"], dtype=float)
  1076. df.loc[0, "x"] = expected.loc[0, "x"]
  1077. tm.assert_frame_equal(df, expected)
  1078. def test_loc_setitem_empty_append_raises(self):
  1079. # GH6173, various appends to an empty dataframe
  1080. data = [1, 2]
  1081. df = DataFrame(columns=["x", "y"])
  1082. df.index = df.index.astype(np.int64)
  1083. msg = (
  1084. rf"None of \[Index\(\[0, 1\], dtype='{np.dtype(int)}'\)\] "
  1085. r"are in the \[index\]"
  1086. )
  1087. with pytest.raises(KeyError, match=msg):
  1088. df.loc[[0, 1], "x"] = data
  1089. msg = "setting an array element with a sequence."
  1090. with pytest.raises(ValueError, match=msg):
  1091. df.loc[0:2, "x"] = data
  1092. def test_indexing_zerodim_np_array(self):
  1093. # GH24924
  1094. df = DataFrame([[1, 2], [3, 4]])
  1095. result = df.loc[np.array(0)]
  1096. s = Series([1, 2], name=0)
  1097. tm.assert_series_equal(result, s)
  1098. def test_series_indexing_zerodim_np_array(self):
  1099. # GH24924
  1100. s = Series([1, 2])
  1101. result = s.loc[np.array(0)]
  1102. assert result == 1
  1103. def test_loc_reverse_assignment(self):
  1104. # GH26939
  1105. data = [1, 2, 3, 4, 5, 6] + [None] * 4
  1106. expected = Series(data, index=range(2010, 2020))
  1107. result = Series(index=range(2010, 2020), dtype=np.float64)
  1108. result.loc[2015:2010:-1] = [6, 5, 4, 3, 2, 1]
  1109. tm.assert_series_equal(result, expected)
  1110. def test_loc_setitem_str_to_small_float_conversion_type(self, using_infer_string):
  1111. # GH#20388
  1112. col_data = [str(np.random.default_rng(2).random() * 1e-12) for _ in range(5)]
  1113. result = DataFrame(col_data, columns=["A"])
  1114. expected = DataFrame(col_data, columns=["A"])
  1115. tm.assert_frame_equal(result, expected)
  1116. # assigning with loc/iloc attempts to set the values inplace, which
  1117. # in this case is successful
  1118. if using_infer_string:
  1119. with pytest.raises(TypeError, match="Invalid value"):
  1120. result.loc[result.index, "A"] = [float(x) for x in col_data]
  1121. else:
  1122. result.loc[result.index, "A"] = [float(x) for x in col_data]
  1123. expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object)
  1124. tm.assert_frame_equal(result, expected)
  1125. # assigning the entire column using __setitem__ swaps in the new array
  1126. # GH#???
  1127. result["A"] = [float(x) for x in col_data]
  1128. expected = DataFrame(col_data, columns=["A"], dtype=float)
  1129. tm.assert_frame_equal(result, expected)
  1130. def test_loc_getitem_time_object(self, frame_or_series):
  1131. rng = date_range("1/1/2000", "1/5/2000", freq="5min")
  1132. mask = (rng.hour == 9) & (rng.minute == 30)
  1133. obj = DataFrame(
  1134. np.random.default_rng(2).standard_normal((len(rng), 3)), index=rng
  1135. )
  1136. obj = tm.get_obj(obj, frame_or_series)
  1137. result = obj.loc[time(9, 30)]
  1138. exp = obj.loc[mask]
  1139. tm.assert_equal(result, exp)
  1140. chunk = obj.loc["1/4/2000":]
  1141. result = chunk.loc[time(9, 30)]
  1142. expected = result[-1:]
  1143. # Without resetting the freqs, these are 5 min and 1440 min, respectively
  1144. result.index = result.index._with_freq(None)
  1145. expected.index = expected.index._with_freq(None)
  1146. tm.assert_equal(result, expected)
  1147. @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
  1148. @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
  1149. def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype):
  1150. sp_sparse = pytest.importorskip("scipy.sparse")
  1151. spmatrix_t = getattr(sp_sparse, spmatrix_t)
  1152. # The bug is triggered by a sparse matrix with purely sparse columns. So the
  1153. # recipe below generates a rectangular matrix of dimension (5, 7) where all the
  1154. # diagonal cells are ones, meaning the last two columns are purely sparse.
  1155. rows, cols = 5, 7
  1156. spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype)
  1157. df = DataFrame.sparse.from_spmatrix(spmatrix)
  1158. # regression test for GH#34526
  1159. itr_idx = range(2, rows)
  1160. result = df.loc[itr_idx].values
  1161. expected = spmatrix.toarray()[itr_idx]
  1162. tm.assert_numpy_array_equal(result, expected)
  1163. # regression test for GH#34540
  1164. result = df.loc[itr_idx].dtypes.values
  1165. expected = np.full(cols, SparseDtype(dtype, fill_value=0))
  1166. tm.assert_numpy_array_equal(result, expected)
  1167. def test_loc_getitem_listlike_all_retains_sparse(self):
  1168. df = DataFrame({"A": pd.array([0, 0], dtype=SparseDtype("int64"))})
  1169. result = df.loc[[0, 1]]
  1170. tm.assert_frame_equal(result, df)
  1171. def test_loc_getitem_sparse_frame(self):
  1172. # GH34687
  1173. sp_sparse = pytest.importorskip("scipy.sparse")
  1174. df = DataFrame.sparse.from_spmatrix(sp_sparse.eye(5))
  1175. result = df.loc[range(2)]
  1176. expected = DataFrame(
  1177. [[1.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0]],
  1178. dtype=SparseDtype("float64", 0.0),
  1179. )
  1180. tm.assert_frame_equal(result, expected)
  1181. result = df.loc[range(2)].loc[range(1)]
  1182. expected = DataFrame(
  1183. [[1.0, 0.0, 0.0, 0.0, 0.0]], dtype=SparseDtype("float64", 0.0)
  1184. )
  1185. tm.assert_frame_equal(result, expected)
  1186. def test_loc_getitem_sparse_series(self):
  1187. # GH34687
  1188. s = Series([1.0, 0.0, 0.0, 0.0, 0.0], dtype=SparseDtype("float64", 0.0))
  1189. result = s.loc[range(2)]
  1190. expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0))
  1191. tm.assert_series_equal(result, expected)
  1192. result = s.loc[range(3)].loc[range(2)]
  1193. expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0))
  1194. tm.assert_series_equal(result, expected)
  1195. @pytest.mark.parametrize("indexer", ["loc", "iloc"])
  1196. def test_getitem_single_row_sparse_df(self, indexer):
  1197. # GH#46406
  1198. df = DataFrame([[1.0, 0.0, 1.5], [0.0, 2.0, 0.0]], dtype=SparseDtype(float))
  1199. result = getattr(df, indexer)[0]
  1200. expected = Series([1.0, 0.0, 1.5], dtype=SparseDtype(float), name=0)
  1201. tm.assert_series_equal(result, expected)
  1202. @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index])
  1203. def test_loc_getitem_iterable(self, float_frame, key_type):
  1204. idx = key_type(["A", "B", "C"])
  1205. result = float_frame.loc[:, idx]
  1206. expected = float_frame.loc[:, ["A", "B", "C"]]
  1207. tm.assert_frame_equal(result, expected)
  1208. def test_loc_getitem_timedelta_0seconds(self):
  1209. # GH#10583
  1210. df = DataFrame(np.random.default_rng(2).normal(size=(10, 4)))
  1211. df.index = timedelta_range(start="0s", periods=10, freq="s")
  1212. expected = df.loc[Timedelta("0s") :, :]
  1213. result = df.loc["0s":, :]
  1214. tm.assert_frame_equal(result, expected)
  1215. @pytest.mark.parametrize(
  1216. "val,expected", [(2**63 - 1, Series([1])), (2**63, Series([2]))]
  1217. )
  1218. def test_loc_getitem_uint64_scalar(self, val, expected):
  1219. # see GH#19399
  1220. df = DataFrame([1, 2], index=[2**63 - 1, 2**63])
  1221. result = df.loc[val]
  1222. expected.name = val
  1223. tm.assert_series_equal(result, expected)
  1224. def test_loc_setitem_int_label_with_float_index(self, float_numpy_dtype):
  1225. # note labels are floats
  1226. dtype = float_numpy_dtype
  1227. ser = Series(["a", "b", "c"], index=Index([0, 0.5, 1], dtype=dtype))
  1228. expected = ser.copy()
  1229. ser.loc[1] = "zoo"
  1230. expected.iloc[2] = "zoo"
  1231. tm.assert_series_equal(ser, expected)
  1232. @pytest.mark.parametrize(
  1233. "indexer, expected",
  1234. [
  1235. # The test name is a misnomer in the 0 case as df.index[indexer]
  1236. # is a scalar.
  1237. (0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
  1238. (slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]),
  1239. ([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9]),
  1240. ],
  1241. )
  1242. def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected):
  1243. # GH#16637
  1244. tdi = to_timedelta(range(10), unit="s")
  1245. df = DataFrame({"x": range(10)}, dtype="int64", index=tdi)
  1246. df.loc[df.index[indexer], "x"] = 20
  1247. expected = DataFrame(
  1248. expected,
  1249. index=tdi,
  1250. columns=["x"],
  1251. dtype="int64",
  1252. )
  1253. tm.assert_frame_equal(expected, df)
  1254. def test_loc_setitem_categorical_values_partial_column_slice(self):
  1255. # Assigning a Category to parts of a int/... column uses the values of
  1256. # the Categorical
  1257. df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")})
  1258. exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")})
  1259. with tm.assert_produces_warning(
  1260. FutureWarning, match="item of incompatible dtype"
  1261. ):
  1262. df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
  1263. df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
  1264. tm.assert_frame_equal(df, exp)
  1265. def test_loc_setitem_single_row_categorical(self, using_infer_string):
  1266. # GH#25495
  1267. df = DataFrame({"Alpha": ["a"], "Numeric": [0]})
  1268. categories = Categorical(df["Alpha"], categories=["a", "b", "c"])
  1269. # pre-2.0 this swapped in a new array, in 2.0 it operates inplace,
  1270. # consistent with non-split-path
  1271. df.loc[:, "Alpha"] = categories
  1272. result = df["Alpha"]
  1273. expected = Series(categories, index=df.index, name="Alpha").astype(
  1274. object if not using_infer_string else "str"
  1275. )
  1276. tm.assert_series_equal(result, expected)
  1277. # double-check that the non-loc setting retains categoricalness
  1278. df["Alpha"] = categories
  1279. tm.assert_series_equal(df["Alpha"], Series(categories, name="Alpha"))
  1280. def test_loc_setitem_datetime_coercion(self):
  1281. # GH#1048
  1282. df = DataFrame({"c": [Timestamp("2010-10-01")] * 3})
  1283. df.loc[0:1, "c"] = np.datetime64("2008-08-08")
  1284. assert Timestamp("2008-08-08") == df.loc[0, "c"]
  1285. assert Timestamp("2008-08-08") == df.loc[1, "c"]
  1286. with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
  1287. df.loc[2, "c"] = date(2005, 5, 5)
  1288. assert Timestamp("2005-05-05").date() == df.loc[2, "c"]
  @pytest.mark.parametrize("idxer", ["var", ["var"]])
  def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture):
      """Assign a frame into a float64 column indexed by a DatetimeIndex (GH#11365).

      The index is built with whatever ``tz`` the ``tz_naive_fixture`` supplies.
      """
      stamps = date_range(
          start="2015-07-12", periods=3, freq="h", tz=tz_naive_fixture
      )
      expected = DataFrame(1.2, index=stamps, columns=["var"])

      # If result started off with object dtype, the .loc.__setitem__ below
      # would retain object dtype, so start from float64.
      result = DataFrame(index=stamps, columns=["var"], dtype=np.float64)

      # Only the scalar column key is expected to warn.
      # See https://github.com/pandas-dev/pandas/issues/56223
      warning_cls = FutureWarning if idxer == "var" else None
      with tm.assert_produces_warning(warning_cls, match="incompatible dtype"):
          result.loc[:, idxer] = expected

      tm.assert_frame_equal(result, expected)
  1304. def test_loc_setitem_time_key(self, using_array_manager):
  1305. index = date_range("2012-01-01", "2012-01-05", freq="30min")
  1306. df = DataFrame(
  1307. np.random.default_rng(2).standard_normal((len(index), 5)), index=index
  1308. )
  1309. akey = time(12, 0, 0)
  1310. bkey = slice(time(13, 0, 0), time(14, 0, 0))
  1311. ainds = [24, 72, 120, 168]
  1312. binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172]
  1313. result = df.copy()
  1314. result.loc[akey] = 0
  1315. result = result.loc[akey]
  1316. expected = df.loc[akey].copy()
  1317. expected.loc[:] = 0
  1318. if using_array_manager:
  1319. # TODO(ArrayManager) we are still overwriting columns
  1320. expected = expected.astype(float)
  1321. tm.assert_frame_equal(result, expected)
  1322. result = df.copy()
  1323. result.loc[akey] = 0
  1324. result.loc[akey] = df.iloc[ainds]
  1325. tm.assert_frame_equal(result, df)
  1326. result = df.copy()
  1327. result.loc[bkey] = 0
  1328. result = result.loc[bkey]
  1329. expected = df.loc[bkey].copy()
  1330. expected.loc[:] = 0
  1331. if using_array_manager:
  1332. # TODO(ArrayManager) we are still overwriting columns
  1333. expected = expected.astype(float)
  1334. tm.assert_frame_equal(result, expected)
  1335. result = df.copy()
  1336. result.loc[bkey] = 0
  1337. result.loc[bkey] = df.iloc[binds]
  1338. tm.assert_frame_equal(result, df)
  @pytest.mark.parametrize("key", ["A", ["A"], ("A", slice(None))])
  def test_loc_setitem_unsorted_multiindex_columns(self, key):
      """Zero out the "A" columns of a frame with MultiIndex columns (GH#38601).

      The assignment is checked first with the columns in their original
      (unsorted) order and then again after ``sort_index(axis=1)``.
      """
      columns = MultiIndex.from_tuples([("A", 4), ("B", "3"), ("A", "2")])
      df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=columns)
      expected = DataFrame([[0, 2, 0], [0, 5, 0]], columns=columns)

      # Unsorted columns.
      unsorted = df.copy()
      unsorted.loc[:, key] = np.zeros((2, 2), dtype="int64")
      tm.assert_frame_equal(unsorted, expected)

      # Sorted columns.
      df = df.sort_index(axis=1)
      df.loc[:, key] = np.zeros((2, 2), dtype="int64")
      expected = expected.sort_index(axis=1)
      tm.assert_frame_equal(df, expected)
  1352. def test_loc_setitem_uint_drop(self, any_int_numpy_dtype):
  1353. # see GH#18311
  1354. # assigning series.loc[0] = 4 changed series.dtype to int
  1355. series = Series([1, 2, 3], dtype=any_int_numpy_dtype)
  1356. series.loc[0] = 4
  1357. expected = Series([4, 2, 3], dtype=any_int_numpy_dtype)
  1358. tm.assert_series_equal(series, expected)
  1359. def test_loc_setitem_td64_non_nano(self):
  1360. # GH#14155
  1361. ser = Series(10 * [np.timedelta64(10, "m")])
  1362. ser.loc[[1, 2, 3]] = np.timedelta64(20, "m")
  1363. expected = Series(10 * [np.timedelta64(10, "m")])
  1364. expected.loc[[1, 2, 3]] = Timedelta(np.timedelta64(20, "m"))
  1365. tm.assert_series_equal(ser, expected)
  def test_loc_setitem_2d_to_1d_raises(self):
      """Assigning a 2x2 array into a length-2 Series raises ``ValueError``."""
      data = np.random.default_rng(2).standard_normal((2, 2))

      # float64 dtype to avoid upcast when trying to set float data
      ser = Series(range(2), dtype="float64")

      msg = "setting an array element with a sequence."
      # Same error whether the key is a range or a full slice.
      for key in (range(2), slice(None)):
          with pytest.raises(ValueError, match=msg):
              ser.loc[key] = data
  1375. def test_loc_getitem_interval_index(self):
  1376. # GH#19977
  1377. index = pd.interval_range(start=0, periods=3)
  1378. df = DataFrame(
  1379. [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"]
  1380. )
  1381. expected = 1
  1382. result = df.loc[0.5, "A"]
  1383. tm.assert_almost_equal(result, expected)
  1384. def test_loc_getitem_interval_index2(self):
  1385. # GH#19977
  1386. index = pd.interval_range(start=0, periods=3, closed="both")
  1387. df = DataFrame(
  1388. [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"]
  1389. )
  1390. index_exp = pd.interval_range(start=0, periods=2, freq=1, closed="both")
  1391. expected = Series([1, 4], index=index_exp, name="A")
  1392. result = df.loc[1, "A"]
  1393. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize("tpl", [(1,), (1, 2)])
  def test_loc_getitem_index_single_double_tuples(self, tpl):
      """Select a tuple label from a flat Index holding tuples of differing length (GH#20991)."""
      labels = Index([(1,), (1, 2)], name="A", tupleize_cols=False)
      df = DataFrame(index=labels)

      result = df.loc[[tpl]]

      # Only the requested tuple remains, still as a single (non-Multi) label.
      expected_index = Index([tpl], name="A", tupleize_cols=False)
      tm.assert_frame_equal(result, DataFrame(index=expected_index))
  1407. def test_loc_getitem_index_namedtuple(self):
  1408. IndexType = namedtuple("IndexType", ["a", "b"])
  1409. idx1 = IndexType("foo", "bar")
  1410. idx2 = IndexType("baz", "bof")
  1411. index = Index([idx1, idx2], name="composite_index", tupleize_cols=False)
  1412. df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"])
  1413. result = df.loc[IndexType("foo", "bar")]["A"]
  1414. assert result == 1
  1415. def test_loc_setitem_single_column_mixed(self, using_infer_string):
  1416. df = DataFrame(
  1417. np.random.default_rng(2).standard_normal((5, 3)),
  1418. index=["a", "b", "c", "d", "e"],
  1419. columns=["foo", "bar", "baz"],
  1420. )
  1421. df["str"] = "qux"
  1422. df.loc[df.index[::2], "str"] = np.nan
  1423. expected = Series(
  1424. [np.nan, "qux", np.nan, "qux", np.nan],
  1425. dtype=object if not using_infer_string else "str",
  1426. ).values
  1427. tm.assert_almost_equal(df["str"].values, expected)
  1428. def test_loc_setitem_cast2(self):
  1429. # GH#7704
  1430. # dtype conversion on setting
  1431. df = DataFrame(np.random.default_rng(2).random((30, 3)), columns=tuple("ABC"))
  1432. df["event"] = np.nan
  1433. with tm.assert_produces_warning(
  1434. FutureWarning, match="item of incompatible dtype"
  1435. ):
  1436. df.loc[10, "event"] = "foo"
  1437. result = df.dtypes
  1438. expected = Series(
  1439. [np.dtype("float64")] * 3 + [np.dtype("object")],
  1440. index=["A", "B", "C", "event"],
  1441. )
  1442. tm.assert_series_equal(result, expected)
  1443. def test_loc_setitem_cast3(self):
  1444. # Test that data type is preserved . GH#5782
  1445. df = DataFrame({"one": np.arange(6, dtype=np.int8)})
  1446. df.loc[1, "one"] = 6
  1447. assert df.dtypes.one == np.dtype(np.int8)
  1448. df.one = np.int8(7)
  1449. assert df.dtypes.one == np.dtype(np.int8)
  1450. def test_loc_setitem_range_key(self, frame_or_series):
  1451. # GH#45479 don't treat range key as positional
  1452. obj = frame_or_series(range(5), index=[3, 4, 1, 0, 2])
  1453. values = [9, 10, 11]
  1454. if obj.ndim == 2:
  1455. values = [[9], [10], [11]]
  1456. obj.loc[range(3)] = values
  1457. expected = frame_or_series([0, 1, 10, 9, 11], index=obj.index)
  1458. tm.assert_equal(obj, expected)
  1459. def test_loc_setitem_numpy_frame_categorical_value(self):
  1460. # GH#52927
  1461. df = DataFrame({"a": [1, 1, 1, 1, 1], "b": ["a", "a", "a", "a", "a"]})
  1462. df.loc[1:2, "a"] = Categorical([2, 2], categories=[1, 2])
  1463. expected = DataFrame({"a": [1, 2, 2, 1, 1], "b": ["a", "a", "a", "a", "a"]})
  1464. tm.assert_frame_equal(df, expected)
  class TestLocWithEllipsis:
      """Indexing ``.loc`` / ``.iloc`` with ``Ellipsis`` in the key."""

      @pytest.fixture(params=[tm.loc, tm.iloc])
      def indexer(self, request):
          """Yield ``tm.loc`` and ``tm.iloc`` in turn (test iloc while we're here)."""
          return request.param

      @pytest.fixture
      def obj(self, series_with_simple_index, frame_or_series):
          """Return the fixture Series, converted to a frame unless a Series is requested."""
          if frame_or_series is Series:
              return series_with_simple_index
          return series_with_simple_index.to_frame()

      def test_loc_iloc_getitem_ellipsis(self, obj, indexer):
          """A bare ``...`` key returns the whole object."""
          tm.assert_equal(indexer(obj)[...], obj)

      @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
      def test_loc_iloc_getitem_leading_ellipses(self, series_with_simple_index, indexer):
          """A leading ``...`` followed by a list key behaves like the list key alone."""
          ser = series_with_simple_index
          if indexer is tm.iloc or len(ser) == 0:
              key = 0
          else:
              key = ser.index[0]

          using_loc = indexer is tm.loc
          if using_loc and ser.index.inferred_type == "boolean":
              # passing [False] will get interpreted as a boolean mask
              # TODO: should it?  unambiguous when lengths dont match?
              return

          if using_loc and isinstance(ser.index, MultiIndex):
              msg = "MultiIndex does not support indexing with Ellipsis"
              with pytest.raises(NotImplementedError, match=msg):
                  indexer(ser)[..., [key]]
          elif len(ser) != 0:
              with_ellipsis = indexer(ser)[..., [key]]
              without_ellipsis = indexer(ser)[[key]]
              tm.assert_series_equal(with_ellipsis, without_ellipsis)

          # On the one-column frame, the ellipsis stands in for the row axis.
          column_key = 0 if indexer is tm.iloc else ser.name
          frame = ser.to_frame()
          with_ellipsis = indexer(frame)[..., [column_key]]
          with_slice = indexer(frame)[:, [column_key]]
          tm.assert_frame_equal(with_ellipsis, with_slice)

      def test_loc_iloc_getitem_ellipses_only_one_ellipsis(self, obj, indexer):
          """Keys with more than one ``...`` raise ``IndexingError`` (GH37750)."""
          if indexer is tm.iloc or len(obj) == 0:
              key = 0
          else:
              key = obj.index[0]

          # Keys that start with an Ellipsis and contain a second one.
          double_ellipsis_keys = [
              (..., ...),
              (..., [key], ...),
              (..., ..., key),
          ]
          for bad_key in double_ellipsis_keys:
              with pytest.raises(IndexingError, match=_one_ellipsis_message):
                  indexer(obj)[bad_key]

          # one_ellipsis_message takes precedence over "Too many indexers"
          # only when the first key is Ellipsis
          with pytest.raises(IndexingError, match="Too many indexers"):
              indexer(obj)[key, ..., ...]
  class TestLocWithMultiIndex:
      """``.loc`` behaviour on MultiIndex (and a few related) objects."""

      @pytest.mark.parametrize(
          "keys, expected",
          [
              (["b", "a"], [["b", "b", "a", "a"], [1, 2, 1, 2]]),
              (["a", "b"], [["a", "a", "b", "b"], [1, 2, 1, 2]]),
              ((["a", "b"], [1, 2]), [["a", "a", "b", "b"], [1, 2, 1, 2]]),
              ((["a", "b"], [2, 1]), [["a", "a", "b", "b"], [2, 1, 2, 1]]),
              ((["b", "a"], [2, 1]), [["b", "b", "a", "a"], [2, 1, 2, 1]]),
              ((["b", "a"], [1, 2]), [["b", "b", "a", "a"], [1, 2, 1, 2]]),
              ((["c", "a"], [2, 1]), [["c", "a", "a"], [1, 2, 1]]),
          ],
      )
      @pytest.mark.parametrize("dim", ["index", "columns"])
      def test_loc_getitem_multilevel_index_order(self, dim, keys, expected):
          """The order of the keys given to ``.loc`` is respected (GH#22797)."""
          axis_labels = [["c", "a", "a", "b", "b"], [1, 1, 2, 1, 2]]
          df = DataFrame(np.arange(25).reshape(5, 5), **{dim: axis_labels})
          exp_index = MultiIndex.from_arrays(expected)

          if dim == "index":
              tm.assert_index_equal(df.loc[keys, :].index, exp_index)
          elif dim == "columns":
              tm.assert_index_equal(df.loc[:, keys].columns, exp_index)

      def test_loc_preserve_names(self, multiindex_year_month_day_dataframe_random_data):
          """Partial indexing keeps the names of the remaining index levels."""
          ymd = multiindex_year_month_day_dataframe_random_data
          level_names = ymd.index.names

          # Selecting on the first level leaves the other two.
          for picked in (ymd.loc[2000], ymd["A"].loc[2000]):
              assert picked.index.names == level_names[1:]

          # Selecting on the first two levels leaves only the last.
          for picked in (ymd.loc[2000, 2], ymd["A"].loc[2000, 2]):
              assert picked.index.name == level_names[2]

      def test_loc_getitem_multiindex_nonunique_len_zero(self):
          """Empty list-like keys on a non-unique MultiIndex give an empty result (GH#13691)."""
          mi = MultiIndex.from_product([[0], [1, 1]])
          ser = Series(0, index=mi)
          empty = ser[:0]

          tm.assert_series_equal(ser.loc[[]], empty)

          # An empty Series used as the key behaves the same way.
          tm.assert_series_equal(ser.loc[ser.iloc[0:0]], empty)

      def test_loc_getitem_access_none_value_in_multiindex(self):
          """A stored ``None`` can be read back through a MultiIndex with ``.loc`` (GH#34318)."""
          ser = Series([None], MultiIndex.from_arrays([["Level1"], ["Level2"]]))
          assert ser.loc[("Level1", "Level2")] is None

          midx = MultiIndex.from_product([["Level1"], ["Level2_a", "Level2_b"]])
          lookup = ("Level1", "Level2_a")

          ser = Series([None] * len(midx), dtype=object, index=midx)
          assert ser.loc[lookup] is None

          ser = Series([1] * len(midx), dtype=object, index=midx)
          assert ser.loc[lookup] == 1

      def test_loc_setitem_multiindex_slice(self):
          """Set values over a slice between two tuple labels (GH 34870)."""
          first = ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]
          second = ["one", "two", "one", "two", "one", "two", "one", "two"]
          index = MultiIndex.from_tuples(zip(first, second), names=["first", "second"])

          result = Series([1, 1, 1, 1, 1, 1, 1, 1], index=index)
          result.loc[("baz", "one"):("foo", "two")] = 100

          expected = Series([1, 1, 100, 100, 100, 100, 1, 1], index=index)
          tm.assert_series_equal(result, expected)

      def test_loc_getitem_slice_datetime_objs_with_datetimeindex(self):
          """Slice a DatetimeIndex with ``datetime`` bounds that enclose all of it."""
          n_rows = 100000
          times = date_range("2000-01-01", freq="10min", periods=n_rows)
          ser = Series(range(n_rows), times)

          everything = ser.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)]
          tm.assert_series_equal(everything, ser)

      def test_loc_getitem_datetime_string_with_datetimeindex(self):
          """Date strings in a list key select rows of a DatetimeIndex (GH 16710)."""
          df = DataFrame(
              {"a": range(10), "b": range(10)},
              index=date_range("2010-01-01", "2010-01-10"),
          )
          wanted = ["2010-01-01", "2010-01-05"]

          result = df.loc[wanted, ["a", "b"]]

          expected = DataFrame(
              {"a": [0, 4], "b": [0, 4]},
              index=DatetimeIndex(["2010-01-01", "2010-01-05"]),
          )
          tm.assert_frame_equal(result, expected)

      def test_loc_getitem_sorted_index_level_with_duplicates(self):
          """Tuple lookup after sorting a MultiIndex with duplicates and mixed dtypes (GH#4516)."""
          foo, bah = ("foo", "bar"), ("bah", "bam")
          mi = MultiIndex.from_tuples([foo, foo, bah, bah, foo, bah], names=["A", "B"])
          rows = [[float(i), i] for i in range(1, 7)]
          df = DataFrame(rows, index=mi, columns=["C", "D"])
          df = df.sort_index(level=0)

          # Rows 0, 1 and 4 of the unsorted frame carry the ("foo", "bar") label.
          expected = DataFrame(
              [[1.0, 1], [2.0, 2], [5.0, 5]],
              columns=["C", "D"],
              index=mi.take([0, 1, 4]),
          )

          tm.assert_frame_equal(df.loc[("foo", "bar")], expected)

      def test_additional_element_to_categorical_series_loc(self):
          """Enlarging a categorical Series with a non-category value gives object dtype (GH#47677)."""
          ser = Series(["a", "b", "c"], dtype="category")
          ser.loc[3] = 0
          tm.assert_series_equal(ser, Series(["a", "b", "c", 0], dtype="object"))

      def test_additional_categorical_element_loc(self):
          """Enlarging a categorical Series with an existing category keeps the dtype (GH#47677)."""
          ser = Series(["a", "b", "c"], dtype="category")
          ser.loc[3] = "a"
          tm.assert_series_equal(ser, Series(["a", "b", "c", "a"], dtype="category"))

      def test_loc_set_nan_in_categorical_series(self, any_numeric_ea_dtype):
          """NaN can be added to / set into a categorical Series without changing dtype (GH#47677)."""
          cat_dtype = CategoricalDtype(Index([1, 2, 3], dtype=any_numeric_ea_dtype))
          srs = Series([1, 2, 3], dtype=cat_dtype)

          # enlarge
          srs.loc[3] = np.nan
          tm.assert_series_equal(srs, Series([1, 2, 3, np.nan], dtype=cat_dtype))

          # set into
          srs.loc[1] = np.nan
          tm.assert_series_equal(srs, Series([1, np.nan, 3, np.nan], dtype=cat_dtype))

      @pytest.mark.parametrize("na", (np.nan, pd.NA, None, pd.NaT))
      def test_loc_consistency_series_enlarge_set_into(self, na):
          """Enlarging with an NA value and overwriting with it give the same Series (GH#47677)."""
          enlarged = Series(["a", "b", "c"], dtype="category")
          enlarged.loc[3] = na

          overwritten = Series(["a", "b", "c", "a"], dtype="category")
          overwritten.loc[3] = na

          tm.assert_series_equal(enlarged, overwritten)
          tm.assert_series_equal(enlarged, Series(["a", "b", "c", na], dtype="category"))

      def test_loc_getitem_preserves_index_level_category_dtype(self):
          """A categorical index level stays categorical after ``.loc`` (GH#15166)."""
          index = MultiIndex(
              levels=[CategoricalIndex(["a", "b"]), range(10)],
              codes=[[0] * 5 + [1] * 5, range(10)],
              names=["Index1", "Index2"],
          )
          df = DataFrame(data=np.arange(2, 22, 2), index=index)

          expected = CategoricalIndex(
              ["a", "b"],
              categories=["a", "b"],
              ordered=False,
              name="Index1",
              dtype="category",
          )

          # Before and after selecting with .loc.
          tm.assert_index_equal(df.index.levels[0], expected)
          tm.assert_index_equal(df.loc[["a"]].index.levels[0], expected)

      @pytest.mark.parametrize("lt_value", [30, 10])
      def test_loc_multiindex_levels_contain_values_not_in_index_anymore(self, lt_value):
          """Selecting a label that was filtered out of the rows raises ``KeyError`` (GH#41170)."""
          df = DataFrame({"a": [12, 23, 34, 45]}, index=[list("aabb"), [0, 1, 2, 3]])
          filtered = df.loc[df["a"] < lt_value, :]
          with pytest.raises(KeyError, match=r"\['b'\] not in index"):
              filtered.loc[["b"], :]

      def test_loc_multiindex_null_slice_na_level(self):
          """Null slice on an all-NaN first level combined with a second-level label (GH#42055)."""
          mi = MultiIndex.from_arrays([np.array([np.nan, np.nan]), ["bar", "baz"]])
          ser = Series([0, 1], index=mi)

          result = ser.loc[:, "bar"]

          # TODO: should we have name="bar"?
          tm.assert_series_equal(result, Series([0], index=[np.nan]))

      def test_loc_drops_level(self):
          """A scalar on the first level drops that level from the result's index.

          Based on test_series_varied_multiindex_alignment, where this used to
          fail to drop the first level.
          """
          mi = MultiIndex.from_product(
              [list("ab"), list("xy"), [1, 2]], names=["ab", "xy", "num"]
          )
          ser = Series(range(8), index=mi)

          picked = ser.loc["a", :, :]

          tm.assert_index_equal(picked.index, ser.index.droplevel(0)[:4])
  class TestLocSetitemWithExpansion:
      """``.loc.__setitem__`` with labels that are not yet in the index/columns."""

      def test_loc_setitem_with_expansion_large_dataframe(self, monkeypatch):
          """Append a row to a frame whose length equals the patched size cutoff (GH#10692)."""
          size_cutoff = 50
          # NOTE(review): the patch goes through the outer fixture, not the
          # object yielded by context(), so it is presumably undone at fixture
          # teardown rather than on leaving the with block - confirm.
          with monkeypatch.context():
              monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
              result = DataFrame({"x": range(size_cutoff)}, dtype="int64")
              result.loc[size_cutoff] = size_cutoff

          expected = DataFrame({"x": range(size_cutoff + 1)}, dtype="int64")
          tm.assert_frame_equal(result, expected)

      def test_loc_setitem_empty_series(self):
          """Partially set integers into an empty object Series (GH#5226)."""
          ser = Series(dtype=object)

          ser.loc[1] = 1
          tm.assert_series_equal(ser, Series([1], index=[1]))

          ser.loc[3] = 3
          tm.assert_series_equal(ser, Series([1, 3], index=[1, 3]))

      def test_loc_setitem_empty_series_float(self):
          """Partially set floats into an empty object Series (GH#5226)."""
          ser = Series(dtype=object)

          ser.loc[1] = 1.0
          tm.assert_series_equal(ser, Series([1.0], index=[1]))

          ser.loc[3] = 3.0
          tm.assert_series_equal(ser, Series([1.0, 3.0], index=[1, 3]))

      def test_loc_setitem_empty_series_str_idx(self):
          """Partially set into an empty object Series using string, then int labels (GH#5226)."""
          ser = Series(dtype=object)

          ser.loc["foo"] = 1
          tm.assert_series_equal(ser, Series([1], index=Index(["foo"])))

          ser.loc["bar"] = 3
          tm.assert_series_equal(ser, Series([1, 3], index=Index(["foo", "bar"])))

          ser.loc[3] = 4
          tm.assert_series_equal(
              ser, Series([1, 3, 4], index=Index(["foo", "bar", 3]))
          )

      def test_loc_setitem_incremental_with_dst(self):
          """Grow a Series one 15-minute timestamp at a time, starting 2015-11-01 US/Pacific (GH#20724)."""
          base = datetime(2015, 11, 1, tzinfo=gettz("US/Pacific"))
          quarter_hour = timedelta(seconds=900)
          idxs = [base + i * quarter_hour for i in range(16)]

          result = Series([0], index=[idxs[0]])
          for stamp in idxs:
              result.loc[stamp] = 1

          tm.assert_series_equal(result, Series(1, index=idxs))

      @pytest.mark.parametrize(
          "conv",
          [
              lambda x: x,
              lambda x: x.to_datetime64(),
              lambda x: x.to_pydatetime(),
              lambda x: np.datetime64(x),
          ],
          ids=["self", "to_datetime64", "to_pydatetime", "np.datetime64"],
      )
      def test_loc_setitem_datetime_keys_cast(self, conv, using_infer_string):
          """Datetime-like row keys on an empty frame produce a datetime index.

          GH#9516, GH#51363 changed in 3.0 to not cast on Index.insert
          """
          dt1 = Timestamp("20130101 09:00:00")
          dt2 = Timestamp("20130101 10:00:00")

          df = DataFrame()
          for stamp, value in ((dt1, 100), (dt2, 200)):
              df.loc[conv(stamp), "one"] = value

          # the dtype constructed by Index([..]) does not yet follow the unit
          # of the input on 2.3.x -> so checking this is datetime64, but then
          # specifying the exact dtype in the expected result
          if not using_infer_string:
              exp_dtype = "datetime64[ns]"
          else:
              assert df.index.dtype.kind == "M"
              exp_dtype = df.index.dtype

          expected = DataFrame(
              {"one": [100.0, 200.0]},
              index=Index([dt1, dt2], dtype=exp_dtype),
              columns=Index(["one"]),
          )
          tm.assert_frame_equal(df, expected)

      def test_loc_setitem_categorical_column_retains_dtype(self, ordered):
          """A new column created from a Categorical stays categorical (GH16360)."""
          result = DataFrame({"A": [1]})
          result.loc[:, "B"] = Categorical(["b"], ordered=ordered)

          expected = DataFrame({"A": [1], "B": Categorical(["b"], ordered=ordered)})
          tm.assert_frame_equal(result, expected)

      def test_loc_setitem_with_expansion_and_existing_dst(self):
          """Add a row to a frame whose tz-aware index spans a DST change (GH#18308)."""
          tz = "Europe/Madrid"
          start = Timestamp("2017-10-29 00:00:00+0200", tz=tz)
          end = Timestamp("2017-10-29 03:00:00+0100", tz=tz)
          new_stamp = Timestamp("2016-10-10 03:00:00", tz=tz)
          idx = date_range(start, end, inclusive="left", freq="h")
          assert new_stamp not in idx  # i.e. result.loc setitem is with-expansion

          result = DataFrame(index=idx, columns=["value"])
          result.loc[new_stamp, "value"] = 12

          expected = DataFrame(
              [np.nan] * len(idx) + [12],
              index=idx.append(DatetimeIndex([new_stamp])),
              columns=["value"],
              dtype=object,
          )
          tm.assert_frame_equal(result, expected)

      def test_setitem_with_expansion(self):
          """Set tz-aware datetimes into part of a tz-aware column (indexing - setting an element)."""
          df = DataFrame(
              data=to_datetime(["2015-03-30 20:12:32", "2015-03-12 00:11:11"]),
              columns=["time"],
          )
          df["new_col"] = ["new", "old"]
          df.time = df.set_index("time").index.tz_localize("UTC")
          pacific = (
              df[df.new_col == "new"].set_index("time").index.tz_convert("US/Pacific")
          )

          # pre-2.0 trying to set a single element on a part of a different
          # timezone converted to object; in 2.0 it retains dtype
          df2 = df.copy()
          df2.loc[df2.new_col == "new", "time"] = pacific

          expected = Series(
              [pacific[0].tz_convert("UTC"), df.loc[1, "time"]], name="time"
          )
          tm.assert_series_equal(df2.time, expected)

          # Same-timezone values shifted by one second round-trip unchanged.
          shifted = df.loc[df.new_col == "new", "time"] + Timedelta("1s")
          df.loc[df.new_col == "new", "time"] = shifted

          tm.assert_series_equal(df.loc[df.new_col == "new", "time"], shifted)

      def test_loc_setitem_with_expansion_inf_upcast_empty(self):
          """Adding ``np.inf`` as a column label yields float64 columns."""
          df = DataFrame()
          for row, col, value in ((0, 0, 1), (1, 1, 2), (0, np.inf, 3)):
              df.loc[row, col] = value

          expected = Index([0, 1, np.inf], dtype=np.float64)
          tm.assert_index_equal(df.columns, expected)

      @pytest.mark.filterwarnings("ignore:indexing past lexsort depth")
      @pytest.mark.parametrize("has_ref", [True, False])
      def test_loc_setitem_with_expansion_nonunique_index(self, index, has_ref):
          """Expand a frame/Series with a non-unique index by a new string label (GH#40096).

          When ``has_ref`` is true, a ``[:]`` slice of the object is kept alive
          in ``view`` while the expansion happens.
          """
          if not len(index):
              pytest.skip("Not relevant for empty Index")

          index = index.repeat(2)  # ensure non-unique
          N = len(index)
          arr = np.arange(N).astype(np.int64)
          orig = DataFrame(arr, index=index, columns=[0])

          # key that will require object-dtype casting in the index
          key = "kapow"
          assert key not in index  # otherwise test is invalid
          # TODO: using a tuple key breaks here in many cases

          exp_index = index.insert(len(index), key)
          last_label = exp_index[-1]
          if isinstance(index, MultiIndex):
              assert last_label[0] == key
          else:
              assert last_label == key
          expected = DataFrame(
              np.arange(N + 1).astype(np.float64), index=exp_index, columns=[0]
          )

          # Add new row, but no new columns
          df = orig.copy()
          if has_ref:
              view = df[:]  # noqa: F841
          df.loc[key, 0] = N
          tm.assert_frame_equal(df, expected)

          # add new row on a Series
          ser = orig.copy()[0]
          if has_ref:
              view = ser[:]  # noqa: F841
          ser.loc[key] = N
          # the series machinery lets us preserve int dtype instead of float
          expected = expected[0].astype(np.int64)
          tm.assert_series_equal(ser, expected)

          # add new row and new column
          df = orig.copy()
          if has_ref:
              view = df[:]  # noqa: F841
          df.loc[key, 1] = N
          expected = DataFrame(
              {0: list(arr) + [np.nan], 1: [np.nan] * N + [float(N)]},
              index=exp_index,
          )
          tm.assert_frame_equal(df, expected)

      @pytest.mark.parametrize(
          "dtype", ["Int32", "Int64", "UInt32", "UInt64", "Float32", "Float64"]
      )
      def test_loc_setitem_with_expansion_preserves_nullable_int(self, dtype):
          """A new column created from nullable data keeps its nullable dtype (GH#42099)."""
          ser = Series([0, 1, 2, 3], dtype=dtype)
          df = DataFrame({"data": ser})

          # Once with the Series itself, once with its underlying array.
          for value in (ser, ser._values):
              result = DataFrame(index=df.index)
              result.loc[df.index, "data"] = value
              tm.assert_frame_equal(result, df, check_column_type=False)

      def test_loc_setitem_ea_not_full_column(self):
          """Create a tz-aware column from values covering only some rows (GH#39163)."""
          df = DataFrame({"A": range(5)})
          val = date_range("2016-01-01", periods=3, tz="US/Pacific")

          df.loc[[0, 1, 2], "B"] = val

          # The two rows that were not set hold NaT in the same tz-aware dtype.
          padding = DatetimeIndex([pd.NaT, pd.NaT], dtype=val.dtype)
          expected = DataFrame({"A": range(5), "B": val.append(padding)})
          assert expected.dtypes["B"] == val.dtype
          tm.assert_frame_equal(df, expected)
  class TestLocCallable:
      """``.loc`` with callables used as row and/or column keys (GH#11485)."""

      def test_frame_loc_getitem_callable(self):
          """Callable keys select the same data as the value each callable returns."""
          df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]})
          # iloc cannot use boolean Series (see GH3635)

          # (key containing callables, equivalent explicit key, comparison helper)
          cases = [
              # return bool indexer
              (lambda x: x.A > 2, df.A > 2, tm.assert_frame_equal),
              (
                  (lambda x: x.B == "b", slice(None)),
                  (df.B == "b", slice(None)),
                  tm.assert_frame_equal,
              ),
              (
                  (lambda x: x.A > 2, lambda x: x.columns == "B"),
                  (df.A > 2, [False, True, False]),
                  tm.assert_frame_equal,
              ),
              (
                  (lambda x: x.A > 2, lambda x: "B"),
                  (df.A > 2, "B"),
                  tm.assert_series_equal,
              ),
              (
                  (lambda x: x.A > 2, lambda x: ["A", "B"]),
                  (df.A > 2, ["A", "B"]),
                  tm.assert_frame_equal,
              ),
              (
                  (lambda x: x.A == 2, lambda x: ["A", "B"]),
                  (df.A == 2, ["A", "B"]),
                  tm.assert_frame_equal,
              ),
          ]
          for callable_key, plain_key, check in cases:
              check(df.loc[callable_key], df.loc[plain_key])

          # scalar
          res = df.loc[lambda x: 1, lambda x: "A"]
          assert res == df.loc[1, "A"]

      def test_frame_loc_getitem_callable_mixture(self):
          """A callable on one axis can be combined with a plain key on the other."""
          df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]})

          cases = [
              (
                  (lambda x: x.A > 2, ["A", "B"]),
                  (df.A > 2, ["A", "B"]),
                  tm.assert_frame_equal,
              ),
              (
                  ([2, 3], lambda x: ["A", "B"]),
                  ([2, 3], ["A", "B"]),
                  tm.assert_frame_equal,
              ),
              (
                  (3, lambda x: ["A", "B"]),
                  (3, ["A", "B"]),
                  tm.assert_series_equal,
              ),
          ]
          for callable_key, plain_key, check in cases:
              check(df.loc[callable_key], df.loc[plain_key])

      def test_frame_loc_getitem_callable_labels(self):
          """Callables that return labels behave like the labels themselves."""
          df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))
          rows = ["A", "C"]

          cases = [
              # return label
              (lambda x: ["A", "C"], rows, tm.assert_frame_equal),
              (
                  (lambda x: ["A", "C"], slice(None)),
                  (rows, slice(None)),
                  tm.assert_frame_equal,
              ),
              (
                  (lambda x: ["A", "C"], lambda x: "X"),
                  (rows, "X"),
                  tm.assert_series_equal,
              ),
              (
                  (lambda x: ["A", "C"], lambda x: ["X"]),
                  (rows, ["X"]),
                  tm.assert_frame_equal,
              ),
              # mixture
              ((["A", "C"], lambda x: "X"), (rows, "X"), tm.assert_series_equal),
              ((["A", "C"], lambda x: ["X"]), (rows, ["X"]), tm.assert_frame_equal),
              ((lambda x: ["A", "C"], "X"), (rows, "X"), tm.assert_series_equal),
              ((lambda x: ["A", "C"], ["X"]), (rows, ["X"]), tm.assert_frame_equal),
          ]
          for callable_key, plain_key, check in cases:
              check(df.loc[callable_key], df.loc[plain_key])

      def test_frame_loc_setitem_callable(self):
          """Setting through callable keys matches setting through the labels they return."""
          df = DataFrame(
              {"X": [1, 2, 3, 4], "Y": Series(list("aabb"), dtype=object)},
              index=list("ABCD"),
          )

          # (key containing callables, equivalent explicit key, value to assign)
          cases = [
              # return label
              (lambda x: ["A", "C"], ["A", "C"], -20),
              ((lambda x: ["A", "C"], slice(None)), (["A", "C"], slice(None)), 20),
              ((lambda x: ["A", "C"], lambda x: "X"), (["A", "C"], "X"), -1),
              ((lambda x: ["A", "C"], lambda x: ["X"]), (["A", "C"], ["X"]), [5, 10]),
              # mixture
              ((["A", "C"], lambda x: "X"), (["A", "C"], "X"), np.array([-1, -2])),
              ((["A", "C"], lambda x: ["X"]), (["A", "C"], ["X"]), 10),
              ((lambda x: ["A", "C"], "X"), (["A", "C"], "X"), -2),
              ((lambda x: ["A", "C"], ["X"]), (["A", "C"], ["X"]), -4),
          ]
          for callable_key, plain_key, value in cases:
              res = df.copy()
              res.loc[callable_key] = value

              exp = df.copy()
              exp.loc[plain_key] = value

              tm.assert_frame_equal(res, exp)
  class TestPartialStringSlicing:
      """``.loc`` with string keys on datetime-like, period and timedelta indexes."""

      def test_loc_getitem_partial_string_slicing_datetimeindex(self):
          """A "YYYY-MM" string selects every row in that month (GH#35509)."""
          df = DataFrame(
              {"col1": ["a", "b", "c"], "col2": [1, 2, 3]},
              index=to_datetime(["2020-08-01", "2020-07-02", "2020-08-05"]),
          )

          result = df.loc["2020-08"]

          # The July row is dropped; the index itself is not sorted.
          expected = DataFrame(
              {"col1": ["a", "c"], "col2": [1, 3]},
              index=to_datetime(["2020-08-01", "2020-08-05"]),
          )
          tm.assert_frame_equal(result, expected)

      def test_loc_getitem_partial_string_slicing_with_periodindex(self):
          """String end bound on a monthly PeriodIndex drops only the last period."""
          periods = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M")
          ser = periods.to_series()

          tm.assert_series_equal(ser.loc[:"2017-12"], ser.iloc[:-1])

      def test_loc_getitem_partial_string_slicing_with_timedeltaindex(self):
          """String end bound on an hourly TimedeltaIndex drops only the last entry."""
          deltas = timedelta_range(start="1 day", end="2 days", freq="1h")
          ser = deltas.to_series()

          tm.assert_series_equal(ser.loc[:"1 days"], ser.iloc[:-1])

      def test_loc_getitem_str_timedeltaindex(self):
          """A timedelta string selects the matching row as a Series (GH#16896)."""
          df = DataFrame({"x": range(3)}, index=to_timedelta(range(3), unit="days"))

          tm.assert_series_equal(df.loc["0 days"], df.iloc[0])

      @pytest.mark.parametrize("indexer_end", [None, "2020-01-02 23:59:59.999999999"])
      def test_loc_getitem_partial_slice_non_monotonicity(
          self, tz_aware_fixture, indexer_end, frame_or_series
      ):
          """String slicing on a non-monotonic tz-aware DatetimeIndex (GH#33146).

          Both ``obj[...]`` and ``obj.loc[...]`` must return the two rows at or
          after "2020-01-01".
          """
          new_year = Timestamp("2020-01-01")
          last_nano = Timestamp("2020-01-02 23:59:59.999999999")
          unordered = [
              Timestamp("2019-12-30"),
              new_year,
              Timestamp("2019-12-25"),
              last_nano,
              Timestamp("2019-12-19"),
          ]

          obj = frame_or_series(
              [1] * 5,
              index=DatetimeIndex(unordered, tz=tz_aware_fixture),
          )
          expected = frame_or_series(
              [1] * 2,
              index=DatetimeIndex([new_year, last_nano], tz=tz_aware_fixture),
          )
          indexer = slice("2020-01-01", indexer_end)

          tm.assert_equal(obj[indexer], expected)

          tm.assert_equal(obj.loc[indexer], expected)
  class TestLabelSlicing:
      """Label-based slicing through ``.loc`` on several index types."""

      def test_loc_getitem_slicing_datetimes_frame(self):
          """Datetime slices give the same rows for unique and duplicated labels."""
          # GH#7523

          def at_ten(days):
              return [datetime(2001, 1, day, 10, 00) for day in days]

          # unique
          unique_frame = DataFrame(
              np.arange(4.0, dtype="float64"), index=at_ten([1, 2, 3, 4])
          )
          # duplicates
          dup_frame = DataFrame(
              np.arange(5.0, dtype="float64"), index=at_ten([1, 2, 2, 3, 4])
          )

          first_label = datetime(2001, 1, 1, 10)
          last_label = datetime(2001, 1, 4, 10)
          for frame in (unique_frame, dup_frame):
              # slices bounded by the outermost labels return the whole frame
              tm.assert_frame_equal(frame.loc[first_label:], frame)
              tm.assert_frame_equal(frame.loc[:last_label], frame)
              tm.assert_frame_equal(frame.loc[first_label:last_label], frame)

              # starting one hour after the first label skips the first row
              without_first = frame.iloc[1:]
              tm.assert_frame_equal(
                  frame.loc[datetime(2001, 1, 1, 11) :], without_first
              )
              tm.assert_frame_equal(frame.loc["20010101 11":], without_first)

      def test_loc_getitem_label_slice_across_dst(self):
          """Slice and scalar lookups with tz-aware Timestamps around a DST change."""
          # GH#21846
          idx = date_range(
              "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min"
          )
          ser = Series([0, 1, 2, 3, 4], index=idx)
          start = Timestamp("2017-10-29 02:30:00+02:00", tz="Europe/Berlin")
          stop = Timestamp("2017-10-29 02:00:00+01:00", tz="Europe/Berlin")

          tm.assert_series_equal(ser.loc[start:stop], Series([2, 3], index=idx[2:4]))
          assert ser[start] == 2

      @pytest.mark.parametrize(
          "index",
          [
              pd.period_range(start="2017-01-01", end="2018-01-01", freq="M"),
              timedelta_range(start="1 day", end="2 days", freq="1h"),
          ],
      )
      def test_loc_getitem_label_slice_period_timedelta(self, index):
          """Slicing up to the second-to-last label is inclusive of that label."""
          ser = index.to_series()
          tm.assert_series_equal(ser.loc[: index[-2]], ser.iloc[:-1])

      def test_loc_getitem_slice_floats_inexact(self):
          """Float slice bounds need not be present in the index."""
          labels = [52195.504153, 52196.303147, 52198.369883]
          df = DataFrame(np.random.default_rng(2).random((3, 2)), index=labels)

          assert len(df.loc[52195.1:52196.5]) == 2
          assert len(df.loc[52195.1:52196.6]) == 2
          assert len(df.loc[52195.1:52198.9]) == 3

      def test_loc_getitem_float_slice_floatindex(self, float_numpy_dtype):
          """Open-ended float slices count the expected number of rows."""
          labels = np.arange(10, 20, dtype=float_numpy_dtype)
          ser = Series(np.random.default_rng(2).random(10), index=labels)

          assert len(ser.loc[12.0:]) == 8
          assert len(ser.loc[12.5:]) == 7

          # replace the label 12.0 by 12.2 and check the counts again
          shifted = np.arange(10, 20, dtype=float_numpy_dtype)
          shifted[2] = 12.2
          ser.index = shifted
          assert len(ser.loc[12.0:]) == 8
          assert len(ser.loc[12.5:]) == 7

      @pytest.mark.parametrize(
          "start,stop, expected_slice",
          [
              [np.timedelta64(0, "ns"), None, slice(0, 11)],
              [np.timedelta64(1, "D"), np.timedelta64(6, "D"), slice(1, 7)],
              [None, np.timedelta64(4, "D"), slice(0, 5)],
          ],
      )
      def test_loc_getitem_slice_label_td64obj(self, start, stop, expected_slice):
          """``np.timedelta64`` slice bounds map to the given positional slice."""
          # GH#20393
          ser = Series(range(11), timedelta_range("0 days", "10 days"))
          label_sliced = ser.loc[slice(start, stop)]
          tm.assert_series_equal(label_sliced, ser.iloc[expected_slice])

      @pytest.mark.parametrize("start", ["2018", "2020"])
      def test_loc_getitem_slice_unordered_dt_index(self, frame_or_series, start):
          """String slicing on this unordered datetime index raises KeyError."""
          stamps = [Timestamp("2016"), Timestamp("2019"), Timestamp("2017")]
          obj = frame_or_series([1, 2, 3], index=stamps)
          with pytest.raises(
              KeyError, match="Value based partial slicing on non-monotonic"
          ):
              obj.loc[start:"2022"]

      @pytest.mark.parametrize("value", [1, 1.5])
      def test_loc_getitem_slice_labels_int_in_object_index(self, frame_or_series, value):
          """A slice from the first to the last label of a mixed index returns all rows."""
          # GH: 26491
          obj = frame_or_series(range(4), index=[value, "first", 2, "third"])
          sliced = obj.loc[value:"third"]
          expected = frame_or_series(range(4), index=[value, "first", 2, "third"])
          tm.assert_equal(sliced, expected)

      def test_loc_getitem_slice_columns_mixed_dtype(self):
          """Column slice starting at an int label in a str/int column index."""
          # GH: 20975
          df = DataFrame({"test": 1, 1: 2, 2: 3}, index=[0])
          int_columns = Index([1, 2], dtype=object)
          expected = DataFrame(data=[[2, 3]], index=[0], columns=int_columns)
          tm.assert_frame_equal(df.loc[:, 1:], expected)
  class TestLocBooleanLabelsAndSlices:
      """Boolean scalars and boolean slice bounds used as ``.loc`` keys."""

      @pytest.mark.parametrize("bool_value", [True, False])
      def test_loc_bool_incompatible_index_raises(
          self, index, frame_or_series, bool_value
      ):
          """A bool label on an index not inferred as boolean raises KeyError."""
          # GH20432
          message = f"{bool_value}: boolean label can not be used without a boolean index"
          if index.inferred_type == "boolean":
              # nothing to check for boolean indexes
              return
          obj = frame_or_series(index=index, dtype="object")
          with pytest.raises(KeyError, match=message):
              obj.loc[bool_value]

      @pytest.mark.parametrize("bool_value", [True, False])
      def test_loc_bool_should_not_raise(self, frame_or_series, bool_value):
          """A bool label on a "boolean"-dtype index is looked up without error."""
          bool_index = Index([True, False], dtype="boolean")
          obj = frame_or_series(index=bool_index, dtype="object")
          obj.loc[bool_value]

      def test_loc_bool_slice_raises(self, index, frame_or_series):
          """Boolean slice bounds raise TypeError."""
          # GH20432
          message = (
              r"slice\(True, False, None\): boolean values can not be used in a slice"
          )
          obj = frame_or_series(index=index, dtype="object")
          with pytest.raises(TypeError, match=message):
              obj.loc[True:False]
  class TestLocBooleanMask:
      """``.loc`` assignments keyed by boolean masks."""

      def test_loc_setitem_bool_mask_timedeltaindex(self):
          """Masked assignment on a frame with a TimedeltaIndex."""
          # GH#14946
          df = DataFrame({"x": range(10)})
          df.index = to_timedelta(range(10), unit="s")
          # each case pairs a mask with the column contents expected afterwards
          cases = [
              (df["x"] > 3, [0, 1, 2, 3, 10, 10, 10, 10, 10, 10]),
              (df["x"] == 3, [0, 1, 2, 10, 4, 5, 6, 7, 8, 9]),
              (df["x"] < 3, [10, 10, 10, 3, 4, 5, 6, 7, 8, 9]),
          ]
          for cond, data in cases:
              result = df.copy()
              result.loc[cond, "x"] = 10

              expected = DataFrame(
                  data,
                  index=to_timedelta(range(10), unit="s"),
                  columns=["x"],
                  dtype="int64",
              )
              tm.assert_frame_equal(expected, result)

      @pytest.mark.parametrize("tz", [None, "UTC"])
      def test_loc_setitem_mask_with_datetimeindex_tz(self, tz):
          """Assigning a frame's own masked rows back leaves it unchanged."""
          # GH#16889
          # support .loc with alignment and tz-aware DatetimeIndex
          mask = np.array([True, False, True, False])

          idx = date_range("20010101", periods=4, tz=tz)
          df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64")

          with_columns = df.copy()
          with_columns.loc[mask, :] = df.loc[mask, :]
          tm.assert_frame_equal(with_columns, df)

          rows_only = df.copy()
          rows_only.loc[mask] = df.loc[mask]
          tm.assert_frame_equal(rows_only, df)

      def test_loc_setitem_mask_and_label_with_datetimeindex(self):
          """Masked assignment into a new column fills unmasked rows with NaT."""
          # GH#9478
          # a datetimeindex alignment issue with partial setting
          df = DataFrame(
              np.arange(6.0).reshape(3, 2),
              columns=list("AB"),
              index=date_range("1/1/2000", periods=3, freq="1h"),
          )
          expected = df.copy()
          expected["C"] = [expected.index[0]] + [pd.NaT, pd.NaT]

          mask = df.A < 1
          df.loc[mask, "C"] = df.loc[mask].index
          tm.assert_frame_equal(df, expected)

      def test_loc_setitem_mask_td64_series_value(self):
          """Setting a td64 Series through a list mask leaves the frame unchanged."""
          # GH#23462 key list of bools, value is a Series
          td1 = Timedelta(0)
          td2 = Timedelta(28767471428571405)
          df = DataFrame({"col": Series([td1, td2])})
          untouched = df.copy()
          ser = Series([td1])

          value_before = df["col"].iloc[1]._value
          df.loc[[True, False]] = ser
          value_after = df["col"].iloc[1]._value

          assert value_before == value_after
          tm.assert_frame_equal(df, untouched)

      @td.skip_array_manager_invalid_test  # TODO(ArrayManager) rewrite not using .values
      def test_loc_setitem_boolean_and_column(self, float_frame):
          """Mask + column assignment matches the same write done on ``.values``."""
          original = float_frame.copy()
          mask = float_frame["A"] > 0

          float_frame.loc[mask, "B"] = 0

          raw = original.values.copy()
          raw[mask.values, 1] = 0
          expected = DataFrame(raw, index=original.index, columns=original.columns)
          tm.assert_frame_equal(float_frame, expected)

      def test_loc_setitem_ndframe_values_alignment(
          self, using_copy_on_write, warn_copy_on_write
      ):
          """DataFrame/Series values are aligned on index when set through a mask."""
          # GH#45501
          row_mask = [False, False, True]
          expected = DataFrame({"a": [1, 2, 10], "b": [4, 5, 6]})

          df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
          df.loc[row_mask, ["a"]] = DataFrame({"a": [10, 20, 30]}, index=[2, 1, 0])
          tm.assert_frame_equal(df, expected)

          # same thing with Series RHS
          df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
          df.loc[row_mask, ["a"]] = Series([10, 11, 12], index=[2, 1, 0])
          tm.assert_frame_equal(df, expected)

          # same thing but setting "a" instead of ["a"]
          df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
          df.loc[row_mask, "a"] = Series([10, 11, 12], index=[2, 1, 0])
          tm.assert_frame_equal(df, expected)

          # set through a column taken from the frame
          df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
          df_orig = df.copy()
          ser = df["a"]
          with tm.assert_cow_warning(warn_copy_on_write):
              ser.loc[row_mask] = Series([10, 11, 12], index=[2, 1, 0])
          # under copy-on-write the parent frame is expected to stay as it was
          reference = df_orig if using_copy_on_write else expected
          tm.assert_frame_equal(df, reference)

      def test_loc_indexer_empty_broadcast(self):
          """An empty boolean mask leaves an empty frame unchanged."""
          # GH#51450
          df = DataFrame({"a": [], "b": []}, dtype=object)
          expected = df.copy()
          empty_mask = np.array([], dtype=np.bool_)
          df.loc[empty_mask, ["a"]] = df["a"].copy()
          tm.assert_frame_equal(df, expected)

      def test_loc_indexer_all_false_broadcast(self):
          """An all-False mask writes nothing."""
          # GH#51450
          df = DataFrame({"a": ["x"], "b": ["y"]}, dtype=object)
          expected = df.copy()
          false_mask = np.array([False], dtype=np.bool_)
          df.loc[false_mask, ["a"]] = df["b"].copy()
          tm.assert_frame_equal(df, expected)

      def test_loc_indexer_length_one(self):
          """A single-True mask copies the value from the other column."""
          # GH#51435
          df = DataFrame({"a": ["x"], "b": ["y"]}, dtype=object)
          expected = DataFrame({"a": ["y"], "b": ["y"]}, dtype=object)
          true_mask = np.array([True], dtype=np.bool_)
          df.loc[true_mask, ["a"]] = df["b"].copy()
          tm.assert_frame_equal(df, expected)
  class TestLocListlike:
      """``.loc`` lookups keyed by list-likes of labels."""

      @pytest.mark.parametrize("box", [lambda x: x, np.asarray, list])
      def test_loc_getitem_list_of_labels_categoricalindex_with_na(self, box):
          """NA in a list key is accepted only when the index itself holds NA."""
          # passing a list can include valid categories _or_ NA values
          ci = CategoricalIndex(["A", "B", np.nan])
          ser = Series(range(3), index=ci)

          tm.assert_series_equal(ser.loc[box(ci)], ser)
          tm.assert_series_equal(ser[box(ci)], ser)
          tm.assert_frame_equal(ser.to_frame().loc[box(ci)], ser.to_frame())

          ser_without_na = ser[:-1]
          key_with_na = ci[1:]
          # but if there are no NAs present, this should raise KeyError
          msg = "not in index"
          with pytest.raises(KeyError, match=msg):
              ser_without_na.loc[box(key_with_na)]

          with pytest.raises(KeyError, match=msg):
              ser_without_na[box(key_with_na)]

          with pytest.raises(KeyError, match=msg):
              ser_without_na.to_frame().loc[box(key_with_na)]

      def test_loc_getitem_series_label_list_missing_values(self):
          """A datetime64 array key containing an absent date raises KeyError."""
          # gh-11428
          dates = ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"]
          key = np.array(dates, dtype="datetime64")
          ser = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4))
          with pytest.raises(KeyError, match="not in index"):
              ser.loc[key]

      def test_loc_getitem_series_label_list_missing_integer_values(self):
          """A large-integer array key containing an absent label raises KeyError."""
          # GH: 25927
          ser = Series(
              index=np.array([9730701000001104, 10049011000001109]),
              data=np.array([999000011000001104, 999000011000001104]),
          )
          missing_key = np.array([9730701000001104, 10047311000001102])
          with pytest.raises(KeyError, match="not in index"):
              ser.loc[missing_key]

      @pytest.mark.parametrize("to_period", [True, False])
      def test_loc_getitem_listlike_of_datetimelike_keys(self, to_period):
          """List keys of Timestamps/Periods: in order, repeated, and missing."""
          # GH#11497

          def as_keys(stamps):
              # convert to daily Periods when the index under test is a PeriodIndex
              if to_period:
                  return [stamp.to_period("D") for stamp in stamps]
              return stamps

          idx = date_range("2011-01-01", "2011-01-02", freq="D", name="idx")
          if to_period:
              idx = idx.to_period("D")
          ser = Series([0.1, 0.2], index=idx, name="s")

          # every label, in index order
          keys = as_keys([Timestamp("2011-01-01"), Timestamp("2011-01-02")])
          result = ser.loc[keys]
          exp = Series([0.1, 0.2], index=idx, name="s")
          if not to_period:
              exp.index = exp.index._with_freq(None)
          tm.assert_series_equal(result, exp, check_index_type=True)

          # repeated and reordered labels
          keys = as_keys(
              [
                  Timestamp("2011-01-02"),
                  Timestamp("2011-01-02"),
                  Timestamp("2011-01-01"),
              ]
          )
          exp = Series(
              [0.2, 0.2, 0.1], index=Index(keys, name="idx", dtype=idx.dtype), name="s"
          )
          result = ser.loc[keys]
          tm.assert_series_equal(result, exp, check_index_type=True)

          # a label that is not in the index
          keys = as_keys(
              [
                  Timestamp("2011-01-03"),
                  Timestamp("2011-01-02"),
                  Timestamp("2011-01-03"),
              ]
          )
          with pytest.raises(KeyError, match="not in index"):
              ser.loc[keys]

      def test_loc_named_index(self):
          """Indexing with a named Index carries its name over to the result."""
          # GH 42790
          df = DataFrame(
              [[1, 2], [4, 5], [7, 8]],
              index=["cobra", "viper", "sidewinder"],
              columns=["max_speed", "shield"],
          )
          expected = df.iloc[:2]
          expected.index.name = "foo"

          named_key = Index(["cobra", "viper"], name="foo")
          tm.assert_frame_equal(df.loc[named_key], expected)
  @pytest.mark.parametrize(
      "columns, column_key, expected_columns",
      [
          ([2011, 2012, 2013], [2011, 2012], [0, 1]),
          ([2011, 2012, "All"], [2011, 2012], [0, 1]),
          ([2011, 2012, "All"], [2011, "All"], [0, 2]),
      ],
  )
  def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_columns):
      """Integer column labels in a list key are matched as labels."""
      # gh-14836
      values = np.random.default_rng(2).random((3, 3))
      df = DataFrame(values, columns=columns, index=list("ABC"))

      by_position = df.iloc[:, expected_columns]
      by_label = df.loc[["A", "B", "C"], column_key]

      tm.assert_frame_equal(by_label, by_position, check_column_type=True)
  def test_loc_setitem_float_intindex():
      """Setting a float column label on integer-labelled columns appends a column.

      The new column is filled with NaN and the expected column labels are
      all floats.
      """
      # GH 8720
      rand_data = np.random.default_rng(2).standard_normal((8, 4))
      result = DataFrame(rand_data)
      result.loc[:, 0.5] = np.nan

      nan_column = np.array([np.nan] * 8).reshape(8, 1)
      expected_data = np.hstack((rand_data, nan_column))
      expected = DataFrame(expected_data, columns=[0.0, 1.0, 2.0, 3.0, 0.5])
      # The original repeated the exact same build/set/assert sequence a second
      # time; the duplicate added no coverage and has been dropped.
      tm.assert_frame_equal(result, expected)
  def test_loc_axis_1_slice():
      """``df.loc(axis=1)`` slices MultiIndex columns between two tuples."""
      # GH 10586
      row_labels = tuple("ABCDEFGHIJ")
      year_months = [(yr, m) for yr in [2014, 2015] for m in [7, 8, 9, 10]]
      df = DataFrame(
          np.ones((10, 8)),
          index=row_labels,
          columns=MultiIndex.from_tuples(year_months),
      )

      sliced = df.loc(axis=1)[(2014, 9):(2015, 8)]

      kept = [(2014, 9), (2014, 10), (2015, 7), (2015, 8)]
      expected = DataFrame(
          np.ones((10, 4)),
          index=row_labels,
          columns=MultiIndex.from_tuples(kept),
      )
      tm.assert_frame_equal(sliced, expected)
  def test_loc_set_dataframe_multiindex():
      """Writing a selection back onto itself leaves the frame unchanged."""
      # GH 14592
      columns = MultiIndex.from_product([range(2), range(2)])
      expected = DataFrame("a", index=range(2), columns=columns)

      result = expected.copy()
      selection = result.loc[0, [(0, 1)]]
      result.loc[0, [(0, 1)]] = selection

      tm.assert_frame_equal(result, expected)
  def test_loc_mixed_int_float():
      """Integer label lookup in an object index holding an int and a float."""
      # GH#19456
      mixed_index = Index([1, 2.0], dtype=object)
      ser = Series(range(2), mixed_index)

      assert ser.loc[1] == 0
  def test_loc_with_positional_slice_raises():
      """Setting through an integer slice on a string index raises TypeError."""
      # GH#31840
      ser = Series(range(4), index=["A", "B", "C", "D"])

      expected_msg = "Slicing a positional slice with .loc"
      with pytest.raises(TypeError, match=expected_msg):
          ser.loc[:3] = 2
  def test_loc_slice_disallows_positional():
      """Integer slices on a DatetimeIndex raise for both getting and setting."""
      # GH#16121, GH#24612, GH#31810
      dti = date_range("2016-01-01", periods=3)
      df = DataFrame(np.random.default_rng(2).random((3, 2)), index=dti)
      ser = df[0]

      msg = (
          "cannot do slice indexing on DatetimeIndex with these "
          r"indexers \[1\] of type int"
      )
      setitem_msg = "Slicing a positional slice with .loc"

      # row-only keys, on both the frame and one of its columns
      for obj in (df, ser):
          with pytest.raises(TypeError, match=msg):
              obj.loc[1:3]

          with pytest.raises(TypeError, match=setitem_msg):
              # GH#31840 enforce incorrect behavior
              obj.loc[1:3] = 1

      # row slice combined with a column label
      with pytest.raises(TypeError, match=msg):
          df.loc[1:3, 1]

      with pytest.raises(TypeError, match=setitem_msg):
          # GH#31840 enforce incorrect behavior
          df.loc[1:3, 1] = 2
  def test_loc_datetimelike_mismatched_dtypes():
      """A TimedeltaIndex key on a DatetimeIndex raises KeyError."""
      # GH#32650 dont mix and match datetime/timedelta/period dtypes
      hourly = date_range("2012", freq="h", periods=5)
      df = DataFrame(
          np.random.default_rng(2).standard_normal((5, 3)),
          columns=["a", "b", "c"],
          index=hourly,
      )
      # create dataframe with non-unique DatetimeIndex
      df = df.iloc[[0, 2, 2, 3]].copy()

      tdi = pd.TimedeltaIndex(df.index.asi8)  # matching i8 values

      msg = r"None of \[TimedeltaIndex.* are in the \[index\]"
      for obj in (df, df["a"]):
          with pytest.raises(KeyError, match=msg):
              obj.loc[tdi]
  def test_loc_with_period_index_indexer():
      """``.loc`` accepts a PeriodIndex, a list of its Periods, or a slice of it."""
      # GH#4125
      idx = pd.period_range("2002-01", "2003-12", freq="M")
      df = DataFrame(np.random.default_rng(2).standard_normal((24, 10)), index=idx)

      # the full index as a key returns the whole frame
      tm.assert_frame_equal(df, df.loc[idx])
      # Same via a plain list of Periods. The original made this exact
      # assertion three times; the repeats added nothing and are removed.
      tm.assert_frame_equal(df, df.loc[list(idx)])
      # a sub-index selects the matching leading rows
      tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]])
  def test_loc_setitem_multiindex_timestamp():
      """Setting NaN by Timestamp row label and a tuple of column labels."""
      # GH#13831
      vals = np.random.default_rng(2).standard_normal((8, 6))
      idx = date_range("1/1/2000", periods=8)
      cols = ["A", "B", "C", "D", "E", "F"]

      # write through .loc ...
      exp = DataFrame(vals, index=idx, columns=cols)
      second_row = exp.index[1]
      exp.loc[second_row, ("A", "B")] = np.nan

      # ... and compare with a frame built from the array edited directly
      vals[1][0:2] = np.nan
      res = DataFrame(vals, index=idx, columns=cols)

      tm.assert_frame_equal(res, exp)
  def test_loc_getitem_multiindex_tuple_level():
      """A MultiIndex level whose labels are tuples is looked up label by label."""
      # GH#27591
      lev1 = ["a", "b", "c"]
      lev2 = [(0, 1), (1, 0)]
      lev3 = [0, 1]
      cols = MultiIndex.from_product([lev1, lev2, lev3], names=["x", "y", "z"])
      df = DataFrame(6, index=range(5), columns=cols)

      full_key = (lev1[0], lev2[0], lev3[0])

      # the lev2[0] here should be treated as a single label, not as a sequence
      #  of labels
      result = df.loc[:, full_key]

      # TODO: i think this actually should drop levels
      expected = df.iloc[:, :1]
      tm.assert_frame_equal(result, expected)

      alt = df.xs(full_key, level=[0, 1, 2], axis=1)
      tm.assert_frame_equal(alt, expected)

      # same thing on a Series
      ser = df.iloc[0]
      expected2 = ser.iloc[:1]

      alt2 = ser.xs(full_key, level=[0, 1, 2], axis=0)
      tm.assert_series_equal(alt2, expected2)

      # the three labels given separately return the scalar
      result2 = ser.loc[lev1[0], lev2[0], lev3[0]]
      assert result2 == 6
  def test_loc_getitem_nullable_index_with_duplicates():
      """Scalar lookup in an "Int64" index holding duplicated missing values."""
      # GH#34497
      raw = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, np.nan, np.nan]]).T
      df = DataFrame(data=raw, columns=["a", "b", "c"], dtype="Int64")

      indexed = df.set_index("c")
      assert indexed.index.dtype == "Int64"

      expected = Series([1, 5], index=indexed.columns, dtype="Int64", name=1)
      tm.assert_series_equal(indexed.loc[1], expected)

      # pd.NA and duplicates in an object-dtype Index
      indexed.index = indexed.index.astype(object)
      tm.assert_series_equal(indexed.loc[1], expected)
  @pytest.mark.parametrize("value", [300, np.uint16(300), np.int16(300)])
  def test_loc_setitem_uint8_upcast(value):
      """Setting 300 into a uint8 column warns and upcasts the column."""
      # GH#26049
      df = DataFrame([1, 2, 3, 4], columns=["col1"], dtype="uint8")
      with tm.assert_produces_warning(FutureWarning, match="item of incompatible dtype"):
          df.loc[2, "col1"] = value  # value that can't be held in uint8

      # Note, result type of uint8 + int16 is int16
      # in numpy < 2, though, numpy would inspect the
      # value and see that it could fit in an uint16, resulting in a uint16
      promotes_to_int16 = np_version_gt2 and isinstance(value, np.int16)
      dtype = "int16" if promotes_to_int16 else "uint16"

      expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype=dtype)
      tm.assert_frame_equal(df, expected)
  @pytest.mark.parametrize(
      "fill_val,exp_dtype",
      [
          (Timestamp("2022-01-06"), "datetime64[ns]"),
          (Timestamp("2022-01-07", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
      ],
  )
  def test_loc_setitem_using_datetimelike_str_as_index(fill_val, exp_dtype):
      """Adding a row by a date string keeps the index a DatetimeIndex of the same dtype."""
      data = ["2022-01-02", "2022-01-03", "2022-01-04", fill_val.date()]
      index = DatetimeIndex(data, tz=fill_val.tz, dtype=exp_dtype)
      df = DataFrame([10, 11, 12, 14], columns=["a"], index=index)

      # adding new row using an unexisting datetime-like str index
      new_label = "2022-01-08"
      df.loc[new_label, "a"] = 13

      expected_index = DatetimeIndex(data + [new_label], dtype=exp_dtype)
      tm.assert_index_equal(df.index, expected_index, exact=True)
  def test_loc_set_int_dtype():
      """A new column set to an int through ``.loc`` holds that int."""
      # GH#23326
      df = DataFrame([list("abc")])
      df.loc[:, "col1"] = 5

      expected_columns = {0: ["a"], 1: ["b"], 2: ["c"], "col1": [5]}
      tm.assert_frame_equal(df, DataFrame(expected_columns))
  @pytest.mark.filterwarnings(r"ignore:Period with BDay freq is deprecated:FutureWarning")
  @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
  def test_loc_periodindex_3_levels():
      """Tuple lookup in a three-level index whose first level is a PeriodIndex."""
      # GH#24091
      p_index = PeriodIndex(
          ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"],
          name="datetime",
          freq="B",
      )
      frame = DataFrame(
          [["A", "B", 1.0], ["A", "C", 2.0], ["Z", "Q", 3.0], ["W", "F", 4.0]],
          index=p_index,
          columns=["ONE", "TWO", "VALUES"],
      )
      # append ONE and TWO to the period level, then take the value column
      mi_series = frame.set_index(["ONE", "TWO"], append=True)["VALUES"]

      first_key = (p_index[0], "A", "B")
      assert mi_series.loc[first_key] == 1.0
  def test_loc_setitem_pyarrow_strings():
      """Masked assignment into a ``string[pyarrow]`` column keeps that dtype."""
      # GH#52319
      pytest.importorskip("pyarrow")

      def build(strings):
          # frame with a pyarrow-backed string column and a boolean column
          return DataFrame(
              {
                  "strings": Series(strings, dtype="string[pyarrow]"),
                  "ids": Series([True, True, False]),
              }
          )

      df = build(["A", "B", "C"])
      new_value = Series(["X", "Y"])
      df.loc[df.ids, "strings"] = new_value

      expected_df = build(["X", "Y", "C"])
      tm.assert_frame_equal(df, expected_df)
  2633. class TestLocSeries:
  @pytest.mark.parametrize("val,expected", [(2**63 - 1, 3), (2**63, 4)])
  def test_loc_uint64(self, val, expected):
      """Lookup of integer labels on either side of the int64 maximum."""
      # see GH#19399
      int64_max = 2**63 - 1
      ser = Series({int64_max: 3, 2**63: 4})
      assert ser.loc[val] == expected
  def test_loc_getitem(self, string_series, datetime_series):
      """``.loc`` getitem with an index key, a label slice, a mask, and scalars."""
      picked = string_series.index[[3, 4, 7]]
      tm.assert_series_equal(string_series.loc[picked], string_series.reindex(picked))

      tm.assert_series_equal(string_series.iloc[5::2], string_series[5::2])

      # slice with indices
      d1, d2 = datetime_series.index[[5, 15]]
      label_sliced = datetime_series.loc[d1:d2]
      truncated = datetime_series.truncate(d1, d2)
      tm.assert_series_equal(label_sliced, truncated)

      # boolean
      above_median = string_series > string_series.median()
      tm.assert_series_equal(
          string_series.loc[above_median], string_series[above_median]
      )

      # ask for index value
      assert datetime_series.loc[d1] == datetime_series[d1]
      assert datetime_series.loc[d2] == datetime_series[d2]
  def test_loc_getitem_not_monotonic(self, datetime_series):
      """Label slices on a reordered datetime series raise KeyError."""
      d1, d2 = datetime_series.index[[5, 15]]

      # every other row, with the first three taken out of order
      shuffled = datetime_series[::2].iloc[[1, 2, 0]]

      msg = r"Timestamp\('2000-01-10 00:00:00'\)"
      with pytest.raises(KeyError, match=msg):
          shuffled.loc[d1:d2]
      with pytest.raises(KeyError, match=msg):
          shuffled.loc[d1:d2] = 0
  def test_loc_getitem_setitem_integer_slice_keyerrors(self):
      """Integer slices: positional via ``.iloc``, by label via ``.loc``."""
      even_labels = list(range(0, 20, 2))
      ser = Series(np.random.default_rng(2).standard_normal(10), index=even_labels)

      # this is OK
      within = ser.copy()
      within.iloc[4:10] = 0
      assert (within.iloc[4:10] == 0).all()

      # so is this
      beyond = ser.copy()
      beyond.iloc[3:11] = 0
      assert (beyond.iloc[3:11] == 0).values.all()

      positional = ser.iloc[2:6]
      by_label = ser.loc[3:11]
      expected = ser.reindex([4, 6, 8, 10])

      tm.assert_series_equal(positional, expected)
      tm.assert_series_equal(by_label, expected)

      # non-monotonic, raise KeyError
      order = list(range(5)) + list(range(9, 4, -1))
      s2 = ser.iloc[order]
      with pytest.raises(KeyError, match=r"^3$"):
          s2.loc[3:11]
      with pytest.raises(KeyError, match=r"^3$"):
          s2.loc[3:11] = 0
  def test_loc_getitem_iterator(self, string_series):
      """An iterator over labels is accepted as a ``.loc`` key."""
      label_iter = iter(string_series.index[:10])
      selected = string_series.loc[label_iter]
      tm.assert_series_equal(selected, string_series[:10])
  def test_loc_setitem_boolean(self, string_series):
      """Masked ``.loc`` assignment matches masked ``[]`` assignment."""
      above_median = string_series > string_series.median()

      via_loc = string_series.copy()
      via_loc.loc[above_median] = 0

      # the fixture itself is the comparison object and is modified in place
      expected = string_series
      expected[above_median] = 0

      tm.assert_series_equal(via_loc, expected)
  def test_loc_setitem_corner(self, string_series):
      """Setting with a list key that includes an unknown label raises KeyError."""
      known = list(string_series.index[[5, 8, 12]])

      string_series.loc[known] = 5

      msg = r"\['foo'\] not in index"
      with pytest.raises(KeyError, match=msg):
          string_series.loc[known + ["foo"]] = 5
  def test_basic_setitem_with_labels(self, datetime_series):
      """``[]`` and ``.loc`` assignment agree for label lists and label slices."""
      indices = datetime_series.index[[5, 10, 15]]

      # index of labels as key
      via_getitem = datetime_series.copy()
      via_loc = datetime_series.copy()
      via_getitem[indices] = 0
      via_loc.loc[indices] = 0
      tm.assert_series_equal(via_getitem, via_loc)

      # label slice as key
      start, stop = indices[0], indices[2]
      via_getitem = datetime_series.copy()
      via_loc = datetime_series.copy()
      via_getitem[start:stop] = 0
      via_loc.loc[start:stop] = 0
      tm.assert_series_equal(via_getitem, via_loc)
  def test_loc_setitem_listlike_of_ints(self):
      """Setting by a list/array of int labels agrees between ``[]`` and ``.loc``.

      Also checks that a list-like key containing a label missing from the
      index raises ``KeyError`` on assignment.
      """
      # integer indexes, be careful
      ser = Series(
          np.random.default_rng(2).standard_normal(10), index=list(range(0, 20, 2))
      )
      inds = [0, 4, 6]
      arr_inds = np.array([0, 4, 6])

      # The original assigned into ``ser`` and then compared two untouched
      # copies, so the assertion could never fail. Assign into the copies so
      # the comparison actually exercises both setters.
      for key in (inds, arr_inds):
          cp = ser.copy()
          exp = ser.copy()
          cp[key] = 0
          exp.loc[key] = 0
          tm.assert_series_equal(cp, exp)

      inds_notfound = [0, 4, 5, 6]
      arr_inds_notfound = np.array([0, 4, 5, 6])
      msg = r"\[5\] not in index"
      with pytest.raises(KeyError, match=msg):
          ser[inds_notfound] = 0
      # narrowed from a bare ``Exception``: a missing label is a KeyError
      with pytest.raises(KeyError, match=msg):
          ser[arr_inds_notfound] = 0
  2738. def test_loc_setitem_dt64tz_values(self):
  2739. # GH#12089
  2740. ser = Series(
  2741. date_range("2011-01-01", periods=3, tz="US/Eastern"),
  2742. index=["a", "b", "c"],
  2743. )
  2744. s2 = ser.copy()
  2745. expected = Timestamp("2011-01-03", tz="US/Eastern")
  2746. s2.loc["a"] = expected
  2747. result = s2.loc["a"]
  2748. assert result == expected
  2749. s2 = ser.copy()
  2750. s2.iloc[0] = expected
  2751. result = s2.iloc[0]
  2752. assert result == expected
  2753. s2 = ser.copy()
  2754. s2["a"] = expected
  2755. result = s2["a"]
  2756. assert result == expected
  @pytest.mark.parametrize("array_fn", [np.array, pd.array, list, tuple])
  @pytest.mark.parametrize("size", [0, 4, 5, 6])
  def test_loc_iloc_setitem_with_listlike(self, size, array_fn):
      """A list-like set at one label of an object Series is stored as one element."""
      # GH37748
      # testing insertion, in a Series of size N (here 5), of a listlike object
      # of size 0, N-1, N, N+1
      labels = list("abcde")
      arr = array_fn([0] * size)
      expected = Series([arr, 0, 0, 0, 0], index=labels, dtype=object)

      via_loc = Series(0, index=labels, dtype=object)
      via_loc.loc["a"] = arr
      tm.assert_series_equal(via_loc, expected)

      via_iloc = Series(0, index=labels, dtype=object)
      via_iloc.iloc[0] = arr
      tm.assert_series_equal(via_iloc, expected)
  @pytest.mark.parametrize("indexer", [IndexSlice["A", :], ("A", slice(None))])
  def test_loc_series_getitem_too_many_dimensions(self, indexer):
      """An extra axis indexer on a Series raises IndexingError."""
      # GH#35349
      two_level = MultiIndex.from_tuples([("A", "0"), ("A", "1"), ("B", "0")])
      ser = Series(index=two_level, data=[21, 22, 23])

      msg = "Too many indexers"
      with pytest.raises(IndexingError, match=msg):
          ser.loc[indexer, :]

      with pytest.raises(IndexingError, match=msg):
          ser.loc[indexer, :] = 1
  2783. def test_loc_setitem(self, string_series):
  2784. inds = string_series.index[[3, 4, 7]]
  2785. result = string_series.copy()
  2786. result.loc[inds] = 5
  2787. expected = string_series.copy()
  2788. expected.iloc[[3, 4, 7]] = 5
  2789. tm.assert_series_equal(result, expected)
  2790. result.iloc[5:10] = 10
  2791. expected[5:10] = 10
  2792. tm.assert_series_equal(result, expected)
  2793. # set slice with indices
  2794. d1, d2 = string_series.index[[5, 15]]
  2795. result.loc[d1:d2] = 6
  2796. expected[5:16] = 6 # because it's inclusive
  2797. tm.assert_series_equal(result, expected)
  2798. # set index value
  2799. string_series.loc[d1] = 4
  2800. string_series.loc[d2] = 6
  2801. assert string_series[d1] == 4
  2802. assert string_series[d2] == 6
  @pytest.mark.parametrize("dtype", ["object", "string"])
  def test_loc_assign_dict_to_row(self, dtype):
      # GH41044
      # A dict assigned to a full row is matched to the columns by key.
      expected = DataFrame(
          {"A": ["newA", "def"], "B": ["newB", "jkl"]},
          dtype=dtype,
      )

      df = DataFrame({"A": ["abc", "def"], "B": ["ghi", "jkl"]}, dtype=dtype)
      new_row = {"A": "newA", "B": "newB"}
      df.loc[0, :] = new_row

      tm.assert_frame_equal(df, expected)
  @td.skip_array_manager_invalid_test
  def test_loc_setitem_dict_timedelta_multiple_set(self):
      # GH 16309
      # Assign the same dict-valued row twice: the first assignment creates
      # row 1 in the empty frame, the second overwrites that same row.
      columns = ["time", "value"]
      delta = Timedelta(6, unit="s")

      result = DataFrame(columns=columns)
      for _ in range(2):
          result.loc[1] = {"time": delta, "value": "foo"}

      expected = DataFrame([[delta, "foo"]], columns=columns, index=[1])
      tm.assert_frame_equal(result, expected)
  2820. def test_loc_set_multiple_items_in_multiple_new_columns(self):
  2821. # GH 25594
  2822. df = DataFrame(index=[1, 2], columns=["a"])
  2823. df.loc[1, ["b", "c"]] = [6, 7]
  2824. expected = DataFrame(
  2825. {
  2826. "a": Series([np.nan, np.nan], dtype="object"),
  2827. "b": [6, np.nan],
  2828. "c": [7, np.nan],
  2829. },
  2830. index=[1, 2],
  2831. )
  2832. tm.assert_frame_equal(df, expected)
  2833. def test_getitem_loc_str_periodindex(self):
  2834. # GH#33964
  2835. msg = "Period with BDay freq is deprecated"
  2836. with tm.assert_produces_warning(FutureWarning, match=msg):
  2837. index = pd.period_range(start="2000", periods=20, freq="B")
  2838. series = Series(range(20), index=index)
  2839. assert series.loc["2000-01-14"] == 9
  2840. def test_loc_nonunique_masked_index(self):
  2841. # GH 57027
  2842. ids = list(range(11))
  2843. index = Index(ids * 1000, dtype="Int64")
  2844. df = DataFrame({"val": np.arange(len(index), dtype=np.intp)}, index=index)
  2845. result = df.loc[ids]
  2846. expected = DataFrame(
  2847. {"val": index.argsort(kind="stable").astype(np.intp)},
  2848. index=Index(np.array(ids).repeat(1000), dtype="Int64"),
  2849. )
  2850. tm.assert_frame_equal(result, expected)