generic.py 466 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025
  1. # pyright: reportPropertyTypeMismatch=false
  2. from __future__ import annotations
  3. import collections
  4. from copy import deepcopy
  5. import datetime as dt
  6. from functools import partial
  7. import gc
  8. from json import loads
  9. import operator
  10. import pickle
  11. import re
  12. import sys
  13. from typing import (
  14. TYPE_CHECKING,
  15. Any,
  16. Callable,
  17. ClassVar,
  18. Literal,
  19. NoReturn,
  20. cast,
  21. final,
  22. overload,
  23. )
  24. import warnings
  25. import weakref
  26. import numpy as np
  27. from pandas._config import (
  28. config,
  29. using_copy_on_write,
  30. warn_copy_on_write,
  31. )
  32. from pandas._libs import lib
  33. from pandas._libs.lib import is_range_indexer
  34. from pandas._libs.tslibs import (
  35. Period,
  36. Tick,
  37. Timestamp,
  38. to_offset,
  39. )
  40. from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
  41. from pandas._typing import (
  42. AlignJoin,
  43. AnyArrayLike,
  44. ArrayLike,
  45. Axes,
  46. Axis,
  47. AxisInt,
  48. CompressionOptions,
  49. DtypeArg,
  50. DtypeBackend,
  51. DtypeObj,
  52. FilePath,
  53. FillnaOptions,
  54. FloatFormatType,
  55. FormattersType,
  56. Frequency,
  57. IgnoreRaise,
  58. IndexKeyFunc,
  59. IndexLabel,
  60. InterpolateOptions,
  61. IntervalClosedType,
  62. JSONSerializable,
  63. Level,
  64. Manager,
  65. NaPosition,
  66. NDFrameT,
  67. OpenFileErrors,
  68. RandomState,
  69. ReindexMethod,
  70. Renamer,
  71. Scalar,
  72. Self,
  73. SequenceNotStr,
  74. SortKind,
  75. StorageOptions,
  76. Suffixes,
  77. T,
  78. TimeAmbiguous,
  79. TimedeltaConvertibleTypes,
  80. TimeNonexistent,
  81. TimestampConvertibleTypes,
  82. TimeUnit,
  83. ValueKeyFunc,
  84. WriteBuffer,
  85. WriteExcelBuffer,
  86. npt,
  87. )
  88. from pandas.compat import PYPY
  89. from pandas.compat._constants import (
  90. REF_COUNT,
  91. WARNING_CHECK_DISABLED,
  92. )
  93. from pandas.compat._optional import import_optional_dependency
  94. from pandas.compat.numpy import function as nv
  95. from pandas.errors import (
  96. AbstractMethodError,
  97. ChainedAssignmentError,
  98. InvalidIndexError,
  99. SettingWithCopyError,
  100. SettingWithCopyWarning,
  101. _chained_assignment_method_msg,
  102. _chained_assignment_warning_method_msg,
  103. _check_cacher,
  104. )
  105. from pandas.util._decorators import (
  106. deprecate_nonkeyword_arguments,
  107. doc,
  108. )
  109. from pandas.util._exceptions import find_stack_level
  110. from pandas.util._validators import (
  111. check_dtype_backend,
  112. validate_ascending,
  113. validate_bool_kwarg,
  114. validate_fillna_kwargs,
  115. validate_inclusive,
  116. )
  117. from pandas.core.dtypes.astype import astype_is_view
  118. from pandas.core.dtypes.common import (
  119. ensure_object,
  120. ensure_platform_int,
  121. ensure_str,
  122. is_bool,
  123. is_bool_dtype,
  124. is_dict_like,
  125. is_extension_array_dtype,
  126. is_list_like,
  127. is_number,
  128. is_numeric_dtype,
  129. is_re_compilable,
  130. is_scalar,
  131. pandas_dtype,
  132. )
  133. from pandas.core.dtypes.dtypes import (
  134. DatetimeTZDtype,
  135. ExtensionDtype,
  136. )
  137. from pandas.core.dtypes.generic import (
  138. ABCDataFrame,
  139. ABCSeries,
  140. )
  141. from pandas.core.dtypes.inference import (
  142. is_hashable,
  143. is_nested_list_like,
  144. )
  145. from pandas.core.dtypes.missing import (
  146. isna,
  147. notna,
  148. )
  149. from pandas.core import (
  150. algorithms as algos,
  151. arraylike,
  152. common,
  153. indexing,
  154. missing,
  155. nanops,
  156. sample,
  157. )
  158. from pandas.core.array_algos.replace import should_use_regex
  159. from pandas.core.arrays import ExtensionArray
  160. from pandas.core.base import PandasObject
  161. from pandas.core.construction import extract_array
  162. from pandas.core.flags import Flags
  163. from pandas.core.indexes.api import (
  164. DatetimeIndex,
  165. Index,
  166. MultiIndex,
  167. PeriodIndex,
  168. RangeIndex,
  169. default_index,
  170. ensure_index,
  171. )
  172. from pandas.core.internals import (
  173. ArrayManager,
  174. BlockManager,
  175. SingleArrayManager,
  176. )
  177. from pandas.core.internals.construction import (
  178. mgr_to_mgr,
  179. ndarray_to_mgr,
  180. )
  181. from pandas.core.methods.describe import describe_ndframe
  182. from pandas.core.missing import (
  183. clean_fill_method,
  184. clean_reindex_fill_method,
  185. find_valid_index,
  186. )
  187. from pandas.core.reshape.concat import concat
  188. from pandas.core.shared_docs import _shared_docs
  189. from pandas.core.sorting import get_indexer_indexer
  190. from pandas.core.window import (
  191. Expanding,
  192. ExponentialMovingWindow,
  193. Rolling,
  194. Window,
  195. )
  196. from pandas.io.formats.format import (
  197. DataFrameFormatter,
  198. DataFrameRenderer,
  199. )
  200. from pandas.io.formats.printing import pprint_thing
  201. if TYPE_CHECKING:
  202. from collections.abc import (
  203. Hashable,
  204. Iterator,
  205. Mapping,
  206. Sequence,
  207. )
  208. from pandas._libs.tslibs import BaseOffset
  209. from pandas import (
  210. DataFrame,
  211. ExcelWriter,
  212. HDFStore,
  213. Series,
  214. )
  215. from pandas.core.indexers.objects import BaseIndexer
  216. from pandas.core.resample import Resampler
  217. # goal is to be able to define the docs close to function, while still being
  218. # able to share
  219. _shared_docs = {**_shared_docs}
  220. _shared_doc_kwargs = {
  221. "axes": "keywords for axes",
  222. "klass": "Series/DataFrame",
  223. "axes_single_arg": "{0 or 'index'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame", # noqa: E501
  224. "inplace": """
  225. inplace : bool, default False
  226. If True, performs operation inplace and returns None.""",
  227. "optional_by": """
  228. by : str or list of str
  229. Name or list of names to sort by""",
  230. }
  231. bool_t = bool # Need alias because NDFrame has def bool:
  232. class NDFrame(PandasObject, indexing.IndexingMixin):
  233. """
  234. N-dimensional analogue of DataFrame. Store multi-dimensional in a
  235. size-mutable, labeled data structure
  236. Parameters
  237. ----------
  238. data : BlockManager
  239. axes : list
  240. copy : bool, default False
  241. """
  242. _internal_names: list[str] = [
  243. "_mgr",
  244. "_cacher",
  245. "_item_cache",
  246. "_cache",
  247. "_is_copy",
  248. "_name",
  249. "_metadata",
  250. "_flags",
  251. ]
  252. _internal_names_set: set[str] = set(_internal_names)
  253. _accessors: set[str] = set()
  254. _hidden_attrs: frozenset[str] = frozenset([])
  255. _metadata: list[str] = []
  256. _is_copy: weakref.ReferenceType[NDFrame] | str | None = None
  257. _mgr: Manager
  258. _attrs: dict[Hashable, Any]
  259. _typ: str
  260. # ----------------------------------------------------------------------
  261. # Constructors
  262. def __init__(self, data: Manager) -> None:
  263. object.__setattr__(self, "_is_copy", None)
  264. object.__setattr__(self, "_mgr", data)
  265. object.__setattr__(self, "_item_cache", {})
  266. object.__setattr__(self, "_attrs", {})
  267. object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True))
  268. @final
  269. @classmethod
  270. def _init_mgr(
  271. cls,
  272. mgr: Manager,
  273. axes: dict[Literal["index", "columns"], Axes | None],
  274. dtype: DtypeObj | None = None,
  275. copy: bool_t = False,
  276. ) -> Manager:
  277. """passed a manager and a axes dict"""
  278. for a, axe in axes.items():
  279. if axe is not None:
  280. axe = ensure_index(axe)
  281. bm_axis = cls._get_block_manager_axis(a)
  282. mgr = mgr.reindex_axis(axe, axis=bm_axis)
  283. # make a copy if explicitly requested
  284. if copy:
  285. mgr = mgr.copy()
  286. if dtype is not None:
  287. # avoid further copies if we can
  288. if (
  289. isinstance(mgr, BlockManager)
  290. and len(mgr.blocks) == 1
  291. and mgr.blocks[0].values.dtype == dtype
  292. ):
  293. pass
  294. else:
  295. mgr = mgr.astype(dtype=dtype)
  296. return mgr
  297. @final
  298. def _as_manager(self, typ: str, copy: bool_t = True) -> Self:
  299. """
  300. Private helper function to create a DataFrame with specific manager.
  301. Parameters
  302. ----------
  303. typ : {"block", "array"}
  304. copy : bool, default True
  305. Only controls whether the conversion from Block->ArrayManager
  306. copies the 1D arrays (to ensure proper/contiguous memory layout).
  307. Returns
  308. -------
  309. DataFrame
  310. New DataFrame using specified manager type. Is not guaranteed
  311. to be a copy or not.
  312. """
  313. new_mgr: Manager
  314. new_mgr = mgr_to_mgr(self._mgr, typ=typ, copy=copy)
  315. # fastpath of passing a manager doesn't check the option/manager class
  316. return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self)
  317. @final
  318. @classmethod
  319. def _from_mgr(cls, mgr: Manager, axes: list[Index]) -> Self:
  320. """
  321. Construct a new object of this type from a Manager object and axes.
  322. Parameters
  323. ----------
  324. mgr : Manager
  325. Must have the same ndim as cls.
  326. axes : list[Index]
  327. Notes
  328. -----
  329. The axes must match mgr.axes, but are required for future-proofing
  330. in the event that axes are refactored out of the Manager objects.
  331. """
  332. obj = cls.__new__(cls)
  333. NDFrame.__init__(obj, mgr)
  334. return obj
  335. # ----------------------------------------------------------------------
  336. # attrs and flags
  337. @property
  338. def attrs(self) -> dict[Hashable, Any]:
  339. """
  340. Dictionary of global attributes of this dataset.
  341. .. warning::
  342. attrs is experimental and may change without warning.
  343. See Also
  344. --------
  345. DataFrame.flags : Global flags applying to this object.
  346. Notes
  347. -----
  348. Many operations that create new datasets will copy ``attrs``. Copies
  349. are always deep so that changing ``attrs`` will only affect the
  350. present dataset. ``pandas.concat`` copies ``attrs`` only if all input
  351. datasets have the same ``attrs``.
  352. Examples
  353. --------
  354. For Series:
  355. >>> ser = pd.Series([1, 2, 3])
  356. >>> ser.attrs = {"A": [10, 20, 30]}
  357. >>> ser.attrs
  358. {'A': [10, 20, 30]}
  359. For DataFrame:
  360. >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
  361. >>> df.attrs = {"A": [10, 20, 30]}
  362. >>> df.attrs
  363. {'A': [10, 20, 30]}
  364. """
  365. return self._attrs
  366. @attrs.setter
  367. def attrs(self, value: Mapping[Hashable, Any]) -> None:
  368. self._attrs = dict(value)
  369. @final
  370. @property
  371. def flags(self) -> Flags:
  372. """
  373. Get the properties associated with this pandas object.
  374. The available flags are
  375. * :attr:`Flags.allows_duplicate_labels`
  376. See Also
  377. --------
  378. Flags : Flags that apply to pandas objects.
  379. DataFrame.attrs : Global metadata applying to this dataset.
  380. Notes
  381. -----
  382. "Flags" differ from "metadata". Flags reflect properties of the
  383. pandas object (the Series or DataFrame). Metadata refer to properties
  384. of the dataset, and should be stored in :attr:`DataFrame.attrs`.
  385. Examples
  386. --------
  387. >>> df = pd.DataFrame({"A": [1, 2]})
  388. >>> df.flags
  389. <Flags(allows_duplicate_labels=True)>
  390. Flags can be get or set using ``.``
  391. >>> df.flags.allows_duplicate_labels
  392. True
  393. >>> df.flags.allows_duplicate_labels = False
  394. Or by slicing with a key
  395. >>> df.flags["allows_duplicate_labels"]
  396. False
  397. >>> df.flags["allows_duplicate_labels"] = True
  398. """
  399. return self._flags
  400. @final
  401. def set_flags(
  402. self,
  403. *,
  404. copy: bool_t = False,
  405. allows_duplicate_labels: bool_t | None = None,
  406. ) -> Self:
  407. """
  408. Return a new object with updated flags.
  409. Parameters
  410. ----------
  411. copy : bool, default False
  412. Specify if a copy of the object should be made.
  413. .. note::
  414. The `copy` keyword will change behavior in pandas 3.0.
  415. `Copy-on-Write
  416. <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
  417. will be enabled by default, which means that all methods with a
  418. `copy` keyword will use a lazy copy mechanism to defer the copy and
  419. ignore the `copy` keyword. The `copy` keyword will be removed in a
  420. future version of pandas.
  421. You can already get the future behavior and improvements through
  422. enabling copy on write ``pd.options.mode.copy_on_write = True``
  423. allows_duplicate_labels : bool, optional
  424. Whether the returned object allows duplicate labels.
  425. Returns
  426. -------
  427. Series or DataFrame
  428. The same type as the caller.
  429. See Also
  430. --------
  431. DataFrame.attrs : Global metadata applying to this dataset.
  432. DataFrame.flags : Global flags applying to this object.
  433. Notes
  434. -----
  435. This method returns a new object that's a view on the same data
  436. as the input. Mutating the input or the output values will be reflected
  437. in the other.
  438. This method is intended to be used in method chains.
  439. "Flags" differ from "metadata". Flags reflect properties of the
  440. pandas object (the Series or DataFrame). Metadata refer to properties
  441. of the dataset, and should be stored in :attr:`DataFrame.attrs`.
  442. Examples
  443. --------
  444. >>> df = pd.DataFrame({"A": [1, 2]})
  445. >>> df.flags.allows_duplicate_labels
  446. True
  447. >>> df2 = df.set_flags(allows_duplicate_labels=False)
  448. >>> df2.flags.allows_duplicate_labels
  449. False
  450. """
  451. df = self.copy(deep=copy and not using_copy_on_write())
  452. if allows_duplicate_labels is not None:
  453. df.flags["allows_duplicate_labels"] = allows_duplicate_labels
  454. return df
  455. @final
  456. @classmethod
  457. def _validate_dtype(cls, dtype) -> DtypeObj | None:
  458. """validate the passed dtype"""
  459. if dtype is not None:
  460. dtype = pandas_dtype(dtype)
  461. # a compound dtype
  462. if dtype.kind == "V":
  463. raise NotImplementedError(
  464. "compound dtypes are not implemented "
  465. f"in the {cls.__name__} constructor"
  466. )
  467. return dtype
  468. # ----------------------------------------------------------------------
  469. # Construction
  470. @property
  471. def _constructor(self) -> Callable[..., Self]:
  472. """
  473. Used when a manipulation result has the same dimensions as the
  474. original.
  475. """
  476. raise AbstractMethodError(self)
  477. # ----------------------------------------------------------------------
  478. # Internals
  479. @final
  480. @property
  481. def _data(self):
  482. # GH#33054 retained because some downstream packages uses this,
  483. # e.g. fastparquet
  484. # GH#33333
  485. warnings.warn(
  486. f"{type(self).__name__}._data is deprecated and will be removed in "
  487. "a future version. Use public APIs instead.",
  488. DeprecationWarning,
  489. stacklevel=find_stack_level(),
  490. )
  491. return self._mgr
  492. # ----------------------------------------------------------------------
  493. # Axis
  494. _AXIS_ORDERS: list[Literal["index", "columns"]]
  495. _AXIS_TO_AXIS_NUMBER: dict[Axis, AxisInt] = {0: 0, "index": 0, "rows": 0}
  496. _info_axis_number: int
  497. _info_axis_name: Literal["index", "columns"]
  498. _AXIS_LEN: int
  499. @final
  500. def _construct_axes_dict(self, axes: Sequence[Axis] | None = None, **kwargs):
  501. """Return an axes dictionary for myself."""
  502. d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)}
  503. # error: Argument 1 to "update" of "MutableMapping" has incompatible type
  504. # "Dict[str, Any]"; expected "SupportsKeysAndGetItem[Union[int, str], Any]"
  505. d.update(kwargs) # type: ignore[arg-type]
  506. return d
  507. @final
  508. @classmethod
  509. def _get_axis_number(cls, axis: Axis) -> AxisInt:
  510. try:
  511. return cls._AXIS_TO_AXIS_NUMBER[axis]
  512. except KeyError:
  513. raise ValueError(f"No axis named {axis} for object type {cls.__name__}")
  514. @final
  515. @classmethod
  516. def _get_axis_name(cls, axis: Axis) -> Literal["index", "columns"]:
  517. axis_number = cls._get_axis_number(axis)
  518. return cls._AXIS_ORDERS[axis_number]
  519. @final
  520. def _get_axis(self, axis: Axis) -> Index:
  521. axis_number = self._get_axis_number(axis)
  522. assert axis_number in {0, 1}
  523. return self.index if axis_number == 0 else self.columns
  524. @final
  525. @classmethod
  526. def _get_block_manager_axis(cls, axis: Axis) -> AxisInt:
  527. """Map the axis to the block_manager axis."""
  528. axis = cls._get_axis_number(axis)
  529. ndim = cls._AXIS_LEN
  530. if ndim == 2:
  531. # i.e. DataFrame
  532. return 1 - axis
  533. return axis
  534. @final
  535. def _get_axis_resolvers(self, axis: str) -> dict[str, Series | MultiIndex]:
  536. # index or columns
  537. axis_index = getattr(self, axis)
  538. d = {}
  539. prefix = axis[0]
  540. for i, name in enumerate(axis_index.names):
  541. if name is not None:
  542. key = level = name
  543. else:
  544. # prefix with 'i' or 'c' depending on the input axis
  545. # e.g., you must do ilevel_0 for the 0th level of an unnamed
  546. # multiiindex
  547. key = f"{prefix}level_{i}"
  548. level = i
  549. level_values = axis_index.get_level_values(level)
  550. s = level_values.to_series()
  551. s.index = axis_index
  552. d[key] = s
  553. # put the index/columns itself in the dict
  554. if isinstance(axis_index, MultiIndex):
  555. dindex = axis_index
  556. else:
  557. dindex = axis_index.to_series()
  558. d[axis] = dindex
  559. return d
  560. @final
  561. def _get_index_resolvers(self) -> dict[Hashable, Series | MultiIndex]:
  562. from pandas.core.computation.parsing import clean_column_name
  563. d: dict[str, Series | MultiIndex] = {}
  564. for axis_name in self._AXIS_ORDERS:
  565. d.update(self._get_axis_resolvers(axis_name))
  566. return {clean_column_name(k): v for k, v in d.items() if not isinstance(k, int)}
  567. @final
  568. def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]:
  569. """
  570. Return the special character free column resolvers of a dataframe.
  571. Column names with special characters are 'cleaned up' so that they can
  572. be referred to by backtick quoting.
  573. Used in :meth:`DataFrame.eval`.
  574. """
  575. from pandas.core.computation.parsing import clean_column_name
  576. from pandas.core.series import Series
  577. if isinstance(self, ABCSeries):
  578. return {clean_column_name(self.name): self}
  579. return {
  580. clean_column_name(k): Series(
  581. v, copy=False, index=self.index, name=k, dtype=self.dtypes[k]
  582. ).__finalize__(self)
  583. for k, v in zip(self.columns, self._iter_column_arrays())
  584. if not isinstance(k, int)
  585. }
  586. @final
  587. @property
  588. def _info_axis(self) -> Index:
  589. return getattr(self, self._info_axis_name)
  590. def _is_view_after_cow_rules(self):
  591. # Only to be used in cases of chained assignment checks, this is a
  592. # simplified check that assumes that either the whole object is a view
  593. # or a copy
  594. if len(self._mgr.blocks) == 0: # type: ignore[union-attr]
  595. return False
  596. return self._mgr.blocks[0].refs.has_reference() # type: ignore[union-attr]
  597. @property
  598. def shape(self) -> tuple[int, ...]:
  599. """
  600. Return a tuple of axis dimensions
  601. """
  602. return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS)
  603. @property
  604. def axes(self) -> list[Index]:
  605. """
  606. Return index label(s) of the internal NDFrame
  607. """
  608. # we do it this way because if we have reversed axes, then
  609. # the block manager shows then reversed
  610. return [self._get_axis(a) for a in self._AXIS_ORDERS]
  611. @final
  612. @property
  613. def ndim(self) -> int:
  614. """
  615. Return an int representing the number of axes / array dimensions.
  616. Return 1 if Series. Otherwise return 2 if DataFrame.
  617. See Also
  618. --------
  619. ndarray.ndim : Number of array dimensions.
  620. Examples
  621. --------
  622. >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
  623. >>> s.ndim
  624. 1
  625. >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
  626. >>> df.ndim
  627. 2
  628. """
  629. return self._mgr.ndim
  630. @final
  631. @property
  632. def size(self) -> int:
  633. """
  634. Return an int representing the number of elements in this object.
  635. Return the number of rows if Series. Otherwise return the number of
  636. rows times number of columns if DataFrame.
  637. See Also
  638. --------
  639. ndarray.size : Number of elements in the array.
  640. Examples
  641. --------
  642. >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
  643. >>> s.size
  644. 3
  645. >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
  646. >>> df.size
  647. 4
  648. """
  649. return int(np.prod(self.shape))
  650. def set_axis(
  651. self,
  652. labels,
  653. *,
  654. axis: Axis = 0,
  655. copy: bool_t | None = None,
  656. ) -> Self:
  657. """
  658. Assign desired index to given axis.
  659. Indexes for%(extended_summary_sub)s row labels can be changed by assigning
  660. a list-like or Index.
  661. Parameters
  662. ----------
  663. labels : list-like, Index
  664. The values for the new index.
  665. axis : %(axes_single_arg)s, default 0
  666. The axis to update. The value 0 identifies the rows. For `Series`
  667. this parameter is unused and defaults to 0.
  668. copy : bool, default True
  669. Whether to make a copy of the underlying data.
  670. .. note::
  671. The `copy` keyword will change behavior in pandas 3.0.
  672. `Copy-on-Write
  673. <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
  674. will be enabled by default, which means that all methods with a
  675. `copy` keyword will use a lazy copy mechanism to defer the copy and
  676. ignore the `copy` keyword. The `copy` keyword will be removed in a
  677. future version of pandas.
  678. You can already get the future behavior and improvements through
  679. enabling copy on write ``pd.options.mode.copy_on_write = True``
  680. Returns
  681. -------
  682. %(klass)s
  683. An object of type %(klass)s.
  684. See Also
  685. --------
  686. %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s.
  687. """
  688. return self._set_axis_nocheck(labels, axis, inplace=False, copy=copy)
  689. @final
  690. def _set_axis_nocheck(
  691. self, labels, axis: Axis, inplace: bool_t, copy: bool_t | None
  692. ):
  693. if inplace:
  694. setattr(self, self._get_axis_name(axis), labels)
  695. else:
  696. # With copy=False, we create a new object but don't copy the
  697. # underlying data.
  698. obj = self.copy(deep=copy and not using_copy_on_write())
  699. setattr(obj, obj._get_axis_name(axis), labels)
  700. return obj
  701. @final
  702. def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None:
  703. """
  704. This is called from the cython code when we set the `index` attribute
  705. directly, e.g. `series.index = [1, 2, 3]`.
  706. """
  707. labels = ensure_index(labels)
  708. self._mgr.set_axis(axis, labels)
  709. self._clear_item_cache()
  710. @final
  711. def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self:
  712. """
  713. Interchange axes and swap values axes appropriately.
  714. .. deprecated:: 2.1.0
  715. ``swapaxes`` is deprecated and will be removed.
  716. Please use ``transpose`` instead.
  717. Returns
  718. -------
  719. same as input
  720. Examples
  721. --------
  722. Please see examples for :meth:`DataFrame.transpose`.
  723. """
  724. warnings.warn(
  725. # GH#51946
  726. f"'{type(self).__name__}.swapaxes' is deprecated and "
  727. "will be removed in a future version. "
  728. f"Please use '{type(self).__name__}.transpose' instead.",
  729. FutureWarning,
  730. stacklevel=find_stack_level(),
  731. )
  732. i = self._get_axis_number(axis1)
  733. j = self._get_axis_number(axis2)
  734. if i == j:
  735. return self.copy(deep=copy and not using_copy_on_write())
  736. mapping = {i: j, j: i}
  737. new_axes = [self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN)]
  738. new_values = self._values.swapaxes(i, j) # type: ignore[union-attr]
  739. if self._mgr.is_single_block and isinstance(self._mgr, BlockManager):
  740. # This should only get hit in case of having a single block, otherwise a
  741. # copy is made, we don't have to set up references.
  742. new_mgr = ndarray_to_mgr(
  743. new_values,
  744. new_axes[0],
  745. new_axes[1],
  746. dtype=None,
  747. copy=False,
  748. typ="block",
  749. )
  750. assert isinstance(new_mgr, BlockManager)
  751. assert isinstance(self._mgr, BlockManager)
  752. new_mgr.blocks[0].refs = self._mgr.blocks[0].refs
  753. new_mgr.blocks[0].refs.add_reference(new_mgr.blocks[0])
  754. if not using_copy_on_write() and copy is not False:
  755. new_mgr = new_mgr.copy(deep=True)
  756. out = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
  757. return out.__finalize__(self, method="swapaxes")
  758. return self._constructor(
  759. new_values,
  760. *new_axes,
  761. # The no-copy case for CoW is handled above
  762. copy=False,
  763. ).__finalize__(self, method="swapaxes")
  764. @final
  765. @doc(klass=_shared_doc_kwargs["klass"])
  766. def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self:
  767. """
  768. Return {klass} with requested index / column level(s) removed.
  769. Parameters
  770. ----------
  771. level : int, str, or list-like
  772. If a string is given, must be the name of a level
  773. If list-like, elements must be names or positional indexes
  774. of levels.
  775. axis : {{0 or 'index', 1 or 'columns'}}, default 0
  776. Axis along which the level(s) is removed:
  777. * 0 or 'index': remove level(s) in column.
  778. * 1 or 'columns': remove level(s) in row.
  779. For `Series` this parameter is unused and defaults to 0.
  780. Returns
  781. -------
  782. {klass}
  783. {klass} with requested index / column level(s) removed.
  784. Examples
  785. --------
  786. >>> df = pd.DataFrame([
  787. ... [1, 2, 3, 4],
  788. ... [5, 6, 7, 8],
  789. ... [9, 10, 11, 12]
  790. ... ]).set_index([0, 1]).rename_axis(['a', 'b'])
  791. >>> df.columns = pd.MultiIndex.from_tuples([
  792. ... ('c', 'e'), ('d', 'f')
  793. ... ], names=['level_1', 'level_2'])
  794. >>> df
  795. level_1 c d
  796. level_2 e f
  797. a b
  798. 1 2 3 4
  799. 5 6 7 8
  800. 9 10 11 12
  801. >>> df.droplevel('a')
  802. level_1 c d
  803. level_2 e f
  804. b
  805. 2 3 4
  806. 6 7 8
  807. 10 11 12
  808. >>> df.droplevel('level_2', axis=1)
  809. level_1 c d
  810. a b
  811. 1 2 3 4
  812. 5 6 7 8
  813. 9 10 11 12
  814. """
  815. labels = self._get_axis(axis)
  816. new_labels = labels.droplevel(level)
  817. return self.set_axis(new_labels, axis=axis, copy=None)
  818. def pop(self, item: Hashable) -> Series | Any:
  819. result = self[item]
  820. del self[item]
  821. return result
  822. @final
  823. def squeeze(self, axis: Axis | None = None):
  824. """
  825. Squeeze 1 dimensional axis objects into scalars.
  826. Series or DataFrames with a single element are squeezed to a scalar.
  827. DataFrames with a single column or a single row are squeezed to a
  828. Series. Otherwise the object is unchanged.
  829. This method is most useful when you don't know if your
  830. object is a Series or DataFrame, but you do know it has just a single
  831. column. In that case you can safely call `squeeze` to ensure you have a
  832. Series.
  833. Parameters
  834. ----------
  835. axis : {0 or 'index', 1 or 'columns', None}, default None
  836. A specific axis to squeeze. By default, all length-1 axes are
  837. squeezed. For `Series` this parameter is unused and defaults to `None`.
  838. Returns
  839. -------
  840. DataFrame, Series, or scalar
  841. The projection after squeezing `axis` or all the axes.
  842. See Also
  843. --------
  844. Series.iloc : Integer-location based indexing for selecting scalars.
  845. DataFrame.iloc : Integer-location based indexing for selecting Series.
  846. Series.to_frame : Inverse of DataFrame.squeeze for a
  847. single-column DataFrame.
  848. Examples
  849. --------
  850. >>> primes = pd.Series([2, 3, 5, 7])
  851. Slicing might produce a Series with a single value:
  852. >>> even_primes = primes[primes % 2 == 0]
  853. >>> even_primes
  854. 0 2
  855. dtype: int64
  856. >>> even_primes.squeeze()
  857. 2
  858. Squeezing objects with more than one value in every axis does nothing:
  859. >>> odd_primes = primes[primes % 2 == 1]
  860. >>> odd_primes
  861. 1 3
  862. 2 5
  863. 3 7
  864. dtype: int64
  865. >>> odd_primes.squeeze()
  866. 1 3
  867. 2 5
  868. 3 7
  869. dtype: int64
  870. Squeezing is even more effective when used with DataFrames.
  871. >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
  872. >>> df
  873. a b
  874. 0 1 2
  875. 1 3 4
  876. Slicing a single column will produce a DataFrame with the columns
  877. having only one value:
  878. >>> df_a = df[['a']]
  879. >>> df_a
  880. a
  881. 0 1
  882. 1 3
  883. So the columns can be squeezed down, resulting in a Series:
  884. >>> df_a.squeeze('columns')
  885. 0 1
  886. 1 3
  887. Name: a, dtype: int64
  888. Slicing a single row from a single column will produce a single
  889. scalar DataFrame:
  890. >>> df_0a = df.loc[df.index < 1, ['a']]
  891. >>> df_0a
  892. a
  893. 0 1
  894. Squeezing the rows produces a single scalar Series:
  895. >>> df_0a.squeeze('rows')
  896. a 1
  897. Name: 0, dtype: int64
  898. Squeezing all axes will project directly into a scalar:
  899. >>> df_0a.squeeze()
  900. 1
  901. """
  902. axes = range(self._AXIS_LEN) if axis is None else (self._get_axis_number(axis),)
  903. result = self.iloc[
  904. tuple(
  905. 0 if i in axes and len(a) == 1 else slice(None)
  906. for i, a in enumerate(self.axes)
  907. )
  908. ]
  909. if isinstance(result, NDFrame):
  910. result = result.__finalize__(self, method="squeeze")
  911. return result
  912. # ----------------------------------------------------------------------
  913. # Rename
  914. @final
  915. def _rename(
  916. self,
  917. mapper: Renamer | None = None,
  918. *,
  919. index: Renamer | None = None,
  920. columns: Renamer | None = None,
  921. axis: Axis | None = None,
  922. copy: bool_t | None = None,
  923. inplace: bool_t = False,
  924. level: Level | None = None,
  925. errors: str = "ignore",
  926. ) -> Self | None:
  927. # called by Series.rename and DataFrame.rename
  928. if mapper is None and index is None and columns is None:
  929. raise TypeError("must pass an index to rename")
  930. if index is not None or columns is not None:
  931. if axis is not None:
  932. raise TypeError(
  933. "Cannot specify both 'axis' and any of 'index' or 'columns'"
  934. )
  935. if mapper is not None:
  936. raise TypeError(
  937. "Cannot specify both 'mapper' and any of 'index' or 'columns'"
  938. )
  939. else:
  940. # use the mapper argument
  941. if axis and self._get_axis_number(axis) == 1:
  942. columns = mapper
  943. else:
  944. index = mapper
  945. self._check_inplace_and_allows_duplicate_labels(inplace)
  946. result = self if inplace else self.copy(deep=copy and not using_copy_on_write())
  947. for axis_no, replacements in enumerate((index, columns)):
  948. if replacements is None:
  949. continue
  950. ax = self._get_axis(axis_no)
  951. f = common.get_rename_function(replacements)
  952. if level is not None:
  953. level = ax._get_level_number(level)
  954. # GH 13473
  955. if not callable(replacements):
  956. if ax._is_multi and level is not None:
  957. indexer = ax.get_level_values(level).get_indexer_for(replacements)
  958. else:
  959. indexer = ax.get_indexer_for(replacements)
  960. if errors == "raise" and len(indexer[indexer == -1]):
  961. missing_labels = [
  962. label
  963. for index, label in enumerate(replacements)
  964. if indexer[index] == -1
  965. ]
  966. raise KeyError(f"{missing_labels} not found in axis")
  967. new_index = ax._transform_index(f, level=level)
  968. result._set_axis_nocheck(new_index, axis=axis_no, inplace=True, copy=False)
  969. result._clear_item_cache()
  970. if inplace:
  971. self._update_inplace(result)
  972. return None
  973. else:
  974. return result.__finalize__(self, method="rename")
  975. @overload
  976. def rename_axis(
  977. self,
  978. mapper: IndexLabel | lib.NoDefault = ...,
  979. *,
  980. index=...,
  981. columns=...,
  982. axis: Axis = ...,
  983. copy: bool_t | None = ...,
  984. inplace: Literal[False] = ...,
  985. ) -> Self:
  986. ...
  987. @overload
  988. def rename_axis(
  989. self,
  990. mapper: IndexLabel | lib.NoDefault = ...,
  991. *,
  992. index=...,
  993. columns=...,
  994. axis: Axis = ...,
  995. copy: bool_t | None = ...,
  996. inplace: Literal[True],
  997. ) -> None:
  998. ...
  999. @overload
  1000. def rename_axis(
  1001. self,
  1002. mapper: IndexLabel | lib.NoDefault = ...,
  1003. *,
  1004. index=...,
  1005. columns=...,
  1006. axis: Axis = ...,
  1007. copy: bool_t | None = ...,
  1008. inplace: bool_t = ...,
  1009. ) -> Self | None:
  1010. ...
  1011. def rename_axis(
  1012. self,
  1013. mapper: IndexLabel | lib.NoDefault = lib.no_default,
  1014. *,
  1015. index=lib.no_default,
  1016. columns=lib.no_default,
  1017. axis: Axis = 0,
  1018. copy: bool_t | None = None,
  1019. inplace: bool_t = False,
  1020. ) -> Self | None:
  1021. """
  1022. Set the name of the axis for the index or columns.
  1023. Parameters
  1024. ----------
  1025. mapper : scalar, list-like, optional
  1026. Value to set the axis name attribute.
  1027. index, columns : scalar, list-like, dict-like or function, optional
  1028. A scalar, list-like, dict-like or functions transformations to
  1029. apply to that axis' values.
  1030. Note that the ``columns`` parameter is not allowed if the
  1031. object is a Series. This parameter only apply for DataFrame
  1032. type objects.
  1033. Use either ``mapper`` and ``axis`` to
  1034. specify the axis to target with ``mapper``, or ``index``
  1035. and/or ``columns``.
  1036. axis : {0 or 'index', 1 or 'columns'}, default 0
  1037. The axis to rename. For `Series` this parameter is unused and defaults to 0.
  1038. copy : bool, default None
  1039. Also copy underlying data.
  1040. .. note::
  1041. The `copy` keyword will change behavior in pandas 3.0.
  1042. `Copy-on-Write
  1043. <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
  1044. will be enabled by default, which means that all methods with a
  1045. `copy` keyword will use a lazy copy mechanism to defer the copy and
  1046. ignore the `copy` keyword. The `copy` keyword will be removed in a
  1047. future version of pandas.
  1048. You can already get the future behavior and improvements through
  1049. enabling copy on write ``pd.options.mode.copy_on_write = True``
  1050. inplace : bool, default False
  1051. Modifies the object directly, instead of creating a new Series
  1052. or DataFrame.
  1053. Returns
  1054. -------
  1055. Series, DataFrame, or None
  1056. The same type as the caller or None if ``inplace=True``.
  1057. See Also
  1058. --------
  1059. Series.rename : Alter Series index labels or name.
  1060. DataFrame.rename : Alter DataFrame index labels or name.
  1061. Index.rename : Set new names on index.
  1062. Notes
  1063. -----
  1064. ``DataFrame.rename_axis`` supports two calling conventions
  1065. * ``(index=index_mapper, columns=columns_mapper, ...)``
  1066. * ``(mapper, axis={'index', 'columns'}, ...)``
  1067. The first calling convention will only modify the names of
  1068. the index and/or the names of the Index object that is the columns.
  1069. In this case, the parameter ``copy`` is ignored.
  1070. The second calling convention will modify the names of the
  1071. corresponding index if mapper is a list or a scalar.
  1072. However, if mapper is dict-like or a function, it will use the
  1073. deprecated behavior of modifying the axis *labels*.
  1074. We *highly* recommend using keyword arguments to clarify your
  1075. intent.
  1076. Examples
  1077. --------
  1078. **Series**
  1079. >>> s = pd.Series(["dog", "cat", "monkey"])
  1080. >>> s
  1081. 0 dog
  1082. 1 cat
  1083. 2 monkey
  1084. dtype: object
  1085. >>> s.rename_axis("animal")
  1086. animal
  1087. 0 dog
  1088. 1 cat
  1089. 2 monkey
  1090. dtype: object
  1091. **DataFrame**
  1092. >>> df = pd.DataFrame({"num_legs": [4, 4, 2],
  1093. ... "num_arms": [0, 0, 2]},
  1094. ... ["dog", "cat", "monkey"])
  1095. >>> df
  1096. num_legs num_arms
  1097. dog 4 0
  1098. cat 4 0
  1099. monkey 2 2
  1100. >>> df = df.rename_axis("animal")
  1101. >>> df
  1102. num_legs num_arms
  1103. animal
  1104. dog 4 0
  1105. cat 4 0
  1106. monkey 2 2
  1107. >>> df = df.rename_axis("limbs", axis="columns")
  1108. >>> df
  1109. limbs num_legs num_arms
  1110. animal
  1111. dog 4 0
  1112. cat 4 0
  1113. monkey 2 2
  1114. **MultiIndex**
  1115. >>> df.index = pd.MultiIndex.from_product([['mammal'],
  1116. ... ['dog', 'cat', 'monkey']],
  1117. ... names=['type', 'name'])
  1118. >>> df
  1119. limbs num_legs num_arms
  1120. type name
  1121. mammal dog 4 0
  1122. cat 4 0
  1123. monkey 2 2
  1124. >>> df.rename_axis(index={'type': 'class'})
  1125. limbs num_legs num_arms
  1126. class name
  1127. mammal dog 4 0
  1128. cat 4 0
  1129. monkey 2 2
  1130. >>> df.rename_axis(columns=str.upper)
  1131. LIMBS num_legs num_arms
  1132. type name
  1133. mammal dog 4 0
  1134. cat 4 0
  1135. monkey 2 2
  1136. """
  1137. axes = {"index": index, "columns": columns}
  1138. if axis is not None:
  1139. axis = self._get_axis_number(axis)
  1140. inplace = validate_bool_kwarg(inplace, "inplace")
  1141. if copy and using_copy_on_write():
  1142. copy = False
  1143. if mapper is not lib.no_default:
  1144. # Use v0.23 behavior if a scalar or list
  1145. non_mapper = is_scalar(mapper) or (
  1146. is_list_like(mapper) and not is_dict_like(mapper)
  1147. )
  1148. if non_mapper:
  1149. return self._set_axis_name(
  1150. mapper, axis=axis, inplace=inplace, copy=copy
  1151. )
  1152. else:
  1153. raise ValueError("Use `.rename` to alter labels with a mapper.")
  1154. else:
  1155. # Use new behavior. Means that index and/or columns
  1156. # is specified
  1157. result = self if inplace else self.copy(deep=copy)
  1158. for axis in range(self._AXIS_LEN):
  1159. v = axes.get(self._get_axis_name(axis))
  1160. if v is lib.no_default:
  1161. continue
  1162. non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v))
  1163. if non_mapper:
  1164. newnames = v
  1165. else:
  1166. f = common.get_rename_function(v)
  1167. curnames = self._get_axis(axis).names
  1168. newnames = [f(name) for name in curnames]
  1169. result._set_axis_name(newnames, axis=axis, inplace=True, copy=copy)
  1170. if not inplace:
  1171. return result
  1172. return None
  1173. @final
  1174. def _set_axis_name(
  1175. self, name, axis: Axis = 0, inplace: bool_t = False, copy: bool_t | None = True
  1176. ):
  1177. """
  1178. Set the name(s) of the axis.
  1179. Parameters
  1180. ----------
  1181. name : str or list of str
  1182. Name(s) to set.
  1183. axis : {0 or 'index', 1 or 'columns'}, default 0
  1184. The axis to set the label. The value 0 or 'index' specifies index,
  1185. and the value 1 or 'columns' specifies columns.
  1186. inplace : bool, default False
  1187. If `True`, do operation inplace and return None.
  1188. copy:
  1189. Whether to make a copy of the result.
  1190. Returns
  1191. -------
  1192. Series, DataFrame, or None
  1193. The same type as the caller or `None` if `inplace` is `True`.
  1194. See Also
  1195. --------
  1196. DataFrame.rename : Alter the axis labels of :class:`DataFrame`.
  1197. Series.rename : Alter the index labels or set the index name
  1198. of :class:`Series`.
  1199. Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`.
  1200. Examples
  1201. --------
  1202. >>> df = pd.DataFrame({"num_legs": [4, 4, 2]},
  1203. ... ["dog", "cat", "monkey"])
  1204. >>> df
  1205. num_legs
  1206. dog 4
  1207. cat 4
  1208. monkey 2
  1209. >>> df._set_axis_name("animal")
  1210. num_legs
  1211. animal
  1212. dog 4
  1213. cat 4
  1214. monkey 2
  1215. >>> df.index = pd.MultiIndex.from_product(
  1216. ... [["mammal"], ['dog', 'cat', 'monkey']])
  1217. >>> df._set_axis_name(["type", "name"])
  1218. num_legs
  1219. type name
  1220. mammal dog 4
  1221. cat 4
  1222. monkey 2
  1223. """
  1224. axis = self._get_axis_number(axis)
  1225. idx = self._get_axis(axis).set_names(name)
  1226. inplace = validate_bool_kwarg(inplace, "inplace")
  1227. renamed = self if inplace else self.copy(deep=copy)
  1228. if axis == 0:
  1229. renamed.index = idx
  1230. else:
  1231. renamed.columns = idx
  1232. if not inplace:
  1233. return renamed
  1234. # ----------------------------------------------------------------------
  1235. # Comparison Methods
  1236. @final
  1237. def _indexed_same(self, other) -> bool_t:
  1238. return all(
  1239. self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS
  1240. )
  @final
  def equals(self, other: object) -> bool_t:
      """
      Test whether two objects contain the same elements.

      Two Series or DataFrames are compared to see whether they have the
      same shape and elements. NaNs in the same location are considered
      equal.

      The row/column index do not need to have the same type, as long
      as the values are considered equal. Corresponding columns and
      index must be of the same dtype.

      Parameters
      ----------
      other : Series or DataFrame
          The object to compare against the caller.

      Returns
      -------
      bool
          True if all elements are the same in both objects, False
          otherwise. Always False when neither object is an instance of
          the other's type.

      See Also
      --------
      Series.eq : Compare two Series objects of the same length
          and return a Series where each element is True if the element
          in each Series is equal, False otherwise.
      DataFrame.eq : Compare two DataFrame objects of the same shape and
          return a DataFrame where each element is True if the respective
          element in each DataFrame is equal, False otherwise.
      testing.assert_series_equal : Raises an AssertionError if left and
          right are not equal. Provides an easy interface to ignore
          inequality in dtypes, indexes and precision among others.
      testing.assert_frame_equal : Like assert_series_equal, but targets
          DataFrames.
      numpy.array_equal : Return True if two arrays have the same shape
          and elements, False otherwise.

      Examples
      --------
      >>> df = pd.DataFrame({1: [10], 2: [20]})
      >>> df
          1   2
      0  10  20

      Same element types, values and column labels compare equal.

      >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]})
      >>> df.equals(exactly_equal)
      True

      Column labels of a different type but equal value still compare equal.

      >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]})
      >>> different_column_type
         1.0  2.0
      0   10   20
      >>> df.equals(different_column_type)
      True

      Elements of a different dtype do not compare equal, even though the
      column labels match.

      >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]})
      >>> different_data_type
            1     2
      0  10.0  20.0
      >>> df.equals(different_data_type)
      False
      """
      types_related = isinstance(other, type(self)) or isinstance(
          self, type(other)
      )
      if types_related:
          # ``cast`` only informs the type checker; it returns ``other`` as-is.
          other = cast("NDFrame", other)
          return self._mgr.equals(other._mgr)
      return False
  1312. # -------------------------------------------------------------------------
  1313. # Unary Methods
  1314. @final
  1315. def __neg__(self) -> Self:
  1316. def blk_func(values: ArrayLike):
  1317. if is_bool_dtype(values.dtype):
  1318. # error: Argument 1 to "inv" has incompatible type "Union
  1319. # [ExtensionArray, ndarray[Any, Any]]"; expected
  1320. # "_SupportsInversion[ndarray[Any, dtype[bool_]]]"
  1321. return operator.inv(values) # type: ignore[arg-type]
  1322. else:
  1323. # error: Argument 1 to "neg" has incompatible type "Union
  1324. # [ExtensionArray, ndarray[Any, Any]]"; expected
  1325. # "_SupportsNeg[ndarray[Any, dtype[Any]]]"
  1326. return operator.neg(values) # type: ignore[arg-type]
  1327. new_data = self._mgr.apply(blk_func)
  1328. res = self._constructor_from_mgr(new_data, axes=new_data.axes)
  1329. return res.__finalize__(self, method="__neg__")
  1330. @final
  1331. def __pos__(self) -> Self:
  1332. def blk_func(values: ArrayLike):
  1333. if is_bool_dtype(values.dtype):
  1334. return values.copy()
  1335. else:
  1336. # error: Argument 1 to "pos" has incompatible type "Union
  1337. # [ExtensionArray, ndarray[Any, Any]]"; expected
  1338. # "_SupportsPos[ndarray[Any, dtype[Any]]]"
  1339. return operator.pos(values) # type: ignore[arg-type]
  1340. new_data = self._mgr.apply(blk_func)
  1341. res = self._constructor_from_mgr(new_data, axes=new_data.axes)
  1342. return res.__finalize__(self, method="__pos__")
  1343. @final
  1344. def __invert__(self) -> Self:
  1345. if not self.size:
  1346. # inv fails with 0 len
  1347. return self.copy(deep=False)
  1348. new_data = self._mgr.apply(operator.invert)
  1349. res = self._constructor_from_mgr(new_data, axes=new_data.axes)
  1350. return res.__finalize__(self, method="__invert__")
  1351. @final
  1352. def __nonzero__(self) -> NoReturn:
  1353. raise ValueError(
  1354. f"The truth value of a {type(self).__name__} is ambiguous. "
  1355. "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
  1356. )
  1357. __bool__ = __nonzero__
  @final
  def bool(self) -> bool_t:
      """
      Return the bool of a single element Series or DataFrame.

      .. deprecated:: 2.1.0

         bool is deprecated and will be removed in future version of pandas.
         For ``Series`` use ``pandas.Series.item``.

      This must be a boolean scalar value, either True or False. It will raise a
      ValueError if the Series or DataFrame does not have exactly 1 element, or that
      element is not boolean (integer values 0 and 1 will also raise an exception).

      Returns
      -------
      bool
          The value in the Series or DataFrame.

      See Also
      --------
      Series.astype : Change the data type of a Series, including to boolean.
      DataFrame.astype : Change the data type of a DataFrame, including to boolean.
      numpy.bool_ : NumPy boolean data type, used by pandas for boolean values.

      Examples
      --------
      The method will only work for single element objects with a boolean value:

      >>> pd.Series([True]).bool()  # doctest: +SKIP
      True
      >>> pd.Series([False]).bool()  # doctest: +SKIP
      False

      >>> pd.DataFrame({'col': [True]}).bool()  # doctest: +SKIP
      True
      >>> pd.DataFrame({'col': [False]}).bool()  # doctest: +SKIP
      False

      This is an alternative method and will only work
      for single element objects with a boolean value:

      >>> pd.Series([True]).item()  # doctest: +SKIP
      True
      >>> pd.Series([False]).item()  # doctest: +SKIP
      False
      """
      warnings.warn(
          f"{type(self).__name__}.bool is now deprecated and will be removed "
          "in future version of pandas",
          FutureWarning,
          stacklevel=find_stack_level(),
      )
      element = self.squeeze()
      if isinstance(element, (bool_t, np.bool_)):
          return bool_t(element)
      if is_scalar(element):
          raise ValueError(
              "bool cannot act on a non-boolean single element "
              f"{type(self).__name__}"
          )

      # ``squeeze`` did not yield a scalar: defer to ``__nonzero__`` for the error.
      self.__nonzero__()
      # for mypy (__nonzero__ raises)
      return True
  @final
  def abs(self) -> Self:
      """
      Return a Series/DataFrame with absolute numeric value of each element.

      This function only applies to elements that are all numeric.

      Returns
      -------
      Series or DataFrame
          Object of the caller's type containing the absolute value of each
          element.

      See Also
      --------
      numpy.absolute : Calculate the absolute value element-wise.

      Notes
      -----
      For ``complex`` inputs, ``1.2 + 1j``, the absolute value is
      :math:`\\sqrt{ a^2 + b^2 }`.

      Examples
      --------
      Absolute numeric values in a Series.

      >>> s = pd.Series([-1.10, 2, -3.33, 4])
      >>> s.abs()
      0    1.10
      1    2.00
      2    3.33
      3    4.00
      dtype: float64

      Absolute numeric values in a Series with complex numbers.

      >>> s = pd.Series([1.2 + 1j])
      >>> s.abs()
      0    1.56205
      dtype: float64

      Absolute numeric values in a Series with a Timedelta element.

      >>> s = pd.Series([pd.Timedelta('1 days')])
      >>> s.abs()
      0   1 days
      dtype: timedelta64[ns]

      Select rows with data closest to certain value using argsort (from
      `StackOverflow <https://stackoverflow.com/a/17758115>`__).

      >>> df = pd.DataFrame({
      ...     'a': [4, 5, 6, 7],
      ...     'b': [10, 20, 30, 40],
      ...     'c': [100, 50, -30, -50]
      ... })
      >>> df
           a    b    c
      0    4   10  100
      1    5   20   50
      2    6   30  -30
      3    7   40  -50
      >>> df.loc[(df.c - 43).abs().argsort()]
           a    b    c
      1    5   20   50
      0    4   10  100
      2    6   30  -30
      3    7   40  -50
      """
      res_mgr = self._mgr.apply(np.abs)
      result = self._constructor_from_mgr(res_mgr, axes=res_mgr.axes)
      # Pass the originating method via ``method=`` like the other unary
      # operators here; ``name=`` did not reach the ``method`` argument.
      return result.__finalize__(self, method="abs")
  1472. @final
  1473. def __abs__(self) -> Self:
  1474. return self.abs()
  1475. @final
  1476. def __round__(self, decimals: int = 0) -> Self:
  1477. return self.round(decimals).__finalize__(self, method="__round__")
  1478. # -------------------------------------------------------------------------
  1479. # Label or Level Combination Helpers
  1480. #
  1481. # A collection of helper methods for DataFrame/Series operations that
  1482. # accept a combination of column/index labels and levels. All such
  1483. # operations should utilize/extend these methods when possible so that we
  1484. # have consistent precedence and validation logic throughout the library.
  1485. @final
  1486. def _is_level_reference(self, key: Level, axis: Axis = 0) -> bool_t:
  1487. """
  1488. Test whether a key is a level reference for a given axis.
  1489. To be considered a level reference, `key` must be a string that:
  1490. - (axis=0): Matches the name of an index level and does NOT match
  1491. a column label.
  1492. - (axis=1): Matches the name of a column level and does NOT match
  1493. an index label.
  1494. Parameters
  1495. ----------
  1496. key : Hashable
  1497. Potential level name for the given axis
  1498. axis : int, default 0
  1499. Axis that levels are associated with (0 for index, 1 for columns)
  1500. Returns
  1501. -------
  1502. is_level : bool
  1503. """
  1504. axis_int = self._get_axis_number(axis)
  1505. return (
  1506. key is not None
  1507. and is_hashable(key)
  1508. and key in self.axes[axis_int].names
  1509. and not self._is_label_reference(key, axis=axis_int)
  1510. )
  1511. @final
  1512. def _is_label_reference(self, key: Level, axis: Axis = 0) -> bool_t:
  1513. """
  1514. Test whether a key is a label reference for a given axis.
  1515. To be considered a label reference, `key` must be a string that:
  1516. - (axis=0): Matches a column label
  1517. - (axis=1): Matches an index label
  1518. Parameters
  1519. ----------
  1520. key : Hashable
  1521. Potential label name, i.e. Index entry.
  1522. axis : int, default 0
  1523. Axis perpendicular to the axis that labels are associated with
  1524. (0 means search for column labels, 1 means search for index labels)
  1525. Returns
  1526. -------
  1527. is_label: bool
  1528. """
  1529. axis_int = self._get_axis_number(axis)
  1530. other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis_int)
  1531. return (
  1532. key is not None
  1533. and is_hashable(key)
  1534. and any(key in self.axes[ax] for ax in other_axes)
  1535. )
  1536. @final
  1537. def _is_label_or_level_reference(self, key: Level, axis: AxisInt = 0) -> bool_t:
  1538. """
  1539. Test whether a key is a label or level reference for a given axis.
  1540. To be considered either a label or a level reference, `key` must be a
  1541. string that:
  1542. - (axis=0): Matches a column label or an index level
  1543. - (axis=1): Matches an index label or a column level
  1544. Parameters
  1545. ----------
  1546. key : Hashable
  1547. Potential label or level name
  1548. axis : int, default 0
  1549. Axis that levels are associated with (0 for index, 1 for columns)
  1550. Returns
  1551. -------
  1552. bool
  1553. """
  1554. return self._is_level_reference(key, axis=axis) or self._is_label_reference(
  1555. key, axis=axis
  1556. )
  1557. @final
  1558. def _check_label_or_level_ambiguity(self, key: Level, axis: Axis = 0) -> None:
  1559. """
  1560. Check whether `key` is ambiguous.
  1561. By ambiguous, we mean that it matches both a level of the input
  1562. `axis` and a label of the other axis.
  1563. Parameters
  1564. ----------
  1565. key : Hashable
  1566. Label or level name.
  1567. axis : int, default 0
  1568. Axis that levels are associated with (0 for index, 1 for columns).
  1569. Raises
  1570. ------
  1571. ValueError: `key` is ambiguous
  1572. """
  1573. axis_int = self._get_axis_number(axis)
  1574. other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis_int)
  1575. if (
  1576. key is not None
  1577. and is_hashable(key)
  1578. and key in self.axes[axis_int].names
  1579. and any(key in self.axes[ax] for ax in other_axes)
  1580. ):
  1581. # Build an informative and grammatical warning
  1582. level_article, level_type = (
  1583. ("an", "index") if axis_int == 0 else ("a", "column")
  1584. )
  1585. label_article, label_type = (
  1586. ("a", "column") if axis_int == 0 else ("an", "index")
  1587. )
  1588. msg = (
  1589. f"'{key}' is both {level_article} {level_type} level and "
  1590. f"{label_article} {label_type} label, which is ambiguous."
  1591. )
  1592. raise ValueError(msg)
  1593. @final
  1594. def _get_label_or_level_values(self, key: Level, axis: AxisInt = 0) -> ArrayLike:
  1595. """
  1596. Return a 1-D array of values associated with `key`, a label or level
  1597. from the given `axis`.
  1598. Retrieval logic:
  1599. - (axis=0): Return column values if `key` matches a column label.
  1600. Otherwise return index level values if `key` matches an index
  1601. level.
  1602. - (axis=1): Return row values if `key` matches an index label.
  1603. Otherwise return column level values if 'key' matches a column
  1604. level
  1605. Parameters
  1606. ----------
  1607. key : Hashable
  1608. Label or level name.
  1609. axis : int, default 0
  1610. Axis that levels are associated with (0 for index, 1 for columns)
  1611. Returns
  1612. -------
  1613. np.ndarray or ExtensionArray
  1614. Raises
  1615. ------
  1616. KeyError
  1617. if `key` matches neither a label nor a level
  1618. ValueError
  1619. if `key` matches multiple labels
  1620. """
  1621. axis = self._get_axis_number(axis)
  1622. other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]
  1623. if self._is_label_reference(key, axis=axis):
  1624. self._check_label_or_level_ambiguity(key, axis=axis)
  1625. values = self.xs(key, axis=other_axes[0])._values
  1626. elif self._is_level_reference(key, axis=axis):
  1627. values = self.axes[axis].get_level_values(key)._values
  1628. else:
  1629. raise KeyError(key)
  1630. # Check for duplicates
  1631. if values.ndim > 1:
  1632. if other_axes and isinstance(self._get_axis(other_axes[0]), MultiIndex):
  1633. multi_message = (
  1634. "\n"
  1635. "For a multi-index, the label must be a "
  1636. "tuple with elements corresponding to each level."
  1637. )
  1638. else:
  1639. multi_message = ""
  1640. label_axis_name = "column" if axis == 0 else "index"
  1641. raise ValueError(
  1642. f"The {label_axis_name} label '{key}' is not unique.{multi_message}"
  1643. )
  1644. return values
  1645. @final
  1646. def _drop_labels_or_levels(self, keys, axis: AxisInt = 0):
  1647. """
  1648. Drop labels and/or levels for the given `axis`.
  1649. For each key in `keys`:
  1650. - (axis=0): If key matches a column label then drop the column.
  1651. Otherwise if key matches an index level then drop the level.
  1652. - (axis=1): If key matches an index label then drop the row.
  1653. Otherwise if key matches a column level then drop the level.
  1654. Parameters
  1655. ----------
  1656. keys : str or list of str
  1657. labels or levels to drop
  1658. axis : int, default 0
  1659. Axis that levels are associated with (0 for index, 1 for columns)
  1660. Returns
  1661. -------
  1662. dropped: DataFrame
  1663. Raises
  1664. ------
  1665. ValueError
  1666. if any `keys` match neither a label nor a level
  1667. """
  1668. axis = self._get_axis_number(axis)
  1669. # Validate keys
  1670. keys = common.maybe_make_list(keys)
  1671. invalid_keys = [
  1672. k for k in keys if not self._is_label_or_level_reference(k, axis=axis)
  1673. ]
  1674. if invalid_keys:
  1675. raise ValueError(
  1676. "The following keys are not valid labels or "
  1677. f"levels for axis {axis}: {invalid_keys}"
  1678. )
  1679. # Compute levels and labels to drop
  1680. levels_to_drop = [k for k in keys if self._is_level_reference(k, axis=axis)]
  1681. labels_to_drop = [k for k in keys if not self._is_level_reference(k, axis=axis)]
  1682. # Perform copy upfront and then use inplace operations below.
  1683. # This ensures that we always perform exactly one copy.
  1684. # ``copy`` and/or ``inplace`` options could be added in the future.
  1685. dropped = self.copy(deep=False)
  1686. if axis == 0:
  1687. # Handle dropping index levels
  1688. if levels_to_drop:
  1689. dropped.reset_index(levels_to_drop, drop=True, inplace=True)
  1690. # Handle dropping columns labels
  1691. if labels_to_drop:
  1692. dropped.drop(labels_to_drop, axis=1, inplace=True)
  1693. else:
  1694. # Handle dropping column levels
  1695. if levels_to_drop:
  1696. if isinstance(dropped.columns, MultiIndex):
  1697. # Drop the specified levels from the MultiIndex
  1698. dropped.columns = dropped.columns.droplevel(levels_to_drop)
  1699. else:
  1700. # Drop the last level of Index by replacing with
  1701. # a RangeIndex
  1702. dropped.columns = RangeIndex(dropped.columns.size)
  1703. # Handle dropping index labels
  1704. if labels_to_drop:
  1705. dropped.drop(labels_to_drop, axis=0, inplace=True)
  1706. return dropped
  1707. # ----------------------------------------------------------------------
  1708. # Iteration
  # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
  # Incompatible types in assignment (expression has type "None", base class
  # "object" defined the type as "Callable[[object], int]")
  # Only the *type* of ``__hash__`` is declared on this line (no value is
  # assigned here); the ignore silences the mypy complaint quoted above.
  # NOTE(review): presumably the class is made unhashable elsewhere -- confirm.
  __hash__: ClassVar[None]  # type: ignore[assignment]
  1713. def __iter__(self) -> Iterator:
  1714. """
  1715. Iterate over info axis.
  1716. Returns
  1717. -------
  1718. iterator
  1719. Info axis as iterator.
  1720. Examples
  1721. --------
  1722. >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
  1723. >>> for x in df:
  1724. ... print(x)
  1725. A
  1726. B
  1727. """
  1728. return iter(self._info_axis)
  1729. # can we get a better explanation of this?
  def keys(self) -> Index:
      """
      Get the 'info axis' (see Indexing for more).

      This is index for Series, columns for DataFrame.

      Returns
      -------
      Index
          The info axis object itself (not a copy).

      Examples
      --------
      >>> d = pd.DataFrame(data={'A': [1, 2, 3], 'B': [0, 4, 8]},
      ...                  index=['a', 'b', 'c'])
      >>> d
         A  B
      a  1  0
      b  2  4
      c  3  8
      >>> d.keys()
      Index(['A', 'B'], dtype='object')
      """
      info_axis = self._info_axis
      return info_axis
  def items(self):
      """
      Iterate over (label, values) on info axis

      This is index for Series and columns for DataFrame.

      Returns
      -------
      Generator
          Yields ``(label, self[label])`` for each label of the info axis.
      """
      yield from ((label, self[label]) for label in self._info_axis)
  1761. def __len__(self) -> int:
  1762. """Returns length of info axis"""
  1763. return len(self._info_axis)
  1764. @final
  1765. def __contains__(self, key) -> bool_t:
  1766. """True if the key is in the info axis"""
  1767. return key in self._info_axis
  @property
  def empty(self) -> bool_t:
      """
      Indicator whether Series/DataFrame is empty.

      True if Series/DataFrame is entirely empty (no items), meaning any of the
      axes are of length 0.

      Returns
      -------
      bool
          If Series/DataFrame is empty, return True, if not return False.

      See Also
      --------
      Series.dropna : Return series without null values.
      DataFrame.dropna : Return DataFrame with labels on given axis omitted
          where (all or any) data are missing.

      Notes
      -----
      If Series/DataFrame contains only NaNs, it is still not considered empty. See
      the example below.

      Examples
      --------
      An example of an actual empty DataFrame. Notice the index is empty:

      >>> df_empty = pd.DataFrame({'A' : []})
      >>> df_empty
      Empty DataFrame
      Columns: [A]
      Index: []
      >>> df_empty.empty
      True

      If we only have NaNs in our DataFrame, it is not considered empty! We
      will need to drop the NaNs to make the DataFrame empty:

      >>> df = pd.DataFrame({'A' : [np.nan]})
      >>> df
          A
      0 NaN
      >>> df.empty
      False
      >>> df.dropna().empty
      True

      >>> ser_empty = pd.Series({'A' : []})
      >>> ser_empty
      A    []
      dtype: object
      >>> ser_empty.empty
      False
      >>> ser_empty = pd.Series()
      >>> ser_empty.empty
      True
      """
      # One zero-length axis is enough for the object to count as empty.
      for axis_name in self._AXIS_ORDERS:
          if len(self._get_axis(axis_name)) == 0:
              return True
      return False
  1818. # ----------------------------------------------------------------------
  1819. # Array Interface
  # This is also set in IndexOpsMixin
  # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
  # NOTE(review): presumably 1000 only needs to be larger than the priority
  # of a plain ndarray for NumPy to defer -- confirm against the NumPy docs.
  __array_priority__: int = 1000
  def __array__(
      self, dtype: npt.DTypeLike | None = None, copy: bool_t | None = None
  ) -> np.ndarray:
      """
      Convert the object's values to a NumPy array.

      Parameters
      ----------
      dtype : dtype-like, optional
          Passed through to ``np.asarray`` / ``np.array``.
      copy : bool or None, default None
          ``None`` converts with ``np.asarray``; any other value is passed
          to ``np.array`` as ``copy``. ``copy=False`` on a non-empty object
          whose manager is not a single block emits a ``FutureWarning``.

      Returns
      -------
      np.ndarray
          The converted values. With ``copy`` not True, copy-on-write
          enabled, a single-block manager and view-compatible dtypes, the
          result is a read-only view.
      """
      if copy is False and not self._mgr.is_single_block and not self.empty:
          # check this manually, otherwise ._values will already return a copy
          # and np.array(values, copy=False) will not raise a warning
          warnings.warn(
              "Starting with NumPy 2.0, the behavior of the 'copy' keyword has "
              "changed and passing 'copy=False' raises an error when returning "
              "a zero-copy NumPy array is not possible. pandas will follow "
              "this behavior starting with pandas 3.0.\nThis conversion to "
              "NumPy requires a copy, but 'copy=False' was passed. Consider "
              "using 'np.asarray(..)' instead.",
              FutureWarning,
              stacklevel=find_stack_level(),
          )
      values = self._values
      if copy is None:
          # Note: branch avoids `copy=None` for NumPy 1.x support
          arr = np.asarray(values, dtype=dtype)
      else:
          arr = np.array(values, dtype=dtype, copy=copy)

      if (
          copy is not True
          and astype_is_view(values.dtype, arr.dtype)
          and using_copy_on_write()
          and self._mgr.is_single_block
      ):
          # Check if both conversions can be done without a copy
          # (the second operand repeats the check already made above)
          if astype_is_view(self.dtypes.iloc[0], values.dtype) and astype_is_view(
              values.dtype, arr.dtype
          ):
              # Return a fresh view marked read-only, so the flag change
              # applies to the returned array object only.
              arr = arr.view()
              arr.flags.writeable = False
      return arr
  1858. @final
  1859. def __array_ufunc__(
  1860. self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any
  1861. ):
  1862. return arraylike.array_ufunc(self, ufunc, method, *inputs, **kwargs)
  1863. # ----------------------------------------------------------------------
  1864. # Picklability
  1865. @final
  1866. def __getstate__(self) -> dict[str, Any]:
  1867. meta = {k: getattr(self, k, None) for k in self._metadata}
  1868. return {
  1869. "_mgr": self._mgr,
  1870. "_typ": self._typ,
  1871. "_metadata": self._metadata,
  1872. "attrs": self.attrs,
  1873. "_flags": {k: self.flags[k] for k in self.flags._keys},
  1874. **meta,
  1875. }
  @final
  def __setstate__(self, state) -> None:
      """
      Restore the object from pickled ``state``.

      Parameters
      ----------
      state : BlockManager or dict
          A bare ``BlockManager`` becomes ``_mgr`` directly. A dict has its
          entries copied onto the instance; a legacy ``"_data"`` key is
          renamed to ``"_mgr"`` first.

      Raises
      ------
      NotImplementedError
          If ``state`` is a dict without a ``"_typ"`` entry, or is neither
          a ``BlockManager`` nor a dict and has length 2.
      """
      if isinstance(state, BlockManager):
          self._mgr = state
      elif isinstance(state, dict):
          if "_data" in state and "_mgr" not in state:
              # compat for older pickles
              state["_mgr"] = state.pop("_data")
          typ = state.get("_typ")
          if typ is not None:
              # ``_attrs`` and ``_flags`` are set first, with defaults when
              # the state does not carry them.
              attrs = state.get("_attrs", {})
              if attrs is None:  # should not happen, but better be on the safe side
                  attrs = {}
              object.__setattr__(self, "_attrs", attrs)
              flags = state.get("_flags", {"allows_duplicate_labels": True})
              object.__setattr__(self, "_flags", Flags(self, **flags))

              # set in the order of internal names
              # to avoid definitional recursion
              # e.g. say fill_value needing _mgr to be
              # defined
              meta = set(self._internal_names + self._metadata)
              for k in list(meta):
                  # ``_flags`` was already rebuilt above as a Flags object.
                  if k in state and k != "_flags":
                      v = state[k]
                      object.__setattr__(self, k, v)

              # Every remaining state entry is set afterwards.
              for k, v in state.items():
                  if k not in meta:
                      object.__setattr__(self, k, v)

          else:
              raise NotImplementedError("Pre-0.12 pickles are no longer supported")
      elif len(state) == 2:
          raise NotImplementedError("Pre-0.12 pickles are no longer supported")

      # The item cache always starts empty after unpickling.
      self._item_cache: dict[Hashable, Series] = {}
  1909. # ----------------------------------------------------------------------
  1910. # Rendering Methods
  1911. def __repr__(self) -> str:
  1912. # string representation based upon iterating over self
  1913. # (since, by definition, `PandasContainers` are iterable)
  1914. prepr = f"[{','.join(map(pprint_thing, self))}]"
  1915. return f"{type(self).__name__}({prepr})"
  1916. @final
  1917. def _repr_latex_(self):
  1918. """
  1919. Returns a LaTeX representation for a particular object.
  1920. Mainly for use with nbconvert (jupyter notebook conversion to pdf).
  1921. """
  1922. if config.get_option("styler.render.repr") == "latex":
  1923. return self.to_latex()
  1924. else:
  1925. return None
  1926. @final
  1927. def _repr_data_resource_(self):
  1928. """
  1929. Not a real Jupyter special repr method, but we use the same
  1930. naming convention.
  1931. """
  1932. if config.get_option("display.html.table_schema"):
  1933. data = self.head(config.get_option("display.max_rows"))
  1934. as_json = data.to_json(orient="table")
  1935. as_json = cast(str, as_json)
  1936. return loads(as_json, object_pairs_hook=collections.OrderedDict)
  1937. # ----------------------------------------------------------------------
  1938. # I/O Methods
  @final
  @deprecate_nonkeyword_arguments(
      version="3.0", allowed_args=["self", "excel_writer"], name="to_excel"
  )
  @doc(
      klass="object",
      storage_options=_shared_docs["storage_options"],
      storage_options_versionadded="1.2.0",
  )
  def to_excel(
      self,
      excel_writer: FilePath | WriteExcelBuffer | ExcelWriter,
      sheet_name: str = "Sheet1",
      na_rep: str = "",
      float_format: str | None = None,
      columns: Sequence[Hashable] | None = None,
      header: Sequence[Hashable] | bool_t = True,
      index: bool_t = True,
      index_label: IndexLabel | None = None,
      startrow: int = 0,
      startcol: int = 0,
      engine: Literal["openpyxl", "xlsxwriter"] | None = None,
      merge_cells: bool_t = True,
      inf_rep: str = "inf",
      freeze_panes: tuple[int, int] | None = None,
      storage_options: StorageOptions | None = None,
      engine_kwargs: dict[str, Any] | None = None,
  ) -> None:
      """
      Write {klass} to an Excel sheet.

      To write a single {klass} to an Excel .xlsx file it is only necessary to
      specify a target file name. To write to multiple sheets it is necessary to
      create an `ExcelWriter` object with a target file name, and specify a sheet
      in the file to write to.

      Multiple sheets may be written to by specifying unique `sheet_name`.
      With all data written to the file it is necessary to save the changes.
      Note that creating an `ExcelWriter` object with a file name that already
      exists will result in the contents of the existing file being erased.

      Parameters
      ----------
      excel_writer : path-like, file-like, or ExcelWriter object
          File path or existing ExcelWriter.
      sheet_name : str, default 'Sheet1'
          Name of sheet which will contain DataFrame.
      na_rep : str, default ''
          Missing data representation.
      float_format : str, optional
          Format string for floating point numbers. For example
          ``float_format="%.2f"`` will format 0.1234 to 0.12.
      columns : sequence or list of str, optional
          Columns to write.
      header : bool or list of str, default True
          Write out the column names. If a list of string is given it is
          assumed to be aliases for the column names.
      index : bool, default True
          Write row names (index).
      index_label : str or sequence, optional
          Column label for index column(s) if desired. If not specified, and
          `header` and `index` are True, then the index names are used. A
          sequence should be given if the DataFrame uses MultiIndex.
      startrow : int, default 0
          Upper left cell row to dump data frame.
      startcol : int, default 0
          Upper left cell column to dump data frame.
      engine : str, optional
          Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this
          via the options ``io.excel.xlsx.writer`` or
          ``io.excel.xlsm.writer``.
      merge_cells : bool, default True
          Write MultiIndex and Hierarchical Rows as merged cells.
      inf_rep : str, default 'inf'
          Representation for infinity (there is no native representation for
          infinity in Excel).
      freeze_panes : tuple of int (length 2), optional
          Specifies the one-based bottommost row and rightmost column that
          is to be frozen.
      {storage_options}

          .. versionadded:: {storage_options_versionadded}
      engine_kwargs : dict, optional
          Arbitrary keyword arguments passed to excel engine.

      See Also
      --------
      to_csv : Write DataFrame to a comma-separated values (csv) file.
      ExcelWriter : Class for writing DataFrame objects into excel sheets.
      read_excel : Read an Excel file into a pandas DataFrame.
      read_csv : Read a comma-separated values (csv) file into DataFrame.
      io.formats.style.Styler.to_excel : Add styles to Excel sheet.

      Notes
      -----
      For compatibility with :meth:`~DataFrame.to_csv`,
      to_excel serializes lists and dicts to strings before writing.

      Once a workbook has been saved it is not possible to write further
      data without rewriting the whole workbook.

      Examples
      --------

      Create, write to and save a workbook:

      >>> df1 = pd.DataFrame([['a', 'b'], ['c', 'd']],
      ...                    index=['row 1', 'row 2'],
      ...                    columns=['col 1', 'col 2'])
      >>> df1.to_excel("output.xlsx")  # doctest: +SKIP

      To specify the sheet name:

      >>> df1.to_excel("output.xlsx",
      ...              sheet_name='Sheet_name_1')  # doctest: +SKIP

      If you wish to write to more than one sheet in the workbook, it is
      necessary to specify an ExcelWriter object:

      >>> df2 = df1.copy()
      >>> with pd.ExcelWriter('output.xlsx') as writer:  # doctest: +SKIP
      ...     df1.to_excel(writer, sheet_name='Sheet_name_1')
      ...     df2.to_excel(writer, sheet_name='Sheet_name_2')

      ExcelWriter can also be used to append to an existing Excel file:

      >>> with pd.ExcelWriter('output.xlsx',
      ...                     mode='a') as writer:  # doctest: +SKIP
      ...     df1.to_excel(writer, sheet_name='Sheet_name_3')

      To set the library that is used to write the Excel file,
      you can pass the `engine` keyword (the default engine is
      automatically chosen depending on the file extension):

      >>> df1.to_excel('output1.xlsx', engine='xlsxwriter')  # doctest: +SKIP
      """
      # Normalise the optional mapping so the writer always receives a dict.
      engine_kwargs = {} if engine_kwargs is None else engine_kwargs

      # Anything that is not already a DataFrame is converted with to_frame()
      # before being handed to the formatter.
      frame = self if isinstance(self, ABCDataFrame) else self.to_frame()

      from pandas.io.formats.excel import ExcelFormatter

      excel_formatter = ExcelFormatter(
          frame,
          na_rep=na_rep,
          cols=columns,
          header=header,
          float_format=float_format,
          index=index,
          index_label=index_label,
          merge_cells=merge_cells,
          inf_rep=inf_rep,
      )
      excel_formatter.write(
          excel_writer,
          sheet_name=sheet_name,
          startrow=startrow,
          startcol=startcol,
          freeze_panes=freeze_panes,
          engine=engine,
          storage_options=storage_options,
          engine_kwargs=engine_kwargs,
      )
  @final
  @deprecate_nonkeyword_arguments(
      version="3.0", allowed_args=["self", "path_or_buf"], name="to_json"
  )
  @doc(
      storage_options=_shared_docs["storage_options"],
      compression_options=_shared_docs["compression_options"] % "path_or_buf",
  )
  def to_json(
      self,
      path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
      orient: Literal["split", "records", "index", "table", "columns", "values"]
      | None = None,
      date_format: str | None = None,
      double_precision: int = 10,
      force_ascii: bool_t = True,
      date_unit: TimeUnit = "ms",
      default_handler: Callable[[Any], JSONSerializable] | None = None,
      lines: bool_t = False,
      compression: CompressionOptions = "infer",
      index: bool_t | None = None,
      indent: int | None = None,
      storage_options: StorageOptions | None = None,
      mode: Literal["a", "w"] = "w",
  ) -> str | None:
      """
      Convert the object to a JSON string.

      Note NaN's and None will be converted to null and datetime objects
      will be converted to UNIX timestamps.

      Parameters
      ----------
      path_or_buf : str, path object, file-like object, or None, default None
          String, path object (implementing os.PathLike[str]), or file-like
          object implementing a write() function. If None, the result is
          returned as a string.
      orient : str
          Indication of expected JSON string format.

          * Series:

              - default is 'index'
              - allowed values are: {{'split', 'records', 'index', 'table'}}.

          * DataFrame:

              - default is 'columns'
              - allowed values are: {{'split', 'records', 'index', 'columns',
                'values', 'table'}}.

          * The format of the JSON string:

              - 'split' : dict like {{'index' -> [index], 'columns' -> [columns],
                'data' -> [values]}}
              - 'records' : list like [{{column -> value}}, ... , {{column -> value}}]
              - 'index' : dict like {{index -> {{column -> value}}}}
              - 'columns' : dict like {{column -> {{index -> value}}}}
              - 'values' : just the values array
              - 'table' : dict like {{'schema': {{schema}}, 'data': {{data}}}}

              Describing the data, where data component is like ``orient='records'``.

      date_format : {{None, 'epoch', 'iso'}}
          Type of date conversion. 'epoch' = epoch milliseconds,
          'iso' = ISO8601. The default depends on the `orient`. For
          ``orient='table'``, the default is 'iso'. For all other orients,
          the default is 'epoch'.
      double_precision : int, default 10
          The number of decimal places to use when encoding
          floating point values. The possible maximal value is 15.
          Passing double_precision greater than 15 will raise a ValueError.
      force_ascii : bool, default True
          Force encoded string to be ASCII.
      date_unit : str, default 'ms' (milliseconds)
          The time unit to encode to, governs timestamp and ISO8601
          precision.  One of 's', 'ms', 'us', 'ns' for second, millisecond,
          microsecond, and nanosecond respectively.
      default_handler : callable, default None
          Handler to call if object cannot otherwise be converted to a
          suitable format for JSON. Should receive a single argument which is
          the object to convert and return a serialisable object.
      lines : bool, default False
          If 'orient' is 'records' write out line-delimited json format. Will
          throw ValueError if incorrect 'orient' since others are not
          list-like.
      {compression_options}

          .. versionchanged:: 1.4.0 Zstandard support.

      index : bool or None, default None
          The index is only used when 'orient' is 'split', 'index', 'columns',
          or 'table'. Of these, 'index' and 'columns' do not support
          `index=False`.

      indent : int, optional
         Length of whitespace used to indent each record.

      {storage_options}

      mode : str, default 'w' (writing)
          Specify the IO mode for output when supplying a path_or_buf.
          Accepted args are 'w' (writing) and 'a' (append) only.
          mode='a' is only supported when lines is True and orient is 'records'.

      Returns
      -------
      None or str
          If path_or_buf is None, returns the resulting json format as a
          string. Otherwise returns None.

      See Also
      --------
      read_json : Convert a JSON string to pandas object.

      Notes
      -----
      The behavior of ``indent=0`` varies from the stdlib, which does not
      indent the output but does insert newlines. Currently, ``indent=0``
      and the default ``indent=None`` are equivalent in pandas, though this
      may change in a future release.

      ``orient='table'`` contains a 'pandas_version' field under 'schema'.
      This stores the version of `pandas` used in the latest revision of the
      schema.

      Examples
      --------
      >>> from json import loads, dumps
      >>> df = pd.DataFrame(
      ...     [["a", "b"], ["c", "d"]],
      ...     index=["row 1", "row 2"],
      ...     columns=["col 1", "col 2"],
      ... )

      >>> result = df.to_json(orient="split")
      >>> parsed = loads(result)
      >>> dumps(parsed, indent=4)  # doctest: +SKIP
      {{
          "columns": [
              "col 1",
              "col 2"
          ],
          "index": [
              "row 1",
              "row 2"
          ],
          "data": [
              [
                  "a",
                  "b"
              ],
              [
                  "c",
                  "d"
              ]
          ]
      }}

      Encoding/decoding a Dataframe using ``'records'`` formatted JSON.
      Note that index labels are not preserved with this encoding.

      >>> result = df.to_json(orient="records")
      >>> parsed = loads(result)
      >>> dumps(parsed, indent=4)  # doctest: +SKIP
      [
          {{
              "col 1": "a",
              "col 2": "b"
          }},
          {{
              "col 1": "c",
              "col 2": "d"
          }}
      ]

      Encoding/decoding a Dataframe using ``'index'`` formatted JSON:

      >>> result = df.to_json(orient="index")
      >>> parsed = loads(result)
      >>> dumps(parsed, indent=4)  # doctest: +SKIP
      {{
          "row 1": {{
              "col 1": "a",
              "col 2": "b"
          }},
          "row 2": {{
              "col 1": "c",
              "col 2": "d"
          }}
      }}

      Encoding/decoding a Dataframe using ``'columns'`` formatted JSON:

      >>> result = df.to_json(orient="columns")
      >>> parsed = loads(result)
      >>> dumps(parsed, indent=4)  # doctest: +SKIP
      {{
          "col 1": {{
              "row 1": "a",
              "row 2": "c"
          }},
          "col 2": {{
              "row 1": "b",
              "row 2": "d"
          }}
      }}

      Encoding/decoding a Dataframe using ``'values'`` formatted JSON:

      >>> result = df.to_json(orient="values")
      >>> parsed = loads(result)
      >>> dumps(parsed, indent=4)  # doctest: +SKIP
      [
          [
              "a",
              "b"
          ],
          [
              "c",
              "d"
          ]
      ]

      Encoding with Table Schema:

      >>> result = df.to_json(orient="table")
      >>> parsed = loads(result)
      >>> dumps(parsed, indent=4)  # doctest: +SKIP
      {{
          "schema": {{
              "fields": [
                  {{
                      "name": "index",
                      "type": "string"
                  }},
                  {{
                      "name": "col 1",
                      "type": "string"
                  }},
                  {{
                      "name": "col 2",
                      "type": "string"
                  }}
              ],
              "primaryKey": [
                  "index"
              ],
              "pandas_version": "1.4.0"
          }},
          "data": [
              {{
                  "index": "row 1",
                  "col 1": "a",
                  "col 2": "b"
              }},
              {{
                  "index": "row 2",
                  "col 1": "c",
                  "col 2": "d"
              }}
          ]
      }}
      """
      from pandas.io import json

      # An unspecified date_format defaults to ISO 8601 for the table
      # orient and to epoch for every other orient.
      if date_format is None:
          date_format = "iso" if orient == "table" else "epoch"

      # Validate first (None is still allowed at this point), then map a
      # missing indent to 0 for the writer.
      config.is_nonnegative_int(indent)
      indent = indent or 0

      return json.to_json(
          path_or_buf=path_or_buf,
          obj=self,
          orient=orient,
          date_format=date_format,
          double_precision=double_precision,
          force_ascii=force_ascii,
          date_unit=date_unit,
          default_handler=default_handler,
          lines=lines,
          compression=compression,
          index=index,
          indent=indent,
          storage_options=storage_options,
          mode=mode,
      )
  @final
  @deprecate_nonkeyword_arguments(
      version="3.0", allowed_args=["self", "path_or_buf"], name="to_hdf"
  )
  def to_hdf(
      self,
      path_or_buf: FilePath | HDFStore,
      key: str,
      mode: Literal["a", "w", "r+"] = "a",
      complevel: int | None = None,
      complib: Literal["zlib", "lzo", "bzip2", "blosc"] | None = None,
      append: bool_t = False,
      format: Literal["fixed", "table"] | None = None,
      index: bool_t = True,
      min_itemsize: int | dict[str, int] | None = None,
      nan_rep=None,
      dropna: bool_t | None = None,
      data_columns: Literal[True] | list[str] | None = None,
      errors: OpenFileErrors = "strict",
      encoding: str = "UTF-8",
  ) -> None:
      """
      Write the contained data to an HDF5 file using HDFStore.

      Hierarchical Data Format (HDF) is self-describing, allowing an
      application to interpret the structure and contents of a file with
      no outside information. One HDF file can hold a mix of related objects
      which can be accessed as a group or as individual objects.

      In order to add another DataFrame or Series to an existing HDF file
      please use append mode and a different key.

      .. warning::

         One can store a subclass of ``DataFrame`` or ``Series`` to HDF5,
         but the type of the subclass is lost upon storing.

      For more information see the :ref:`user guide <io.hdf5>`.

      Parameters
      ----------
      path_or_buf : str or pandas.HDFStore
          File path or HDFStore object.
      key : str
          Identifier for the group in the store.
      mode : {'a', 'w', 'r+'}, default 'a'
          Mode to open file:

          - 'w': write, a new file is created (an existing file with
            the same name would be deleted).
          - 'a': append, an existing file is opened for reading and
            writing, and if the file does not exist it is created.
          - 'r+': similar to 'a', but the file must already exist.
      complevel : {0-9}, default None
          Specifies a compression level for data.
          A value of 0 or None disables compression.
      complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'
          Specifies the compression library to be used.
          These additional compressors for Blosc are supported
          (default if no compressor specified: 'blosc:blosclz'):
          {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
          'blosc:zlib', 'blosc:zstd'}.
          Specifying a compression library which is not available issues
          a ValueError.
      append : bool, default False
          For Table formats, append the input data to the existing.
      format : {'fixed', 'table', None}, default 'fixed'
          Possible values:

          - 'fixed': Fixed format. Fast writing/reading. Not-appendable,
            nor searchable.
          - 'table': Table format. Write as a PyTables Table structure
            which may perform worse but allow more flexible operations
            like searching / selecting subsets of the data.
          - If None, pd.get_option('io.hdf.default_format') is checked,
            followed by fallback to "fixed".
      index : bool, default True
          Write DataFrame index as a column.
      min_itemsize : dict or int, optional
          Map column names to minimum string sizes for columns.
      nan_rep : Any, optional
          How to represent null values as str.
          Not allowed with append=True.
      dropna : bool, default False, optional
          Remove missing values.
      data_columns : list of columns or True, optional
          List of columns to create as indexed data columns for on-disk
          queries, or True to use all columns. By default only the axes
          of the object are indexed. See
          :ref:`Query via data columns<io.hdf5-query-data-columns>` for
          more information.
          Applicable only to format='table'.
      errors : str, default 'strict'
          Specifies how encoding and decoding errors are to be handled.
          See the errors argument for :func:`open` for a full list
          of options.
      encoding : str, default "UTF-8"
          Forwarded unchanged to the underlying HDF writer.

      See Also
      --------
      read_hdf : Read from HDF file.
      DataFrame.to_orc : Write a DataFrame to the binary orc format.
      DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
      DataFrame.to_sql : Write to a SQL table.
      DataFrame.to_feather : Write out feather-format for DataFrames.
      DataFrame.to_csv : Write out to a csv file.

      Examples
      --------
      >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
      ...                   index=['a', 'b', 'c'])  # doctest: +SKIP
      >>> df.to_hdf('data.h5', key='df', mode='w')  # doctest: +SKIP

      We can add another object to the same file:

      >>> s = pd.Series([1, 2, 3, 4])  # doctest: +SKIP
      >>> s.to_hdf('data.h5', key='s')  # doctest: +SKIP

      Reading from HDF file:

      >>> pd.read_hdf('data.h5', 'df')  # doctest: +SKIP
         A  B
      a  1  4
      b  2  5
      c  3  6
      >>> pd.read_hdf('data.h5', 's')  # doctest: +SKIP
      0    1
      1    2
      2    3
      3    4
      dtype: int64
      """
      from pandas.io import pytables

      # mypy: argument 3 to "to_hdf" has incompatible type "NDFrame";
      # expected "Union[DataFrame, Series]"  [arg-type]
      pytables.to_hdf(
          path_or_buf,
          key,
          self,  # type: ignore[arg-type]
          mode=mode,
          complevel=complevel,
          complib=complib,
          append=append,
          format=format,
          index=index,
          min_itemsize=min_itemsize,
          nan_rep=nan_rep,
          dropna=dropna,
          data_columns=data_columns,
          errors=errors,
          encoding=encoding,
      )
  @final
  @deprecate_nonkeyword_arguments(
      version="3.0", allowed_args=["self", "name", "con"], name="to_sql"
  )
  def to_sql(
      self,
      name: str,
      con,
      schema: str | None = None,
      if_exists: Literal["fail", "replace", "append"] = "fail",
      index: bool_t = True,
      index_label: IndexLabel | None = None,
      chunksize: int | None = None,
      dtype: DtypeArg | None = None,
      method: Literal["multi"] | Callable | None = None,
  ) -> int | None:
      """
      Write records stored in a DataFrame to a SQL database.

      Databases supported by SQLAlchemy [1]_ are supported. Tables can be
      newly created, appended to, or overwritten.

      Parameters
      ----------
      name : str
          Name of SQL table.
      con : sqlalchemy.engine.(Engine or Connection) or sqlite3.Connection
          Using SQLAlchemy makes it possible to use any DB supported by that
          library. Legacy support is provided for sqlite3.Connection objects. The user
          is responsible for engine disposal and connection closure for the SQLAlchemy
          connectable. See `here \
              <https://docs.sqlalchemy.org/en/20/core/connections.html>`_.
          If passing a sqlalchemy.engine.Connection which is already in a transaction,
          the transaction will not be committed.  If passing a sqlite3.Connection,
          it will not be possible to roll back the record insertion.

      schema : str, optional
          Specify the schema (if database flavor supports this). If None, use
          default schema.
      if_exists : {'fail', 'replace', 'append'}, default 'fail'
          How to behave if the table already exists.

          * fail: Raise a ValueError.
          * replace: Drop the table before inserting new values.
          * append: Insert new values to the existing table.

      index : bool, default True
          Write DataFrame index as a column. Uses `index_label` as the column
          name in the table. Creates a table index for this column.
      index_label : str or sequence, default None
          Column label for index column(s). If None is given (default) and
          `index` is True, then the index names are used.
          A sequence should be given if the DataFrame uses MultiIndex.
      chunksize : int, optional
          Specify the number of rows in each batch to be written at a time.
          By default, all rows will be written at once.
      dtype : dict or scalar, optional
          Specifying the datatype for columns. If a dictionary is used, the
          keys should be the column names and the values should be the
          SQLAlchemy types or strings for the sqlite3 legacy mode. If a
          scalar is provided, it will be applied to all columns.
      method : {None, 'multi', callable}, optional
          Controls the SQL insertion clause used:

          * None : Uses standard SQL ``INSERT`` clause (one per row).
          * 'multi': Pass multiple values in a single ``INSERT`` clause.
          * callable with signature ``(pd_table, conn, keys, data_iter)``.

          Details and a sample callable implementation can be found in the
          section :ref:`insert method <io.sql.method>`.

      Returns
      -------
      None or int
          Number of rows affected by to_sql. None is returned if the callable
          passed into ``method`` does not return an integer number of rows.

          The number of returned rows affected is the sum of the ``rowcount``
          attribute of ``sqlite3.Cursor`` or SQLAlchemy connectable which may not
          reflect the exact number of written rows as stipulated in the
          `sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__ or
          `SQLAlchemy <https://docs.sqlalchemy.org/en/20/core/connections.html#sqlalchemy.engine.CursorResult.rowcount>`__.

          .. versionadded:: 1.4.0

      Raises
      ------
      ValueError
          When the table already exists and `if_exists` is 'fail' (the
          default).

      See Also
      --------
      read_sql : Read a DataFrame from a table.

      Notes
      -----
      Timezone aware datetime columns will be written as
      ``Timestamp with timezone`` type with SQLAlchemy if supported by the
      database. Otherwise, the datetimes will be stored as timezone unaware
      timestamps local to the original timezone.

      Not all datastores support ``method="multi"``. Oracle, for example,
      does not support multi-value insert.

      References
      ----------
      .. [1] https://docs.sqlalchemy.org
      .. [2] https://www.python.org/dev/peps/pep-0249/

      Examples
      --------
      Create an in-memory SQLite database.

      >>> from sqlalchemy import create_engine
      >>> engine = create_engine('sqlite://', echo=False)

      Create a table from scratch with 3 rows.

      >>> df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']})
      >>> df
           name
      0  User 1
      1  User 2
      2  User 3

      >>> df.to_sql(name='users', con=engine)
      3
      >>> from sqlalchemy import text
      >>> with engine.connect() as conn:
      ...     conn.execute(text("SELECT * FROM users")).fetchall()
      [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')]

      An `sqlalchemy.engine.Connection` can also be passed to `con`:

      >>> with engine.begin() as connection:
      ...     df1 = pd.DataFrame({'name' : ['User 4', 'User 5']})
      ...     df1.to_sql(name='users', con=connection, if_exists='append')
      2

      This is allowed to support operations that require that the same
      DBAPI connection is used for the entire operation.

      >>> df2 = pd.DataFrame({'name' : ['User 6', 'User 7']})
      >>> df2.to_sql(name='users', con=engine, if_exists='append')
      2
      >>> with engine.connect() as conn:
      ...     conn.execute(text("SELECT * FROM users")).fetchall()
      [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'),
       (0, 'User 4'), (1, 'User 5'), (0, 'User 6'),
       (1, 'User 7')]

      Overwrite the table with just ``df2``.

      >>> df2.to_sql(name='users', con=engine, if_exists='replace',
      ...            index_label='id')
      2
      >>> with engine.connect() as conn:
      ...     conn.execute(text("SELECT * FROM users")).fetchall()
      [(0, 'User 6'), (1, 'User 7')]

      Use ``method`` to define a callable insertion method to do nothing
      if there's a primary key conflict on a table in a PostgreSQL database.

      >>> from sqlalchemy.dialects.postgresql import insert
      >>> def insert_on_conflict_nothing(table, conn, keys, data_iter):
      ...     # "a" is the primary key in "conflict_table"
      ...     data = [dict(zip(keys, row)) for row in data_iter]
      ...     stmt = insert(table.table).values(data).on_conflict_do_nothing(index_elements=["a"])
      ...     result = conn.execute(stmt)
      ...     return result.rowcount
      >>> df_conflict.to_sql(name="conflict_table", con=conn, if_exists="append", method=insert_on_conflict_nothing)  # doctest: +SKIP
      0

      For MySQL, a callable to update columns ``b`` and ``c`` if there's a conflict
      on a primary key.

      >>> from sqlalchemy.dialects.mysql import insert
      >>> def insert_on_conflict_update(table, conn, keys, data_iter):
      ...     # update columns "b" and "c" on primary key conflict
      ...     data = [dict(zip(keys, row)) for row in data_iter]
      ...     stmt = (
      ...         insert(table.table)
      ...         .values(data)
      ...     )
      ...     stmt = stmt.on_duplicate_key_update(b=stmt.inserted.b, c=stmt.inserted.c)
      ...     result = conn.execute(stmt)
      ...     return result.rowcount
      >>> df_conflict.to_sql(name="conflict_table", con=conn, if_exists="append", method=insert_on_conflict_update)  # doctest: +SKIP
      2

      Specify the dtype (especially useful for integers with missing values).
      Notice that while pandas is forced to store the data as floating point,
      the database supports nullable integers. When fetching the data with
      Python, we get back integer scalars.

      >>> df = pd.DataFrame({"A": [1, None, 2]})
      >>> df
           A
      0  1.0
      1  NaN
      2  2.0

      >>> from sqlalchemy.types import Integer
      >>> df.to_sql(name='integers', con=engine, index=False,
      ...           dtype={"A": Integer()})
      3

      >>> with engine.connect() as conn:
      ...     conn.execute(text("SELECT * FROM integers")).fetchall()
      [(1,), (None,), (2,)]
      """  # noqa: E501
      from pandas.io import sql

      # All of the work is delegated to the SQL IO layer; its return value
      # is passed straight back to the caller.
      rows_affected = sql.to_sql(
          self,
          name,
          con,
          schema=schema,
          if_exists=if_exists,
          index=index,
          index_label=index_label,
          chunksize=chunksize,
          dtype=dtype,
          method=method,
      )
      return rows_affected
  @final
  @deprecate_nonkeyword_arguments(
      version="3.0", allowed_args=["self", "path"], name="to_pickle"
  )
  @doc(
      storage_options=_shared_docs["storage_options"],
      compression_options=_shared_docs["compression_options"] % "path",
  )
  def to_pickle(
      self,
      path: FilePath | WriteBuffer[bytes],
      compression: CompressionOptions = "infer",
      protocol: int = pickle.HIGHEST_PROTOCOL,
      storage_options: StorageOptions | None = None,
  ) -> None:
      """
      Pickle (serialize) object to file.

      Parameters
      ----------
      path : str, path object, or file-like object
          String, path object (implementing ``os.PathLike[str]``), or file-like
          object implementing a binary ``write()`` function. File path where
          the pickled object will be stored.
      {compression_options}
      protocol : int
          Int which indicates which protocol should be used by the pickler,
          default HIGHEST_PROTOCOL (see [1]_ paragraph 12.1.2). The possible
          values are 0, 1, 2, 3, 4, 5. A negative value for the protocol
          parameter is equivalent to setting its value to HIGHEST_PROTOCOL.

          .. [1] https://docs.python.org/3/library/pickle.html.

      {storage_options}

      See Also
      --------
      read_pickle : Load pickled pandas object (or any object) from file.
      DataFrame.to_hdf : Write DataFrame to an HDF5 file.
      DataFrame.to_sql : Write DataFrame to a SQL database.
      DataFrame.to_parquet : Write a DataFrame to the binary parquet format.

      Examples
      --------
      >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}})  # doctest: +SKIP
      >>> original_df  # doctest: +SKIP
         foo  bar
      0    0    5
      1    1    6
      2    2    7
      3    3    8
      4    4    9
      >>> original_df.to_pickle("./dummy.pkl")  # doctest: +SKIP

      >>> unpickled_df = pd.read_pickle("./dummy.pkl")  # doctest: +SKIP
      >>> unpickled_df  # doctest: +SKIP
         foo  bar
      0    0    5
      1    1    6
      2    2    7
      3    3    8
      4    4    9
      """  # noqa: E501
      # Aliased on import so the IO-level function is not confused with
      # this method of the same name.
      from pandas.io.pickle import to_pickle as write_pickle

      write_pickle(
          self,
          path,
          compression=compression,
          protocol=protocol,
          storage_options=storage_options,
      )
  2732. @final
  2733. @deprecate_nonkeyword_arguments(
  2734. version="3.0", allowed_args=["self"], name="to_clipboard"
  2735. )
  2736. def to_clipboard(
  2737. self, excel: bool_t = True, sep: str | None = None, **kwargs
  2738. ) -> None:
  2739. r"""
  2740. Copy object to the system clipboard.
  2741. Write a text representation of object to the system clipboard.
  2742. This can be pasted into Excel, for example.
  2743. Parameters
  2744. ----------
  2745. excel : bool, default True
  2746. Produce output in a csv format for easy pasting into excel.
  2747. - True, use the provided separator for csv pasting.
  2748. - False, write a string representation of the object to the clipboard.
  2749. sep : str, default ``'\t'``
  2750. Field delimiter.
  2751. **kwargs
  2752. These parameters will be passed to DataFrame.to_csv.
  2753. See Also
  2754. --------
  2755. DataFrame.to_csv : Write a DataFrame to a comma-separated values
  2756. (csv) file.
  2757. read_clipboard : Read text from clipboard and pass to read_csv.
  2758. Notes
  2759. -----
  2760. Requirements for your platform.
  2761. - Linux : `xclip`, or `xsel` (with `PyQt4` modules)
  2762. - Windows : none
  2763. - macOS : none
  2764. This method uses the processes developed for the package `pyperclip`. A
  2765. solution to render any output string format is given in the examples.
  2766. Examples
  2767. --------
  2768. Copy the contents of a DataFrame to the clipboard.
  2769. >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])
  2770. >>> df.to_clipboard(sep=',') # doctest: +SKIP
  2771. ... # Wrote the following to the system clipboard:
  2772. ... # ,A,B,C
  2773. ... # 0,1,2,3
  2774. ... # 1,4,5,6
  2775. We can omit the index by passing the keyword `index` and setting
  2776. it to false.
  2777. >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP
  2778. ... # Wrote the following to the system clipboard:
  2779. ... # A,B,C
  2780. ... # 1,2,3
  2781. ... # 4,5,6
  2782. Using the original `pyperclip` package for any string output format.
  2783. .. code-block:: python
  2784. import pyperclip
  2785. html = df.style.to_html()
  2786. pyperclip.copy(html)
  2787. """
  2788. from pandas.io import clipboards
  2789. clipboards.to_clipboard(self, excel=excel, sep=sep, **kwargs)
  2790. @final
  2791. def to_xarray(self):
  2792. """
  2793. Return an xarray object from the pandas object.
  2794. Returns
  2795. -------
  2796. xarray.DataArray or xarray.Dataset
  2797. Data in the pandas structure converted to Dataset if the object is
  2798. a DataFrame, or a DataArray if the object is a Series.
  2799. See Also
  2800. --------
  2801. DataFrame.to_hdf : Write DataFrame to an HDF5 file.
  2802. DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
  2803. Notes
  2804. -----
  2805. See the `xarray docs <https://xarray.pydata.org/en/stable/>`__
  2806. Examples
  2807. --------
  2808. >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2),
  2809. ... ('parrot', 'bird', 24.0, 2),
  2810. ... ('lion', 'mammal', 80.5, 4),
  2811. ... ('monkey', 'mammal', np.nan, 4)],
  2812. ... columns=['name', 'class', 'max_speed',
  2813. ... 'num_legs'])
  2814. >>> df
  2815. name class max_speed num_legs
  2816. 0 falcon bird 389.0 2
  2817. 1 parrot bird 24.0 2
  2818. 2 lion mammal 80.5 4
  2819. 3 monkey mammal NaN 4
  2820. >>> df.to_xarray() # doctest: +SKIP
  2821. <xarray.Dataset>
  2822. Dimensions: (index: 4)
  2823. Coordinates:
  2824. * index (index) int64 32B 0 1 2 3
  2825. Data variables:
  2826. name (index) object 32B 'falcon' 'parrot' 'lion' 'monkey'
  2827. class (index) object 32B 'bird' 'bird' 'mammal' 'mammal'
  2828. max_speed (index) float64 32B 389.0 24.0 80.5 nan
  2829. num_legs (index) int64 32B 2 2 4 4
  2830. >>> df['max_speed'].to_xarray() # doctest: +SKIP
  2831. <xarray.DataArray 'max_speed' (index: 4)>
  2832. array([389. , 24. , 80.5, nan])
  2833. Coordinates:
  2834. * index (index) int64 0 1 2 3
  2835. >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01',
  2836. ... '2018-01-02', '2018-01-02'])
  2837. >>> df_multiindex = pd.DataFrame({'date': dates,
  2838. ... 'animal': ['falcon', 'parrot',
  2839. ... 'falcon', 'parrot'],
  2840. ... 'speed': [350, 18, 361, 15]})
  2841. >>> df_multiindex = df_multiindex.set_index(['date', 'animal'])
  2842. >>> df_multiindex
  2843. speed
  2844. date animal
  2845. 2018-01-01 falcon 350
  2846. parrot 18
  2847. 2018-01-02 falcon 361
  2848. parrot 15
  2849. >>> df_multiindex.to_xarray() # doctest: +SKIP
  2850. <xarray.Dataset>
  2851. Dimensions: (date: 2, animal: 2)
  2852. Coordinates:
  2853. * date (date) datetime64[ns] 2018-01-01 2018-01-02
  2854. * animal (animal) object 'falcon' 'parrot'
  2855. Data variables:
  2856. speed (date, animal) int64 350 18 361 15
  2857. """
  2858. xarray = import_optional_dependency("xarray")
  2859. if self.ndim == 1:
  2860. return xarray.DataArray.from_series(self)
  2861. else:
  2862. return xarray.Dataset.from_dataframe(self)
# Typing-only overload: when ``buf`` is None (the default here) the call is
# annotated as returning the rendered LaTeX as a ``str``.
@overload
def to_latex(
    self,
    buf: None = ...,
    columns: Sequence[Hashable] | None = ...,
    header: bool_t | SequenceNotStr[str] = ...,
    index: bool_t = ...,
    na_rep: str = ...,
    formatters: FormattersType | None = ...,
    float_format: FloatFormatType | None = ...,
    sparsify: bool_t | None = ...,
    index_names: bool_t = ...,
    bold_rows: bool_t = ...,
    column_format: str | None = ...,
    longtable: bool_t | None = ...,
    escape: bool_t | None = ...,
    encoding: str | None = ...,
    decimal: str = ...,
    multicolumn: bool_t | None = ...,
    multicolumn_format: str | None = ...,
    multirow: bool_t | None = ...,
    caption: str | tuple[str, str] | None = ...,
    label: str | None = ...,
    position: str | None = ...,
) -> str:
    ...
# Typing-only overload: when a path or writable text buffer is passed as
# ``buf`` the call is annotated as returning ``None``.
@overload
def to_latex(
    self,
    buf: FilePath | WriteBuffer[str],
    columns: Sequence[Hashable] | None = ...,
    header: bool_t | SequenceNotStr[str] = ...,
    index: bool_t = ...,
    na_rep: str = ...,
    formatters: FormattersType | None = ...,
    float_format: FloatFormatType | None = ...,
    sparsify: bool_t | None = ...,
    index_names: bool_t = ...,
    bold_rows: bool_t = ...,
    column_format: str | None = ...,
    longtable: bool_t | None = ...,
    escape: bool_t | None = ...,
    encoding: str | None = ...,
    decimal: str = ...,
    multicolumn: bool_t | None = ...,
    multicolumn_format: str | None = ...,
    multirow: bool_t | None = ...,
    caption: str | tuple[str, str] | None = ...,
    label: str | None = ...,
    position: str | None = ...,
) -> None:
    ...
  2915. @final
  2916. @deprecate_nonkeyword_arguments(
  2917. version="3.0", allowed_args=["self", "buf"], name="to_latex"
  2918. )
  2919. def to_latex(
  2920. self,
  2921. buf: FilePath | WriteBuffer[str] | None = None,
  2922. columns: Sequence[Hashable] | None = None,
  2923. header: bool_t | SequenceNotStr[str] = True,
  2924. index: bool_t = True,
  2925. na_rep: str = "NaN",
  2926. formatters: FormattersType | None = None,
  2927. float_format: FloatFormatType | None = None,
  2928. sparsify: bool_t | None = None,
  2929. index_names: bool_t = True,
  2930. bold_rows: bool_t = False,
  2931. column_format: str | None = None,
  2932. longtable: bool_t | None = None,
  2933. escape: bool_t | None = None,
  2934. encoding: str | None = None,
  2935. decimal: str = ".",
  2936. multicolumn: bool_t | None = None,
  2937. multicolumn_format: str | None = None,
  2938. multirow: bool_t | None = None,
  2939. caption: str | tuple[str, str] | None = None,
  2940. label: str | None = None,
  2941. position: str | None = None,
  2942. ) -> str | None:
  2943. r"""
  2944. Render object to a LaTeX tabular, longtable, or nested table.
  2945. Requires ``\usepackage{{booktabs}}``. The output can be copy/pasted
  2946. into a main LaTeX document or read from an external file
  2947. with ``\input{{table.tex}}``.
  2948. .. versionchanged:: 2.0.0
  2949. Refactored to use the Styler implementation via jinja2 templating.
  2950. Parameters
  2951. ----------
  2952. buf : str, Path or StringIO-like, optional, default None
  2953. Buffer to write to. If None, the output is returned as a string.
  2954. columns : list of label, optional
  2955. The subset of columns to write. Writes all columns by default.
  2956. header : bool or list of str, default True
  2957. Write out the column names. If a list of strings is given,
  2958. it is assumed to be aliases for the column names.
  2959. index : bool, default True
  2960. Write row names (index).
  2961. na_rep : str, default 'NaN'
  2962. Missing data representation.
  2963. formatters : list of functions or dict of {{str: function}}, optional
  2964. Formatter functions to apply to columns' elements by position or
  2965. name. The result of each function must be a unicode string.
  2966. List must be of length equal to the number of columns.
  2967. float_format : one-parameter function or str, optional, default None
  2968. Formatter for floating point numbers. For example
  2969. ``float_format="%.2f"`` and ``float_format="{{:0.2f}}".format`` will
  2970. both result in 0.1234 being formatted as 0.12.
  2971. sparsify : bool, optional
  2972. Set to False for a DataFrame with a hierarchical index to print
  2973. every multiindex key at each row. By default, the value will be
  2974. read from the config module.
  2975. index_names : bool, default True
  2976. Prints the names of the indexes.
  2977. bold_rows : bool, default False
  2978. Make the row labels bold in the output.
  2979. column_format : str, optional
  2980. The columns format as specified in `LaTeX table format
  2981. <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g. 'rcl' for 3
  2982. columns. By default, 'l' will be used for all columns except
  2983. columns of numbers, which default to 'r'.
  2984. longtable : bool, optional
  2985. Use a longtable environment instead of tabular. Requires
  2986. adding a \usepackage{{longtable}} to your LaTeX preamble.
  2987. By default, the value will be read from the pandas config
  2988. module, and set to `True` if the option ``styler.latex.environment`` is
  2989. `"longtable"`.
  2990. .. versionchanged:: 2.0.0
  2991. The pandas option affecting this argument has changed.
  2992. escape : bool, optional
  2993. By default, the value will be read from the pandas config
  2994. module and set to `True` if the option ``styler.format.escape`` is
  2995. `"latex"`. When set to False prevents from escaping latex special
  2996. characters in column names.
  2997. .. versionchanged:: 2.0.0
  2998. The pandas option affecting this argument has changed, as has the
  2999. default value to `False`.
  3000. encoding : str, optional
  3001. A string representing the encoding to use in the output file,
  3002. defaults to 'utf-8'.
  3003. decimal : str, default '.'
  3004. Character recognized as decimal separator, e.g. ',' in Europe.
  3005. multicolumn : bool, default True
  3006. Use \multicolumn to enhance MultiIndex columns.
  3007. The default will be read from the config module, and is set
  3008. as the option ``styler.sparse.columns``.
  3009. .. versionchanged:: 2.0.0
  3010. The pandas option affecting this argument has changed.
  3011. multicolumn_format : str, default 'r'
  3012. The alignment for multicolumns, similar to `column_format`
  3013. The default will be read from the config module, and is set as the option
  3014. ``styler.latex.multicol_align``.
  3015. .. versionchanged:: 2.0.0
  3016. The pandas option affecting this argument has changed, as has the
  3017. default value to "r".
  3018. multirow : bool, default True
  3019. Use \multirow to enhance MultiIndex rows. Requires adding a
  3020. \usepackage{{multirow}} to your LaTeX preamble. Will print
  3021. centered labels (instead of top-aligned) across the contained
  3022. rows, separating groups via clines. The default will be read
  3023. from the pandas config module, and is set as the option
  3024. ``styler.sparse.index``.
  3025. .. versionchanged:: 2.0.0
  3026. The pandas option affecting this argument has changed, as has the
  3027. default value to `True`.
  3028. caption : str or tuple, optional
  3029. Tuple (full_caption, short_caption),
  3030. which results in ``\caption[short_caption]{{full_caption}}``;
  3031. if a single string is passed, no short caption will be set.
  3032. label : str, optional
  3033. The LaTeX label to be placed inside ``\label{{}}`` in the output.
  3034. This is used with ``\ref{{}}`` in the main ``.tex`` file.
  3035. position : str, optional
  3036. The LaTeX positional argument for tables, to be placed after
  3037. ``\begin{{}}`` in the output.
  3038. Returns
  3039. -------
  3040. str or None
  3041. If buf is None, returns the result as a string. Otherwise returns None.
  3042. See Also
  3043. --------
  3044. io.formats.style.Styler.to_latex : Render a DataFrame to LaTeX
  3045. with conditional formatting.
  3046. DataFrame.to_string : Render a DataFrame to a console-friendly
  3047. tabular output.
  3048. DataFrame.to_html : Render a DataFrame as an HTML table.
  3049. Notes
  3050. -----
  3051. As of v2.0.0 this method has changed to use the Styler implementation as
  3052. part of :meth:`.Styler.to_latex` via ``jinja2`` templating. This means
  3053. that ``jinja2`` is a requirement, and needs to be installed, for this method
  3054. to function. It is advised that users switch to using Styler, since that
  3055. implementation is more frequently updated and contains much more
  3056. flexibility with the output.
  3057. Examples
  3058. --------
  3059. Convert a general DataFrame to LaTeX with formatting:
  3060. >>> df = pd.DataFrame(dict(name=['Raphael', 'Donatello'],
  3061. ... age=[26, 45],
  3062. ... height=[181.23, 177.65]))
  3063. >>> print(df.to_latex(index=False,
  3064. ... formatters={"name": str.upper},
  3065. ... float_format="{:.1f}".format,
  3066. ... )) # doctest: +SKIP
  3067. \begin{tabular}{lrr}
  3068. \toprule
  3069. name & age & height \\
  3070. \midrule
  3071. RAPHAEL & 26 & 181.2 \\
  3072. DONATELLO & 45 & 177.7 \\
  3073. \bottomrule
  3074. \end{tabular}
  3075. """
  3076. # Get defaults from the pandas config
  3077. if self.ndim == 1:
  3078. self = self.to_frame()
  3079. if longtable is None:
  3080. longtable = config.get_option("styler.latex.environment") == "longtable"
  3081. if escape is None:
  3082. escape = config.get_option("styler.format.escape") == "latex"
  3083. if multicolumn is None:
  3084. multicolumn = config.get_option("styler.sparse.columns")
  3085. if multicolumn_format is None:
  3086. multicolumn_format = config.get_option("styler.latex.multicol_align")
  3087. if multirow is None:
  3088. multirow = config.get_option("styler.sparse.index")
  3089. if column_format is not None and not isinstance(column_format, str):
  3090. raise ValueError("`column_format` must be str or unicode")
  3091. length = len(self.columns) if columns is None else len(columns)
  3092. if isinstance(header, (list, tuple)) and len(header) != length:
  3093. raise ValueError(f"Writing {length} cols but got {len(header)} aliases")
  3094. # Refactor formatters/float_format/decimal/na_rep/escape to Styler structure
  3095. base_format_ = {
  3096. "na_rep": na_rep,
  3097. "escape": "latex" if escape else None,
  3098. "decimal": decimal,
  3099. }
  3100. index_format_: dict[str, Any] = {"axis": 0, **base_format_}
  3101. column_format_: dict[str, Any] = {"axis": 1, **base_format_}
  3102. if isinstance(float_format, str):
  3103. float_format_: Callable | None = lambda x: float_format % x
  3104. else:
  3105. float_format_ = float_format
  3106. def _wrap(x, alt_format_):
  3107. if isinstance(x, (float, complex)) and float_format_ is not None:
  3108. return float_format_(x)
  3109. else:
  3110. return alt_format_(x)
  3111. formatters_: list | tuple | dict | Callable | None = None
  3112. if isinstance(formatters, list):
  3113. formatters_ = {
  3114. c: partial(_wrap, alt_format_=formatters[i])
  3115. for i, c in enumerate(self.columns)
  3116. }
  3117. elif isinstance(formatters, dict):
  3118. index_formatter = formatters.pop("__index__", None)
  3119. column_formatter = formatters.pop("__columns__", None)
  3120. if index_formatter is not None:
  3121. index_format_.update({"formatter": index_formatter})
  3122. if column_formatter is not None:
  3123. column_format_.update({"formatter": column_formatter})
  3124. formatters_ = formatters
  3125. float_columns = self.select_dtypes(include="float").columns
  3126. for col in float_columns:
  3127. if col not in formatters.keys():
  3128. formatters_.update({col: float_format_})
  3129. elif formatters is None and float_format is not None:
  3130. formatters_ = partial(_wrap, alt_format_=lambda v: v)
  3131. format_index_ = [index_format_, column_format_]
  3132. # Deal with hiding indexes and relabelling column names
  3133. hide_: list[dict] = []
  3134. relabel_index_: list[dict] = []
  3135. if columns:
  3136. hide_.append(
  3137. {
  3138. "subset": [c for c in self.columns if c not in columns],
  3139. "axis": "columns",
  3140. }
  3141. )
  3142. if header is False:
  3143. hide_.append({"axis": "columns"})
  3144. elif isinstance(header, (list, tuple)):
  3145. relabel_index_.append({"labels": header, "axis": "columns"})
  3146. format_index_ = [index_format_] # column_format is overwritten
  3147. if index is False:
  3148. hide_.append({"axis": "index"})
  3149. if index_names is False:
  3150. hide_.append({"names": True, "axis": "index"})
  3151. render_kwargs_ = {
  3152. "hrules": True,
  3153. "sparse_index": sparsify,
  3154. "sparse_columns": sparsify,
  3155. "environment": "longtable" if longtable else None,
  3156. "multicol_align": multicolumn_format
  3157. if multicolumn
  3158. else f"naive-{multicolumn_format}",
  3159. "multirow_align": "t" if multirow else "naive",
  3160. "encoding": encoding,
  3161. "caption": caption,
  3162. "label": label,
  3163. "position": position,
  3164. "column_format": column_format,
  3165. "clines": "skip-last;data"
  3166. if (multirow and isinstance(self.index, MultiIndex))
  3167. else None,
  3168. "bold_rows": bold_rows,
  3169. }
  3170. return self._to_latex_via_styler(
  3171. buf,
  3172. hide=hide_,
  3173. relabel_index=relabel_index_,
  3174. format={"formatter": formatters_, **base_format_},
  3175. format_index=format_index_,
  3176. render_kwargs=render_kwargs_,
  3177. )
  3178. @final
  3179. def _to_latex_via_styler(
  3180. self,
  3181. buf=None,
  3182. *,
  3183. hide: dict | list[dict] | None = None,
  3184. relabel_index: dict | list[dict] | None = None,
  3185. format: dict | list[dict] | None = None,
  3186. format_index: dict | list[dict] | None = None,
  3187. render_kwargs: dict | None = None,
  3188. ):
  3189. """
  3190. Render object to a LaTeX tabular, longtable, or nested table.
  3191. Uses the ``Styler`` implementation with the following, ordered, method chaining:
  3192. .. code-block:: python
  3193. styler = Styler(DataFrame)
  3194. styler.hide(**hide)
  3195. styler.relabel_index(**relabel_index)
  3196. styler.format(**format)
  3197. styler.format_index(**format_index)
  3198. styler.to_latex(buf=buf, **render_kwargs)
  3199. Parameters
  3200. ----------
  3201. buf : str, Path or StringIO-like, optional, default None
  3202. Buffer to write to. If None, the output is returned as a string.
  3203. hide : dict, list of dict
  3204. Keyword args to pass to the method call of ``Styler.hide``. If a list will
  3205. call the method numerous times.
  3206. relabel_index : dict, list of dict
  3207. Keyword args to pass to the method of ``Styler.relabel_index``. If a list
  3208. will call the method numerous times.
  3209. format : dict, list of dict
  3210. Keyword args to pass to the method call of ``Styler.format``. If a list will
  3211. call the method numerous times.
  3212. format_index : dict, list of dict
  3213. Keyword args to pass to the method call of ``Styler.format_index``. If a
  3214. list will call the method numerous times.
  3215. render_kwargs : dict
  3216. Keyword args to pass to the method call of ``Styler.to_latex``.
  3217. Returns
  3218. -------
  3219. str or None
  3220. If buf is None, returns the result as a string. Otherwise returns None.
  3221. """
  3222. from pandas.io.formats.style import Styler
  3223. self = cast("DataFrame", self)
  3224. styler = Styler(self, uuid="")
  3225. for kw_name in ["hide", "relabel_index", "format", "format_index"]:
  3226. kw = vars()[kw_name]
  3227. if isinstance(kw, dict):
  3228. getattr(styler, kw_name)(**kw)
  3229. elif isinstance(kw, list):
  3230. for sub_kw in kw:
  3231. getattr(styler, kw_name)(**sub_kw)
  3232. # bold_rows is not a direct kwarg of Styler.to_latex
  3233. render_kwargs = {} if render_kwargs is None else render_kwargs
  3234. if render_kwargs.pop("bold_rows"):
  3235. styler.map_index(lambda v: "textbf:--rwrap;")
  3236. return styler.to_latex(buf=buf, **render_kwargs)
# Typing-only overload: when ``path_or_buf`` is None (the default here) the
# call is annotated as returning the CSV text as a ``str``.
@overload
def to_csv(
    self,
    path_or_buf: None = ...,
    sep: str = ...,
    na_rep: str = ...,
    float_format: str | Callable | None = ...,
    columns: Sequence[Hashable] | None = ...,
    header: bool_t | list[str] = ...,
    index: bool_t = ...,
    index_label: IndexLabel | None = ...,
    mode: str = ...,
    encoding: str | None = ...,
    compression: CompressionOptions = ...,
    quoting: int | None = ...,
    quotechar: str = ...,
    lineterminator: str | None = ...,
    chunksize: int | None = ...,
    date_format: str | None = ...,
    doublequote: bool_t = ...,
    escapechar: str | None = ...,
    decimal: str = ...,
    errors: OpenFileErrors = ...,
    storage_options: StorageOptions = ...,
) -> str:
    ...
# Typing-only overload: when a path or a writable (text or binary) buffer is
# passed as ``path_or_buf`` the call is annotated as returning ``None``.
@overload
def to_csv(
    self,
    path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str],
    sep: str = ...,
    na_rep: str = ...,
    float_format: str | Callable | None = ...,
    columns: Sequence[Hashable] | None = ...,
    header: bool_t | list[str] = ...,
    index: bool_t = ...,
    index_label: IndexLabel | None = ...,
    mode: str = ...,
    encoding: str | None = ...,
    compression: CompressionOptions = ...,
    quoting: int | None = ...,
    quotechar: str = ...,
    lineterminator: str | None = ...,
    chunksize: int | None = ...,
    date_format: str | None = ...,
    doublequote: bool_t = ...,
    escapechar: str | None = ...,
    decimal: str = ...,
    errors: OpenFileErrors = ...,
    storage_options: StorageOptions = ...,
) -> None:
    ...
  3289. @final
  3290. @deprecate_nonkeyword_arguments(
  3291. version="3.0", allowed_args=["self", "path_or_buf"], name="to_csv"
  3292. )
  3293. @doc(
  3294. storage_options=_shared_docs["storage_options"],
  3295. compression_options=_shared_docs["compression_options"] % "path_or_buf",
  3296. )
  3297. def to_csv(
  3298. self,
  3299. path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
  3300. sep: str = ",",
  3301. na_rep: str = "",
  3302. float_format: str | Callable | None = None,
  3303. columns: Sequence[Hashable] | None = None,
  3304. header: bool_t | list[str] = True,
  3305. index: bool_t = True,
  3306. index_label: IndexLabel | None = None,
  3307. mode: str = "w",
  3308. encoding: str | None = None,
  3309. compression: CompressionOptions = "infer",
  3310. quoting: int | None = None,
  3311. quotechar: str = '"',
  3312. lineterminator: str | None = None,
  3313. chunksize: int | None = None,
  3314. date_format: str | None = None,
  3315. doublequote: bool_t = True,
  3316. escapechar: str | None = None,
  3317. decimal: str = ".",
  3318. errors: OpenFileErrors = "strict",
  3319. storage_options: StorageOptions | None = None,
  3320. ) -> str | None:
  3321. r"""
  3322. Write object to a comma-separated values (csv) file.
  3323. Parameters
  3324. ----------
  3325. path_or_buf : str, path object, file-like object, or None, default None
  3326. String, path object (implementing os.PathLike[str]), or file-like
  3327. object implementing a write() function. If None, the result is
  3328. returned as a string. If a non-binary file object is passed, it should
  3329. be opened with `newline=''`, disabling universal newlines. If a binary
  3330. file object is passed, `mode` might need to contain a `'b'`.
  3331. sep : str, default ','
  3332. String of length 1. Field delimiter for the output file.
  3333. na_rep : str, default ''
  3334. Missing data representation.
  3335. float_format : str, Callable, default None
  3336. Format string for floating point numbers. If a Callable is given, it takes
  3337. precedence over other numeric formatting parameters, like decimal.
  3338. columns : sequence, optional
  3339. Columns to write.
  3340. header : bool or list of str, default True
  3341. Write out the column names. If a list of strings is given it is
  3342. assumed to be aliases for the column names.
  3343. index : bool, default True
  3344. Write row names (index).
  3345. index_label : str or sequence, or False, default None
  3346. Column label for index column(s) if desired. If None is given, and
  3347. `header` and `index` are True, then the index names are used. A
  3348. sequence should be given if the object uses MultiIndex. If
  3349. False do not print fields for index names. Use index_label=False
  3350. for easier importing in R.
  3351. mode : {{'w', 'x', 'a'}}, default 'w'
  3352. Forwarded to either `open(mode=)` or `fsspec.open(mode=)` to control
  3353. the file opening. Typical values include:
  3354. - 'w', truncate the file first.
  3355. - 'x', exclusive creation, failing if the file already exists.
  3356. - 'a', append to the end of file if it exists.
  3357. encoding : str, optional
  3358. A string representing the encoding to use in the output file,
  3359. defaults to 'utf-8'. `encoding` is not supported if `path_or_buf`
  3360. is a non-binary file object.
  3361. {compression_options}
  3362. May be a dict with key 'method' as compression mode
  3363. and other entries as additional compression options if
  3364. compression mode is 'zip'.
  3365. Passing compression options as keys in dict is
  3366. supported for compression modes 'gzip', 'bz2', 'zstd', and 'zip'.
  3367. quoting : optional constant from csv module
  3368. Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
  3369. then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
  3370. will treat them as non-numeric.
  3371. quotechar : str, default '\"'
  3372. String of length 1. Character used to quote fields.
  3373. lineterminator : str, optional
  3374. The newline character or character sequence to use in the output
  3375. file. Defaults to `os.linesep`, which depends on the OS in which
  3376. this method is called ('\\n' for linux, '\\r\\n' for Windows, i.e.).
  3377. .. versionchanged:: 1.5.0
  3378. Previously was line_terminator, changed for consistency with
  3379. read_csv and the standard library 'csv' module.
  3380. chunksize : int or None
  3381. Rows to write at a time.
  3382. date_format : str, default None
  3383. Format string for datetime objects.
  3384. doublequote : bool, default True
  3385. Control quoting of `quotechar` inside a field.
  3386. escapechar : str, default None
  3387. String of length 1. Character used to escape `sep` and `quotechar`
  3388. when appropriate.
  3389. decimal : str, default '.'
  3390. Character recognized as decimal separator. E.g. use ',' for
  3391. European data.
  3392. errors : str, default 'strict'
  3393. Specifies how encoding and decoding errors are to be handled.
  3394. See the errors argument for :func:`open` for a full list
  3395. of options.
  3396. {storage_options}
  3397. Returns
  3398. -------
  3399. None or str
  3400. If path_or_buf is None, returns the resulting csv format as a
  3401. string. Otherwise returns None.
  3402. See Also
  3403. --------
  3404. read_csv : Load a CSV file into a DataFrame.
  3405. to_excel : Write DataFrame to an Excel file.
  3406. Examples
  3407. --------
  3408. Create 'out.csv' containing 'df' without indices
  3409. >>> df = pd.DataFrame({{'name': ['Raphael', 'Donatello'],
  3410. ... 'mask': ['red', 'purple'],
  3411. ... 'weapon': ['sai', 'bo staff']}})
  3412. >>> df.to_csv('out.csv', index=False) # doctest: +SKIP
  3413. Create 'out.zip' containing 'out.csv'
  3414. >>> df.to_csv(index=False)
  3415. 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n'
  3416. >>> compression_opts = dict(method='zip',
  3417. ... archive_name='out.csv') # doctest: +SKIP
  3418. >>> df.to_csv('out.zip', index=False,
  3419. ... compression=compression_opts) # doctest: +SKIP
  3420. To write a csv file to a new folder or nested folder you will first
  3421. need to create it using either Pathlib or os:
  3422. >>> from pathlib import Path # doctest: +SKIP
  3423. >>> filepath = Path('folder/subfolder/out.csv') # doctest: +SKIP
  3424. >>> filepath.parent.mkdir(parents=True, exist_ok=True) # doctest: +SKIP
  3425. >>> df.to_csv(filepath) # doctest: +SKIP
  3426. >>> import os # doctest: +SKIP
  3427. >>> os.makedirs('folder/subfolder', exist_ok=True) # doctest: +SKIP
  3428. >>> df.to_csv('folder/subfolder/out.csv') # doctest: +SKIP
  3429. """
  3430. df = self if isinstance(self, ABCDataFrame) else self.to_frame()
  3431. formatter = DataFrameFormatter(
  3432. frame=df,
  3433. header=header,
  3434. index=index,
  3435. na_rep=na_rep,
  3436. float_format=float_format,
  3437. decimal=decimal,
  3438. )
  3439. return DataFrameRenderer(formatter).to_csv(
  3440. path_or_buf,
  3441. lineterminator=lineterminator,
  3442. sep=sep,
  3443. encoding=encoding,
  3444. errors=errors,
  3445. compression=compression,
  3446. quoting=quoting,
  3447. columns=columns,
  3448. index_label=index_label,
  3449. mode=mode,
  3450. chunksize=chunksize,
  3451. quotechar=quotechar,
  3452. date_format=date_format,
  3453. doublequote=doublequote,
  3454. escapechar=escapechar,
  3455. storage_options=storage_options,
  3456. )
  3457. # ----------------------------------------------------------------------
  3458. # Lookup Caching
def _reset_cacher(self) -> None:
    """
    Reset the cacher.

    This implementation does no work itself: it always raises
    ``AbstractMethodError``.

    Raises
    ------
    AbstractMethodError
        Always.
    """
    # NOTE(review): presumably overridden by concrete subclasses -- confirm.
    raise AbstractMethodError(self)
  3464. def _maybe_update_cacher(
  3465. self,
  3466. clear: bool_t = False,
  3467. verify_is_copy: bool_t = True,
  3468. inplace: bool_t = False,
  3469. ) -> None:
  3470. """
  3471. See if we need to update our parent cacher if clear, then clear our
  3472. cache.
  3473. Parameters
  3474. ----------
  3475. clear : bool, default False
  3476. Clear the item cache.
  3477. verify_is_copy : bool, default True
  3478. Provide is_copy checks.
  3479. """
  3480. if using_copy_on_write():
  3481. return
  3482. if verify_is_copy:
  3483. self._check_setitem_copy(t="referent")
  3484. if clear:
  3485. self._clear_item_cache()
def _clear_item_cache(self) -> None:
    """
    Clear the item cache.

    This implementation does no work itself: it always raises
    ``AbstractMethodError``.
    """
    # NOTE(review): presumably overridden by concrete subclasses -- confirm.
    raise AbstractMethodError(self)
  3488. # ----------------------------------------------------------------------
  3489. # Indexing Methods
  @final
  def take(self, indices, axis: Axis = 0, **kwargs) -> Self:
      """
      Return the elements in the given *positional* indices along an axis.

      Selection is by position, not by label: the values stored in the
      index attribute of the object play no role, only where each element
      actually sits in the object.

      Parameters
      ----------
      indices : array-like
          An array of ints indicating which positions to take.
      axis : {0 or 'index', 1 or 'columns', None}, default 0
          The axis on which to select elements. ``0`` means that we are
          selecting rows, ``1`` means that we are selecting columns.
          For `Series` this parameter is unused and defaults to 0.
      **kwargs
          For compatibility with :meth:`numpy.take`. Has no effect on the
          output.

      Returns
      -------
      same type as caller
          An array-like containing the elements taken from the object.

      See Also
      --------
      DataFrame.loc : Select a subset of a DataFrame by labels.
      DataFrame.iloc : Select a subset of a DataFrame by positions.
      numpy.take : Take elements from an array along an axis.

      Examples
      --------
      >>> df = pd.DataFrame([('falcon', 'bird', 389.0),
      ...                    ('parrot', 'bird', 24.0),
      ...                    ('lion', 'mammal', 80.5),
      ...                    ('monkey', 'mammal', np.nan)],
      ...                   columns=['name', 'class', 'max_speed'],
      ...                   index=[0, 2, 3, 1])
      >>> df
           name   class  max_speed
      0  falcon    bird      389.0
      2  parrot    bird       24.0
      3    lion  mammal       80.5
      1  monkey  mammal        NaN

      Take elements at positions 0 and 3 along the axis 0 (default).

      Note how the actual indices selected (0 and 1) do not correspond to
      our selected indices 0 and 3. That's because we are selecting the 0th
      and 3rd rows, not rows whose indices equal 0 and 3.

      >>> df.take([0, 3])
           name   class  max_speed
      0  falcon    bird      389.0
      1  monkey  mammal        NaN

      Take elements at indices 1 and 2 along the axis 1 (column selection).

      >>> df.take([1, 2], axis=1)
          class  max_speed
      0    bird      389.0
      2    bird       24.0
      3  mammal       80.5
      1  mammal        NaN

      We may take elements using negative integers for positive indices,
      starting from the end of the object, just like with Python lists.

      >>> df.take([-1, -2])
           name   class  max_speed
      1  monkey  mammal        NaN
      3    lion  mammal       80.5
      """
      nv.validate_take((), kwargs)

      if isinstance(indices, slice):
          # Slices are a deprecated input: rejected outright for 1-dim
          # objects, warned about and expanded to positions otherwise.
          if self.ndim == 1:
              raise TypeError(
                  f"{type(self).__name__}.take requires a sequence of integers, "
                  "not slice."
              )
          warnings.warn(
              # GH#51539
              f"Passing a slice to {type(self).__name__}.take is deprecated "
              "and will raise in a future version. Use `obj[slicer]` or pass "
              "a sequence of integers instead.",
              FutureWarning,
              stacklevel=find_stack_level(),
          )
          # We can get here with a slice via DataFrame.__getitem__
          indices = np.arange(
              indices.start, indices.stop, indices.step, dtype=np.intp
          )
      else:
          indices = np.asarray(indices, dtype=np.intp)
          # Under copy-on-write, a 1-dim row indexer accepted by
          # ``is_range_indexer`` is answered with ``copy(deep=None)``
          # instead of going through the manager.
          whole_range = (
              axis == 0
              and indices.ndim == 1
              and using_copy_on_write()
              and is_range_indexer(indices, len(self))
          )
          if whole_range:
              return self.copy(deep=None)

      taken_mgr = self._mgr.take(
          indices,
          axis=self._get_block_manager_axis(axis),
          verify=True,
      )
      taken = self._constructor_from_mgr(taken_mgr, axes=taken_mgr.axes)
      return taken.__finalize__(self, method="take")
  3589. @final
  3590. def _take_with_is_copy(self, indices, axis: Axis = 0) -> Self:
  3591. """
  3592. Internal version of the `take` method that sets the `_is_copy`
  3593. attribute to keep track of the parent dataframe (using in indexing
  3594. for the SettingWithCopyWarning).
  3595. For Series this does the same as the public take (it never sets `_is_copy`).
  3596. See the docstring of `take` for full explanation of the parameters.
  3597. """
  3598. result = self.take(indices=indices, axis=axis)
  3599. # Maybe set copy if we didn't actually change the index.
  3600. if self.ndim == 2 and not result._get_axis(axis).equals(self._get_axis(axis)):
  3601. result._set_is_copy(self)
  3602. return result
  @final
  def xs(
      self,
      key: IndexLabel,
      axis: Axis = 0,
      level: IndexLabel | None = None,
      drop_level: bool_t = True,
  ) -> Self:
      """
      Return cross-section from the Series/DataFrame.

      The `key` argument selects the data found at a particular level of a
      MultiIndex.

      Parameters
      ----------
      key : label or tuple of label
          Label contained in the index, or partially in a MultiIndex.
      axis : {0 or 'index', 1 or 'columns'}, default 0
          Axis to retrieve cross-section on.
      level : object, defaults to first n levels (n=1 or len(key))
          In case of a key partially contained in a MultiIndex, indicate
          which levels are used. Levels can be referred by label or position.
      drop_level : bool, default True
          If False, returns object with same levels as self.

      Returns
      -------
      Series or DataFrame
          Cross-section from the original Series or DataFrame
          corresponding to the selected index levels.

      Raises
      ------
      TypeError
          If `key` is a list, or if `level` is given while the selected
          axis is not a MultiIndex.

      See Also
      --------
      DataFrame.loc : Access a group of rows and columns
          by label(s) or a boolean array.
      DataFrame.iloc : Purely integer-location based indexing
          for selection by position.

      Notes
      -----
      `xs` can not be used to set values.

      MultiIndex Slicers is a generic way to get/set values on
      any level or levels.
      It is a superset of `xs` functionality, see
      :ref:`MultiIndex Slicers <advanced.mi_slicers>`.

      Examples
      --------
      >>> d = {'num_legs': [4, 4, 2, 2],
      ...      'num_wings': [0, 0, 2, 2],
      ...      'class': ['mammal', 'mammal', 'mammal', 'bird'],
      ...      'animal': ['cat', 'dog', 'bat', 'penguin'],
      ...      'locomotion': ['walks', 'walks', 'flies', 'walks']}
      >>> df = pd.DataFrame(data=d)
      >>> df = df.set_index(['class', 'animal', 'locomotion'])
      >>> df
                                 num_legs  num_wings
      class  animal  locomotion
      mammal cat     walks              4          0
             dog     walks              4          0
             bat     flies              2          2
      bird   penguin walks              2          2

      Get values at specified index

      >>> df.xs('mammal')
                         num_legs  num_wings
      animal locomotion
      cat    walks              4          0
      dog    walks              4          0
      bat    flies              2          2

      Get values at several indexes

      >>> df.xs(('mammal', 'dog', 'walks'))
      num_legs     4
      num_wings    0
      Name: (mammal, dog, walks), dtype: int64

      Get values at specified index and level

      >>> df.xs('cat', level=1)
                         num_legs  num_wings
      class  locomotion
      mammal walks              4          0

      Get values at several indexes and levels

      >>> df.xs(('bird', 'walks'),
      ...       level=[0, 'locomotion'])
               num_legs  num_wings
      animal
      penguin         2          2

      Get values at specified column and axis

      >>> df.xs('num_wings', axis=1)
      class   animal   locomotion
      mammal  cat      walks         0
              dog      walks         0
              bat      flies         2
      bird    penguin  walks         2
      Name: num_wings, dtype: int64
      """
      axis = self._get_axis_number(axis)
      labels = self._get_axis(axis)

      if isinstance(key, list):
          raise TypeError("list keys are not supported in xs, pass a tuple instead")

      if level is not None:
          # Explicit level(s): let the MultiIndex resolve the key, then
          # index positionally and install the axis it hands back.
          if not isinstance(labels, MultiIndex):
              raise TypeError("Index must be a MultiIndex")
          loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level)

          # one full slice per dimension, except on the requested axis
          indexer = tuple(
              loc if dim == axis else slice(None) for dim in range(self.ndim)
          )

          result = self.iloc[indexer]
          setattr(result, result._get_axis_name(axis), new_ax)
          return result

      if axis == 1:
          if drop_level:
              return self[key]
          index = self.columns
      else:
          index = self.index

      if isinstance(index, MultiIndex):
          loc, new_index = index._get_loc_level(key, level=0)
          if not drop_level:
              if lib.is_integer(loc):
                  # Slice index must be an integer or None
                  new_index = index[loc : loc + 1]
              else:
                  new_index = index[loc]
      else:
          loc = index.get_loc(key)

          if isinstance(loc, np.ndarray):
              # Array results go through take; a boolean mask is first
              # turned into the positions of its True entries.
              if loc.dtype == np.bool_:
                  (inds,) = loc.nonzero()
                  return self._take_with_is_copy(inds, axis=axis)
              return self._take_with_is_copy(loc, axis=axis)

          if not is_scalar(loc):
              new_index = index[loc]

      loc_is_scalar = is_scalar(loc)
      if loc_is_scalar and axis == 0:
          # In this case loc should be an integer
          if self.ndim == 1:
              # if we encounter an array-like and we only have 1 dim
              # that means that there are list/ndarrays inside the Series!
              # so just return them (GH 6394)
              return self._values[loc]

          new_mgr = self._mgr.fast_xs(loc)

          result = self._constructor_sliced_from_mgr(new_mgr, axes=new_mgr.axes)
          result._name = self.index[loc]
          result = result.__finalize__(self)
      elif loc_is_scalar:
          result = self.iloc[:, slice(loc, loc + 1)]
      elif axis == 1:
          result = self.iloc[:, loc]
      else:
          result = self.iloc[loc]
          result.index = new_index

      # this could be a view
      # but only in a single-dtyped view sliceable case
      result._set_is_copy(self, copy=not result._is_view)
      return result
  3753. def __getitem__(self, item):
  3754. raise AbstractMethodError(self)
  3755. @final
  3756. def _getitem_slice(self, key: slice) -> Self:
  3757. """
  3758. __getitem__ for the case where the key is a slice object.
  3759. """
  3760. # _convert_slice_indexer to determine if this slice is positional
  3761. # or label based, and if the latter, convert to positional
  3762. slobj = self.index._convert_slice_indexer(key, kind="getitem")
  3763. if isinstance(slobj, np.ndarray):
  3764. # reachable with DatetimeIndex
  3765. indexer = lib.maybe_indices_to_slice(
  3766. slobj.astype(np.intp, copy=False), len(self)
  3767. )
  3768. if isinstance(indexer, np.ndarray):
  3769. # GH#43223 If we can not convert, use take
  3770. return self.take(indexer, axis=0)
  3771. slobj = indexer
  3772. return self._slice(slobj)
  3773. def _slice(self, slobj: slice, axis: AxisInt = 0) -> Self:
  3774. """
  3775. Construct a slice of this container.
  3776. Slicing with this method is *always* positional.
  3777. """
  3778. assert isinstance(slobj, slice), type(slobj)
  3779. axis = self._get_block_manager_axis(axis)
  3780. new_mgr = self._mgr.get_slice(slobj, axis=axis)
  3781. result = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
  3782. result = result.__finalize__(self)
  3783. # this could be a view
  3784. # but only in a single-dtyped view sliceable case
  3785. is_copy = axis != 0 or result._is_view
  3786. result._set_is_copy(self, copy=is_copy)
  3787. return result
  3788. @final
  3789. def _set_is_copy(self, ref: NDFrame, copy: bool_t = True) -> None:
  3790. if not copy:
  3791. self._is_copy = None
  3792. else:
  3793. assert ref is not None
  3794. self._is_copy = weakref.ref(ref)
  3795. def _check_is_chained_assignment_possible(self) -> bool_t:
  3796. """
  3797. Check if we are a view, have a cacher, and are of mixed type.
  3798. If so, then force a setitem_copy check.
  3799. Should be called just near setting a value
  3800. Will return a boolean if it we are a view and are cached, but a
  3801. single-dtype meaning that the cacher should be updated following
  3802. setting.
  3803. """
  3804. if self._is_copy:
  3805. self._check_setitem_copy(t="referent")
  3806. return False
  @final
  def _check_setitem_copy(self, t: str = "setting", force: bool_t = False):
      """
      Validate if we are doing a setitem on a chained copy.

      Depending on the ``mode.chained_assignment`` option this raises
      ``SettingWithCopyError``, emits ``SettingWithCopyWarning`` or does
      nothing. Nothing is done either when copy-on-write (or its warning
      mode) is active.

      Parameters
      ----------
      t : str, the type of setting error
      force : bool, default False
          If True, then force showing an error.

      Notes
      -----
      It is technically possible to figure out that we are setting on
      a copy even WITH a multi-dtyped pandas object. In other words, some
      blocks may be views while other are not. Currently _is_view will ALWAYS
      return False for multi-blocks to avoid having to handle this case.

      df = DataFrame(np.arange(0,9), columns=['count'])
      df['group'] = 'b'

      # This technically need not raise SettingWithCopy if both are view
      # (which is not generally guaranteed but is usually True.  However,
      # this is in general not a good practice and we recommend using .loc.
      df.iloc[0:5]['group'] = 'a'
      """
      if using_copy_on_write() or warn_copy_on_write():
          return

      # return early if the check is not needed
      if not force and not self._is_copy:
          return

      mode = config.get_option("mode.chained_assignment")
      if mode is None:
          return

      marker = self._is_copy

      # see if the copy is not actually referred; if so, then dissolve
      # the copy weakref
      if marker is not None and not isinstance(marker, str):
          referent = marker()
          if not gc.get_referents(referent) or (
              referent is not None and referent.shape == self.shape
          ):
              self._is_copy = None
              return

      if isinstance(marker, str):
          # a custom message
          message = marker
      elif t == "referent":
          message = (
              "\n"
              "A value is trying to be set on a copy of a slice from a "
              "DataFrame\n\n"
              "See the caveats in the documentation: "
              "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
              "indexing.html#returning-a-view-versus-a-copy"
          )
      else:
          message = (
              "\n"
              "A value is trying to be set on a copy of a slice from a "
              "DataFrame.\n"
              "Try using .loc[row_indexer,col_indexer] = value "
              "instead\n\nSee the caveats in the documentation: "
              "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
              "indexing.html#returning-a-view-versus-a-copy"
          )

      if mode == "raise":
          raise SettingWithCopyError(message)
      elif mode == "warn":
          warnings.warn(
              message, SettingWithCopyWarning, stacklevel=find_stack_level()
          )
  3868. @final
  3869. def __delitem__(self, key) -> None:
  3870. """
  3871. Delete item
  3872. """
  3873. deleted = False
  3874. maybe_shortcut = False
  3875. if self.ndim == 2 and isinstance(self.columns, MultiIndex):
  3876. try:
  3877. # By using engine's __contains__ we effectively
  3878. # restrict to same-length tuples
  3879. maybe_shortcut = key not in self.columns._engine
  3880. except TypeError:
  3881. pass
  3882. if maybe_shortcut:
  3883. # Allow shorthand to delete all columns whose first len(key)
  3884. # elements match key:
  3885. if not isinstance(key, tuple):
  3886. key = (key,)
  3887. for col in self.columns:
  3888. if isinstance(col, tuple) and col[: len(key)] == key:
  3889. del self[col]
  3890. deleted = True
  3891. if not deleted:
  3892. # If the above loop ran and didn't delete anything because
  3893. # there was no match, this call should raise the appropriate
  3894. # exception:
  3895. loc = self.axes[-1].get_loc(key)
  3896. self._mgr = self._mgr.idelete(loc)
  3897. # delete from the caches
  3898. try:
  3899. del self._item_cache[key]
  3900. except KeyError:
  3901. pass
  3902. # ----------------------------------------------------------------------
  3903. # Unsorted
  3904. @final
  3905. def _check_inplace_and_allows_duplicate_labels(self, inplace: bool_t):
  3906. if inplace and not self.flags.allows_duplicate_labels:
  3907. raise ValueError(
  3908. "Cannot specify 'inplace=True' when "
  3909. "'self.flags.allows_duplicate_labels' is False."
  3910. )
  @final
  def get(self, key, default=None):
      """
      Get item from object for given key (ex: DataFrame column).

      Returns default value if not found.

      Parameters
      ----------
      key : object
          Key to look up, exactly as it would be passed to ``self[key]``.
      default : object, default None
          Value returned when the lookup raises ``KeyError``,
          ``ValueError`` or ``IndexError``.

      Returns
      -------
      same type as items contained in object

      Examples
      --------
      >>> df = pd.DataFrame(
      ...     [
      ...         [24.3, 75.7, "high"],
      ...         [31, 87.8, "high"],
      ...         [22, 71.6, "medium"],
      ...         [35, 95, "medium"],
      ...     ],
      ...     columns=["temp_celsius", "temp_fahrenheit", "windspeed"],
      ...     index=pd.date_range(start="2014-02-12", end="2014-02-15", freq="D"),
      ... )

      >>> df
                  temp_celsius  temp_fahrenheit windspeed
      2014-02-12          24.3             75.7      high
      2014-02-13          31.0             87.8      high
      2014-02-14          22.0             71.6    medium
      2014-02-15          35.0             95.0    medium

      >>> df.get(["temp_celsius", "windspeed"])
                  temp_celsius windspeed
      2014-02-12          24.3      high
      2014-02-13          31.0      high
      2014-02-14          22.0    medium
      2014-02-15          35.0    medium

      >>> ser = df['windspeed']
      >>> ser.get('2014-02-13')
      'high'

      If the key isn't found, the default value will be used.

      >>> df.get(["temp_celsius", "temp_kelvin"], default="default_value")
      'default_value'

      >>> ser.get('2014-02-10', '[unknown]')
      '[unknown]'
      """
      try:
          found = self[key]
      except (KeyError, ValueError, IndexError):
          return default
      return found
  3959. @final
  3960. @property
  3961. def _is_view(self) -> bool_t:
  3962. """Return boolean indicating if self is view of another array"""
  3963. return self._mgr.is_view
  @final
  def reindex_like(
      self,
      other,
      method: Literal["backfill", "bfill", "pad", "ffill", "nearest"] | None = None,
      copy: bool_t | None = None,
      limit: int | None = None,
      tolerance=None,
  ) -> Self:
      """
      Return an object with matching indices as other object.

      Conform the object to the same index on all axes. Optional
      filling logic, placing NaN in locations having no value
      in the previous index. A new object is produced unless the
      new index is equivalent to the current one and copy=False.

      Parameters
      ----------
      other : Object of the same data type
          Its row and column indices are used to define the new indices
          of this object.
      method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
          Method to use for filling holes in reindexed DataFrame.
          Please note: this is only applicable to DataFrames/Series with a
          monotonically increasing/decreasing index.

          * None (default): don't fill gaps
          * pad / ffill: propagate last valid observation forward to next
            valid
          * backfill / bfill: use next valid observation to fill gap
          * nearest: use nearest valid observations to fill gap.

      copy : bool, default True
          Return a new object, even if the passed indexes are the same.

          .. note::
              The `copy` keyword will change behavior in pandas 3.0.
              `Copy-on-Write
              <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
              will be enabled by default, which means that all methods with a
              `copy` keyword will use a lazy copy mechanism to defer the copy and
              ignore the `copy` keyword. The `copy` keyword will be removed in a
              future version of pandas.

              You can already get the future behavior and improvements through
              enabling copy on write ``pd.options.mode.copy_on_write = True``
      limit : int, default None
          Maximum number of consecutive labels to fill for inexact matches.
      tolerance : optional
          Maximum distance between original and new labels for inexact
          matches. The values of the index at the matching locations must
          satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

          Tolerance may be a scalar value, which applies the same tolerance
          to all values, or list-like, which applies variable tolerance per
          element. List-like includes list, tuple, array, Series, and must be
          the same size as the index and its dtype must exactly match the
          index's type.

      Returns
      -------
      Series or DataFrame
          Same type as caller, but with changed indices on each axis.

      See Also
      --------
      DataFrame.set_index : Set row labels.
      DataFrame.reset_index : Remove row labels or move them to new columns.
      DataFrame.reindex : Change to new indices or expand indices.

      Notes
      -----
      Same as calling
      ``.reindex(index=other.index, columns=other.columns,...)``.

      Examples
      --------
      >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'],
      ...                     [31, 87.8, 'high'],
      ...                     [22, 71.6, 'medium'],
      ...                     [35, 95, 'medium']],
      ...                    columns=['temp_celsius', 'temp_fahrenheit',
      ...                             'windspeed'],
      ...                    index=pd.date_range(start='2014-02-12',
      ...                                        end='2014-02-15', freq='D'))

      >>> df1
                  temp_celsius  temp_fahrenheit windspeed
      2014-02-12          24.3             75.7      high
      2014-02-13          31.0             87.8      high
      2014-02-14          22.0             71.6    medium
      2014-02-15          35.0             95.0    medium

      >>> df2 = pd.DataFrame([[28, 'low'],
      ...                     [30, 'low'],
      ...                     [35.1, 'medium']],
      ...                    columns=['temp_celsius', 'windspeed'],
      ...                    index=pd.DatetimeIndex(['2014-02-12', '2014-02-13',
      ...                                            '2014-02-15']))

      >>> df2
                  temp_celsius windspeed
      2014-02-12          28.0       low
      2014-02-13          30.0       low
      2014-02-15          35.1    medium

      >>> df2.reindex_like(df1)
                  temp_celsius  temp_fahrenheit windspeed
      2014-02-12          28.0              NaN       low
      2014-02-13          30.0              NaN       low
      2014-02-14           NaN              NaN       NaN
      2014-02-15          35.1              NaN    medium
      """
      # ``other`` supplies one entry per axis of ``self``; the fill options
      # are merged into the same keyword dict handed to ``reindex``.
      reindex_kwargs = other._construct_axes_dict(
          axes=self._AXIS_ORDERS,
          method=method,
          copy=copy,
          limit=limit,
          tolerance=tolerance,
      )
      return self.reindex(**reindex_kwargs)
  @overload
  def drop(
      self,
      labels: IndexLabel = ...,
      *,
      axis: Axis = ...,
      index: IndexLabel = ...,
      columns: IndexLabel = ...,
      level: Level | None = ...,
      inplace: Literal[True],
      errors: IgnoreRaise = ...,
  ) -> None:
      ...

  @overload
  def drop(
      self,
      labels: IndexLabel = ...,
      *,
      axis: Axis = ...,
      index: IndexLabel = ...,
      columns: IndexLabel = ...,
      level: Level | None = ...,
      inplace: Literal[False] = ...,
      errors: IgnoreRaise = ...,
  ) -> Self:
      ...

  @overload
  def drop(
      self,
      labels: IndexLabel = ...,
      *,
      axis: Axis = ...,
      index: IndexLabel = ...,
      columns: IndexLabel = ...,
      level: Level | None = ...,
      inplace: bool_t = ...,
      errors: IgnoreRaise = ...,
  ) -> Self | None:
      ...

  def drop(
      self,
      labels: IndexLabel | None = None,
      *,
      axis: Axis = 0,
      index: IndexLabel | None = None,
      columns: IndexLabel | None = None,
      level: Level | None = None,
      inplace: bool_t = False,
      errors: IgnoreRaise = "raise",
  ) -> Self | None:
      # Shared implementation of ``drop``. The labels to remove are given
      # either as ``labels`` + ``axis`` or as ``index`` / ``columns``
      # (mutually exclusive; at least one is required, else ValueError).
      # Each requested axis is processed through ``_drop_axis``; with
      # ``inplace`` the result replaces self's internals and None is
      # returned, otherwise the new object is returned.
      inplace = validate_bool_kwarg(inplace, "inplace")

      by_axis_keywords = index is not None or columns is not None

      if labels is not None:
          if by_axis_keywords:
              raise ValueError("Cannot specify both 'labels' and 'index'/'columns'")
          to_drop = {self._get_axis_name(axis): labels}
      elif by_axis_keywords:
          to_drop = {"index": index}
          if self.ndim == 2:
              to_drop["columns"] = columns
      else:
          raise ValueError(
              "Need to specify at least one of 'labels', 'index' or 'columns'"
          )

      result = self
      for axis_name, axis_labels in to_drop.items():
          if axis_labels is None:
              continue
          result = result._drop_axis(
              axis_labels, axis_name, level=level, errors=errors
          )

      if not inplace:
          return result

      self._update_inplace(result)
      return None
  4144. @final
  4145. def _drop_axis(
  4146. self,
  4147. labels,
  4148. axis,
  4149. level=None,
  4150. errors: IgnoreRaise = "raise",
  4151. only_slice: bool_t = False,
  4152. ) -> Self:
  4153. """
  4154. Drop labels from specified axis. Used in the ``drop`` method
  4155. internally.
  4156. Parameters
  4157. ----------
  4158. labels : single label or list-like
  4159. axis : int or axis name
  4160. level : int or level name, default None
  4161. For MultiIndex
  4162. errors : {'ignore', 'raise'}, default 'raise'
  4163. If 'ignore', suppress error and existing labels are dropped.
  4164. only_slice : bool, default False
  4165. Whether indexing along columns should be view-only.
  4166. """
  4167. axis_num = self._get_axis_number(axis)
  4168. axis = self._get_axis(axis)
  4169. if axis.is_unique:
  4170. if level is not None:
  4171. if not isinstance(axis, MultiIndex):
  4172. raise AssertionError("axis must be a MultiIndex")
  4173. new_axis = axis.drop(labels, level=level, errors=errors)
  4174. else:
  4175. new_axis = axis.drop(labels, errors=errors)
  4176. indexer = axis.get_indexer(new_axis)
  4177. # Case for non-unique axis
  4178. else:
  4179. is_tuple_labels = is_nested_list_like(labels) or isinstance(labels, tuple)
  4180. labels = ensure_object(common.index_labels_to_array(labels))
  4181. if level is not None:
  4182. if not isinstance(axis, MultiIndex):
  4183. raise AssertionError("axis must be a MultiIndex")
  4184. mask = ~axis.get_level_values(level).isin(labels)
  4185. # GH 18561 MultiIndex.drop should raise if label is absent
  4186. if errors == "raise" and mask.all():
  4187. raise KeyError(f"{labels} not found in axis")
  4188. elif (
  4189. isinstance(axis, MultiIndex)
  4190. and labels.dtype == "object"
  4191. and not is_tuple_labels
  4192. ):
  4193. # Set level to zero in case of MultiIndex and label is string,
  4194. # because isin can't handle strings for MultiIndexes GH#36293
  4195. # In case of tuples we get dtype object but have to use isin GH#42771
  4196. mask = ~axis.get_level_values(0).isin(labels)
  4197. else:
  4198. mask = ~axis.isin(labels)
  4199. # Check if label doesn't exist along axis
  4200. labels_missing = (axis.get_indexer_for(labels) == -1).any()
  4201. if errors == "raise" and labels_missing:
  4202. raise KeyError(f"{labels} not found in axis")
  4203. if isinstance(mask.dtype, ExtensionDtype):
  4204. # GH#45860
  4205. mask = mask.to_numpy(dtype=bool)
  4206. indexer = mask.nonzero()[0]
  4207. new_axis = axis.take(indexer)
  4208. bm_axis = self.ndim - axis_num - 1
  4209. new_mgr = self._mgr.reindex_indexer(
  4210. new_axis,
  4211. indexer,
  4212. axis=bm_axis,
  4213. allow_dups=True,
  4214. copy=None,
  4215. only_slice=only_slice,
  4216. )
  4217. result = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
  4218. if self.ndim == 1:
  4219. result._name = self.name
  4220. return result.__finalize__(self)
  4221. @final
  4222. def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None:
  4223. """
  4224. Replace self internals with result.
  4225. Parameters
  4226. ----------
  4227. result : same type as self
  4228. verify_is_copy : bool, default True
  4229. Provide is_copy checks.
  4230. """
  4231. # NOTE: This does *not* call __finalize__ and that's an explicit
  4232. # decision that we may revisit in the future.
  4233. self._reset_cache()
  4234. self._clear_item_cache()
  4235. self._mgr = result._mgr
  4236. self._maybe_update_cacher(verify_is_copy=verify_is_copy, inplace=True)
  @final
  def add_prefix(self, prefix: str, axis: Axis | None = None) -> Self:
      """
      Prefix labels with string `prefix`.

      For Series, the row labels are prefixed.
      For DataFrame, the column labels are prefixed.

      Parameters
      ----------
      prefix : str
          The string to add before each label.
      axis : {0 or 'index', 1 or 'columns', None}, default None
          Axis to add prefix on

          .. versionadded:: 2.0.0

      Returns
      -------
      Series or DataFrame
          New Series or DataFrame with updated labels.

      See Also
      --------
      Series.add_suffix: Suffix row labels with string `suffix`.
      DataFrame.add_suffix: Suffix column labels with string `suffix`.

      Examples
      --------
      >>> s = pd.Series([1, 2, 3, 4])
      >>> s
      0    1
      1    2
      2    3
      3    4
      dtype: int64

      >>> s.add_prefix('item_')
      item_0    1
      item_1    2
      item_2    3
      item_3    4
      dtype: int64

      >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
      >>> df
         A  B
      0  1  3
      1  2  4
      2  3  5
      3  4  6

      >>> df.add_prefix('col_')
         col_A  col_B
      0      1      3
      1      2      4
      2      3      5
      3      4      6
      """

      def _prefixed(label):
          return f"{prefix}{label}"

      # Without an explicit axis the info axis is relabelled.
      if axis is None:
          target_axis = self._info_axis_name
      else:
          target_axis = self._get_axis_name(axis)

      # error: Incompatible return value type (got "Optional[Self]",
      # expected "Self")
      # error: Argument 1 to "rename" of "NDFrame" has incompatible type
      # "**Dict[str, partial[str]]"; expected "Union[str, int, None]"
      # error: Keywords must be strings
      return self._rename(
          **{target_axis: _prefixed}
      )  # type: ignore[return-value, arg-type, misc]
  @final
  def add_suffix(self, suffix: str, axis: Axis | None = None) -> Self:
      """
      Suffix labels with string `suffix`.

      For Series, the row labels are suffixed.
      For DataFrame, the column labels are suffixed.

      Parameters
      ----------
      suffix : str
          The string to add after each label.
      axis : {0 or 'index', 1 or 'columns', None}, default None
          Axis to add suffix on

          .. versionadded:: 2.0.0

      Returns
      -------
      Series or DataFrame
          New Series or DataFrame with updated labels.

      See Also
      --------
      Series.add_prefix: Prefix row labels with string `prefix`.
      DataFrame.add_prefix: Prefix column labels with string `prefix`.

      Examples
      --------
      >>> s = pd.Series([1, 2, 3, 4])
      >>> s
      0    1
      1    2
      2    3
      3    4
      dtype: int64

      >>> s.add_suffix('_item')
      0_item    1
      1_item    2
      2_item    3
      3_item    4
      dtype: int64

      >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
      >>> df
         A  B
      0  1  3
      1  2  4
      2  3  5
      3  4  6

      >>> df.add_suffix('_col')
         A_col  B_col
      0      1      3
      1      2      4
      2      3      5
      3      4      6
      """

      def _suffixed(label):
          return f"{label}{suffix}"

      # Without an explicit axis the info axis is relabelled.
      if axis is None:
          target_axis = self._info_axis_name
      else:
          target_axis = self._get_axis_name(axis)

      # error: Incompatible return value type (got "Optional[Self]",
      # expected "Self")
      # error: Argument 1 to "rename" of "NDFrame" has incompatible type
      # "**Dict[str, partial[str]]"; expected "Union[str, int, None]"
      # error: Keywords must be strings
      return self._rename(
          **{target_axis: _suffixed}
      )  # type: ignore[return-value, arg-type, misc]
  @overload
  def sort_values(
      self,
      *,
      axis: Axis = ...,
      ascending: bool_t | Sequence[bool_t] = ...,
      inplace: Literal[False] = ...,
      kind: SortKind = ...,
      na_position: NaPosition = ...,
      ignore_index: bool_t = ...,
      key: ValueKeyFunc = ...,
  ) -> Self:
      # Typing stub: ``inplace=False`` (or omitted) yields a new object.
      ...

  @overload
  def sort_values(
      self,
      *,
      axis: Axis = ...,
      ascending: bool_t | Sequence[bool_t] = ...,
      inplace: Literal[True],
      kind: SortKind = ...,
      na_position: NaPosition = ...,
      ignore_index: bool_t = ...,
      key: ValueKeyFunc = ...,
  ) -> None:
      # Typing stub: ``inplace=True`` yields None.
      ...

  @overload
  def sort_values(
      self,
      *,
      axis: Axis = ...,
      ascending: bool_t | Sequence[bool_t] = ...,
      inplace: bool_t = ...,
      kind: SortKind = ...,
      na_position: NaPosition = ...,
      ignore_index: bool_t = ...,
      key: ValueKeyFunc = ...,
  ) -> Self | None:
      # Typing stub: ``inplace`` not statically known, so either result is possible.
      ...

  def sort_values(
      self,
      *,
      axis: Axis = 0,
      ascending: bool_t | Sequence[bool_t] = True,
      inplace: bool_t = False,
      kind: SortKind = "quicksort",
      na_position: NaPosition = "last",
      ignore_index: bool_t = False,
      key: ValueKeyFunc | None = None,
  ) -> Self | None:
      """
      Sort by the values along either axis.

      Parameters
      ----------%(optional_by)s
      axis : %(axes_single_arg)s, default 0
           Axis to be sorted.
      ascending : bool or list of bool, default True
           Sort ascending vs. descending. Specify list for multiple sort
           orders.  If this is a list of bools, must match the length of
           the by.
      inplace : bool, default False
           If True, perform operation in-place.
      kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
           Choice of sorting algorithm. See also :func:`numpy.sort` for more
           information. `mergesort` and `stable` are the only stable algorithms. For
           DataFrames, this option is only applied when sorting on a single
           column or label.
      na_position : {'first', 'last'}, default 'last'
           Puts NaNs at the beginning if `first`; `last` puts NaNs at the
           end.
      ignore_index : bool, default False
           If True, the resulting axis will be labeled 0, 1, …, n - 1.
      key : callable, optional
          Apply the key function to the values
          before sorting. This is similar to the `key` argument in the
          builtin :meth:`sorted` function, with the notable difference that
          this `key` function should be *vectorized*. It should expect a
          ``Series`` and return a Series with the same shape as the input.
          It will be applied to each column in `by` independently.

      Returns
      -------
      DataFrame or None
          DataFrame with sorted values or None if ``inplace=True``.

      See Also
      --------
      DataFrame.sort_index : Sort a DataFrame by the index.
      Series.sort_values : Similar method for a Series.

      Examples
      --------
      >>> df = pd.DataFrame({
      ...     'col1': ['A', 'A', 'B', np.nan, 'D', 'C'],
      ...     'col2': [2, 1, 9, 8, 7, 4],
      ...     'col3': [0, 1, 9, 4, 2, 3],
      ...     'col4': ['a', 'B', 'c', 'D', 'e', 'F']
      ... })
      >>> df
        col1  col2  col3 col4
      0    A     2     0    a
      1    A     1     1    B
      2    B     9     9    c
      3  NaN     8     4    D
      4    D     7     2    e
      5    C     4     3    F

      Sort by col1

      >>> df.sort_values(by=['col1'])
        col1  col2  col3 col4
      0    A     2     0    a
      1    A     1     1    B
      2    B     9     9    c
      5    C     4     3    F
      4    D     7     2    e
      3  NaN     8     4    D

      Sort by multiple columns

      >>> df.sort_values(by=['col1', 'col2'])
        col1  col2  col3 col4
      1    A     1     1    B
      0    A     2     0    a
      2    B     9     9    c
      5    C     4     3    F
      4    D     7     2    e
      3  NaN     8     4    D

      Sort Descending

      >>> df.sort_values(by='col1', ascending=False)
        col1  col2  col3 col4
      4    D     7     2    e
      5    C     4     3    F
      2    B     9     9    c
      0    A     2     0    a
      1    A     1     1    B
      3  NaN     8     4    D

      Putting NAs first

      >>> df.sort_values(by='col1', ascending=False, na_position='first')
        col1  col2  col3 col4
      3  NaN     8     4    D
      4    D     7     2    e
      5    C     4     3    F
      2    B     9     9    c
      0    A     2     0    a
      1    A     1     1    B

      Sorting with a key function

      >>> df.sort_values(by='col4', key=lambda col: col.str.lower())
         col1  col2  col3 col4
      0    A     2     0    a
      1    A     1     1    B
      2    B     9     9    c
      3  NaN     8     4    D
      4    D     7     2    e
      5    C     4     3    F

      Natural sort with the key argument,
      using the `natsort <https://github.com/SethMMorton/natsort>`__ package.

      >>> df = pd.DataFrame({
      ...    "time": ['0hr', '128hr', '72hr', '48hr', '96hr'],
      ...    "value": [10, 20, 30, 40, 50]
      ... })
      >>> df
          time  value
      0    0hr     10
      1  128hr     20
      2   72hr     30
      3   48hr     40
      4   96hr     50
      >>> from natsort import index_natsorted
      >>> df.sort_values(
      ...     by="time",
      ...     key=lambda x: np.argsort(index_natsorted(df["time"]))
      ... )
          time  value
      0    0hr     10
      3   48hr     40
      2   72hr     30
      4   96hr     50
      1  128hr     20
      """
      # No sorting logic lives at this level: the call always raises, so a
      # working implementation has to come from an override of this method.
      raise AbstractMethodError(self)
  @overload
  def sort_index(
      self,
      *,
      axis: Axis = ...,
      level: IndexLabel = ...,
      ascending: bool_t | Sequence[bool_t] = ...,
      inplace: Literal[True],
      kind: SortKind = ...,
      na_position: NaPosition = ...,
      sort_remaining: bool_t = ...,
      ignore_index: bool_t = ...,
      key: IndexKeyFunc = ...,
  ) -> None:
      # Typing stub: ``inplace=True`` yields None.
      ...

  @overload
  def sort_index(
      self,
      *,
      axis: Axis = ...,
      level: IndexLabel = ...,
      ascending: bool_t | Sequence[bool_t] = ...,
      inplace: Literal[False] = ...,
      kind: SortKind = ...,
      na_position: NaPosition = ...,
      sort_remaining: bool_t = ...,
      ignore_index: bool_t = ...,
      key: IndexKeyFunc = ...,
  ) -> Self:
      # Typing stub: ``inplace=False`` (or omitted) yields a new object.
      ...

  @overload
  def sort_index(
      self,
      *,
      axis: Axis = ...,
      level: IndexLabel = ...,
      ascending: bool_t | Sequence[bool_t] = ...,
      inplace: bool_t = ...,
      kind: SortKind = ...,
      na_position: NaPosition = ...,
      sort_remaining: bool_t = ...,
      ignore_index: bool_t = ...,
      key: IndexKeyFunc = ...,
  ) -> Self | None:
      # Typing stub: ``inplace`` not statically known, so either result is possible.
      ...

  def sort_index(
      self,
      *,
      axis: Axis = 0,
      level: IndexLabel | None = None,
      ascending: bool_t | Sequence[bool_t] = True,
      inplace: bool_t = False,
      kind: SortKind = "quicksort",
      na_position: NaPosition = "last",
      sort_remaining: bool_t = True,
      ignore_index: bool_t = False,
      key: IndexKeyFunc | None = None,
  ) -> Self | None:
      """
      Sort the object by the labels along `axis`.

      Parameters
      ----------
      axis : {0 or 'index', 1 or 'columns'}, default 0
          Axis whose labels determine the new order.
      level : IndexLabel, optional
          Forwarded to ``get_indexer_indexer``.
      ascending : bool or list of bool, default True
          Sort direction; validated with ``validate_ascending``.
      inplace : bool, default False
          If True, modify the caller and return None.
      kind : SortKind, default 'quicksort'
          Forwarded to ``get_indexer_indexer``.
      na_position : {'first', 'last'}, default 'last'
          Forwarded to ``get_indexer_indexer``.
      sort_remaining : bool, default True
          Forwarded to ``get_indexer_indexer``.
      ignore_index : bool, default False
          If True, the sorted axis is relabelled 0, 1, …, n - 1.
      key : callable, optional
          Forwarded to ``get_indexer_indexer``.

      Returns
      -------
      same type as caller or None
          The sorted object, or None when ``inplace=True``.
      """
      inplace = validate_bool_kwarg(inplace, "inplace")
      axis = self._get_axis_number(axis)
      ascending = validate_ascending(ascending)

      target = self._get_axis(axis)

      indexer = get_indexer_indexer(
          target, level, ascending, kind, na_position, sort_remaining, key
      )

      if indexer is None:
          # No positional reordering was produced (presumably the axis is
          # already in the requested order), so the data are kept as they are.
          if inplace:
              result = self
          else:
              result = self.copy(deep=None)

          if ignore_index:
              # Relabel the axis that was asked to be sorted. Always assigning
              # to ``result.index`` would, for ``axis=1``, wipe the row labels
              # and leave the columns untouched, unlike the reordering path
              # below which relabels ``baxis``.
              fresh_labels = default_index(len(target))
              if axis == 1:
                  result.columns = fresh_labels
              else:
                  result.index = fresh_labels
          if inplace:
              return None
          else:
              return result

      baxis = self._get_block_manager_axis(axis)
      new_data = self._mgr.take(indexer, axis=baxis, verify=False)

      # reconstruct axis if needed
      if not ignore_index:
          new_axis = new_data.axes[baxis]._sort_levels_monotonic()
      else:
          new_axis = default_index(len(indexer))
      new_data.set_axis(baxis, new_axis)

      result = self._constructor_from_mgr(new_data, axes=new_data.axes)

      if inplace:
          return self._update_inplace(result)
      else:
          return result.__finalize__(self, method="sort_index")
  @doc(
      klass=_shared_doc_kwargs["klass"],
      optional_reindex="",
  )
  def reindex(
      self,
      labels=None,
      *,
      index=None,
      columns=None,
      axis: Axis | None = None,
      method: ReindexMethod | None = None,
      copy: bool_t | None = None,
      level: Level | None = None,
      fill_value: Scalar | None = np.nan,
      limit: int | None = None,
      tolerance=None,
  ) -> Self:
      """
      Conform {klass} to new index with optional filling logic.

      Places NA/NaN in locations having no value in the previous index. A new object
      is produced unless the new index is equivalent to the current one and
      ``copy=False``.

      Parameters
      ----------
      {optional_reindex}
      method : {{None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}}
          Method to use for filling holes in reindexed DataFrame.
          Please note: this is only applicable to DataFrames/Series with a
          monotonically increasing/decreasing index.

          * None (default): don't fill gaps
          * pad / ffill: Propagate last valid observation forward to next
            valid.
          * backfill / bfill: Use next valid observation to fill gap.
          * nearest: Use nearest valid observations to fill gap.

      copy : bool, default True
          Return a new object, even if the passed indexes are the same.

          .. note::
              The `copy` keyword will change behavior in pandas 3.0.
              `Copy-on-Write
              <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
              will be enabled by default, which means that all methods with a
              `copy` keyword will use a lazy copy mechanism to defer the copy and
              ignore the `copy` keyword. The `copy` keyword will be removed in a
              future version of pandas.

              You can already get the future behavior and improvements through
              enabling copy on write ``pd.options.mode.copy_on_write = True``
      level : int or name
          Broadcast across a level, matching Index values on the
          passed MultiIndex level.
      fill_value : scalar, default np.nan
          Value to use for missing values. Defaults to NaN, but can be any
          "compatible" value.
      limit : int, default None
          Maximum number of consecutive elements to forward or backward fill.
      tolerance : optional
          Maximum distance between original and new labels for inexact
          matches. The values of the index at the matching locations must
          satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

          Tolerance may be a scalar value, which applies the same tolerance
          to all values, or list-like, which applies variable tolerance per
          element. List-like includes list, tuple, array, Series, and must be
          the same size as the index and its dtype must exactly match the
          index's type.

      Returns
      -------
      {klass} with changed index.

      See Also
      --------
      DataFrame.set_index : Set row labels.
      DataFrame.reset_index : Remove row labels or move them to new columns.
      DataFrame.reindex_like : Change to same indices as other DataFrame.

      Examples
      --------
      ``DataFrame.reindex`` supports two calling conventions

      * ``(index=index_labels, columns=column_labels, ...)``
      * ``(labels, axis={{'index', 'columns'}}, ...)``

      We *highly* recommend using keyword arguments to clarify your
      intent.

      Create a dataframe with some fictional data.

      >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror']
      >>> df = pd.DataFrame({{'http_status': [200, 200, 404, 404, 301],
      ...                   'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}},
      ...                   index=index)
      >>> df
                 http_status  response_time
      Firefox            200           0.04
      Chrome             200           0.02
      Safari             404           0.07
      IE10               404           0.08
      Konqueror          301           1.00

      Create a new index and reindex the dataframe. By default
      values in the new index that do not have corresponding
      records in the dataframe are assigned ``NaN``.

      >>> new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10',
      ...              'Chrome']
      >>> df.reindex(new_index)
                     http_status  response_time
      Safari               404.0           0.07
      Iceweasel              NaN            NaN
      Comodo Dragon          NaN            NaN
      IE10                 404.0           0.08
      Chrome               200.0           0.02

      We can fill in the missing values by passing a value to
      the keyword ``fill_value``. Because the index is not monotonically
      increasing or decreasing, we cannot use arguments to the keyword
      ``method`` to fill the ``NaN`` values.

      >>> df.reindex(new_index, fill_value=0)
                     http_status  response_time
      Safari                 404           0.07
      Iceweasel                0           0.00
      Comodo Dragon            0           0.00
      IE10                   404           0.08
      Chrome                 200           0.02

      >>> df.reindex(new_index, fill_value='missing')
                    http_status response_time
      Safari                404          0.07
      Iceweasel         missing       missing
      Comodo Dragon     missing       missing
      IE10                  404          0.08
      Chrome                200          0.02

      We can also reindex the columns.

      >>> df.reindex(columns=['http_status', 'user_agent'])
                 http_status  user_agent
      Firefox            200         NaN
      Chrome             200         NaN
      Safari             404         NaN
      IE10               404         NaN
      Konqueror          301         NaN

      Or we can use "axis-style" keyword arguments

      >>> df.reindex(['http_status', 'user_agent'], axis="columns")
                 http_status  user_agent
      Firefox            200         NaN
      Chrome             200         NaN
      Safari             404         NaN
      IE10               404         NaN
      Konqueror          301         NaN

      To further illustrate the filling functionality in
      ``reindex``, we will create a dataframe with a
      monotonically increasing index (for example, a sequence
      of dates).

      >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D')
      >>> df2 = pd.DataFrame({{"prices": [100, 101, np.nan, 100, 89, 88]}},
      ...                    index=date_index)
      >>> df2
                  prices
      2010-01-01   100.0
      2010-01-02   101.0
      2010-01-03     NaN
      2010-01-04   100.0
      2010-01-05    89.0
      2010-01-06    88.0

      Suppose we decide to expand the dataframe to cover a wider
      date range.

      >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D')
      >>> df2.reindex(date_index2)
                  prices
      2009-12-29     NaN
      2009-12-30     NaN
      2009-12-31     NaN
      2010-01-01   100.0
      2010-01-02   101.0
      2010-01-03     NaN
      2010-01-04   100.0
      2010-01-05    89.0
      2010-01-06    88.0
      2010-01-07     NaN

      The index entries that did not have a value in the original data frame
      (for example, '2009-12-29') are by default filled with ``NaN``.
      If desired, we can fill in the missing values using one of several
      options.

      For example, to back-propagate the last valid value to fill the ``NaN``
      values, pass ``bfill`` as an argument to the ``method`` keyword.

      >>> df2.reindex(date_index2, method='bfill')
                  prices
      2009-12-29   100.0
      2009-12-30   100.0
      2009-12-31   100.0
      2010-01-01   100.0
      2010-01-02   101.0
      2010-01-03     NaN
      2010-01-04   100.0
      2010-01-05    89.0
      2010-01-06    88.0
      2010-01-07     NaN

      Please note that the ``NaN`` value present in the original dataframe
      (at index value 2010-01-03) will not be filled by any of the
      value propagation schemes. This is because filling while reindexing
      does not look at dataframe values, but only compares the original and
      desired indexes. If you do want to fill in the ``NaN`` values present
      in the original dataframe, use the ``fillna()`` method.

      See the :ref:`user guide <basics.reindexing>` for more.
      """
      # TODO: Decide if we care about having different examples for different
      # kinds

      # Step 1: fold ``labels``/``axis`` into explicit ``index``/``columns``.
      index_given = index is not None
      columns_given = columns is not None
      labels_given = labels is not None

      if index_given and columns_given and labels_given:
          raise TypeError("Cannot specify all of 'labels', 'index', 'columns'.")

      if index_given or columns_given:
          if axis is not None:
              raise TypeError(
                  "Cannot specify both 'axis' and any of 'index' or 'columns'"
              )
          if labels_given:
              # ``labels`` takes whichever of the two slots is still free.
              if index_given:
                  columns = labels
              else:
                  index = labels
      elif axis and self._get_axis_number(axis) == 1:
          columns = labels
      else:
          index = labels

      requested: dict[Literal["index", "columns"], Any] = {
          "index": index,
          "columns": columns,
      }
      method = clean_reindex_fill_method(method)

      # Step 2: short-circuit when nothing would change. The comparison uses
      # ``identical``, so index names have to agree as well as the values;
      # in that case a copy is only made if one was asked for.
      if copy and using_copy_on_write():
          copy = False
      nothing_changes = all(
          self._get_axis(name).identical(wanted)
          for name, wanted in requested.items()
          if wanted is not None
      )
      if nothing_changes:
          return self.copy(deep=copy)

      # Step 3: reindex all axes at once when that path is available ...
      if self._needs_reindex_multi(requested, method, level):
          return self._reindex_multi(requested, copy, fill_value)

      # ... otherwise go through the requested axes one at a time.
      reindexed = self._reindex_axes(
          requested, level, limit, tolerance, method, fill_value, copy
      )
      return reindexed.__finalize__(self, method="reindex")
  @final
  def _reindex_axes(
      self,
      axes,
      level: Level | None,
      limit: int | None,
      tolerance,
      method,
      fill_value: Scalar | None,
      copy: bool_t | None,
  ) -> Self:
      """
      Reindex every axis that has target labels, one axis after another.

      `axes` maps each axis name in ``self._AXIS_ORDERS`` to its target
      labels, with None meaning "leave this axis alone". The indexer for an
      axis is always computed from the caller's own axis, while the data are
      taken from the result accumulated so far.
      """
      result = self
      for axis_name in self._AXIS_ORDERS:
          wanted = axes[axis_name]
          if wanted is not None:
              current = self._get_axis(axis_name)
              new_index, indexer = current.reindex(
                  wanted,
                  level=level,
                  limit=limit,
                  tolerance=tolerance,
                  method=method,
              )

              axis_number = self._get_axis_number(axis_name)
              result = result._reindex_with_indexers(
                  {axis_number: [new_index, indexer]},
                  fill_value=fill_value,
                  copy=copy,
                  allow_dups=False,
              )
              # The copy request has been handed over once; later axes must
              # not ask for another one.
              copy = False

      return result
  def _needs_reindex_multi(self, axes, method, level: Level | None) -> bool_t:
      """
      Tell whether ``reindex`` should take the all-axes-at-once path.

      That path is chosen only when target labels were given for every axis,
      no fill `method` and no `level` are involved, and
      ``self._can_fast_transpose`` holds.
      """
      if common.count_not_none(*axes.values()) != self._AXIS_LEN:
          # At least one axis has no target labels.
          return False
      if method is not None or level is not None:
          return False
      # reindex_multi calls self.values, so that path is only worth taking
      # when doing so is cheap.
      return self._can_fast_transpose
  def _reindex_multi(self, axes, copy, fill_value):
      """
      Reindex all axes in one step; not implemented at this level.

      ``reindex`` calls this with the mapping of axis name to target labels,
      the ``copy`` flag and the ``fill_value`` once ``_needs_reindex_multi``
      returns True. The method always raises ``AbstractMethodError`` here,
      so an override has to supply the behaviour.
      """
      raise AbstractMethodError(self)
  @final
  def _reindex_with_indexers(
      self,
      reindexers,
      fill_value=None,
      copy: bool_t | None = False,
      allow_dups: bool_t = False,
  ) -> Self:
      """
      Apply precomputed ``(index, indexer)`` pairs to the underlying manager.

      `reindexers` maps an axis number to an ``(index, indexer)`` pair; axes
      are processed in ascending order and pairs whose index is None are
      skipped. ``allow_dups`` indicates an internal call here and is passed
      straight to the manager.
      """
      manager = self._mgr
      for axis in sorted(reindexers):
          new_labels, positions = reindexers[axis]
          block_axis = self._get_block_manager_axis(axis)

          if new_labels is not None:
              new_labels = ensure_index(new_labels)
              if positions is not None:
                  positions = ensure_platform_int(positions)

              # TODO: speed up on homogeneous DataFrame objects (see _reindex_multi)
              manager = manager.reindex_indexer(
                  new_labels,
                  positions,
                  axis=block_axis,
                  fill_value=fill_value,
                  allow_dups=allow_dups,
                  copy=copy,
              )
              # The copy request has been handed over once; later axes must
              # not ask for another one.
              copy = False

      if manager is self._mgr:
          # Every step handed back the original manager (or none ran), so
          # the result would otherwise share it with the caller.
          if using_copy_on_write():
              manager = manager.copy(deep=False)
          elif copy or copy is None:
              manager = manager.copy(deep=copy)

      rebuilt = self._constructor_from_mgr(manager, axes=manager.axes)
      return rebuilt.__finalize__(self)
  def filter(
      self,
      items=None,
      like: str | None = None,
      regex: str | None = None,
      axis: Axis | None = None,
  ) -> Self:
      """
      Subset the dataframe rows or columns according to the specified index labels.

      Note that this routine does not filter a dataframe on its
      contents. The filter is applied to the labels of the index.

      Parameters
      ----------
      items : list-like
          Keep labels from axis which are in items.
      like : str
          Keep labels from axis for which "like in label == True".
      regex : str (regular expression)
          Keep labels from axis for which re.search(regex, label) == True.
      axis : {0 or 'index', 1 or 'columns', None}, default None
          The axis to filter on, expressed either as an index (int)
          or axis name (str). By default this is the info axis, 'columns' for
          DataFrame. For `Series` this parameter is unused and defaults to `None`.

      Returns
      -------
      same type as input object

      See Also
      --------
      DataFrame.loc : Access a group of rows and columns
          by label(s) or a boolean array.

      Notes
      -----
      The ``items``, ``like``, and ``regex`` parameters are
      enforced to be mutually exclusive.

      ``axis`` defaults to the info axis that is used when indexing
      with ``[]``.

      Examples
      --------
      >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])),
      ...                   index=['mouse', 'rabbit'],
      ...                   columns=['one', 'two', 'three'])
      >>> df
              one  two  three
      mouse     1    2      3
      rabbit    4    5      6

      >>> # select columns by name
      >>> df.filter(items=['one', 'three'])
              one  three
      mouse     1      3
      rabbit    4      6

      >>> # select columns by regular expression
      >>> df.filter(regex='e$', axis=1)
              one  three
      mouse     1      3
      rabbit    4      6

      >>> # select rows containing 'bbi'
      >>> df.filter(like='bbi', axis=0)
              one  two  three
      rabbit    4    5      6
      """
      if common.count_not_none(items, like, regex) > 1:
          raise TypeError(
              "Keyword arguments `items`, `like`, or `regex` "
              "are mutually exclusive"
          )

      if axis is None:
          axis = self._info_axis_name
      labels = self._get_axis(axis)

      if items is not None:
          axis_name = self._get_axis_name(axis)
          kept = Index(items).intersection(labels)
          if len(kept) == 0:
              # Keep the dtype of labels when we are empty
              kept = kept.astype(labels.dtype)
          # error: Keywords must be strings
          return self.reindex(**{axis_name: kept})  # type: ignore[misc]

      if like:
          # Substring test against the string form of each label.
          selected = labels.map(lambda label: like in ensure_str(label))
          return self.loc(axis=axis)[selected]

      if regex:
          pattern = re.compile(regex)
          selected = labels.map(
              lambda label: pattern.search(ensure_str(label)) is not None
          )
          return self.loc(axis=axis)[selected]

      raise TypeError("Must pass either `items`, `like`, or `regex`")
  @final
  def head(self, n: int = 5) -> Self:
      """
      Return the first `n` rows.

      Rows are selected by position, which makes this a quick way to check
      that an object holds the kind of data you expect.

      For negative values of `n`, this function returns all rows except
      the last `|n|` rows, equivalent to ``df[:n]``.

      If n is larger than the number of rows, this function returns all rows.

      Parameters
      ----------
      n : int, default 5
          Number of rows to select.

      Returns
      -------
      same type as caller
          The first `n` rows of the caller object.

      See Also
      --------
      DataFrame.tail: Returns the last `n` rows.

      Examples
      --------
      >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
      ...                    'monkey', 'parrot', 'shark', 'whale', 'zebra']})
      >>> df
            animal
      0  alligator
      1        bee
      2     falcon
      3       lion
      4     monkey
      5     parrot
      6      shark
      7      whale
      8      zebra

      Viewing the first 5 lines

      >>> df.head()
            animal
      0  alligator
      1        bee
      2     falcon
      3       lion
      4     monkey

      Viewing the first `n` lines (three in this case)

      >>> df.head(3)
            animal
      0  alligator
      1        bee
      2     falcon

      For negative values of `n`

      >>> df.head(-3)
            animal
      0  alligator
      1        bee
      2     falcon
      3       lion
      4     monkey
      5     parrot
      """
      leading_rows = self.iloc[:n]
      # With copy-on-write enabled the slice is handed back through ``copy()``.
      return leading_rows.copy() if using_copy_on_write() else leading_rows
  @final
  def tail(self, n: int = 5) -> Self:
      """
      Return the last `n` rows.

      Rows are selected by position, which is handy for a quick look at
      data after, for example, sorting or appending rows.

      For negative values of `n`, this function returns all rows except
      the first `|n|` rows, equivalent to ``df[|n|:]``.

      If n is larger than the number of rows, this function returns all rows.

      Parameters
      ----------
      n : int, default 5
          Number of rows to select.

      Returns
      -------
      type of caller
          The last `n` rows of the caller object.

      See Also
      --------
      DataFrame.head : The first `n` rows of the caller object.

      Examples
      --------
      >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
      ...                    'monkey', 'parrot', 'shark', 'whale', 'zebra']})
      >>> df
            animal
      0  alligator
      1        bee
      2     falcon
      3       lion
      4     monkey
      5     parrot
      6      shark
      7      whale
      8      zebra

      Viewing the last 5 lines

      >>> df.tail()
         animal
      4  monkey
      5  parrot
      6   shark
      7   whale
      8   zebra

      Viewing the last `n` lines (three in this case)

      >>> df.tail(3)
        animal
      6  shark
      7  whale
      8  zebra

      For negative values of `n`

      >>> df.tail(-3)
         animal
      3    lion
      4  monkey
      5  parrot
      6   shark
      7   whale
      8   zebra
      """
      # ``-0`` is just ``0``, so ``iloc[-n:]`` would select every row for
      # n == 0; that case needs an explicitly empty slice instead.
      trailing_rows = self.iloc[0:0] if n == 0 else self.iloc[-n:]
      if using_copy_on_write():
          return trailing_rows.copy()
      return trailing_rows
  @final
  def sample(
      self,
      n: int | None = None,
      frac: float | None = None,
      replace: bool_t = False,
      weights=None,
      random_state: RandomState | None = None,
      axis: Axis | None = None,
      ignore_index: bool_t = False,
  ) -> Self:
      """
      Return a random sample of items from an axis of object.

      You can use `random_state` for reproducibility.

      Parameters
      ----------
      n : int, optional
          Number of items from axis to return. Cannot be used with `frac`.
          Default = 1 if `frac` = None.
      frac : float, optional
          Fraction of axis items to return. Cannot be used with `n`.
      replace : bool, default False
          Allow or disallow sampling of the same row more than once.
      weights : str or ndarray-like, optional
          Default 'None' results in equal probability weighting.
          If passed a Series, will align with target object on index. Index
          values in weights not found in sampled object will be ignored and
          index values in sampled object not in weights will be assigned
          weights of zero.
          If called on a DataFrame, will accept the name of a column
          when axis = 0.
          Unless weights are a Series, weights must be same length as axis
          being sampled.
          If weights do not sum to 1, they will be normalized to sum to 1.
          Missing values in the weights column will be treated as zero.
          Infinite values not allowed.
      random_state : int, array-like, BitGenerator, np.random.RandomState, np.random.Generator, optional
          If int, array-like, or BitGenerator, seed for random number generator.
          If np.random.RandomState or np.random.Generator, use as given.

          .. versionchanged:: 1.4.0

              np.random.Generator objects now accepted

      axis : {0 or 'index', 1 or 'columns', None}, default None
          Axis to sample. Accepts axis number or name. Default is stat axis
          for given data type. For `Series` this parameter is unused and defaults to `None`.
      ignore_index : bool, default False
          If True, the resulting index will be labeled 0, 1, …, n - 1.

          .. versionadded:: 1.3.0

      Returns
      -------
      Series or DataFrame
          A new object of same type as caller containing `n` items randomly
          sampled from the caller object.

      See Also
      --------
      DataFrameGroupBy.sample: Generates random samples from each group of a
          DataFrame object.
      SeriesGroupBy.sample: Generates random samples from each group of a
          Series object.
      numpy.random.choice: Generates a random sample from a given 1-D numpy
          array.

      Notes
      -----
      If `frac` > 1, `replace` should be set to `True`.

      Examples
      --------
      >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0],
      ...                    'num_wings': [2, 0, 0, 0],
      ...                    'num_specimen_seen': [10, 2, 1, 8]},
      ...                   index=['falcon', 'dog', 'spider', 'fish'])
      >>> df
              num_legs  num_wings  num_specimen_seen
      falcon         2          2                 10
      dog            4          0                  2
      spider         8          0                  1
      fish           0          0                  8

      Extract 3 random elements from the ``Series`` ``df['num_legs']``:
      Note that we use `random_state` to ensure the reproducibility of
      the examples.

      >>> df['num_legs'].sample(n=3, random_state=1)
      fish      0
      spider    8
      falcon    2
      Name: num_legs, dtype: int64

      A random 50% sample of the ``DataFrame`` with replacement:

      >>> df.sample(frac=0.5, replace=True, random_state=1)
            num_legs  num_wings  num_specimen_seen
      dog          4          0                  2
      fish         0          0                  8

      An upsample sample of the ``DataFrame`` with replacement:
      Note that `replace` parameter has to be `True` for `frac` parameter > 1.

      >>> df.sample(frac=2, replace=True, random_state=1)
              num_legs  num_wings  num_specimen_seen
      dog            4          0                  2
      fish           0          0                  8
      falcon         2          2                 10
      falcon         2          2                 10
      fish           0          0                  8
      dog            4          0                  2
      fish           0          0                  8
      dog            4          0                  2

      Using a DataFrame column as weights. Rows with larger value in the
      `num_specimen_seen` column are more likely to be sampled.

      >>> df.sample(n=2, weights='num_specimen_seen', random_state=1)
              num_legs  num_wings  num_specimen_seen
      falcon         2          2                 10
      fish           0          0                  8
      """  # noqa: E501
      # An omitted axis means axis 0.
      axis = self._get_axis_number(0 if axis is None else axis)
      population = self.shape[axis]

      # Turn whatever was passed as ``random_state`` into a generator object.
      rng = common.random_state(random_state)

      n_draws = sample.process_sampling_size(n, frac, replace)
      if n_draws is None:
          # No absolute size came back, so derive it from the fraction.
          assert frac is not None
          n_draws = round(frac * population)

      if weights is not None:
          weights = sample.preprocess_weights(self, weights, axis)

      positions = sample.sample(population, n_draws, replace, weights, rng)
      drawn = self.take(positions, axis=axis)

      if ignore_index:
          drawn.index = default_index(len(drawn))

      return drawn
  @final
  @doc(klass=_shared_doc_kwargs["klass"])
  def pipe(
      self,
      func: Callable[..., T] | tuple[Callable[..., T], str],
      *args,
      **kwargs,
  ) -> T:
      r"""
      Apply chainable functions that expect Series or DataFrames.

      Parameters
      ----------
      func : function
          Function to apply to the {klass}.
          ``args``, and ``kwargs`` are passed into ``func``.
          Alternatively a ``(callable, data_keyword)`` tuple where
          ``data_keyword`` is a string indicating the keyword of
          ``callable`` that expects the {klass}.
      *args : iterable, optional
          Positional arguments passed into ``func``.
      **kwargs : mapping, optional
          A dictionary of keyword arguments passed into ``func``.

      Returns
      -------
      the return type of ``func``.

      See Also
      --------
      DataFrame.apply : Apply a function along input axis of DataFrame.
      DataFrame.map : Apply a function elementwise on a whole DataFrame.
      Series.map : Apply a mapping correspondence on a
          :class:`~pandas.Series`.

      Notes
      -----
      Use ``.pipe`` when chaining together functions that expect
      Series, DataFrames or GroupBy objects.

      Examples
      --------
      Constructing an income DataFrame from a dictionary.

      >>> data = [[8000, 1000], [9500, np.nan], [5000, 2000]]
      >>> df = pd.DataFrame(data, columns=['Salary', 'Others'])
      >>> df
         Salary  Others
      0    8000  1000.0
      1    9500     NaN
      2    5000  2000.0

      Functions that perform tax reductions on an income DataFrame.

      >>> def subtract_federal_tax(df):
      ...     return df * 0.9
      >>> def subtract_state_tax(df, rate):
      ...     return df * (1 - rate)
      >>> def subtract_national_insurance(df, rate, rate_increase):
      ...     new_rate = rate + rate_increase
      ...     return df * (1 - new_rate)

      Instead of writing

      >>> subtract_national_insurance(
      ...     subtract_state_tax(subtract_federal_tax(df), rate=0.12),
      ...     rate=0.05,
      ...     rate_increase=0.02)  # doctest: +SKIP

      You can write

      >>> (
      ...     df.pipe(subtract_federal_tax)
      ...     .pipe(subtract_state_tax, rate=0.12)
      ...     .pipe(subtract_national_insurance, rate=0.05, rate_increase=0.02)
      ... )
          Salary   Others
      0  5892.48   736.56
      1  6997.32      NaN
      2  3682.80  1473.12

      If you have a function that takes the data as (say) the second
      argument, pass a tuple indicating which keyword expects the
      data. For example, suppose ``national_insurance`` takes its data as ``df``
      in the second argument:

      >>> def subtract_national_insurance(rate, df, rate_increase):
      ...     new_rate = rate + rate_increase
      ...     return df * (1 - new_rate)
      >>> (
      ...     df.pipe(subtract_federal_tax)
      ...     .pipe(subtract_state_tax, rate=0.12)
      ...     .pipe(
      ...         (subtract_national_insurance, 'df'),
      ...         rate=0.05,
      ...         rate_increase=0.02
      ...     )
      ... )
          Salary   Others
      0  5892.48   736.56
      1  6997.32      NaN
      2  3682.80  1473.12
      """
      # Under Copy-on-Write the piped function receives a lazy copy of self
      # rather than self; otherwise self is passed through unchanged.
      piped_obj = self.copy(deep=None) if using_copy_on_write() else self
      return common.pipe(piped_obj, func, *args, **kwargs)
  5377. # ----------------------------------------------------------------------
  5378. # Attribute access
  @final
  def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:
      """
      Propagate metadata from other to self.

      Parameters
      ----------
      other : the object from which to get the attributes that we are going
          to propagate
      method : str, optional
          A passed method name providing context on where ``__finalize__``
          was called.

          .. warning::

             The value passed as `method` is not currently considered
             stable across pandas releases.

      Returns
      -------
      Self
          ``self``, after its metadata has been updated in place.
      """
      if isinstance(other, NDFrame):
          source_attrs = other.attrs
          if source_attrs:
              # We want attrs propagation to have minimal performance
              # impact if attrs are not used; i.e. attrs is an empty dict.
              # One could make the deepcopy unconditionally, but a deepcopy
              # of an empty dict is 50x more expensive than the empty check.
              self.attrs = deepcopy(source_attrs)

          self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels

          # For subclasses using _metadata: copy every name declared by both.
          shared_metadata = set(self._metadata) & set(other._metadata)
          for attr_name in shared_metadata:
              assert isinstance(attr_name, str)
              object.__setattr__(self, attr_name, getattr(other, attr_name, None))

      if method == "concat":
          concat_inputs = other.objs

          # propagate attrs only if all concat arguments have the same
          # (non-empty) attrs
          if all(obj.attrs for obj in concat_inputs):
              reference_attrs = concat_inputs[0].attrs
              if all(obj.attrs == reference_attrs for obj in concat_inputs[1:]):
                  self.attrs = deepcopy(reference_attrs)

          # duplicates are allowed on the result only if every input allows them
          self.flags.allows_duplicate_labels = all(
              obj.flags.allows_duplicate_labels for obj in concat_inputs
          )

      return self
  @final
  def __getattr__(self, name: str):
      """
      Fall back to item lookup when regular attribute access fails.

      This allows simpler access to columns for interactive use.
      """
      # Note: obj.x will always call obj.__getattribute__('x') prior to
      # calling obj.__getattr__('x').
      # Internal names, subclass metadata and accessors are never treated as
      # labels; the checks run in this order so the info axis is only consulted
      # for names that pass all three.
      if (
          name in self._internal_names_set
          or name in self._metadata
          or name in self._accessors
      ):
          return object.__getattribute__(self, name)

      if self._info_axis._can_hold_identifiers_and_holds_name(name):
          return self[name]

      return object.__getattribute__(self, name)
  @final
  def __setattr__(self, name: str, value) -> None:
      """
      Set an attribute, routing to item assignment for info-axis labels.

      After regular attribute access, try setting the name.
      This allows simpler access to columns for interactive use.

      Parameters
      ----------
      name : str
          Attribute name being assigned.
      value : object
          Value to assign.
      """
      # first try regular attribute access via __getattribute__, so that
      # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify
      # the same attribute.

      try:
          object.__getattribute__(self, name)
          # NOTE: the assignment is deliberately inside the ``try``: an
          # AttributeError raised by ``object.__setattr__`` itself is also
          # swallowed below and handled by the fallback logic.
          return object.__setattr__(self, name, value)
      except AttributeError:
          pass

      # if this fails, go on to more involved attribute setting
      # (note that this matches __getattr__, above).
      if name in self._internal_names_set:
          object.__setattr__(self, name, value)
      elif name in self._metadata:
          object.__setattr__(self, name, value)
      else:
          try:
              # ``getattr`` goes through ``__getattr__``, so this succeeds
              # when ``name`` resolves via item lookup.
              existing = getattr(self, name)
              if isinstance(existing, Index):
                  object.__setattr__(self, name, value)
              elif name in self._info_axis:
                  # existing label on the info axis: assign via __setitem__
                  self[name] = value
              else:
                  object.__setattr__(self, name, value)
          except (AttributeError, TypeError):
              # Brand-new name: store it as a plain attribute, warning first
              # when a list-like is assigned on a DataFrame.
              if isinstance(self, ABCDataFrame) and (is_list_like(value)):
                  warnings.warn(
                      "Pandas doesn't allow columns to be "
                      "created via a new attribute name - see "
                      "https://pandas.pydata.org/pandas-docs/"
                      "stable/indexing.html#attribute-access",
                      stacklevel=find_stack_level(),
                  )
              object.__setattr__(self, name, value)
  @final
  def _dir_additions(self) -> set[str]:
      """
      Extend the parent's ``dir()`` additions with info-axis labels.

      The string-like attributes from the info_axis are added.
      If info_axis is a MultiIndex, its first level values are used.
      """
      extra_names = super()._dir_additions()
      info_axis = self._info_axis
      if info_axis._can_hold_strings:
          extra_names.update(info_axis._dir_additions_for_owner)
      return extra_names
  5484. # ----------------------------------------------------------------------
  5485. # Consolidation of internals
  @final
  def _protect_consolidate(self, f):
      """
      Run ``f`` and clear the item cache if the number of blocks changed.

      Consolidate _mgr -- if the blocks have changed, then clear the
      cache. Array-manager backed objects simply run ``f``.
      """
      if isinstance(self._mgr, (ArrayManager, SingleArrayManager)):
          return f()

      n_blocks_before = len(self._mgr.blocks)
      outcome = f()
      # ``f`` may have replaced ``self._mgr``, so look it up again here.
      n_blocks_after = len(self._mgr.blocks)
      if n_blocks_after != n_blocks_before:
          self._clear_item_cache()
      return outcome
  5499. @final
  5500. def _consolidate_inplace(self) -> None:
  5501. """Consolidate data in place and return None"""
  5502. def f() -> None:
  5503. self._mgr = self._mgr.consolidate()
  5504. self._protect_consolidate(f)
  5505. @final
  5506. def _consolidate(self):
  5507. """
  5508. Compute NDFrame with "consolidated" internals (data of each dtype
  5509. grouped together in a single ndarray).
  5510. Returns
  5511. -------
  5512. consolidated : same type as caller
  5513. """
  5514. f = lambda: self._mgr.consolidate()
  5515. cons_data = self._protect_consolidate(f)
  5516. return self._constructor_from_mgr(cons_data, axes=cons_data.axes).__finalize__(
  5517. self
  5518. )
  5519. @final
  5520. @property
  5521. def _is_mixed_type(self) -> bool_t:
  5522. if self._mgr.is_single_block:
  5523. # Includes all Series cases
  5524. return False
  5525. if self._mgr.any_extension_types:
  5526. # Even if they have the same dtype, we can't consolidate them,
  5527. # so we pretend this is "mixed'"
  5528. return True
  5529. return self.dtypes.nunique() > 1
  5530. @final
  5531. def _get_numeric_data(self) -> Self:
  5532. new_mgr = self._mgr.get_numeric_data()
  5533. return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self)
  5534. @final
  5535. def _get_bool_data(self):
  5536. new_mgr = self._mgr.get_bool_data()
  5537. return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self)
  5538. # ----------------------------------------------------------------------
  5539. # Internal Interface Methods
  @property
  def values(self):
      # Not implemented at this level: always raises AbstractMethodError.
      # Presumably concrete subclasses provide the real property -- confirm
      # against the subclass definitions.
      raise AbstractMethodError(self)
  @property
  def _values(self) -> ArrayLike:
      """internal implementation"""
      # Not implemented at this level: always raises AbstractMethodError.
      raise AbstractMethodError(self)
  @property
  def dtypes(self):
      """
      Return the dtypes in the DataFrame.

      This returns a Series with the data type of each column.
      The result's index is the original DataFrame's columns. Columns
      with mixed types are stored with the ``object`` dtype. See
      :ref:`the User Guide <basics.dtypes>` for more.

      Returns
      -------
      pandas.Series
          The data type of each column.

      Examples
      --------
      >>> df = pd.DataFrame({'float': [1.0],
      ...                    'int': [1],
      ...                    'datetime': [pd.Timestamp('20180310')],
      ...                    'string': ['foo']})
      >>> df.dtypes
      float              float64
      int                  int64
      datetime    datetime64[ns]
      string              object
      dtype: object
      """
      per_column_dtypes = self._mgr.get_dtypes()
      # the dtype objects themselves are the values, hence an object result
      return self._constructor_sliced(
          per_column_dtypes, index=self._info_axis, dtype=np.object_
      )
  @final
  def astype(
      self, dtype, copy: bool_t | None = None, errors: IgnoreRaise = "raise"
  ) -> Self:
      """
      Cast a pandas object to a specified dtype ``dtype``.

      Parameters
      ----------
      dtype : str, data type, Series or Mapping of column name -> data type
          Use a str, numpy.dtype, pandas.ExtensionDtype or Python type to
          cast entire pandas object to the same type. Alternatively, use a
          mapping, e.g. {col: dtype, ...}, where col is a column label and dtype is
          a numpy.dtype or Python type to cast one or more of the DataFrame's
          columns to column-specific types.
      copy : bool, default True
          Return a copy when ``copy=True`` (be very careful setting
          ``copy=False`` as changes to values then may propagate to other
          pandas objects).

          .. note::
              The `copy` keyword will change behavior in pandas 3.0.
              `Copy-on-Write
              <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
              will be enabled by default, which means that all methods with a
              `copy` keyword will use a lazy copy mechanism to defer the copy and
              ignore the `copy` keyword. The `copy` keyword will be removed in a
              future version of pandas.

              You can already get the future behavior and improvements through
              enabling copy on write ``pd.options.mode.copy_on_write = True``
      errors : {'raise', 'ignore'}, default 'raise'
          Control raising of exceptions on invalid data for provided dtype.

          - ``raise`` : allow exceptions to be raised
          - ``ignore`` : suppress exceptions. On error return original object.

      Returns
      -------
      same type as caller

      Raises
      ------
      KeyError
          If ``dtype`` is a mapping whose keys are not the Series name (for a
          Series) or are not all column labels (for a DataFrame).

      See Also
      --------
      to_datetime : Convert argument to datetime.
      to_timedelta : Convert argument to timedelta.
      to_numeric : Convert argument to a numeric type.
      numpy.ndarray.astype : Cast a numpy array to a specified type.

      Notes
      -----
      .. versionchanged:: 2.0.0

          Using ``astype`` to convert from timezone-naive dtype to
          timezone-aware dtype will raise an exception.
          Use :meth:`Series.dt.tz_localize` instead.

      Examples
      --------
      Create a DataFrame:

      >>> d = {'col1': [1, 2], 'col2': [3, 4]}
      >>> df = pd.DataFrame(data=d)
      >>> df.dtypes
      col1    int64
      col2    int64
      dtype: object

      Cast all columns to int32:

      >>> df.astype('int32').dtypes
      col1    int32
      col2    int32
      dtype: object

      Cast col1 to int32 using a dictionary:

      >>> df.astype({'col1': 'int32'}).dtypes
      col1    int32
      col2    int64
      dtype: object

      Create a series:

      >>> ser = pd.Series([1, 2], dtype='int32')
      >>> ser
      0    1
      1    2
      dtype: int32
      >>> ser.astype('int64')
      0    1
      1    2
      dtype: int64

      Convert to categorical type:

      >>> ser.astype('category')
      0    1
      1    2
      dtype: category
      Categories (2, int32): [1, 2]

      Convert to ordered categorical type with custom ordering:

      >>> from pandas.api.types import CategoricalDtype
      >>> cat_dtype = CategoricalDtype(
      ...     categories=[2, 1], ordered=True)
      >>> ser.astype(cat_dtype)
      0    1
      1    2
      dtype: category
      Categories (2, int64): [2 < 1]

      Create a series of dates:

      >>> ser_date = pd.Series(pd.date_range('20200101', periods=3))
      >>> ser_date
      0   2020-01-01
      1   2020-01-02
      2   2020-01-03
      dtype: datetime64[ns]
      """
      # Under Copy-on-Write an explicit ``copy=True`` is turned into
      # ``copy=False``.
      if copy and using_copy_on_write():
          copy = False

      if is_dict_like(dtype):
          if self.ndim == 1:  # i.e. Series
              if len(dtype) > 1 or self.name not in dtype:
                  raise KeyError(
                      "Only the Series name can be used for "
                      "the key in Series dtype mappings."
                  )
              new_type = dtype[self.name]
              return self.astype(new_type, copy, errors)

          # GH#44417 cast to Series so we can use .iat below, which is
          # positional and pairs with the enumerate() over self.items().
          # NOTE(review): the original comment was truncated ("robust in
          # case we"); presumably it referred to duplicate column labels --
          # confirm against GH#44417.
          from pandas import Series

          dtype_ser = Series(dtype, dtype=object)

          # every key of the mapping must be an existing column label
          for col_name in dtype_ser.index:
              if col_name not in self:
                  raise KeyError(
                      "Only a column name can be used for the "
                      "key in a dtype mappings argument. "
                      f"'{col_name}' not found in columns."
                  )

          # align the requested dtypes with the column order; columns that
          # were not mentioned get a missing entry and are copied unchanged
          dtype_ser = dtype_ser.reindex(self.columns, fill_value=None, copy=False)

          results = []
          for i, (col_name, col) in enumerate(self.items()):
              cdt = dtype_ser.iat[i]
              if isna(cdt):
                  res_col = col.copy(deep=copy)
              else:
                  try:
                      res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
                  except ValueError as ex:
                      # add the offending column to the message, then re-raise
                      ex.args = (
                          f"{ex}: Error while type casting for column '{col_name}'",
                      )
                      raise
              results.append(res_col)

      elif is_extension_array_dtype(dtype) and self.ndim > 1:
          # TODO(EA2D): special case not needed with 2D EAs
          dtype = pandas_dtype(dtype)
          if isinstance(dtype, ExtensionDtype) and all(
              arr.dtype == dtype for arr in self._mgr.arrays
          ):
              # every array already has the target dtype: nothing to cast
              return self.copy(deep=copy)
          # GH 18099/22869: columnwise conversion to extension dtype
          # GH 24704: self.items handles duplicate column names
          results = [
              ser.astype(dtype, copy=copy, errors=errors) for _, ser in self.items()
          ]

      else:
          # else, only a single dtype is given
          new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
          res = self._constructor_from_mgr(new_data, axes=new_data.axes)
          return res.__finalize__(self, method="astype")

      # GH 33113: handle empty frame or series
      if not results:
          return self.copy(deep=None)

      # GH 19920: retain column metadata after concat
      result = concat(results, axis=1, copy=False)
      # GH#40810 retain subclass
      # error: Incompatible types in assignment
      # (expression has type "Self", variable has type "DataFrame")
      result = self._constructor(result)  # type: ignore[assignment]
      result.columns = self.columns
      result = result.__finalize__(self, method="astype")
      # https://github.com/python/mypy/issues/8354
      return cast(Self, result)
  @final
  def copy(self, deep: bool_t | None = True) -> Self:
      """
      Make a copy of this object's indices and data.

      When ``deep=True`` (default), a new object will be created with a
      copy of the calling object's data and indices. Modifications to
      the data or indices of the copy will not be reflected in the
      original object (see notes below).

      When ``deep=False``, a new object will be created without copying
      the calling object's data or index (only references to the data
      and index are copied). Any changes to the data of the original
      will be reflected in the shallow copy (and vice versa).

      .. note::
          The ``deep=False`` behaviour as described above will change
          in pandas 3.0. `Copy-on-Write
          <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
          will be enabled by default, which means that the "shallow" copy
          that is returned with ``deep=False`` will still avoid making
          an eager copy, but changes to the data of the original will *no*
          longer be reflected in the shallow copy (or vice versa). Instead,
          it makes use of a lazy (deferred) copy mechanism that will copy
          the data only when any changes to the original or shallow copy are
          made.

          You can already get the future behavior and improvements through
          enabling copy on write ``pd.options.mode.copy_on_write = True``

      Parameters
      ----------
      deep : bool, default True
          Make a deep copy, including a copy of the data and the indices.
          With ``deep=False`` neither the indices nor the data are copied.

      Returns
      -------
      Series or DataFrame
          Object type matches caller.

      Notes
      -----
      When ``deep=True``, data is copied but actual Python objects
      will not be copied recursively, only the reference to the object.
      This is in contrast to `copy.deepcopy` in the Standard Library,
      which recursively copies object data (see examples below).

      While ``Index`` objects are copied when ``deep=True``, the underlying
      numpy array is not copied for performance reasons. Since ``Index`` is
      immutable, the underlying data can be safely shared and a copy
      is not needed.

      Since pandas is not thread safe, see the
      :ref:`gotchas <gotchas.thread-safety>` when copying in a threading
      environment.

      When ``copy_on_write`` in pandas config is set to ``True``, the
      ``copy_on_write`` config takes effect even when ``deep=False``.
      This means that any changes to the copied data would make a new copy
      of the data upon write (and vice versa). Changes made to either the
      original or copied variable would not be reflected in the counterpart.
      See :ref:`Copy_on_Write <copy_on_write>` for more information.

      Examples
      --------
      >>> s = pd.Series([1, 2], index=["a", "b"])
      >>> s
      a    1
      b    2
      dtype: int64

      >>> s_copy = s.copy()
      >>> s_copy
      a    1
      b    2
      dtype: int64

      **Shallow copy versus default (deep) copy:**

      >>> s = pd.Series([1, 2], index=["a", "b"])
      >>> deep = s.copy()
      >>> shallow = s.copy(deep=False)

      Shallow copy shares data and index with original.

      >>> s is shallow
      False
      >>> s.values is shallow.values and s.index is shallow.index
      True

      Deep copy has own copy of data and index.

      >>> s is deep
      False
      >>> s.values is deep.values or s.index is deep.index
      False

      Updates to the data shared by shallow copy and original is reflected
      in both (NOTE: this will no longer be true for pandas >= 3.0);
      deep copy remains unchanged.

      >>> s.iloc[0] = 3
      >>> shallow.iloc[1] = 4
      >>> s
      a    3
      b    4
      dtype: int64
      >>> shallow
      a    3
      b    4
      dtype: int64
      >>> deep
      a    1
      b    2
      dtype: int64

      Note that when copying an object containing Python objects, a deep copy
      will copy the data, but will not do so recursively. Updating a nested
      data object will be reflected in the deep copy.

      >>> s = pd.Series([[1, 2], [3, 4]])
      >>> deep = s.copy()
      >>> s[0][0] = 10
      >>> s
      0    [10, 2]
      1     [3, 4]
      dtype: object
      >>> deep
      0    [10, 2]
      1     [3, 4]
      dtype: object

      **Copy-on-Write is set to true**, the shallow copy is not modified
      when the original data is changed:

      >>> with pd.option_context("mode.copy_on_write", True):
      ...     s = pd.Series([1, 2], index=["a", "b"])
      ...     copy = s.copy(deep=False)
      ...     s.iloc[0] = 100
      ...     s
      a    100
      b      2
      dtype: int64
      >>> copy
      a    1
      b    2
      dtype: int64
      """
      copied_mgr = self._mgr.copy(deep=deep)
      # the item cache of the original is dropped once the manager is copied
      self._clear_item_cache()
      duplicate = self._constructor_from_mgr(copied_mgr, axes=copied_mgr.axes)
      return duplicate.__finalize__(self, method="copy")
  5870. @final
  5871. def __copy__(self, deep: bool_t = True) -> Self:
  5872. return self.copy(deep=deep)
  5873. @final
  5874. def __deepcopy__(self, memo=None) -> Self:
  5875. """
  5876. Parameters
  5877. ----------
  5878. memo, default None
  5879. Standard signature. Unused
  5880. """
  5881. return self.copy(deep=True)
  @final
  def infer_objects(self, copy: bool_t | None = None) -> Self:
      """
      Attempt to infer better dtypes for object columns.

      Attempts soft conversion of object-dtyped
      columns, leaving non-object and unconvertible
      columns unchanged. The inference rules are the
      same as during normal Series/DataFrame construction.

      Parameters
      ----------
      copy : bool, default True
          Whether to make a copy for non-object or non-inferable columns
          or Series.

          .. note::
              The `copy` keyword will change behavior in pandas 3.0.
              `Copy-on-Write
              <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
              will be enabled by default, which means that all methods with a
              `copy` keyword will use a lazy copy mechanism to defer the copy and
              ignore the `copy` keyword. The `copy` keyword will be removed in a
              future version of pandas.

              You can already get the future behavior and improvements through
              enabling copy on write ``pd.options.mode.copy_on_write = True``

      Returns
      -------
      same type as input object

      See Also
      --------
      to_datetime : Convert argument to datetime.
      to_timedelta : Convert argument to timedelta.
      to_numeric : Convert argument to numeric type.
      convert_dtypes : Convert argument to best possible dtype.

      Examples
      --------
      >>> df = pd.DataFrame({"A": ["a", 1, 2, 3]})
      >>> df = df.iloc[1:]
      >>> df
         A
      1  1
      2  2
      3  3

      >>> df.dtypes
      A    object
      dtype: object

      >>> df.infer_objects().dtypes
      A    int64
      dtype: object
      """
      converted_mgr = self._mgr.convert(copy=copy)
      inferred = self._constructor_from_mgr(converted_mgr, axes=converted_mgr.axes)
      return inferred.__finalize__(self, method="infer_objects")
  @final
  def convert_dtypes(
      self,
      infer_objects: bool_t = True,
      convert_string: bool_t = True,
      convert_integer: bool_t = True,
      convert_boolean: bool_t = True,
      convert_floating: bool_t = True,
      dtype_backend: DtypeBackend = "numpy_nullable",
  ) -> Self:
      """
      Convert columns to the best possible dtypes using dtypes supporting ``pd.NA``.

      Parameters
      ----------
      infer_objects : bool, default True
          Whether object dtypes should be converted to the best possible types.
      convert_string : bool, default True
          Whether object dtypes should be converted to ``StringDtype()``.
      convert_integer : bool, default True
          Whether, if possible, conversion can be done to integer extension types.
      convert_boolean : bool, default True
          Whether object dtypes should be converted to ``BooleanDtype()``.
      convert_floating : bool, default True
          Whether, if possible, conversion can be done to floating extension types.
          If `convert_integer` is also True, preference will be given to integer
          dtypes if the floats can be faithfully casted to integers.
      dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
          Back-end data type applied to the resultant :class:`DataFrame`
          (still experimental). Behaviour is as follows:

          * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
            (default).
          * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
            DataFrame.

          .. versionadded:: 2.0

      Returns
      -------
      Series or DataFrame
          Copy of input object with new dtype.

      See Also
      --------
      infer_objects : Infer dtypes of objects.
      to_datetime : Convert argument to datetime.
      to_timedelta : Convert argument to timedelta.
      to_numeric : Convert argument to a numeric type.

      Notes
      -----
      By default, ``convert_dtypes`` will attempt to convert a Series (or each
      Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
      ``convert_string``, ``convert_integer``, ``convert_boolean`` and
      ``convert_floating``, it is possible to turn off individual conversions
      to ``StringDtype``, the integer extension types, ``BooleanDtype``
      or floating extension types, respectively.

      For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
      rules as during normal Series/DataFrame construction.  Then, if possible,
      convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer
      or floating extension type, otherwise leave as ``object``.

      If the dtype is integer, convert to an appropriate integer extension type.

      If the dtype is numeric, and consists of all integers, convert to an
      appropriate integer extension type. Otherwise, convert to an
      appropriate floating extension type.

      In the future, as new dtypes are added that support ``pd.NA``, the results
      of this method will change to support those new dtypes.

      Examples
      --------
      >>> df = pd.DataFrame(
      ...     {
      ...         "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
      ...         "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
      ...         "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
      ...         "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
      ...         "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
      ...         "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
      ...     }
      ... )

      Start with a DataFrame with default dtypes.

      >>> df
         a  b      c    d     e      f
      0  1  x   True    h  10.0    NaN
      1  2  y  False    i   NaN  100.5
      2  3  z    NaN  NaN  20.0  200.0

      >>> df.dtypes
      a      int32
      b     object
      c     object
      d     object
      e    float64
      f    float64
      dtype: object

      Convert the DataFrame to use best possible dtypes.

      >>> dfn = df.convert_dtypes()
      >>> dfn
         a  b      c     d     e      f
      0  1  x   True     h    10   <NA>
      1  2  y  False     i  <NA>  100.5
      2  3  z   <NA>  <NA>    20  200.0

      >>> dfn.dtypes
      a             Int32
      b    string[python]
      c           boolean
      d    string[python]
      e             Int64
      f           Float64
      dtype: object

      Start with a Series of strings and missing data represented by ``np.nan``.

      >>> s = pd.Series(["a", "b", np.nan])
      >>> s
      0      a
      1      b
      2    NaN
      dtype: object

      Obtain a Series with dtype ``StringDtype``.

      >>> s.convert_dtypes()
      0       a
      1       b
      2    <NA>
      dtype: string
      """
      check_dtype_backend(dtype_backend)

      # every option is forwarded to the manager by keyword, unchanged
      conversion_options = {
          "infer_objects": infer_objects,
          "convert_string": convert_string,
          "convert_integer": convert_integer,
          "convert_boolean": convert_boolean,
          "convert_floating": convert_floating,
          "dtype_backend": dtype_backend,
      }
      converted_mgr = self._mgr.convert_dtypes(  # type: ignore[union-attr]
          **conversion_options
      )
      converted = self._constructor_from_mgr(converted_mgr, axes=converted_mgr.axes)
      return converted.__finalize__(self, method="convert_dtypes")
  6061. # ----------------------------------------------------------------------
  6062. # Filling NA's
  6063. def _deprecate_downcast(self, downcast, method_name: str):
  6064. # GH#40988
  6065. if downcast is not lib.no_default:
  6066. warnings.warn(
  6067. f"The 'downcast' keyword in {method_name} is deprecated and "
  6068. "will be removed in a future version. Use "
  6069. "res.infer_objects(copy=False) to infer non-object dtype, or "
  6070. "pd.to_numeric with the 'downcast' keyword to downcast numeric "
  6071. "results.",
  6072. FutureWarning,
  6073. stacklevel=find_stack_level(),
  6074. )
  6075. else:
  6076. downcast = None
  6077. return downcast
  @final
  def _pad_or_backfill(
      self,
      method: Literal["ffill", "bfill", "pad", "backfill"],
      *,
      axis: None | Axis = None,
      inplace: bool_t = False,
      limit: None | int = None,
      limit_area: Literal["inside", "outside"] | None = None,
      downcast: dict | None = None,
  ):
      """
      Forward- or backward-fill along ``axis`` via the block manager.

      ``axis`` defaults to 0. For ``axis=1`` on data that is not a single
      block, the fill is done on the transpose and transposed back; that path
      does not support ``inplace`` and does not forward ``downcast``.

      Raises
      ------
      NotImplementedError
          If ``inplace`` is True on the transposing path.
      """
      axis_number = self._get_axis_number(0 if axis is None else axis)
      fill_method = clean_fill_method(method)

      if not self._mgr.is_single_block and axis_number == 1:
          # e.g. test_align_fill_method
          # TODO(3.0): once downcast is removed, we can do the .T
          #  in all axis=1 cases, and remove axis kward from mgr.pad_or_backfill.
          if inplace:
              raise NotImplementedError()
          transposed = self.T
          filled = transposed._pad_or_backfill(
              method=fill_method, limit=limit, limit_area=limit_area
          )
          return filled.T

      filled_mgr = self._mgr.pad_or_backfill(
          method=fill_method,
          axis=self._get_block_manager_axis(axis_number),
          limit=limit,
          limit_area=limit_area,
          inplace=inplace,
          downcast=downcast,
      )
      filled_obj = self._constructor_from_mgr(filled_mgr, axes=filled_mgr.axes)
      if not inplace:
          return filled_obj.__finalize__(self, method="fillna")
      return self._update_inplace(filled_obj)
  # Typing-only overloads for ``fillna``: the declared return type depends on
  # ``inplace``. ``downcast`` is annotated with ``lib.NoDefault`` as well so
  # that these stubs accept the same values as the implementation signature
  # (whose default is ``lib.no_default``).

  # inplace=False (the default): a new object is returned.
  @overload
  def fillna(
      self,
      value: Hashable | Mapping | Series | DataFrame = ...,
      *,
      method: FillnaOptions | None = ...,
      axis: Axis | None = ...,
      inplace: Literal[False] = ...,
      limit: int | None = ...,
      downcast: dict | None | lib.NoDefault = ...,
  ) -> Self:
      ...

  # inplace=True: nothing is returned.
  @overload
  def fillna(
      self,
      value: Hashable | Mapping | Series | DataFrame = ...,
      *,
      method: FillnaOptions | None = ...,
      axis: Axis | None = ...,
      inplace: Literal[True],
      limit: int | None = ...,
      downcast: dict | None | lib.NoDefault = ...,
  ) -> None:
      ...

  # inplace only known to be a bool: either outcome is possible.
  @overload
  def fillna(
      self,
      value: Hashable | Mapping | Series | DataFrame = ...,
      *,
      method: FillnaOptions | None = ...,
      axis: Axis | None = ...,
      inplace: bool_t = ...,
      limit: int | None = ...,
      downcast: dict | None | lib.NoDefault = ...,
  ) -> Self | None:
      ...
  @final
  @doc(
      klass=_shared_doc_kwargs["klass"],
      axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
  )
  def fillna(
      self,
      value: Hashable | Mapping | Series | DataFrame | None = None,
      *,
      method: FillnaOptions | None = None,
      axis: Axis | None = None,
      inplace: bool_t = False,
      limit: int | None = None,
      downcast: dict | None | lib.NoDefault = lib.no_default,
  ) -> Self | None:
      """
      Fill NA/NaN values using the specified method.

      Parameters
      ----------
      value : scalar, dict, Series, or DataFrame
          Value to use to fill holes (e.g. 0), alternately a
          dict/Series/DataFrame of values specifying which value to use for
          each index (for a Series) or column (for a DataFrame).  Values not
          in the dict/Series/DataFrame will not be filled. This value cannot
          be a list.
      method : {{'backfill', 'bfill', 'ffill', None}}, default None
          Method to use for filling holes in reindexed Series:

          * ffill: propagate last valid observation forward to next valid.
          * backfill / bfill: use next valid observation to fill gap.

          .. deprecated:: 2.1.0
              Use ffill or bfill instead.

      axis : {axes_single_arg}
          Axis along which to fill missing values. For `Series`
          this parameter is unused and defaults to 0.
      inplace : bool, default False
          If True, fill in-place. Note: this will modify any
          other views on this object (e.g., a no-copy slice for a column in a
          DataFrame).
      limit : int, default None
          If method is specified, this is the maximum number of consecutive
          NaN values to forward/backward fill. In other words, if there is
          a gap with more than this number of consecutive NaNs, it will only
          be partially filled. If method is not specified, this is the
          maximum number of entries along the entire axis where NaNs will be
          filled. Must be greater than 0 if not None.
      downcast : dict, default is None
          A dict of item->dtype of what to downcast if possible,
          or the string 'infer' which will try to downcast to an appropriate
          equal type (e.g. float64 to int64 if possible).

          .. deprecated:: 2.2.0

      Returns
      -------
      {klass} or None
          Object with missing values filled or None if ``inplace=True``.

      See Also
      --------
      ffill : Fill values by propagating the last valid observation to next valid.
      bfill : Fill values by using the next valid observation to fill the gap.
      interpolate : Fill NaN values using interpolation.
      reindex : Conform object to new index.
      asfreq : Convert TimeSeries to specified frequency.

      Examples
      --------
      >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0],
      ...                    [3, 4, np.nan, 1],
      ...                    [np.nan, np.nan, np.nan, np.nan],
      ...                    [np.nan, 3, np.nan, 4]],
      ...                   columns=list("ABCD"))
      >>> df
           A    B   C    D
      0  NaN  2.0 NaN  0.0
      1  3.0  4.0 NaN  1.0
      2  NaN  NaN NaN  NaN
      3  NaN  3.0 NaN  4.0

      Replace all NaN elements with 0s.

      >>> df.fillna(0)
           A    B    C    D
      0  0.0  2.0  0.0  0.0
      1  3.0  4.0  0.0  1.0
      2  0.0  0.0  0.0  0.0
      3  0.0  3.0  0.0  4.0

      Replace all NaN elements in column 'A', 'B', 'C', and 'D', with 0, 1,
      2, and 3 respectively.

      >>> values = {{"A": 0, "B": 1, "C": 2, "D": 3}}
      >>> df.fillna(value=values)
           A    B    C    D
      0  0.0  2.0  2.0  0.0
      1  3.0  4.0  2.0  1.0
      2  0.0  1.0  2.0  3.0
      3  0.0  3.0  2.0  4.0

      Only replace the first NaN element.

      >>> df.fillna(value=values, limit=1)
           A    B    C    D
      0  0.0  2.0  2.0  0.0
      1  3.0  4.0  NaN  1.0
      2  NaN  1.0  NaN  3.0
      3  NaN  3.0  NaN  4.0

      When filling using a DataFrame, replacement happens along
      the same column names and same indices

      >>> df2 = pd.DataFrame(np.zeros((4, 4)), columns=list("ABCE"))
      >>> df.fillna(df2)
           A    B    C    D
      0  0.0  2.0  0.0  0.0
      1  3.0  4.0  0.0  1.0
      2  0.0  0.0  0.0  NaN
      3  0.0  3.0  0.0  4.0

      Note that column D is not affected since it is not present in df2.
      """
      inplace = validate_bool_kwarg(inplace, "inplace")

      # Chained-assignment warning. The check compares sys.getrefcount(self)
      # as observed from this frame against REF_COUNT, so it is kept inline
      # rather than factored into a helper.
      if inplace and not PYPY and not WARNING_CHECK_DISABLED:
          if using_copy_on_write():
              if sys.getrefcount(self) <= REF_COUNT:
                  warnings.warn(
                      _chained_assignment_method_msg,
                      ChainedAssignmentError,
                      stacklevel=2,
                  )
          elif self._is_view_after_cow_rules():
              observed_refs = sys.getrefcount(self)
              allowed_refs = REF_COUNT
              if isinstance(self, ABCSeries) and _check_cacher(self):
                  # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
                  allowed_refs += 1
              if observed_refs <= allowed_refs:
                  warnings.warn(
                      _chained_assignment_warning_method_msg,
                      FutureWarning,
                      stacklevel=2,
                  )

      value, method = validate_fillna_kwargs(value, method)
      if method is not None:
          warnings.warn(
              f"{type(self).__name__}.fillna with 'method' is deprecated and "
              "will raise in a future version. Use obj.ffill() or obj.bfill() "
              "instead.",
              FutureWarning,
              stacklevel=find_stack_level(),
          )

      # Remember whether the caller passed ``downcast`` at all before it is
      # normalized; the per-column recursion below needs the distinction.
      was_no_default = downcast is lib.no_default
      downcast = self._deprecate_downcast(downcast, "fillna")

      # set the default here, so functions examining the signature
      # can detect if something was set (e.g. in groupby) (GH9221)
      axis = self._get_axis_number(0 if axis is None else axis)

      if value is None:
          # No fill value: delegate to the shared pad/backfill implementation.
          return self._pad_or_backfill(
              # error: Argument 1 to "_pad_or_backfill" of "NDFrame" has
              # incompatible type "Optional[Literal['backfill', 'bfill', 'ffill',
              # 'pad']]"; expected "Literal['ffill', 'bfill', 'pad', 'backfill']"
              method,  # type: ignore[arg-type]
              axis=axis,
              limit=limit,
              inplace=inplace,
              # error: Argument "downcast" to "_pad_or_backfill" of "NDFrame"
              # has incompatible type "Union[Dict[Any, Any], None,
              # Literal[_NoDefault.no_default]]"; expected
              # "Optional[Dict[Any, Any]]"
              downcast=downcast,  # type: ignore[arg-type]
          )

      if self.ndim == 1:
          if isinstance(value, (dict, ABCSeries)):
              if len(value) == 0:
                  # test_fillna_nonscalar
                  return None if inplace else self.copy(deep=None)
              from pandas import Series

              # Align the fill values with our index and drop to the raw
              # array of values.
              value = Series(value).reindex(self.index, copy=False)._values
          elif is_list_like(value):
              raise TypeError(
                  '"value" parameter must be a scalar, dict '
                  "or Series, but you passed a "
                  f'"{type(value).__name__}"'
              )

          filled_mgr = self._mgr.fillna(
              value=value, limit=limit, inplace=inplace, downcast=downcast
          )

      elif isinstance(value, (dict, ABCSeries)):
          if axis == 1:
              raise NotImplementedError(
                  "Currently only can fill "
                  "with dict/Series column "
                  "by column"
              )

          if using_copy_on_write():
              result = self.copy(deep=None)
          elif inplace:
              result = self
          else:
              result = self.copy()

          downcast_is_dict = isinstance(downcast, dict)
          for label, fill in value.items():
              if label not in result:
                  continue

              if was_no_default:
                  # Pass the sentinel on so the nested call does not warn.
                  label_downcast = lib.no_default
              else:
                  label_downcast = (
                      # error: Incompatible types in assignment (expression
                      # has type "Union[Dict[Any, Any], None,
                      # Literal[_NoDefault.no_default], Any]", variable has
                      # type "_NoDefault")
                      downcast  # type: ignore[assignment]
                      if not downcast_is_dict
                      # error: Item "None" of "Optional[Dict[Any, Any]]" has
                      # no attribute "get"
                      else downcast.get(label)  # type: ignore[union-attr]
                  )

              filled = result[label].fillna(
                  fill, limit=limit, downcast=label_downcast
              )

              if not inplace:
                  result[label] = filled
                  continue

              # We can write into our existing column(s) iff dtype
              # was preserved.
              if isinstance(filled, ABCSeries):
                  # i.e. 'label' only shows up once in self.columns
                  if filled.dtype == result[label].dtype:
                      result.loc[:, label] = filled
                  else:
                      # Different dtype -> no way to do inplace.
                      result[label] = filled
                  continue

              # see test_fillna_dict_inplace_nonunique_columns
              # Normalize whatever get_loc returned into integer positions.
              positions = result.columns.get_loc(label)
              if isinstance(positions, slice):
                  positions = np.arange(self.shape[1])[positions]
              elif isinstance(positions, np.ndarray) and positions.dtype.kind == "b":
                  positions = positions.nonzero()[0]
              elif not (
                  isinstance(positions, np.ndarray) and positions.dtype.kind == "i"
              ):
                  # Should never be reached, but let's cover our bases
                  raise NotImplementedError(
                      "Unexpected get_loc result, please report a bug at "
                      "https://github.com/pandas-dev/pandas"
                  )

              for offset, position in enumerate(positions):
                  filled_col = filled.iloc[:, offset]
                  current_col = self.iloc[:, position]
                  if filled_col.dtype == current_col.dtype:
                      result.iloc[:, position] = filled_col
                  else:
                      result.isetitem(position, filled_col)

          return self._update_inplace(result) if inplace else result

      elif not is_list_like(value):
          if axis == 1:
              # Scalar fill across columns: fill the transpose, flip back.
              result = self.T.fillna(value=value, limit=limit).T
              filled_mgr = result._mgr
          else:
              filled_mgr = self._mgr.fillna(
                  value=value, limit=limit, inplace=inplace, downcast=downcast
              )
      elif isinstance(value, ABCDataFrame) and self.ndim == 2:
          filled_mgr = self.where(self.notna(), value)._mgr
      else:
          raise ValueError(f"invalid fill value with a {type(value)}")

      result = self._constructor_from_mgr(filled_mgr, axes=filled_mgr.axes)
      if not inplace:
          return result.__finalize__(self, method="fillna")
      return self._update_inplace(result)
  # Typing-only overloads for ``ffill``: the declared return type depends on
  # ``inplace``.

  # inplace=False (the default): a new object is returned.
  @overload
  def ffill(
      self,
      *,
      axis: None | Axis = ...,
      inplace: Literal[False] = ...,
      limit: None | int = ...,
      limit_area: Literal["inside", "outside"] | None = ...,
      downcast: dict | None | lib.NoDefault = ...,
  ) -> Self: ...

  # inplace=True: nothing is returned.
  @overload
  def ffill(
      self,
      *,
      axis: None | Axis = ...,
      inplace: Literal[True],
      limit: None | int = ...,
      limit_area: Literal["inside", "outside"] | None = ...,
      downcast: dict | None | lib.NoDefault = ...,
  ) -> None: ...

  # inplace only known to be a bool: either outcome is possible.
  @overload
  def ffill(
      self,
      *,
      axis: None | Axis = ...,
      inplace: bool_t = ...,
      limit: None | int = ...,
      limit_area: Literal["inside", "outside"] | None = ...,
      downcast: dict | None | lib.NoDefault = ...,
  ) -> Self | None: ...
  @final
  @doc(
      klass=_shared_doc_kwargs["klass"],
      axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
  )
  def ffill(
      self,
      *,
      axis: None | Axis = None,
      inplace: bool_t = False,
      limit: None | int = None,
      limit_area: Literal["inside", "outside"] | None = None,
      downcast: dict | None | lib.NoDefault = lib.no_default,
  ) -> Self | None:
      """
      Fill NA/NaN values by propagating the last valid observation to next valid.

      Parameters
      ----------
      axis : {axes_single_arg}
          Axis along which to fill missing values. For `Series`
          this parameter is unused and defaults to 0.
      inplace : bool, default False
          If True, fill in-place. Note: this will modify any
          other views on this object (e.g., a no-copy slice for a column in a
          DataFrame).
      limit : int, default None
          The maximum number of consecutive NaN values to forward fill.
          In other words, if there is a gap with more than this number of
          consecutive NaNs, it will only be partially filled.
          Must be greater than 0 if not None.
      limit_area : {{`None`, 'inside', 'outside'}}, default None
          If limit is specified, consecutive NaNs will be filled with this
          restriction.

          * ``None``: No fill restriction.
          * 'inside': Only fill NaNs surrounded by valid values
            (interpolate).
          * 'outside': Only fill NaNs outside valid values (extrapolate).

          .. versionadded:: 2.2.0

      downcast : dict, default is None
          A dict of item->dtype of what to downcast if possible,
          or the string 'infer' which will try to downcast to an appropriate
          equal type (e.g. float64 to int64 if possible).

          .. deprecated:: 2.2.0

      Returns
      -------
      {klass} or None
          Object with missing values filled or None if ``inplace=True``.

      Examples
      --------
      >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0],
      ...                    [3, 4, np.nan, 1],
      ...                    [np.nan, np.nan, np.nan, np.nan],
      ...                    [np.nan, 3, np.nan, 4]],
      ...                   columns=list("ABCD"))
      >>> df
           A    B   C    D
      0  NaN  2.0 NaN  0.0
      1  3.0  4.0 NaN  1.0
      2  NaN  NaN NaN  NaN
      3  NaN  3.0 NaN  4.0

      >>> df.ffill()
           A    B   C    D
      0  NaN  2.0 NaN  0.0
      1  3.0  4.0 NaN  1.0
      2  3.0  4.0 NaN  1.0
      3  3.0  3.0 NaN  4.0

      >>> ser = pd.Series([1, np.nan, 2, 3])
      >>> ser.ffill()
      0    1.0
      1    1.0
      2    2.0
      3    3.0
      dtype: float64
      """
      downcast = self._deprecate_downcast(downcast, "ffill")
      inplace = validate_bool_kwarg(inplace, "inplace")

      # Chained-assignment warning. The check compares sys.getrefcount(self)
      # as observed from this frame against REF_COUNT, so it is kept inline
      # rather than factored into a helper.
      if inplace and not PYPY and not WARNING_CHECK_DISABLED:
          if using_copy_on_write():
              if sys.getrefcount(self) <= REF_COUNT:
                  warnings.warn(
                      _chained_assignment_method_msg,
                      ChainedAssignmentError,
                      stacklevel=2,
                  )
          elif self._is_view_after_cow_rules():
              observed_refs = sys.getrefcount(self)
              allowed_refs = REF_COUNT
              if isinstance(self, ABCSeries) and _check_cacher(self):
                  # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
                  allowed_refs += 1
              if observed_refs <= allowed_refs:
                  warnings.warn(
                      _chained_assignment_warning_method_msg,
                      FutureWarning,
                      stacklevel=2,
                  )

      return self._pad_or_backfill(
          "ffill",
          axis=axis,
          inplace=inplace,
          limit=limit,
          limit_area=limit_area,
          # error: Argument "downcast" to "_pad_or_backfill" of "NDFrame"
          # has incompatible type "Union[Dict[Any, Any], None,
          # Literal[_NoDefault.no_default]]"; expected "Optional[Dict[Any, Any]]"
          downcast=downcast,  # type: ignore[arg-type]
      )
  @final
  @doc(klass=_shared_doc_kwargs["klass"])
  def pad(
      self,
      *,
      axis: None | Axis = None,
      inplace: bool_t = False,
      limit: None | int = None,
      downcast: dict | None | lib.NoDefault = lib.no_default,
  ) -> Self | None:
      """
      Fill NA/NaN values by propagating the last valid observation to next valid.

      .. deprecated:: 2.0

          {klass}.pad is deprecated. Use {klass}.ffill instead.

      Returns
      -------
      {klass} or None
          Object with missing values filled or None if ``inplace=True``.

      Examples
      --------
      Please see examples for :meth:`DataFrame.ffill` or :meth:`Series.ffill`.
      """
      deprecation_msg = (
          "DataFrame.pad/Series.pad is deprecated. Use "
          "DataFrame.ffill/Series.ffill instead"
      )
      warnings.warn(deprecation_msg, FutureWarning, stacklevel=find_stack_level())
      # Thin alias: every argument is handed to ``ffill`` unchanged.
      return self.ffill(
          axis=axis,
          inplace=inplace,
          limit=limit,
          downcast=downcast,
      )
  # Typing-only overloads for ``bfill``: the declared return type depends on
  # ``inplace``. All three overloads accept ``limit_area`` so that a call
  # such as ``bfill(inplace=True, limit_area="inside")`` resolves to the
  # ``-> None`` overload rather than falling through to ``Self | None``.

  # inplace=False (the default): a new object is returned.
  @overload
  def bfill(
      self,
      *,
      axis: None | Axis = ...,
      inplace: Literal[False] = ...,
      limit: None | int = ...,
      limit_area: Literal["inside", "outside"] | None = ...,
      downcast: dict | None | lib.NoDefault = ...,
  ) -> Self:
      ...

  # inplace=True: nothing is returned.
  @overload
  def bfill(
      self,
      *,
      axis: None | Axis = ...,
      inplace: Literal[True],
      limit: None | int = ...,
      limit_area: Literal["inside", "outside"] | None = ...,
      downcast: dict | None | lib.NoDefault = ...,
  ) -> None:
      ...

  # inplace only known to be a bool: either outcome is possible.
  @overload
  def bfill(
      self,
      *,
      axis: None | Axis = ...,
      inplace: bool_t = ...,
      limit: None | int = ...,
      limit_area: Literal["inside", "outside"] | None = ...,
      downcast: dict | None | lib.NoDefault = ...,
  ) -> Self | None:
      ...
  @final
  @doc(
      klass=_shared_doc_kwargs["klass"],
      axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
  )
  def bfill(
      self,
      *,
      axis: None | Axis = None,
      inplace: bool_t = False,
      limit: None | int = None,
      limit_area: Literal["inside", "outside"] | None = None,
      downcast: dict | None | lib.NoDefault = lib.no_default,
  ) -> Self | None:
      """
      Fill NA/NaN values by using the next valid observation to fill the gap.

      Parameters
      ----------
      axis : {axes_single_arg}
          Axis along which to fill missing values. For `Series`
          this parameter is unused and defaults to 0.
      inplace : bool, default False
          If True, fill in-place. Note: this will modify any
          other views on this object (e.g., a no-copy slice for a column in a
          DataFrame).
      limit : int, default None
          The maximum number of consecutive NaN values to backward fill.
          In other words, if there is a gap with more than this number of
          consecutive NaNs, it will only be partially filled.
          Must be greater than 0 if not None.
      limit_area : {{`None`, 'inside', 'outside'}}, default None
          If limit is specified, consecutive NaNs will be filled with this
          restriction.

          * ``None``: No fill restriction.
          * 'inside': Only fill NaNs surrounded by valid values
            (interpolate).
          * 'outside': Only fill NaNs outside valid values (extrapolate).

          .. versionadded:: 2.2.0

      downcast : dict, default is None
          A dict of item->dtype of what to downcast if possible,
          or the string 'infer' which will try to downcast to an appropriate
          equal type (e.g. float64 to int64 if possible).

          .. deprecated:: 2.2.0

      Returns
      -------
      {klass} or None
          Object with missing values filled or None if ``inplace=True``.

      Examples
      --------
      For Series:

      >>> s = pd.Series([1, None, None, 2])
      >>> s.bfill()
      0    1.0
      1    2.0
      2    2.0
      3    2.0
      dtype: float64
      >>> s.bfill(limit=1)
      0    1.0
      1    NaN
      2    2.0
      3    2.0
      dtype: float64

      With DataFrame:

      >>> df = pd.DataFrame({{'A': [1, None, None, 4], 'B': [None, 5, None, 7]}})
      >>> df
           A    B
      0  1.0  NaN
      1  NaN  5.0
      2  NaN  NaN
      3  4.0  7.0
      >>> df.bfill()
           A    B
      0  1.0  5.0
      1  4.0  5.0
      2  4.0  7.0
      3  4.0  7.0
      >>> df.bfill(limit=1)
           A    B
      0  1.0  5.0
      1  NaN  5.0
      2  4.0  7.0
      3  4.0  7.0
      """
      downcast = self._deprecate_downcast(downcast, "bfill")
      inplace = validate_bool_kwarg(inplace, "inplace")

      # Chained-assignment warning. The check compares sys.getrefcount(self)
      # as observed from this frame against REF_COUNT, so it is kept inline
      # rather than factored into a helper.
      if inplace and not PYPY and not WARNING_CHECK_DISABLED:
          if using_copy_on_write():
              if sys.getrefcount(self) <= REF_COUNT:
                  warnings.warn(
                      _chained_assignment_method_msg,
                      ChainedAssignmentError,
                      stacklevel=2,
                  )
          elif self._is_view_after_cow_rules():
              observed_refs = sys.getrefcount(self)
              allowed_refs = REF_COUNT
              if isinstance(self, ABCSeries) and _check_cacher(self):
                  # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
                  allowed_refs += 1
              if observed_refs <= allowed_refs:
                  warnings.warn(
                      _chained_assignment_warning_method_msg,
                      FutureWarning,
                      stacklevel=2,
                  )

      return self._pad_or_backfill(
          "bfill",
          axis=axis,
          inplace=inplace,
          limit=limit,
          limit_area=limit_area,
          # error: Argument "downcast" to "_pad_or_backfill" of "NDFrame"
          # has incompatible type "Union[Dict[Any, Any], None,
          # Literal[_NoDefault.no_default]]"; expected "Optional[Dict[Any, Any]]"
          downcast=downcast,  # type: ignore[arg-type]
      )
  @final
  @doc(klass=_shared_doc_kwargs["klass"])
  def backfill(
      self,
      *,
      axis: None | Axis = None,
      inplace: bool_t = False,
      limit: None | int = None,
      downcast: dict | None | lib.NoDefault = lib.no_default,
  ) -> Self | None:
      """
      Fill NA/NaN values by using the next valid observation to fill the gap.

      .. deprecated:: 2.0

          {klass}.backfill is deprecated. Use {klass}.bfill instead.

      Returns
      -------
      {klass} or None
          Object with missing values filled or None if ``inplace=True``.

      Examples
      --------
      Please see examples for :meth:`DataFrame.bfill` or :meth:`Series.bfill`.
      """
      deprecation_msg = (
          "DataFrame.backfill/Series.backfill is deprecated. Use "
          "DataFrame.bfill/Series.bfill instead"
      )
      warnings.warn(deprecation_msg, FutureWarning, stacklevel=find_stack_level())
      # Thin alias: every argument is handed to ``bfill`` unchanged.
      return self.bfill(
          axis=axis,
          inplace=inplace,
          limit=limit,
          downcast=downcast,
      )
  6788. @overload
  6789. def replace(
  6790. self,
  6791. to_replace=...,
  6792. value=...,
  6793. *,
  6794. inplace: Literal[False] = ...,
  6795. limit: int | None = ...,
  6796. regex: bool_t = ...,
  6797. method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,
  6798. ) -> Self:
  6799. ...
  6800. @overload
  6801. def replace(
  6802. self,
  6803. to_replace=...,
  6804. value=...,
  6805. *,
  6806. inplace: Literal[True],
  6807. limit: int | None = ...,
  6808. regex: bool_t = ...,
  6809. method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,
  6810. ) -> None:
  6811. ...
  6812. @overload
  6813. def replace(
  6814. self,
  6815. to_replace=...,
  6816. value=...,
  6817. *,
  6818. inplace: bool_t = ...,
  6819. limit: int | None = ...,
  6820. regex: bool_t = ...,
  6821. method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,
  6822. ) -> Self | None:
  6823. ...
  6824. @final
  6825. @doc(
  6826. _shared_docs["replace"],
  6827. klass=_shared_doc_kwargs["klass"],
  6828. inplace=_shared_doc_kwargs["inplace"],
  6829. )
  6830. def replace(
  6831. self,
  6832. to_replace=None,
  6833. value=lib.no_default,
  6834. *,
  6835. inplace: bool_t = False,
  6836. limit: int | None = None,
  6837. regex: bool_t = False,
  6838. method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default,
  6839. ) -> Self | None:
  6840. if method is not lib.no_default:
  6841. warnings.warn(
  6842. # GH#33302
  6843. f"The 'method' keyword in {type(self).__name__}.replace is "
  6844. "deprecated and will be removed in a future version.",
  6845. FutureWarning,
  6846. stacklevel=find_stack_level(),
  6847. )
  6848. elif limit is not None:
  6849. warnings.warn(
  6850. # GH#33302
  6851. f"The 'limit' keyword in {type(self).__name__}.replace is "
  6852. "deprecated and will be removed in a future version.",
  6853. FutureWarning,
  6854. stacklevel=find_stack_level(),
  6855. )
  6856. if (
  6857. value is lib.no_default
  6858. and method is lib.no_default
  6859. and not is_dict_like(to_replace)
  6860. and regex is False
  6861. ):
  6862. # case that goes through _replace_single and defaults to method="pad"
  6863. warnings.warn(
  6864. # GH#33302
  6865. f"{type(self).__name__}.replace without 'value' and with "
  6866. "non-dict-like 'to_replace' is deprecated "
  6867. "and will raise in a future version. "
  6868. "Explicitly specify the new values instead.",
  6869. FutureWarning,
  6870. stacklevel=find_stack_level(),
  6871. )
  6872. if not (
  6873. is_scalar(to_replace)
  6874. or is_re_compilable(to_replace)
  6875. or is_list_like(to_replace)
  6876. ):
  6877. raise TypeError(
  6878. "Expecting 'to_replace' to be either a scalar, array-like, "
  6879. "dict or None, got invalid type "
  6880. f"{repr(type(to_replace).__name__)}"
  6881. )
  6882. inplace = validate_bool_kwarg(inplace, "inplace")
  6883. if inplace:
  6884. if not PYPY and not WARNING_CHECK_DISABLED and using_copy_on_write():
  6885. if sys.getrefcount(self) <= REF_COUNT:
  6886. warnings.warn(
  6887. _chained_assignment_method_msg,
  6888. ChainedAssignmentError,
  6889. stacklevel=2,
  6890. )
  6891. elif (
  6892. not PYPY
  6893. and not WARNING_CHECK_DISABLED
  6894. and not using_copy_on_write()
  6895. and self._is_view_after_cow_rules()
  6896. ):
  6897. ctr = sys.getrefcount(self)
  6898. ref_count = REF_COUNT
  6899. if isinstance(self, ABCSeries) and _check_cacher(self):
  6900. # in non-CoW mode, chained Series access will populate the
  6901. # `_item_cache` which results in an increased ref count not below
  6902. # the threshold, while we still need to warn. We detect this case
  6903. # of a Series derived from a DataFrame through the presence of
  6904. # checking the `_cacher`
  6905. ref_count += 1
  6906. if ctr <= ref_count:
  6907. warnings.warn(
  6908. _chained_assignment_warning_method_msg,
  6909. FutureWarning,
  6910. stacklevel=2,
  6911. )
  6912. if not is_bool(regex) and to_replace is not None:
  6913. raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool")
  6914. if value is lib.no_default or method is not lib.no_default:
  6915. # GH#36984 if the user explicitly passes value=None we want to
  6916. # respect that. We have the corner case where the user explicitly
  6917. # passes value=None *and* a method, which we interpret as meaning
  6918. # they want the (documented) default behavior.
  6919. if method is lib.no_default:
  6920. # TODO: get this to show up as the default in the docs?
  6921. method = "pad"
  6922. # passing a single value that is scalar like
  6923. # when value is None (GH5319), for compat
  6924. if not is_dict_like(to_replace) and not is_dict_like(regex):
  6925. to_replace = [to_replace]
  6926. if isinstance(to_replace, (tuple, list)):
  6927. # TODO: Consider copy-on-write for non-replaced columns's here
  6928. if isinstance(self, ABCDataFrame):
  6929. from pandas import Series
  6930. result = self.apply(
  6931. Series._replace_single,
  6932. args=(to_replace, method, inplace, limit),
  6933. )
  6934. if inplace:
  6935. return None
  6936. return result
  6937. return self._replace_single(to_replace, method, inplace, limit)
  6938. if not is_dict_like(to_replace):
  6939. if not is_dict_like(regex):
  6940. raise TypeError(
  6941. 'If "to_replace" and "value" are both None '
  6942. 'and "to_replace" is not a list, then '
  6943. "regex must be a mapping"
  6944. )
  6945. to_replace = regex
  6946. regex = True
  6947. items = list(to_replace.items())
  6948. if items:
  6949. keys, values = zip(*items)
  6950. else:
  6951. # error: Incompatible types in assignment (expression has type
  6952. # "list[Never]", variable has type "tuple[Any, ...]")
  6953. keys, values = ([], []) # type: ignore[assignment]
  6954. are_mappings = [is_dict_like(v) for v in values]
  6955. if any(are_mappings):
  6956. if not all(are_mappings):
  6957. raise TypeError(
  6958. "If a nested mapping is passed, all values "
  6959. "of the top level mapping must be mappings"
  6960. )
  6961. # passed a nested dict/Series
  6962. to_rep_dict = {}
  6963. value_dict = {}
  6964. for k, v in items:
  6965. # error: Incompatible types in assignment (expression has type
  6966. # "list[Never]", variable has type "tuple[Any, ...]")
  6967. keys, values = list(zip(*v.items())) or ( # type: ignore[assignment]
  6968. [],
  6969. [],
  6970. )
  6971. to_rep_dict[k] = list(keys)
  6972. value_dict[k] = list(values)
  6973. to_replace, value = to_rep_dict, value_dict
  6974. else:
  6975. to_replace, value = keys, values
  6976. return self.replace(
  6977. to_replace, value, inplace=inplace, limit=limit, regex=regex
  6978. )
  6979. else:
  6980. # need a non-zero len on all axes
  6981. if not self.size:
  6982. if inplace:
  6983. return None
  6984. return self.copy(deep=None)
  6985. if is_dict_like(to_replace):
  6986. if is_dict_like(value): # {'A' : NA} -> {'A' : 0}
  6987. # Note: Checking below for `in foo.keys()` instead of
  6988. # `in foo` is needed for when we have a Series and not dict
  6989. mapping = {
  6990. col: (to_replace[col], value[col])
  6991. for col in to_replace.keys()
  6992. if col in value.keys() and col in self
  6993. }
  6994. return self._replace_columnwise(mapping, inplace, regex)
  6995. # {'A': NA} -> 0
  6996. elif not is_list_like(value):
  6997. # Operate column-wise
  6998. if self.ndim == 1:
  6999. raise ValueError(
  7000. "Series.replace cannot use dict-like to_replace "
  7001. "and non-None value"
  7002. )
  7003. mapping = {
  7004. col: (to_rep, value) for col, to_rep in to_replace.items()
  7005. }
  7006. return self._replace_columnwise(mapping, inplace, regex)
  7007. else:
  7008. raise TypeError("value argument must be scalar, dict, or Series")
  7009. elif is_list_like(to_replace):
  7010. if not is_list_like(value):
  7011. # e.g. to_replace = [NA, ''] and value is 0,
  7012. # so we replace NA with 0 and then replace '' with 0
  7013. value = [value] * len(to_replace)
  7014. # e.g. we have to_replace = [NA, ''] and value = [0, 'missing']
  7015. if len(to_replace) != len(value):
  7016. raise ValueError(
  7017. f"Replacement lists must match in length. "
  7018. f"Expecting {len(to_replace)} got {len(value)} "
  7019. )
  7020. new_data = self._mgr.replace_list(
  7021. src_list=to_replace,
  7022. dest_list=value,
  7023. inplace=inplace,
  7024. regex=regex,
  7025. )
  7026. elif to_replace is None:
  7027. if not (
  7028. is_re_compilable(regex)
  7029. or is_list_like(regex)
  7030. or is_dict_like(regex)
  7031. ):
  7032. raise TypeError(
  7033. f"'regex' must be a string or a compiled regular expression "
  7034. f"or a list or dict of strings or regular expressions, "
  7035. f"you passed a {repr(type(regex).__name__)}"
  7036. )
  7037. return self.replace(
  7038. regex, value, inplace=inplace, limit=limit, regex=True
  7039. )
  7040. else:
  7041. # dest iterable dict-like
  7042. if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1}
  7043. # Operate column-wise
  7044. if self.ndim == 1:
  7045. raise ValueError(
  7046. "Series.replace cannot use dict-value and "
  7047. "non-None to_replace"
  7048. )
  7049. mapping = {col: (to_replace, val) for col, val in value.items()}
  7050. return self._replace_columnwise(mapping, inplace, regex)
  7051. elif not is_list_like(value): # NA -> 0
  7052. regex = should_use_regex(regex, to_replace)
  7053. if regex:
  7054. new_data = self._mgr.replace_regex(
  7055. to_replace=to_replace,
  7056. value=value,
  7057. inplace=inplace,
  7058. )
  7059. else:
  7060. new_data = self._mgr.replace(
  7061. to_replace=to_replace, value=value, inplace=inplace
  7062. )
  7063. else:
  7064. raise TypeError(
  7065. f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}'
  7066. )
  7067. result = self._constructor_from_mgr(new_data, axes=new_data.axes)
  7068. if inplace:
  7069. return self._update_inplace(result)
  7070. else:
  7071. return result.__finalize__(self, method="replace")
  # Typing overload: ``inplace`` is ``Literal[False]`` (or omitted), so the call
  # is typed as returning ``Self``.
  @overload
  def interpolate(
      self,
      method: InterpolateOptions = ...,
      *,
      axis: Axis = ...,
      limit: int | None = ...,
      inplace: Literal[False] = ...,
      limit_direction: Literal["forward", "backward", "both"] | None = ...,
      limit_area: Literal["inside", "outside"] | None = ...,
      downcast: Literal["infer"] | None | lib.NoDefault = ...,
      **kwargs,
  ) -> Self:
      ...
  # Typing overload: ``inplace`` is ``Literal[True]`` (required keyword here),
  # so the call is typed as returning ``None``.
  @overload
  def interpolate(
      self,
      method: InterpolateOptions = ...,
      *,
      axis: Axis = ...,
      limit: int | None = ...,
      inplace: Literal[True],
      limit_direction: Literal["forward", "backward", "both"] | None = ...,
      limit_area: Literal["inside", "outside"] | None = ...,
      downcast: Literal["infer"] | None | lib.NoDefault = ...,
      **kwargs,
  ) -> None:
      ...
  # Typing overload: ``inplace`` is a plain ``bool_t`` whose value is not known
  # statically, so the call is typed as returning ``Self | None``.
  @overload
  def interpolate(
      self,
      method: InterpolateOptions = ...,
      *,
      axis: Axis = ...,
      limit: int | None = ...,
      inplace: bool_t = ...,
      limit_direction: Literal["forward", "backward", "both"] | None = ...,
      limit_area: Literal["inside", "outside"] | None = ...,
      downcast: Literal["infer"] | None | lib.NoDefault = ...,
      **kwargs,
  ) -> Self | None:
      ...
  @final
  def interpolate(
      self,
      method: InterpolateOptions = "linear",
      *,
      axis: Axis = 0,
      limit: int | None = None,
      inplace: bool_t = False,
      limit_direction: Literal["forward", "backward", "both"] | None = None,
      limit_area: Literal["inside", "outside"] | None = None,
      downcast: Literal["infer"] | None | lib.NoDefault = lib.no_default,
      **kwargs,
  ) -> Self | None:
      """
      Fill NaN values using an interpolation method.

      Please note that only ``method='linear'`` is supported for
      DataFrame/Series with a MultiIndex.

      Parameters
      ----------
      method : str, default 'linear'
          Interpolation technique to use. One of:

          * 'linear': Ignore the index and treat the values as equally
            spaced. This is the only method supported on MultiIndexes.
          * 'time': Works on daily and higher resolution data to interpolate
            given length of interval.
          * 'index', 'values': use the actual numerical values of the index.
          * 'pad': Fill in NaNs using existing values.
          * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
            'barycentric', 'polynomial': Passed to
            `scipy.interpolate.interp1d`, whereas 'spline' is passed to
            `scipy.interpolate.UnivariateSpline`. These methods use the numerical
            values of the index. Both 'polynomial' and 'spline' require that
            you also specify an `order` (int), e.g.
            ``df.interpolate(method='polynomial', order=5)``. Note that,
            `slinear` method in Pandas refers to the Scipy first order `spline`
            instead of Pandas first order `spline`.
          * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima',
            'cubicspline': Wrappers around the SciPy interpolation methods of
            similar names. See `Notes`.
          * 'from_derivatives': Refers to
            `scipy.interpolate.BPoly.from_derivatives`.

      axis : {0 or 'index', 1 or 'columns', None}, default 0
          Axis to interpolate along. For `Series` this parameter is unused
          and defaults to 0.
      limit : int, optional
          Maximum number of consecutive NaNs to fill. Must be greater than
          0.
      inplace : bool, default False
          Update the data in place if possible.
      limit_direction : {'forward', 'backward', 'both'}, optional
          Consecutive NaNs will be filled in this direction.

          If limit is specified:
              * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'.
              * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be
                'backward'.

          If 'limit' is not specified:
              * If 'method' is 'backfill' or 'bfill', the default is 'backward'
              * else the default is 'forward'

          raises ValueError if `limit_direction` is 'forward' or 'both' and
              method is 'backfill' or 'bfill'.
          raises ValueError if `limit_direction` is 'backward' or 'both' and
              method is 'pad' or 'ffill'.

      limit_area : {`None`, 'inside', 'outside'}, default None
          If limit is specified, consecutive NaNs will be filled with this
          restriction.

          * ``None``: No fill restriction.
          * 'inside': Only fill NaNs surrounded by valid values
            (interpolate).
          * 'outside': Only fill NaNs outside valid values (extrapolate).

      downcast : optional, 'infer' or None, defaults to None
          Downcast dtypes if possible.

          .. deprecated:: 2.1.0

      ``**kwargs`` : optional
          Keyword arguments to pass on to the interpolating function.

      Returns
      -------
      Series or DataFrame or None
          Returns the same object type as the caller, interpolated at
          some or all ``NaN`` values or None if ``inplace=True``.

      Raises
      ------
      ValueError
          If `downcast` is neither None nor 'infer', if `method` is not a
          string, if ``fill_value`` is passed together with one of the fill
          methods ('ffill', 'bfill', 'pad', 'backfill'; matched
          case-insensitively), or if the index is a MultiIndex and `method`
          is not 'linear'.
      TypeError
          If the object is a DataFrame whose columns are all object dtype.

      See Also
      --------
      fillna : Fill missing values using different methods.
      scipy.interpolate.Akima1DInterpolator : Piecewise cubic polynomials
          (Akima interpolator).
      scipy.interpolate.BPoly.from_derivatives : Piecewise polynomial in the
          Bernstein basis.
      scipy.interpolate.interp1d : Interpolate a 1-D function.
      scipy.interpolate.KroghInterpolator : Interpolate polynomial (Krogh
          interpolator).
      scipy.interpolate.PchipInterpolator : PCHIP 1-d monotonic cubic
          interpolation.
      scipy.interpolate.CubicSpline : Cubic spline data interpolator.

      Notes
      -----
      The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima'
      methods are wrappers around the respective SciPy implementations of
      similar names. These use the actual numerical values of the index.
      For more information on their behavior, see the
      `SciPy documentation
      <https://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation>`__.

      Examples
      --------
      Filling in ``NaN`` in a :class:`~pandas.Series` via linear
      interpolation.

      >>> s = pd.Series([0, 1, np.nan, 3])
      >>> s
      0    0.0
      1    1.0
      2    NaN
      3    3.0
      dtype: float64
      >>> s.interpolate()
      0    0.0
      1    1.0
      2    2.0
      3    3.0
      dtype: float64

      Filling in ``NaN`` in a Series via polynomial interpolation or splines:
      Both 'polynomial' and 'spline' methods require that you also specify
      an ``order`` (int).

      >>> s = pd.Series([0, 2, np.nan, 8])
      >>> s.interpolate(method='polynomial', order=2)
      0    0.000000
      1    2.000000
      2    4.666667
      3    8.000000
      dtype: float64

      Fill the DataFrame forward (that is, going down) along each column
      using linear interpolation.

      Note how the last entry in column 'a' is interpolated differently,
      because there is no entry after it to use for interpolation.
      Note how the first entry in column 'b' remains ``NaN``, because there
      is no entry before it to use for interpolation.

      >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0),
      ...                    (np.nan, 2.0, np.nan, np.nan),
      ...                    (2.0, 3.0, np.nan, 9.0),
      ...                    (np.nan, 4.0, -4.0, 16.0)],
      ...                   columns=list('abcd'))
      >>> df
           a    b    c     d
      0  0.0  NaN -1.0   1.0
      1  NaN  2.0  NaN   NaN
      2  2.0  3.0  NaN   9.0
      3  NaN  4.0 -4.0  16.0
      >>> df.interpolate(method='linear', limit_direction='forward', axis=0)
           a    b    c     d
      0  0.0  NaN -1.0   1.0
      1  1.0  2.0 -2.0   5.0
      2  2.0  3.0 -3.0   9.0
      3  2.0  4.0 -4.0  16.0

      Using polynomial interpolation.

      >>> df['d'].interpolate(method='polynomial', order=2)
      0     1.0
      1     4.0
      2     9.0
      3    16.0
      Name: d, dtype: float64
      """
      if downcast is not lib.no_default:
          # GH#40988
          warnings.warn(
              f"The 'downcast' keyword in {type(self).__name__}.interpolate "
              "is deprecated and will be removed in a future version. "
              "Call result.infer_objects(copy=False) on the result instead.",
              FutureWarning,
              stacklevel=find_stack_level(),
          )
      else:
          # Keyword not supplied: behave as if ``downcast=None`` was passed.
          downcast = None
      if downcast is not None and downcast != "infer":
          raise ValueError("downcast must be either None or 'infer'")

      inplace = validate_bool_kwarg(inplace, "inplace")

      if inplace:
          # The reference counts are read in this frame on purpose; moving these
          # checks into a helper would change the value of sys.getrefcount(self).
          if not PYPY and not WARNING_CHECK_DISABLED and using_copy_on_write():
              if sys.getrefcount(self) <= REF_COUNT:
                  warnings.warn(
                      _chained_assignment_method_msg,
                      ChainedAssignmentError,
                      stacklevel=2,
                  )
          elif (
              not PYPY
              and not WARNING_CHECK_DISABLED
              and not using_copy_on_write()
              and self._is_view_after_cow_rules()
          ):
              ctr = sys.getrefcount(self)
              ref_count = REF_COUNT
              if isinstance(self, ABCSeries) and _check_cacher(self):
                  # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
                  ref_count += 1
              if ctr <= ref_count:
                  warnings.warn(
                      _chained_assignment_warning_method_msg,
                      FutureWarning,
                      stacklevel=2,
                  )

      axis = self._get_axis_number(axis)

      if self.empty:
          # Nothing to interpolate.
          if inplace:
              return None
          return self.copy()

      if not isinstance(method, str):
          raise ValueError("'method' should be a string, not None.")

      fillna_methods = ["ffill", "bfill", "pad", "backfill"]
      # Decide once, case-insensitively, whether this call takes the deprecated
      # pad/backfill path, so that the deprecation warning, the ``fill_value``
      # guard and the dispatch below always agree with each other.
      uses_fillna_method = method.lower() in fillna_methods

      if uses_fillna_method:
          # GH#53581
          warnings.warn(
              f"{type(self).__name__}.interpolate with method={method} is "
              "deprecated and will raise in a future version. "
              "Use obj.ffill() or obj.bfill() instead.",
              FutureWarning,
              stacklevel=find_stack_level(),
          )
          obj, should_transpose = self, False
      else:
          # Interpolating along axis=1 is done on the transpose; the result is
          # transposed back at the end.
          obj, should_transpose = (self.T, True) if axis == 1 else (self, False)
          if np.any(obj.dtypes == object):
              # GH#53631
              if not (obj.ndim == 2 and np.all(obj.dtypes == object)):
                  # don't warn in cases that already raise
                  warnings.warn(
                      f"{type(self).__name__}.interpolate with object dtype is "
                      "deprecated and will raise in a future version. Call "
                      "obj.infer_objects(copy=False) before interpolating instead.",
                      FutureWarning,
                      stacklevel=find_stack_level(),
                  )

      if uses_fillna_method and "fill_value" in kwargs:
          raise ValueError(
              "'fill_value' is not a valid keyword for "
              f"{type(self).__name__}.interpolate with method from "
              f"{fillna_methods}"
          )

      if isinstance(obj.index, MultiIndex) and method != "linear":
          raise ValueError(
              "Only `method=linear` interpolation is supported on MultiIndexes."
          )

      limit_direction = missing.infer_limit_direction(limit_direction, method)

      if obj.ndim == 2 and np.all(obj.dtypes == object):
          raise TypeError(
              "Cannot interpolate with all object-dtype columns "
              "in the DataFrame. Try setting at least one "
              "column to a numeric dtype."
          )

      if uses_fillna_method:
          # TODO(3.0): remove this case
          # TODO: warn/raise on limit_direction or kwargs which are ignored?
          #  as of 2023-06-26 no tests get here with either
          if not self._mgr.is_single_block and axis == 1:
              # GH#53898
              if inplace:
                  raise NotImplementedError()
              obj, axis, should_transpose = self.T, 1 - axis, True

          new_data = obj._mgr.pad_or_backfill(
              method=method,
              axis=self._get_block_manager_axis(axis),
              limit=limit,
              limit_area=limit_area,
              inplace=inplace,
              downcast=downcast,
          )
      else:
          index = missing.get_interp_index(method, obj.index)
          new_data = obj._mgr.interpolate(
              method=method,
              index=index,
              limit=limit,
              limit_direction=limit_direction,
              limit_area=limit_area,
              inplace=inplace,
              downcast=downcast,
              **kwargs,
          )

      result = self._constructor_from_mgr(new_data, axes=new_data.axes)
      if should_transpose:
          result = result.T
      if inplace:
          return self._update_inplace(result)
      else:
          return result.__finalize__(self, method="interpolate")
  7395. # ----------------------------------------------------------------------
  # Timeseries Methods
  @final
  def asof(self, where, subset=None):
      """
      Return the last row(s) without any NaNs before `where`.

      The last row (for each element in `where`, if list) without any
      NaN is taken.
      In case of a :class:`~pandas.DataFrame`, the last row without NaN
      considering only the subset of columns (if not `None`)

      If there is no good value, NaN is returned for a Series or
      a Series of NaN values for a DataFrame

      Parameters
      ----------
      where : date or array-like of dates
          Date(s) before which the last row(s) are returned.
      subset : str or array-like of str, default `None`
          For DataFrame, if not `None`, only use these columns to
          check for NaNs.

      Returns
      -------
      scalar, Series, or DataFrame

          The return can be:

          * scalar : when `self` is a Series and `where` is a scalar
          * Series: when `self` is a Series and `where` is an array-like,
            or when `self` is a DataFrame and `where` is a scalar
          * DataFrame : when `self` is a DataFrame and `where` is an
            array-like

      Raises
      ------
      ValueError
          If the index is not monotonically increasing, or if `subset` is
          given when `self` is a Series.

      See Also
      --------
      merge_asof : Perform an asof merge. Similar to left join.

      Notes
      -----
      Dates are assumed to be sorted. Raises if this is not the case.

      Examples
      --------
      A Series and a scalar `where`.

      >>> s = pd.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40])
      >>> s
      10    1.0
      20    2.0
      30    NaN
      40    4.0
      dtype: float64

      >>> s.asof(20)
      2.0

      For a sequence `where`, a Series is returned. The first value is
      NaN, because the first element of `where` is before the first
      index value.

      >>> s.asof([5, 20])
      5     NaN
      20    2.0
      dtype: float64

      Missing values are not considered. The following is ``2.0``, not
      NaN, even though NaN is at the index location for ``30``.

      >>> s.asof(30)
      2.0

      Take all columns into consideration

      >>> df = pd.DataFrame({'a': [10., 20., 30., 40., 50.],
      ...                    'b': [None, None, None, None, 500]},
      ...                   index=pd.DatetimeIndex(['2018-02-27 09:01:00',
      ...                                           '2018-02-27 09:02:00',
      ...                                           '2018-02-27 09:03:00',
      ...                                           '2018-02-27 09:04:00',
      ...                                           '2018-02-27 09:05:00']))
      >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30',
      ...                           '2018-02-27 09:04:30']))
                            a   b
      2018-02-27 09:03:30 NaN NaN
      2018-02-27 09:04:30 NaN NaN

      Take a single column into consideration

      >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30',
      ...                           '2018-02-27 09:04:30']),
      ...         subset=['a'])
                              a   b
      2018-02-27 09:03:30  30.0 NaN
      2018-02-27 09:04:30  40.0 NaN
      """
      # A string `where` is converted to a Timestamp before any comparison.
      if isinstance(where, str):
          where = Timestamp(where)

      if not self.index.is_monotonic_increasing:
          raise ValueError("asof requires a sorted index")

      is_series = isinstance(self, ABCSeries)
      if is_series:
          if subset is not None:
              raise ValueError("subset is not valid for Series")
      else:
          # DataFrame: default to checking every column, and accept a single
          # label by wrapping it in a list.
          if subset is None:
              subset = self.columns
          if not is_list_like(subset):
              subset = [subset]

      is_list = is_list_like(where)
      if not is_list:
          # NOTE(review): self.index[0] is read without a length check, so this
          # presumably assumes a non-empty index -- confirm against callers.
          start = self.index[0]
          if isinstance(self.index, PeriodIndex):
              where = Period(where, freq=self.index.freq)

          # Scalar `where` before the first index label: nothing to look up.
          if where < start:
              if not is_series:
                  return self._constructor_sliced(
                      index=self.columns, name=where, dtype=np.float64
                  )
              return np.nan

          # It's always much faster to use a *while* loop here for
          # Series than pre-computing all the NAs. However a
          # *while* loop is extremely expensive for DataFrame
          # so we later pre-compute all the NAs and use the same
          # code path whether *where* is a scalar or list.
          # See PR: https://github.com/pandas-dev/pandas/pull/14476
          if is_series:
              loc = self.index.searchsorted(where, side="right")
              if loc > 0:
                  loc -= 1

              values = self._values
              # Walk backwards past NA entries; the loop stops at position 0
              # even if that entry is itself NA.
              while loc > 0 and isna(values[loc]):
                  loc -= 1
              return values[loc]

      # From here on `where` is always an Index (length 1 for a scalar input).
      if not isinstance(where, Index):
          where = Index(where) if is_list else Index([where])

      # Rows to ignore: NA values for a Series, or rows with an NA in any of
      # the `subset` columns for a DataFrame.
      nulls = self.isna() if is_series else self[subset].isna().any(axis=1)
      if nulls.all():
          # Every row is unusable: return an all-NaN object of the shape that
          # matches the caller type and whether `where` was list-like.
          if is_series:
              self = cast("Series", self)
              return self._constructor(np.nan, index=where, name=self.name)
          elif is_list:
              self = cast("DataFrame", self)
              return self._constructor(np.nan, index=where, columns=self.columns)
          else:
              self = cast("DataFrame", self)
              return self._constructor_sliced(
                  np.nan, index=self.columns, name=where[0]
              )

      # Look up positions using only the rows not flagged in `nulls`.
      locs = self.index.asof_locs(where, ~(nulls._values))

      # mask the missing
      mask = locs == -1
      data = self.take(locs)
      data.index = where
      if mask.any():
          # GH#16063 only do this setting when necessary, otherwise
          # we'd cast e.g. bools to floats
          data.loc[mask] = np.nan
      # A scalar `where` produced a single-row object; return that row itself.
      return data if is_list else data.iloc[-1]
  7536. # ----------------------------------------------------------------------
  7537. # Action Methods
  @doc(klass=_shared_doc_kwargs["klass"])
  def isna(self) -> Self:
      """
      Detect missing values.

      Return a boolean same-sized object indicating if the values are NA.
      NA values, such as None or :attr:`numpy.NaN`, get mapped to True
      values.
      Everything else gets mapped to False values. Characters such as empty
      strings ``''`` or :attr:`numpy.inf` are not considered NA values
      (unless you set ``pandas.options.mode.use_inf_as_na = True``).

      Returns
      -------
      {klass}
          Mask of bool values for each element in {klass} that
          indicates whether an element is an NA value.

      See Also
      --------
      {klass}.isnull : Alias of isna.
      {klass}.notna : Boolean inverse of isna.
      {klass}.dropna : Omit axes labels with missing values.
      isna : Top-level isna.

      Examples
      --------
      Show which entries in a DataFrame are NA.

      >>> df = pd.DataFrame(dict(age=[5, 6, np.nan],
      ...                        born=[pd.NaT, pd.Timestamp('1939-05-27'),
      ...                              pd.Timestamp('1940-04-25')],
      ...                        name=['Alfred', 'Batman', ''],
      ...                        toy=[None, 'Batmobile', 'Joker']))
      >>> df
         age       born    name        toy
      0  5.0        NaT  Alfred       None
      1  6.0 1939-05-27  Batman  Batmobile
      2  NaN 1940-04-25              Joker

      >>> df.isna()
           age   born   name    toy
      0  False   True  False   True
      1  False  False  False  False
      2   True  False  False  False

      Show which entries in a Series are NA.

      >>> ser = pd.Series([5, 6, np.nan])
      >>> ser
      0    5.0
      1    6.0
      2    NaN
      dtype: float64

      >>> ser.isna()
      0    False
      1    False
      2     True
      dtype: bool
      """
      # Inside the method body ``isna`` is the module-level function, not this
      # method; its result is then finalized against ``self``.
      na_mask = isna(self)
      return na_mask.__finalize__(self, method="isna")
  @doc(isna, klass=_shared_doc_kwargs["klass"])
  def isnull(self) -> Self:
      # Documentation is attached by the ``@doc(isna, ...)`` decorator, so no
      # docstring is written here. Same computation as ``isna``; only the
      # ``method`` tag passed to ``__finalize__`` differs.
      na_mask = isna(self)
      return na_mask.__finalize__(self, method="isnull")
  @doc(klass=_shared_doc_kwargs["klass"])
  def notna(self) -> Self:
      """
      Detect existing (non-missing) values.

      Return a boolean same-sized object indicating if the values are not NA.
      Non-missing values get mapped to True. Characters such as empty
      strings ``''`` or :attr:`numpy.inf` are not considered NA values
      (unless you set ``pandas.options.mode.use_inf_as_na = True``).
      NA values, such as None or :attr:`numpy.NaN`, get mapped to False
      values.

      Returns
      -------
      {klass}
          Mask of bool values for each element in {klass} that
          indicates whether an element is not an NA value.

      See Also
      --------
      {klass}.notnull : Alias of notna.
      {klass}.isna : Boolean inverse of notna.
      {klass}.dropna : Omit axes labels with missing values.
      notna : Top-level notna.

      Examples
      --------
      Show which entries in a DataFrame are not NA.

      >>> df = pd.DataFrame(dict(age=[5, 6, np.nan],
      ...                        born=[pd.NaT, pd.Timestamp('1939-05-27'),
      ...                              pd.Timestamp('1940-04-25')],
      ...                        name=['Alfred', 'Batman', ''],
      ...                        toy=[None, 'Batmobile', 'Joker']))
      >>> df
         age       born    name        toy
      0  5.0        NaT  Alfred       None
      1  6.0 1939-05-27  Batman  Batmobile
      2  NaN 1940-04-25              Joker

      >>> df.notna()
           age   born  name    toy
      0   True  False  True  False
      1   True   True  True   True
      2  False   True  True   True

      Show which entries in a Series are not NA.

      >>> ser = pd.Series([5, 6, np.nan])
      >>> ser
      0    5.0
      1    6.0
      2    NaN
      dtype: float64

      >>> ser.notna()
      0     True
      1     True
      2    False
      dtype: bool
      """
      # Inside the method body ``notna`` is the module-level function, not this
      # method; its result is then finalized against ``self``.
      present_mask = notna(self)
      return present_mask.__finalize__(self, method="notna")
  @doc(notna, klass=_shared_doc_kwargs["klass"])
  def notnull(self) -> Self:
      # Documentation is attached by the ``@doc(notna, ...)`` decorator, so no
      # docstring is written here. Same computation as ``notna``; only the
      # ``method`` tag passed to ``__finalize__`` differs.
      present_mask = notna(self)
      return present_mask.__finalize__(self, method="notnull")
  7650. @final
  7651. def _clip_with_scalar(self, lower, upper, inplace: bool_t = False):
  7652. if (lower is not None and np.any(isna(lower))) or (
  7653. upper is not None and np.any(isna(upper))
  7654. ):
  7655. raise ValueError("Cannot use an NA value as a clip threshold")
  7656. result = self
  7657. mask = self.isna()
  7658. if lower is not None:
  7659. cond = mask | (self >= lower)
  7660. result = result.where(
  7661. cond, lower, inplace=inplace
  7662. ) # type: ignore[assignment]
  7663. if upper is not None:
  7664. cond = mask | (self <= upper)
  7665. result = self if inplace else result
  7666. result = result.where(
  7667. cond, upper, inplace=inplace
  7668. ) # type: ignore[assignment]
  7669. return result
  7670. @final
  7671. def _clip_with_one_bound(self, threshold, method, axis, inplace):
  7672. if axis is not None:
  7673. axis = self._get_axis_number(axis)
  7674. # method is self.le for upper bound and self.ge for lower bound
  7675. if is_scalar(threshold) and is_number(threshold):
  7676. if method.__name__ == "le":
  7677. return self._clip_with_scalar(None, threshold, inplace=inplace)
  7678. return self._clip_with_scalar(threshold, None, inplace=inplace)
  7679. # GH #15390
  7680. # In order for where method to work, the threshold must
  7681. # be transformed to NDFrame from other array like structure.
  7682. if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold):
  7683. if isinstance(self, ABCSeries):
  7684. threshold = self._constructor(threshold, index=self.index)
  7685. else:
  7686. threshold = self._align_for_op(threshold, axis, flex=None)[1]
  7687. # GH 40420
  7688. # Treat missing thresholds as no bounds, not clipping the values
  7689. if is_list_like(threshold):
  7690. fill_value = np.inf if method.__name__ == "le" else -np.inf
  7691. threshold_inf = threshold.fillna(fill_value)
  7692. else:
  7693. threshold_inf = threshold
  7694. subset = method(threshold_inf, axis=axis) | isna(self)
  7695. # GH 40420
  7696. return self.where(subset, threshold, axis=axis, inplace=inplace)
  # Typing overload: ``inplace`` is ``Literal[False]`` (or omitted), so the call
  # is typed as returning ``Self``.
  @overload
  def clip(
      self,
      lower=...,
      upper=...,
      *,
      axis: Axis | None = ...,
      inplace: Literal[False] = ...,
      **kwargs,
  ) -> Self:
      ...
  # Typing overload: ``inplace`` is ``Literal[True]`` (required keyword here),
  # so the call is typed as returning ``None``.
  @overload
  def clip(
      self,
      lower=...,
      upper=...,
      *,
      axis: Axis | None = ...,
      inplace: Literal[True],
      **kwargs,
  ) -> None:
      ...
  # Typing overload: ``inplace`` is a plain ``bool_t`` whose value is not known
  # statically, so the call is typed as returning ``Self | None``.
  @overload
  def clip(
      self,
      lower=...,
      upper=...,
      *,
      axis: Axis | None = ...,
      inplace: bool_t = ...,
      **kwargs,
  ) -> Self | None:
      ...
  @final
  def clip(
      self,
      lower=None,
      upper=None,
      *,
      axis: Axis | None = None,
      inplace: bool_t = False,
      **kwargs,
  ) -> Self | None:
      """
      Trim values at input threshold(s).

      Assigns values outside boundary to boundary values. Thresholds
      can be singular values or array like, and in the latter case
      the clipping is performed element-wise in the specified axis.

      Parameters
      ----------
      lower : float or array-like, default None
          Minimum threshold value. All values below this
          threshold will be set to it. A missing
          threshold (e.g `NA`) will not clip the value.
      upper : float or array-like, default None
          Maximum threshold value. All values above this
          threshold will be set to it. A missing
          threshold (e.g `NA`) will not clip the value.
      axis : {0 or 'index', 1 or 'columns', None}, default None
          Align object with lower and upper along the given axis.
          For `Series` this parameter is unused and defaults to `None`.
      inplace : bool, default False
          Whether to perform the operation in place on the data.
      **kwargs
          Additional keywords have no effect but might be accepted
          for compatibility with numpy.

      Returns
      -------
      Series or DataFrame or None
          Same type as calling object with the values outside the
          clip boundaries replaced or None if ``inplace=True``.

      See Also
      --------
      Series.clip : Trim values at input threshold in series.
      DataFrame.clip : Trim values at input threshold in dataframe.
      numpy.clip : Clip (limit) the values in an array.

      Examples
      --------
      >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]}
      >>> df = pd.DataFrame(data)
      >>> df
         col_0  col_1
      0      9     -2
      1     -3     -7
      2      0      6
      3     -1      8
      4      5     -5

      Clips per column using lower and upper thresholds:

      >>> df.clip(-4, 6)
         col_0  col_1
      0      6     -2
      1     -3     -4
      2      0      6
      3     -1      6
      4      5     -4

      Clips using specific lower and upper thresholds per column:

      >>> df.clip([-2, -1], [4, 5])
         col_0  col_1
      0      4     -1
      1     -2     -1
      2      0      5
      3     -1      5
      4      4     -1

      Clips using specific lower and upper thresholds per column element:

      >>> t = pd.Series([2, -4, -1, 6, 3])
      >>> t
      0    2
      1   -4
      2   -1
      3    6
      4    3
      dtype: int64

      >>> df.clip(t, t + 4, axis=0)
         col_0  col_1
      0      6      2
      1     -3     -4
      2      0      3
      3      6      8
      4      5      3

      Clips using specific lower threshold per column element, with missing values:

      >>> t = pd.Series([2, -4, np.nan, 6, 3])
      >>> t
      0    2.0
      1   -4.0
      2    NaN
      3    6.0
      4    3.0
      dtype: float64

      >>> df.clip(t, axis=0)
         col_0  col_1
      0      9      2
      1     -3     -4
      2      0      6
      3      6      8
      4      5      3
      """
      inplace = validate_bool_kwarg(inplace, "inplace")

      if inplace:
          # The reference counts are read in this frame; they feed the
          # chained-assignment warnings below.
          if not PYPY and not WARNING_CHECK_DISABLED and using_copy_on_write():
              if sys.getrefcount(self) <= REF_COUNT:
                  warnings.warn(
                      _chained_assignment_method_msg,
                      ChainedAssignmentError,
                      stacklevel=2,
                  )
          elif (
              not PYPY
              and not WARNING_CHECK_DISABLED
              and not using_copy_on_write()
              and self._is_view_after_cow_rules()
          ):
              ctr = sys.getrefcount(self)
              ref_count = REF_COUNT
              # NOTE(review): ``interpolate`` in this file tests
              # ``_check_cacher(self)`` at the equivalent spot, while this
              # method tests ``hasattr(self, "_cacher")`` -- confirm whether the
              # difference is intentional.
              if isinstance(self, ABCSeries) and hasattr(self, "_cacher"):
                  # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
                  ref_count += 1
              if ctr <= ref_count:
                  warnings.warn(
                      _chained_assignment_warning_method_msg,
                      FutureWarning,
                      stacklevel=2,
                  )

      axis = nv.validate_clip_with_axis(axis, (), kwargs)
      if axis is not None:
          axis = self._get_axis_number(axis)

      # GH 17276
      # numpy doesn't like NaN as a clip value
      # so ignore
      # GH 19992
      # numpy doesn't drop a list-like bound containing NaN
      # A non-list-like NA bound, or a list-like bound that is entirely NA, is
      # treated as "no bound"; a partially-NA list-like bound is kept.
      isna_lower = isna(lower)
      if not is_list_like(lower):
          if np.any(isna_lower):
              lower = None
      elif np.all(isna_lower):
          lower = None
      isna_upper = isna(upper)
      if not is_list_like(upper):
          if np.any(isna_upper):
              upper = None
      elif np.all(isna_upper):
          upper = None

      # GH 2747 (arguments were reversed)
      if (
          lower is not None
          and upper is not None
          and is_scalar(lower)
          and is_scalar(upper)
      ):
          lower, upper = min(lower, upper), max(lower, upper)

      # fast-path for scalars
      if (lower is None or is_number(lower)) and (upper is None or is_number(upper)):
          return self._clip_with_scalar(lower, upper, inplace=inplace)

      # General path: apply each bound separately; the comparison method
      # (self.ge / self.le) tells the helper which side is being clipped.
      result = self
      if lower is not None:
          result = result._clip_with_one_bound(
              lower, method=self.ge, axis=axis, inplace=inplace
          )
      if upper is not None:
          # With inplace=True the second pass is applied to ``self`` again
          # rather than to the previous result.
          if inplace:
              result = self
          result = result._clip_with_one_bound(
              upper, method=self.le, axis=axis, inplace=inplace
          )

      return result
  @final
  @doc(klass=_shared_doc_kwargs["klass"])
  def asfreq(
      self,
      freq: Frequency,
      method: FillnaOptions | None = None,
      how: Literal["start", "end"] | None = None,
      normalize: bool_t = False,
      fill_value: Hashable | None = None,
  ) -> Self:
      """
      Convert time series to specified frequency.

      The original data is returned conformed to a new index that has the
      requested frequency.

      When the index of this {klass} is a :class:`~pandas.PeriodIndex`, the new
      index is obtained by transforming the original index with
      :meth:`PeriodIndex.asfreq <pandas.PeriodIndex.asfreq>` (so the original index
      will map one-to-one to the new index).

      Otherwise, the new index will be equivalent to ``pd.date_range(start, end,
      freq=freq)`` where ``start`` and ``end`` are, respectively, the first and
      last entries in the original index (see :func:`pandas.date_range`). The
      values corresponding to any timesteps in the new index which were not present
      in the original index will be null (``NaN``), unless a method for filling
      such unknowns is provided (see the ``method`` parameter below).

      Prefer the :meth:`resample` method if an operation on each group of
      timesteps (such as an aggregate) is necessary to represent the data at the
      new frequency.

      Parameters
      ----------
      freq : DateOffset or str
          Frequency DateOffset or string.
      method : {{'backfill'/'bfill', 'pad'/'ffill'}}, default None
          Method to use for filling holes in reindexed Series (note this
          does not fill NaNs that already were present):

          * 'pad' / 'ffill': propagate last valid observation forward to next
            valid
          * 'backfill' / 'bfill': use NEXT valid observation to fill.
      how : {{'start', 'end'}}, default end
          For PeriodIndex only (see PeriodIndex.asfreq).
      normalize : bool, default False
          Whether to reset output index to midnight.
      fill_value : scalar, optional
          Value to use for missing values, applied during upsampling (note
          this does not fill NaNs that already were present).

      Returns
      -------
      {klass}
          {klass} object reindexed to the specified frequency.

      See Also
      --------
      reindex : Conform DataFrame to new index with optional filling logic.

      Notes
      -----
      To learn more about the frequency strings, please see `this link
      <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

      Examples
      --------
      Start by creating a series with 4 one minute timestamps.

      >>> index = pd.date_range('1/1/2000', periods=4, freq='min')
      >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index)
      >>> df = pd.DataFrame({{'s': series}})
      >>> df
                             s
      2000-01-01 00:00:00  0.0
      2000-01-01 00:01:00  NaN
      2000-01-01 00:02:00  2.0
      2000-01-01 00:03:00  3.0

      Upsample the series into 30 second bins.

      >>> df.asfreq(freq='30s')
                             s
      2000-01-01 00:00:00  0.0
      2000-01-01 00:00:30  NaN
      2000-01-01 00:01:00  NaN
      2000-01-01 00:01:30  NaN
      2000-01-01 00:02:00  2.0
      2000-01-01 00:02:30  NaN
      2000-01-01 00:03:00  3.0

      Upsample again, providing a ``fill value``.

      >>> df.asfreq(freq='30s', fill_value=9.0)
                             s
      2000-01-01 00:00:00  0.0
      2000-01-01 00:00:30  9.0
      2000-01-01 00:01:00  NaN
      2000-01-01 00:01:30  9.0
      2000-01-01 00:02:00  2.0
      2000-01-01 00:02:30  9.0
      2000-01-01 00:03:00  3.0

      Upsample again, providing a ``method``.

      >>> df.asfreq(freq='30s', method='bfill')
                             s
      2000-01-01 00:00:00  0.0
      2000-01-01 00:00:30  NaN
      2000-01-01 00:01:00  NaN
      2000-01-01 00:01:30  2.0
      2000-01-01 00:02:00  2.0
      2000-01-01 00:02:30  3.0
      2000-01-01 00:03:00  3.0
      """
      # Function-scope import of the module-level implementation; this method
      # only forwards its arguments to it.
      from pandas.core.resample import asfreq as _asfreq_impl

      conversion_options = {
          "method": method,
          "how": how,
          "normalize": normalize,
          "fill_value": fill_value,
      }
      return _asfreq_impl(self, freq, **conversion_options)
  @final
  def at_time(self, time, asof: bool_t = False, axis: Axis | None = None) -> Self:
      """
      Select values at particular time of day (e.g., 9:30AM).

      Parameters
      ----------
      time : datetime.time or str
          The values to select.
      asof : bool, default False
          Forwarded unchanged to :meth:`DatetimeIndex.indexer_at_time`.
      axis : {0 or 'index', 1 or 'columns'}, default 0
          For `Series` this parameter is unused and defaults to 0.

      Returns
      -------
      Series or DataFrame

      Raises
      ------
      TypeError
          If the index is not a :class:`DatetimeIndex`

      See Also
      --------
      between_time : Select values between particular times of the day.
      first : Select initial periods of time series based on a date offset.
      last : Select final periods of time series based on a date offset.
      DatetimeIndex.indexer_at_time : Get just the index locations for
          values at particular time of the day.

      Examples
      --------
      >>> i = pd.date_range('2018-04-09', periods=4, freq='12h')
      >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
      >>> ts
                           A
      2018-04-09 00:00:00  1
      2018-04-09 12:00:00  2
      2018-04-10 00:00:00  3
      2018-04-10 12:00:00  4

      >>> ts.at_time('12:00')
                           A
      2018-04-09 12:00:00  2
      2018-04-10 12:00:00  4
      """
      # A missing axis means the first axis (0).
      axis_number = self._get_axis_number(0 if axis is None else axis)
      labels = self._get_axis(axis_number)

      if isinstance(labels, DatetimeIndex):
          positions = labels.indexer_at_time(time, asof=asof)
          return self._take_with_is_copy(positions, axis=axis_number)

      raise TypeError("Index must be DatetimeIndex")
  @final
  def between_time(
      self,
      start_time,
      end_time,
      inclusive: IntervalClosedType = "both",
      axis: Axis | None = None,
  ) -> Self:
      """
      Select values between particular times of the day (e.g., 9:00-9:30 AM).

      If ``start_time`` is later than ``end_time``, the selection is inverted
      and you get the times that are *not* between the two times.

      Parameters
      ----------
      start_time : datetime.time or str
          Initial time as a time filter limit.
      end_time : datetime.time or str
          End time as a time filter limit.
      inclusive : {"both", "neither", "left", "right"}, default "both"
          Include boundaries; whether to set each bound as closed or open.
      axis : {0 or 'index', 1 or 'columns'}, default 0
          Determine range time on index or columns value.
          For `Series` this parameter is unused and defaults to 0.

      Returns
      -------
      Series or DataFrame
          Data from the original object filtered to the specified dates range.

      Raises
      ------
      TypeError
          If the index is not a :class:`DatetimeIndex`

      See Also
      --------
      at_time : Select values at a particular time of the day.
      first : Select initial periods of time series based on a date offset.
      last : Select final periods of time series based on a date offset.
      DatetimeIndex.indexer_between_time : Get just the index locations for
          values between particular times of the day.

      Examples
      --------
      >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min')
      >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
      >>> ts
                           A
      2018-04-09 00:00:00  1
      2018-04-10 00:20:00  2
      2018-04-11 00:40:00  3
      2018-04-12 01:00:00  4

      >>> ts.between_time('0:15', '0:45')
                           A
      2018-04-10 00:20:00  2
      2018-04-11 00:40:00  3

      You get the times that are *not* between two times by setting
      ``start_time`` later than ``end_time``:

      >>> ts.between_time('0:45', '0:15')
                           A
      2018-04-09 00:00:00  1
      2018-04-12 01:00:00  4
      """
      # A missing axis means the first axis (0).
      axis_number = self._get_axis_number(0 if axis is None else axis)
      labels = self._get_axis(axis_number)
      if not isinstance(labels, DatetimeIndex):
          raise TypeError("Index must be DatetimeIndex")

      # validate_inclusive yields the (left, right) closed-ness flags.
      closed_left, closed_right = validate_inclusive(inclusive)
      positions = labels.indexer_between_time(
          start_time,
          end_time,
          include_start=closed_left,
          include_end=closed_right,
      )
      return self._take_with_is_copy(positions, axis=axis_number)
  @final
  @doc(klass=_shared_doc_kwargs["klass"])
  def resample(
      self,
      rule,
      axis: Axis | lib.NoDefault = lib.no_default,
      closed: Literal["right", "left"] | None = None,
      label: Literal["right", "left"] | None = None,
      convention: Literal["start", "end", "s", "e"] = "start",
      kind: Literal["timestamp", "period"] | None | lib.NoDefault = lib.no_default,
      on: Level | None = None,
      level: Level | None = None,
      origin: str | TimestampConvertibleTypes = "start_day",
      offset: TimedeltaConvertibleTypes | None = None,
      group_keys: bool_t = False,
  ) -> Resampler:
      """
      Resample time-series data.

      Convenience method for frequency conversion and resampling of time series.
      The object must have a datetime-like index (`DatetimeIndex`, `PeriodIndex`,
      or `TimedeltaIndex`), or the caller must pass the label of a datetime-like
      series/index to the ``on``/``level`` keyword parameter.

      Parameters
      ----------
      rule : DateOffset, Timedelta or str
          The offset string or object representing target conversion.
      axis : {{0 or 'index', 1 or 'columns'}}, default 0
          Which axis to use for up- or down-sampling. For `Series` this parameter
          is unused and defaults to 0. Must be
          `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`.

          .. deprecated:: 2.0.0
              Use frame.T.resample(...) instead.
      closed : {{'right', 'left'}}, default None
          Which side of bin interval is closed. The default is 'left'
          for all frequency offsets except for 'ME', 'YE', 'QE', 'BME',
          'BA', 'BQE', and 'W' which all have a default of 'right'.
      label : {{'right', 'left'}}, default None
          Which bin edge label to label bucket with. The default is 'left'
          for all frequency offsets except for 'ME', 'YE', 'QE', 'BME',
          'BA', 'BQE', and 'W' which all have a default of 'right'.
      convention : {{'start', 'end', 's', 'e'}}, default 'start'
          For `PeriodIndex` only, controls whether to use the start or
          end of `rule`.
      kind : {{'timestamp', 'period'}}, optional, default None
          Pass 'timestamp' to convert the resulting index to a
          `DatetimeIndex` or 'period' to convert it to a `PeriodIndex`.
          By default the input representation is retained.

          .. deprecated:: 2.2.0
              Convert index to desired type explicitly instead.
      on : str, optional
          For a DataFrame, column to use instead of index for resampling.
          Column must be datetime-like.
      level : str or int, optional
          For a MultiIndex, level (name or number) to use for
          resampling. `level` must be datetime-like.
      origin : Timestamp or str, default 'start_day'
          The timestamp on which to adjust the grouping. The timezone of origin
          must match the timezone of the index.
          If string, must be one of the following:

          - 'epoch': `origin` is 1970-01-01
          - 'start': `origin` is the first value of the timeseries
          - 'start_day': `origin` is the first day at midnight of the timeseries
          - 'end': `origin` is the last value of the timeseries
          - 'end_day': `origin` is the ceiling midnight of the last day

          .. versionadded:: 1.3.0

          .. note::

              Only takes effect for Tick-frequencies (i.e. fixed frequencies like
              days, hours, and minutes, rather than months or quarters).
      offset : Timedelta or str, default is None
          An offset timedelta added to the origin.
      group_keys : bool, default False
          Whether to include the group keys in the result index when using
          ``.apply()`` on the resampled object.

          .. versionadded:: 1.5.0

              Not specifying ``group_keys`` will retain values-dependent behavior
              from pandas 1.4 and earlier (see :ref:`pandas 1.5.0 Release notes
              <whatsnew_150.enhancements.resample_group_keys>` for examples).

          .. versionchanged:: 2.0.0

              ``group_keys`` now defaults to ``False``.

      Returns
      -------
      pandas.api.typing.Resampler
          :class:`~pandas.core.Resampler` object.

      See Also
      --------
      Series.resample : Resample a Series.
      DataFrame.resample : Resample a DataFrame.
      groupby : Group {klass} by mapping, function, label, or list of labels.
      asfreq : Reindex a {klass} with the given frequency without grouping.

      Notes
      -----
      See the `user guide
      <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#resampling>`__
      for more.

      To learn more about the offset strings, please see `this link
      <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`__.

      Examples
      --------
      Start by creating a series with 9 one minute timestamps.

      >>> index = pd.date_range('1/1/2000', periods=9, freq='min')
      >>> series = pd.Series(range(9), index=index)
      >>> series
      2000-01-01 00:00:00    0
      2000-01-01 00:01:00    1
      2000-01-01 00:02:00    2
      2000-01-01 00:03:00    3
      2000-01-01 00:04:00    4
      2000-01-01 00:05:00    5
      2000-01-01 00:06:00    6
      2000-01-01 00:07:00    7
      2000-01-01 00:08:00    8
      Freq: min, dtype: int64

      Downsample the series into 3 minute bins and sum the values
      of the timestamps falling into a bin.

      >>> series.resample('3min').sum()
      2000-01-01 00:00:00     3
      2000-01-01 00:03:00    12
      2000-01-01 00:06:00    21
      Freq: 3min, dtype: int64

      Downsample the series into 3 minute bins as above, but label each
      bin using the right edge instead of the left. Please note that the
      value in the bucket used as the label is not included in the bucket,
      which it labels. For example, in the original series the
      bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed
      value in the resampled bucket with the label ``2000-01-01 00:03:00``
      does not include 3 (if it did, the summed value would be 6, not 3).

      >>> series.resample('3min', label='right').sum()
      2000-01-01 00:03:00     3
      2000-01-01 00:06:00    12
      2000-01-01 00:09:00    21
      Freq: 3min, dtype: int64

      To include this value close the right side of the bin interval,
      as shown below.

      >>> series.resample('3min', label='right', closed='right').sum()
      2000-01-01 00:00:00     0
      2000-01-01 00:03:00     6
      2000-01-01 00:06:00    15
      2000-01-01 00:09:00    15
      Freq: 3min, dtype: int64

      Upsample the series into 30 second bins.

      >>> series.resample('30s').asfreq()[0:5]  # Select first 5 rows
      2000-01-01 00:00:00    0.0
      2000-01-01 00:00:30    NaN
      2000-01-01 00:01:00    1.0
      2000-01-01 00:01:30    NaN
      2000-01-01 00:02:00    2.0
      Freq: 30s, dtype: float64

      Upsample the series into 30 second bins and fill the ``NaN``
      values using the ``ffill`` method.

      >>> series.resample('30s').ffill()[0:5]
      2000-01-01 00:00:00    0
      2000-01-01 00:00:30    0
      2000-01-01 00:01:00    1
      2000-01-01 00:01:30    1
      2000-01-01 00:02:00    2
      Freq: 30s, dtype: int64

      Upsample the series into 30 second bins and fill the
      ``NaN`` values using the ``bfill`` method.

      >>> series.resample('30s').bfill()[0:5]
      2000-01-01 00:00:00    0
      2000-01-01 00:00:30    1
      2000-01-01 00:01:00    1
      2000-01-01 00:01:30    2
      2000-01-01 00:02:00    2
      Freq: 30s, dtype: int64

      Pass a custom function via ``apply``

      >>> def custom_resampler(arraylike):
      ...     return np.sum(arraylike) + 5
      ...
      >>> series.resample('3min').apply(custom_resampler)
      2000-01-01 00:00:00     8
      2000-01-01 00:03:00    17
      2000-01-01 00:06:00    26
      Freq: 3min, dtype: int64

      For a Series with a PeriodIndex, the keyword `convention` can be
      used to control whether to use the start or end of `rule`.

      Resample a year by quarter using 'start' `convention`. Values are
      assigned to the first quarter of the period.

      >>> s = pd.Series(
      ...     [1, 2], index=pd.period_range("2012-01-01", freq="Y", periods=2)
      ... )
      >>> s
      2012    1
      2013    2
      Freq: Y-DEC, dtype: int64
      >>> s.resample("Q", convention="start").asfreq()
      2012Q1    1.0
      2012Q2    NaN
      2012Q3    NaN
      2012Q4    NaN
      2013Q1    2.0
      2013Q2    NaN
      2013Q3    NaN
      2013Q4    NaN
      Freq: Q-DEC, dtype: float64

      Resample quarters by month using 'end' `convention`. Values are
      assigned to the last month of the period.

      >>> q = pd.Series(
      ...     [1, 2, 3, 4], index=pd.period_range("2018-01-01", freq="Q", periods=4)
      ... )
      >>> q
      2018Q1    1
      2018Q2    2
      2018Q3    3
      2018Q4    4
      Freq: Q-DEC, dtype: int64
      >>> q.resample("M", convention="end").asfreq()
      2018-03    1.0
      2018-04    NaN
      2018-05    NaN
      2018-06    2.0
      2018-07    NaN
      2018-08    NaN
      2018-09    3.0
      2018-10    NaN
      2018-11    NaN
      2018-12    4.0
      Freq: M, dtype: float64

      For DataFrame objects, the keyword `on` can be used to specify the
      column instead of the index for resampling.

      >>> d = {{'price': [10, 11, 9, 13, 14, 18, 17, 19],
      ...      'volume': [50, 60, 40, 100, 50, 100, 40, 50]}}
      >>> df = pd.DataFrame(d)
      >>> df['week_starting'] = pd.date_range('01/01/2018',
      ...                                     periods=8,
      ...                                     freq='W')
      >>> df
         price  volume week_starting
      0     10      50    2018-01-07
      1     11      60    2018-01-14
      2      9      40    2018-01-21
      3     13     100    2018-01-28
      4     14      50    2018-02-04
      5     18     100    2018-02-11
      6     17      40    2018-02-18
      7     19      50    2018-02-25
      >>> df.resample('ME', on='week_starting').mean()
                     price  volume
      week_starting
      2018-01-31     10.75    62.5
      2018-02-28     17.00    60.0

      For a DataFrame with MultiIndex, the keyword `level` can be used to
      specify on which level the resampling needs to take place.

      >>> days = pd.date_range('1/1/2000', periods=4, freq='D')
      >>> d2 = {{'price': [10, 11, 9, 13, 14, 18, 17, 19],
      ...       'volume': [50, 60, 40, 100, 50, 100, 40, 50]}}
      >>> df2 = pd.DataFrame(
      ...     d2,
      ...     index=pd.MultiIndex.from_product(
      ...         [days, ['morning', 'afternoon']]
      ...     )
      ... )
      >>> df2
                            price  volume
      2000-01-01 morning       10      50
                 afternoon     11      60
      2000-01-02 morning        9      40
                 afternoon     13     100
      2000-01-03 morning       14      50
                 afternoon     18     100
      2000-01-04 morning       17      40
                 afternoon     19      50
      >>> df2.resample('D', level=0).sum()
                  price  volume
      2000-01-01     21     110
      2000-01-02     22     140
      2000-01-03     32     150
      2000-01-04     36      90

      If you want to adjust the start of the bins based on a fixed timestamp:

      >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00'
      >>> rng = pd.date_range(start, end, freq='7min')
      >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng)
      >>> ts
      2000-10-01 23:30:00     0
      2000-10-01 23:37:00     3
      2000-10-01 23:44:00     6
      2000-10-01 23:51:00     9
      2000-10-01 23:58:00    12
      2000-10-02 00:05:00    15
      2000-10-02 00:12:00    18
      2000-10-02 00:19:00    21
      2000-10-02 00:26:00    24
      Freq: 7min, dtype: int64

      >>> ts.resample('17min').sum()
      2000-10-01 23:14:00     0
      2000-10-01 23:31:00     9
      2000-10-01 23:48:00    21
      2000-10-02 00:05:00    54
      2000-10-02 00:22:00    24
      Freq: 17min, dtype: int64

      >>> ts.resample('17min', origin='epoch').sum()
      2000-10-01 23:18:00     0
      2000-10-01 23:35:00    18
      2000-10-01 23:52:00    27
      2000-10-02 00:09:00    39
      2000-10-02 00:26:00    24
      Freq: 17min, dtype: int64

      >>> ts.resample('17min', origin='2000-01-01').sum()
      2000-10-01 23:24:00     3
      2000-10-01 23:41:00    15
      2000-10-01 23:58:00    45
      2000-10-02 00:15:00    45
      Freq: 17min, dtype: int64

      If you want to adjust the start of the bins with an `offset` Timedelta, the two
      following lines are equivalent:

      >>> ts.resample('17min', origin='start').sum()
      2000-10-01 23:30:00     9
      2000-10-01 23:47:00    21
      2000-10-02 00:04:00    54
      2000-10-02 00:21:00    24
      Freq: 17min, dtype: int64

      >>> ts.resample('17min', offset='23h30min').sum()
      2000-10-01 23:30:00     9
      2000-10-01 23:47:00    21
      2000-10-02 00:04:00    54
      2000-10-02 00:21:00    24
      Freq: 17min, dtype: int64

      If you want to take the largest Timestamp as the end of the bins:

      >>> ts.resample('17min', origin='end').sum()
      2000-10-01 23:35:00     0
      2000-10-01 23:52:00    18
      2000-10-02 00:09:00    27
      2000-10-02 00:26:00    63
      Freq: 17min, dtype: int64

      In contrast with the `start_day`, you can use `end_day` to take the ceiling
      midnight of the largest Timestamp as the end of the bins and drop the bins
      not containing data:

      >>> ts.resample('17min', origin='end_day').sum()
      2000-10-01 23:38:00     3
      2000-10-01 23:55:00    15
      2000-10-02 00:12:00    45
      2000-10-02 00:29:00    45
      Freq: 17min, dtype: int64
      """
      from pandas.core.resample import get_resampler

      # Any explicitly passed ``axis`` triggers a FutureWarning; the wording
      # depends on whether it resolves to axis 1 or not.
      if axis is lib.no_default:
          axis = 0
      else:
          axis = self._get_axis_number(axis)
          if axis == 1:
              axis_message = (
                  "DataFrame.resample with axis=1 is deprecated. Do "
                  "`frame.T.resample(...)` without axis instead."
              )
          else:
              axis_message = (
                  f"The 'axis' keyword in {type(self).__name__}.resample is "
                  "deprecated and will be removed in a future version."
              )
          warnings.warn(
              axis_message,
              FutureWarning,
              stacklevel=find_stack_level(),
          )

      # Likewise, any explicitly passed ``kind`` is deprecated (GH#55895).
      if kind is lib.no_default:
          kind = None
      else:
          kind_message = (
              f"The 'kind' keyword in {type(self).__name__}.resample is "
              "deprecated and will be removed in a future version. "
              "Explicitly cast the index to the desired type instead"
          )
          warnings.warn(
              kind_message,
              FutureWarning,
              stacklevel=find_stack_level(),
          )

      resampler_options = {
          "freq": rule,
          "label": label,
          "closed": closed,
          "axis": axis,
          "kind": kind,
          "convention": convention,
          "key": on,
          "level": level,
          "origin": origin,
          "offset": offset,
          "group_keys": group_keys,
      }
      return get_resampler(cast("Series | DataFrame", self), **resampler_options)
  @final
  def first(self, offset) -> Self:
      """
      Select initial periods of time series data based on a date offset.

      .. deprecated:: 2.1
          :meth:`.first` is deprecated and will be removed in a future version.
          Please create a mask and filter using `.loc` instead.

      For a DataFrame with a sorted DatetimeIndex, this function can
      select the first few rows based on a date offset.

      Parameters
      ----------
      offset : str, DateOffset or dateutil.relativedelta
          The offset length of the data that will be selected. For instance,
          '1ME' will display all the rows having their index within the first month.

      Returns
      -------
      Series or DataFrame
          A subset of the caller.

      Raises
      ------
      TypeError
          If the index is not a :class:`DatetimeIndex`

      See Also
      --------
      last : Select final periods of time series based on a date offset.
      at_time : Select values at a particular time of the day.
      between_time : Select values between particular times of the day.

      Examples
      --------
      >>> i = pd.date_range('2018-04-09', periods=4, freq='2D')
      >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
      >>> ts
                  A
      2018-04-09  1
      2018-04-11  2
      2018-04-13  3
      2018-04-15  4

      Get the rows for the first 3 days:

      >>> ts.first('3D')
                  A
      2018-04-09  1
      2018-04-11  2

      Notice the data for 3 first calendar days were returned, not the first
      3 days observed in the dataset, and therefore data for 2018-04-13 was
      not returned.
      """
      warnings.warn(
          "first is deprecated and will be removed in a future version. "
          "Please create a mask and filter using `.loc` instead",
          FutureWarning,
          stacklevel=find_stack_level(),
      )

      idx = self.index
      if not isinstance(idx, DatetimeIndex):
          raise TypeError("'first' only supports a DatetimeIndex index")

      # Nothing to select from: hand back a shallow copy.
      if not len(idx):
          return self.copy(deep=False)

      offset = to_offset(offset)
      first_stamp = idx[0]
      is_tick = isinstance(offset, Tick)

      if is_tick or not offset.is_on_offset(first_stamp):
          cutoff = first_stamp + offset
      else:
          # GH#29623 if first value is end of period, remove offset with n = 1
          # before adding the real offset
          cutoff = first_stamp - offset.base + offset

      # Tick-like, e.g. 3 weeks: when the cutoff itself is an index label,
      # slice by position so that label is excluded from the result.
      if is_tick and cutoff in idx:
          stop = idx.searchsorted(cutoff, side="left")
          return self.iloc[:stop]

      return self.loc[:cutoff]
  @final
  def last(self, offset) -> Self:
      """
      Select final periods of time series data based on a date offset.

      .. deprecated:: 2.1
          :meth:`.last` is deprecated and will be removed in a future version.
          Please create a mask and filter using `.loc` instead.

      For a DataFrame with a sorted DatetimeIndex, this function
      selects the last few rows based on a date offset.

      Parameters
      ----------
      offset : str, DateOffset, dateutil.relativedelta
          The offset length of the data that will be selected. For instance,
          '3D' will display all the rows having their index within the last 3 days.

      Returns
      -------
      Series or DataFrame
          A subset of the caller.

      Raises
      ------
      TypeError
          If the index is not a :class:`DatetimeIndex`

      See Also
      --------
      first : Select initial periods of time series based on a date offset.
      at_time : Select values at a particular time of the day.
      between_time : Select values between particular times of the day.

      Notes
      -----
      .. deprecated:: 2.1.0
          Please create a mask and filter using `.loc` instead

      Examples
      --------
      >>> i = pd.date_range('2018-04-09', periods=4, freq='2D')
      >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
      >>> ts
                  A
      2018-04-09  1
      2018-04-11  2
      2018-04-13  3
      2018-04-15  4

      Get the rows for the last 3 days:

      >>> ts.last('3D')  # doctest: +SKIP
                  A
      2018-04-13  3
      2018-04-15  4

      Notice the data for 3 last calendar days were returned, not the last
      3 observed days in the dataset, and therefore data for 2018-04-11 was
      not returned.
      """
      warnings.warn(
          "last is deprecated and will be removed in a future version. "
          "Please create a mask and filter using `.loc` instead",
          FutureWarning,
          stacklevel=find_stack_level(),
      )

      idx = self.index
      if not isinstance(idx, DatetimeIndex):
          raise TypeError("'last' only supports a DatetimeIndex index")

      # Nothing to select from: hand back a shallow copy.
      if not len(idx):
          return self.copy(deep=False)

      # Keep the rows positioned after the cutoff (last label minus the offset);
      # side="right" places the split after any label equal to the cutoff.
      cutoff = idx[-1] - to_offset(offset)
      first_kept = idx.searchsorted(cutoff, side="right")
      return self.iloc[first_kept:]
  @final
  def rank(
      self,
      axis: Axis = 0,
      method: Literal["average", "min", "max", "first", "dense"] = "average",
      numeric_only: bool_t = False,
      na_option: Literal["keep", "top", "bottom"] = "keep",
      ascending: bool_t = True,
      pct: bool_t = False,
  ) -> Self:
      """
      Compute numerical data ranks (1 through n) along axis.

      By default, equal values are assigned a rank that is the average of the
      ranks of those values.

      Parameters
      ----------
      axis : {0 or 'index', 1 or 'columns'}, default 0
          Index to direct ranking.
          For `Series` this parameter is unused and defaults to 0.
      method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
          How to rank the group of records that have the same value (i.e. ties):

          * average: average rank of the group
          * min: lowest rank in the group
          * max: highest rank in the group
          * first: ranks assigned in order they appear in the array
          * dense: like 'min', but rank always increases by 1 between groups.

      numeric_only : bool, default False
          For DataFrame objects, rank only numeric columns if set to True.

          .. versionchanged:: 2.0.0
              The default value of ``numeric_only`` is now ``False``.

      na_option : {'keep', 'top', 'bottom'}, default 'keep'
          How to rank NaN values:

          * keep: assign NaN rank to NaN values
          * top: assign lowest rank to NaN values
          * bottom: assign highest rank to NaN values

      ascending : bool, default True
          Whether or not the elements should be ranked in ascending order.
      pct : bool, default False
          Whether or not to display the returned rankings in percentile
          form.

      Returns
      -------
      same type as caller
          Return a Series or DataFrame with data ranks as values.

      Raises
      ------
      ValueError
          If ``na_option`` is not one of 'keep', 'top' or 'bottom'.
      TypeError
          If ``numeric_only=True`` is passed for a non-numeric Series.

      See Also
      --------
      core.groupby.DataFrameGroupBy.rank : Rank of values within each group.
      core.groupby.SeriesGroupBy.rank : Rank of values within each group.

      Examples
      --------
      >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog',
      ...                                    'spider', 'snake'],
      ...                         'Number_legs': [4, 2, 4, 8, np.nan]})
      >>> df
          Animal  Number_legs
      0      cat          4.0
      1  penguin          2.0
      2      dog          4.0
      3   spider          8.0
      4    snake          NaN

      Ties are assigned the mean of the ranks (by default) for the group.

      >>> s = pd.Series(range(5), index=list("abcde"))
      >>> s["d"] = s["b"]
      >>> s.rank()
      a    1.0
      b    2.5
      c    4.0
      d    2.5
      e    5.0
      dtype: float64

      The following example shows how the method behaves with the above
      parameters:

      * default_rank: this is the default behaviour obtained without using
        any parameter.
      * max_rank: setting ``method = 'max'`` the records that have the
        same values are ranked using the highest rank (e.g.: since 'cat'
        and 'dog' are both in the 2nd and 3rd position, rank 3 is assigned.)
      * NA_bottom: choosing ``na_option = 'bottom'``, if there are records
        with NaN values they are placed at the bottom of the ranking.
      * pct_rank: when setting ``pct = True``, the ranking is expressed as
        percentile rank.

      >>> df['default_rank'] = df['Number_legs'].rank()
      >>> df['max_rank'] = df['Number_legs'].rank(method='max')
      >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom')
      >>> df['pct_rank'] = df['Number_legs'].rank(pct=True)
      >>> df
          Animal  Number_legs  default_rank  max_rank  NA_bottom  pct_rank
      0      cat          4.0           2.5       3.0        2.5     0.625
      1  penguin          2.0           1.0       1.0        1.0     0.250
      2      dog          4.0           2.5       3.0        2.5     0.625
      3   spider          8.0           4.0       4.0        4.0     1.000
      4    snake          NaN           NaN       NaN        5.0       NaN
      """
      axis_int = self._get_axis_number(axis)

      if na_option not in {"keep", "top", "bottom"}:
          raise ValueError("na_option must be one of 'keep', 'top', or 'bottom'")

      # Pick the object whose values actually get ranked.
      if not numeric_only:
          data = self
      elif self.ndim == 1 and not is_numeric_dtype(self.dtype):
          # GH#47500
          raise TypeError(
              "Series.rank does not allow numeric_only=True with "
              "non-numeric dtype."
          )
      else:
          data = self._get_numeric_data()

      # 2D (DataFrame): rank on the ndarray cast; 1D (Series): use the
      # backing array so an ExtensionArray can supply its own ranking.
      values = data.values if data.ndim == 2 else data._values

      rank_kwargs = {
          "axis": axis_int,
          "method": method,
          "ascending": ascending,
          "na_option": na_option,
          "pct": pct,
      }
      if isinstance(values, ExtensionArray):
          ranks = values._rank(**rank_kwargs)
      else:
          ranks = algos.rank(values, **rank_kwargs)

      # Rebuild on the axes of the ranked data, which may have fewer columns
      # than ``self`` when ``numeric_only=True``.
      ranks_obj = self._constructor(ranks, **data._construct_axes_dict())
      return ranks_obj.__finalize__(self, method="rank")
  @doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"])
  def compare(
      self,
      other,
      align_axis: Axis = 1,
      keep_shape: bool_t = False,
      keep_equal: bool_t = False,
      result_names: Suffixes = ("self", "other"),
  ):
      # NOTE: the user-facing docstring is injected by ``@doc`` from
      # ``_shared_docs["compare"]``; implementation notes stay in comments.
      if type(self) is not type(other):
          cls_self, cls_other = type(self).__name__, type(other).__name__
          raise TypeError(
              f"can only compare '{cls_self}' (not '{cls_other}') with '{cls_self}'"
          )

      # True where the two objects differ; positions that are missing on both
      # sides count as equal. Any NA left in the mask is treated as a difference.
      differs = ~((self == other) | (self.isna() & other.isna()))
      differs.fillna(True, inplace=True)

      left, right = self, other

      if not keep_equal:
          # blank out the values that are equal on both sides
          left = left.where(differs)
          right = right.where(differs)

      if not keep_shape:
          if isinstance(self, ABCDataFrame):
              # keep only the rows/columns containing at least one difference
              col_has_diff = differs.any()
              row_has_diff = differs.any(axis=1)
              left = left.loc[row_has_diff, col_has_diff]
              right = right.loc[row_has_diff, col_has_diff]
          else:
              left = left[differs]
              right = right[differs]

      if not isinstance(result_names, tuple):
          raise TypeError(
              f"Passing 'result_names' as a {type(result_names)} is not "
              "supported. Provide 'result_names' as a tuple instead."
          )

      # The explicit check for 1/"columns" is needed for Series.
      axis = 1 if align_axis in (1, "columns") else self._get_axis_number(align_axis)

      # error: List item 0 has incompatible type "NDFrame"; expected
      #  "Union[Series, DataFrame]"
      diff = concat(
          [left, right],  # type: ignore[list-item]
          axis=axis,
          keys=result_names,
      )

      if axis >= self.ndim:
          # No need to reorganize data if stacking on new axis
          # This currently applies for stacking two Series on columns
          return diff

      stacked_ax = diff._get_axis(axis)
      original_names = np.array(stacked_ax.names)

      # set index names to positions to avoid confusion
      stacked_ax.names = np.arange(len(original_names))

      # bring self-other to inner level
      order = [*range(1, stacked_ax.nlevels), 0]
      if isinstance(diff, ABCDataFrame):
          diff = diff.reorder_levels(order, axis=axis)
      else:
          diff = diff.reorder_levels(order)

      # restore the index names in order
      diff._get_axis(axis=axis).names = original_names[order]

      # reorder axis to keep things organized: interleave the first half of the
      # positions with the second half (0, n/2, 1, n/2 + 1, ...)
      n_labels = diff.shape[axis]
      indices = np.arange(n_labels).reshape([2, n_labels // 2]).T.flatten()
      return diff.take(indices, axis=axis)
  @final
  @doc(
      klass=_shared_doc_kwargs["klass"],
      axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
  )
  def align(
      self,
      other: NDFrameT,
      join: AlignJoin = "outer",
      axis: Axis | None = None,
      level: Level | None = None,
      copy: bool_t | None = None,
      fill_value: Hashable | None = None,
      method: FillnaOptions | None | lib.NoDefault = lib.no_default,
      limit: int | None | lib.NoDefault = lib.no_default,
      fill_axis: Axis | lib.NoDefault = lib.no_default,
      broadcast_axis: Axis | None | lib.NoDefault = lib.no_default,
  ) -> tuple[Self, NDFrameT]:
      """
      Align two objects on their axes with the specified join method.

      Join method is specified for each axis Index.

      Parameters
      ----------
      other : DataFrame or Series
      join : {{'outer', 'inner', 'left', 'right'}}, default 'outer'
          Type of alignment to be performed.

          * left: use only keys from left frame, preserve key order.
          * right: use only keys from right frame, preserve key order.
          * outer: use union of keys from both frames, sort keys lexicographically.
          * inner: use intersection of keys from both frames,
            preserve the order of the left keys.

      axis : allowed axis of the other object, default None
          Align on index (0), columns (1), or both (None).
      level : int or level name, default None
          Broadcast across a level, matching Index values on the
          passed MultiIndex level.
      copy : bool, default True
          Always returns new objects. If copy=False and no reindexing is
          required then original objects are returned.

          .. note::
              The `copy` keyword will change behavior in pandas 3.0.
              `Copy-on-Write
              <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
              will be enabled by default, which means that all methods with a
              `copy` keyword will use a lazy copy mechanism to defer the copy and
              ignore the `copy` keyword. The `copy` keyword will be removed in a
              future version of pandas.

              You can already get the future behavior and improvements through
              enabling copy on write ``pd.options.mode.copy_on_write = True``
      fill_value : scalar, default np.nan
          Value to use for missing values. Defaults to NaN, but can be any
          "compatible" value.
      method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None
          Method to use for filling holes in reindexed Series:

          - pad / ffill: propagate last valid observation forward to next valid.
          - backfill / bfill: use NEXT valid observation to fill gap.

          .. deprecated:: 2.1

      limit : int, default None
          If method is specified, this is the maximum number of consecutive
          NaN values to forward/backward fill. In other words, if there is
          a gap with more than this number of consecutive NaNs, it will only
          be partially filled. If method is not specified, this is the
          maximum number of entries along the entire axis where NaNs will be
          filled. Must be greater than 0 if not None.

          .. deprecated:: 2.1

      fill_axis : {axes_single_arg}, default 0
          Filling axis, method and limit.

          .. deprecated:: 2.1

      broadcast_axis : {axes_single_arg}, default None
          Broadcast values along this axis, if aligning two objects of
          different dimensions.

          .. deprecated:: 2.1

      Returns
      -------
      tuple of ({klass}, type of other)
          Aligned objects.

      Examples
      --------
      >>> df = pd.DataFrame(
      ...     [[1, 2, 3, 4], [6, 7, 8, 9]], columns=["D", "B", "E", "A"], index=[1, 2]
      ... )
      >>> other = pd.DataFrame(
      ...     [[10, 20, 30, 40], [60, 70, 80, 90], [600, 700, 800, 900]],
      ...     columns=["A", "B", "C", "D"],
      ...     index=[2, 3, 4],
      ... )
      >>> df
         D  B  E  A
      1  1  2  3  4
      2  6  7  8  9
      >>> other
          A    B    C    D
      2   10   20   30   40
      3   60   70   80   90
      4  600  700  800  900

      Align on columns:

      >>> left, right = df.align(other, join="outer", axis=1)
      >>> left
         A  B   C  D  E
      1  4  2 NaN  1  3
      2  9  7 NaN  6  8
      >>> right
          A    B    C    D   E
      2   10   20   30   40 NaN
      3   60   70   80   90 NaN
      4  600  700  800  900 NaN

      We can also align on the index:

      >>> left, right = df.align(other, join="outer", axis=0)
      >>> left
           D    B    E    A
      1  1.0  2.0  3.0  4.0
      2  6.0  7.0  8.0  9.0
      3  NaN  NaN  NaN  NaN
      4  NaN  NaN  NaN  NaN
      >>> right
             A      B      C      D
      1    NaN    NaN    NaN    NaN
      2   10.0   20.0   30.0   40.0
      3   60.0   70.0   80.0   90.0
      4  600.0  700.0  800.0  900.0

      Finally, the default `axis=None` will align on both index and columns:

      >>> left, right = df.align(other, join="outer", axis=None)
      >>> left
           A    B   C    D    E
      1  4.0  2.0 NaN  1.0  3.0
      2  9.0  7.0 NaN  6.0  8.0
      3  NaN  NaN NaN  NaN  NaN
      4  NaN  NaN NaN  NaN  NaN
      >>> right
             A      B      C      D   E
      1    NaN    NaN    NaN    NaN NaN
      2   10.0   20.0   30.0   40.0 NaN
      3   60.0   70.0   80.0   90.0 NaN
      4  600.0  700.0  800.0  900.0 NaN
      """
      # --- deprecated fill keywords: warn if any was passed explicitly -------
      if any(kw is not lib.no_default for kw in (method, limit, fill_axis)):
          # GH#51856
          warnings.warn(
              "The 'method', 'limit', and 'fill_axis' keywords in "
              f"{type(self).__name__}.align are deprecated and will be removed "
              "in a future version. Call fillna directly on the returned objects "
              "instead.",
              FutureWarning,
              stacklevel=find_stack_level(),
          )

      # Swap the "not passed" sentinels for the historical defaults.
      fill_axis = 0 if fill_axis is lib.no_default else fill_axis
      method = None if method is lib.no_default else method
      limit = None if limit is lib.no_default else limit

      if method is not None:
          method = clean_fill_method(method)

      # --- deprecated broadcast_axis keyword ----------------------------------
      if broadcast_axis is lib.no_default:
          broadcast_axis = None
      else:
          # GH#51856
          # TODO(3.0): enforcing this deprecation will close GH#13194
          msg = (
              f"The 'broadcast_axis' keyword in {type(self).__name__}.align is "
              "deprecated and will be removed in a future version."
          )
          if broadcast_axis is not None:
              # point the user at the explicit replacement for their case
              if self.ndim == 1 and other.ndim == 2:
                  msg += (
                      " Use left = DataFrame({col: left for col in right.columns}, "
                      "index=right.index) before calling `left.align(right)` instead."
                  )
              elif self.ndim == 2 and other.ndim == 1:
                  msg += (
                      " Use right = DataFrame({col: right for col in left.columns}, "
                      "index=left.index) before calling `left.align(right)` instead"
                  )
          warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())

      # --- Series <-> DataFrame broadcasting ----------------------------------
      if broadcast_axis == 1 and self.ndim != other.ndim:
          if isinstance(self, ABCSeries):
              # this means other is a DataFrame, and we need to broadcast
              # self
              expand = self._constructor_expanddim
              df = expand(
                  {c: self for c in other.columns}, **other._construct_axes_dict()
              )
              # error: Incompatible return value type (got "Tuple[DataFrame,
              # DataFrame]", expected "Tuple[Self, NDFrameT]")
              return df._align_frame(  # type: ignore[return-value]
                  other,  # type: ignore[arg-type]
                  join=join,
                  axis=axis,
                  level=level,
                  copy=copy,
                  fill_value=fill_value,
                  method=method,
                  limit=limit,
                  fill_axis=fill_axis,
              )[:2]
          elif isinstance(other, ABCSeries):
              # this means self is a DataFrame, and we need to broadcast
              # other
              expand = other._constructor_expanddim
              df = expand(
                  {c: other for c in self.columns}, **self._construct_axes_dict()
              )
              # error: Incompatible return value type (got "Tuple[NDFrameT,
              # DataFrame]", expected "Tuple[Self, NDFrameT]")
              return self._align_frame(  # type: ignore[return-value]
                  df,
                  join=join,
                  axis=axis,
                  level=level,
                  copy=copy,
                  fill_value=fill_value,
                  method=method,
                  limit=limit,
                  fill_axis=fill_axis,
              )[:2]

      # --- regular alignment, dispatched on the type of ``other`` -------------
      _right: DataFrame | Series
      if axis is not None:
          axis = self._get_axis_number(axis)

      if isinstance(other, ABCDataFrame):
          left, _right, join_index = self._align_frame(
              other,
              join=join,
              axis=axis,
              level=level,
              copy=copy,
              fill_value=fill_value,
              method=method,
              limit=limit,
              fill_axis=fill_axis,
          )
      elif isinstance(other, ABCSeries):
          left, _right, join_index = self._align_series(
              other,
              join=join,
              axis=axis,
              level=level,
              copy=copy,
              fill_value=fill_value,
              method=method,
              limit=limit,
              fill_axis=fill_axis,
          )
      else:  # pragma: no cover
          raise TypeError(f"unsupported type: {type(other)}")

      right = cast(NDFrameT, _right)

      # If we are aligning timezone-aware DatetimeIndexes and the timezones
      # do not match, both results take the joined index (per the original
      # note this converts both sides to UTC). The conditions short-circuit in
      # the same order as the nested checks they replace.
      if (
          (self.ndim == 1 or axis == 0)
          and isinstance(left.index.dtype, DatetimeTZDtype)
          and left.index.tz != right.index.tz
          and join_index is not None
      ):
          # GH#33671 copy to ensure we don't change the index on
          #  our original Series
          left = left.copy(deep=False)
          right = right.copy(deep=False)
          left.index = join_index
          right.index = join_index

      left = left.__finalize__(self)
      right = right.__finalize__(other)
      return left, right
  @final
  def _align_frame(
      self,
      other: DataFrame,
      join: AlignJoin = "outer",
      axis: Axis | None = None,
      level=None,
      copy: bool_t | None = None,
      fill_value=None,
      method=None,
      limit: int | None = None,
      fill_axis: Axis = 0,
  ) -> tuple[Self, DataFrame, Index | None]:
      """
      Align ``self`` with the DataFrame ``other`` on index and/or columns.

      Parameters
      ----------
      other : DataFrame
      join : str, default 'outer'
          Passed as ``how`` to ``Index.join`` for each axis being aligned.
      axis : {0, 1, None}, default None
          0 joins the index only, 1 the columns only, None both. Columns
          are never joined when ``self`` is a Series.
      level : optional
          Forwarded to ``Index.join``.
      copy : bool, optional
          Forwarded to ``_reindex_with_indexers``.
      fill_value : optional
          Forwarded to ``_reindex_with_indexers``.
      method : optional
          When not None, both results are passed through
          ``_pad_or_backfill`` with this method.
      limit : int, optional
          Forwarded to ``_pad_or_backfill``.
      fill_axis : default 0
          Axis forwarded to ``_pad_or_backfill``.

      Returns
      -------
      tuple
          ``(left, right, join_index)``; ``join_index`` is None when the
          index was not joined (axis excludes it or both indexes are equal).
      """
      on_index = axis is None or axis == 0
      on_columns = axis is None or axis == 1
      is_series = isinstance(self, ABCSeries)

      # Defaults: "nothing to reindex" on either axis.
      join_index = join_columns = None
      ilidx = iridx = None
      clidx = cridx = None

      if on_index and not self.index.equals(other.index):
          join_index, ilidx, iridx = self.index.join(
              other.index, how=join, level=level, return_indexers=True
          )

      if on_columns and not is_series and not self.columns.equals(other.columns):
          join_columns, clidx, cridx = self.columns.join(
              other.columns, how=join, level=level, return_indexers=True
          )

      # A Series only has axis 0; a DataFrame also gets its columns reindexed.
      left_reindexers = {0: [join_index, ilidx]}
      if not is_series:
          left_reindexers[1] = [join_columns, clidx]

      left = self._reindex_with_indexers(
          left_reindexers, copy=copy, fill_value=fill_value, allow_dups=True
      )
      # other must be always DataFrame
      right = other._reindex_with_indexers(
          {0: [join_index, iridx], 1: [join_columns, cridx]},
          copy=copy,
          fill_value=fill_value,
          allow_dups=True,
      )

      if method is not None:
          left = left._pad_or_backfill(method, axis=fill_axis, limit=limit)
          right = right._pad_or_backfill(method, axis=fill_axis, limit=limit)

      return left, right, join_index
  @final
  def _align_series(
      self,
      other: Series,
      join: AlignJoin = "outer",
      axis: Axis | None = None,
      level=None,
      copy: bool_t | None = None,
      fill_value=None,
      method=None,
      limit: int | None = None,
      fill_axis: Axis = 0,
  ) -> tuple[Self, Series, Index | None]:
      """
      Align ``self`` with the Series ``other``.

      Parameters
      ----------
      other : Series
      join : str, default 'outer'
          Passed as ``how`` to ``Index.join``.
      axis : {0, 1, None}, default None
          Axis of ``self`` matched against ``other.index``. Must be given
          explicitly when ``self`` is not a Series.
      level : optional
          Forwarded to ``Index.join`` (and to ``other.reindex`` on axis 1).
      copy : bool, optional
          Reset to False when it is truthy and Copy-on-Write is enabled.
      fill_value, method, limit, fill_axis
          Fill options, applied after alignment when ``fill_value`` is not
          NA or ``method`` is given.

      Returns
      -------
      tuple
          ``(left, right, join_index)``.

      Raises
      ------
      ValueError
          If ``self`` is not a Series and ``axis`` is None, if ``axis`` is
          not one of None/0/1, or if ``self`` is a Series and ``axis`` is 1.
      """
      is_series = isinstance(self, ABCSeries)
      if copy and using_copy_on_write():
          copy = False

      if (not is_series and axis is None) or axis not in [None, 0, 1]:
          raise ValueError("Must specify axis=0 or 1")

      if is_series and axis == 1:
          raise ValueError("cannot align series to a series other than axis 0")

      # series/series compat, other must always be a Series
      if not axis:
          # axis is None or 0: match other's index against our own index
          if self.index.equals(other.index):
              # equal -> nothing to reindex
              join_index, lidx, ridx = None, None, None
          else:
              join_index, lidx, ridx = self.index.join(
                  other.index, how=join, level=level, return_indexers=True
              )

          if is_series:
              left = self._reindex_indexer(join_index, lidx, copy)
          elif lidx is None or join_index is None:
              left = self.copy(deep=copy)
          else:
              reindexed = self._mgr.reindex_indexer(
                  join_index, lidx, axis=1, copy=copy
              )
              left = self._constructor_from_mgr(reindexed, axes=reindexed.axes)

          right = other._reindex_indexer(join_index, ridx, copy)

      else:
          # one has > 1 ndim: match other's index against our axis 1
          mgr = self._mgr
          join_index = self.axes[1]
          lidx, ridx = None, None
          if not join_index.equals(other.index):
              join_index, lidx, ridx = join_index.join(
                  other.index, how=join, level=level, return_indexers=True
              )

          if lidx is not None:
              mgr = mgr.reindex_indexer(
                  join_index, lidx, axis=self._get_block_manager_axis(1)
              )

          # only copy when no reindexing produced a new manager
          if copy and mgr is self._mgr:
              mgr = mgr.copy()

          left = self._constructor_from_mgr(mgr, axes=mgr.axes)

          if ridx is None:
              right = other.copy(deep=copy)
          else:
              right = other.reindex(join_index, level=level)

      # fill
      if notna(fill_value) or (method is not None):
          fill_value, method = validate_fillna_kwargs(fill_value, method)
          if method is None:
              left = left.fillna(fill_value, limit=limit, axis=fill_axis)
              right = right.fillna(fill_value, limit=limit)
          else:
              left = left._pad_or_backfill(method, limit=limit, axis=fill_axis)
              right = right._pad_or_backfill(method, limit=limit)

      return left, right, join_index
  @final
  def _where(
      self,
      cond,
      other=lib.no_default,
      inplace: bool_t = False,
      axis: Axis | None = None,
      level=None,
      warn: bool_t = True,
  ):
      """
      Equivalent to public method `where`, except that `other` is not
      applied as a function even if callable. Used in __setitem__.

      Parameters
      ----------
      cond : NDFrame, array-like, or callable
          Condition; a callable is evaluated on ``self`` first.
      other : scalar, NDFrame, or array-like, optional
          Replacement values.
      inplace : bool, default False
          When True the update goes through ``putmask`` and
          ``_update_inplace``; otherwise a new object is returned.
      axis : int or str, optional
          Alignment axis; treated as 0 when None once `other` is prepared.
      level : optional
          Alignment level forwarded to ``align``.
      warn : bool, default True
          Forwarded to the manager's ``putmask`` in the inplace path.

      Raises
      ------
      ValueError
          If an array `cond` does not have the shape of ``self``, if `cond`
          is not boolean, or if an array `other` has a different shape while
          ``self`` is not one-dimensional.
      InvalidIndexError
          If `axis` is None and the aligned `other` is not indexed like
          ``self``.
      NotImplementedError
          If `other` has more dimensions than ``self``.
      """
      inplace = validate_bool_kwarg(inplace, "inplace")

      if axis is not None:
          axis = self._get_axis_number(axis)

      # ---- normalise ``cond`` to a boolean object shaped like self ----------
      # align the cond to same shape as myself
      cond = common.apply_if_callable(cond, self)
      if isinstance(cond, NDFrame):
          # CoW: Make sure reference is not kept alive
          if cond.ndim == 1 and self.ndim == 2:
              # broadcast a 1D condition across every column of self
              cond = cond._constructor_expanddim(
                  dict.fromkeys(range(len(self.columns)), cond),
                  copy=False,
              )
              cond.columns = self.columns
          cond = cond.align(self, join="right", copy=False)[0]
      else:
          if not hasattr(cond, "shape"):
              cond = np.asanyarray(cond)
          if cond.shape != self.shape:
              raise ValueError("Array conditional must be same shape as self")
          cond = self._constructor(cond, **self._construct_axes_dict(), copy=False)

      # make sure we are boolean: missing entries become True when inplace
      # (the condition is negated further down in that case), else False
      fill_value = bool(inplace)
      with warnings.catch_warnings():
          warnings.filterwarnings(
              "ignore",
              "Downcasting object dtype arrays",
              category=FutureWarning,
          )
          cond = cond.fillna(fill_value)
      cond = cond.infer_objects(copy=False)

      dtype_msg = "Boolean array expected for the condition, not {dtype}"

      if cond.empty:
          # GH#21947 we have an empty DataFrame/Series, could be object-dtype
          cond = cond.astype(bool)
      elif isinstance(cond, ABCDataFrame):
          for _dt in cond.dtypes:
              if not is_bool_dtype(_dt):
                  raise ValueError(dtype_msg.format(dtype=_dt))
          if cond._mgr.any_extension_types:
              # GH51574: avoid object ndarray conversion later on
              cond = cond._constructor(
                  cond.to_numpy(dtype=bool, na_value=fill_value),
                  **cond._construct_axes_dict(),
              )
      elif not is_bool_dtype(cond):
          # This is a single-dimensional object.
          raise ValueError(dtype_msg.format(dtype=cond.dtype))

      if inplace:
          cond = -cond
      cond = cond.reindex(self._info_axis, axis=self._info_axis_number, copy=False)

      # ---- prepare ``other`` -------------------------------------------------
      # try to align with other
      if isinstance(other, NDFrame):
          if other.ndim > self.ndim:
              raise NotImplementedError(
                  "cannot align with a higher dimensional NDFrame"
              )

          # align with me
          # CoW: Make sure reference is not kept alive
          other = self.align(
              other,
              join="left",
              axis=axis,
              level=level,
              fill_value=None,
              copy=False,
          )[1]

          # if we are NOT aligned, raise as we cannot where index
          if axis is None and not other._indexed_same(self):
              raise InvalidIndexError

          if other.ndim < self.ndim:
              # TODO(EA2D): avoid object-dtype cast in EA case GH#38729
              other = other._values
              if axis == 0:
                  other = np.reshape(other, (-1, 1))
              elif axis == 1:
                  other = np.reshape(other, (1, -1))

              other = np.broadcast_to(other, self.shape)

      elif not isinstance(other, (MultiIndex, NDFrame)):
          # mainly just catching Index here
          other = extract_array(other, extract_numpy=True)

      if isinstance(other, (np.ndarray, ExtensionArray)):
          if other.shape == self.shape:
              # we are the same shape, so create an actual object for alignment
              other = self._constructor(
                  other, **self._construct_axes_dict(), copy=False
              )
          elif self.ndim != 1:
              # In the ndim == 1 case we may have
              #  other length 1, which we treat as scalar (GH#2745, GH#4192)
              #  or len(other) == icond.sum(), which we treat like
              #  __setitem__ (GH#3235)
              raise ValueError(
                  "other must be the same shape as self when an ndarray"
              )

      if axis is None:
          axis = 0

      # Same dimensionality always aligns; otherwise only when working on axis 1.
      align = (
          self.ndim == getattr(other, "ndim", 0)
          or self._get_axis_number(axis) == 1
      )

      # ---- apply -------------------------------------------------------------
      if inplace:
          # we may have different type blocks come out of putmask, so
          # reconstruct the block manager
          new_data = self._mgr.putmask(mask=cond, new=other, align=align, warn=warn)
          result = self._constructor_from_mgr(new_data, axes=new_data.axes)
          return self._update_inplace(result)

      new_data = self._mgr.where(
          other=other,
          cond=cond,
          align=align,
      )
      result = self._constructor_from_mgr(new_data, axes=new_data.axes)
      return result.__finalize__(self)
  @overload
  def where(
      self,
      cond,
      other=...,
      *,
      inplace: Literal[False] = ...,
      axis: Axis | None = ...,
      level: Level = ...,
  ) -> Self:
      ...

  @overload
  def where(
      self,
      cond,
      other=...,
      *,
      inplace: Literal[True],
      axis: Axis | None = ...,
      level: Level = ...,
  ) -> None:
      ...

  @overload
  def where(
      self,
      cond,
      other=...,
      *,
      inplace: bool_t = ...,
      axis: Axis | None = ...,
      level: Level = ...,
  ) -> Self | None:
      ...

  @final
  @doc(
      klass=_shared_doc_kwargs["klass"],
      cond="True",
      cond_rev="False",
      name="where",
      name_other="mask",
  )
  def where(
      self,
      cond,
      other=np.nan,
      *,
      inplace: bool_t = False,
      axis: Axis | None = None,
      level: Level | None = None,
  ) -> Self | None:
      """
      Replace values where the condition is {cond_rev}.

      Parameters
      ----------
      cond : bool {klass}, array-like, or callable
          Where `cond` is {cond}, keep the original value. Where
          {cond_rev}, replace with corresponding value from `other`.
          If `cond` is callable, it is computed on the {klass} and
          should return boolean {klass} or array. The callable must
          not change input {klass} (though pandas doesn't check it).
      other : scalar, {klass}, or callable
          Entries where `cond` is {cond_rev} are replaced with
          corresponding value from `other`.
          If other is callable, it is computed on the {klass} and
          should return scalar or {klass}. The callable must not
          change input {klass} (though pandas doesn't check it).
          If not specified, entries will be filled with the corresponding
          NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension
          dtypes).
      inplace : bool, default False
          Whether to perform the operation in place on the data.
      axis : int, default None
          Alignment axis if needed. For `Series` this parameter is
          unused and defaults to 0.
      level : int, default None
          Alignment level if needed.

      Returns
      -------
      Same type as caller or None if ``inplace=True``.

      See Also
      --------
      :func:`DataFrame.{name_other}` : Return an object of same shape as
          self.

      Notes
      -----
      The {name} method is an application of the if-then idiom. For each
      element in the calling DataFrame, if ``cond`` is ``{cond}`` the
      element is used; otherwise the corresponding element from the DataFrame
      ``other`` is used. If the axis of ``other`` does not align with axis of
      ``cond`` {klass}, the misaligned index positions will be filled with
      {cond_rev}.

      The signature for :func:`DataFrame.where` differs from
      :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to
      ``np.where(m, df1, df2)``.

      For further details and examples see the ``{name}`` documentation in
      :ref:`indexing <indexing.where_mask>`.

      The dtype of the object takes precedence. The fill value is cast to
      the object's dtype, if this can be done losslessly.

      Examples
      --------
      >>> s = pd.Series(range(5))
      >>> s.where(s > 0)
      0    NaN
      1    1.0
      2    2.0
      3    3.0
      4    4.0
      dtype: float64
      >>> s.mask(s > 0)
      0    0.0
      1    NaN
      2    NaN
      3    NaN
      4    NaN
      dtype: float64

      >>> s = pd.Series(range(5))
      >>> t = pd.Series([True, False])
      >>> s.where(t, 99)
      0     0
      1    99
      2    99
      3    99
      4    99
      dtype: int64
      >>> s.mask(t, 99)
      0    99
      1     1
      2    99
      3    99
      4    99
      dtype: int64

      >>> s.where(s > 1, 10)
      0    10
      1    10
      2     2
      3     3
      4     4
      dtype: int64
      >>> s.mask(s > 1, 10)
      0     0
      1     1
      2    10
      3    10
      4    10
      dtype: int64

      >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B'])
      >>> df
         A  B
      0  0  1
      1  2  3
      2  4  5
      3  6  7
      4  8  9
      >>> m = df % 3 == 0
      >>> df.where(m, -df)
         A  B
      0  0 -1
      1 -2  3
      2 -4 -5
      3  6 -7
      4 -8  9
      >>> df.where(m, -df) == np.where(m, df, -df)
            A     B
      0  True  True
      1  True  True
      2  True  True
      3  True  True
      4  True  True
      >>> df.where(m, -df) == df.mask(~m, -df)
            A     B
      0  True  True
      1  True  True
      2  True  True
      3  True  True
      4  True  True
      """
      inplace = validate_bool_kwarg(inplace, "inplace")

      # Chained-assignment detection; skipped on PyPy or when the check is
      # disabled. NOTE(review): the reference-count thresholds are presumably
      # calibrated for this exact call depth, so the check is kept inline
      # rather than moved into a helper -- confirm before refactoring.
      if inplace and not PYPY and not WARNING_CHECK_DISABLED:
          if using_copy_on_write():
              if sys.getrefcount(self) <= REF_COUNT:
                  warnings.warn(
                      _chained_assignment_method_msg,
                      ChainedAssignmentError,
                      stacklevel=2,
                  )
          elif self._is_view_after_cow_rules():
              ctr = sys.getrefcount(self)
              if isinstance(self, ABCSeries) and hasattr(self, "_cacher"):
                  # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
                  threshold = REF_COUNT + 1
              else:
                  threshold = REF_COUNT
              if ctr <= threshold:
                  warnings.warn(
                      _chained_assignment_warning_method_msg,
                      FutureWarning,
                      stacklevel=2,
                  )

      # A callable ``other`` is evaluated on self here; ``_where`` itself
      # never calls it.
      other = common.apply_if_callable(other, self)
      return self._where(cond, other, inplace=inplace, axis=axis, level=level)
  @overload
  def mask(
      self,
      cond,
      other=...,
      *,
      inplace: Literal[False] = ...,
      axis: Axis | None = ...,
      level: Level = ...,
  ) -> Self:
      ...

  @overload
  def mask(
      self,
      cond,
      other=...,
      *,
      inplace: Literal[True],
      axis: Axis | None = ...,
      level: Level = ...,
  ) -> None:
      ...

  @overload
  def mask(
      self,
      cond,
      other=...,
      *,
      inplace: bool_t = ...,
      axis: Axis | None = ...,
      level: Level = ...,
  ) -> Self | None:
      ...

  @final
  @doc(
      where,
      klass=_shared_doc_kwargs["klass"],
      cond="False",
      cond_rev="True",
      name="mask",
      name_other="where",
  )
  def mask(
      self,
      cond,
      other=lib.no_default,
      *,
      inplace: bool_t = False,
      axis: Axis | None = None,
      level: Level | None = None,
  ) -> Self | None:
      # NOTE: the user-facing docstring comes from ``where`` through ``@doc``
      # with the True/False roles swapped; implementation notes stay in
      # comments.
      inplace = validate_bool_kwarg(inplace, "inplace")

      # Chained-assignment detection; skipped on PyPy or when the check is
      # disabled. NOTE(review): the reference-count thresholds are presumably
      # calibrated for this exact call depth, so the check is kept inline
      # rather than moved into a helper -- confirm before refactoring.
      if inplace and not PYPY and not WARNING_CHECK_DISABLED:
          if using_copy_on_write():
              if sys.getrefcount(self) <= REF_COUNT:
                  warnings.warn(
                      _chained_assignment_method_msg,
                      ChainedAssignmentError,
                      stacklevel=2,
                  )
          elif self._is_view_after_cow_rules():
              ctr = sys.getrefcount(self)
              if isinstance(self, ABCSeries) and hasattr(self, "_cacher"):
                  # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
                  threshold = REF_COUNT + 1
              else:
                  threshold = REF_COUNT
              if ctr <= threshold:
                  warnings.warn(
                      _chained_assignment_warning_method_msg,
                      FutureWarning,
                      stacklevel=2,
                  )

      # Callables are evaluated on self before anything else.
      cond = common.apply_if_callable(cond, self)
      other = common.apply_if_callable(other, self)

      # see gh-21891
      # objects that cannot be inverted with ``~`` (e.g. a plain list) are
      # turned into an ndarray first
      if not hasattr(cond, "__invert__"):
          cond = np.array(cond)

      # mask(cond) is where(~cond)
      inverted = ~cond
      return self._where(
          inverted,
          other=other,
          inplace=inplace,
          axis=axis,
          level=level,
      )
  @doc(klass=_shared_doc_kwargs["klass"])
  def shift(
      self,
      periods: int | Sequence[int] = 1,
      freq=None,
      axis: Axis = 0,
      fill_value: Hashable = lib.no_default,
      suffix: str | None = None,
  ) -> Self | DataFrame:
      """
      Shift index by desired number of periods with an optional time `freq`.

      When `freq` is not passed, shift the index without realigning the data.
      If `freq` is passed (in this case, the index must be date or datetime,
      or it will raise a `NotImplementedError`), the index will be
      increased using the periods and the `freq`. `freq` can be inferred
      when specified as "infer" as long as either freq or inferred_freq
      attribute is set in the index.

      Parameters
      ----------
      periods : int or Sequence
          Number of periods to shift. Can be positive or negative.
          If an iterable of ints, the data will be shifted once by each int.
          This is equivalent to shifting by one value at a time and
          concatenating all resulting frames. The resulting columns will have
          the shift suffixed to their column names. For multiple periods,
          axis must not be 1.
      freq : DateOffset, tseries.offsets, timedelta, or str, optional
          Offset to use from the tseries module or time rule (e.g. 'EOM').
          If `freq` is specified then the index values are shifted but the
          data is not realigned. That is, use `freq` if you would like to
          extend the index when shifting and preserve the original data.
          If `freq` is specified as "infer" then it will be inferred from
          the freq or inferred_freq attributes of the index. If neither of
          those attributes exist, a ValueError is thrown.
      axis : {{0 or 'index', 1 or 'columns'}}, default 0
          Shift direction. For `Series` this parameter is unused and defaults to 0.
      fill_value : object, optional
          The scalar value to use for newly introduced missing values.
          The default depends on the dtype of `self`.
          For numeric data, ``np.nan`` is used.
          For datetime, timedelta, or period data, etc. :attr:`NaT` is used.
          For extension dtypes, ``self.dtype.na_value`` is used.
      suffix : str, optional
          If str and periods is an iterable, this is added after the column
          name and before the shift value for each shifted column name.

      Returns
      -------
      {klass}
          Copy of input object, shifted.

      See Also
      --------
      Index.shift : Shift values of Index.
      DatetimeIndex.shift : Shift values of DatetimeIndex.
      PeriodIndex.shift : Shift values of PeriodIndex.

      Examples
      --------
      >>> df = pd.DataFrame({{"Col1": [10, 20, 15, 30, 45],
      ...                    "Col2": [13, 23, 18, 33, 48],
      ...                    "Col3": [17, 27, 22, 37, 52]}},
      ...                   index=pd.date_range("2020-01-01", "2020-01-05"))
      >>> df
                  Col1  Col2  Col3
      2020-01-01    10    13    17
      2020-01-02    20    23    27
      2020-01-03    15    18    22
      2020-01-04    30    33    37
      2020-01-05    45    48    52

      >>> df.shift(periods=3)
                  Col1  Col2  Col3
      2020-01-01   NaN   NaN   NaN
      2020-01-02   NaN   NaN   NaN
      2020-01-03   NaN   NaN   NaN
      2020-01-04  10.0  13.0  17.0
      2020-01-05  20.0  23.0  27.0

      >>> df.shift(periods=1, axis="columns")
                  Col1  Col2  Col3
      2020-01-01   NaN    10    13
      2020-01-02   NaN    20    23
      2020-01-03   NaN    15    18
      2020-01-04   NaN    30    33
      2020-01-05   NaN    45    48

      >>> df.shift(periods=3, fill_value=0)
                  Col1  Col2  Col3
      2020-01-01     0     0     0
      2020-01-02     0     0     0
      2020-01-03     0     0     0
      2020-01-04    10    13    17
      2020-01-05    20    23    27

      >>> df.shift(periods=3, freq="D")
                  Col1  Col2  Col3
      2020-01-04    10    13    17
      2020-01-05    20    23    27
      2020-01-06    15    18    22
      2020-01-07    30    33    37
      2020-01-08    45    48    52

      >>> df.shift(periods=3, freq="infer")
                  Col1  Col2  Col3
      2020-01-04    10    13    17
      2020-01-05    20    23    27
      2020-01-06    15    18    22
      2020-01-07    30    33    37
      2020-01-08    45    48    52

      >>> df['Col1'].shift(periods=[0, 1, 2])
                  Col1_0  Col1_1  Col1_2
      2020-01-01      10     NaN     NaN
      2020-01-02      20    10.0     NaN
      2020-01-03      15    20.0    10.0
      2020-01-04      30    15.0    20.0
      2020-01-05      45    30.0    15.0
      """
      axis = self._get_axis_number(axis)

      if freq is not None and fill_value is not lib.no_default:
          # GH#53832
          warnings.warn(
              "Passing a 'freq' together with a 'fill_value' silently ignores "
              "the fill_value and is deprecated. This will raise in a future "
              "version.",
              FutureWarning,
              stacklevel=find_stack_level(),
          )
          fill_value = lib.no_default

      # Only compare scalar periods against 0: an array-like ``periods`` would
      # produce an element-wise comparison instead of a single bool.
      if not is_list_like(periods) and periods == 0:
          return self.copy(deep=None)

      if is_list_like(periods) and isinstance(self, ABCSeries):
          # Delegate the multi-shift case to the frame implementation and
          # forward ``suffix`` so it is honoured for Series as documented.
          return self.to_frame().shift(
              periods=periods,
              freq=freq,
              axis=axis,
              fill_value=fill_value,
              suffix=suffix,
          )
      periods = cast(int, periods)

      if freq is None:
          # when freq is None, data is shifted, index is not
          # (``axis`` was already normalised to an integer above)
          assert axis == 0  # axis == 1 cases handled in DataFrame.shift
          new_data = self._mgr.shift(periods=periods, fill_value=fill_value)
          return self._constructor_from_mgr(
              new_data, axes=new_data.axes
          ).__finalize__(self, method="shift")

      return self._shift_with_freq(periods, axis, freq)
  @final
  def _shift_with_freq(self, periods: int, axis: int, freq) -> Self:
      # see shift.__doc__
      # when freq is given, index is shifted, data is not
      target_index = self._get_axis(axis)
      index_is_period = isinstance(target_index, PeriodIndex)

      if freq == "infer":
          # Prefer an explicitly set ``freq``; fall back to ``inferred_freq``.
          resolved = getattr(target_index, "freq", None)
          if resolved is None:
              resolved = getattr(target_index, "inferred_freq", None)
          if resolved is None:
              msg = "Freq was not set in the index hence cannot be inferred"
              raise ValueError(msg)
          freq = resolved
      elif isinstance(freq, str):
          freq = to_offset(freq, is_period=index_is_period)

      if not index_is_period:
          shifted_axis = target_index.shift(periods, freq)
      else:
          # A PeriodIndex is only shifted by its own frequency, so the
          # requested one has to match it.
          orig_freq = to_offset(target_index.freq)
          if freq != orig_freq:
              assert orig_freq is not None  # for mypy
              raise ValueError(
                  f"Given freq {freq_to_period_freqstr(freq.n, freq.name)} "
                  f"does not match PeriodIndex freq "
                  f"{freq_to_period_freqstr(orig_freq.n, orig_freq.name)}"
              )
          shifted_axis = target_index.shift(periods)

      relabelled = self.set_axis(shifted_axis, axis=axis)
      return relabelled.__finalize__(self, method="shift")
  @final
  def truncate(
      self,
      before=None,
      after=None,
      axis: Axis | None = None,
      copy: bool_t | None = None,
  ) -> Self:
      """
      Truncate a Series or DataFrame before and after some index value.

      This is a useful shorthand for boolean indexing based on index
      values above or below certain thresholds.

      Parameters
      ----------
      before : date, str, int
          Truncate all rows before this index value.
      after : date, str, int
          Truncate all rows after this index value.
      axis : {0 or 'index', 1 or 'columns'}, optional
          Axis to truncate. Truncates the index (rows) by default.
          For `Series` this parameter is unused and defaults to 0.
      copy : bool, default is True
          Return a copy of the truncated section.

          .. note::
              The `copy` keyword will change behavior in pandas 3.0.
              `Copy-on-Write
              <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
              will be enabled by default, which means that all methods with a
              `copy` keyword will use a lazy copy mechanism to defer the copy and
              ignore the `copy` keyword. The `copy` keyword will be removed in a
              future version of pandas.

              You can already get the future behavior and improvements through
              enabling copy on write ``pd.options.mode.copy_on_write = True``

      Returns
      -------
      type of caller
          The truncated Series or DataFrame.

      See Also
      --------
      DataFrame.loc : Select a subset of a DataFrame by label.
      DataFrame.iloc : Select a subset of a DataFrame by position.

      Notes
      -----
      If the index being truncated contains only datetime values,
      `before` and `after` may be specified as strings instead of
      Timestamps.

      Examples
      --------
      >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'],
      ...                    'B': ['f', 'g', 'h', 'i', 'j'],
      ...                    'C': ['k', 'l', 'm', 'n', 'o']},
      ...                   index=[1, 2, 3, 4, 5])
      >>> df
         A  B  C
      1  a  f  k
      2  b  g  l
      3  c  h  m
      4  d  i  n
      5  e  j  o

      >>> df.truncate(before=2, after=4)
         A  B  C
      2  b  g  l
      3  c  h  m
      4  d  i  n

      The columns of a DataFrame can be truncated.

      >>> df.truncate(before="A", after="B", axis="columns")
         A  B
      1  a  f
      2  b  g
      3  c  h
      4  d  i
      5  e  j

      For Series, only rows can be truncated.

      >>> df['A'].truncate(before=2, after=4)
      2    b
      3    c
      4    d
      Name: A, dtype: object

      The index values in ``truncate`` can be datetimes or string
      dates.

      >>> dates = pd.date_range('2016-01-01', '2016-02-01', freq='s')
      >>> df = pd.DataFrame(index=dates, data={'A': 1})
      >>> df.tail()
                           A
      2016-01-31 23:59:56  1
      2016-01-31 23:59:57  1
      2016-01-31 23:59:58  1
      2016-01-31 23:59:59  1
      2016-02-01 00:00:00  1

      >>> df.truncate(before=pd.Timestamp('2016-01-05'),
      ...             after=pd.Timestamp('2016-01-10')).tail()
                           A
      2016-01-09 23:59:56  1
      2016-01-09 23:59:57  1
      2016-01-09 23:59:58  1
      2016-01-09 23:59:59  1
      2016-01-10 00:00:00  1

      Because the index is a DatetimeIndex containing only dates, we can
      specify `before` and `after` as strings. They will be coerced to
      Timestamps before truncation.

      >>> df.truncate('2016-01-05', '2016-01-10').tail()
                           A
      2016-01-09 23:59:56  1
      2016-01-09 23:59:57  1
      2016-01-09 23:59:58  1
      2016-01-09 23:59:59  1
      2016-01-10 00:00:00  1

      Note that ``truncate`` assumes a 0 value for any unspecified time
      component (midnight). This differs from partial string slicing, which
      returns any partially matching dates.

      >>> df.loc['2016-01-05':'2016-01-10', :].tail()
                           A
      2016-01-10 23:59:55  1
      2016-01-10 23:59:56  1
      2016-01-10 23:59:57  1
      2016-01-10 23:59:58  1
      2016-01-10 23:59:59  1
      """
      axis = self._get_axis_number(0 if axis is None else axis)
      ax = self._get_axis(axis)

      # GH 17935
      # The label-based slice below is only meaningful on a sorted axis.
      if not (ax.is_monotonic_increasing or ax.is_monotonic_decreasing):
          raise ValueError("truncate requires a sorted index")

      # For an all-dates axis the bounds are coerced to datetimes; any other
      # axis uses the bounds as plain slice endpoints.
      if ax._is_all_dates:
          from pandas.core.tools.datetimes import to_datetime

          before = to_datetime(before)
          after = to_datetime(after)

      both_given = before is not None and after is not None
      if both_given and before > after:
          raise ValueError(f"Truncate: {after} must be after {before}")

      # On a descending axis the slice runs from the larger to the smaller label.
      if len(ax) > 1 and ax.is_monotonic_decreasing and ax.nunique() > 1:
          before, after = after, before

      # Full slice on every axis except the one being truncated.
      indexer = tuple(
          slice(before, after) if pos == axis else slice(None, None)
          for pos in range(self._AXIS_LEN)
      )
      truncated = self.loc[indexer]

      if isinstance(ax, MultiIndex):
          setattr(truncated, self._get_axis_name(axis), ax.truncate(before, after))

      return truncated.copy(deep=copy and not using_copy_on_write())
  @final
  @doc(klass=_shared_doc_kwargs["klass"])
  def tz_convert(
      self, tz, axis: Axis = 0, level=None, copy: bool_t | None = None
  ) -> Self:
      """
      Convert tz-aware axis to target time zone.

      Parameters
      ----------
      tz : str or tzinfo object or None
          Target time zone. Passing ``None`` will convert to
          UTC and remove the timezone information.
      axis : {{0 or 'index', 1 or 'columns'}}, default 0
          The axis to convert
      level : int, str, default None
          If axis is a MultiIndex, convert a specific level. Otherwise
          must be None.
      copy : bool, default True
          Also make a copy of the underlying data.

          .. note::
              The `copy` keyword will change behavior in pandas 3.0.
              `Copy-on-Write
              <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
              will be enabled by default, which means that all methods with a
              `copy` keyword will use a lazy copy mechanism to defer the copy and
              ignore the `copy` keyword. The `copy` keyword will be removed in a
              future version of pandas.

              You can already get the future behavior and improvements through
              enabling copy on write ``pd.options.mode.copy_on_write = True``

      Returns
      -------
      {klass}
          Object with time zone converted axis.

      Raises
      ------
      TypeError
          If the axis is tz-naive.

      Examples
      --------
      Change to another time zone:

      >>> s = pd.Series(
      ...     [1],
      ...     index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']),
      ... )
      >>> s.tz_convert('Asia/Shanghai')
      2018-09-15 07:30:00+08:00    1
      dtype: int64

      Pass None to convert to UTC and get a tz-naive index:

      >>> s = pd.Series([1],
      ...               index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']))
      >>> s.tz_convert(None)
      2018-09-14 23:30:00    1
      dtype: int64
      """
      axis = self._get_axis_number(axis)
      ax = self._get_axis(axis)

      def _convert_index(idx, zone):
          # Convert one index (or MultiIndex level) to ``zone``.
          if hasattr(idx, "tz_convert"):
              return idx.tz_convert(zone)
          if len(idx) > 0:
              ax_name = self._get_axis_name(axis)
              raise TypeError(
                  f"{ax_name} is not a valid DatetimeIndex or PeriodIndex"
              )
          # An empty index without ``tz_convert`` is replaced by an empty
          # DatetimeIndex in the target zone.
          return DatetimeIndex([], tz=zone)

      # if a level is given it must be a MultiIndex level or
      # equivalent to the axis name
      if isinstance(ax, MultiIndex):
          level = ax._get_level_number(level)
          converted_level = _convert_index(ax.levels[level], tz)
          ax = ax.set_levels(converted_level, level=level)
      elif level in (None, 0, ax.name):
          ax = _convert_index(ax, tz)
      else:
          raise ValueError(f"The level {level} is not valid")

      converted = self.copy(deep=copy and not using_copy_on_write())
      converted = converted.set_axis(ax, axis=axis, copy=False)
      return converted.__finalize__(self, method="tz_convert")
  @final
  @doc(klass=_shared_doc_kwargs["klass"])
  def tz_localize(
      self,
      tz,
      axis: Axis = 0,
      level=None,
      copy: bool_t | None = None,
      ambiguous: TimeAmbiguous = "raise",
      nonexistent: TimeNonexistent = "raise",
  ) -> Self:
      """
      Localize tz-naive index of a Series or DataFrame to target time zone.

      This operation localizes the Index. To localize the values in a
      timezone-naive Series, use :meth:`Series.dt.tz_localize`.

      Parameters
      ----------
      tz : str or tzinfo or None
          Time zone to localize. Passing ``None`` will remove the
          time zone information and preserve local time.
      axis : {{0 or 'index', 1 or 'columns'}}, default 0
          The axis to localize
      level : int, str, default None
          If axis is a MultiIndex, localize a specific level. Otherwise
          must be None.
      copy : bool, default True
          Also make a copy of the underlying data.

          .. note::
              The `copy` keyword will change behavior in pandas 3.0.
              `Copy-on-Write
              <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
              will be enabled by default, which means that all methods with a
              `copy` keyword will use a lazy copy mechanism to defer the copy and
              ignore the `copy` keyword. The `copy` keyword will be removed in a
              future version of pandas.

              You can already get the future behavior and improvements through
              enabling copy on write ``pd.options.mode.copy_on_write = True``
      ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
          When clocks moved backward due to DST, ambiguous times may arise.
          For example in Central European Time (UTC+01), when going from
          03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
          00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
          `ambiguous` parameter dictates how ambiguous times should be
          handled.

          - 'infer' will attempt to infer fall dst-transition hours based on
            order
          - bool-ndarray where True signifies a DST time, False designates
            a non-DST time (note that this flag is only applicable for
            ambiguous times)
          - 'NaT' will return NaT where there are ambiguous times
          - 'raise' will raise an AmbiguousTimeError if there are ambiguous
            times.
      nonexistent : str, default 'raise'
          A nonexistent time does not exist in a particular timezone
          where clocks moved forward due to DST. Valid values are:

          - 'shift_forward' will shift the nonexistent time forward to the
            closest existing time
          - 'shift_backward' will shift the nonexistent time backward to the
            closest existing time
          - 'NaT' will return NaT where there are nonexistent times
          - timedelta objects will shift nonexistent times by the timedelta
          - 'raise' will raise a NonExistentTimeError if there are
            nonexistent times.

      Returns
      -------
      {klass}
          Same type as the input.

      Raises
      ------
      TypeError
          If the TimeSeries is tz-aware and tz is not None.

      Examples
      --------
      Localize local times:

      >>> s = pd.Series(
      ...     [1],
      ...     index=pd.DatetimeIndex(['2018-09-15 01:30:00']),
      ... )
      >>> s.tz_localize('CET')
      2018-09-15 01:30:00+02:00    1
      dtype: int64

      Pass None to convert to tz-naive index and preserve local time:

      >>> s = pd.Series([1],
      ...               index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']))
      >>> s.tz_localize(None)
      2018-09-15 01:30:00    1
      dtype: int64

      Be careful with DST changes. When there is sequential data, pandas
      can infer the DST time:

      >>> s = pd.Series(range(7),
      ...               index=pd.DatetimeIndex(['2018-10-28 01:30:00',
      ...                                       '2018-10-28 02:00:00',
      ...                                       '2018-10-28 02:30:00',
      ...                                       '2018-10-28 02:00:00',
      ...                                       '2018-10-28 02:30:00',
      ...                                       '2018-10-28 03:00:00',
      ...                                       '2018-10-28 03:30:00']))
      >>> s.tz_localize('CET', ambiguous='infer')
      2018-10-28 01:30:00+02:00    0
      2018-10-28 02:00:00+02:00    1
      2018-10-28 02:30:00+02:00    2
      2018-10-28 02:00:00+01:00    3
      2018-10-28 02:30:00+01:00    4
      2018-10-28 03:00:00+01:00    5
      2018-10-28 03:30:00+01:00    6
      dtype: int64

      In some cases, inferring the DST is impossible. In such cases, you can
      pass an ndarray to the ambiguous parameter to set the DST explicitly

      >>> s = pd.Series(range(3),
      ...               index=pd.DatetimeIndex(['2018-10-28 01:20:00',
      ...                                       '2018-10-28 02:36:00',
      ...                                       '2018-10-28 03:46:00']))
      >>> s.tz_localize('CET', ambiguous=np.array([True, True, False]))
      2018-10-28 01:20:00+02:00    0
      2018-10-28 02:36:00+02:00    1
      2018-10-28 03:46:00+01:00    2
      dtype: int64

      If the DST transition causes nonexistent times, you can shift these
      dates forward or backward with a timedelta object or `'shift_forward'`
      or `'shift_backward'`.

      >>> s = pd.Series(range(2),
      ...               index=pd.DatetimeIndex(['2015-03-29 02:30:00',
      ...                                       '2015-03-29 03:30:00']))
      >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
      2015-03-29 03:00:00+02:00    0
      2015-03-29 03:30:00+02:00    1
      dtype: int64
      >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
      2015-03-29 01:59:59.999999999+01:00    0
      2015-03-29 03:30:00+02:00              1
      dtype: int64
      >>> s.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1h'))
      2015-03-29 03:30:00+02:00    0
      2015-03-29 03:30:00+02:00    1
      dtype: int64
      """
      # ``nonexistent`` must be one of the named strategies or a timedelta.
      named_strategies = ("raise", "NaT", "shift_forward", "shift_backward")
      is_named_strategy = nonexistent in named_strategies
      if not is_named_strategy and not isinstance(nonexistent, dt.timedelta):
          raise ValueError(
              "The nonexistent argument must be one of 'raise', "
              "'NaT', 'shift_forward', 'shift_backward' or "
              "a timedelta object"
          )

      axis = self._get_axis_number(axis)
      ax = self._get_axis(axis)

      def _localize_index(idx, zone, ambiguous_arg, nonexistent_arg):
          # Localize one index (or MultiIndex level) to ``zone``.
          if hasattr(idx, "tz_localize"):
              return idx.tz_localize(
                  zone, ambiguous=ambiguous_arg, nonexistent=nonexistent_arg
              )
          if len(idx) > 0:
              ax_name = self._get_axis_name(axis)
              raise TypeError(
                  f"{ax_name} is not a valid DatetimeIndex or PeriodIndex"
              )
          # An empty index without ``tz_localize`` is replaced by an empty
          # DatetimeIndex in the target zone.
          return DatetimeIndex([], tz=zone)

      # if a level is given it must be a MultiIndex level or
      # equivalent to the axis name
      if isinstance(ax, MultiIndex):
          level = ax._get_level_number(level)
          localized_level = _localize_index(
              ax.levels[level], tz, ambiguous, nonexistent
          )
          ax = ax.set_levels(localized_level, level=level)
      elif level in (None, 0, ax.name):
          ax = _localize_index(ax, tz, ambiguous, nonexistent)
      else:
          raise ValueError(f"The level {level} is not valid")

      localized = self.copy(deep=copy and not using_copy_on_write())
      localized = localized.set_axis(ax, axis=axis, copy=False)
      return localized.__finalize__(self, method="tz_localize")
  10203. # ----------------------------------------------------------------------
  10204. # Numeric Methods
  @final
  def describe(
      self,
      percentiles=None,
      include=None,
      exclude=None,
  ) -> Self:
      """
      Generate descriptive statistics.

      Descriptive statistics include those that summarize the central
      tendency, dispersion and shape of a
      dataset's distribution, excluding ``NaN`` values.

      Analyzes both numeric and object series, as well
      as ``DataFrame`` column sets of mixed data types. The output
      will vary depending on what is provided. Refer to the notes
      below for more detail.

      Parameters
      ----------
      percentiles : list-like of numbers, optional
          The percentiles to include in the output. All should
          fall between 0 and 1. The default is
          ``[.25, .5, .75]``, which returns the 25th, 50th, and
          75th percentiles.
      include : 'all', list-like of dtypes or None (default), optional
          A white list of data types to include in the result. Ignored
          for ``Series``. Here are the options:

          - 'all' : All columns of the input will be included in the output.
          - A list-like of dtypes : Limits the results to the
            provided data types.
            To limit the result to numeric types submit
            ``numpy.number``. To limit it instead to object columns submit
            the ``numpy.object`` data type. Strings
            can also be used in the style of
            ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To
            select pandas categorical columns, use ``'category'``
          - None (default) : The result will include all numeric columns.
      exclude : list-like of dtypes or None (default), optional,
          A black list of data types to omit from the result. Ignored
          for ``Series``. Here are the options:

          - A list-like of dtypes : Excludes the provided data types
            from the result. To exclude numeric types submit
            ``numpy.number``. To exclude object columns submit the data
            type ``numpy.object``. Strings can also be used in the style of
            ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To
            exclude pandas categorical columns, use ``'category'``
          - None (default) : The result will exclude nothing.

      Returns
      -------
      Series or DataFrame
          Summary statistics of the Series or Dataframe provided.

      See Also
      --------
      DataFrame.count: Count number of non-NA/null observations.
      DataFrame.max: Maximum of the values in the object.
      DataFrame.min: Minimum of the values in the object.
      DataFrame.mean: Mean of the values.
      DataFrame.std: Standard deviation of the observations.
      DataFrame.select_dtypes: Subset of a DataFrame including/excluding
          columns based on their dtype.

      Notes
      -----
      For numeric data, the result's index will include ``count``,
      ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
      upper percentiles. By default the lower percentile is ``25`` and the
      upper percentile is ``75``. The ``50`` percentile is the
      same as the median.

      For object data (e.g. strings or timestamps), the result's index
      will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
      is the most common value. The ``freq`` is the most common value's
      frequency. Timestamps also include the ``first`` and ``last`` items.

      If multiple object values have the highest count, then the
      ``count`` and ``top`` results will be arbitrarily chosen from
      among those with the highest count.

      For mixed data types provided via a ``DataFrame``, the default is to
      return only an analysis of numeric columns. If the dataframe consists
      only of object and categorical data without any numeric columns, the
      default is to return an analysis of both the object and categorical
      columns. If ``include='all'`` is provided as an option, the result
      will include a union of attributes of each type.

      The `include` and `exclude` parameters can be used to limit
      which columns in a ``DataFrame`` are analyzed for the output.
      The parameters are ignored when analyzing a ``Series``.

      Examples
      --------
      Describing a numeric ``Series``.

      >>> s = pd.Series([1, 2, 3])
      >>> s.describe()
      count    3.0
      mean     2.0
      std      1.0
      min      1.0
      25%      1.5
      50%      2.0
      75%      2.5
      max      3.0
      dtype: float64

      Describing a categorical ``Series``.

      >>> s = pd.Series(['a', 'a', 'b', 'c'])
      >>> s.describe()
      count     4
      unique    3
      top       a
      freq      2
      dtype: object

      Describing a timestamp ``Series``.

      >>> s = pd.Series([
      ...     np.datetime64("2000-01-01"),
      ...     np.datetime64("2010-01-01"),
      ...     np.datetime64("2010-01-01")
      ... ])
      >>> s.describe()
      count                      3
      mean     2006-09-01 08:00:00
      min      2000-01-01 00:00:00
      25%      2004-12-31 12:00:00
      50%      2010-01-01 00:00:00
      75%      2010-01-01 00:00:00
      max      2010-01-01 00:00:00
      dtype: object

      Describing a ``DataFrame``. By default only numeric fields
      are returned.

      >>> df = pd.DataFrame({'categorical': pd.Categorical(['d', 'e', 'f']),
      ...                    'numeric': [1, 2, 3],
      ...                    'object': ['a', 'b', 'c']
      ...                    })
      >>> df.describe()
             numeric
      count      3.0
      mean       2.0
      std        1.0
      min        1.0
      25%        1.5
      50%        2.0
      75%        2.5
      max        3.0

      Describing all columns of a ``DataFrame`` regardless of data type.

      >>> df.describe(include='all')  # doctest: +SKIP
             categorical  numeric object
      count            3      3.0      3
      unique           3      NaN      3
      top              f      NaN      a
      freq             1      NaN      1
      mean           NaN      2.0    NaN
      std            NaN      1.0    NaN
      min            NaN      1.0    NaN
      25%            NaN      1.5    NaN
      50%            NaN      2.0    NaN
      75%            NaN      2.5    NaN
      max            NaN      3.0    NaN

      Describing a column from a ``DataFrame`` by accessing it as
      an attribute.

      >>> df.numeric.describe()
      count    3.0
      mean     2.0
      std      1.0
      min      1.0
      25%      1.5
      50%      2.0
      75%      2.5
      max      3.0
      Name: numeric, dtype: float64

      Including only numeric columns in a ``DataFrame`` description.

      >>> df.describe(include=[np.number])
             numeric
      count      3.0
      mean       2.0
      std        1.0
      min        1.0
      25%        1.5
      50%        2.0
      75%        2.5
      max        3.0

      Including only string columns in a ``DataFrame`` description.

      >>> df.describe(include=[object])  # doctest: +SKIP
             object
      count       3
      unique      3
      top         a
      freq        1

      Including only categorical columns from a ``DataFrame`` description.

      >>> df.describe(include=['category'])
             categorical
      count            3
      unique           3
      top              d
      freq             1

      Excluding numeric columns from a ``DataFrame`` description.

      >>> df.describe(exclude=[np.number])  # doctest: +SKIP
             categorical object
      count            3      3
      unique           3      3
      top              f      a
      freq             1      1

      Excluding object columns from a ``DataFrame`` description.

      >>> df.describe(exclude=[object])  # doctest: +SKIP
             categorical  numeric
      count            3      3.0
      unique           3      NaN
      top              f      NaN
      freq             1      NaN
      mean           NaN      2.0
      std            NaN      1.0
      min            NaN      1.0
      25%            NaN      1.5
      50%            NaN      2.0
      75%            NaN      2.5
      max            NaN      3.0
      """
      # The statistics themselves are computed by ``describe_ndframe``; this
      # method only forwards the arguments and finalizes the result.
      summary = describe_ndframe(
          obj=self,
          include=include,
          exclude=exclude,
          percentiles=percentiles,
      )
      return summary.__finalize__(self, method="describe")
  10419. @final
  10420. def pct_change(
  10421. self,
  10422. periods: int = 1,
  10423. fill_method: FillnaOptions | None | lib.NoDefault = lib.no_default,
  10424. limit: int | None | lib.NoDefault = lib.no_default,
  10425. freq=None,
  10426. **kwargs,
  10427. ) -> Self:
  10428. """
  10429. Fractional change between the current and a prior element.
  10430. Computes the fractional change from the immediately previous row by
  10431. default. This is useful in comparing the fraction of change in a time
  10432. series of elements.
  10433. .. note::
  10434. Despite the name of this method, it calculates fractional change
  10435. (also known as per unit change or relative change) and not
  10436. percentage change. If you need the percentage change, multiply
  10437. these values by 100.
  10438. Parameters
  10439. ----------
  10440. periods : int, default 1
  10441. Periods to shift for forming percent change.
  10442. fill_method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad'
  10443. How to handle NAs **before** computing percent changes.
  10444. .. deprecated:: 2.1
  10445. All options of `fill_method` are deprecated except `fill_method=None`.
  10446. limit : int, default None
  10447. The number of consecutive NAs to fill before stopping.
  10448. .. deprecated:: 2.1
  10449. freq : DateOffset, timedelta, or str, optional
  10450. Increment to use from time series API (e.g. 'ME' or BDay()).
  10451. **kwargs
  10452. Additional keyword arguments are passed into
  10453. `DataFrame.shift` or `Series.shift`.
  10454. Returns
  10455. -------
  10456. Series or DataFrame
  10457. The same type as the calling object.
  10458. See Also
  10459. --------
  10460. Series.diff : Compute the difference of two elements in a Series.
  10461. DataFrame.diff : Compute the difference of two elements in a DataFrame.
  10462. Series.shift : Shift the index by some number of periods.
  10463. DataFrame.shift : Shift the index by some number of periods.
  10464. Examples
  10465. --------
  10466. **Series**
  10467. >>> s = pd.Series([90, 91, 85])
  10468. >>> s
  10469. 0 90
  10470. 1 91
  10471. 2 85
  10472. dtype: int64
  10473. >>> s.pct_change()
  10474. 0 NaN
  10475. 1 0.011111
  10476. 2 -0.065934
  10477. dtype: float64
  10478. >>> s.pct_change(periods=2)
  10479. 0 NaN
  10480. 1 NaN
  10481. 2 -0.055556
  10482. dtype: float64
  10483. See the percentage change in a Series where filling NAs with last
  10484. valid observation forward to next valid.
  10485. >>> s = pd.Series([90, 91, None, 85])
  10486. >>> s
  10487. 0 90.0
  10488. 1 91.0
  10489. 2 NaN
  10490. 3 85.0
  10491. dtype: float64
  10492. >>> s.ffill().pct_change()
  10493. 0 NaN
  10494. 1 0.011111
  10495. 2 0.000000
  10496. 3 -0.065934
  10497. dtype: float64
  10498. **DataFrame**
  10499. Percentage change in French franc, Deutsche Mark, and Italian lira from
  10500. 1980-01-01 to 1980-03-01.
  10501. >>> df = pd.DataFrame({
  10502. ... 'FR': [4.0405, 4.0963, 4.3149],
  10503. ... 'GR': [1.7246, 1.7482, 1.8519],
  10504. ... 'IT': [804.74, 810.01, 860.13]},
  10505. ... index=['1980-01-01', '1980-02-01', '1980-03-01'])
  10506. >>> df
  10507. FR GR IT
  10508. 1980-01-01 4.0405 1.7246 804.74
  10509. 1980-02-01 4.0963 1.7482 810.01
  10510. 1980-03-01 4.3149 1.8519 860.13
  10511. >>> df.pct_change()
  10512. FR GR IT
  10513. 1980-01-01 NaN NaN NaN
  10514. 1980-02-01 0.013810 0.013684 0.006549
  10515. 1980-03-01 0.053365 0.059318 0.061876
  10516. Percentage of change in GOOG and APPL stock volume. Shows computing
  10517. the percentage change between columns.
  10518. >>> df = pd.DataFrame({
  10519. ... '2016': [1769950, 30586265],
  10520. ... '2015': [1500923, 40912316],
  10521. ... '2014': [1371819, 41403351]},
  10522. ... index=['GOOG', 'APPL'])
  10523. >>> df
  10524. 2016 2015 2014
  10525. GOOG 1769950 1500923 1371819
  10526. APPL 30586265 40912316 41403351
  10527. >>> df.pct_change(axis='columns', periods=-1)
  10528. 2016 2015 2014
  10529. GOOG 0.179241 0.094112 NaN
  10530. APPL -0.252395 -0.011860 NaN
  10531. """
  10532. # GH#53491
  10533. if fill_method not in (lib.no_default, None) or limit is not lib.no_default:
  10534. warnings.warn(
  10535. "The 'fill_method' keyword being not None and the 'limit' keyword in "
  10536. f"{type(self).__name__}.pct_change are deprecated and will be removed "
  10537. "in a future version. Either fill in any non-leading NA values prior "
  10538. "to calling pct_change or specify 'fill_method=None' to not fill NA "
  10539. "values.",
  10540. FutureWarning,
  10541. stacklevel=find_stack_level(),
  10542. )
  10543. if fill_method is lib.no_default:
  10544. if limit is lib.no_default:
  10545. cols = self.items() if self.ndim == 2 else [(None, self)]
  10546. for _, col in cols:
  10547. if len(col) > 0:
  10548. mask = col.isna().values
  10549. mask = mask[np.argmax(~mask) :]
  10550. if mask.any():
  10551. warnings.warn(
  10552. "The default fill_method='pad' in "
  10553. f"{type(self).__name__}.pct_change is deprecated and "
  10554. "will be removed in a future version. Either fill in "
  10555. "any non-leading NA values prior to calling pct_change "
  10556. "or specify 'fill_method=None' to not fill NA values.",
  10557. FutureWarning,
  10558. stacklevel=find_stack_level(),
  10559. )
  10560. break
  10561. fill_method = "pad"
  10562. if limit is lib.no_default:
  10563. limit = None
  10564. axis = self._get_axis_number(kwargs.pop("axis", "index"))
  10565. if fill_method is None:
  10566. data = self
  10567. else:
  10568. data = self._pad_or_backfill(fill_method, axis=axis, limit=limit)
  10569. shifted = data.shift(periods=periods, freq=freq, axis=axis, **kwargs)
  10570. # Unsupported left operand type for / ("Self")
  10571. rs = data / shifted - 1 # type: ignore[operator]
  10572. if freq is not None:
  10573. # Shift method is implemented differently when freq is not None
  10574. # We want to restore the original index
  10575. rs = rs.loc[~rs.index.duplicated()]
  10576. rs = rs.reindex_like(data)
  10577. return rs.__finalize__(self, method="pct_change")
  10578. @final
  10579. def _logical_func(
  10580. self,
  10581. name: str,
  10582. func,
  10583. axis: Axis | None = 0,
  10584. bool_only: bool_t = False,
  10585. skipna: bool_t = True,
  10586. **kwargs,
  10587. ) -> Series | bool_t:
  10588. nv.validate_logical_func((), kwargs, fname=name)
  10589. validate_bool_kwarg(skipna, "skipna", none_allowed=False)
  10590. if self.ndim > 1 and axis is None:
  10591. # Reduce along one dimension then the other, to simplify DataFrame._reduce
  10592. res = self._logical_func(
  10593. name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs
  10594. )
  10595. # error: Item "bool" of "Series | bool" has no attribute "_logical_func"
  10596. return res._logical_func( # type: ignore[union-attr]
  10597. name, func, skipna=skipna, **kwargs
  10598. )
  10599. elif axis is None:
  10600. axis = 0
  10601. if (
  10602. self.ndim > 1
  10603. and axis == 1
  10604. and len(self._mgr.arrays) > 1
  10605. # TODO(EA2D): special-case not needed
  10606. and all(x.ndim == 2 for x in self._mgr.arrays)
  10607. and not kwargs
  10608. ):
  10609. # Fastpath avoiding potentially expensive transpose
  10610. obj = self
  10611. if bool_only:
  10612. obj = self._get_bool_data()
  10613. return obj._reduce_axis1(name, func, skipna=skipna)
  10614. return self._reduce(
  10615. func,
  10616. name=name,
  10617. axis=axis,
  10618. skipna=skipna,
  10619. numeric_only=bool_only,
  10620. filter_type="bool",
  10621. )
  10622. def any(
  10623. self,
  10624. axis: Axis | None = 0,
  10625. bool_only: bool_t = False,
  10626. skipna: bool_t = True,
  10627. **kwargs,
  10628. ) -> Series | bool_t:
  10629. return self._logical_func(
  10630. "any", nanops.nanany, axis, bool_only, skipna, **kwargs
  10631. )
  10632. def all(
  10633. self,
  10634. axis: Axis = 0,
  10635. bool_only: bool_t = False,
  10636. skipna: bool_t = True,
  10637. **kwargs,
  10638. ) -> Series | bool_t:
  10639. return self._logical_func(
  10640. "all", nanops.nanall, axis, bool_only, skipna, **kwargs
  10641. )
  10642. @final
  10643. def _accum_func(
  10644. self,
  10645. name: str,
  10646. func,
  10647. axis: Axis | None = None,
  10648. skipna: bool_t = True,
  10649. *args,
  10650. **kwargs,
  10651. ):
  10652. skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name)
  10653. if axis is None:
  10654. axis = 0
  10655. else:
  10656. axis = self._get_axis_number(axis)
  10657. if axis == 1:
  10658. return self.T._accum_func(
  10659. name, func, axis=0, skipna=skipna, *args, **kwargs # noqa: B026
  10660. ).T
  10661. def block_accum_func(blk_values):
  10662. values = blk_values.T if hasattr(blk_values, "T") else blk_values
  10663. result: np.ndarray | ExtensionArray
  10664. if isinstance(values, ExtensionArray):
  10665. result = values._accumulate(name, skipna=skipna, **kwargs)
  10666. else:
  10667. result = nanops.na_accum_func(values, func, skipna=skipna)
  10668. result = result.T if hasattr(result, "T") else result
  10669. return result
  10670. result = self._mgr.apply(block_accum_func)
  10671. return self._constructor_from_mgr(result, axes=result.axes).__finalize__(
  10672. self, method=name
  10673. )
  10674. def cummax(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs):
  10675. return self._accum_func(
  10676. "cummax", np.maximum.accumulate, axis, skipna, *args, **kwargs
  10677. )
  10678. def cummin(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs):
  10679. return self._accum_func(
  10680. "cummin", np.minimum.accumulate, axis, skipna, *args, **kwargs
  10681. )
  10682. def cumsum(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs):
  10683. return self._accum_func("cumsum", np.cumsum, axis, skipna, *args, **kwargs)
  10684. def cumprod(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs):
  10685. return self._accum_func("cumprod", np.cumprod, axis, skipna, *args, **kwargs)
  10686. @final
  10687. def _stat_function_ddof(
  10688. self,
  10689. name: str,
  10690. func,
  10691. axis: Axis | None | lib.NoDefault = lib.no_default,
  10692. skipna: bool_t = True,
  10693. ddof: int = 1,
  10694. numeric_only: bool_t = False,
  10695. **kwargs,
  10696. ) -> Series | float:
  10697. nv.validate_stat_ddof_func((), kwargs, fname=name)
  10698. validate_bool_kwarg(skipna, "skipna", none_allowed=False)
  10699. if axis is None:
  10700. if self.ndim > 1:
  10701. warnings.warn(
  10702. f"The behavior of {type(self).__name__}.{name} with axis=None "
  10703. "is deprecated, in a future version this will reduce over both "
  10704. "axes and return a scalar. To retain the old behavior, pass "
  10705. "axis=0 (or do not pass axis)",
  10706. FutureWarning,
  10707. stacklevel=find_stack_level(),
  10708. )
  10709. axis = 0
  10710. elif axis is lib.no_default:
  10711. axis = 0
  10712. return self._reduce(
  10713. func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof
  10714. )
  10715. def sem(
  10716. self,
  10717. axis: Axis | None = 0,
  10718. skipna: bool_t = True,
  10719. ddof: int = 1,
  10720. numeric_only: bool_t = False,
  10721. **kwargs,
  10722. ) -> Series | float:
  10723. return self._stat_function_ddof(
  10724. "sem", nanops.nansem, axis, skipna, ddof, numeric_only, **kwargs
  10725. )
  10726. def var(
  10727. self,
  10728. axis: Axis | None = 0,
  10729. skipna: bool_t = True,
  10730. ddof: int = 1,
  10731. numeric_only: bool_t = False,
  10732. **kwargs,
  10733. ) -> Series | float:
  10734. return self._stat_function_ddof(
  10735. "var", nanops.nanvar, axis, skipna, ddof, numeric_only, **kwargs
  10736. )
  10737. def std(
  10738. self,
  10739. axis: Axis | None = 0,
  10740. skipna: bool_t = True,
  10741. ddof: int = 1,
  10742. numeric_only: bool_t = False,
  10743. **kwargs,
  10744. ) -> Series | float:
  10745. return self._stat_function_ddof(
  10746. "std", nanops.nanstd, axis, skipna, ddof, numeric_only, **kwargs
  10747. )
  10748. @final
  10749. def _stat_function(
  10750. self,
  10751. name: str,
  10752. func,
  10753. axis: Axis | None = 0,
  10754. skipna: bool_t = True,
  10755. numeric_only: bool_t = False,
  10756. **kwargs,
  10757. ):
  10758. assert name in ["median", "mean", "min", "max", "kurt", "skew"], name
  10759. nv.validate_func(name, (), kwargs)
  10760. validate_bool_kwarg(skipna, "skipna", none_allowed=False)
  10761. return self._reduce(
  10762. func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only
  10763. )
  10764. def min(
  10765. self,
  10766. axis: Axis | None = 0,
  10767. skipna: bool_t = True,
  10768. numeric_only: bool_t = False,
  10769. **kwargs,
  10770. ):
  10771. return self._stat_function(
  10772. "min",
  10773. nanops.nanmin,
  10774. axis,
  10775. skipna,
  10776. numeric_only,
  10777. **kwargs,
  10778. )
  10779. def max(
  10780. self,
  10781. axis: Axis | None = 0,
  10782. skipna: bool_t = True,
  10783. numeric_only: bool_t = False,
  10784. **kwargs,
  10785. ):
  10786. return self._stat_function(
  10787. "max",
  10788. nanops.nanmax,
  10789. axis,
  10790. skipna,
  10791. numeric_only,
  10792. **kwargs,
  10793. )
  10794. def mean(
  10795. self,
  10796. axis: Axis | None = 0,
  10797. skipna: bool_t = True,
  10798. numeric_only: bool_t = False,
  10799. **kwargs,
  10800. ) -> Series | float:
  10801. return self._stat_function(
  10802. "mean", nanops.nanmean, axis, skipna, numeric_only, **kwargs
  10803. )
  10804. def median(
  10805. self,
  10806. axis: Axis | None = 0,
  10807. skipna: bool_t = True,
  10808. numeric_only: bool_t = False,
  10809. **kwargs,
  10810. ) -> Series | float:
  10811. return self._stat_function(
  10812. "median", nanops.nanmedian, axis, skipna, numeric_only, **kwargs
  10813. )
  10814. def skew(
  10815. self,
  10816. axis: Axis | None = 0,
  10817. skipna: bool_t = True,
  10818. numeric_only: bool_t = False,
  10819. **kwargs,
  10820. ) -> Series | float:
  10821. return self._stat_function(
  10822. "skew", nanops.nanskew, axis, skipna, numeric_only, **kwargs
  10823. )
  10824. def kurt(
  10825. self,
  10826. axis: Axis | None = 0,
  10827. skipna: bool_t = True,
  10828. numeric_only: bool_t = False,
  10829. **kwargs,
  10830. ) -> Series | float:
  10831. return self._stat_function(
  10832. "kurt", nanops.nankurt, axis, skipna, numeric_only, **kwargs
  10833. )
  10834. kurtosis = kurt
  10835. @final
  10836. def _min_count_stat_function(
  10837. self,
  10838. name: str,
  10839. func,
  10840. axis: Axis | None | lib.NoDefault = lib.no_default,
  10841. skipna: bool_t = True,
  10842. numeric_only: bool_t = False,
  10843. min_count: int = 0,
  10844. **kwargs,
  10845. ):
  10846. assert name in ["sum", "prod"], name
  10847. nv.validate_func(name, (), kwargs)
  10848. validate_bool_kwarg(skipna, "skipna", none_allowed=False)
  10849. if axis is None:
  10850. if self.ndim > 1:
  10851. warnings.warn(
  10852. f"The behavior of {type(self).__name__}.{name} with axis=None "
  10853. "is deprecated, in a future version this will reduce over both "
  10854. "axes and return a scalar. To retain the old behavior, pass "
  10855. "axis=0 (or do not pass axis)",
  10856. FutureWarning,
  10857. stacklevel=find_stack_level(),
  10858. )
  10859. axis = 0
  10860. elif axis is lib.no_default:
  10861. axis = 0
  10862. return self._reduce(
  10863. func,
  10864. name=name,
  10865. axis=axis,
  10866. skipna=skipna,
  10867. numeric_only=numeric_only,
  10868. min_count=min_count,
  10869. )
  10870. def sum(
  10871. self,
  10872. axis: Axis | None = 0,
  10873. skipna: bool_t = True,
  10874. numeric_only: bool_t = False,
  10875. min_count: int = 0,
  10876. **kwargs,
  10877. ):
  10878. return self._min_count_stat_function(
  10879. "sum", nanops.nansum, axis, skipna, numeric_only, min_count, **kwargs
  10880. )
  10881. def prod(
  10882. self,
  10883. axis: Axis | None = 0,
  10884. skipna: bool_t = True,
  10885. numeric_only: bool_t = False,
  10886. min_count: int = 0,
  10887. **kwargs,
  10888. ):
  10889. return self._min_count_stat_function(
  10890. "prod",
  10891. nanops.nanprod,
  10892. axis,
  10893. skipna,
  10894. numeric_only,
  10895. min_count,
  10896. **kwargs,
  10897. )
  10898. product = prod
  10899. @final
  10900. @doc(Rolling)
  10901. def rolling(
  10902. self,
  10903. window: int | dt.timedelta | str | BaseOffset | BaseIndexer,
  10904. min_periods: int | None = None,
  10905. center: bool_t = False,
  10906. win_type: str | None = None,
  10907. on: str | None = None,
  10908. axis: Axis | lib.NoDefault = lib.no_default,
  10909. closed: IntervalClosedType | None = None,
  10910. step: int | None = None,
  10911. method: str = "single",
  10912. ) -> Window | Rolling:
  10913. if axis is not lib.no_default:
  10914. axis = self._get_axis_number(axis)
  10915. name = "rolling"
  10916. if axis == 1:
  10917. warnings.warn(
  10918. f"Support for axis=1 in {type(self).__name__}.{name} is "
  10919. "deprecated and will be removed in a future version. "
  10920. f"Use obj.T.{name}(...) instead",
  10921. FutureWarning,
  10922. stacklevel=find_stack_level(),
  10923. )
  10924. else:
  10925. warnings.warn(
  10926. f"The 'axis' keyword in {type(self).__name__}.{name} is "
  10927. "deprecated and will be removed in a future version. "
  10928. "Call the method without the axis keyword instead.",
  10929. FutureWarning,
  10930. stacklevel=find_stack_level(),
  10931. )
  10932. else:
  10933. axis = 0
  10934. if win_type is not None:
  10935. return Window(
  10936. self,
  10937. window=window,
  10938. min_periods=min_periods,
  10939. center=center,
  10940. win_type=win_type,
  10941. on=on,
  10942. axis=axis,
  10943. closed=closed,
  10944. step=step,
  10945. method=method,
  10946. )
  10947. return Rolling(
  10948. self,
  10949. window=window,
  10950. min_periods=min_periods,
  10951. center=center,
  10952. win_type=win_type,
  10953. on=on,
  10954. axis=axis,
  10955. closed=closed,
  10956. step=step,
  10957. method=method,
  10958. )
  10959. @final
  10960. @doc(Expanding)
  10961. def expanding(
  10962. self,
  10963. min_periods: int = 1,
  10964. axis: Axis | lib.NoDefault = lib.no_default,
  10965. method: Literal["single", "table"] = "single",
  10966. ) -> Expanding:
  10967. if axis is not lib.no_default:
  10968. axis = self._get_axis_number(axis)
  10969. name = "expanding"
  10970. if axis == 1:
  10971. warnings.warn(
  10972. f"Support for axis=1 in {type(self).__name__}.{name} is "
  10973. "deprecated and will be removed in a future version. "
  10974. f"Use obj.T.{name}(...) instead",
  10975. FutureWarning,
  10976. stacklevel=find_stack_level(),
  10977. )
  10978. else:
  10979. warnings.warn(
  10980. f"The 'axis' keyword in {type(self).__name__}.{name} is "
  10981. "deprecated and will be removed in a future version. "
  10982. "Call the method without the axis keyword instead.",
  10983. FutureWarning,
  10984. stacklevel=find_stack_level(),
  10985. )
  10986. else:
  10987. axis = 0
  10988. return Expanding(self, min_periods=min_periods, axis=axis, method=method)
  10989. @final
  10990. @doc(ExponentialMovingWindow)
  10991. def ewm(
  10992. self,
  10993. com: float | None = None,
  10994. span: float | None = None,
  10995. halflife: float | TimedeltaConvertibleTypes | None = None,
  10996. alpha: float | None = None,
  10997. min_periods: int | None = 0,
  10998. adjust: bool_t = True,
  10999. ignore_na: bool_t = False,
  11000. axis: Axis | lib.NoDefault = lib.no_default,
  11001. times: np.ndarray | DataFrame | Series | None = None,
  11002. method: Literal["single", "table"] = "single",
  11003. ) -> ExponentialMovingWindow:
  11004. if axis is not lib.no_default:
  11005. axis = self._get_axis_number(axis)
  11006. name = "ewm"
  11007. if axis == 1:
  11008. warnings.warn(
  11009. f"Support for axis=1 in {type(self).__name__}.{name} is "
  11010. "deprecated and will be removed in a future version. "
  11011. f"Use obj.T.{name}(...) instead",
  11012. FutureWarning,
  11013. stacklevel=find_stack_level(),
  11014. )
  11015. else:
  11016. warnings.warn(
  11017. f"The 'axis' keyword in {type(self).__name__}.{name} is "
  11018. "deprecated and will be removed in a future version. "
  11019. "Call the method without the axis keyword instead.",
  11020. FutureWarning,
  11021. stacklevel=find_stack_level(),
  11022. )
  11023. else:
  11024. axis = 0
  11025. return ExponentialMovingWindow(
  11026. self,
  11027. com=com,
  11028. span=span,
  11029. halflife=halflife,
  11030. alpha=alpha,
  11031. min_periods=min_periods,
  11032. adjust=adjust,
  11033. ignore_na=ignore_na,
  11034. axis=axis,
  11035. times=times,
  11036. method=method,
  11037. )
  11038. # ----------------------------------------------------------------------
  11039. # Arithmetic Methods
@final
def _inplace_method(self, other, op) -> Self:
    """
    Wrap arithmetic method to operate inplace.

    Computes ``op(self, other)`` and writes the result back into ``self``,
    which is then returned.

    Parameters
    ----------
    other : object
        Right-hand operand handed to ``op``.
    op : callable
        Binary function called as ``op(self, other)``.

    Returns
    -------
    Self
        Always ``self``, after it has been updated with the result.
    """
    # ``warn`` is forwarded to ``setitem_inplace`` below. It is switched off
    # only when not on PyPy, ``warn_copy_on_write()`` is true and the
    # reference count of ``self`` is at most ``REF_COUNT + 2``.
    warn = True
    if not PYPY and warn_copy_on_write():
        if sys.getrefcount(self) <= REF_COUNT + 2:
            # we are probably in an inplace setitem context (e.g. df['a'] += 1)
            warn = False

    result = op(self, other)

    # True in-place path: one-dimensional, same index and dtype as the result,
    # copy-on-write not in use, and not the "warning mode with warn disabled"
    # combination.
    if (
        self.ndim == 1
        and result._indexed_same(self)
        and result.dtype == self.dtype
        and not using_copy_on_write()
        and not (warn_copy_on_write() and not warn)
    ):
        # GH#36498 this inplace op can _actually_ be inplace.
        # Item "ArrayManager" of "Union[ArrayManager, SingleArrayManager,
        # BlockManager, SingleBlockManager]" has no attribute "setitem_inplace"
        self._mgr.setitem_inplace(  # type: ignore[union-attr]
            slice(None), result._values, warn=warn
        )
        return self

    # Delete cacher
    self._reset_cacher()

    # this makes sure that we are aligned like the input
    # we are updating inplace so we want to ignore is_copy
    self._update_inplace(
        result.reindex_like(self, copy=False), verify_is_copy=False
    )
    return self
@final
def __iadd__(self, other) -> Self:
    """In-place ``+=``: routes ``type(self).__add__`` through ``_inplace_method``."""
    # error: Unsupported left operand type for + ("Type[NDFrame]")
    return self._inplace_method(other, type(self).__add__)  # type: ignore[operator]
@final
def __isub__(self, other) -> Self:
    """In-place ``-=``: routes ``type(self).__sub__`` through ``_inplace_method``."""
    # error: Unsupported left operand type for - ("Type[NDFrame]")
    return self._inplace_method(other, type(self).__sub__)  # type: ignore[operator]
@final
def __imul__(self, other) -> Self:
    """In-place ``*=``: routes ``type(self).__mul__`` through ``_inplace_method``."""
    # error: Unsupported left operand type for * ("Type[NDFrame]")
    return self._inplace_method(other, type(self).__mul__)  # type: ignore[operator]
@final
def __itruediv__(self, other) -> Self:
    """In-place ``/=``: routes ``type(self).__truediv__`` through ``_inplace_method``."""
    # error: Unsupported left operand type for / ("Type[NDFrame]")
    return self._inplace_method(
        other, type(self).__truediv__  # type: ignore[operator]
    )
@final
def __ifloordiv__(self, other) -> Self:
    """In-place ``//=``: routes ``type(self).__floordiv__`` through ``_inplace_method``."""
    # error: Unsupported left operand type for // ("Type[NDFrame]")
    return self._inplace_method(
        other, type(self).__floordiv__  # type: ignore[operator]
    )
@final
def __imod__(self, other) -> Self:
    """In-place ``%=``: routes ``type(self).__mod__`` through ``_inplace_method``."""
    # error: Unsupported left operand type for % ("Type[NDFrame]")
    return self._inplace_method(other, type(self).__mod__)  # type: ignore[operator]
@final
def __ipow__(self, other) -> Self:
    """In-place ``**=``: routes ``type(self).__pow__`` through ``_inplace_method``."""
    # error: Unsupported left operand type for ** ("Type[NDFrame]")
    return self._inplace_method(other, type(self).__pow__)  # type: ignore[operator]
@final
def __iand__(self, other) -> Self:
    """In-place ``&=``: routes ``type(self).__and__`` through ``_inplace_method``."""
    # error: Unsupported left operand type for & ("Type[NDFrame]")
    return self._inplace_method(other, type(self).__and__)  # type: ignore[operator]
@final
def __ior__(self, other) -> Self:
    """In-place ``|=``: routes ``type(self).__or__`` through ``_inplace_method``."""
    return self._inplace_method(other, type(self).__or__)
@final
def __ixor__(self, other) -> Self:
    """In-place ``^=``: routes ``type(self).__xor__`` through ``_inplace_method``."""
    # error: Unsupported left operand type for ^ ("Type[NDFrame]")
    return self._inplace_method(other, type(self).__xor__)  # type: ignore[operator]
  11116. # ----------------------------------------------------------------------
  11117. # Misc methods
  11118. @final
  11119. def _find_valid_index(self, *, how: str) -> Hashable | None:
  11120. """
  11121. Retrieves the index of the first valid value.
  11122. Parameters
  11123. ----------
  11124. how : {'first', 'last'}
  11125. Use this parameter to change between the first or last valid index.
  11126. Returns
  11127. -------
  11128. idx_first_valid : type of index
  11129. """
  11130. is_valid = self.notna().values
  11131. idxpos = find_valid_index(how=how, is_valid=is_valid)
  11132. if idxpos is None:
  11133. return None
  11134. return self.index[idxpos]
@final
@doc(position="first", klass=_shared_doc_kwargs["klass"])
def first_valid_index(self) -> Hashable | None:
    """
    Return index for {position} non-NA value or None, if no non-NA value is found.

    Returns
    -------
    type of index

    Examples
    --------
    For Series:

    >>> s = pd.Series([None, 3, 4])
    >>> s.first_valid_index()
    1
    >>> s.last_valid_index()
    2

    >>> s = pd.Series([None, None])
    >>> print(s.first_valid_index())
    None
    >>> print(s.last_valid_index())
    None

    If all elements in Series are NA/null, returns None.

    >>> s = pd.Series()
    >>> print(s.first_valid_index())
    None
    >>> print(s.last_valid_index())
    None

    If Series is empty, returns None.

    For DataFrame:

    >>> df = pd.DataFrame({{'A': [None, None, 2], 'B': [None, 3, 4]}})
    >>> df
         A      B
    0  NaN    NaN
    1  NaN    3.0
    2  2.0    4.0
    >>> df.first_valid_index()
    1
    >>> df.last_valid_index()
    2

    >>> df = pd.DataFrame({{'A': [None, None, None], 'B': [None, None, None]}})
    >>> df
         A      B
    0  None   None
    1  None   None
    2  None   None
    >>> print(df.first_valid_index())
    None
    >>> print(df.last_valid_index())
    None

    If all elements in DataFrame are NA/null, returns None.

    >>> df = pd.DataFrame()
    >>> df
    Empty DataFrame
    Columns: []
    Index: []
    >>> print(df.first_valid_index())
    None
    >>> print(df.last_valid_index())
    None

    If DataFrame is empty, returns None.
    """
    # The docstring above is a template: ``{position}`` is a placeholder and
    # literal braces are doubled, so keep any new braces doubled as well.
    return self._find_valid_index(how="first")
@final
@doc(first_valid_index, position="last", klass=_shared_doc_kwargs["klass"])
def last_valid_index(self) -> Hashable | None:
    # No inline docstring: ``@doc`` is given ``first_valid_index`` with
    # ``position="last"`` (presumably reusing that method's docstring
    # template -- confirm against the decorator).
    return self._find_valid_index(how="last")
# Docstring template for numeric reductions. Placeholders used in the text:
# {desc}, {axis_descr}, {min_count}, {name1}, {see_also}, {examples}.
# A trailing backslash joins the placeholder's line with the following one.
# NOTE(review): presumably filled in with str.format elsewhere -- confirm.
_num_doc = """
{desc}
Parameters
----------
axis : {axis_descr}
Axis for the function to be applied on.
For `Series` this parameter is unused and defaults to 0.
For DataFrames, specifying ``axis=None`` will apply the aggregation
across both axes.
.. versionadded:: 2.0.0
skipna : bool, default True
Exclude NA/null values when computing the result.
numeric_only : bool, default False
Include only float, int, boolean columns. Not implemented for Series.
{min_count}\
**kwargs
Additional keyword arguments to be passed to the function.
Returns
-------
{name1} or scalar\
{see_also}\
{examples}
"""
# Docstring template for sum/prod-style reductions. Placeholders used in the
# text: {desc}, {axis_descr}, {name}, {min_count}, {name1}, {see_also},
# {examples}. It differs from a plain numeric template by carrying a
# deprecation warning about ``axis=None`` for DataFrame.{name}.
# NOTE(review): presumably filled in with str.format elsewhere -- confirm.
_sum_prod_doc = """
{desc}
Parameters
----------
axis : {axis_descr}
Axis for the function to be applied on.
For `Series` this parameter is unused and defaults to 0.
.. warning::
The behavior of DataFrame.{name} with ``axis=None`` is deprecated,
in a future version this will reduce over both axes and return a scalar
To retain the old behavior, pass axis=0 (or do not pass axis).
.. versionadded:: 2.0.0
skipna : bool, default True
Exclude NA/null values when computing the result.
numeric_only : bool, default False
Include only float, int, boolean columns. Not implemented for Series.
{min_count}\
**kwargs
Additional keyword arguments to be passed to the function.
Returns
-------
{name1} or scalar\
{see_also}\
{examples}
"""
# Docstring template for the ddof-aware reductions. Placeholders used in the
# text: {desc}, {axis_descr}, {name}, {name1}, {name2}, {notes}, {examples}.
# NOTE(review): the Returns line mentions "if level specified", but this
# template documents no ``level`` parameter -- confirm whether that wording
# is stale.
# NOTE(review): presumably filled in with str.format elsewhere -- confirm.
_num_ddof_doc = """
{desc}
Parameters
----------
axis : {axis_descr}
For `Series` this parameter is unused and defaults to 0.
.. warning::
The behavior of DataFrame.{name} with ``axis=None`` is deprecated,
in a future version this will reduce over both axes and return a scalar
To retain the old behavior, pass axis=0 (or do not pass axis).
skipna : bool, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA.
ddof : int, default 1
Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
where N represents the number of elements.
numeric_only : bool, default False
Include only float, int, boolean columns. Not implemented for Series.
Returns
-------
{name1} or {name2} (if level specified) \
{notes}\
{examples}
"""
# "Notes" section text about ``ddof`` and ``numpy.std``; a plain string with
# no placeholders. NOTE(review): presumably substituted for a ``{notes}``
# placeholder when building the ``std`` docstring -- confirm.
_std_notes = """
Notes
-----
To have the same behaviour as `numpy.std`, use `ddof=0` (instead of the
default `ddof=1`)"""
# "Examples" section text for ``std``. It contains literal single braces (the
# dict passed to pd.DataFrame), so it is not itself a str.format template.
# NOTE(review): presumably substituted for an ``{examples}`` placeholder --
# confirm.
_std_examples = """
Examples
--------
>>> df = pd.DataFrame({'person_id': [0, 1, 2, 3],
... 'age': [21, 25, 62, 43],
... 'height': [1.61, 1.87, 1.49, 2.01]}
... ).set_index('person_id')
>>> df
age height
person_id
0 21 1.61
1 25 1.87
2 62 1.49
3 43 2.01
The standard deviation of the columns can be found as follows:
>>> df.std()
age 18.786076
height 0.237417
dtype: float64
Alternatively, `ddof=0` can be set to normalize by N instead of N-1:
>>> df.std(ddof=0)
age 16.269219
height 0.205609
dtype: float64"""
# "Examples" section text for ``var``. It contains literal single braces (the
# dict passed to pd.DataFrame), so it is not itself a str.format template.
# NOTE(review): presumably substituted for an ``{examples}`` placeholder --
# confirm.
_var_examples = """
Examples
--------
>>> df = pd.DataFrame({'person_id': [0, 1, 2, 3],
... 'age': [21, 25, 62, 43],
... 'height': [1.61, 1.87, 1.49, 2.01]}
... ).set_index('person_id')
>>> df
age height
person_id
0 21 1.61
1 25 1.87
2 62 1.49
3 43 2.01
>>> df.var()
age 352.916667
height 0.056367
dtype: float64
Alternatively, ``ddof=0`` can be set to normalize by N instead of N-1:
>>> df.var(ddof=0)
age 264.687500
height 0.042275
dtype: float64"""
# Docstring template for the boolean reductions; ``make_doc`` selects it as
# the base for "any" and "all".
# Placeholders filled through ``str.format``: {desc}, {empty_value} ("False"
# for any, "True" for all), {name1}, {name2}, {see_also} and {examples}.
# The doubled braces in the ``axis`` entry are ``str.format`` escapes that
# render as single literal braces.
# NOTE(review): the Returns text refers to ``level`` although no ``level``
# parameter is documented above -- confirm whether that is stale.
_bool_doc = """
{desc}
Parameters
----------
axis : {{0 or 'index', 1 or 'columns', None}}, default 0
Indicate which axis or axes should be reduced. For `Series` this parameter
is unused and defaults to 0.
* 0 / 'index' : reduce the index, return a Series whose index is the
original column labels.
* 1 / 'columns' : reduce the columns, return a Series whose index is the
original index.
* None : reduce all axes, return a scalar.
bool_only : bool, default False
Include only boolean columns. Not implemented for Series.
skipna : bool, default True
Exclude NA/null values. If the entire row/column is NA and skipna is
True, then the result will be {empty_value}, as for an empty row/column.
If skipna is False, then NA are treated as True, because these are not
equal to zero.
**kwargs : any, default None
Additional keywords have no effect but might be accepted for
compatibility with NumPy.
Returns
-------
{name1} or {name2}
If level is specified, then, {name2} is returned; otherwise, {name1}
is returned.
{see_also}
{examples}"""
  11354. _all_desc = """\
  11355. Return whether all elements are True, potentially over an axis.
  11356. Returns True unless there at least one element within a series or
  11357. along a Dataframe axis that is False or equivalent (e.g. zero or
  11358. empty)."""
# Examples section substituted for {examples} when ``make_doc`` builds the
# "all" docstring.  Covers Series input (including empty and NaN-only data)
# and DataFrame input for the default axis, ``axis='columns'`` and
# ``axis=None``.
_all_examples = """\
Examples
--------
**Series**
>>> pd.Series([True, True]).all()
True
>>> pd.Series([True, False]).all()
False
>>> pd.Series([], dtype="float64").all()
True
>>> pd.Series([np.nan]).all()
True
>>> pd.Series([np.nan]).all(skipna=False)
True
**DataFrames**
Create a dataframe from a dictionary.
>>> df = pd.DataFrame({'col1': [True, True], 'col2': [True, False]})
>>> df
col1 col2
0 True True
1 True False
Default behaviour checks if values in each column all return True.
>>> df.all()
col1 True
col2 False
dtype: bool
Specify ``axis='columns'`` to check if values in each row all return True.
>>> df.all(axis='columns')
0 True
1 False
dtype: bool
Or ``axis=None`` for whether every value is True.
>>> df.all(axis=None)
False
"""
# "See Also" section substituted for {see_also} when ``make_doc`` builds the
# "all" docstring.
_all_see_also = """\
See Also
--------
Series.all : Return True if all elements are True.
DataFrame.any : Return True if one (or more) elements are True.
"""
# Docstring template for the cumulative functions; ``make_doc`` selects it as
# the base for "cumsum", "cumprod", "cummin" and "cummax".
# Placeholders filled through ``str.format``: {desc} (e.g. "sum", "product"),
# {name1}, {name2}, {accum_func_name} (e.g. "sum", "prod") and {examples}.
# The doubled braces in the ``axis`` entry are ``str.format`` escapes that
# render as single literal braces.
_cnum_doc = """
Return cumulative {desc} over a DataFrame or Series axis.
Returns a DataFrame or Series of the same size containing the cumulative
{desc}.
Parameters
----------
axis : {{0 or 'index', 1 or 'columns'}}, default 0
The index or the name of the axis. 0 is equivalent to None or 'index'.
For `Series` this parameter is unused and defaults to 0.
skipna : bool, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA.
*args, **kwargs
Additional keywords have no effect but might be accepted for
compatibility with NumPy.
Returns
-------
{name1} or {name2}
Return cumulative {desc} of {name1} or {name2}.
See Also
--------
core.window.expanding.Expanding.{accum_func_name} : Similar functionality
but ignores ``NaN`` values.
{name2}.{accum_func_name} : Return the {desc} over
{name2} axis.
{name2}.cummax : Return cumulative maximum over {name2} axis.
{name2}.cummin : Return cumulative minimum over {name2} axis.
{name2}.cumsum : Return cumulative sum over {name2} axis.
{name2}.cumprod : Return cumulative product over {name2} axis.
{examples}"""
# Examples section substituted for {examples} when ``make_doc`` builds the
# "cummin" docstring.  Covers a Series (default and ``skipna=False``) and a
# DataFrame (default axis and ``axis=1``).
_cummin_examples = """\
Examples
--------
**Series**
>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0 2.0
1 NaN
2 5.0
3 -1.0
4 0.0
dtype: float64
By default, NA values are ignored.
>>> s.cummin()
0 2.0
1 NaN
2 2.0
3 -1.0
4 -1.0
dtype: float64
To include NA values in the operation, use ``skipna=False``
>>> s.cummin(skipna=False)
0 2.0
1 NaN
2 NaN
3 NaN
4 NaN
dtype: float64
**DataFrame**
>>> df = pd.DataFrame([[2.0, 1.0],
... [3.0, np.nan],
... [1.0, 0.0]],
... columns=list('AB'))
>>> df
A B
0 2.0 1.0
1 3.0 NaN
2 1.0 0.0
By default, iterates over rows and finds the minimum
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.
>>> df.cummin()
A B
0 2.0 1.0
1 2.0 NaN
2 1.0 0.0
To iterate over columns and find the minimum in each row,
use ``axis=1``
>>> df.cummin(axis=1)
A B
0 2.0 1.0
1 3.0 NaN
2 1.0 0.0
"""
# Examples section substituted for {examples} when ``make_doc`` builds the
# "cumsum" docstring.  Covers a Series (default and ``skipna=False``) and a
# DataFrame (default axis and ``axis=1``).
_cumsum_examples = """\
Examples
--------
**Series**
>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0 2.0
1 NaN
2 5.0
3 -1.0
4 0.0
dtype: float64
By default, NA values are ignored.
>>> s.cumsum()
0 2.0
1 NaN
2 7.0
3 6.0
4 6.0
dtype: float64
To include NA values in the operation, use ``skipna=False``
>>> s.cumsum(skipna=False)
0 2.0
1 NaN
2 NaN
3 NaN
4 NaN
dtype: float64
**DataFrame**
>>> df = pd.DataFrame([[2.0, 1.0],
... [3.0, np.nan],
... [1.0, 0.0]],
... columns=list('AB'))
>>> df
A B
0 2.0 1.0
1 3.0 NaN
2 1.0 0.0
By default, iterates over rows and finds the sum
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.
>>> df.cumsum()
A B
0 2.0 1.0
1 5.0 NaN
2 6.0 1.0
To iterate over columns and find the sum in each row,
use ``axis=1``
>>> df.cumsum(axis=1)
A B
0 2.0 3.0
1 3.0 NaN
2 1.0 1.0
"""
# Examples section substituted for {examples} when ``make_doc`` builds the
# "cumprod" docstring.  Covers a Series (default and ``skipna=False``) and a
# DataFrame (default axis and ``axis=1``).
_cumprod_examples = """\
Examples
--------
**Series**
>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0 2.0
1 NaN
2 5.0
3 -1.0
4 0.0
dtype: float64
By default, NA values are ignored.
>>> s.cumprod()
0 2.0
1 NaN
2 10.0
3 -10.0
4 -0.0
dtype: float64
To include NA values in the operation, use ``skipna=False``
>>> s.cumprod(skipna=False)
0 2.0
1 NaN
2 NaN
3 NaN
4 NaN
dtype: float64
**DataFrame**
>>> df = pd.DataFrame([[2.0, 1.0],
... [3.0, np.nan],
... [1.0, 0.0]],
... columns=list('AB'))
>>> df
A B
0 2.0 1.0
1 3.0 NaN
2 1.0 0.0
By default, iterates over rows and finds the product
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.
>>> df.cumprod()
A B
0 2.0 1.0
1 6.0 NaN
2 6.0 0.0
To iterate over columns and find the product in each row,
use ``axis=1``
>>> df.cumprod(axis=1)
A B
0 2.0 2.0
1 3.0 NaN
2 1.0 0.0
"""
# Examples section substituted for {examples} when ``make_doc`` builds the
# "cummax" docstring.  Covers a Series (default and ``skipna=False``) and a
# DataFrame (default axis and ``axis=1``).
_cummax_examples = """\
Examples
--------
**Series**
>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0 2.0
1 NaN
2 5.0
3 -1.0
4 0.0
dtype: float64
By default, NA values are ignored.
>>> s.cummax()
0 2.0
1 NaN
2 5.0
3 5.0
4 5.0
dtype: float64
To include NA values in the operation, use ``skipna=False``
>>> s.cummax(skipna=False)
0 2.0
1 NaN
2 NaN
3 NaN
4 NaN
dtype: float64
**DataFrame**
>>> df = pd.DataFrame([[2.0, 1.0],
... [3.0, np.nan],
... [1.0, 0.0]],
... columns=list('AB'))
>>> df
A B
0 2.0 1.0
1 3.0 NaN
2 1.0 0.0
By default, iterates over rows and finds the maximum
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.
>>> df.cummax()
A B
0 2.0 1.0
1 3.0 NaN
2 3.0 1.0
To iterate over columns and find the maximum in each row,
use ``axis=1``
>>> df.cummax(axis=1)
A B
0 2.0 2.0
1 3.0 NaN
2 1.0 1.0
"""
# "See Also" section substituted for {see_also} when ``make_doc`` builds the
# "any" docstring.
_any_see_also = """\
See Also
--------
numpy.any : Numpy version of this method.
Series.any : Return whether any element is True.
Series.all : Return whether all elements are True.
DataFrame.any : Return whether any element is True over requested axis.
DataFrame.all : Return whether all elements are True over requested axis.
"""
  11651. _any_desc = """\
  11652. Return whether any element is True, potentially over an axis.
  11653. Returns False unless there is at least one element within a series or
  11654. along a Dataframe axis that is True or equivalent (e.g. non-zero or
  11655. non-empty)."""
# Examples section substituted for {examples} when ``make_doc`` builds the
# "any" docstring.  Covers Series input (including empty and NaN-only data)
# and DataFrame input for the default axis, ``axis='columns'``, ``axis=None``
# and an empty frame.
_any_examples = """\
Examples
--------
**Series**
For Series input, the output is a scalar indicating whether any element
is True.
>>> pd.Series([False, False]).any()
False
>>> pd.Series([True, False]).any()
True
>>> pd.Series([], dtype="float64").any()
False
>>> pd.Series([np.nan]).any()
False
>>> pd.Series([np.nan]).any(skipna=False)
True
**DataFrame**
Whether each column contains at least one True element (the default).
>>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]})
>>> df
A B C
0 1 0 0
1 2 2 0
>>> df.any()
A True
B True
C False
dtype: bool
Aggregating over the columns.
>>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]})
>>> df
A B
0 True 1
1 False 2
>>> df.any(axis='columns')
0 True
1 True
dtype: bool
>>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]})
>>> df
A B
0 True 1
1 False 0
>>> df.any(axis='columns')
0 True
1 False
dtype: bool
Aggregating over the entire DataFrame with ``axis=None``.
>>> df.any(axis=None)
True
`any` for an empty DataFrame is an empty Series.
>>> pd.DataFrame([]).any()
Series([], dtype: bool)
"""
# Shared Examples template registered under the "stat_func_example" key of
# ``_shared_docs``.  It has exactly two placeholders: {stat_func} (the method
# name called on the sample Series) and {default_output} (the value shown as
# its result).  ``_sum_examples``, ``_max_examples`` and ``_min_examples`` are
# built from it.
_shared_docs[
    "stat_func_example"
] = """
Examples
--------
>>> idx = pd.MultiIndex.from_arrays([
... ['warm', 'warm', 'cold', 'cold'],
... ['dog', 'falcon', 'fish', 'spider']],
... names=['blooded', 'animal'])
>>> s = pd.Series([4, 2, 0, 8], name='legs', index=idx)
>>> s
blooded animal
warm dog 4
falcon 2
cold fish 0
spider 8
Name: legs, dtype: int64
>>> s.{stat_func}()
{default_output}"""
  11729. _sum_examples = _shared_docs["stat_func_example"].format(
  11730. stat_func="sum", verb="Sum", default_output=14, level_output_0=6, level_output_1=8
  11731. )
  11732. _sum_examples += """
  11733. By default, the sum of an empty or all-NA Series is ``0``.
  11734. >>> pd.Series([], dtype="float64").sum() # min_count=0 is the default
  11735. 0.0
  11736. This can be controlled with the ``min_count`` parameter. For example, if
  11737. you'd like the sum of an empty series to be NaN, pass ``min_count=1``.
  11738. >>> pd.Series([], dtype="float64").sum(min_count=1)
  11739. nan
  11740. Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
  11741. empty series identically.
  11742. >>> pd.Series([np.nan]).sum()
  11743. 0.0
  11744. >>> pd.Series([np.nan]).sum(min_count=1)
  11745. nan"""
  11746. _max_examples: str = _shared_docs["stat_func_example"].format(
  11747. stat_func="max", verb="Max", default_output=8, level_output_0=4, level_output_1=8
  11748. )
  11749. _min_examples: str = _shared_docs["stat_func_example"].format(
  11750. stat_func="min", verb="Min", default_output=0, level_output_0=2, level_output_1=0
  11751. )
# "See Also" section that ``make_doc`` passes as ``see_also`` for "min",
# "max", "sum" and "prod".
_stat_func_see_also = """
See Also
--------
Series.sum : Return the sum.
Series.min : Return the minimum.
Series.max : Return the maximum.
Series.idxmin : Return the index of the minimum.
Series.idxmax : Return the index of the maximum.
DataFrame.sum : Return the sum over the requested axis.
DataFrame.min : Return the minimum over the requested axis.
DataFrame.max : Return the maximum over the requested axis.
DataFrame.idxmin : Return the index of the minimum over the requested axis.
DataFrame.idxmax : Return the index of the maximum over the requested axis."""
# Examples section that ``make_doc`` passes as ``examples`` for "prod".
# Demonstrates the result on empty and all-NA input with and without
# ``min_count=1``.
_prod_examples = """
Examples
--------
By default, the product of an empty or all-NA Series is ``1``
>>> pd.Series([], dtype="float64").prod()
1.0
This can be controlled with the ``min_count`` parameter
>>> pd.Series([], dtype="float64").prod(min_count=1)
nan
Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
empty series identically.
>>> pd.Series([np.nan]).prod()
1.0
>>> pd.Series([np.nan]).prod(min_count=1)
nan"""
# Parameter entry for ``min_count``.  ``make_doc`` passes it as the
# ``min_count`` format argument for "sum" and "prod"; the other reductions
# that take a ``min_count`` argument in ``make_doc`` pass an empty string.
_min_count_stub = """\
min_count : int, default 0
The required number of valid values to perform the operation. If fewer than
``min_count`` non-NA values are present the result will be NA.
"""
  11785. def make_doc(name: str, ndim: int) -> str:
  11786. """
  11787. Generate the docstring for a Series/DataFrame reduction.
  11788. """
  11789. if ndim == 1:
  11790. name1 = "scalar"
  11791. name2 = "Series"
  11792. axis_descr = "{index (0)}"
  11793. else:
  11794. name1 = "Series"
  11795. name2 = "DataFrame"
  11796. axis_descr = "{index (0), columns (1)}"
  11797. if name == "any":
  11798. base_doc = _bool_doc
  11799. desc = _any_desc
  11800. see_also = _any_see_also
  11801. examples = _any_examples
  11802. kwargs = {"empty_value": "False"}
  11803. elif name == "all":
  11804. base_doc = _bool_doc
  11805. desc = _all_desc
  11806. see_also = _all_see_also
  11807. examples = _all_examples
  11808. kwargs = {"empty_value": "True"}
  11809. elif name == "min":
  11810. base_doc = _num_doc
  11811. desc = (
  11812. "Return the minimum of the values over the requested axis.\n\n"
  11813. "If you want the *index* of the minimum, use ``idxmin``. This is "
  11814. "the equivalent of the ``numpy.ndarray`` method ``argmin``."
  11815. )
  11816. see_also = _stat_func_see_also
  11817. examples = _min_examples
  11818. kwargs = {"min_count": ""}
  11819. elif name == "max":
  11820. base_doc = _num_doc
  11821. desc = (
  11822. "Return the maximum of the values over the requested axis.\n\n"
  11823. "If you want the *index* of the maximum, use ``idxmax``. This is "
  11824. "the equivalent of the ``numpy.ndarray`` method ``argmax``."
  11825. )
  11826. see_also = _stat_func_see_also
  11827. examples = _max_examples
  11828. kwargs = {"min_count": ""}
  11829. elif name == "sum":
  11830. base_doc = _sum_prod_doc
  11831. desc = (
  11832. "Return the sum of the values over the requested axis.\n\n"
  11833. "This is equivalent to the method ``numpy.sum``."
  11834. )
  11835. see_also = _stat_func_see_also
  11836. examples = _sum_examples
  11837. kwargs = {"min_count": _min_count_stub}
  11838. elif name == "prod":
  11839. base_doc = _sum_prod_doc
  11840. desc = "Return the product of the values over the requested axis."
  11841. see_also = _stat_func_see_also
  11842. examples = _prod_examples
  11843. kwargs = {"min_count": _min_count_stub}
  11844. elif name == "median":
  11845. base_doc = _num_doc
  11846. desc = "Return the median of the values over the requested axis."
  11847. see_also = ""
  11848. examples = """
  11849. Examples
  11850. --------
  11851. >>> s = pd.Series([1, 2, 3])
  11852. >>> s.median()
  11853. 2.0
  11854. With a DataFrame
  11855. >>> df = pd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra'])
  11856. >>> df
  11857. a b
  11858. tiger 1 2
  11859. zebra 2 3
  11860. >>> df.median()
  11861. a 1.5
  11862. b 2.5
  11863. dtype: float64
  11864. Using axis=1
  11865. >>> df.median(axis=1)
  11866. tiger 1.5
  11867. zebra 2.5
  11868. dtype: float64
  11869. In this case, `numeric_only` should be set to `True`
  11870. to avoid getting an error.
  11871. >>> df = pd.DataFrame({'a': [1, 2], 'b': ['T', 'Z']},
  11872. ... index=['tiger', 'zebra'])
  11873. >>> df.median(numeric_only=True)
  11874. a 1.5
  11875. dtype: float64"""
  11876. kwargs = {"min_count": ""}
  11877. elif name == "mean":
  11878. base_doc = _num_doc
  11879. desc = "Return the mean of the values over the requested axis."
  11880. see_also = ""
  11881. examples = """
  11882. Examples
  11883. --------
  11884. >>> s = pd.Series([1, 2, 3])
  11885. >>> s.mean()
  11886. 2.0
  11887. With a DataFrame
  11888. >>> df = pd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra'])
  11889. >>> df
  11890. a b
  11891. tiger 1 2
  11892. zebra 2 3
  11893. >>> df.mean()
  11894. a 1.5
  11895. b 2.5
  11896. dtype: float64
  11897. Using axis=1
  11898. >>> df.mean(axis=1)
  11899. tiger 1.5
  11900. zebra 2.5
  11901. dtype: float64
  11902. In this case, `numeric_only` should be set to `True` to avoid
  11903. getting an error.
  11904. >>> df = pd.DataFrame({'a': [1, 2], 'b': ['T', 'Z']},
  11905. ... index=['tiger', 'zebra'])
  11906. >>> df.mean(numeric_only=True)
  11907. a 1.5
  11908. dtype: float64"""
  11909. kwargs = {"min_count": ""}
  11910. elif name == "var":
  11911. base_doc = _num_ddof_doc
  11912. desc = (
  11913. "Return unbiased variance over requested axis.\n\nNormalized by "
  11914. "N-1 by default. This can be changed using the ddof argument."
  11915. )
  11916. examples = _var_examples
  11917. see_also = ""
  11918. kwargs = {"notes": ""}
  11919. elif name == "std":
  11920. base_doc = _num_ddof_doc
  11921. desc = (
  11922. "Return sample standard deviation over requested axis."
  11923. "\n\nNormalized by N-1 by default. This can be changed using the "
  11924. "ddof argument."
  11925. )
  11926. examples = _std_examples
  11927. see_also = ""
  11928. kwargs = {"notes": _std_notes}
  11929. elif name == "sem":
  11930. base_doc = _num_ddof_doc
  11931. desc = (
  11932. "Return unbiased standard error of the mean over requested "
  11933. "axis.\n\nNormalized by N-1 by default. This can be changed "
  11934. "using the ddof argument"
  11935. )
  11936. examples = """
  11937. Examples
  11938. --------
  11939. >>> s = pd.Series([1, 2, 3])
  11940. >>> s.sem().round(6)
  11941. 0.57735
  11942. With a DataFrame
  11943. >>> df = pd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra'])
  11944. >>> df
  11945. a b
  11946. tiger 1 2
  11947. zebra 2 3
  11948. >>> df.sem()
  11949. a 0.5
  11950. b 0.5
  11951. dtype: float64
  11952. Using axis=1
  11953. >>> df.sem(axis=1)
  11954. tiger 0.5
  11955. zebra 0.5
  11956. dtype: float64
  11957. In this case, `numeric_only` should be set to `True`
  11958. to avoid getting an error.
  11959. >>> df = pd.DataFrame({'a': [1, 2], 'b': ['T', 'Z']},
  11960. ... index=['tiger', 'zebra'])
  11961. >>> df.sem(numeric_only=True)
  11962. a 0.5
  11963. dtype: float64"""
  11964. see_also = ""
  11965. kwargs = {"notes": ""}
  11966. elif name == "skew":
  11967. base_doc = _num_doc
  11968. desc = "Return unbiased skew over requested axis.\n\nNormalized by N-1."
  11969. see_also = ""
  11970. examples = """
  11971. Examples
  11972. --------
  11973. >>> s = pd.Series([1, 2, 3])
  11974. >>> s.skew()
  11975. 0.0
  11976. With a DataFrame
  11977. >>> df = pd.DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4], 'c': [1, 3, 5]},
  11978. ... index=['tiger', 'zebra', 'cow'])
  11979. >>> df
  11980. a b c
  11981. tiger 1 2 1
  11982. zebra 2 3 3
  11983. cow 3 4 5
  11984. >>> df.skew()
  11985. a 0.0
  11986. b 0.0
  11987. c 0.0
  11988. dtype: float64
  11989. Using axis=1
  11990. >>> df.skew(axis=1)
  11991. tiger 1.732051
  11992. zebra -1.732051
  11993. cow 0.000000
  11994. dtype: float64
  11995. In this case, `numeric_only` should be set to `True` to avoid
  11996. getting an error.
  11997. >>> df = pd.DataFrame({'a': [1, 2, 3], 'b': ['T', 'Z', 'X']},
  11998. ... index=['tiger', 'zebra', 'cow'])
  11999. >>> df.skew(numeric_only=True)
  12000. a 0.0
  12001. dtype: float64"""
  12002. kwargs = {"min_count": ""}
  12003. elif name == "kurt":
  12004. base_doc = _num_doc
  12005. desc = (
  12006. "Return unbiased kurtosis over requested axis.\n\n"
  12007. "Kurtosis obtained using Fisher's definition of\n"
  12008. "kurtosis (kurtosis of normal == 0.0). Normalized "
  12009. "by N-1."
  12010. )
  12011. see_also = ""
  12012. examples = """
  12013. Examples
  12014. --------
  12015. >>> s = pd.Series([1, 2, 2, 3], index=['cat', 'dog', 'dog', 'mouse'])
  12016. >>> s
  12017. cat 1
  12018. dog 2
  12019. dog 2
  12020. mouse 3
  12021. dtype: int64
  12022. >>> s.kurt()
  12023. 1.5
  12024. With a DataFrame
  12025. >>> df = pd.DataFrame({'a': [1, 2, 2, 3], 'b': [3, 4, 4, 4]},
  12026. ... index=['cat', 'dog', 'dog', 'mouse'])
  12027. >>> df
  12028. a b
  12029. cat 1 3
  12030. dog 2 4
  12031. dog 2 4
  12032. mouse 3 4
  12033. >>> df.kurt()
  12034. a 1.5
  12035. b 4.0
  12036. dtype: float64
  12037. With axis=None
  12038. >>> df.kurt(axis=None).round(6)
  12039. -0.988693
  12040. Using axis=1
  12041. >>> df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [3, 4], 'd': [1, 2]},
  12042. ... index=['cat', 'dog'])
  12043. >>> df.kurt(axis=1)
  12044. cat -6.0
  12045. dog -6.0
  12046. dtype: float64"""
  12047. kwargs = {"min_count": ""}
  12048. elif name == "cumsum":
  12049. base_doc = _cnum_doc
  12050. desc = "sum"
  12051. see_also = ""
  12052. examples = _cumsum_examples
  12053. kwargs = {"accum_func_name": "sum"}
  12054. elif name == "cumprod":
  12055. base_doc = _cnum_doc
  12056. desc = "product"
  12057. see_also = ""
  12058. examples = _cumprod_examples
  12059. kwargs = {"accum_func_name": "prod"}
  12060. elif name == "cummin":
  12061. base_doc = _cnum_doc
  12062. desc = "minimum"
  12063. see_also = ""
  12064. examples = _cummin_examples
  12065. kwargs = {"accum_func_name": "min"}
  12066. elif name == "cummax":
  12067. base_doc = _cnum_doc
  12068. desc = "maximum"
  12069. see_also = ""
  12070. examples = _cummax_examples
  12071. kwargs = {"accum_func_name": "max"}
  12072. else:
  12073. raise NotImplementedError
  12074. docstr = base_doc.format(
  12075. desc=desc,
  12076. name=name,
  12077. name1=name1,
  12078. name2=name2,
  12079. axis_descr=axis_descr,
  12080. see_also=see_also,
  12081. examples=examples,
  12082. **kwargs,
  12083. )
  12084. return docstr