resample.py 93 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906
  1. from __future__ import annotations
  2. import copy
  3. from textwrap import dedent
  4. from typing import (
  5. TYPE_CHECKING,
  6. Callable,
  7. Literal,
  8. cast,
  9. final,
  10. no_type_check,
  11. )
  12. import warnings
  13. import numpy as np
  14. from pandas._libs import lib
  15. from pandas._libs.tslibs import (
  16. BaseOffset,
  17. IncompatibleFrequency,
  18. NaT,
  19. Period,
  20. Timedelta,
  21. Timestamp,
  22. to_offset,
  23. )
  24. from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
  25. from pandas._typing import NDFrameT
  26. from pandas.compat.numpy import function as nv
  27. from pandas.errors import AbstractMethodError
  28. from pandas.util._decorators import (
  29. Appender,
  30. Substitution,
  31. doc,
  32. )
  33. from pandas.util._exceptions import (
  34. find_stack_level,
  35. rewrite_warning,
  36. )
  37. from pandas.core.dtypes.dtypes import ArrowDtype
  38. from pandas.core.dtypes.generic import (
  39. ABCDataFrame,
  40. ABCSeries,
  41. )
  42. import pandas.core.algorithms as algos
  43. from pandas.core.apply import (
  44. ResamplerWindowApply,
  45. warn_alias_replacement,
  46. )
  47. from pandas.core.arrays import ArrowExtensionArray
  48. from pandas.core.base import (
  49. PandasObject,
  50. SelectionMixin,
  51. )
  52. import pandas.core.common as com
  53. from pandas.core.generic import (
  54. NDFrame,
  55. _shared_docs,
  56. )
  57. from pandas.core.groupby.generic import SeriesGroupBy
  58. from pandas.core.groupby.groupby import (
  59. BaseGroupBy,
  60. GroupBy,
  61. _apply_groupings_depr,
  62. _pipe_template,
  63. get_groupby,
  64. )
  65. from pandas.core.groupby.grouper import Grouper
  66. from pandas.core.groupby.ops import BinGrouper
  67. from pandas.core.indexes.api import MultiIndex
  68. from pandas.core.indexes.base import Index
  69. from pandas.core.indexes.datetimes import (
  70. DatetimeIndex,
  71. date_range,
  72. )
  73. from pandas.core.indexes.period import (
  74. PeriodIndex,
  75. period_range,
  76. )
  77. from pandas.core.indexes.timedeltas import (
  78. TimedeltaIndex,
  79. timedelta_range,
  80. )
  81. from pandas.tseries.frequencies import (
  82. is_subperiod,
  83. is_superperiod,
  84. )
  85. from pandas.tseries.offsets import (
  86. Day,
  87. Tick,
  88. )
  89. if TYPE_CHECKING:
  90. from collections.abc import Hashable
  91. from pandas._typing import (
  92. AnyArrayLike,
  93. Axis,
  94. AxisInt,
  95. Frequency,
  96. IndexLabel,
  97. InterpolateOptions,
  98. T,
  99. TimedeltaConvertibleTypes,
  100. TimeGrouperOrigin,
  101. TimestampConvertibleTypes,
  102. npt,
  103. )
  104. from pandas import (
  105. DataFrame,
  106. Series,
  107. )
# Module-level mapping of str -> str, created empty.
# NOTE(review): presumably substitution values for shared docstring templates;
# it is not referenced in the visible part of this file -- confirm before removing.
_shared_docs_kwargs: dict[str, str] = {}
  109. class Resampler(BaseGroupBy, PandasObject):
  110. """
  111. Class for resampling datetimelike data, a groupby-like operation.
  112. See aggregate, transform, and apply functions on this object.
  113. It's easiest to use obj.resample(...) to use Resampler.
  114. Parameters
  115. ----------
  116. obj : Series or DataFrame
  117. groupby : TimeGrouper
  118. axis : int, default 0
  119. kind : str or None
  120. 'period', 'timestamp' to override default index treatment
  121. Returns
  122. -------
  123. a Resampler of the appropriate type
  124. Notes
  125. -----
  126. After resampling, see aggregate, apply, and transform functions.
  127. """
  128. _grouper: BinGrouper
  129. _timegrouper: TimeGrouper
  130. binner: DatetimeIndex | TimedeltaIndex | PeriodIndex # depends on subclass
  131. exclusions: frozenset[Hashable] = frozenset() # for SelectionMixin compat
  132. _internal_names_set = set({"obj", "ax", "_indexer"})
  133. # to the groupby descriptor
  134. _attributes = [
  135. "freq",
  136. "axis",
  137. "closed",
  138. "label",
  139. "convention",
  140. "kind",
  141. "origin",
  142. "offset",
  143. ]
  144. def __init__(
  145. self,
  146. obj: NDFrame,
  147. timegrouper: TimeGrouper,
  148. axis: Axis = 0,
  149. kind=None,
  150. *,
  151. gpr_index: Index,
  152. group_keys: bool = False,
  153. selection=None,
  154. include_groups: bool = True,
  155. ) -> None:
  156. self._timegrouper = timegrouper
  157. self.keys = None
  158. self.sort = True
  159. self.axis = obj._get_axis_number(axis)
  160. self.kind = kind
  161. self.group_keys = group_keys
  162. self.as_index = True
  163. self.include_groups = include_groups
  164. self.obj, self.ax, self._indexer = self._timegrouper._set_grouper(
  165. self._convert_obj(obj), sort=True, gpr_index=gpr_index
  166. )
  167. self.binner, self._grouper = self._get_binner()
  168. self._selection = selection
  169. if self._timegrouper.key is not None:
  170. self.exclusions = frozenset([self._timegrouper.key])
  171. else:
  172. self.exclusions = frozenset()
  173. @final
  174. def __str__(self) -> str:
  175. """
  176. Provide a nice str repr of our rolling object.
  177. """
  178. attrs = (
  179. f"{k}={getattr(self._timegrouper, k)}"
  180. for k in self._attributes
  181. if getattr(self._timegrouper, k, None) is not None
  182. )
  183. return f"{type(self).__name__} [{', '.join(attrs)}]"
  184. @final
  185. def __getattr__(self, attr: str):
  186. if attr in self._internal_names_set:
  187. return object.__getattribute__(self, attr)
  188. if attr in self._attributes:
  189. return getattr(self._timegrouper, attr)
  190. if attr in self.obj:
  191. return self[attr]
  192. return object.__getattribute__(self, attr)
  193. @final
  194. @property
  195. def _from_selection(self) -> bool:
  196. """
  197. Is the resampling from a DataFrame column or MultiIndex level.
  198. """
  199. # upsampling and PeriodIndex resampling do not work
  200. # with selection, this state used to catch and raise an error
  201. return self._timegrouper is not None and (
  202. self._timegrouper.key is not None or self._timegrouper.level is not None
  203. )
  204. def _convert_obj(self, obj: NDFrameT) -> NDFrameT:
  205. """
  206. Provide any conversions for the object in order to correctly handle.
  207. Parameters
  208. ----------
  209. obj : Series or DataFrame
  210. Returns
  211. -------
  212. Series or DataFrame
  213. """
  214. return obj._consolidate()
  215. def _get_binner_for_time(self):
  216. raise AbstractMethodError(self)
  217. @final
  218. def _get_binner(self):
  219. """
  220. Create the BinGrouper, assume that self.set_grouper(obj)
  221. has already been called.
  222. """
  223. binner, bins, binlabels = self._get_binner_for_time()
  224. assert len(bins) == len(binlabels)
  225. bin_grouper = BinGrouper(bins, binlabels, indexer=self._indexer)
  226. return binner, bin_grouper
  227. @final
  228. @Substitution(
  229. klass="Resampler",
  230. examples="""
  231. >>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
  232. ... index=pd.date_range('2012-08-02', periods=4))
  233. >>> df
  234. A
  235. 2012-08-02 1
  236. 2012-08-03 2
  237. 2012-08-04 3
  238. 2012-08-05 4
  239. To get the difference between each 2-day period's maximum and minimum
  240. value in one pass, you can do
  241. >>> df.resample('2D').pipe(lambda x: x.max() - x.min())
  242. A
  243. 2012-08-02 1
  244. 2012-08-04 1""",
  245. )
  246. @Appender(_pipe_template)
  247. def pipe(
  248. self,
  249. func: Callable[..., T] | tuple[Callable[..., T], str],
  250. *args,
  251. **kwargs,
  252. ) -> T:
  253. return super().pipe(func, *args, **kwargs)
  254. _agg_see_also_doc = dedent(
  255. """
  256. See Also
  257. --------
  258. DataFrame.groupby.aggregate : Aggregate using callable, string, dict,
  259. or list of string/callables.
  260. DataFrame.resample.transform : Transforms the Series on each group
  261. based on the given function.
  262. DataFrame.aggregate: Aggregate using one or more
  263. operations over the specified axis.
  264. """
  265. )
  266. _agg_examples_doc = dedent(
  267. """
  268. Examples
  269. --------
  270. >>> s = pd.Series([1, 2, 3, 4, 5],
  271. ... index=pd.date_range('20130101', periods=5, freq='s'))
  272. >>> s
  273. 2013-01-01 00:00:00 1
  274. 2013-01-01 00:00:01 2
  275. 2013-01-01 00:00:02 3
  276. 2013-01-01 00:00:03 4
  277. 2013-01-01 00:00:04 5
  278. Freq: s, dtype: int64
  279. >>> r = s.resample('2s')
  280. >>> r.agg("sum")
  281. 2013-01-01 00:00:00 3
  282. 2013-01-01 00:00:02 7
  283. 2013-01-01 00:00:04 5
  284. Freq: 2s, dtype: int64
  285. >>> r.agg(['sum', 'mean', 'max'])
  286. sum mean max
  287. 2013-01-01 00:00:00 3 1.5 2
  288. 2013-01-01 00:00:02 7 3.5 4
  289. 2013-01-01 00:00:04 5 5.0 5
  290. >>> r.agg({'result': lambda x: x.mean() / x.std(),
  291. ... 'total': "sum"})
  292. result total
  293. 2013-01-01 00:00:00 2.121320 3
  294. 2013-01-01 00:00:02 4.949747 7
  295. 2013-01-01 00:00:04 NaN 5
  296. >>> r.agg(average="mean", total="sum")
  297. average total
  298. 2013-01-01 00:00:00 1.5 3
  299. 2013-01-01 00:00:02 3.5 7
  300. 2013-01-01 00:00:04 5.0 5
  301. """
  302. )
  303. @final
  304. @doc(
  305. _shared_docs["aggregate"],
  306. see_also=_agg_see_also_doc,
  307. examples=_agg_examples_doc,
  308. klass="DataFrame",
  309. axis="",
  310. )
  311. def aggregate(self, func=None, *args, **kwargs):
  312. result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
  313. if result is None:
  314. how = func
  315. result = self._groupby_and_aggregate(how, *args, **kwargs)
  316. return result
  317. agg = aggregate
  318. apply = aggregate
  319. @final
  320. def transform(self, arg, *args, **kwargs):
  321. """
  322. Call function producing a like-indexed Series on each group.
  323. Return a Series with the transformed values.
  324. Parameters
  325. ----------
  326. arg : function
  327. To apply to each group. Should return a Series with the same index.
  328. Returns
  329. -------
  330. Series
  331. Examples
  332. --------
  333. >>> s = pd.Series([1, 2],
  334. ... index=pd.date_range('20180101',
  335. ... periods=2,
  336. ... freq='1h'))
  337. >>> s
  338. 2018-01-01 00:00:00 1
  339. 2018-01-01 01:00:00 2
  340. Freq: h, dtype: int64
  341. >>> resampled = s.resample('15min')
  342. >>> resampled.transform(lambda x: (x - x.mean()) / x.std())
  343. 2018-01-01 00:00:00 NaN
  344. 2018-01-01 01:00:00 NaN
  345. Freq: h, dtype: float64
  346. """
  347. return self._selected_obj.groupby(self._timegrouper).transform(
  348. arg, *args, **kwargs
  349. )
  350. def _downsample(self, f, **kwargs):
  351. raise AbstractMethodError(self)
  352. def _upsample(self, f, limit: int | None = None, fill_value=None):
  353. raise AbstractMethodError(self)
  354. def _gotitem(self, key, ndim: int, subset=None):
  355. """
  356. Sub-classes to define. Return a sliced object.
  357. Parameters
  358. ----------
  359. key : string / list of selections
  360. ndim : {1, 2}
  361. requested ndim of result
  362. subset : object, default None
  363. subset to act on
  364. """
  365. grouper = self._grouper
  366. if subset is None:
  367. subset = self.obj
  368. if key is not None:
  369. subset = subset[key]
  370. else:
  371. # reached via Apply.agg_dict_like with selection=None and ndim=1
  372. assert subset.ndim == 1
  373. if ndim == 1:
  374. assert subset.ndim == 1
  375. grouped = get_groupby(
  376. subset, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys
  377. )
  378. return grouped
  379. def _groupby_and_aggregate(self, how, *args, **kwargs):
  380. """
  381. Re-evaluate the obj with a groupby aggregation.
  382. """
  383. grouper = self._grouper
  384. # Excludes `on` column when provided
  385. obj = self._obj_with_exclusions
  386. grouped = get_groupby(
  387. obj, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys
  388. )
  389. try:
  390. if callable(how):
  391. # TODO: test_resample_apply_with_additional_args fails if we go
  392. # through the non-lambda path, not clear that it should.
  393. func = lambda x: how(x, *args, **kwargs)
  394. result = grouped.aggregate(func)
  395. else:
  396. result = grouped.aggregate(how, *args, **kwargs)
  397. except (AttributeError, KeyError):
  398. # we have a non-reducing function; try to evaluate
  399. # alternatively we want to evaluate only a column of the input
  400. # test_apply_to_one_column_of_df the function being applied references
  401. # a DataFrame column, but aggregate_item_by_item operates column-wise
  402. # on Series, raising AttributeError or KeyError
  403. # (depending on whether the column lookup uses getattr/__getitem__)
  404. result = _apply(
  405. grouped, how, *args, include_groups=self.include_groups, **kwargs
  406. )
  407. except ValueError as err:
  408. if "Must produce aggregated value" in str(err):
  409. # raised in _aggregate_named
  410. # see test_apply_without_aggregation, test_apply_with_mutated_index
  411. pass
  412. else:
  413. raise
  414. # we have a non-reducing function
  415. # try to evaluate
  416. result = _apply(
  417. grouped, how, *args, include_groups=self.include_groups, **kwargs
  418. )
  419. return self._wrap_result(result)
  420. @final
  421. def _get_resampler_for_grouping(
  422. self, groupby: GroupBy, key, include_groups: bool = True
  423. ):
  424. """
  425. Return the correct class for resampling with groupby.
  426. """
  427. return self._resampler_for_grouping(
  428. groupby=groupby, key=key, parent=self, include_groups=include_groups
  429. )
  430. def _wrap_result(self, result):
  431. """
  432. Potentially wrap any results.
  433. """
  434. # GH 47705
  435. obj = self.obj
  436. if (
  437. isinstance(result, ABCDataFrame)
  438. and len(result) == 0
  439. and not isinstance(result.index, PeriodIndex)
  440. ):
  441. result = result.set_index(
  442. _asfreq_compat(obj.index[:0], freq=self.freq), append=True
  443. )
  444. if isinstance(result, ABCSeries) and self._selection is not None:
  445. result.name = self._selection
  446. if isinstance(result, ABCSeries) and result.empty:
  447. # When index is all NaT, result is empty but index is not
  448. result.index = _asfreq_compat(obj.index[:0], freq=self.freq)
  449. result.name = getattr(obj, "name", None)
  450. if self._timegrouper._arrow_dtype is not None:
  451. result.index = result.index.astype(self._timegrouper._arrow_dtype)
  452. return result
  453. @final
  454. def ffill(self, limit: int | None = None):
  455. """
  456. Forward fill the values.
  457. Parameters
  458. ----------
  459. limit : int, optional
  460. Limit of how many values to fill.
  461. Returns
  462. -------
  463. An upsampled Series.
  464. See Also
  465. --------
  466. Series.fillna: Fill NA/NaN values using the specified method.
  467. DataFrame.fillna: Fill NA/NaN values using the specified method.
  468. Examples
  469. --------
  470. Here we only create a ``Series``.
  471. >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
  472. ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
  473. >>> ser
  474. 2023-01-01 1
  475. 2023-01-15 2
  476. 2023-02-01 3
  477. 2023-02-15 4
  478. dtype: int64
  479. Example for ``ffill`` with downsampling (we have fewer dates after resampling):
  480. >>> ser.resample('MS').ffill()
  481. 2023-01-01 1
  482. 2023-02-01 3
  483. Freq: MS, dtype: int64
  484. Example for ``ffill`` with upsampling (fill the new dates with
  485. the previous value):
  486. >>> ser.resample('W').ffill()
  487. 2023-01-01 1
  488. 2023-01-08 1
  489. 2023-01-15 2
  490. 2023-01-22 2
  491. 2023-01-29 2
  492. 2023-02-05 3
  493. 2023-02-12 3
  494. 2023-02-19 4
  495. Freq: W-SUN, dtype: int64
  496. With upsampling and limiting (only fill the first new date with the
  497. previous value):
  498. >>> ser.resample('W').ffill(limit=1)
  499. 2023-01-01 1.0
  500. 2023-01-08 1.0
  501. 2023-01-15 2.0
  502. 2023-01-22 2.0
  503. 2023-01-29 NaN
  504. 2023-02-05 3.0
  505. 2023-02-12 NaN
  506. 2023-02-19 4.0
  507. Freq: W-SUN, dtype: float64
  508. """
  509. return self._upsample("ffill", limit=limit)
  510. @final
  511. def nearest(self, limit: int | None = None):
  512. """
  513. Resample by using the nearest value.
  514. When resampling data, missing values may appear (e.g., when the
  515. resampling frequency is higher than the original frequency).
  516. The `nearest` method will replace ``NaN`` values that appeared in
  517. the resampled data with the value from the nearest member of the
  518. sequence, based on the index value.
  519. Missing values that existed in the original data will not be modified.
  520. If `limit` is given, fill only this many values in each direction for
  521. each of the original values.
  522. Parameters
  523. ----------
  524. limit : int, optional
  525. Limit of how many values to fill.
  526. Returns
  527. -------
  528. Series or DataFrame
  529. An upsampled Series or DataFrame with ``NaN`` values filled with
  530. their nearest value.
  531. See Also
  532. --------
  533. backfill : Backward fill the new missing values in the resampled data.
  534. pad : Forward fill ``NaN`` values.
  535. Examples
  536. --------
  537. >>> s = pd.Series([1, 2],
  538. ... index=pd.date_range('20180101',
  539. ... periods=2,
  540. ... freq='1h'))
  541. >>> s
  542. 2018-01-01 00:00:00 1
  543. 2018-01-01 01:00:00 2
  544. Freq: h, dtype: int64
  545. >>> s.resample('15min').nearest()
  546. 2018-01-01 00:00:00 1
  547. 2018-01-01 00:15:00 1
  548. 2018-01-01 00:30:00 2
  549. 2018-01-01 00:45:00 2
  550. 2018-01-01 01:00:00 2
  551. Freq: 15min, dtype: int64
  552. Limit the number of upsampled values imputed by the nearest:
  553. >>> s.resample('15min').nearest(limit=1)
  554. 2018-01-01 00:00:00 1.0
  555. 2018-01-01 00:15:00 1.0
  556. 2018-01-01 00:30:00 NaN
  557. 2018-01-01 00:45:00 2.0
  558. 2018-01-01 01:00:00 2.0
  559. Freq: 15min, dtype: float64
  560. """
  561. return self._upsample("nearest", limit=limit)
  562. @final
  563. def bfill(self, limit: int | None = None):
  564. """
  565. Backward fill the new missing values in the resampled data.
  566. In statistics, imputation is the process of replacing missing data with
  567. substituted values [1]_. When resampling data, missing values may
  568. appear (e.g., when the resampling frequency is higher than the original
  569. frequency). The backward fill will replace NaN values that appeared in
  570. the resampled data with the next value in the original sequence.
  571. Missing values that existed in the original data will not be modified.
  572. Parameters
  573. ----------
  574. limit : int, optional
  575. Limit of how many values to fill.
  576. Returns
  577. -------
  578. Series, DataFrame
  579. An upsampled Series or DataFrame with backward filled NaN values.
  580. See Also
  581. --------
  582. bfill : Alias of backfill.
  583. fillna : Fill NaN values using the specified method, which can be
  584. 'backfill'.
  585. nearest : Fill NaN values with nearest neighbor starting from center.
  586. ffill : Forward fill NaN values.
  587. Series.fillna : Fill NaN values in the Series using the
  588. specified method, which can be 'backfill'.
  589. DataFrame.fillna : Fill NaN values in the DataFrame using the
  590. specified method, which can be 'backfill'.
  591. References
  592. ----------
  593. .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics)
  594. Examples
  595. --------
  596. Resampling a Series:
  597. >>> s = pd.Series([1, 2, 3],
  598. ... index=pd.date_range('20180101', periods=3, freq='h'))
  599. >>> s
  600. 2018-01-01 00:00:00 1
  601. 2018-01-01 01:00:00 2
  602. 2018-01-01 02:00:00 3
  603. Freq: h, dtype: int64
  604. >>> s.resample('30min').bfill()
  605. 2018-01-01 00:00:00 1
  606. 2018-01-01 00:30:00 2
  607. 2018-01-01 01:00:00 2
  608. 2018-01-01 01:30:00 3
  609. 2018-01-01 02:00:00 3
  610. Freq: 30min, dtype: int64
  611. >>> s.resample('15min').bfill(limit=2)
  612. 2018-01-01 00:00:00 1.0
  613. 2018-01-01 00:15:00 NaN
  614. 2018-01-01 00:30:00 2.0
  615. 2018-01-01 00:45:00 2.0
  616. 2018-01-01 01:00:00 2.0
  617. 2018-01-01 01:15:00 NaN
  618. 2018-01-01 01:30:00 3.0
  619. 2018-01-01 01:45:00 3.0
  620. 2018-01-01 02:00:00 3.0
  621. Freq: 15min, dtype: float64
  622. Resampling a DataFrame that has missing values:
  623. >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]},
  624. ... index=pd.date_range('20180101', periods=3,
  625. ... freq='h'))
  626. >>> df
  627. a b
  628. 2018-01-01 00:00:00 2.0 1
  629. 2018-01-01 01:00:00 NaN 3
  630. 2018-01-01 02:00:00 6.0 5
  631. >>> df.resample('30min').bfill()
  632. a b
  633. 2018-01-01 00:00:00 2.0 1
  634. 2018-01-01 00:30:00 NaN 3
  635. 2018-01-01 01:00:00 NaN 3
  636. 2018-01-01 01:30:00 6.0 5
  637. 2018-01-01 02:00:00 6.0 5
  638. >>> df.resample('15min').bfill(limit=2)
  639. a b
  640. 2018-01-01 00:00:00 2.0 1.0
  641. 2018-01-01 00:15:00 NaN NaN
  642. 2018-01-01 00:30:00 NaN 3.0
  643. 2018-01-01 00:45:00 NaN 3.0
  644. 2018-01-01 01:00:00 NaN 3.0
  645. 2018-01-01 01:15:00 NaN NaN
  646. 2018-01-01 01:30:00 6.0 5.0
  647. 2018-01-01 01:45:00 6.0 5.0
  648. 2018-01-01 02:00:00 6.0 5.0
  649. """
  650. return self._upsample("bfill", limit=limit)
  651. @final
  652. def fillna(self, method, limit: int | None = None):
  653. """
  654. Fill missing values introduced by upsampling.
  655. In statistics, imputation is the process of replacing missing data with
  656. substituted values [1]_. When resampling data, missing values may
  657. appear (e.g., when the resampling frequency is higher than the original
  658. frequency).
  659. Missing values that existed in the original data will
  660. not be modified.
  661. Parameters
  662. ----------
  663. method : {'pad', 'backfill', 'ffill', 'bfill', 'nearest'}
  664. Method to use for filling holes in resampled data
  665. * 'pad' or 'ffill': use previous valid observation to fill gap
  666. (forward fill).
  667. * 'backfill' or 'bfill': use next valid observation to fill gap.
  668. * 'nearest': use nearest valid observation to fill gap.
  669. limit : int, optional
  670. Limit of how many consecutive missing values to fill.
  671. Returns
  672. -------
  673. Series or DataFrame
  674. An upsampled Series or DataFrame with missing values filled.
  675. See Also
  676. --------
  677. bfill : Backward fill NaN values in the resampled data.
  678. ffill : Forward fill NaN values in the resampled data.
  679. nearest : Fill NaN values in the resampled data
  680. with nearest neighbor starting from center.
  681. interpolate : Fill NaN values using interpolation.
  682. Series.fillna : Fill NaN values in the Series using the
  683. specified method, which can be 'bfill' and 'ffill'.
  684. DataFrame.fillna : Fill NaN values in the DataFrame using the
  685. specified method, which can be 'bfill' and 'ffill'.
  686. References
  687. ----------
  688. .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics)
  689. Examples
  690. --------
  691. Resampling a Series:
  692. >>> s = pd.Series([1, 2, 3],
  693. ... index=pd.date_range('20180101', periods=3, freq='h'))
  694. >>> s
  695. 2018-01-01 00:00:00 1
  696. 2018-01-01 01:00:00 2
  697. 2018-01-01 02:00:00 3
  698. Freq: h, dtype: int64
  699. Without filling the missing values you get:
  700. >>> s.resample("30min").asfreq()
  701. 2018-01-01 00:00:00 1.0
  702. 2018-01-01 00:30:00 NaN
  703. 2018-01-01 01:00:00 2.0
  704. 2018-01-01 01:30:00 NaN
  705. 2018-01-01 02:00:00 3.0
  706. Freq: 30min, dtype: float64
  707. >>> s.resample('30min').fillna("backfill")
  708. 2018-01-01 00:00:00 1
  709. 2018-01-01 00:30:00 2
  710. 2018-01-01 01:00:00 2
  711. 2018-01-01 01:30:00 3
  712. 2018-01-01 02:00:00 3
  713. Freq: 30min, dtype: int64
  714. >>> s.resample('15min').fillna("backfill", limit=2)
  715. 2018-01-01 00:00:00 1.0
  716. 2018-01-01 00:15:00 NaN
  717. 2018-01-01 00:30:00 2.0
  718. 2018-01-01 00:45:00 2.0
  719. 2018-01-01 01:00:00 2.0
  720. 2018-01-01 01:15:00 NaN
  721. 2018-01-01 01:30:00 3.0
  722. 2018-01-01 01:45:00 3.0
  723. 2018-01-01 02:00:00 3.0
  724. Freq: 15min, dtype: float64
  725. >>> s.resample('30min').fillna("pad")
  726. 2018-01-01 00:00:00 1
  727. 2018-01-01 00:30:00 1
  728. 2018-01-01 01:00:00 2
  729. 2018-01-01 01:30:00 2
  730. 2018-01-01 02:00:00 3
  731. Freq: 30min, dtype: int64
  732. >>> s.resample('30min').fillna("nearest")
  733. 2018-01-01 00:00:00 1
  734. 2018-01-01 00:30:00 2
  735. 2018-01-01 01:00:00 2
  736. 2018-01-01 01:30:00 3
  737. 2018-01-01 02:00:00 3
  738. Freq: 30min, dtype: int64
  739. Missing values present before the upsampling are not affected.
  740. >>> sm = pd.Series([1, None, 3],
  741. ... index=pd.date_range('20180101', periods=3, freq='h'))
  742. >>> sm
  743. 2018-01-01 00:00:00 1.0
  744. 2018-01-01 01:00:00 NaN
  745. 2018-01-01 02:00:00 3.0
  746. Freq: h, dtype: float64
  747. >>> sm.resample('30min').fillna('backfill')
  748. 2018-01-01 00:00:00 1.0
  749. 2018-01-01 00:30:00 NaN
  750. 2018-01-01 01:00:00 NaN
  751. 2018-01-01 01:30:00 3.0
  752. 2018-01-01 02:00:00 3.0
  753. Freq: 30min, dtype: float64
  754. >>> sm.resample('30min').fillna('pad')
  755. 2018-01-01 00:00:00 1.0
  756. 2018-01-01 00:30:00 1.0
  757. 2018-01-01 01:00:00 NaN
  758. 2018-01-01 01:30:00 NaN
  759. 2018-01-01 02:00:00 3.0
  760. Freq: 30min, dtype: float64
  761. >>> sm.resample('30min').fillna('nearest')
  762. 2018-01-01 00:00:00 1.0
  763. 2018-01-01 00:30:00 NaN
  764. 2018-01-01 01:00:00 NaN
  765. 2018-01-01 01:30:00 3.0
  766. 2018-01-01 02:00:00 3.0
  767. Freq: 30min, dtype: float64
  768. DataFrame resampling is done column-wise. All the same options are
  769. available.
  770. >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]},
  771. ... index=pd.date_range('20180101', periods=3,
  772. ... freq='h'))
  773. >>> df
  774. a b
  775. 2018-01-01 00:00:00 2.0 1
  776. 2018-01-01 01:00:00 NaN 3
  777. 2018-01-01 02:00:00 6.0 5
  778. >>> df.resample('30min').fillna("bfill")
  779. a b
  780. 2018-01-01 00:00:00 2.0 1
  781. 2018-01-01 00:30:00 NaN 3
  782. 2018-01-01 01:00:00 NaN 3
  783. 2018-01-01 01:30:00 6.0 5
  784. 2018-01-01 02:00:00 6.0 5
  785. """
  786. warnings.warn(
  787. f"{type(self).__name__}.fillna is deprecated and will be removed "
  788. "in a future version. Use obj.ffill(), obj.bfill(), "
  789. "or obj.nearest() instead.",
  790. FutureWarning,
  791. stacklevel=find_stack_level(),
  792. )
  793. return self._upsample(method, limit=limit)
  794. @final
  795. def interpolate(
  796. self,
  797. method: InterpolateOptions = "linear",
  798. *,
  799. axis: Axis = 0,
  800. limit: int | None = None,
  801. inplace: bool = False,
  802. limit_direction: Literal["forward", "backward", "both"] = "forward",
  803. limit_area=None,
  804. downcast=lib.no_default,
  805. **kwargs,
  806. ):
  807. """
  808. Interpolate values between target timestamps according to different methods.
  809. The original index is first reindexed to target timestamps
  810. (see :meth:`core.resample.Resampler.asfreq`),
  811. then the interpolation of ``NaN`` values via :meth:`DataFrame.interpolate`
  812. happens.
  813. Parameters
  814. ----------
  815. method : str, default 'linear'
  816. Interpolation technique to use. One of:
  817. * 'linear': Ignore the index and treat the values as equally
  818. spaced. This is the only method supported on MultiIndexes.
  819. * 'time': Works on daily and higher resolution data to interpolate
  820. given length of interval.
  821. * 'index', 'values': use the actual numerical values of the index.
  822. * 'pad': Fill in NaNs using existing values.
  823. * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
  824. 'barycentric', 'polynomial': Passed to
  825. `scipy.interpolate.interp1d`, whereas 'spline' is passed to
  826. `scipy.interpolate.UnivariateSpline`. These methods use the numerical
  827. values of the index. Both 'polynomial' and 'spline' require that
  828. you also specify an `order` (int), e.g.
  829. ``df.interpolate(method='polynomial', order=5)``. Note that,
  830. `slinear` method in Pandas refers to the Scipy first order `spline`
  831. instead of Pandas first order `spline`.
  832. * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima',
  833. 'cubicspline': Wrappers around the SciPy interpolation methods of
  834. similar names. See `Notes`.
  835. * 'from_derivatives': Refers to
  836. `scipy.interpolate.BPoly.from_derivatives`.
  837. axis : {{0 or 'index', 1 or 'columns', None}}, default None
  838. Axis to interpolate along. For `Series` this parameter is unused
  839. and defaults to 0.
  840. limit : int, optional
  841. Maximum number of consecutive NaNs to fill. Must be greater than
  842. 0.
  843. inplace : bool, default False
  844. Update the data in place if possible.
  845. limit_direction : {{'forward', 'backward', 'both'}}, Optional
  846. Consecutive NaNs will be filled in this direction.
  847. If limit is specified:
  848. * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'.
  849. * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be
  850. 'backwards'.
  851. If 'limit' is not specified:
  852. * If 'method' is 'backfill' or 'bfill', the default is 'backward'
  853. * else the default is 'forward'
  854. raises ValueError if `limit_direction` is 'forward' or 'both' and
  855. method is 'backfill' or 'bfill'.
  856. raises ValueError if `limit_direction` is 'backward' or 'both' and
  857. method is 'pad' or 'ffill'.
  858. limit_area : {{`None`, 'inside', 'outside'}}, default None
  859. If limit is specified, consecutive NaNs will be filled with this
  860. restriction.
  861. * ``None``: No fill restriction.
  862. * 'inside': Only fill NaNs surrounded by valid values
  863. (interpolate).
  864. * 'outside': Only fill NaNs outside valid values (extrapolate).
  865. downcast : optional, 'infer' or None, defaults to None
  866. Downcast dtypes if possible.
  867. .. deprecated:: 2.1.0
  868. ``**kwargs`` : optional
  869. Keyword arguments to pass on to the interpolating function.
  870. Returns
  871. -------
  872. DataFrame or Series
  873. Interpolated values at the specified freq.
  874. See Also
  875. --------
  876. core.resample.Resampler.asfreq: Return the values at the new freq,
  877. essentially a reindex.
  878. DataFrame.interpolate: Fill NaN values using an interpolation method.
  879. Notes
  880. -----
  881. For high-frequent or non-equidistant time-series with timestamps
  882. the reindexing followed by interpolation may lead to information loss
  883. as shown in the last example.
  884. Examples
  885. --------
  886. >>> start = "2023-03-01T07:00:00"
  887. >>> timesteps = pd.date_range(start, periods=5, freq="s")
  888. >>> series = pd.Series(data=[1, -1, 2, 1, 3], index=timesteps)
  889. >>> series
  890. 2023-03-01 07:00:00 1
  891. 2023-03-01 07:00:01 -1
  892. 2023-03-01 07:00:02 2
  893. 2023-03-01 07:00:03 1
  894. 2023-03-01 07:00:04 3
  895. Freq: s, dtype: int64
  896. Upsample the dataframe to 0.5Hz by providing the period time of 2s.
  897. >>> series.resample("2s").interpolate("linear")
  898. 2023-03-01 07:00:00 1
  899. 2023-03-01 07:00:02 2
  900. 2023-03-01 07:00:04 3
  901. Freq: 2s, dtype: int64
  902. Downsample the dataframe to 2Hz by providing the period time of 500ms.
  903. >>> series.resample("500ms").interpolate("linear")
  904. 2023-03-01 07:00:00.000 1.0
  905. 2023-03-01 07:00:00.500 0.0
  906. 2023-03-01 07:00:01.000 -1.0
  907. 2023-03-01 07:00:01.500 0.5
  908. 2023-03-01 07:00:02.000 2.0
  909. 2023-03-01 07:00:02.500 1.5
  910. 2023-03-01 07:00:03.000 1.0
  911. 2023-03-01 07:00:03.500 2.0
  912. 2023-03-01 07:00:04.000 3.0
  913. Freq: 500ms, dtype: float64
  914. Internal reindexing with ``asfreq()`` prior to interpolation leads to
  915. an interpolated timeseries on the basis the reindexed timestamps (anchors).
  916. Since not all datapoints from original series become anchors,
  917. it can lead to misleading interpolation results as in the following example:
  918. >>> series.resample("400ms").interpolate("linear")
  919. 2023-03-01 07:00:00.000 1.0
  920. 2023-03-01 07:00:00.400 1.2
  921. 2023-03-01 07:00:00.800 1.4
  922. 2023-03-01 07:00:01.200 1.6
  923. 2023-03-01 07:00:01.600 1.8
  924. 2023-03-01 07:00:02.000 2.0
  925. 2023-03-01 07:00:02.400 2.2
  926. 2023-03-01 07:00:02.800 2.4
  927. 2023-03-01 07:00:03.200 2.6
  928. 2023-03-01 07:00:03.600 2.8
  929. 2023-03-01 07:00:04.000 3.0
  930. Freq: 400ms, dtype: float64
  931. Note that the series erroneously increases between two anchors
  932. ``07:00:00`` and ``07:00:02``.
  933. """
  934. assert downcast is lib.no_default # just checking coverage
  935. result = self._upsample("asfreq")
  936. return result.interpolate(
  937. method=method,
  938. axis=axis,
  939. limit=limit,
  940. inplace=inplace,
  941. limit_direction=limit_direction,
  942. limit_area=limit_area,
  943. downcast=downcast,
  944. **kwargs,
  945. )
    @final
    def asfreq(self, fill_value=None):
        """
        Return the values at the new freq, essentially a reindex.

        Parameters
        ----------
        fill_value : scalar, optional
            Value to use for missing values, applied during upsampling (note
            this does not fill NaNs that already were present).

        Returns
        -------
        DataFrame or Series
            Values at the specified freq.

        See Also
        --------
        Series.asfreq: Convert TimeSeries to specified frequency.
        DataFrame.asfreq: Convert TimeSeries to specified frequency.

        Examples
        --------

        >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
        ...                 ['2023-01-01', '2023-01-31', '2023-02-01', '2023-02-28']))
        >>> ser
        2023-01-01    1
        2023-01-31    2
        2023-02-01    3
        2023-02-28    4
        dtype: int64
        >>> ser.resample('MS').asfreq()
        2023-01-01    1
        2023-02-01    3
        Freq: MS, dtype: int64
        """
        # "asfreq" is the method name handed to the upsampling hook; the
        # concrete resampler decides how to realise it.
        return self._upsample("asfreq", fill_value=fill_value)
  979. @final
  980. def sum(
  981. self,
  982. numeric_only: bool = False,
  983. min_count: int = 0,
  984. *args,
  985. **kwargs,
  986. ):
  987. """
  988. Compute sum of group values.
  989. Parameters
  990. ----------
  991. numeric_only : bool, default False
  992. Include only float, int, boolean columns.
  993. .. versionchanged:: 2.0.0
  994. numeric_only no longer accepts ``None``.
  995. min_count : int, default 0
  996. The required number of valid values to perform the operation. If fewer
  997. than ``min_count`` non-NA values are present the result will be NA.
  998. Returns
  999. -------
  1000. Series or DataFrame
  1001. Computed sum of values within each group.
  1002. Examples
  1003. --------
  1004. >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
  1005. ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
  1006. >>> ser
  1007. 2023-01-01 1
  1008. 2023-01-15 2
  1009. 2023-02-01 3
  1010. 2023-02-15 4
  1011. dtype: int64
  1012. >>> ser.resample('MS').sum()
  1013. 2023-01-01 3
  1014. 2023-02-01 7
  1015. Freq: MS, dtype: int64
  1016. """
  1017. maybe_warn_args_and_kwargs(type(self), "sum", args, kwargs)
  1018. nv.validate_resampler_func("sum", args, kwargs)
  1019. return self._downsample("sum", numeric_only=numeric_only, min_count=min_count)
  1020. @final
  1021. def prod(
  1022. self,
  1023. numeric_only: bool = False,
  1024. min_count: int = 0,
  1025. *args,
  1026. **kwargs,
  1027. ):
  1028. """
  1029. Compute prod of group values.
  1030. Parameters
  1031. ----------
  1032. numeric_only : bool, default False
  1033. Include only float, int, boolean columns.
  1034. .. versionchanged:: 2.0.0
  1035. numeric_only no longer accepts ``None``.
  1036. min_count : int, default 0
  1037. The required number of valid values to perform the operation. If fewer
  1038. than ``min_count`` non-NA values are present the result will be NA.
  1039. Returns
  1040. -------
  1041. Series or DataFrame
  1042. Computed prod of values within each group.
  1043. Examples
  1044. --------
  1045. >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
  1046. ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
  1047. >>> ser
  1048. 2023-01-01 1
  1049. 2023-01-15 2
  1050. 2023-02-01 3
  1051. 2023-02-15 4
  1052. dtype: int64
  1053. >>> ser.resample('MS').prod()
  1054. 2023-01-01 2
  1055. 2023-02-01 12
  1056. Freq: MS, dtype: int64
  1057. """
  1058. maybe_warn_args_and_kwargs(type(self), "prod", args, kwargs)
  1059. nv.validate_resampler_func("prod", args, kwargs)
  1060. return self._downsample("prod", numeric_only=numeric_only, min_count=min_count)
  1061. @final
  1062. def min(
  1063. self,
  1064. numeric_only: bool = False,
  1065. min_count: int = 0,
  1066. *args,
  1067. **kwargs,
  1068. ):
  1069. """
  1070. Compute min value of group.
  1071. Returns
  1072. -------
  1073. Series or DataFrame
  1074. Examples
  1075. --------
  1076. >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
  1077. ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
  1078. >>> ser
  1079. 2023-01-01 1
  1080. 2023-01-15 2
  1081. 2023-02-01 3
  1082. 2023-02-15 4
  1083. dtype: int64
  1084. >>> ser.resample('MS').min()
  1085. 2023-01-01 1
  1086. 2023-02-01 3
  1087. Freq: MS, dtype: int64
  1088. """
  1089. maybe_warn_args_and_kwargs(type(self), "min", args, kwargs)
  1090. nv.validate_resampler_func("min", args, kwargs)
  1091. return self._downsample("min", numeric_only=numeric_only, min_count=min_count)
  1092. @final
  1093. def max(
  1094. self,
  1095. numeric_only: bool = False,
  1096. min_count: int = 0,
  1097. *args,
  1098. **kwargs,
  1099. ):
  1100. """
  1101. Compute max value of group.
  1102. Returns
  1103. -------
  1104. Series or DataFrame
  1105. Examples
  1106. --------
  1107. >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
  1108. ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
  1109. >>> ser
  1110. 2023-01-01 1
  1111. 2023-01-15 2
  1112. 2023-02-01 3
  1113. 2023-02-15 4
  1114. dtype: int64
  1115. >>> ser.resample('MS').max()
  1116. 2023-01-01 2
  1117. 2023-02-01 4
  1118. Freq: MS, dtype: int64
  1119. """
  1120. maybe_warn_args_and_kwargs(type(self), "max", args, kwargs)
  1121. nv.validate_resampler_func("max", args, kwargs)
  1122. return self._downsample("max", numeric_only=numeric_only, min_count=min_count)
  1123. @final
  1124. @doc(GroupBy.first)
  1125. def first(
  1126. self,
  1127. numeric_only: bool = False,
  1128. min_count: int = 0,
  1129. skipna: bool = True,
  1130. *args,
  1131. **kwargs,
  1132. ):
  1133. maybe_warn_args_and_kwargs(type(self), "first", args, kwargs)
  1134. nv.validate_resampler_func("first", args, kwargs)
  1135. return self._downsample(
  1136. "first", numeric_only=numeric_only, min_count=min_count, skipna=skipna
  1137. )
  1138. @final
  1139. @doc(GroupBy.last)
  1140. def last(
  1141. self,
  1142. numeric_only: bool = False,
  1143. min_count: int = 0,
  1144. skipna: bool = True,
  1145. *args,
  1146. **kwargs,
  1147. ):
  1148. maybe_warn_args_and_kwargs(type(self), "last", args, kwargs)
  1149. nv.validate_resampler_func("last", args, kwargs)
  1150. return self._downsample(
  1151. "last", numeric_only=numeric_only, min_count=min_count, skipna=skipna
  1152. )
  1153. @final
  1154. @doc(GroupBy.median)
  1155. def median(self, numeric_only: bool = False, *args, **kwargs):
  1156. maybe_warn_args_and_kwargs(type(self), "median", args, kwargs)
  1157. nv.validate_resampler_func("median", args, kwargs)
  1158. return self._downsample("median", numeric_only=numeric_only)
  1159. @final
  1160. def mean(
  1161. self,
  1162. numeric_only: bool = False,
  1163. *args,
  1164. **kwargs,
  1165. ):
  1166. """
  1167. Compute mean of groups, excluding missing values.
  1168. Parameters
  1169. ----------
  1170. numeric_only : bool, default False
  1171. Include only `float`, `int` or `boolean` data.
  1172. .. versionchanged:: 2.0.0
  1173. numeric_only now defaults to ``False``.
  1174. Returns
  1175. -------
  1176. DataFrame or Series
  1177. Mean of values within each group.
  1178. Examples
  1179. --------
  1180. >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex(
  1181. ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']))
  1182. >>> ser
  1183. 2023-01-01 1
  1184. 2023-01-15 2
  1185. 2023-02-01 3
  1186. 2023-02-15 4
  1187. dtype: int64
  1188. >>> ser.resample('MS').mean()
  1189. 2023-01-01 1.5
  1190. 2023-02-01 3.5
  1191. Freq: MS, dtype: float64
  1192. """
  1193. maybe_warn_args_and_kwargs(type(self), "mean", args, kwargs)
  1194. nv.validate_resampler_func("mean", args, kwargs)
  1195. return self._downsample("mean", numeric_only=numeric_only)
  1196. @final
  1197. def std(
  1198. self,
  1199. ddof: int = 1,
  1200. numeric_only: bool = False,
  1201. *args,
  1202. **kwargs,
  1203. ):
  1204. """
  1205. Compute standard deviation of groups, excluding missing values.
  1206. Parameters
  1207. ----------
  1208. ddof : int, default 1
  1209. Degrees of freedom.
  1210. numeric_only : bool, default False
  1211. Include only `float`, `int` or `boolean` data.
  1212. .. versionadded:: 1.5.0
  1213. .. versionchanged:: 2.0.0
  1214. numeric_only now defaults to ``False``.
  1215. Returns
  1216. -------
  1217. DataFrame or Series
  1218. Standard deviation of values within each group.
  1219. Examples
  1220. --------
  1221. >>> ser = pd.Series([1, 3, 2, 4, 3, 8],
  1222. ... index=pd.DatetimeIndex(['2023-01-01',
  1223. ... '2023-01-10',
  1224. ... '2023-01-15',
  1225. ... '2023-02-01',
  1226. ... '2023-02-10',
  1227. ... '2023-02-15']))
  1228. >>> ser.resample('MS').std()
  1229. 2023-01-01 1.000000
  1230. 2023-02-01 2.645751
  1231. Freq: MS, dtype: float64
  1232. """
  1233. maybe_warn_args_and_kwargs(type(self), "std", args, kwargs)
  1234. nv.validate_resampler_func("std", args, kwargs)
  1235. return self._downsample("std", ddof=ddof, numeric_only=numeric_only)
  1236. @final
  1237. def var(
  1238. self,
  1239. ddof: int = 1,
  1240. numeric_only: bool = False,
  1241. *args,
  1242. **kwargs,
  1243. ):
  1244. """
  1245. Compute variance of groups, excluding missing values.
  1246. Parameters
  1247. ----------
  1248. ddof : int, default 1
  1249. Degrees of freedom.
  1250. numeric_only : bool, default False
  1251. Include only `float`, `int` or `boolean` data.
  1252. .. versionadded:: 1.5.0
  1253. .. versionchanged:: 2.0.0
  1254. numeric_only now defaults to ``False``.
  1255. Returns
  1256. -------
  1257. DataFrame or Series
  1258. Variance of values within each group.
  1259. Examples
  1260. --------
  1261. >>> ser = pd.Series([1, 3, 2, 4, 3, 8],
  1262. ... index=pd.DatetimeIndex(['2023-01-01',
  1263. ... '2023-01-10',
  1264. ... '2023-01-15',
  1265. ... '2023-02-01',
  1266. ... '2023-02-10',
  1267. ... '2023-02-15']))
  1268. >>> ser.resample('MS').var()
  1269. 2023-01-01 1.0
  1270. 2023-02-01 7.0
  1271. Freq: MS, dtype: float64
  1272. >>> ser.resample('MS').var(ddof=0)
  1273. 2023-01-01 0.666667
  1274. 2023-02-01 4.666667
  1275. Freq: MS, dtype: float64
  1276. """
  1277. maybe_warn_args_and_kwargs(type(self), "var", args, kwargs)
  1278. nv.validate_resampler_func("var", args, kwargs)
  1279. return self._downsample("var", ddof=ddof, numeric_only=numeric_only)
  1280. @final
  1281. @doc(GroupBy.sem)
  1282. def sem(
  1283. self,
  1284. ddof: int = 1,
  1285. numeric_only: bool = False,
  1286. *args,
  1287. **kwargs,
  1288. ):
  1289. maybe_warn_args_and_kwargs(type(self), "sem", args, kwargs)
  1290. nv.validate_resampler_func("sem", args, kwargs)
  1291. return self._downsample("sem", ddof=ddof, numeric_only=numeric_only)
  1292. @final
  1293. @doc(GroupBy.ohlc)
  1294. def ohlc(
  1295. self,
  1296. *args,
  1297. **kwargs,
  1298. ):
  1299. maybe_warn_args_and_kwargs(type(self), "ohlc", args, kwargs)
  1300. nv.validate_resampler_func("ohlc", args, kwargs)
  1301. ax = self.ax
  1302. obj = self._obj_with_exclusions
  1303. if len(ax) == 0:
  1304. # GH#42902
  1305. obj = obj.copy()
  1306. obj.index = _asfreq_compat(obj.index, self.freq)
  1307. if obj.ndim == 1:
  1308. obj = obj.to_frame()
  1309. obj = obj.reindex(["open", "high", "low", "close"], axis=1)
  1310. else:
  1311. mi = MultiIndex.from_product(
  1312. [obj.columns, ["open", "high", "low", "close"]]
  1313. )
  1314. obj = obj.reindex(mi, axis=1)
  1315. return obj
  1316. return self._downsample("ohlc")
  1317. @final
  1318. @doc(SeriesGroupBy.nunique)
  1319. def nunique(
  1320. self,
  1321. *args,
  1322. **kwargs,
  1323. ):
  1324. maybe_warn_args_and_kwargs(type(self), "nunique", args, kwargs)
  1325. nv.validate_resampler_func("nunique", args, kwargs)
  1326. return self._downsample("nunique")
  1327. @final
  1328. @doc(GroupBy.size)
  1329. def size(self):
  1330. result = self._downsample("size")
  1331. # If the result is a non-empty DataFrame we stack to get a Series
  1332. # GH 46826
  1333. if isinstance(result, ABCDataFrame) and not result.empty:
  1334. result = result.stack(future_stack=True)
  1335. if not len(self.ax):
  1336. from pandas import Series
  1337. if self._selected_obj.ndim == 1:
  1338. name = self._selected_obj.name
  1339. else:
  1340. name = None
  1341. result = Series([], index=result.index, dtype="int64", name=name)
  1342. return result
  1343. @final
  1344. @doc(GroupBy.count)
  1345. def count(self):
  1346. result = self._downsample("count")
  1347. if not len(self.ax):
  1348. if self._selected_obj.ndim == 1:
  1349. result = type(self._selected_obj)(
  1350. [], index=result.index, dtype="int64", name=self._selected_obj.name
  1351. )
  1352. else:
  1353. from pandas import DataFrame
  1354. result = DataFrame(
  1355. [], index=result.index, columns=result.columns, dtype="int64"
  1356. )
  1357. return result
    @final
    def quantile(self, q: float | list[float] | AnyArrayLike = 0.5, **kwargs):
        """
        Return value at the given quantile.

        Parameters
        ----------
        q : float or array-like, default 0.5 (50% quantile)
        **kwargs
            Additional keyword arguments, forwarded unchanged to the
            ``"quantile"`` downsampling call.

        Returns
        -------
        DataFrame or Series
            Quantile of values within each group.

        See Also
        --------
        Series.quantile
            Return a series, where the index is q and the values are the quantiles.
        DataFrame.quantile
            Return a DataFrame, where the columns are the columns of self,
            and the values are the quantiles.
        DataFrameGroupBy.quantile
            Return a DataFrame, where the columns are groupby columns,
            and the values are its quantiles.

        Examples
        --------

        >>> ser = pd.Series([1, 3, 2, 4, 3, 8],
        ...                 index=pd.DatetimeIndex(['2023-01-01',
        ...                                         '2023-01-10',
        ...                                         '2023-01-15',
        ...                                         '2023-02-01',
        ...                                         '2023-02-10',
        ...                                         '2023-02-15']))
        >>> ser.resample('MS').quantile()
        2023-01-01    2.0
        2023-02-01    4.0
        Freq: MS, dtype: float64

        >>> ser.resample('MS').quantile(.25)
        2023-01-01    1.5
        2023-02-01    3.5
        Freq: MS, dtype: float64
        """
        # Unlike the other reductions in this class, no argument validation
        # happens here; q and kwargs go straight to the downsampling hook.
        return self._downsample("quantile", q=q, **kwargs)
class _GroupByMixin(PandasObject, SelectionMixin):
    """
    Provide the groupby facilities.

    An instance is built from an existing ``Resampler`` (the ``parent``) and a
    ``GroupBy`` object. Every up/downsampling request is routed through
    ``_apply``, which builds a resampler of type ``self._resampler_cls`` for
    the object it is handed and runs the requested operation on it.
    """

    _attributes: list[str]  # in practice the same as Resampler._attributes
    # Current column selection, if any; set per instance in __init__.
    _selection: IndexLabel | None = None
    # GroupBy object that the resampling function is applied to in _apply.
    _groupby: GroupBy
    # Shallow copy of the parent's time grouper (see __init__).
    _timegrouper: TimeGrouper

    def __init__(
        self,
        *,
        parent: Resampler,
        groupby: GroupBy,
        key=None,
        selection: IndexLabel | None = None,
        include_groups: bool = False,
    ) -> None:
        """
        Copy the resampling state from ``parent`` and attach ``groupby``.

        Parameters
        ----------
        parent : Resampler
            Resampler whose attributes (those named in ``_attributes``, plus
            ``binner``, ``ax``, ``obj`` and a copy of ``_timegrouper``) are
            copied onto this object.
        groupby : GroupBy
            Grouped object that ``_apply`` operates on.
        key : optional
            Stored as ``self.key``; ``_gotitem`` appends it to list
            selections when it is not already present.
        selection : IndexLabel, optional
            Stored as ``self._selection``.
        include_groups : bool, default False
            Stored and forwarded by ``_apply``.
        """
        # reached via ._gotitem and _get_resampler_for_grouping

        assert isinstance(groupby, GroupBy), type(groupby)

        # parent is always a Resampler, sometimes a _GroupByMixin
        assert isinstance(parent, Resampler), type(parent)

        # initialize our GroupByMixin object with
        # the resampler attributes
        for attr in self._attributes:
            setattr(self, attr, getattr(parent, attr))
        self._selection = selection

        self.binner = parent.binner
        self.key = key

        self._groupby = groupby
        # copy.copy is a shallow copy: this object gets its own grouper
        # instance rather than sharing the parent's.
        self._timegrouper = copy.copy(parent._timegrouper)

        self.ax = parent.ax
        self.obj = parent.obj
        self.include_groups = include_groups

    @no_type_check
    def _apply(self, f, *args, **kwargs):
        """
        Dispatch to _upsample; we are stripping all of the _upsample kwargs and
        performing the original function call on the grouped object.

        Parameters
        ----------
        f : str or callable
            A string is looked up as a method name on the resampler built
            inside ``func`` and called with ``**kwargs`` only (``*args`` are
            not passed in that case); anything else is handed to that
            resampler's ``apply`` together with ``*args`` and ``**kwargs``.
        """

        def func(x):
            # Build a resampler for the object handed in, reusing this
            # object's time grouper and axis.
            x = self._resampler_cls(x, timegrouper=self._timegrouper, gpr_index=self.ax)

            if isinstance(f, str):
                return getattr(x, f)(**kwargs)

            return x.apply(f, *args, **kwargs)

        # NOTE: inside a method body the bare name `_apply` is looked up in the
        # module namespace, not the class namespace, so this is a call to a
        # module-level `_apply` helper and not a recursive call to this method.
        result = _apply(self._groupby, func, include_groups=self.include_groups)
        return self._wrap_result(result)

    # All three hooks share the same implementation.
    _upsample = _apply
    _downsample = _apply
    _groupby_and_aggregate = _apply

    @final
    def _gotitem(self, key, ndim, subset=None):
        """
        Sub-classes to define. Return a sliced object.

        Parameters
        ----------
        key : string / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on

        Returns
        -------
        Same type as ``self``
            A new object built with ``self`` as parent, the (possibly
            narrowed) groupby and the inferred selection.
        """
        # create a new object to prevent aliasing
        if subset is None:
            subset = self.obj
            if key is not None:
                subset = subset[key]
            else:
                # reached via Apply.agg_dict_like with selection=None, ndim=1
                assert subset.ndim == 1

        # Try to select from a DataFrame, falling back to a Series
        try:
            # NOTE: this appends to the `key` list in place, so the extended
            # list is also what _infer_selection receives below.
            if isinstance(key, list) and self.key not in key and self.key is not None:
                key.append(self.key)
            groupby = self._groupby[key]
        except IndexError:
            groupby = self._groupby

        selection = self._infer_selection(key, subset)

        # NOTE(review): `key` and `include_groups` are not forwarded here, so
        # the new object gets the constructor defaults (None / False) for
        # them - confirm this is intended.
        new_rs = type(self)(
            groupby=groupby,
            parent=cast(Resampler, self),
            selection=selection,
        )
        return new_rs
  1481. class DatetimeIndexResampler(Resampler):
  1482. ax: DatetimeIndex
  1483. @property
  1484. def _resampler_for_grouping(self):
  1485. return DatetimeIndexResamplerGroupby
  1486. def _get_binner_for_time(self):
  1487. # this is how we are actually creating the bins
  1488. if self.kind == "period":
  1489. return self._timegrouper._get_time_period_bins(self.ax)
  1490. return self._timegrouper._get_time_bins(self.ax)
  1491. def _downsample(self, how, **kwargs):
  1492. """
  1493. Downsample the cython defined function.
  1494. Parameters
  1495. ----------
  1496. how : string / cython mapped function
  1497. **kwargs : kw args passed to how function
  1498. """
  1499. orig_how = how
  1500. how = com.get_cython_func(how) or how
  1501. if orig_how != how:
  1502. warn_alias_replacement(self, orig_how, how)
  1503. ax = self.ax
  1504. # Excludes `on` column when provided
  1505. obj = self._obj_with_exclusions
  1506. if not len(ax):
  1507. # reset to the new freq
  1508. obj = obj.copy()
  1509. obj.index = obj.index._with_freq(self.freq)
  1510. assert obj.index.freq == self.freq, (obj.index.freq, self.freq)
  1511. return obj
  1512. # do we have a regular frequency
  1513. # error: Item "None" of "Optional[Any]" has no attribute "binlabels"
  1514. if (
  1515. (ax.freq is not None or ax.inferred_freq is not None)
  1516. and len(self._grouper.binlabels) > len(ax)
  1517. and how is None
  1518. ):
  1519. # let's do an asfreq
  1520. return self.asfreq()
  1521. # we are downsampling
  1522. # we want to call the actual grouper method here
  1523. if self.axis == 0:
  1524. result = obj.groupby(self._grouper).aggregate(how, **kwargs)
  1525. else:
  1526. # test_resample_axis1
  1527. result = obj.T.groupby(self._grouper).aggregate(how, **kwargs).T
  1528. return self._wrap_result(result)
  1529. def _adjust_binner_for_upsample(self, binner):
  1530. """
  1531. Adjust our binner when upsampling.
  1532. The range of a new index should not be outside specified range
  1533. """
  1534. if self.closed == "right":
  1535. binner = binner[1:]
  1536. else:
  1537. binner = binner[:-1]
  1538. return binner
  1539. def _upsample(self, method, limit: int | None = None, fill_value=None):
  1540. """
  1541. Parameters
  1542. ----------
  1543. method : string {'backfill', 'bfill', 'pad',
  1544. 'ffill', 'asfreq'} method for upsampling
  1545. limit : int, default None
  1546. Maximum size gap to fill when reindexing
  1547. fill_value : scalar, default None
  1548. Value to use for missing values
  1549. See Also
  1550. --------
  1551. .fillna: Fill NA/NaN values using the specified method.
  1552. """
  1553. if self.axis:
  1554. raise AssertionError("axis must be 0")
  1555. if self._from_selection:
  1556. raise ValueError(
  1557. "Upsampling from level= or on= selection "
  1558. "is not supported, use .set_index(...) "
  1559. "to explicitly set index to datetime-like"
  1560. )
  1561. ax = self.ax
  1562. obj = self._selected_obj
  1563. binner = self.binner
  1564. res_index = self._adjust_binner_for_upsample(binner)
  1565. # if we have the same frequency as our axis, then we are equal sampling
  1566. if (
  1567. limit is None
  1568. and to_offset(ax.inferred_freq) == self.freq
  1569. and len(obj) == len(res_index)
  1570. ):
  1571. result = obj.copy()
  1572. result.index = res_index
  1573. else:
  1574. if method == "asfreq":
  1575. method = None
  1576. result = obj.reindex(
  1577. res_index, method=method, limit=limit, fill_value=fill_value
  1578. )
  1579. return self._wrap_result(result)
  1580. def _wrap_result(self, result):
  1581. result = super()._wrap_result(result)
  1582. # we may have a different kind that we were asked originally
  1583. # convert if needed
  1584. if self.kind == "period" and not isinstance(result.index, PeriodIndex):
  1585. if isinstance(result.index, MultiIndex):
  1586. # GH 24103 - e.g. groupby resample
  1587. if not isinstance(result.index.levels[-1], PeriodIndex):
  1588. new_level = result.index.levels[-1].to_period(self.freq)
  1589. result.index = result.index.set_levels(new_level, level=-1)
  1590. else:
  1591. result.index = result.index.to_period(self.freq)
  1592. return result
  1593. # error: Definition of "ax" in base class "_GroupByMixin" is incompatible
  1594. # with definition in base class "DatetimeIndexResampler"
class DatetimeIndexResamplerGroupby(  # type: ignore[misc]
    _GroupByMixin, DatetimeIndexResampler
):
    """
    Provides a resample of a groupby implementation.

    Combines ``_GroupByMixin`` with ``DatetimeIndexResampler``.
    """

    @property
    def _resampler_cls(self):
        # The plain (non-groupby) resampler class paired with this variant.
        return DatetimeIndexResampler
  1604. class PeriodIndexResampler(DatetimeIndexResampler):
  1605. # error: Incompatible types in assignment (expression has type "PeriodIndex", base
  1606. # class "DatetimeIndexResampler" defined the type as "DatetimeIndex")
  1607. ax: PeriodIndex # type: ignore[assignment]
  1608. @property
  1609. def _resampler_for_grouping(self):
  1610. return PeriodIndexResamplerGroupby
  1611. def _get_binner_for_time(self):
  1612. if self.kind == "timestamp":
  1613. return super()._get_binner_for_time()
  1614. return self._timegrouper._get_period_bins(self.ax)
  1615. def _convert_obj(self, obj: NDFrameT) -> NDFrameT:
  1616. obj = super()._convert_obj(obj)
  1617. if self._from_selection:
  1618. # see GH 14008, GH 12871
  1619. msg = (
  1620. "Resampling from level= or on= selection "
  1621. "with a PeriodIndex is not currently supported, "
  1622. "use .set_index(...) to explicitly set index"
  1623. )
  1624. raise NotImplementedError(msg)
  1625. # convert to timestamp
  1626. if self.kind == "timestamp":
  1627. obj = obj.to_timestamp(how=self.convention)
  1628. return obj
  1629. def _downsample(self, how, **kwargs):
  1630. """
  1631. Downsample the cython defined function.
  1632. Parameters
  1633. ----------
  1634. how : string / cython mapped function
  1635. **kwargs : kw args passed to how function
  1636. """
  1637. # we may need to actually resample as if we are timestamps
  1638. if self.kind == "timestamp":
  1639. return super()._downsample(how, **kwargs)
  1640. orig_how = how
  1641. how = com.get_cython_func(how) or how
  1642. if orig_how != how:
  1643. warn_alias_replacement(self, orig_how, how)
  1644. ax = self.ax
  1645. if is_subperiod(ax.freq, self.freq):
  1646. # Downsampling
  1647. return self._groupby_and_aggregate(how, **kwargs)
  1648. elif is_superperiod(ax.freq, self.freq):
  1649. if how == "ohlc":
  1650. # GH #13083
  1651. # upsampling to subperiods is handled as an asfreq, which works
  1652. # for pure aggregating/reducing methods
  1653. # OHLC reduces along the time dimension, but creates multiple
  1654. # values for each period -> handle by _groupby_and_aggregate()
  1655. return self._groupby_and_aggregate(how)
  1656. return self.asfreq()
  1657. elif ax.freq == self.freq:
  1658. return self.asfreq()
  1659. raise IncompatibleFrequency(
  1660. f"Frequency {ax.freq} cannot be resampled to {self.freq}, "
  1661. "as they are not sub or super periods"
  1662. )
  1663. def _upsample(self, method, limit: int | None = None, fill_value=None):
  1664. """
  1665. Parameters
  1666. ----------
  1667. method : {'backfill', 'bfill', 'pad', 'ffill'}
  1668. Method for upsampling.
  1669. limit : int, default None
  1670. Maximum size gap to fill when reindexing.
  1671. fill_value : scalar, default None
  1672. Value to use for missing values.
  1673. See Also
  1674. --------
  1675. .fillna: Fill NA/NaN values using the specified method.
  1676. """
  1677. # we may need to actually resample as if we are timestamps
  1678. if self.kind == "timestamp":
  1679. return super()._upsample(method, limit=limit, fill_value=fill_value)
  1680. ax = self.ax
  1681. obj = self.obj
  1682. new_index = self.binner
  1683. # Start vs. end of period
  1684. memb = ax.asfreq(self.freq, how=self.convention)
  1685. # Get the fill indexer
  1686. if method == "asfreq":
  1687. method = None
  1688. indexer = memb.get_indexer(new_index, method=method, limit=limit)
  1689. new_obj = _take_new_index(
  1690. obj,
  1691. indexer,
  1692. new_index,
  1693. axis=self.axis,
  1694. )
  1695. return self._wrap_result(new_obj)
  1696. # error: Definition of "ax" in base class "_GroupByMixin" is incompatible with
  1697. # definition in base class "PeriodIndexResampler"
class PeriodIndexResamplerGroupby(  # type: ignore[misc]
    _GroupByMixin, PeriodIndexResampler
):
    """
    Provides a resample of a groupby implementation.

    Combines ``_GroupByMixin`` with ``PeriodIndexResampler``.
    """

    @property
    def _resampler_cls(self):
        # The plain (non-groupby) resampler class paired with this variant.
        return PeriodIndexResampler
class TimedeltaIndexResampler(DatetimeIndexResampler):
    # error: Incompatible types in assignment (expression has type "TimedeltaIndex",
    # base class "DatetimeIndexResampler" defined the type as "DatetimeIndex")
    ax: TimedeltaIndex  # type: ignore[assignment]

    @property
    def _resampler_for_grouping(self):
        # Groupby-aware counterpart of this resampler.
        return TimedeltaIndexResamplerGroupby

    def _get_binner_for_time(self):
        # Binning is delegated to the time grouper's timedelta-specific routine.
        return self._timegrouper._get_time_delta_bins(self.ax)

    def _adjust_binner_for_upsample(self, binner):
        """
        Adjust our binner when upsampling.

        The range of a new index is allowed to be greater than original range
        so we don't need to change the length of a binner, GH 13022

        Returns ``binner`` unchanged.
        """
        return binner
  1723. # error: Definition of "ax" in base class "_GroupByMixin" is incompatible with
  1724. # definition in base class "DatetimeIndexResampler"
class TimedeltaIndexResamplerGroupby(  # type: ignore[misc]
    _GroupByMixin, TimedeltaIndexResampler
):
    """
    Provides a resample of a groupby implementation.

    Combines ``_GroupByMixin`` with ``TimedeltaIndexResampler``.
    """

    @property
    def _resampler_cls(self):
        # The plain (non-groupby) resampler class paired with this variant.
        return TimedeltaIndexResampler
  1734. def get_resampler(obj: Series | DataFrame, kind=None, **kwds) -> Resampler:
  1735. """
  1736. Create a TimeGrouper and return our resampler.
  1737. """
  1738. tg = TimeGrouper(obj, **kwds) # type: ignore[arg-type]
  1739. return tg._get_resampler(obj, kind=kind)
  1740. get_resampler.__doc__ = Resampler.__doc__
  1741. def get_resampler_for_grouping(
  1742. groupby: GroupBy,
  1743. rule,
  1744. how=None,
  1745. fill_method=None,
  1746. limit: int | None = None,
  1747. kind=None,
  1748. on=None,
  1749. include_groups: bool = True,
  1750. **kwargs,
  1751. ) -> Resampler:
  1752. """
  1753. Return our appropriate resampler when grouping as well.
  1754. """
  1755. # .resample uses 'on' similar to how .groupby uses 'key'
  1756. tg = TimeGrouper(freq=rule, key=on, **kwargs)
  1757. resampler = tg._get_resampler(groupby.obj, kind=kind)
  1758. return resampler._get_resampler_for_grouping(
  1759. groupby=groupby, include_groups=include_groups, key=tg.key
  1760. )
  1761. class TimeGrouper(Grouper):
  1762. """
  1763. Custom groupby class for time-interval grouping.
  1764. Parameters
  1765. ----------
  1766. freq : pandas date offset or offset alias for identifying bin edges
  1767. closed : closed end of interval; 'left' or 'right'
  1768. label : interval boundary to use for labeling; 'left' or 'right'
  1769. convention : {'start', 'end', 'e', 's'}
  1770. If axis is PeriodIndex
  1771. """
  1772. _attributes = Grouper._attributes + (
  1773. "closed",
  1774. "label",
  1775. "how",
  1776. "kind",
  1777. "convention",
  1778. "origin",
  1779. "offset",
  1780. )
  1781. origin: TimeGrouperOrigin
  1782. def __init__(
  1783. self,
  1784. obj: Grouper | None = None,
  1785. freq: Frequency = "Min",
  1786. key: str | None = None,
  1787. closed: Literal["left", "right"] | None = None,
  1788. label: Literal["left", "right"] | None = None,
  1789. how: str = "mean",
  1790. axis: Axis = 0,
  1791. fill_method=None,
  1792. limit: int | None = None,
  1793. kind: str | None = None,
  1794. convention: Literal["start", "end", "e", "s"] | None = None,
  1795. origin: Literal["epoch", "start", "start_day", "end", "end_day"]
  1796. | TimestampConvertibleTypes = "start_day",
  1797. offset: TimedeltaConvertibleTypes | None = None,
  1798. group_keys: bool = False,
  1799. **kwargs,
  1800. ) -> None:
  1801. # Check for correctness of the keyword arguments which would
  1802. # otherwise silently use the default if misspelled
  1803. if label not in {None, "left", "right"}:
  1804. raise ValueError(f"Unsupported value {label} for `label`")
  1805. if closed not in {None, "left", "right"}:
  1806. raise ValueError(f"Unsupported value {closed} for `closed`")
  1807. if convention not in {None, "start", "end", "e", "s"}:
  1808. raise ValueError(f"Unsupported value {convention} for `convention`")
  1809. if (
  1810. key is None
  1811. and obj is not None
  1812. and isinstance(obj.index, PeriodIndex) # type: ignore[attr-defined]
  1813. or (
  1814. key is not None
  1815. and obj is not None
  1816. and getattr(obj[key], "dtype", None) == "period" # type: ignore[index]
  1817. )
  1818. ):
  1819. freq = to_offset(freq, is_period=True)
  1820. else:
  1821. freq = to_offset(freq)
  1822. end_types = {"ME", "YE", "QE", "BME", "BYE", "BQE", "W"}
  1823. rule = freq.rule_code
  1824. if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types):
  1825. if closed is None:
  1826. closed = "right"
  1827. if label is None:
  1828. label = "right"
  1829. else:
  1830. # The backward resample sets ``closed`` to ``'right'`` by default
  1831. # since the last value should be considered as the edge point for
  1832. # the last bin. When origin in "end" or "end_day", the value for a
  1833. # specific ``Timestamp`` index stands for the resample result from
  1834. # the current ``Timestamp`` minus ``freq`` to the current
  1835. # ``Timestamp`` with a right close.
  1836. if origin in ["end", "end_day"]:
  1837. if closed is None:
  1838. closed = "right"
  1839. if label is None:
  1840. label = "right"
  1841. else:
  1842. if closed is None:
  1843. closed = "left"
  1844. if label is None:
  1845. label = "left"
  1846. self.closed = closed
  1847. self.label = label
  1848. self.kind = kind
  1849. self.convention = convention if convention is not None else "e"
  1850. self.how = how
  1851. self.fill_method = fill_method
  1852. self.limit = limit
  1853. self.group_keys = group_keys
  1854. self._arrow_dtype: ArrowDtype | None = None
  1855. if origin in ("epoch", "start", "start_day", "end", "end_day"):
  1856. # error: Incompatible types in assignment (expression has type "Union[Union[
  1857. # Timestamp, datetime, datetime64, signedinteger[_64Bit], float, str],
  1858. # Literal['epoch', 'start', 'start_day', 'end', 'end_day']]", variable has
  1859. # type "Union[Timestamp, Literal['epoch', 'start', 'start_day', 'end',
  1860. # 'end_day']]")
  1861. self.origin = origin # type: ignore[assignment]
  1862. else:
  1863. try:
  1864. self.origin = Timestamp(origin)
  1865. except (ValueError, TypeError) as err:
  1866. raise ValueError(
  1867. "'origin' should be equal to 'epoch', 'start', 'start_day', "
  1868. "'end', 'end_day' or "
  1869. f"should be a Timestamp convertible type. Got '{origin}' instead."
  1870. ) from err
  1871. try:
  1872. self.offset = Timedelta(offset) if offset is not None else None
  1873. except (ValueError, TypeError) as err:
  1874. raise ValueError(
  1875. "'offset' should be a Timedelta convertible type. "
  1876. f"Got '{offset}' instead."
  1877. ) from err
  1878. # always sort time groupers
  1879. kwargs["sort"] = True
  1880. super().__init__(freq=freq, key=key, axis=axis, **kwargs)
  1881. def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler:
  1882. """
  1883. Return my resampler or raise if we have an invalid axis.
  1884. Parameters
  1885. ----------
  1886. obj : Series or DataFrame
  1887. kind : string, optional
  1888. 'period','timestamp','timedelta' are valid
  1889. Returns
  1890. -------
  1891. Resampler
  1892. Raises
  1893. ------
  1894. TypeError if incompatible axis
  1895. """
  1896. _, ax, _ = self._set_grouper(obj, gpr_index=None)
  1897. if isinstance(ax, DatetimeIndex):
  1898. return DatetimeIndexResampler(
  1899. obj,
  1900. timegrouper=self,
  1901. kind=kind,
  1902. axis=self.axis,
  1903. group_keys=self.group_keys,
  1904. gpr_index=ax,
  1905. )
  1906. elif isinstance(ax, PeriodIndex) or kind == "period":
  1907. if not isinstance(ax, PeriodIndex):
  1908. warnings.warn(
  1909. "Resampling with kind='period' is deprecated. "
  1910. "Use datetime paths instead.",
  1911. FutureWarning,
  1912. stacklevel=find_stack_level(),
  1913. )
  1914. return PeriodIndexResampler(
  1915. obj,
  1916. timegrouper=self,
  1917. kind=kind,
  1918. axis=self.axis,
  1919. group_keys=self.group_keys,
  1920. gpr_index=ax,
  1921. )
  1922. elif isinstance(ax, TimedeltaIndex):
  1923. return TimedeltaIndexResampler(
  1924. obj,
  1925. timegrouper=self,
  1926. axis=self.axis,
  1927. group_keys=self.group_keys,
  1928. gpr_index=ax,
  1929. )
  1930. raise TypeError(
  1931. "Only valid with DatetimeIndex, "
  1932. "TimedeltaIndex or PeriodIndex, "
  1933. f"but got an instance of '{type(ax).__name__}'"
  1934. )
  1935. def _get_grouper(
  1936. self, obj: NDFrameT, validate: bool = True
  1937. ) -> tuple[BinGrouper, NDFrameT]:
  1938. # create the resampler and return our binner
  1939. r = self._get_resampler(obj)
  1940. return r._grouper, cast(NDFrameT, r.obj)
    def _get_time_bins(self, ax: DatetimeIndex):
        """
        Compute bin edges, bin boundaries and labels for a DatetimeIndex.

        Parameters
        ----------
        ax : DatetimeIndex
            The axis being resampled.

        Returns
        -------
        tuple
            ``(binner, bins, labels)``. For an empty ``ax``, ``bins`` is an
            empty list and ``binner``/``labels`` are the same empty
            DatetimeIndex.

        Raises
        ------
        TypeError
            If ``ax`` is not a DatetimeIndex.
        """
        if not isinstance(ax, DatetimeIndex):
            raise TypeError(
                "axis must be a DatetimeIndex, but got "
                f"an instance of {type(ax).__name__}"
            )

        if len(ax) == 0:
            binner = labels = DatetimeIndex(
                data=[], freq=self.freq, name=ax.name, dtype=ax.dtype
            )
            return binner, [], labels

        # Outer edges of the bin range, derived from the data's min and max.
        first, last = _get_timestamp_range_edges(
            ax.min(),
            ax.max(),
            self.freq,
            unit=ax.unit,
            closed=self.closed,
            origin=self.origin,
            offset=self.offset,
        )
        # GH #12037
        # use first/last directly instead of call replace() on them
        # because replace() will swallow the nanosecond part
        # thus last bin maybe slightly before the end if the end contains
        # nanosecond part and lead to `Values falls after last bin` error
        # GH 25758: If DST lands at midnight (e.g. 'America/Havana'), user feedback
        # has noted that ambiguous=True provides the most sensible result
        binner = labels = date_range(
            freq=self.freq,
            start=first,
            end=last,
            tz=ax.tz,
            name=ax.name,
            ambiguous=True,
            nonexistent="shift_forward",
            unit=ax.unit,
        )

        ax_values = ax.asi8
        binner, bin_edges = self._adjust_bin_edges(binner, ax_values)

        # general version, knowing nothing about relative frequencies
        bins = lib.generate_bins_dt64(
            ax_values, bin_edges, self.closed, hasnans=ax.hasnans
        )

        # Choose labels: right-closed bins take the (possibly trimmed) binner;
        # right labels drop the first edge in either case.
        if self.closed == "right":
            labels = binner
            if self.label == "right":
                labels = labels[1:]
        elif self.label == "right":
            labels = labels[1:]

        # A leading NaT entry is added to both when the axis contains NaT.
        if ax.hasnans:
            binner = binner.insert(0, NaT)
            labels = labels.insert(0, NaT)

        # if we end up with more labels than bins
        # adjust the labels
        # GH4076
        if len(bins) < len(labels):
            labels = labels[: len(bins)]

        return binner, bins, labels
    def _adjust_bin_edges(
        self, binner: DatetimeIndex, ax_values: npt.NDArray[np.int64]
    ) -> tuple[DatetimeIndex, npt.NDArray[np.int64]]:
        """
        Derive integer bin edges from ``binner``, with special handling for
        end-anchored frequencies.

        Returns
        -------
        tuple
            ``(binner, bin_edges)``; ``binner`` may have its last element
            removed, in which case ``bin_edges`` is shortened to match.
        """
        # Some hacks for > daily data, see #1471, #1458, #1483

        if self.freq.name in ("BME", "ME", "W") or self.freq.name.split("-")[0] in (
            "BQE",
            "BYE",
            "QE",
            "YE",
            "W",
        ):
            # If the right end-point is on the last day of the month, roll forwards
            # until the last moment of that day. Note that we only do this for offsets
            # which correspond to the end of a super-daily period - "month start", for
            # example, is excluded.
            if self.closed == "right":
                # GH 21459, GH 9119: Adjust the bins relative to the wall time
                edges_dti = binner.tz_localize(None)
                # Shift each edge forward by one day minus one unit of the
                # index resolution.
                edges_dti = (
                    edges_dti
                    + Timedelta(days=1, unit=edges_dti.unit).as_unit(edges_dti.unit)
                    - Timedelta(1, unit=edges_dti.unit).as_unit(edges_dti.unit)
                )
                bin_edges = edges_dti.tz_localize(binner.tz).asi8
            else:
                bin_edges = binner.asi8

            # intraday values on last day
            # Drop the final edge when the second-to-last edge already lies
            # beyond the largest data value.
            if bin_edges[-2] > ax_values.max():
                bin_edges = bin_edges[:-1]
                binner = binner[:-1]
        else:
            bin_edges = binner.asi8
        return binner, bin_edges
  2032. def _get_time_delta_bins(self, ax: TimedeltaIndex):
  2033. if not isinstance(ax, TimedeltaIndex):
  2034. raise TypeError(
  2035. "axis must be a TimedeltaIndex, but got "
  2036. f"an instance of {type(ax).__name__}"
  2037. )
  2038. if not isinstance(self.freq, Tick):
  2039. # GH#51896
  2040. raise ValueError(
  2041. "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
  2042. f"e.g. '24h' or '3D', not {self.freq}"
  2043. )
  2044. if not len(ax):
  2045. binner = labels = TimedeltaIndex(data=[], freq=self.freq, name=ax.name)
  2046. return binner, [], labels
  2047. start, end = ax.min(), ax.max()
  2048. if self.closed == "right":
  2049. end += self.freq
  2050. labels = binner = timedelta_range(
  2051. start=start, end=end, freq=self.freq, name=ax.name
  2052. )
  2053. end_stamps = labels
  2054. if self.closed == "left":
  2055. end_stamps += self.freq
  2056. bins = ax.searchsorted(end_stamps, side=self.closed)
  2057. if self.offset:
  2058. # GH 10530 & 31809
  2059. labels += self.offset
  2060. return binner, bins, labels
  2061. def _get_time_period_bins(self, ax: DatetimeIndex):
  2062. if not isinstance(ax, DatetimeIndex):
  2063. raise TypeError(
  2064. "axis must be a DatetimeIndex, but got "
  2065. f"an instance of {type(ax).__name__}"
  2066. )
  2067. freq = self.freq
  2068. if len(ax) == 0:
  2069. binner = labels = PeriodIndex(
  2070. data=[], freq=freq, name=ax.name, dtype=ax.dtype
  2071. )
  2072. return binner, [], labels
  2073. labels = binner = period_range(start=ax[0], end=ax[-1], freq=freq, name=ax.name)
  2074. end_stamps = (labels + freq).asfreq(freq, "s").to_timestamp()
  2075. if ax.tz:
  2076. end_stamps = end_stamps.tz_localize(ax.tz)
  2077. bins = ax.searchsorted(end_stamps, side="left")
  2078. return binner, bins, labels
    def _get_period_bins(self, ax: PeriodIndex):
        """
        Compute ``(binner, bins, labels)`` for a PeriodIndex.

        NaT entries in ``ax`` are counted and removed before binning; when
        any were present a leading NaT bin is added via ``_insert_nat_bin``.

        Raises
        ------
        TypeError
            If ``ax`` is not a PeriodIndex.
        """
        if not isinstance(ax, PeriodIndex):
            raise TypeError(
                "axis must be a PeriodIndex, but got "
                f"an instance of {type(ax).__name__}"
            )

        # The axis values expressed at the target frequency.
        memb = ax.asfreq(self.freq, how=self.convention)

        # NaT handling as in pandas._lib.lib.generate_bins_dt64()
        nat_count = 0
        if memb.hasnans:
            # error: Incompatible types in assignment (expression has type
            # "bool_", variable has type "int")  [assignment]
            nat_count = np.sum(memb._isnan)  # type: ignore[assignment]
            memb = memb[~memb._isnan]

        if not len(memb):
            # index contains no valid (non-NaT) values
            bins = np.array([], dtype=np.int64)
            binner = labels = PeriodIndex(data=[], freq=self.freq, name=ax.name)
            if len(ax) > 0:
                # index is all NaT
                binner, bins, labels = _insert_nat_bin(binner, bins, labels, len(ax))
            return binner, bins, labels

        freq_mult = self.freq.n

        start = ax.min().asfreq(self.freq, how=self.convention)
        end = ax.max().asfreq(self.freq, how="end")
        bin_shift = 0

        if isinstance(self.freq, Tick):
            # GH 23882 & 31809: get adjusted bin edge labels with 'origin'
            # and 'offset' support. This call only makes sense if the freq is a
            # Tick since offset and origin are only used in those cases.
            # Not doing this check could create an extra empty bin.
            p_start, end = _get_period_range_edges(
                start,
                end,
                self.freq,
                closed=self.closed,
                origin=self.origin,
                offset=self.offset,
            )

            # Get offset for bin edge (not label edge) adjustment
            start_offset = Period(start, self.freq) - Period(p_start, self.freq)
            # error: Item "Period" of "Union[Period, Any]" has no attribute "n"
            bin_shift = start_offset.n % freq_mult  # type: ignore[union-attr]
            start = p_start

        labels = binner = period_range(
            start=start, end=end, freq=self.freq, name=ax.name
        )

        i8 = memb.asi8

        # when upsampling to subperiods, we need to generate enough bins
        expected_bins_count = len(binner) * freq_mult
        i8_extend = expected_bins_count - (i8[-1] - i8[0])
        rng = np.arange(i8[0], i8[-1] + i8_extend, freq_mult)
        # Move each value one step of ``freq_mult`` forward.
        rng += freq_mult
        # adjust bin edge indexes to account for base
        rng -= bin_shift

        # Wrap in PeriodArray for PeriodArray.searchsorted
        prng = type(memb._data)(rng, dtype=memb.dtype)
        bins = memb.searchsorted(prng, side="left")

        if nat_count > 0:
            binner, bins, labels = _insert_nat_bin(binner, bins, labels, nat_count)

        return binner, bins, labels
  2140. def _set_grouper(
  2141. self, obj: NDFrameT, sort: bool = False, *, gpr_index: Index | None = None
  2142. ) -> tuple[NDFrameT, Index, npt.NDArray[np.intp] | None]:
  2143. obj, ax, indexer = super()._set_grouper(obj, sort, gpr_index=gpr_index)
  2144. if isinstance(ax.dtype, ArrowDtype) and ax.dtype.kind in "Mm":
  2145. self._arrow_dtype = ax.dtype
  2146. ax = Index(
  2147. cast(ArrowExtensionArray, ax.array)._maybe_convert_datelike_array()
  2148. )
  2149. return obj, ax, indexer
  2150. def _take_new_index(
  2151. obj: NDFrameT, indexer: npt.NDArray[np.intp], new_index: Index, axis: AxisInt = 0
  2152. ) -> NDFrameT:
  2153. if isinstance(obj, ABCSeries):
  2154. new_values = algos.take_nd(obj._values, indexer)
  2155. # error: Incompatible return value type (got "Series", expected "NDFrameT")
  2156. return obj._constructor( # type: ignore[return-value]
  2157. new_values, index=new_index, name=obj.name
  2158. )
  2159. elif isinstance(obj, ABCDataFrame):
  2160. if axis == 1:
  2161. raise NotImplementedError("axis 1 is not supported")
  2162. new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1)
  2163. # error: Incompatible return value type (got "DataFrame", expected "NDFrameT")
  2164. return obj._constructor_from_mgr(new_mgr, axes=new_mgr.axes) # type: ignore[return-value]
  2165. else:
  2166. raise ValueError("'obj' should be either a Series or a DataFrame")
  2167. def _get_timestamp_range_edges(
  2168. first: Timestamp,
  2169. last: Timestamp,
  2170. freq: BaseOffset,
  2171. unit: str,
  2172. closed: Literal["right", "left"] = "left",
  2173. origin: TimeGrouperOrigin = "start_day",
  2174. offset: Timedelta | None = None,
  2175. ) -> tuple[Timestamp, Timestamp]:
  2176. """
  2177. Adjust the `first` Timestamp to the preceding Timestamp that resides on
  2178. the provided offset. Adjust the `last` Timestamp to the following
  2179. Timestamp that resides on the provided offset. Input Timestamps that
  2180. already reside on the offset will be adjusted depending on the type of
  2181. offset and the `closed` parameter.
  2182. Parameters
  2183. ----------
  2184. first : pd.Timestamp
  2185. The beginning Timestamp of the range to be adjusted.
  2186. last : pd.Timestamp
  2187. The ending Timestamp of the range to be adjusted.
  2188. freq : pd.DateOffset
  2189. The dateoffset to which the Timestamps will be adjusted.
  2190. closed : {'right', 'left'}, default "left"
  2191. Which side of bin interval is closed.
  2192. origin : {'epoch', 'start', 'start_day'} or Timestamp, default 'start_day'
  2193. The timestamp on which to adjust the grouping. The timezone of origin must
  2194. match the timezone of the index.
  2195. If a timestamp is not used, these values are also supported:
  2196. - 'epoch': `origin` is 1970-01-01
  2197. - 'start': `origin` is the first value of the timeseries
  2198. - 'start_day': `origin` is the first day at midnight of the timeseries
  2199. offset : pd.Timedelta, default is None
  2200. An offset timedelta added to the origin.
  2201. Returns
  2202. -------
  2203. A tuple of length 2, containing the adjusted pd.Timestamp objects.
  2204. """
  2205. if isinstance(freq, Tick):
  2206. index_tz = first.tz
  2207. if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None):
  2208. raise ValueError("The origin must have the same timezone as the index.")
  2209. if origin == "epoch":
  2210. # set the epoch based on the timezone to have similar bins results when
  2211. # resampling on the same kind of indexes on different timezones
  2212. origin = Timestamp("1970-01-01", tz=index_tz)
  2213. if isinstance(freq, Day):
  2214. # _adjust_dates_anchored assumes 'D' means 24h, but first/last
  2215. # might contain a DST transition (23h, 24h, or 25h).
  2216. # So "pretend" the dates are naive when adjusting the endpoints
  2217. first = first.tz_localize(None)
  2218. last = last.tz_localize(None)
  2219. if isinstance(origin, Timestamp):
  2220. origin = origin.tz_localize(None)
  2221. first, last = _adjust_dates_anchored(
  2222. first, last, freq, closed=closed, origin=origin, offset=offset, unit=unit
  2223. )
  2224. if isinstance(freq, Day):
  2225. first = first.tz_localize(index_tz)
  2226. last = last.tz_localize(index_tz)
  2227. else:
  2228. first = first.normalize()
  2229. last = last.normalize()
  2230. if closed == "left":
  2231. first = Timestamp(freq.rollback(first))
  2232. else:
  2233. first = Timestamp(first - freq)
  2234. last = Timestamp(last + freq)
  2235. return first, last
  2236. def _get_period_range_edges(
  2237. first: Period,
  2238. last: Period,
  2239. freq: BaseOffset,
  2240. closed: Literal["right", "left"] = "left",
  2241. origin: TimeGrouperOrigin = "start_day",
  2242. offset: Timedelta | None = None,
  2243. ) -> tuple[Period, Period]:
  2244. """
  2245. Adjust the provided `first` and `last` Periods to the respective Period of
  2246. the given offset that encompasses them.
  2247. Parameters
  2248. ----------
  2249. first : pd.Period
  2250. The beginning Period of the range to be adjusted.
  2251. last : pd.Period
  2252. The ending Period of the range to be adjusted.
  2253. freq : pd.DateOffset
  2254. The freq to which the Periods will be adjusted.
  2255. closed : {'right', 'left'}, default "left"
  2256. Which side of bin interval is closed.
  2257. origin : {'epoch', 'start', 'start_day'}, Timestamp, default 'start_day'
  2258. The timestamp on which to adjust the grouping. The timezone of origin must
  2259. match the timezone of the index.
  2260. If a timestamp is not used, these values are also supported:
  2261. - 'epoch': `origin` is 1970-01-01
  2262. - 'start': `origin` is the first value of the timeseries
  2263. - 'start_day': `origin` is the first day at midnight of the timeseries
  2264. offset : pd.Timedelta, default is None
  2265. An offset timedelta added to the origin.
  2266. Returns
  2267. -------
  2268. A tuple of length 2, containing the adjusted pd.Period objects.
  2269. """
  2270. if not all(isinstance(obj, Period) for obj in [first, last]):
  2271. raise TypeError("'first' and 'last' must be instances of type Period")
  2272. # GH 23882
  2273. first_ts = first.to_timestamp()
  2274. last_ts = last.to_timestamp()
  2275. adjust_first = not freq.is_on_offset(first_ts)
  2276. adjust_last = freq.is_on_offset(last_ts)
  2277. first_ts, last_ts = _get_timestamp_range_edges(
  2278. first_ts, last_ts, freq, unit="ns", closed=closed, origin=origin, offset=offset
  2279. )
  2280. first = (first_ts + int(adjust_first) * freq).to_period(freq)
  2281. last = (last_ts - int(adjust_last) * freq).to_period(freq)
  2282. return first, last
  2283. def _insert_nat_bin(
  2284. binner: PeriodIndex, bins: np.ndarray, labels: PeriodIndex, nat_count: int
  2285. ) -> tuple[PeriodIndex, np.ndarray, PeriodIndex]:
  2286. # NaT handling as in pandas._lib.lib.generate_bins_dt64()
  2287. # shift bins by the number of NaT
  2288. assert nat_count > 0
  2289. bins += nat_count
  2290. bins = np.insert(bins, 0, nat_count)
  2291. # Incompatible types in assignment (expression has type "Index", variable
  2292. # has type "PeriodIndex")
  2293. binner = binner.insert(0, NaT) # type: ignore[assignment]
  2294. # Incompatible types in assignment (expression has type "Index", variable
  2295. # has type "PeriodIndex")
  2296. labels = labels.insert(0, NaT) # type: ignore[assignment]
  2297. return binner, bins, labels
  2298. def _adjust_dates_anchored(
  2299. first: Timestamp,
  2300. last: Timestamp,
  2301. freq: Tick,
  2302. closed: Literal["right", "left"] = "right",
  2303. origin: TimeGrouperOrigin = "start_day",
  2304. offset: Timedelta | None = None,
  2305. unit: str = "ns",
  2306. ) -> tuple[Timestamp, Timestamp]:
  2307. # First and last offsets should be calculated from the start day to fix an
  2308. # error cause by resampling across multiple days when a one day period is
  2309. # not a multiple of the frequency. See GH 8683
  2310. # To handle frequencies that are not multiple or divisible by a day we let
  2311. # the possibility to define a fixed origin timestamp. See GH 31809
  2312. first = first.as_unit(unit)
  2313. last = last.as_unit(unit)
  2314. if offset is not None:
  2315. offset = offset.as_unit(unit)
  2316. freq_value = Timedelta(freq).as_unit(unit)._value
  2317. origin_timestamp = 0 # origin == "epoch"
  2318. if origin == "start_day":
  2319. origin_timestamp = first.normalize()._value
  2320. elif origin == "start":
  2321. origin_timestamp = first._value
  2322. elif isinstance(origin, Timestamp):
  2323. origin_timestamp = origin.as_unit(unit)._value
  2324. elif origin in ["end", "end_day"]:
  2325. origin_last = last if origin == "end" else last.ceil("D")
  2326. sub_freq_times = (origin_last._value - first._value) // freq_value
  2327. if closed == "left":
  2328. sub_freq_times += 1
  2329. first = origin_last - sub_freq_times * freq
  2330. origin_timestamp = first._value
  2331. origin_timestamp += offset._value if offset else 0
  2332. # GH 10117 & GH 19375. If first and last contain timezone information,
  2333. # Perform the calculation in UTC in order to avoid localizing on an
  2334. # Ambiguous or Nonexistent time.
  2335. first_tzinfo = first.tzinfo
  2336. last_tzinfo = last.tzinfo
  2337. if first_tzinfo is not None:
  2338. first = first.tz_convert("UTC")
  2339. if last_tzinfo is not None:
  2340. last = last.tz_convert("UTC")
  2341. foffset = (first._value - origin_timestamp) % freq_value
  2342. loffset = (last._value - origin_timestamp) % freq_value
  2343. if closed == "right":
  2344. if foffset > 0:
  2345. # roll back
  2346. fresult_int = first._value - foffset
  2347. else:
  2348. fresult_int = first._value - freq_value
  2349. if loffset > 0:
  2350. # roll forward
  2351. lresult_int = last._value + (freq_value - loffset)
  2352. else:
  2353. # already the end of the road
  2354. lresult_int = last._value
  2355. else: # closed == 'left'
  2356. if foffset > 0:
  2357. fresult_int = first._value - foffset
  2358. else:
  2359. # start of the road
  2360. fresult_int = first._value
  2361. if loffset > 0:
  2362. # roll forward
  2363. lresult_int = last._value + (freq_value - loffset)
  2364. else:
  2365. lresult_int = last._value + freq_value
  2366. fresult = Timestamp(fresult_int, unit=unit)
  2367. lresult = Timestamp(lresult_int, unit=unit)
  2368. if first_tzinfo is not None:
  2369. fresult = fresult.tz_localize("UTC").tz_convert(first_tzinfo)
  2370. if last_tzinfo is not None:
  2371. lresult = lresult.tz_localize("UTC").tz_convert(last_tzinfo)
  2372. return fresult, lresult
  2373. def asfreq(
  2374. obj: NDFrameT,
  2375. freq,
  2376. method=None,
  2377. how=None,
  2378. normalize: bool = False,
  2379. fill_value=None,
  2380. ) -> NDFrameT:
  2381. """
  2382. Utility frequency conversion method for Series/DataFrame.
  2383. See :meth:`pandas.NDFrame.asfreq` for full documentation.
  2384. """
  2385. if isinstance(obj.index, PeriodIndex):
  2386. if method is not None:
  2387. raise NotImplementedError("'method' argument is not supported")
  2388. if how is None:
  2389. how = "E"
  2390. if isinstance(freq, BaseOffset):
  2391. if hasattr(freq, "_period_dtype_code"):
  2392. freq = freq_to_period_freqstr(freq.n, freq.name)
  2393. else:
  2394. raise ValueError(
  2395. f"Invalid offset: '{freq.base}' for converting time series "
  2396. f"with PeriodIndex."
  2397. )
  2398. new_obj = obj.copy()
  2399. new_obj.index = obj.index.asfreq(freq, how=how)
  2400. elif len(obj.index) == 0:
  2401. new_obj = obj.copy()
  2402. new_obj.index = _asfreq_compat(obj.index, freq)
  2403. else:
  2404. unit = None
  2405. if isinstance(obj.index, DatetimeIndex):
  2406. # TODO: should we disallow non-DatetimeIndex?
  2407. unit = obj.index.unit
  2408. dti = date_range(obj.index.min(), obj.index.max(), freq=freq, unit=unit)
  2409. dti.name = obj.index.name
  2410. new_obj = obj.reindex(dti, method=method, fill_value=fill_value)
  2411. if normalize:
  2412. new_obj.index = new_obj.index.normalize()
  2413. return new_obj
  2414. def _asfreq_compat(index: DatetimeIndex | PeriodIndex | TimedeltaIndex, freq):
  2415. """
  2416. Helper to mimic asfreq on (empty) DatetimeIndex and TimedeltaIndex.
  2417. Parameters
  2418. ----------
  2419. index : PeriodIndex, DatetimeIndex, or TimedeltaIndex
  2420. freq : DateOffset
  2421. Returns
  2422. -------
  2423. same type as index
  2424. """
  2425. if len(index) != 0:
  2426. # This should never be reached, always checked by the caller
  2427. raise ValueError(
  2428. "Can only set arbitrary freq for empty DatetimeIndex or TimedeltaIndex"
  2429. )
  2430. new_index: Index
  2431. if isinstance(index, PeriodIndex):
  2432. new_index = index.asfreq(freq=freq)
  2433. elif isinstance(index, DatetimeIndex):
  2434. new_index = DatetimeIndex([], dtype=index.dtype, freq=freq, name=index.name)
  2435. elif isinstance(index, TimedeltaIndex):
  2436. new_index = TimedeltaIndex([], dtype=index.dtype, freq=freq, name=index.name)
  2437. else: # pragma: no cover
  2438. raise TypeError(type(index))
  2439. return new_index
  2440. def maybe_warn_args_and_kwargs(cls, kernel: str, args, kwargs) -> None:
  2441. """
  2442. Warn for deprecation of args and kwargs in resample functions.
  2443. Parameters
  2444. ----------
  2445. cls : type
  2446. Class to warn about.
  2447. kernel : str
  2448. Operation name.
  2449. args : tuple or None
  2450. args passed by user. Will be None if and only if kernel does not have args.
  2451. kwargs : dict or None
  2452. kwargs passed by user. Will be None if and only if kernel does not have kwargs.
  2453. """
  2454. warn_args = args is not None and len(args) > 0
  2455. warn_kwargs = kwargs is not None and len(kwargs) > 0
  2456. if warn_args and warn_kwargs:
  2457. msg = "args and kwargs"
  2458. elif warn_args:
  2459. msg = "args"
  2460. elif warn_kwargs:
  2461. msg = "kwargs"
  2462. else:
  2463. return
  2464. warnings.warn(
  2465. f"Passing additional {msg} to {cls.__name__}.{kernel} has "
  2466. "no impact on the result and is deprecated. This will "
  2467. "raise a TypeError in a future version of pandas.",
  2468. category=FutureWarning,
  2469. stacklevel=find_stack_level(),
  2470. )
  2471. def _apply(
  2472. grouped: GroupBy, how: Callable, *args, include_groups: bool, **kwargs
  2473. ) -> DataFrame:
  2474. # GH#7155 - rewrite warning to appear as if it came from `.resample`
  2475. target_message = "DataFrameGroupBy.apply operated on the grouping columns"
  2476. new_message = _apply_groupings_depr.format("DataFrameGroupBy", "resample")
  2477. with rewrite_warning(
  2478. target_message=target_message,
  2479. target_category=FutureWarning,
  2480. new_message=new_message,
  2481. ):
  2482. result = grouped.apply(how, *args, include_groups=include_groups, **kwargs)
  2483. return result