frequencies.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602
  1. from __future__ import annotations
  2. from typing import TYPE_CHECKING
  3. import numpy as np
  4. from pandas._libs import lib
  5. from pandas._libs.algos import unique_deltas
  6. from pandas._libs.tslibs import (
  7. Timestamp,
  8. get_unit_from_dtype,
  9. periods_per_day,
  10. tz_convert_from_utc,
  11. )
  12. from pandas._libs.tslibs.ccalendar import (
  13. DAYS,
  14. MONTH_ALIASES,
  15. MONTH_NUMBERS,
  16. MONTHS,
  17. int_to_weekday,
  18. )
  19. from pandas._libs.tslibs.dtypes import (
  20. OFFSET_TO_PERIOD_FREQSTR,
  21. freq_to_period_freqstr,
  22. )
  23. from pandas._libs.tslibs.fields import (
  24. build_field_sarray,
  25. month_position_check,
  26. )
  27. from pandas._libs.tslibs.offsets import (
  28. DateOffset,
  29. Day,
  30. to_offset,
  31. )
  32. from pandas._libs.tslibs.parsing import get_rule_month
  33. from pandas.util._decorators import cache_readonly
  34. from pandas.core.dtypes.common import is_numeric_dtype
  35. from pandas.core.dtypes.dtypes import (
  36. DatetimeTZDtype,
  37. PeriodDtype,
  38. )
  39. from pandas.core.dtypes.generic import (
  40. ABCIndex,
  41. ABCSeries,
  42. )
  43. from pandas.core.algorithms import unique
  44. if TYPE_CHECKING:
  45. from pandas._typing import npt
  46. from pandas import (
  47. DatetimeIndex,
  48. Series,
  49. TimedeltaIndex,
  50. )
  51. from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
# --------------------------------------------------------------------
# Offset related functions

# The loops below run at import time and add anchored aliases, in place, to
# the OFFSET_TO_PERIOD_FREQSTR mapping imported from
# pandas._libs.tslibs.dtypes.

# Prefixes whose "<prefix>-<month>" form maps to the same value that the bare
# prefix already has in the mapping.
_need_suffix = ["QS", "BQE", "BQS", "YS", "BYE", "BYS"]

for _prefix in _need_suffix:
    for _m in MONTHS:
        key = f"{_prefix}-{_m}"
        OFFSET_TO_PERIOD_FREQSTR[key] = OFFSET_TO_PERIOD_FREQSTR[_prefix]

# "Y-<month>" and "Q-<month>" aliases map to themselves.
for _prefix in ["Y", "Q"]:
    for _m in MONTHS:
        _alias = f"{_prefix}-{_m}"
        OFFSET_TO_PERIOD_FREQSTR[_alias] = _alias

# "W-<day>" aliases map to themselves as well.
for _d in DAYS:
    OFFSET_TO_PERIOD_FREQSTR[f"W-{_d}"] = f"W-{_d}"
  65. def get_period_alias(offset_str: str) -> str | None:
  66. """
  67. Alias to closest period strings BQ->Q etc.
  68. """
  69. return OFFSET_TO_PERIOD_FREQSTR.get(offset_str, None)
  70. # ---------------------------------------------------------------------
  71. # Period codes
  72. def infer_freq(
  73. index: DatetimeIndex | TimedeltaIndex | Series | DatetimeLikeArrayMixin,
  74. ) -> str | None:
  75. """
  76. Infer the most likely frequency given the input index.
  77. Parameters
  78. ----------
  79. index : DatetimeIndex, TimedeltaIndex, Series or array-like
  80. If passed a Series will use the values of the series (NOT THE INDEX).
  81. Returns
  82. -------
  83. str or None
  84. None if no discernible frequency.
  85. Raises
  86. ------
  87. TypeError
  88. If the index is not datetime-like.
  89. ValueError
  90. If there are fewer than three values.
  91. Examples
  92. --------
  93. >>> idx = pd.date_range(start='2020/12/01', end='2020/12/30', periods=30)
  94. >>> pd.infer_freq(idx)
  95. 'D'
  96. """
  97. from pandas.core.api import DatetimeIndex
  98. if isinstance(index, ABCSeries):
  99. values = index._values
  100. if not (
  101. lib.is_np_dtype(values.dtype, "mM")
  102. or isinstance(values.dtype, DatetimeTZDtype)
  103. or values.dtype == object
  104. ):
  105. raise TypeError(
  106. "cannot infer freq from a non-convertible dtype "
  107. f"on a Series of {index.dtype}"
  108. )
  109. index = values
  110. inferer: _FrequencyInferer
  111. if not hasattr(index, "dtype"):
  112. pass
  113. elif isinstance(index.dtype, PeriodDtype):
  114. raise TypeError(
  115. "PeriodIndex given. Check the `freq` attribute "
  116. "instead of using infer_freq."
  117. )
  118. elif lib.is_np_dtype(index.dtype, "m"):
  119. # Allow TimedeltaIndex and TimedeltaArray
  120. inferer = _TimedeltaFrequencyInferer(index)
  121. return inferer.get_freq()
  122. elif is_numeric_dtype(index.dtype):
  123. raise TypeError(
  124. f"cannot infer freq from a non-convertible index of dtype {index.dtype}"
  125. )
  126. if not isinstance(index, DatetimeIndex):
  127. index = DatetimeIndex(index)
  128. inferer = _FrequencyInferer(index)
  129. return inferer.get_freq()
class _FrequencyInferer:
    """
    Infer a frequency string from the spacing of a datetime-like index.

    The instance caches several derived views of the index (unique deltas,
    calendar fields, month/year differences) and ``get_freq`` tries a fixed
    sequence of rules against them.

    Original author's note: not sure if I can avoid the state machine here.
    """

    def __init__(self, index) -> None:
        """
        Parameters
        ----------
        index : Index or datetime-like array
            Must expose ``asi8``; an Index is unwrapped through
            ``index._data._ndarray``, anything else through
            ``index._ndarray``.

        Raises
        ------
        ValueError
            If ``index`` has fewer than three elements.
        """
        self.index = index
        self.i8values = index.asi8

        # For get_unit_from_dtype we need the dtype to the underlying ndarray,
        # which for tz-aware is not the same as index.dtype
        if isinstance(index, ABCIndex):
            # error: Item "ndarray[Any, Any]" of "Union[ExtensionArray,
            # ndarray[Any, Any]]" has no attribute "_ndarray"
            self._creso = get_unit_from_dtype(
                index._data._ndarray.dtype  # type: ignore[union-attr]
            )
        else:
            # otherwise we have DTA/TDA
            self._creso = get_unit_from_dtype(index._ndarray.dtype)

        # This moves the values, which are implicitly in UTC, to
        # the timezone so they are in local time
        if hasattr(index, "tz"):
            if index.tz is not None:
                self.i8values = tz_convert_from_utc(
                    self.i8values, index.tz, reso=self._creso
                )

        # NOTE: the length check runs after the attribute setup above, so
        # those attribute accesses happen even for too-short input.
        if len(index) < 3:
            raise ValueError("Need at least 3 dates to infer frequency")

        # True when the index is sorted in either direction.
        self.is_monotonic = (
            self.index._is_monotonic_increasing or self.index._is_monotonic_decreasing
        )

    @cache_readonly
    def deltas(self) -> npt.NDArray[np.int64]:
        # Unique deltas of self.i8values, i.e. of the tz-converted values
        # when the index has a timezone.
        # NOTE(review): get_freq reads element 0 as the representative delta,
        # so unique_deltas presumably returns them sorted — confirm.
        return unique_deltas(self.i8values)

    @cache_readonly
    def deltas_asi8(self) -> npt.NDArray[np.int64]:
        # NB: we cannot use self.i8values here because we may have converted
        # the tz in __init__
        return unique_deltas(self.index.asi8)

    @cache_readonly
    def is_unique(self) -> bool:
        # True when the (tz-converted) values have exactly one unique delta.
        return len(self.deltas) == 1

    @cache_readonly
    def is_unique_asi8(self) -> bool:
        # Same check as is_unique, but on the unconverted asi8 values.
        return len(self.deltas_asi8) == 1

    def get_freq(self) -> str | None:
        """
        Find the appropriate frequency string to describe the inferred
        frequency of self.i8values

        The checks run in this order: a non-monotonic or non-unique index
        gives None; a first delta that is a non-zero multiple of one day is
        handed to ``_infer_daily_rule``; the business-hour delta patterns give
        "bh"; otherwise a single unique delta on the original ``asi8`` values
        is expressed in the coarsest unit (h, min, s, ms, us, ns) that
        divides it.

        Returns
        -------
        str or None
        """
        if not self.is_monotonic or not self.index._is_unique:
            return None

        delta = self.deltas[0]
        ppd = periods_per_day(self._creso)
        if delta and _is_multiple(delta, ppd):
            return self._infer_daily_rule()

        # Business hourly, maybe. 17: one day / 65: one weekend
        if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]):
            return "bh"

        # Possibly intraday frequency.  Here we use the
        # original .asi8 values as the modified values
        # will not work around DST transitions.  See #8772
        if not self.is_unique_asi8:
            return None

        delta = self.deltas_asi8[0]
        # Periods per hour / minute / second, derived from periods per day.
        pph = ppd // 24
        ppm = pph // 60
        pps = ppm // 60
        if _is_multiple(delta, pph):
            # Hours
            return _maybe_add_count("h", delta / pph)
        elif _is_multiple(delta, ppm):
            # Minutes
            return _maybe_add_count("min", delta / ppm)
        elif _is_multiple(delta, pps):
            # Seconds
            return _maybe_add_count("s", delta / pps)
        elif _is_multiple(delta, (pps // 1000)):
            # Milliseconds
            return _maybe_add_count("ms", delta / (pps // 1000))
        elif _is_multiple(delta, (pps // 1_000_000)):
            # Microseconds
            return _maybe_add_count("us", delta / (pps // 1_000_000))
        else:
            # Nanoseconds
            return _maybe_add_count("ns", delta)

    @cache_readonly
    def day_deltas(self) -> list[int]:
        # Unique deltas expressed in days.  "/" is true division, so the
        # elements are floating-point values; comparisons such as
        # ``== [1, 3]`` still hold for whole-day deltas.
        ppd = periods_per_day(self._creso)
        return [x / ppd for x in self.deltas]

    @cache_readonly
    def hour_deltas(self) -> list[int]:
        # Unique deltas expressed in hours (true division, see day_deltas).
        pph = periods_per_day(self._creso) // 24
        return [x / pph for x in self.deltas]

    @cache_readonly
    def fields(self) -> np.ndarray:  # structured array of fields
        # Built from the (tz-converted) i8 values; this class reads the "Y"
        # and "M" fields from it.
        return build_field_sarray(self.i8values, reso=self._creso)

    @cache_readonly
    def rep_stamp(self) -> Timestamp:
        # Representative timestamp: the first (tz-converted) value, used to
        # pick the month / weekday anchor of the inferred alias.
        return Timestamp(self.i8values[0], unit=self.index.unit)

    def month_position_check(self) -> str | None:
        # Delegates to the module-level month_position_check imported from
        # pandas._libs.tslibs.fields.  The callers in this class map the
        # results "cs", "bs", "ce" and "be" to offset aliases and treat None
        # as "no rule".
        return month_position_check(self.fields, self.index.dayofweek)

    @cache_readonly
    def mdiffs(self) -> npt.NDArray[np.int64]:
        # Unique differences between consecutive elements, measured in
        # months (year * 12 + month).
        nmonths = self.fields["Y"] * 12 + self.fields["M"]
        return unique_deltas(nmonths.astype("i8"))

    @cache_readonly
    def ydiffs(self) -> npt.NDArray[np.int64]:
        # Unique differences between the years of consecutive elements.
        return unique_deltas(self.fields["Y"].astype("i8"))

    def _infer_daily_rule(self) -> str | None:
        """
        Pick a rule for values whose first delta is a whole number of days.

        Rules are tried in order: annual, quarterly, monthly, uniform
        daily/weekly, business daily, week-of-month.

        Returns
        -------
        str or None
            None when no rule matches.
        """
        annual_rule = self._get_annual_rule()
        if annual_rule:
            nyears = self.ydiffs[0]
            month = MONTH_ALIASES[self.rep_stamp.month]
            alias = f"{annual_rule}-{month}"
            return _maybe_add_count(alias, nyears)

        quarterly_rule = self._get_quarterly_rule()
        if quarterly_rule:
            nquarters = self.mdiffs[0] / 3
            # Map the representative month modulo 3 onto month 12, 11 or 10,
            # which is then looked up in MONTH_ALIASES for the anchor.
            mod_dict = {0: 12, 2: 11, 1: 10}
            month = MONTH_ALIASES[mod_dict[self.rep_stamp.month % 3]]
            alias = f"{quarterly_rule}-{month}"
            return _maybe_add_count(alias, nquarters)

        monthly_rule = self._get_monthly_rule()
        if monthly_rule:
            return _maybe_add_count(monthly_rule, self.mdiffs[0])

        if self.is_unique:
            return self._get_daily_rule()

        if self._is_business_daily():
            return "B"

        wom_rule = self._get_wom_rule()
        if wom_rule:
            return wom_rule

        return None

    def _get_daily_rule(self) -> str | None:
        # Express the first delta in days: a multiple of 7 becomes a weekly
        # alias anchored on the representative stamp's weekday, anything else
        # a "D" alias with a count.
        ppd = periods_per_day(self._creso)
        days = self.deltas[0] / ppd
        if days % 7 == 0:
            # Weekly
            wd = int_to_weekday[self.rep_stamp.weekday()]
            alias = f"W-{wd}"
            return _maybe_add_count(alias, days / 7)
        else:
            return _maybe_add_count("D", days)

    def _get_annual_rule(self) -> str | None:
        # Requires a single unique year difference and a single distinct
        # month across all elements.
        if len(self.ydiffs) > 1:
            return None

        if len(unique(self.fields["M"])) > 1:
            return None

        pos_check = self.month_position_check()

        if pos_check is None:
            return None
        else:
            # .get() yields None for any position code not listed here.
            return {"cs": "YS", "bs": "BYS", "ce": "YE", "be": "BYE"}.get(pos_check)

    def _get_quarterly_rule(self) -> str | None:
        # Requires a single unique month difference that is a multiple of 3.
        if len(self.mdiffs) > 1:
            return None

        if not self.mdiffs[0] % 3 == 0:
            return None

        pos_check = self.month_position_check()

        if pos_check is None:
            return None
        else:
            return {"cs": "QS", "bs": "BQS", "ce": "QE", "be": "BQE"}.get(pos_check)

    def _get_monthly_rule(self) -> str | None:
        # Requires a single unique month difference.
        if len(self.mdiffs) > 1:
            return None

        pos_check = self.month_position_check()

        if pos_check is None:
            return None
        else:
            return {"cs": "MS", "bs": "BMS", "ce": "ME", "be": "BME"}.get(pos_check)

    def _is_business_daily(self) -> bool:
        # quick check: cannot be business daily
        if self.day_deltas != [1, 3]:
            return False

        # probably business daily, but need to confirm
        first_weekday = self.index[0].weekday()
        shifts = np.diff(self.i8values)
        ppd = periods_per_day(self._creso)
        # Step sizes in whole days between consecutive elements.
        shifts = np.floor_divide(shifts, ppd)
        # Weekday reached after each step, starting from the first element.
        weekdays = np.mod(first_weekday + np.cumsum(shifts), 7)

        # Every step must either land on weekday 0 after a 3-day shift or
        # land on weekday 1..4 after a 1-day shift.
        return bool(
            np.all(
                ((weekdays == 0) & (shifts == 3))
                | ((weekdays > 0) & (weekdays <= 4) & (shifts == 1))
            )
        )

    def _get_wom_rule(self) -> str | None:
        # Week-of-month rule: all elements must share one weekday ...
        weekdays = unique(self.index.weekday)
        if len(weekdays) > 1:
            return None

        # ... and one zero-based week of the month.
        week_of_months = unique((self.index.day - 1) // 7)
        # Only attempt to infer up to WOM-4. See #9425
        week_of_months = week_of_months[week_of_months < 4]
        if len(week_of_months) == 0 or len(week_of_months) > 1:
            return None

        # get which week
        week = week_of_months[0] + 1
        wd = int_to_weekday[weekdays[0]]

        return f"WOM-{week}{wd}"
  333. class _TimedeltaFrequencyInferer(_FrequencyInferer):
  334. def _infer_daily_rule(self):
  335. if self.is_unique:
  336. return self._get_daily_rule()
  337. def _is_multiple(us, mult: int) -> bool:
  338. return us % mult == 0
  339. def _maybe_add_count(base: str, count: float) -> str:
  340. if count != 1:
  341. assert count == int(count)
  342. count = int(count)
  343. return f"{count}{base}"
  344. else:
  345. return base
  346. # ----------------------------------------------------------------------
  347. # Frequency comparison
  348. def is_subperiod(source, target) -> bool:
  349. """
  350. Returns True if downsampling is possible between source and target
  351. frequencies
  352. Parameters
  353. ----------
  354. source : str or DateOffset
  355. Frequency converting from
  356. target : str or DateOffset
  357. Frequency converting to
  358. Returns
  359. -------
  360. bool
  361. """
  362. if target is None or source is None:
  363. return False
  364. source = _maybe_coerce_freq(source)
  365. target = _maybe_coerce_freq(target)
  366. if _is_annual(target):
  367. if _is_quarterly(source):
  368. return _quarter_months_conform(
  369. get_rule_month(source), get_rule_month(target)
  370. )
  371. return source in {"D", "C", "B", "M", "h", "min", "s", "ms", "us", "ns"}
  372. elif _is_quarterly(target):
  373. return source in {"D", "C", "B", "M", "h", "min", "s", "ms", "us", "ns"}
  374. elif _is_monthly(target):
  375. return source in {"D", "C", "B", "h", "min", "s", "ms", "us", "ns"}
  376. elif _is_weekly(target):
  377. return source in {target, "D", "C", "B", "h", "min", "s", "ms", "us", "ns"}
  378. elif target == "B":
  379. return source in {"B", "h", "min", "s", "ms", "us", "ns"}
  380. elif target == "C":
  381. return source in {"C", "h", "min", "s", "ms", "us", "ns"}
  382. elif target == "D":
  383. return source in {"D", "h", "min", "s", "ms", "us", "ns"}
  384. elif target == "h":
  385. return source in {"h", "min", "s", "ms", "us", "ns"}
  386. elif target == "min":
  387. return source in {"min", "s", "ms", "us", "ns"}
  388. elif target == "s":
  389. return source in {"s", "ms", "us", "ns"}
  390. elif target == "ms":
  391. return source in {"ms", "us", "ns"}
  392. elif target == "us":
  393. return source in {"us", "ns"}
  394. elif target == "ns":
  395. return source in {"ns"}
  396. else:
  397. return False
  398. def is_superperiod(source, target) -> bool:
  399. """
  400. Returns True if upsampling is possible between source and target
  401. frequencies
  402. Parameters
  403. ----------
  404. source : str or DateOffset
  405. Frequency converting from
  406. target : str or DateOffset
  407. Frequency converting to
  408. Returns
  409. -------
  410. bool
  411. """
  412. if target is None or source is None:
  413. return False
  414. source = _maybe_coerce_freq(source)
  415. target = _maybe_coerce_freq(target)
  416. if _is_annual(source):
  417. if _is_annual(target):
  418. return get_rule_month(source) == get_rule_month(target)
  419. if _is_quarterly(target):
  420. smonth = get_rule_month(source)
  421. tmonth = get_rule_month(target)
  422. return _quarter_months_conform(smonth, tmonth)
  423. return target in {"D", "C", "B", "M", "h", "min", "s", "ms", "us", "ns"}
  424. elif _is_quarterly(source):
  425. return target in {"D", "C", "B", "M", "h", "min", "s", "ms", "us", "ns"}
  426. elif _is_monthly(source):
  427. return target in {"D", "C", "B", "h", "min", "s", "ms", "us", "ns"}
  428. elif _is_weekly(source):
  429. return target in {source, "D", "C", "B", "h", "min", "s", "ms", "us", "ns"}
  430. elif source == "B":
  431. return target in {"D", "C", "B", "h", "min", "s", "ms", "us", "ns"}
  432. elif source == "C":
  433. return target in {"D", "C", "B", "h", "min", "s", "ms", "us", "ns"}
  434. elif source == "D":
  435. return target in {"D", "C", "B", "h", "min", "s", "ms", "us", "ns"}
  436. elif source == "h":
  437. return target in {"h", "min", "s", "ms", "us", "ns"}
  438. elif source == "min":
  439. return target in {"min", "s", "ms", "us", "ns"}
  440. elif source == "s":
  441. return target in {"s", "ms", "us", "ns"}
  442. elif source == "ms":
  443. return target in {"ms", "us", "ns"}
  444. elif source == "us":
  445. return target in {"us", "ns"}
  446. elif source == "ns":
  447. return target in {"ns"}
  448. else:
  449. return False
  450. def _maybe_coerce_freq(code) -> str:
  451. """we might need to coerce a code to a rule_code
  452. and uppercase it
  453. Parameters
  454. ----------
  455. source : str or DateOffset
  456. Frequency converting from
  457. Returns
  458. -------
  459. str
  460. """
  461. assert code is not None
  462. if isinstance(code, DateOffset):
  463. code = freq_to_period_freqstr(1, code.name)
  464. if code in {"h", "min", "s", "ms", "us", "ns"}:
  465. return code
  466. else:
  467. return code.upper()
  468. def _quarter_months_conform(source: str, target: str) -> bool:
  469. snum = MONTH_NUMBERS[source]
  470. tnum = MONTH_NUMBERS[target]
  471. return snum % 3 == tnum % 3
  472. def _is_annual(rule: str) -> bool:
  473. rule = rule.upper()
  474. return rule == "Y" or rule.startswith("Y-")
  475. def _is_quarterly(rule: str) -> bool:
  476. rule = rule.upper()
  477. return rule == "Q" or rule.startswith(("Q-", "BQ"))
  478. def _is_monthly(rule: str) -> bool:
  479. rule = rule.upper()
  480. return rule in ("M", "BM")
  481. def _is_weekly(rule: str) -> bool:
  482. rule = rule.upper()
  483. return rule == "W" or rule.startswith("W-")
# Names exported by ``from <this module> import *``.  "Day" and "to_offset"
# are imported from pandas._libs.tslibs.offsets and re-exported here; the
# remaining names are defined in this file.
__all__ = [
    "Day",
    "get_period_alias",
    "infer_freq",
    "is_subperiod",
    "is_superperiod",
    "to_offset",
]