holiday.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
  1. from __future__ import annotations
  2. from datetime import (
  3. datetime,
  4. timedelta,
  5. )
  6. import warnings
  7. from dateutil.relativedelta import (
  8. FR,
  9. MO,
  10. SA,
  11. SU,
  12. TH,
  13. TU,
  14. WE,
  15. )
  16. import numpy as np
  17. from pandas.errors import PerformanceWarning
  18. from pandas import (
  19. DateOffset,
  20. DatetimeIndex,
  21. Series,
  22. Timestamp,
  23. concat,
  24. date_range,
  25. )
  26. from pandas.tseries.offsets import (
  27. Day,
  28. Easter,
  29. )
  30. def next_monday(dt: datetime) -> datetime:
  31. """
  32. If holiday falls on Saturday, use following Monday instead;
  33. if holiday falls on Sunday, use Monday instead
  34. """
  35. if dt.weekday() == 5:
  36. return dt + timedelta(2)
  37. elif dt.weekday() == 6:
  38. return dt + timedelta(1)
  39. return dt
  40. def next_monday_or_tuesday(dt: datetime) -> datetime:
  41. """
  42. For second holiday of two adjacent ones!
  43. If holiday falls on Saturday, use following Monday instead;
  44. if holiday falls on Sunday or Monday, use following Tuesday instead
  45. (because Monday is already taken by adjacent holiday on the day before)
  46. """
  47. dow = dt.weekday()
  48. if dow in (5, 6):
  49. return dt + timedelta(2)
  50. if dow == 0:
  51. return dt + timedelta(1)
  52. return dt
  53. def previous_friday(dt: datetime) -> datetime:
  54. """
  55. If holiday falls on Saturday or Sunday, use previous Friday instead.
  56. """
  57. if dt.weekday() == 5:
  58. return dt - timedelta(1)
  59. elif dt.weekday() == 6:
  60. return dt - timedelta(2)
  61. return dt
  62. def sunday_to_monday(dt: datetime) -> datetime:
  63. """
  64. If holiday falls on Sunday, use day thereafter (Monday) instead.
  65. """
  66. if dt.weekday() == 6:
  67. return dt + timedelta(1)
  68. return dt
  69. def weekend_to_monday(dt: datetime) -> datetime:
  70. """
  71. If holiday falls on Sunday or Saturday,
  72. use day thereafter (Monday) instead.
  73. Needed for holidays such as Christmas observation in Europe
  74. """
  75. if dt.weekday() == 6:
  76. return dt + timedelta(1)
  77. elif dt.weekday() == 5:
  78. return dt + timedelta(2)
  79. return dt
  80. def nearest_workday(dt: datetime) -> datetime:
  81. """
  82. If holiday falls on Saturday, use day before (Friday) instead;
  83. if holiday falls on Sunday, use day thereafter (Monday) instead.
  84. """
  85. if dt.weekday() == 5:
  86. return dt - timedelta(1)
  87. elif dt.weekday() == 6:
  88. return dt + timedelta(1)
  89. return dt
  90. def next_workday(dt: datetime) -> datetime:
  91. """
  92. returns next weekday used for observances
  93. """
  94. dt += timedelta(days=1)
  95. while dt.weekday() > 4:
  96. # Mon-Fri are 0-4
  97. dt += timedelta(days=1)
  98. return dt
  99. def previous_workday(dt: datetime) -> datetime:
  100. """
  101. returns previous weekday used for observances
  102. """
  103. dt -= timedelta(days=1)
  104. while dt.weekday() > 4:
  105. # Mon-Fri are 0-4
  106. dt -= timedelta(days=1)
  107. return dt
  108. def before_nearest_workday(dt: datetime) -> datetime:
  109. """
  110. returns previous workday after nearest workday
  111. """
  112. return previous_workday(nearest_workday(dt))
  113. def after_nearest_workday(dt: datetime) -> datetime:
  114. """
  115. returns next workday after nearest workday
  116. needed for Boxing day or multiple holidays in a series
  117. """
  118. return next_workday(nearest_workday(dt))
  119. class Holiday:
  120. """
  121. Class that defines a holiday with start/end dates and rules
  122. for observance.
  123. """
  124. start_date: Timestamp | None
  125. end_date: Timestamp | None
  126. days_of_week: tuple[int, ...] | None
  127. def __init__(
  128. self,
  129. name: str,
  130. year=None,
  131. month=None,
  132. day=None,
  133. offset=None,
  134. observance=None,
  135. start_date=None,
  136. end_date=None,
  137. days_of_week=None,
  138. ) -> None:
  139. """
  140. Parameters
  141. ----------
  142. name : str
  143. Name of the holiday , defaults to class name
  144. offset : array of pandas.tseries.offsets or
  145. class from pandas.tseries.offsets
  146. computes offset from date
  147. observance: function
  148. computes when holiday is given a pandas Timestamp
  149. days_of_week:
  150. provide a tuple of days e.g (0,1,2,3,) for Monday Through Thursday
  151. Monday=0,..,Sunday=6
  152. Examples
  153. --------
  154. >>> from dateutil.relativedelta import MO
  155. >>> USMemorialDay = pd.tseries.holiday.Holiday(
  156. ... "Memorial Day", month=5, day=31, offset=pd.DateOffset(weekday=MO(-1))
  157. ... )
  158. >>> USMemorialDay
  159. Holiday: Memorial Day (month=5, day=31, offset=<DateOffset: weekday=MO(-1)>)
  160. >>> USLaborDay = pd.tseries.holiday.Holiday(
  161. ... "Labor Day", month=9, day=1, offset=pd.DateOffset(weekday=MO(1))
  162. ... )
  163. >>> USLaborDay
  164. Holiday: Labor Day (month=9, day=1, offset=<DateOffset: weekday=MO(+1)>)
  165. >>> July3rd = pd.tseries.holiday.Holiday("July 3rd", month=7, day=3)
  166. >>> July3rd
  167. Holiday: July 3rd (month=7, day=3, )
  168. >>> NewYears = pd.tseries.holiday.Holiday(
  169. ... "New Years Day", month=1, day=1,
  170. ... observance=pd.tseries.holiday.nearest_workday
  171. ... )
  172. >>> NewYears # doctest: +SKIP
  173. Holiday: New Years Day (
  174. month=1, day=1, observance=<function nearest_workday at 0x66545e9bc440>
  175. )
  176. >>> July3rd = pd.tseries.holiday.Holiday(
  177. ... "July 3rd", month=7, day=3,
  178. ... days_of_week=(0, 1, 2, 3)
  179. ... )
  180. >>> July3rd
  181. Holiday: July 3rd (month=7, day=3, )
  182. """
  183. if offset is not None and observance is not None:
  184. raise NotImplementedError("Cannot use both offset and observance.")
  185. self.name = name
  186. self.year = year
  187. self.month = month
  188. self.day = day
  189. self.offset = offset
  190. self.start_date = (
  191. Timestamp(start_date) if start_date is not None else start_date
  192. )
  193. self.end_date = Timestamp(end_date) if end_date is not None else end_date
  194. self.observance = observance
  195. assert days_of_week is None or type(days_of_week) == tuple
  196. self.days_of_week = days_of_week
  197. def __repr__(self) -> str:
  198. info = ""
  199. if self.year is not None:
  200. info += f"year={self.year}, "
  201. info += f"month={self.month}, day={self.day}, "
  202. if self.offset is not None:
  203. info += f"offset={self.offset}"
  204. if self.observance is not None:
  205. info += f"observance={self.observance}"
  206. repr = f"Holiday: {self.name} ({info})"
  207. return repr
  208. def dates(
  209. self, start_date, end_date, return_name: bool = False
  210. ) -> Series | DatetimeIndex:
  211. """
  212. Calculate holidays observed between start date and end date
  213. Parameters
  214. ----------
  215. start_date : starting date, datetime-like, optional
  216. end_date : ending date, datetime-like, optional
  217. return_name : bool, optional, default=False
  218. If True, return a series that has dates and holiday names.
  219. False will only return dates.
  220. Returns
  221. -------
  222. Series or DatetimeIndex
  223. Series if return_name is True
  224. """
  225. start_date = Timestamp(start_date)
  226. end_date = Timestamp(end_date)
  227. filter_start_date = start_date
  228. filter_end_date = end_date
  229. if self.year is not None:
  230. dt = Timestamp(datetime(self.year, self.month, self.day))
  231. dti = DatetimeIndex([dt])
  232. if return_name:
  233. return Series(self.name, index=dti)
  234. else:
  235. return dti
  236. dates = self._reference_dates(start_date, end_date)
  237. holiday_dates = self._apply_rule(dates)
  238. if self.days_of_week is not None:
  239. holiday_dates = holiday_dates[
  240. np.isin(
  241. # error: "DatetimeIndex" has no attribute "dayofweek"
  242. holiday_dates.dayofweek, # type: ignore[attr-defined]
  243. self.days_of_week,
  244. ).ravel()
  245. ]
  246. if self.start_date is not None:
  247. filter_start_date = max(
  248. self.start_date.tz_localize(filter_start_date.tz), filter_start_date
  249. )
  250. if self.end_date is not None:
  251. filter_end_date = min(
  252. self.end_date.tz_localize(filter_end_date.tz), filter_end_date
  253. )
  254. holiday_dates = holiday_dates[
  255. (holiday_dates >= filter_start_date) & (holiday_dates <= filter_end_date)
  256. ]
  257. if return_name:
  258. return Series(self.name, index=holiday_dates)
  259. return holiday_dates
  260. def _reference_dates(
  261. self, start_date: Timestamp, end_date: Timestamp
  262. ) -> DatetimeIndex:
  263. """
  264. Get reference dates for the holiday.
  265. Return reference dates for the holiday also returning the year
  266. prior to the start_date and year following the end_date. This ensures
  267. that any offsets to be applied will yield the holidays within
  268. the passed in dates.
  269. """
  270. if self.start_date is not None:
  271. start_date = self.start_date.tz_localize(start_date.tz)
  272. if self.end_date is not None:
  273. end_date = self.end_date.tz_localize(start_date.tz)
  274. year_offset = DateOffset(years=1)
  275. reference_start_date = Timestamp(
  276. datetime(start_date.year - 1, self.month, self.day)
  277. )
  278. reference_end_date = Timestamp(
  279. datetime(end_date.year + 1, self.month, self.day)
  280. )
  281. # Don't process unnecessary holidays
  282. dates = date_range(
  283. start=reference_start_date,
  284. end=reference_end_date,
  285. freq=year_offset,
  286. tz=start_date.tz,
  287. )
  288. return dates
  289. def _apply_rule(self, dates: DatetimeIndex) -> DatetimeIndex:
  290. """
  291. Apply the given offset/observance to a DatetimeIndex of dates.
  292. Parameters
  293. ----------
  294. dates : DatetimeIndex
  295. Dates to apply the given offset/observance rule
  296. Returns
  297. -------
  298. Dates with rules applied
  299. """
  300. if dates.empty:
  301. return dates.copy()
  302. if self.observance is not None:
  303. return dates.map(lambda d: self.observance(d))
  304. if self.offset is not None:
  305. if not isinstance(self.offset, list):
  306. offsets = [self.offset]
  307. else:
  308. offsets = self.offset
  309. for offset in offsets:
  310. # if we are adding a non-vectorized value
  311. # ignore the PerformanceWarnings:
  312. with warnings.catch_warnings():
  313. warnings.simplefilter("ignore", PerformanceWarning)
  314. dates += offset
  315. return dates
  316. holiday_calendars = {}
  317. def register(cls) -> None:
  318. try:
  319. name = cls.name
  320. except AttributeError:
  321. name = cls.__name__
  322. holiday_calendars[name] = cls
  323. def get_calendar(name: str):
  324. """
  325. Return an instance of a calendar based on its name.
  326. Parameters
  327. ----------
  328. name : str
  329. Calendar name to return an instance of
  330. """
  331. return holiday_calendars[name]()
  332. class HolidayCalendarMetaClass(type):
  333. def __new__(cls, clsname: str, bases, attrs):
  334. calendar_class = super().__new__(cls, clsname, bases, attrs)
  335. register(calendar_class)
  336. return calendar_class
  337. class AbstractHolidayCalendar(metaclass=HolidayCalendarMetaClass):
  338. """
  339. Abstract interface to create holidays following certain rules.
  340. """
  341. rules: list[Holiday] = []
  342. start_date = Timestamp(datetime(1970, 1, 1))
  343. end_date = Timestamp(datetime(2200, 12, 31))
  344. _cache = None
  345. def __init__(self, name: str = "", rules=None) -> None:
  346. """
  347. Initializes holiday object with a given set a rules. Normally
  348. classes just have the rules defined within them.
  349. Parameters
  350. ----------
  351. name : str
  352. Name of the holiday calendar, defaults to class name
  353. rules : array of Holiday objects
  354. A set of rules used to create the holidays.
  355. """
  356. super().__init__()
  357. if not name:
  358. name = type(self).__name__
  359. self.name = name
  360. if rules is not None:
  361. self.rules = rules
  362. def rule_from_name(self, name: str):
  363. for rule in self.rules:
  364. if rule.name == name:
  365. return rule
  366. return None
  367. def holidays(self, start=None, end=None, return_name: bool = False):
  368. """
  369. Returns a curve with holidays between start_date and end_date
  370. Parameters
  371. ----------
  372. start : starting date, datetime-like, optional
  373. end : ending date, datetime-like, optional
  374. return_name : bool, optional
  375. If True, return a series that has dates and holiday names.
  376. False will only return a DatetimeIndex of dates.
  377. Returns
  378. -------
  379. DatetimeIndex of holidays
  380. """
  381. if self.rules is None:
  382. raise Exception(
  383. f"Holiday Calendar {self.name} does not have any rules specified"
  384. )
  385. if start is None:
  386. start = AbstractHolidayCalendar.start_date
  387. if end is None:
  388. end = AbstractHolidayCalendar.end_date
  389. start = Timestamp(start)
  390. end = Timestamp(end)
  391. # If we don't have a cache or the dates are outside the prior cache, we
  392. # get them again
  393. if self._cache is None or start < self._cache[0] or end > self._cache[1]:
  394. pre_holidays = [
  395. rule.dates(start, end, return_name=True) for rule in self.rules
  396. ]
  397. if pre_holidays:
  398. # error: Argument 1 to "concat" has incompatible type
  399. # "List[Union[Series, DatetimeIndex]]"; expected
  400. # "Union[Iterable[DataFrame], Mapping[<nothing>, DataFrame]]"
  401. holidays = concat(pre_holidays) # type: ignore[arg-type]
  402. else:
  403. # error: Incompatible types in assignment (expression has type
  404. # "Series", variable has type "DataFrame")
  405. holidays = Series(
  406. index=DatetimeIndex([]), dtype=object
  407. ) # type: ignore[assignment]
  408. self._cache = (start, end, holidays.sort_index())
  409. holidays = self._cache[2]
  410. holidays = holidays[start:end]
  411. if return_name:
  412. return holidays
  413. else:
  414. return holidays.index
  415. @staticmethod
  416. def merge_class(base, other):
  417. """
  418. Merge holiday calendars together. The base calendar
  419. will take precedence to other. The merge will be done
  420. based on each holiday's name.
  421. Parameters
  422. ----------
  423. base : AbstractHolidayCalendar
  424. instance/subclass or array of Holiday objects
  425. other : AbstractHolidayCalendar
  426. instance/subclass or array of Holiday objects
  427. """
  428. try:
  429. other = other.rules
  430. except AttributeError:
  431. pass
  432. if not isinstance(other, list):
  433. other = [other]
  434. other_holidays = {holiday.name: holiday for holiday in other}
  435. try:
  436. base = base.rules
  437. except AttributeError:
  438. pass
  439. if not isinstance(base, list):
  440. base = [base]
  441. base_holidays = {holiday.name: holiday for holiday in base}
  442. other_holidays.update(base_holidays)
  443. return list(other_holidays.values())
  444. def merge(self, other, inplace: bool = False):
  445. """
  446. Merge holiday calendars together. The caller's class
  447. rules take precedence. The merge will be done
  448. based on each holiday's name.
  449. Parameters
  450. ----------
  451. other : holiday calendar
  452. inplace : bool (default=False)
  453. If True set rule_table to holidays, else return array of Holidays
  454. """
  455. holidays = self.merge_class(self, other)
  456. if inplace:
  457. self.rules = holidays
  458. else:
  459. return holidays
  460. USMemorialDay = Holiday(
  461. "Memorial Day", month=5, day=31, offset=DateOffset(weekday=MO(-1))
  462. )
  463. USLaborDay = Holiday("Labor Day", month=9, day=1, offset=DateOffset(weekday=MO(1)))
  464. USColumbusDay = Holiday(
  465. "Columbus Day", month=10, day=1, offset=DateOffset(weekday=MO(2))
  466. )
  467. USThanksgivingDay = Holiday(
  468. "Thanksgiving Day", month=11, day=1, offset=DateOffset(weekday=TH(4))
  469. )
  470. USMartinLutherKingJr = Holiday(
  471. "Birthday of Martin Luther King, Jr.",
  472. start_date=datetime(1986, 1, 1),
  473. month=1,
  474. day=1,
  475. offset=DateOffset(weekday=MO(3)),
  476. )
  477. USPresidentsDay = Holiday(
  478. "Washington's Birthday", month=2, day=1, offset=DateOffset(weekday=MO(3))
  479. )
  480. GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)])
  481. EasterMonday = Holiday("Easter Monday", month=1, day=1, offset=[Easter(), Day(1)])
  482. class USFederalHolidayCalendar(AbstractHolidayCalendar):
  483. """
  484. US Federal Government Holiday Calendar based on rules specified by:
  485. https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/
  486. """
  487. rules = [
  488. Holiday("New Year's Day", month=1, day=1, observance=nearest_workday),
  489. USMartinLutherKingJr,
  490. USPresidentsDay,
  491. USMemorialDay,
  492. Holiday(
  493. "Juneteenth National Independence Day",
  494. month=6,
  495. day=19,
  496. start_date="2021-06-18",
  497. observance=nearest_workday,
  498. ),
  499. Holiday("Independence Day", month=7, day=4, observance=nearest_workday),
  500. USLaborDay,
  501. USColumbusDay,
  502. Holiday("Veterans Day", month=11, day=11, observance=nearest_workday),
  503. USThanksgivingDay,
  504. Holiday("Christmas Day", month=12, day=25, observance=nearest_workday),
  505. ]
  506. def HolidayCalendarFactory(name: str, base, other, base_class=AbstractHolidayCalendar):
  507. rules = AbstractHolidayCalendar.merge_class(base, other)
  508. calendar_class = type(name, (base_class,), {"rules": rules, "name": name})
  509. return calendar_class
  510. __all__ = [
  511. "after_nearest_workday",
  512. "before_nearest_workday",
  513. "FR",
  514. "get_calendar",
  515. "HolidayCalendarFactory",
  516. "MO",
  517. "nearest_workday",
  518. "next_monday",
  519. "next_monday_or_tuesday",
  520. "next_workday",
  521. "previous_friday",
  522. "previous_workday",
  523. "register",
  524. "SA",
  525. "SU",
  526. "sunday_to_monday",
  527. "TH",
  528. "TU",
  529. "WE",
  530. "weekend_to_monday",
  531. ]