test_setops.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666
  1. from datetime import (
  2. datetime,
  3. timezone,
  4. )
  5. import numpy as np
  6. import pytest
  7. import pytz
  8. import pandas.util._test_decorators as td
  9. import pandas as pd
  10. from pandas import (
  11. DataFrame,
  12. DatetimeIndex,
  13. Index,
  14. Series,
  15. Timestamp,
  16. bdate_range,
  17. date_range,
  18. )
  19. import pandas._testing as tm
  20. from pandas.tseries.offsets import (
  21. BMonthEnd,
  22. Minute,
  23. MonthEnd,
  24. )
  # Naive date bounds passed to bdate_range / date_range by the tests below.
  START = datetime(2009, 1, 1)
  END = datetime(2010, 1, 1)
  class TestDatetimeIndexSetOps:
      # Set-operation checks (union, intersection, difference,
      # symmetric_difference) on DatetimeIndex.  ``tz`` lists the timezones
      # that some of the tests below are parametrized over.
      tz = [
          None,
          "UTC",
          "Asia/Tokyo",
          "US/Eastern",
          "dateutil/Asia/Singapore",
          "dateutil/US/Pacific",
      ]

      # TODO: moved from test_datetimelike; dedup with version below
      def test_union2(self, sort):
          # two adjacent halves of a range union back into the whole range
          full = date_range("2020-01-01", periods=10)
          head, tail = full[:5], full[5:]
          tm.assert_index_equal(head.union(tail, sort=sort), full)

      @pytest.mark.parametrize("box", [np.array, Series, list])
      def test_union3(self, sort, box):
          full = date_range("2020-01-01", periods=10)
          head, tail = full[:5], full[5:]

          # GH 10149 support listlike inputs other than Index objects
          expected = head.union(tail, sort=sort)
          boxed = box(tail.values)
          tm.assert_index_equal(head.union(boxed, sort=sort), expected)

      @pytest.mark.parametrize("tz", tz)
      def test_union(self, tz, sort):
          def daily(start, periods):
              # daily range in the parametrized timezone
              return date_range(start, freq="D", periods=periods, tz=tz)

          # ``other`` starts the day after ``rng`` ends
          rng_a, other_a = daily("1/1/2000", 5), daily("1/6/2000", 5)
          sorted_a = daily("1/1/2000", 10)
          unsorted_a = DatetimeIndex(list(other_a) + list(rng_a))

          # ``other`` overlaps the last two days of ``rng``
          rng_b, other_b = daily("1/1/2000", 5), daily("1/4/2000", 5)
          sorted_b = daily("1/1/2000", 8)
          unsorted_b = DatetimeIndex(list(other_b) + list(rng_b[:3]))

          # ``other`` is empty
          rng_c = daily("1/1/2000", 5)
          other_c = DatetimeIndex([], tz=tz).as_unit("ns")
          sorted_c = daily("1/1/2000", 5)
          unsorted_c = rng_c

          scenarios = [
              (rng_a, other_a, sorted_a, unsorted_a),
              (rng_b, other_b, sorted_b, unsorted_b),
              (rng_c, other_c, sorted_c, unsorted_c),
          ]
          for rng, other, exp_sorted, exp_unsorted in scenarios:
              tm.assert_index_equal(rng.union(other, sort=sort), exp_sorted)

              # with the operands swapped, the unsorted result starts
              # with the values of ``other``
              flipped = other.union(rng, sort=sort)
              flipped_expected = exp_sorted if sort is None else exp_unsorted
              tm.assert_index_equal(flipped, flipped_expected)

      def test_union_coverage(self, sort):
          shuffled = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"])
          ordered = DatetimeIndex(shuffled.sort_values(), freq="infer")

          tm.assert_index_equal(ordered.union(shuffled, sort=sort), ordered)

          # union of an empty slice with the full index keeps the freq
          from_empty = ordered[:0].union(ordered, sort=sort)
          tm.assert_index_equal(from_empty, ordered)
          assert from_empty.freq == ordered.freq

      def test_union_bug_1730(self, sort):
          every_3h = date_range("1/1/2012", periods=4, freq="3h")
          every_4h = date_range("1/1/2012", periods=4, freq="4h")

          result = every_3h.union(every_4h, sort=sort)

          # the first element of both ranges is the same timestamp
          stamps = list(every_3h) + list(every_4h[1:])
          if sort is None:
              stamps = sorted(stamps)
          tm.assert_index_equal(result, DatetimeIndex(stamps))

      def test_union_bug_1745(self, sort):
          left_stamps = ["2012-05-11 15:19:49.695000"]
          right_stamps = [
              "2012-05-29 13:04:21.322000",
              "2012-05-11 15:27:24.873000",
              "2012-05-11 15:31:05.350000",
          ]

          result = DatetimeIndex(left_stamps).union(
              DatetimeIndex(right_stamps), sort=sort
          )

          expected = DatetimeIndex(left_stamps + right_stamps)
          if sort is None:
              expected = expected.sort_values()
          tm.assert_index_equal(result, expected)

      def test_union_bug_4564(self, sort):
          from pandas import DateOffset

          days = date_range("2013-01-01", "2013-02-01")
          shifted = days + DateOffset(minutes=15)

          result = days.union(shifted, sort=sort)

          stamps = list(days) + list(shifted)
          if sort is None:
              stamps = sorted(stamps)
          tm.assert_index_equal(result, DatetimeIndex(stamps))

      def test_union_freq_both_none(self, sort):
          # GH11086
          freqless = bdate_range("20150101", periods=10)
          freqless._data.freq = None

          result = freqless.union(freqless, sort=sort)
          tm.assert_index_equal(result, freqless)
          assert result.freq is None

      def test_union_freq_infer(self):
          # When taking the union of two DatetimeIndexes, we infer
          # a freq even if the arguments don't have freq. This matches
          # TimedeltaIndex behavior.
          daily = date_range("2016-01-01", periods=5)
          left = daily[[0, 1, 3, 4]]
          right = daily[[2, 3, 1]]
          for piece in (left, right):
              assert piece.freq is None

          result = left.union(right)
          tm.assert_index_equal(result, daily)
          assert result.freq == "D"

      def test_union_dataframe_index(self):
          def noise(index):
              # random values on ``index``; the seed is fixed at 2
              values = np.random.default_rng(2).standard_normal(len(index))
              return Series(values, index)

          s1 = noise(date_range("1/1/1999", "1/1/2012", freq="MS"))
          s2 = noise(date_range("1/1/1980", "12/1/2001", freq="MS"))
          frame = DataFrame({"s1": s1, "s2": s2})

          # the frame's index spans both series' ranges
          expected = date_range("1/1/1980", "1/1/2012", freq="MS")
          tm.assert_index_equal(frame.index, expected)

      def test_union_with_DatetimeIndex(self, sort):
          ints = Index(np.arange(0, 20, 2, dtype=np.int64))
          dates = date_range(start="2012-01-03 00:00:00", periods=10, freq="D")

          # Smoke test only: neither direction is expected to raise.  The
          # original note recorded the second call as once failing with
          # "AttributeError: can't set attribute".
          ints.union(dates, sort=sort)
          dates.union(ints, sort=sort)

      def test_union_same_timezone_different_units(self):
          # GH 55238
          def utc_days(unit):
              return date_range("2000-01-01", periods=3, tz="UTC").as_unit(unit)

          result = utc_days("ms").union(utc_days("us"))
          tm.assert_index_equal(result, utc_days("us"))

      # TODO: moved from test_datetimelike; de-duplicate with version below
      def test_intersection2(self):
          full = date_range("2020-01-01", periods=10)
          tail = full[5:]
          tm.assert_index_equal(full.intersection(tail), tail)

          # GH 10149
          for box in (np.array, Series, list):
              result = full.intersection(box(tail.values))
              tm.assert_index_equal(result, tail)

          # intersecting with string labels gives an empty object Index
          letters = Index(["a", "b", "c"])
          result = full.intersection(letters)
          tm.assert_index_equal(result, Index([], dtype=object))

      @pytest.mark.parametrize(
          "tz", [None, "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"]
      )
      def test_intersection(self, tz, sort):
          # GH 4690 (with tz)
          base = date_range("6/1/2000", "6/30/2000", freq="D", name="idx")
          monotonic_cases = [
              # if target has the same name, it is preserved
              (
                  date_range("5/15/2000", "6/20/2000", freq="D", name="idx"),
                  date_range("6/1/2000", "6/20/2000", freq="D", name="idx"),
              ),
              # if target name is different, it will be reset
              (
                  date_range("5/15/2000", "6/20/2000", freq="D", name="other"),
                  date_range("6/1/2000", "6/20/2000", freq="D", name=None),
              ),
              # target lies entirely after base
              (
                  date_range("7/1/2000", "7/31/2000", freq="D", name="idx"),
                  DatetimeIndex([], freq="D", name="idx", dtype="M8[ns]"),
              ),
          ]
          for target, expected in monotonic_cases:
              result = base.intersection(target)
              tm.assert_index_equal(result, expected)
              assert result.freq == expected.freq

          # non-monotonic
          def ns_index(dates, name):
              return DatetimeIndex(dates, tz=tz, name=name).as_unit("ns")

          target_dates = ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"]
          shared_dates = ["2011-01-04", "2011-01-02"]

          base = ns_index(
              ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], "idx"
          )
          same_name = ns_index(target_dates, "idx")
          same_name_expected = ns_index(shared_dates, "idx")
          other_name = ns_index(target_dates, "other")
          other_name_expected = ns_index(shared_dates, None)

          # GH 7880
          disjoint = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx")
          disjoint_expected = ns_index([], "idx")
          assert disjoint_expected.freq is None

          unordered_cases = [
              (same_name, same_name_expected),
              (other_name, other_name_expected),
              (disjoint, disjoint_expected),
          ]
          for target, expected in unordered_cases:
              result = base.intersection(target, sort=sort)
              if sort is None:
                  expected = expected.sort_values()
              tm.assert_index_equal(result, expected)
              assert result.freq == expected.freq

      # parametrize over both anchored and non-anchored freqs, as they
      # have different code paths
      @pytest.mark.parametrize("freq", ["min", "B"])
      def test_intersection_empty(self, tz_aware_fixture, freq):
          # empty same freq GH2129
          rng = date_range("6/1/2000", "6/15/2000", freq=freq, tz=tz_aware_fixture)
          for left, right in ((rng[0:0], rng), (rng, rng[0:0])):
              result = left.intersection(right)
              assert len(result) == 0
              assert result.freq == rng.freq

          # no overlap GH#33604
          # We don't preserve freq on non-anchored offsets
          freq_is_kept = freq != "min"
          head, tail = rng[:3], rng[-3:]
          # second pair: swapped left and right
          for left, right in ((head, tail), (tail, head)):
              result = left.intersection(right)
              tm.assert_index_equal(result, rng[:0])
              if freq_is_kept:
                  assert result.freq == rng.freq

      def test_intersection_bug_1708(self):
          from pandas import DateOffset

          # shifting by one hour leaves no timestamp in common
          half_days = date_range("1/1/2012", periods=4, freq="12h")
          shifted = half_days + DateOffset(hours=1)

          assert len(half_days.intersection(shifted)) == 0

      @pytest.mark.parametrize("tz", tz)
      def test_difference(self, tz, sort):
          rng_dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"]

          # (index to subtract, dates expected to remain)
          scenarios = [
              (date_range("1/6/2000", freq="D", periods=5, tz=tz), rng_dates),
              (date_range("1/4/2000", freq="D", periods=5, tz=tz), rng_dates[:3]),
              (DatetimeIndex([], tz=tz), rng_dates),
          ]
          for other, remaining in scenarios:
              rng = DatetimeIndex(rng_dates, tz=tz)
              expected = DatetimeIndex(remaining, tz=tz)

              result_diff = rng.difference(other, sort=sort)
              if sort is None and len(other):
                  # We dont sort (yet?) when empty GH#24959
                  expected = expected.sort_values()
              tm.assert_index_equal(result_diff, expected)

      def test_difference_freq(self, sort):
          # GH14323: difference of DatetimeIndex should not preserve frequency
          index = date_range("20160920", "20160925", freq="D")

          # ((start, end) of the range to subtract, remaining dates, freq)
          scenarios = [
              (("20160921", "20160924"), ["20160920", "20160925"], None),
              # preserve frequency when the difference is a contiguous
              # subset of the original range
              (("20160922", "20160925"), ["20160920", "20160921"], "D"),
          ]
          for (start, end), remaining, expected_freq in scenarios:
              other = date_range(start, end, freq="D")
              expected = DatetimeIndex(remaining, dtype="M8[ns]", freq=expected_freq)

              idx_diff = index.difference(other, sort=sort)
              tm.assert_index_equal(idx_diff, expected)
              tm.assert_attr_equal("freq", idx_diff, expected)

      def test_datetimeindex_diff(self, sort):
          start = datetime(1997, 12, 31)
          longer = date_range(freq="QE-JAN", start=start, periods=100)
          shorter = date_range(freq="QE-JAN", start=start, periods=98)

          # the longer range has two periods the shorter one lacks
          assert len(longer.difference(shorter, sort=sort)) == 2

      @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Eastern"])
      def test_setops_preserve_freq(self, tz):
          rng = date_range("1/1/2000", "1/1/2002", name="idx", tz=tz)

          # adjacent, then overlapping, pieces: name, freq and tz are kept
          for other in (rng[50:100], rng[30:100]):
              merged = rng[:50].union(other)
              assert merged.name == rng.name
              assert merged.freq == rng.freq
              assert merged.tz == rng.tz

          # a gap between the pieces: the freq is gone
          gapped = rng[:50].union(rng[60:100])
          assert gapped.name == rng.name
          assert gapped.freq is None
          assert gapped.tz == rng.tz

          shared = rng[:50].intersection(rng[25:75])
          assert shared.name == rng.name
          assert shared.freqstr == "D"
          assert shared.tz == rng.tz

          # other operand has no freq and a different name
          nofreq = DatetimeIndex(list(rng[25:75]), name="other")
          for setop_name in ("union", "intersection"):
              result = getattr(rng[:50], setop_name)(nofreq)
              assert result.name is None
              assert result.freq == rng.freq
              assert result.tz == rng.tz

      def test_intersection_non_tick_no_fastpath(self):
          # GH#42104
          quarter_ends = DatetimeIndex(
              [
                  "2018-12-31",
                  "2019-03-31",
                  "2019-06-30",
                  "2019-09-30",
                  "2019-12-31",
                  "2020-03-31",
              ],
              freq="QE-DEC",
          )
          # the even and odd positions share no element
          evens, odds = quarter_ends[::2], quarter_ends[1::2]
          result = evens.intersection(odds)
          tm.assert_index_equal(result, quarter_ends[:0])

      def test_dti_intersection(self):
          hourly = date_range("1/1/2011", periods=100, freq="h", tz="utc")

          # both operands are reversed slices
          left = hourly[10:90][::-1]
          right = hourly[20:80][::-1]
          assert left.tz == hourly.tz

          result = left.intersection(right)
          assert result.tz == left.tz

      # Note: not difference, as there is no symmetry requirement there
      @pytest.mark.parametrize("setop", ["union", "intersection", "symmetric_difference"])
      def test_dti_setop_aware(self, setop):
          # non-overlapping
          # GH#39328 as of 2.0 we cast these to UTC instead of object
          central = date_range(
              "2012-11-15 00:00:00", periods=6, freq="h", tz="US/Central"
          )
          eastern = date_range(
              "2012-11-15 12:00:00", periods=6, freq="h", tz="US/Eastern"
          )

          result = getattr(central, setop)(eastern)

          # same operation on the operands converted to UTC up front
          left = central.tz_convert("UTC")
          right = eastern.tz_convert("UTC")
          expected = getattr(left, setop)(right)
          tm.assert_index_equal(result, expected)
          assert result.tz == left.tz

          if len(result):
              for position in (0, -1):
                  assert result[position].tz is timezone.utc

      def test_dti_union_mixed(self):
          # GH#21671
          naive = DatetimeIndex([Timestamp("2011-01-01"), pd.NaT])
          aware = DatetimeIndex(["2012-01-01", "2012-01-02"], tz="Asia/Tokyo")

          result = naive.union(aware)

          # naive and tz-aware values end up together in an object Index
          expected = Index(
              [
                  Timestamp("2011-01-01"),
                  pd.NaT,
                  Timestamp("2012-01-01", tz="Asia/Tokyo"),
                  Timestamp("2012-01-02", tz="Asia/Tokyo"),
              ],
              dtype=object,
          )
          tm.assert_index_equal(result, expected)
  class TestBusinessDatetimeIndex:
      # Set operations on business-day, minute and month-end based ranges.

      @staticmethod
      def _month_end_ranges(tz):
          # Two MonthEnd ranges in ``tz``: the first bounded by 2011-01-01 and
          # 2011-03-01, the second by 2011-03-01 and 2011-05-01.
          early = date_range(
              start=datetime(2011, 1, 1),
              end=datetime(2011, 3, 1),
              tz=tz,
              freq=MonthEnd(),
          )
          late = date_range(
              start=datetime(2011, 3, 1),
              end=datetime(2011, 5, 1),
              tz=tz,
              freq=MonthEnd(),
          )
          return early, late

      def test_union(self, sort):
          bdays = bdate_range(START, END)

          # overlapping
          merged = bdays[:10].union(bdays[5:10], sort=sort)
          assert isinstance(merged, DatetimeIndex)

          # non-overlapping, gap in middle
          merged = bdays[:5].union(bdays[10:], sort=sort)
          assert isinstance(merged, Index)

          # non-overlapping, no gap
          left, right = bdays[:5], bdays[5:10]
          merged = left.union(right, sort=sort)
          assert isinstance(merged, DatetimeIndex)

          # swapped operands: same result when sorting, otherwise the values
          # of ``right`` come first
          if sort is None:
              expected = merged
          else:
              expected = DatetimeIndex(list(right) + list(left))
          tm.assert_index_equal(right.union(left, sort=sort), expected)

          # overlapping, but different offset
          month_ends = date_range(START, END, freq=BMonthEnd())
          merged = month_ends.union(month_ends, sort=sort)
          assert isinstance(merged, DatetimeIndex)

      def test_union_not_cacheable(self, sort):
          minutes = date_range("1/1/2000", periods=50, freq=Minute())

          # left operand starts later than the right one
          merged = minutes[10:].union(minutes[:25], sort=sort)
          if sort is None:
              expected = minutes
          else:
              expected = DatetimeIndex(list(minutes[10:]) + list(minutes[:10]))
          tm.assert_index_equal(merged, expected)

          # right operand lies entirely inside the left one
          merged = minutes[10:].union(minutes[15:35], sort=sort)
          tm.assert_index_equal(merged, minutes[10:])

      def test_intersection(self):
          minutes = date_range("1/1/2000", periods=50, freq=Minute())
          late, early = minutes[10:], minutes[:25]
          overlap = minutes[10:25]

          common = late.intersection(early)
          tm.assert_index_equal(common, overlap)
          assert isinstance(common, DatetimeIndex)
          assert common.freq == minutes.freq

          # the same call a second time gives the same values
          common = late.intersection(early)
          tm.assert_index_equal(common, overlap)

          # non-overlapping
          common = minutes[:10].intersection(minutes[10:])
          tm.assert_index_equal(common, DatetimeIndex([]).as_unit("ns"))

      def test_intersection_bug(self):
          # GH #771
          outer = bdate_range("11/30/2011", "12/31/2011")
          inner = bdate_range("12/10/2011", "12/20/2011")

          result = outer.intersection(inner)
          tm.assert_index_equal(result, inner)
          assert result.freq == inner.freq

      def test_intersection_list(self):
          # GH#35876
          # values is not an Index -> no name -> retain "a"
          stamps = [Timestamp("2020-01-01"), Timestamp("2020-02-01")]
          named = DatetimeIndex(stamps, name="a")
          tm.assert_index_equal(named.intersection(stamps), named)

      def test_month_range_union_tz_pytz(self, sort):
          # smoke test: the union is only required not to raise
          early_dr, late_dr = self._month_end_ranges(pytz.timezone("US/Eastern"))
          early_dr.union(late_dr, sort=sort)

      @td.skip_if_windows
      def test_month_range_union_tz_dateutil(self, sort):
          from pandas._libs.tslibs.timezones import dateutil_gettz

          # smoke test: the union is only required not to raise
          early_dr, late_dr = self._month_end_ranges(dateutil_gettz("US/Eastern"))
          early_dr.union(late_dr, sort=sort)

      @pytest.mark.parametrize("sort", [False, None])
      def test_intersection_duplicates(self, sort):
          # GH#38196
          dec_12, dec_13 = Timestamp("2019-12-12"), Timestamp("2019-12-13")
          with_dups = Index([dec_13, dec_12, dec_12])

          # expected has each value once, for both sort settings
          result = with_dups.intersection(with_dups, sort=sort)
          tm.assert_index_equal(result, Index([dec_13, dec_12]))
  class TestCustomDatetimeIndex:
      # Set operations on custom-business-day (freq="C") ranges, plus a
      # DST-transition regression test.

      def test_union(self, sort):
          cdays = bdate_range(START, END, freq="C")

          # overlapping
          merged = cdays[:10].union(cdays[5:10], sort=sort)
          assert isinstance(merged, DatetimeIndex)

          # non-overlapping, gap in middle
          merged = cdays[:5].union(cdays[10:], sort=sort)
          assert isinstance(merged, Index)

          # non-overlapping, no gap
          left, right = cdays[:5], cdays[5:10]
          merged = left.union(right, sort=sort)
          assert isinstance(merged, DatetimeIndex)

          # order does not matter (only checked when sorting)
          if sort is None:
              tm.assert_index_equal(right.union(left, sort=sort), merged)

          # overlapping, but different offset
          month_ends = date_range(START, END, freq=BMonthEnd())
          merged = month_ends.union(month_ends, sort=sort)
          assert isinstance(merged, DatetimeIndex)

      def test_intersection_bug(self):
          # GH #771
          outer = bdate_range("11/30/2011", "12/31/2011", freq="C")
          inner = bdate_range("12/10/2011", "12/20/2011", freq="C")

          result = outer.intersection(inner)
          tm.assert_index_equal(result, inner)
          assert result.freq == inner.freq

      @pytest.mark.parametrize(
          "tz", [None, "UTC", "Europe/Berlin", pytz.FixedOffset(-60)]
      )
      def test_intersection_dst_transition(self, tz):
          def daily(start, periods, zone):
              return date_range(start, periods=periods, freq="D", tz=zone)

          # GH 46702: Europe/Berlin has DST transition
          result = daily("2020-03-27", 5, tz).intersection(daily("2020-03-30", 5, tz))
          tm.assert_index_equal(result, daily("2020-03-30", 2, tz))

          # GH#45863 same problem for union
          # (this half always uses Europe/London, whatever ``tz`` is)
          london = "Europe/London"
          result = daily("2021-10-28", 3, london).union(daily("2021-10-30", 4, london))
          tm.assert_index_equal(result, daily("2021-10-28", 6, london))