# test_join.py (4.8 KB)
from datetime import (
    datetime,
    timezone,
)

import numpy as np
import pytest

from pandas import (
    DataFrame,
    DatetimeIndex,
    Index,
    Timestamp,
    date_range,
    period_range,
    to_datetime,
)
import pandas._testing as tm

from pandas.tseries.offsets import (
    BDay,
    BMonthEnd,
)
  21. class TestJoin:
  22. def test_does_not_convert_mixed_integer(self):
  23. df = DataFrame(np.ones((3, 2)), columns=date_range("2020-01-01", periods=2))
  24. cols = df.columns.join(df.index, how="outer")
  25. joined = cols.join(df.columns)
  26. assert cols.dtype == np.dtype("O")
  27. assert cols.dtype == joined.dtype
  28. tm.assert_numpy_array_equal(cols.values, joined.values)
  29. def test_join_self(self, join_type):
  30. index = date_range("1/1/2000", periods=10)
  31. joined = index.join(index, how=join_type)
  32. assert index is joined
  33. def test_join_with_period_index(self, join_type):
  34. df = DataFrame(
  35. np.ones((10, 2)),
  36. index=date_range("2020-01-01", periods=10),
  37. columns=period_range("2020-01-01", periods=2),
  38. )
  39. s = df.iloc[:5, 0]
  40. expected = df.columns.astype("O").join(s.index, how=join_type)
  41. result = df.columns.join(s.index, how=join_type)
  42. tm.assert_index_equal(expected, result)
  43. def test_join_object_index(self):
  44. rng = date_range("1/1/2000", periods=10)
  45. idx = Index(["a", "b", "c", "d"])
  46. result = rng.join(idx, how="outer")
  47. assert isinstance(result[0], Timestamp)
  48. def test_join_utc_convert(self, join_type):
  49. rng = date_range("1/1/2011", periods=100, freq="h", tz="utc")
  50. left = rng.tz_convert("US/Eastern")
  51. right = rng.tz_convert("Europe/Berlin")
  52. result = left.join(left[:-5], how=join_type)
  53. assert isinstance(result, DatetimeIndex)
  54. assert result.tz == left.tz
  55. result = left.join(right[:-5], how=join_type)
  56. assert isinstance(result, DatetimeIndex)
  57. assert result.tz is timezone.utc
  58. def test_datetimeindex_union_join_empty(self, sort, using_infer_string):
  59. dti = date_range(start="1/1/2001", end="2/1/2001", freq="D")
  60. empty = Index([])
  61. result = dti.union(empty, sort=sort)
  62. if using_infer_string:
  63. assert isinstance(result, DatetimeIndex)
  64. tm.assert_index_equal(result, dti)
  65. else:
  66. expected = dti.astype("O")
  67. tm.assert_index_equal(result, expected)
  68. result = dti.join(empty)
  69. assert isinstance(result, DatetimeIndex)
  70. tm.assert_index_equal(result, dti)
  71. def test_join_nonunique(self):
  72. idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"])
  73. idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"])
  74. rs = idx1.join(idx2, how="outer")
  75. assert rs.is_monotonic_increasing
  76. @pytest.mark.parametrize("freq", ["B", "C"])
  77. def test_outer_join(self, freq):
  78. # should just behave as union
  79. start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
  80. rng = date_range(start=start, end=end, freq=freq)
  81. # overlapping
  82. left = rng[:10]
  83. right = rng[5:10]
  84. the_join = left.join(right, how="outer")
  85. assert isinstance(the_join, DatetimeIndex)
  86. # non-overlapping, gap in middle
  87. left = rng[:5]
  88. right = rng[10:]
  89. the_join = left.join(right, how="outer")
  90. assert isinstance(the_join, DatetimeIndex)
  91. assert the_join.freq is None
  92. # non-overlapping, no gap
  93. left = rng[:5]
  94. right = rng[5:10]
  95. the_join = left.join(right, how="outer")
  96. assert isinstance(the_join, DatetimeIndex)
  97. # overlapping, but different offset
  98. other = date_range(start, end, freq=BMonthEnd())
  99. the_join = rng.join(other, how="outer")
  100. assert isinstance(the_join, DatetimeIndex)
  101. assert the_join.freq is None
  102. def test_naive_aware_conflicts(self):
  103. start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
  104. naive = date_range(start, end, freq=BDay(), tz=None)
  105. aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong")
  106. msg = "tz-naive.*tz-aware"
  107. with pytest.raises(TypeError, match=msg):
  108. naive.join(aware)
  109. with pytest.raises(TypeError, match=msg):
  110. aware.join(naive)
  111. @pytest.mark.parametrize("tz", [None, "US/Pacific"])
  112. def test_join_preserves_freq(self, tz):
  113. # GH#32157
  114. dti = date_range("2016-01-01", periods=10, tz=tz)
  115. result = dti[:5].join(dti[5:], how="outer")
  116. assert result.freq == dti.freq
  117. tm.assert_index_equal(result, dti)
  118. result = dti[:5].join(dti[6:], how="outer")
  119. assert result.freq is None
  120. expected = dti.delete(5)
  121. tm.assert_index_equal(result, expected)