test_align.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. from datetime import timezone
  2. import numpy as np
  3. import pytest
  4. import pandas as pd
  5. from pandas import (
  6. Series,
  7. date_range,
  8. period_range,
  9. )
  10. import pandas._testing as tm
  11. @pytest.mark.parametrize(
  12. "first_slice,second_slice",
  13. [
  14. [[2, None], [None, -5]],
  15. [[None, 0], [None, -5]],
  16. [[None, -5], [None, 0]],
  17. [[None, 0], [None, 0]],
  18. ],
  19. )
  20. @pytest.mark.parametrize("fill", [None, -1])
  21. def test_align(datetime_series, first_slice, second_slice, join_type, fill):
  22. a = datetime_series[slice(*first_slice)]
  23. b = datetime_series[slice(*second_slice)]
  24. aa, ab = a.align(b, join=join_type, fill_value=fill)
  25. join_index = a.index.join(b.index, how=join_type)
  26. if fill is not None:
  27. diff_a = aa.index.difference(join_index)
  28. diff_b = ab.index.difference(join_index)
  29. if len(diff_a) > 0:
  30. assert (aa.reindex(diff_a) == fill).all()
  31. if len(diff_b) > 0:
  32. assert (ab.reindex(diff_b) == fill).all()
  33. ea = a.reindex(join_index)
  34. eb = b.reindex(join_index)
  35. if fill is not None:
  36. ea = ea.fillna(fill)
  37. eb = eb.fillna(fill)
  38. tm.assert_series_equal(aa, ea)
  39. tm.assert_series_equal(ab, eb)
  40. assert aa.name == "ts"
  41. assert ea.name == "ts"
  42. assert ab.name == "ts"
  43. assert eb.name == "ts"
  44. @pytest.mark.parametrize(
  45. "first_slice,second_slice",
  46. [
  47. [[2, None], [None, -5]],
  48. [[None, 0], [None, -5]],
  49. [[None, -5], [None, 0]],
  50. [[None, 0], [None, 0]],
  51. ],
  52. )
  53. @pytest.mark.parametrize("method", ["pad", "bfill"])
  54. @pytest.mark.parametrize("limit", [None, 1])
  55. def test_align_fill_method(
  56. datetime_series, first_slice, second_slice, join_type, method, limit
  57. ):
  58. a = datetime_series[slice(*first_slice)]
  59. b = datetime_series[slice(*second_slice)]
  60. msg = (
  61. "The 'method', 'limit', and 'fill_axis' keywords in Series.align "
  62. "are deprecated"
  63. )
  64. with tm.assert_produces_warning(FutureWarning, match=msg):
  65. aa, ab = a.align(b, join=join_type, method=method, limit=limit)
  66. join_index = a.index.join(b.index, how=join_type)
  67. ea = a.reindex(join_index)
  68. eb = b.reindex(join_index)
  69. msg2 = "Series.fillna with 'method' is deprecated"
  70. with tm.assert_produces_warning(FutureWarning, match=msg2):
  71. ea = ea.fillna(method=method, limit=limit)
  72. eb = eb.fillna(method=method, limit=limit)
  73. tm.assert_series_equal(aa, ea)
  74. tm.assert_series_equal(ab, eb)
  75. def test_align_nocopy(datetime_series, using_copy_on_write):
  76. b = datetime_series[:5].copy()
  77. # do copy
  78. a = datetime_series.copy()
  79. ra, _ = a.align(b, join="left")
  80. ra[:5] = 5
  81. assert not (a[:5] == 5).any()
  82. # do not copy
  83. a = datetime_series.copy()
  84. ra, _ = a.align(b, join="left", copy=False)
  85. ra[:5] = 5
  86. if using_copy_on_write:
  87. assert not (a[:5] == 5).any()
  88. else:
  89. assert (a[:5] == 5).all()
  90. # do copy
  91. a = datetime_series.copy()
  92. b = datetime_series[:5].copy()
  93. _, rb = a.align(b, join="right")
  94. rb[:3] = 5
  95. assert not (b[:3] == 5).any()
  96. # do not copy
  97. a = datetime_series.copy()
  98. b = datetime_series[:5].copy()
  99. _, rb = a.align(b, join="right", copy=False)
  100. rb[:2] = 5
  101. if using_copy_on_write:
  102. assert not (b[:2] == 5).any()
  103. else:
  104. assert (b[:2] == 5).all()
  105. def test_align_same_index(datetime_series, using_copy_on_write):
  106. a, b = datetime_series.align(datetime_series, copy=False)
  107. if not using_copy_on_write:
  108. assert a.index is datetime_series.index
  109. assert b.index is datetime_series.index
  110. else:
  111. assert a.index.is_(datetime_series.index)
  112. assert b.index.is_(datetime_series.index)
  113. a, b = datetime_series.align(datetime_series, copy=True)
  114. assert a.index is not datetime_series.index
  115. assert b.index is not datetime_series.index
  116. assert a.index.is_(datetime_series.index)
  117. assert b.index.is_(datetime_series.index)
  118. def test_align_multiindex():
  119. # GH 10665
  120. midx = pd.MultiIndex.from_product(
  121. [range(2), range(3), range(2)], names=("a", "b", "c")
  122. )
  123. idx = pd.Index(range(2), name="b")
  124. s1 = Series(np.arange(12, dtype="int64"), index=midx)
  125. s2 = Series(np.arange(2, dtype="int64"), index=idx)
  126. # these must be the same results (but flipped)
  127. res1l, res1r = s1.align(s2, join="left")
  128. res2l, res2r = s2.align(s1, join="right")
  129. expl = s1
  130. tm.assert_series_equal(expl, res1l)
  131. tm.assert_series_equal(expl, res2r)
  132. expr = Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
  133. tm.assert_series_equal(expr, res1r)
  134. tm.assert_series_equal(expr, res2l)
  135. res1l, res1r = s1.align(s2, join="right")
  136. res2l, res2r = s2.align(s1, join="left")
  137. exp_idx = pd.MultiIndex.from_product(
  138. [range(2), range(2), range(2)], names=("a", "b", "c")
  139. )
  140. expl = Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
  141. tm.assert_series_equal(expl, res1l)
  142. tm.assert_series_equal(expl, res2r)
  143. expr = Series([0, 0, 1, 1] * 2, index=exp_idx)
  144. tm.assert_series_equal(expr, res1r)
  145. tm.assert_series_equal(expr, res2l)
  146. @pytest.mark.parametrize("method", ["backfill", "bfill", "pad", "ffill", None])
  147. def test_align_with_dataframe_method(method):
  148. # GH31788
  149. ser = Series(range(3), index=range(3))
  150. df = pd.DataFrame(0.0, index=range(3), columns=range(3))
  151. msg = (
  152. "The 'method', 'limit', and 'fill_axis' keywords in Series.align "
  153. "are deprecated"
  154. )
  155. with tm.assert_produces_warning(FutureWarning, match=msg):
  156. result_ser, result_df = ser.align(df, method=method)
  157. tm.assert_series_equal(result_ser, ser)
  158. tm.assert_frame_equal(result_df, df)
  159. def test_align_dt64tzindex_mismatched_tzs():
  160. idx1 = date_range("2001", periods=5, freq="h", tz="US/Eastern")
  161. ser = Series(np.random.default_rng(2).standard_normal(len(idx1)), index=idx1)
  162. ser_central = ser.tz_convert("US/Central")
  163. # different timezones convert to UTC
  164. new1, new2 = ser.align(ser_central)
  165. assert new1.index.tz is timezone.utc
  166. assert new2.index.tz is timezone.utc
  167. def test_align_periodindex(join_type):
  168. rng = period_range("1/1/2000", "1/1/2010", freq="Y")
  169. ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
  170. # TODO: assert something?
  171. ts.align(ts[::2], join=join_type)
  172. def test_align_stringindex(any_string_dtype):
  173. left = Series(range(3), index=pd.Index(["a", "b", "d"], dtype=any_string_dtype))
  174. right = Series(range(3), index=pd.Index(["a", "b", "c"], dtype=any_string_dtype))
  175. result_left, result_right = left.align(right)
  176. expected_idx = pd.Index(["a", "b", "c", "d"], dtype=any_string_dtype)
  177. expected_left = Series([0, 1, np.nan, 2], index=expected_idx)
  178. expected_right = Series([0, 1, 2, np.nan], index=expected_idx)
  179. tm.assert_series_equal(result_left, expected_left)
  180. tm.assert_series_equal(result_right, expected_right)
  181. def test_align_left_fewer_levels():
  182. # GH#45224
  183. left = Series([2], index=pd.MultiIndex.from_tuples([(1, 3)], names=["a", "c"]))
  184. right = Series(
  185. [1], index=pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
  186. )
  187. result_left, result_right = left.align(right)
  188. expected_right = Series(
  189. [1], index=pd.MultiIndex.from_tuples([(1, 3, 2)], names=["a", "c", "b"])
  190. )
  191. expected_left = Series(
  192. [2], index=pd.MultiIndex.from_tuples([(1, 3, 2)], names=["a", "c", "b"])
  193. )
  194. tm.assert_series_equal(result_left, expected_left)
  195. tm.assert_series_equal(result_right, expected_right)
  196. def test_align_left_different_named_levels():
  197. # GH#45224
  198. left = Series(
  199. [2], index=pd.MultiIndex.from_tuples([(1, 4, 3)], names=["a", "d", "c"])
  200. )
  201. right = Series(
  202. [1], index=pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
  203. )
  204. result_left, result_right = left.align(right)
  205. expected_left = Series(
  206. [2], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"])
  207. )
  208. expected_right = Series(
  209. [1], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"])
  210. )
  211. tm.assert_series_equal(result_left, expected_left)
  212. tm.assert_series_equal(result_right, expected_right)