test_unstack.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import (
  5. DataFrame,
  6. Index,
  7. MultiIndex,
  8. Series,
  9. date_range,
  10. )
  11. import pandas._testing as tm
  12. def test_unstack_preserves_object():
  13. mi = MultiIndex.from_product([["bar", "foo"], ["one", "two"]])
  14. ser = Series(np.arange(4.0), index=mi, dtype=object)
  15. res1 = ser.unstack()
  16. assert (res1.dtypes == object).all()
  17. res2 = ser.unstack(level=0)
  18. assert (res2.dtypes == object).all()
  19. def test_unstack():
  20. index = MultiIndex(
  21. levels=[["bar", "foo"], ["one", "three", "two"]],
  22. codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
  23. )
  24. s = Series(np.arange(4.0), index=index)
  25. unstacked = s.unstack()
  26. expected = DataFrame(
  27. [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
  28. index=["bar", "foo"],
  29. columns=["one", "three", "two"],
  30. )
  31. tm.assert_frame_equal(unstacked, expected)
  32. unstacked = s.unstack(level=0)
  33. tm.assert_frame_equal(unstacked, expected.T)
  34. index = MultiIndex(
  35. levels=[["bar"], ["one", "two", "three"], [0, 1]],
  36. codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
  37. )
  38. s = Series(np.random.default_rng(2).standard_normal(6), index=index)
  39. exp_index = MultiIndex(
  40. levels=[["one", "two", "three"], [0, 1]],
  41. codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
  42. )
  43. expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0)
  44. unstacked = s.unstack(0).sort_index()
  45. tm.assert_frame_equal(unstacked, expected)
  46. # GH5873
  47. idx = MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
  48. ts = Series([1, 2], index=idx)
  49. left = ts.unstack()
  50. right = DataFrame(
  51. [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]
  52. )
  53. tm.assert_frame_equal(left, right)
  54. idx = MultiIndex.from_arrays(
  55. [
  56. ["cat", "cat", "cat", "dog", "dog"],
  57. ["a", "a", "b", "a", "b"],
  58. [1, 2, 1, 1, np.nan],
  59. ]
  60. )
  61. ts = Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx)
  62. right = DataFrame(
  63. [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
  64. columns=["cat", "dog"],
  65. )
  66. tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
  67. right.index = MultiIndex.from_tuples(tpls)
  68. tm.assert_frame_equal(ts.unstack(level=0), right)
  69. def test_unstack_tuplename_in_multiindex():
  70. # GH 19966
  71. idx = MultiIndex.from_product(
  72. [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")]
  73. )
  74. ser = Series(1, index=idx)
  75. result = ser.unstack(("A", "a"))
  76. expected = DataFrame(
  77. [[1, 1, 1], [1, 1, 1], [1, 1, 1]],
  78. columns=MultiIndex.from_tuples([("a",), ("b",), ("c",)], names=[("A", "a")]),
  79. index=Index([1, 2, 3], name=("B", "b")),
  80. )
  81. tm.assert_frame_equal(result, expected)
  82. @pytest.mark.parametrize(
  83. "unstack_idx, expected_values, expected_index, expected_columns",
  84. [
  85. (
  86. ("A", "a"),
  87. [[1, 1], [1, 1], [1, 1], [1, 1]],
  88. MultiIndex.from_tuples([(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]),
  89. MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]),
  90. ),
  91. (
  92. (("A", "a"), "B"),
  93. [[1, 1, 1, 1], [1, 1, 1, 1]],
  94. Index([3, 4], name="C"),
  95. MultiIndex.from_tuples(
  96. [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"]
  97. ),
  98. ),
  99. ],
  100. )
  101. def test_unstack_mixed_type_name_in_multiindex(
  102. unstack_idx, expected_values, expected_index, expected_columns
  103. ):
  104. # GH 19966
  105. idx = MultiIndex.from_product(
  106. [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"]
  107. )
  108. ser = Series(1, index=idx)
  109. result = ser.unstack(unstack_idx)
  110. expected = DataFrame(
  111. expected_values, columns=expected_columns, index=expected_index
  112. )
  113. tm.assert_frame_equal(result, expected)
  114. def test_unstack_multi_index_categorical_values():
  115. df = DataFrame(
  116. np.random.default_rng(2).standard_normal((10, 4)),
  117. columns=Index(list("ABCD")),
  118. index=date_range("2000-01-01", periods=10, freq="B"),
  119. )
  120. mi = df.stack(future_stack=True).index.rename(["major", "minor"])
  121. ser = Series(["foo"] * len(mi), index=mi, name="category", dtype="category")
  122. result = ser.unstack()
  123. dti = ser.index.levels[0]
  124. c = pd.Categorical(["foo"] * len(dti))
  125. expected = DataFrame(
  126. {"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
  127. columns=Index(list("ABCD"), name="minor"),
  128. index=dti.rename("major"),
  129. )
  130. tm.assert_frame_equal(result, expected)
  131. def test_unstack_mixed_level_names():
  132. # GH#48763
  133. arrays = [["a", "a"], [1, 2], ["red", "blue"]]
  134. idx = MultiIndex.from_arrays(arrays, names=("x", 0, "y"))
  135. ser = Series([1, 2], index=idx)
  136. result = ser.unstack("x")
  137. expected = DataFrame(
  138. [[1], [2]],
  139. columns=Index(["a"], name="x"),
  140. index=MultiIndex.from_tuples([(1, "red"), (2, "blue")], names=[0, "y"]),
  141. )
  142. tm.assert_frame_equal(result, expected)