test_conversion.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. import numpy as np
  2. import pytest
  3. from pandas.compat.numpy import np_version_gt2
  4. import pandas as pd
  5. from pandas import (
  6. DataFrame,
  7. MultiIndex,
  8. )
  9. import pandas._testing as tm
  def test_to_numpy(idx):
      """Check that ``idx.to_numpy()`` equals ``idx.values``."""
      tm.assert_numpy_array_equal(idx.to_numpy(), idx.values)
  def test_array_interface(idx):
      """Check NumPy array conversion of ``idx`` and the ``copy`` keyword handling.

      Covers ``np.asarray`` (expected object array of tuples), memory sharing
      between repeated conversions, explicit ``copy=True``, and, on NumPy >= 2,
      the ``FutureWarning`` emitted for ``copy=False``.
      """
      # https://github.com/pandas-dev/pandas/pull/60046
      as_array = np.asarray(idx)
      tuples = [
          ("foo", "one"),
          ("foo", "two"),
          ("bar", "one"),
          ("baz", "two"),
          ("qux", "one"),
          ("qux", "two"),
      ]
      expected = np.empty((6,), dtype=object)
      expected[:] = tuples
      tm.assert_numpy_array_equal(as_array, expected)

      # it always gives a copy by default, but the values are cached, so results
      # are still sharing memory
      first = np.asarray(idx)
      second = np.asarray(idx)
      assert np.may_share_memory(first, second)

      # with explicit copy=True, then it is an actual copy
      first = np.array(idx, copy=True)
      second = np.array(idx, copy=True)
      assert not np.may_share_memory(first, second)

      # copy=False semantics are only supported in NumPy>=2.
      if np_version_gt2:
          # for MultiIndex, copy=False is never allowed
          msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed"
          with tm.assert_produces_warning(FutureWarning, match=msg):
              np.array(idx, copy=False)
  def test_to_frame():
      """Exercise ``MultiIndex.to_frame`` across several index flavours.

      Scenarios: unnamed levels, named levels, column names supplied through
      ``name=`` (GH-22580), invalid ``name`` arguments, and an index with a
      datetime level.
      """
      level_names = ["first", "second"]
      tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]

      # Unnamed levels
      index = MultiIndex.from_tuples(tuples)
      expected = DataFrame(tuples)
      tm.assert_frame_equal(index.to_frame(index=False), expected)
      expected.index = index
      tm.assert_frame_equal(index.to_frame(), expected)

      # Named levels become the column labels
      tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
      index = MultiIndex.from_tuples(tuples, names=["first", "second"])
      expected = DataFrame(tuples)
      expected.columns = level_names
      tm.assert_frame_equal(index.to_frame(index=False), expected)
      expected.index = index
      tm.assert_frame_equal(index.to_frame(), expected)

      # See GH-22580
      index = MultiIndex.from_tuples(tuples)
      expected = DataFrame(tuples)
      expected.columns = level_names
      tm.assert_frame_equal(
          index.to_frame(index=False, name=["first", "second"]), expected
      )
      frame = index.to_frame(name=["first", "second"])
      expected.index = index
      expected.columns = level_names
      tm.assert_frame_equal(frame, expected)

      # Invalid ``name`` arguments
      with pytest.raises(
          TypeError, match="'name' must be a list / sequence of column names."
      ):
          index.to_frame(name="first")

      with pytest.raises(
          ValueError,
          match="'name' should have same length as number of levels on index.",
      ):
          index.to_frame(name=["first"])

      # Tests for datetime index
      index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])
      numbers = np.repeat(np.arange(5, dtype="int64"), 3)
      stamps = np.tile(pd.date_range("20130101", periods=3), 5)
      expected = DataFrame({0: numbers, 1: stamps})
      tm.assert_frame_equal(index.to_frame(index=False), expected)
      expected.index = index
      tm.assert_frame_equal(index.to_frame(), expected)

      # See GH-22580
      expected = DataFrame(
          {
              "first": np.repeat(np.arange(5, dtype="int64"), 3),
              "second": np.tile(pd.date_range("20130101", periods=3), 5),
          }
      )
      tm.assert_frame_equal(
          index.to_frame(index=False, name=["first", "second"]), expected
      )
      frame = index.to_frame(name=["first", "second"])
      expected.index = index
      tm.assert_frame_equal(frame, expected)
  def test_to_frame_dtype_fidelity():
      """Check that ``to_frame(index=False)`` keeps each level's dtype (GH 22420)."""
      # GH 22420
      columns = {
          "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"),
          "a": [1, 1, 1, 2, 2, 2],
          "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
          "c": ["x", "x", "y", "z", "x", "y"],
      }
      mi = MultiIndex.from_arrays(list(columns.values()), names=list(columns))

      # Map every level name to the dtype of its level
      original_dtypes = dict(zip(mi.names, (level.dtype for level in mi.levels)))

      expected_df = DataFrame(columns)
      df = mi.to_frame(index=False)
      df_dtypes = df.dtypes.to_dict()

      tm.assert_frame_equal(df, expected_df)
      assert original_dtypes == df_dtypes
  def test_to_frame_resulting_column_order():
      """Check that ``to_frame`` columns follow the order of the level names."""
      # GH 22420
      expected = ["z", 0, "a"]
      arrays = [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]]
      mi = MultiIndex.from_arrays(arrays, names=expected)
      assert mi.to_frame().columns.tolist() == expected
  def test_to_frame_duplicate_labels():
      """Check ``to_frame`` with level names that yield duplicate column labels.

      For each scenario ``to_frame()`` must raise ``ValueError``, while
      ``to_frame(allow_duplicates=True)`` must produce the expected frame.
      """
      # GH 45245
      data = [(1, 2), (3, 4)]
      # (level names, column labels expected in the resulting frame)
      scenarios = [
          (["a", "a"], ["a", "a"]),
          ([None, 0], [0, 0]),
      ]
      for names, expected_columns in scenarios:
          index = MultiIndex.from_tuples(data, names=names)

          with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
              index.to_frame()

          expected = DataFrame(data, index=index, columns=expected_columns)
          tm.assert_frame_equal(index.to_frame(allow_duplicates=True), expected)
  def test_to_flat_index(idx):
      """Check that ``idx.to_flat_index()`` yields a flat ``Index`` of tuples."""
      flat = idx.to_flat_index()
      tuples = (
          ("foo", "one"),
          ("foo", "two"),
          ("bar", "one"),
          ("baz", "two"),
          ("qux", "one"),
          ("qux", "two"),
      )
      # tupleize_cols=False is passed through unchanged when building the
      # expected Index from the tuples
      expected = pd.Index(tuples, tupleize_cols=False)
      tm.assert_index_equal(flat, expected)