test_apply_mutate.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. import numpy as np
  2. import pandas as pd
  3. import pandas._testing as tm
  4. def test_group_by_copy():
  5. # GH#44803
  6. df = pd.DataFrame(
  7. {
  8. "name": ["Alice", "Bob", "Carl"],
  9. "age": [20, 21, 20],
  10. }
  11. ).set_index("name")
  12. msg = "DataFrameGroupBy.apply operated on the grouping columns"
  13. with tm.assert_produces_warning(FutureWarning, match=msg):
  14. grp_by_same_value = df.groupby(["age"], group_keys=False).apply(
  15. lambda group: group
  16. )
  17. msg = "DataFrameGroupBy.apply operated on the grouping columns"
  18. with tm.assert_produces_warning(FutureWarning, match=msg):
  19. grp_by_copy = df.groupby(["age"], group_keys=False).apply(
  20. lambda group: group.copy()
  21. )
  22. tm.assert_frame_equal(grp_by_same_value, grp_by_copy)
  23. def test_mutate_groups():
  24. # GH3380
  25. df = pd.DataFrame(
  26. {
  27. "cat1": ["a"] * 8 + ["b"] * 6,
  28. "cat2": ["c"] * 2
  29. + ["d"] * 2
  30. + ["e"] * 2
  31. + ["f"] * 2
  32. + ["c"] * 2
  33. + ["d"] * 2
  34. + ["e"] * 2,
  35. "cat3": [f"g{x}" for x in range(1, 15)],
  36. "val": np.random.default_rng(2).integers(100, size=14),
  37. }
  38. )
  39. def f_copy(x):
  40. x = x.copy()
  41. x["rank"] = x.val.rank(method="min")
  42. return x.groupby("cat2")["rank"].min()
  43. def f_no_copy(x):
  44. x["rank"] = x.val.rank(method="min")
  45. return x.groupby("cat2")["rank"].min()
  46. msg = "DataFrameGroupBy.apply operated on the grouping columns"
  47. with tm.assert_produces_warning(FutureWarning, match=msg):
  48. grpby_copy = df.groupby("cat1").apply(f_copy)
  49. with tm.assert_produces_warning(FutureWarning, match=msg):
  50. grpby_no_copy = df.groupby("cat1").apply(f_no_copy)
  51. tm.assert_series_equal(grpby_copy, grpby_no_copy)
  52. def test_no_mutate_but_looks_like():
  53. # GH 8467
  54. # first show's mutation indicator
  55. # second does not, but should yield the same results
  56. df = pd.DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)})
  57. msg = "DataFrameGroupBy.apply operated on the grouping columns"
  58. with tm.assert_produces_warning(FutureWarning, match=msg):
  59. result1 = df.groupby("key", group_keys=True).apply(lambda x: x[:].key)
  60. with tm.assert_produces_warning(FutureWarning, match=msg):
  61. result2 = df.groupby("key", group_keys=True).apply(lambda x: x.key)
  62. tm.assert_series_equal(result1, result2)
  63. def test_apply_function_with_indexing(warn_copy_on_write):
  64. # GH: 33058
  65. df = pd.DataFrame(
  66. {"col1": ["A", "A", "A", "B", "B", "B"], "col2": [1, 2, 3, 4, 5, 6]}
  67. )
  68. def fn(x):
  69. x.loc[x.index[-1], "col2"] = 0
  70. return x.col2
  71. msg = "DataFrameGroupBy.apply operated on the grouping columns"
  72. with tm.assert_produces_warning(
  73. FutureWarning, match=msg, raise_on_extra_warnings=not warn_copy_on_write
  74. ):
  75. result = df.groupby(["col1"], as_index=False).apply(fn)
  76. expected = pd.Series(
  77. [1, 2, 0, 4, 5, 0],
  78. index=pd.MultiIndex.from_tuples(
  79. [(0, 0), (0, 1), (0, 2), (1, 3), (1, 4), (1, 5)]
  80. ),
  81. name="col2",
  82. )
  83. tm.assert_series_equal(result, expected)
  84. def test_apply_mutate_columns_multiindex():
  85. # GH 12652
  86. df = pd.DataFrame(
  87. {
  88. ("C", "julian"): [1, 2, 3],
  89. ("B", "geoffrey"): [1, 2, 3],
  90. ("A", "julian"): [1, 2, 3],
  91. ("B", "julian"): [1, 2, 3],
  92. ("A", "geoffrey"): [1, 2, 3],
  93. ("C", "geoffrey"): [1, 2, 3],
  94. },
  95. columns=pd.MultiIndex.from_tuples(
  96. [
  97. ("A", "julian"),
  98. ("A", "geoffrey"),
  99. ("B", "julian"),
  100. ("B", "geoffrey"),
  101. ("C", "julian"),
  102. ("C", "geoffrey"),
  103. ]
  104. ),
  105. )
  106. def add_column(grouped):
  107. name = grouped.columns[0][1]
  108. grouped["sum", name] = grouped.sum(axis=1)
  109. return grouped
  110. msg = "DataFrame.groupby with axis=1 is deprecated"
  111. with tm.assert_produces_warning(FutureWarning, match=msg):
  112. gb = df.groupby(level=1, axis=1)
  113. result = gb.apply(add_column)
  114. expected = pd.DataFrame(
  115. [
  116. [1, 1, 1, 3, 1, 1, 1, 3],
  117. [2, 2, 2, 6, 2, 2, 2, 6],
  118. [
  119. 3,
  120. 3,
  121. 3,
  122. 9,
  123. 3,
  124. 3,
  125. 3,
  126. 9,
  127. ],
  128. ],
  129. columns=pd.MultiIndex.from_tuples(
  130. [
  131. ("geoffrey", "A", "geoffrey"),
  132. ("geoffrey", "B", "geoffrey"),
  133. ("geoffrey", "C", "geoffrey"),
  134. ("geoffrey", "sum", "geoffrey"),
  135. ("julian", "A", "julian"),
  136. ("julian", "B", "julian"),
  137. ("julian", "C", "julian"),
  138. ("julian", "sum", "julian"),
  139. ]
  140. ),
  141. )
  142. tm.assert_frame_equal(result, expected)