test_groupby.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. """ Test cases for GroupBy.plot """
  2. import numpy as np
  3. import pytest
  4. from pandas import (
  5. DataFrame,
  6. Index,
  7. Series,
  8. )
  9. from pandas.tests.plotting.common import (
  10. _check_axes_shape,
  11. _check_legend_labels,
  12. )
  13. pytest.importorskip("matplotlib")
  14. class TestDataFrameGroupByPlots:
  def test_series_groupby_plotting_nominally_works(self):
      """Smoke test: plotting a Series grouped by an array of labels runs."""
      sample_size = 10
      # Each draw uses a fresh generator seeded with 2, so the data is
      # identical from run to run.
      weights = np.random.default_rng(2).normal(166, 20, size=sample_size)
      labels = np.random.default_rng(2).choice(
          ["male", "female"], size=sample_size
      )
      grouped = Series(weights).groupby(labels)
      grouped.plot()
  def test_series_groupby_plotting_nominally_works_hist(self):
      """Smoke test: hist() on a Series grouped by an array of labels runs."""
      sample_size = 10
      # Each draw uses a fresh generator seeded with 2, so the data is
      # identical from run to run.
      heights = np.random.default_rng(2).normal(60, 10, size=sample_size)
      labels = np.random.default_rng(2).choice(
          ["male", "female"], size=sample_size
      )
      grouped = Series(heights).groupby(labels)
      grouped.hist()
  def test_series_groupby_plotting_nominally_works_alpha(self):
      """Smoke test: SeriesGroupBy.plot() accepts an ``alpha`` keyword."""
      sample_size = 10
      heights = np.random.default_rng(2).normal(60, 10, size=sample_size)
      labels = np.random.default_rng(2).choice(
          ["male", "female"], size=sample_size
      )
      grouped = Series(heights).groupby(labels)
      # Regression test for GH8733
      grouped.plot(alpha=0.5)
  def test_plotting_with_float_index_works(self):
      """SeriesGroupBy.plot() runs when the frame carries a float index."""
      # GH 7025
      float_labels = [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0]
      columns = {
          "def": [1, 1, 1, 2, 2, 2, 3, 3, 3],
          "val": np.random.default_rng(2).standard_normal(9),
      }
      frame = DataFrame(columns, index=float_labels)
      grouped_values = frame.groupby("def")["val"]
      grouped_values.plot()
  def test_plotting_with_float_index_works_apply(self):
      """Plotting each group through ``apply`` runs with a float index."""
      # GH 7025
      float_labels = [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0]
      columns = {
          "def": [1, 1, 1, 2, 2, 2, 3, 3, 3],
          "val": np.random.default_rng(2).standard_normal(9),
      }
      frame = DataFrame(columns, index=float_labels)
      grouped_values = frame.groupby("def")["val"]
      # Plot group by group instead of going through the groupby plot accessor.
      grouped_values.apply(lambda group: group.plot())
  def test_hist_single_row(self):
      """Series.hist(by=...) runs when every ``by`` group holds one row."""
      # GH10214
      frame = DataFrame(
          {"Name": ["AAA", "BBB"], "ByCol": [1, 2], "Mark": [85, 89]}
      )
      bin_edges = np.arange(80, 100 + 2, 1)
      marks = frame["Mark"]
      marks.hist(by=frame["ByCol"], bins=bin_edges)
  def test_hist_single_row_single_bycol(self):
      """Series.hist(by=...) runs on a one-row frame with a single group."""
      # GH10214
      frame = DataFrame({"Name": ["AAA"], "ByCol": [1], "Mark": [85]})
      bin_edges = np.arange(80, 100 + 2, 1)
      marks = frame["Mark"]
      marks.hist(by=frame["ByCol"], bins=bin_edges)
  def test_plot_submethod_works(self):
      """The ``plot.scatter`` accessor is callable on a DataFrameGroupBy."""
      data = {
          "x": [1, 2, 3, 4, 5],
          "y": [1, 2, 3, 2, 1],
          "z": list("ababa"),
      }
      grouped = DataFrame(data).groupby("z")
      grouped.plot.scatter("x", "y")
  def test_plot_submethod_works_line(self):
      """The ``plot.line`` accessor is callable on a SeriesGroupBy."""
      data = {
          "x": [1, 2, 3, 4, 5],
          "y": [1, 2, 3, 2, 1],
          "z": list("ababa"),
      }
      grouped_x = DataFrame(data).groupby("z")["x"]
      grouped_x.plot.line()
  def test_plot_kwargs(self):
      """``plot(kind="scatter", ...)`` on a groupby really draws a scatter."""
      data = {
          "x": [1, 2, 3, 4, 5],
          "y": [1, 2, 3, 2, 1],
          "z": list("ababa"),
      }
      grouped = DataFrame(data).groupby("z")
      axes_by_group = grouped.plot(kind="scatter", x="x", y="y")
      # GH11805: a scatter plot leaves exactly one PathCollection on the axes,
      # so its presence shows the scatter was effectively drawn.
      group_a_axes = axes_by_group["a"]
      assert len(group_a_axes.collections) == 1
  def test_plot_kwargs_scatter(self):
      """``plot.scatter(x=..., y=...)`` on a groupby adds one collection."""
      data = {
          "x": [1, 2, 3, 4, 5],
          "y": [1, 2, 3, 2, 1],
          "z": list("ababa"),
      }
      grouped = DataFrame(data).groupby("z")
      axes_by_group = grouped.plot.scatter(x="x", y="y")
      # The axes for group "a" should hold exactly one drawn collection.
      group_a_axes = axes_by_group["a"]
      assert len(group_a_axes.collections) == 1
  @pytest.mark.parametrize("column, expected_axes_num", [(None, 2), ("b", 1)])
  def test_groupby_hist_frame_with_legend(self, column, expected_axes_num):
      """DataFrameGroupBy.hist(legend=True) yields the expected axes/legends."""
      # GH 6279 - DataFrameGroupBy histogram can have a legend
      group_index = Index(["1"] * 15 + ["2"] * 15, name="c")
      values = np.random.default_rng(2).standard_normal((30, 2))
      frame = DataFrame(values, index=group_index, columns=["a", "b"])

      # With no column selected, the expected legends are one per column.
      expected_labels = column if column else [["a"], ["b"]]
      expected_layout = (1, expected_axes_num)

      per_group_axes = frame.groupby("c").hist(legend=True, column=column)
      for axes in per_group_axes:
          _check_axes_shape(
              axes, axes_num=expected_axes_num, layout=expected_layout
          )
          first_row = axes[0]
          for ax, expected_label in zip(first_row, expected_labels):
              _check_legend_labels(ax, expected_label)
  @pytest.mark.parametrize("column", [None, "b"])
  def test_groupby_hist_frame_with_legend_raises(self, column):
      """DataFrameGroupBy.hist rejects ``legend=True`` together with ``label``."""
      # GH 6279 - DataFrameGroupBy histogram with legend and label raises
      group_index = Index(["1"] * 15 + ["2"] * 15, name="c")
      values = np.random.default_rng(2).standard_normal((30, 2))
      frame = DataFrame(values, index=group_index, columns=["a", "b"])
      grouped = frame.groupby("c")

      expected_message = "Cannot use both legend and label"
      with pytest.raises(ValueError, match=expected_message):
          grouped.hist(legend=True, column=column, label="d")
  def test_groupby_hist_series_with_legend(self):
      """SeriesGroupBy.hist(legend=True) draws one axes with a group legend."""
      # GH 6279 - SeriesGroupBy histogram can have a legend
      group_index = Index(["1"] * 15 + ["2"] * 15, name="c")
      values = np.random.default_rng(2).standard_normal((30, 2))
      frame = DataFrame(values, index=group_index, columns=["a", "b"])
      grouped_a = frame.groupby("c")["a"]

      for ax in grouped_a.hist(legend=True):
          _check_axes_shape(ax, axes_num=1, layout=(1, 1))
          # The legend entries are the two group keys.
          _check_legend_labels(ax, ["1", "2"])
  def test_groupby_hist_series_with_legend_raises(self):
      """SeriesGroupBy.hist rejects ``legend=True`` together with ``label``."""
      # GH 6279 - SeriesGroupBy histogram with legend and label raises
      index = Index(15 * ["1"] + 15 * ["2"], name="c")
      df = DataFrame(
          np.random.default_rng(2).standard_normal((30, 2)),
          index=index,
          columns=["a", "b"],
      )
      g = df.groupby("c")

      with pytest.raises(ValueError, match="Cannot use both legend and label"):
          # Select a single column so the call goes through SeriesGroupBy.hist;
          # calling hist on ``g`` itself would exercise DataFrameGroupBy, which
          # test_groupby_hist_frame_with_legend_raises already covers.
          g["a"].hist(legend=True, label="d")