test_boxplot_method.py 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786
  1. """ Test cases for .boxplot method """
  2. from __future__ import annotations
  3. import itertools
  4. import string
  5. import numpy as np
  6. import pytest
  7. from pandas import (
  8. DataFrame,
  9. MultiIndex,
  10. Series,
  11. date_range,
  12. plotting,
  13. timedelta_range,
  14. )
  15. import pandas._testing as tm
  16. from pandas.tests.plotting.common import (
  17. _check_axes_shape,
  18. _check_box_return_type,
  19. _check_plot_works,
  20. _check_ticks_props,
  21. _check_visible,
  22. )
  23. from pandas.util.version import Version
  24. from pandas.io.formats.printing import pprint_thing
  25. mpl = pytest.importorskip("matplotlib")
  26. plt = pytest.importorskip("matplotlib.pyplot")
  def _check_ax_limits(col, ax):
      """Assert that the y-axis limits of ``ax`` enclose every value of ``col``.

      Parameters
      ----------
      col : object exposing ``min()`` and ``max()``
          The plotted data.
      ax : object exposing ``get_ylim()``
          The axes whose ``(lower, upper)`` y-limits are checked.

      Raises
      ------
      AssertionError
          If the lower limit is above ``col.min()`` or the upper limit is
          below ``col.max()``.
      """
      y_min, y_max = ax.get_ylim()
      assert y_min <= col.min()
      assert y_max >= col.max()
  # Keyword dicts used to request each box orientation (consumed by the
  # ``vert`` fixture).  Below matplotlib 3.10 the boolean ``vert`` keyword is
  # passed; from 3.10 on the string ``orientation`` keyword is passed instead.
  verts: list[dict[str, bool | str]]
  if Version(mpl.__version__) < Version("3.10"):
      verts = [{"vert": False}, {"vert": True}]
  else:
      verts = [{"orientation": "horizontal"}, {"orientation": "vertical"}]
  @pytest.fixture(params=verts)
  def vert(request):
      """Parametrized fixture returning one orientation-keyword dict from ``verts``.

      Each dependent test runs once per entry of ``verts`` and can splat the
      returned dict into a plotting call (``**vert``).
      """
      return request.param
  class TestDataFramePlots:
      """Plotting tests on DataFrame, mostly ``boxplot`` and ``plot(kind="box")``."""

      def test_stacked_boxplot_set_axis(self):
          """Stacked bar tick labels follow ``set_xticks`` once the figure is redrawn."""
          # GH2980
          n = 80
          df = DataFrame(
              {
                  "Clinical": np.random.default_rng(2).choice([0, 1, 2, 3], n),
                  "Confirmed": np.random.default_rng(2).choice([0, 1, 2, 3], n),
                  "Discarded": np.random.default_rng(2).choice([0, 1, 2, 3], n),
              },
              index=np.arange(0, n),
          )
          ax = df.plot(kind="bar", stacked=True)
          assert [int(x.get_text()) for x in ax.get_xticklabels()] == df.index.to_list()
          ax.set_xticks(np.arange(0, 80, 10))
          plt.draw()  # Update changes
          assert [int(x.get_text()) for x in ax.get_xticklabels()] == list(
              np.arange(0, 80, 10)
          )

      @pytest.mark.slow
      @pytest.mark.parametrize(
          "kwargs, warn",
          [
              [{"return_type": "dict"}, None],
              [{"column": ["one", "two"]}, None],
              [{"column": ["one", "two"], "by": "indic"}, UserWarning],
              [{"column": ["one"], "by": ["indic", "indic2"]}, None],
              [{"by": "indic"}, UserWarning],
              [{"by": ["indic", "indic2"]}, UserWarning],
              [{"notch": 1}, None],
              [{"by": "indic", "notch": 1}, UserWarning],
          ],
      )
      def test_boxplot_legacy1(self, kwargs, warn):
          """``boxplot`` runs for each keyword combination with the expected warning."""
          df = DataFrame(
              np.random.default_rng(2).standard_normal((6, 4)),
              index=list(string.ascii_letters[:6]),
              columns=["one", "two", "three", "four"],
          )
          df["indic"] = ["foo", "bar"] * 3
          df["indic2"] = ["foo", "bar", "foo"] * 2

          # _check_plot_works can add an ax so catch warning. see GH #13188
          with tm.assert_produces_warning(warn, check_stacklevel=False):
              _check_plot_works(df.boxplot, **kwargs)

      def test_boxplot_legacy1_series(self):
          """``plotting._core.boxplot`` accepts a Series passed as ``data``."""
          ser = Series(np.random.default_rng(2).standard_normal(6))
          _check_plot_works(plotting._core.boxplot, data=ser, return_type="dict")

      def test_boxplot_legacy2(self):
          """``boxplot(by=...)`` runs under ``_check_plot_works`` with a UserWarning."""
          df = DataFrame(
              np.random.default_rng(2).random((10, 2)), columns=["Col1", "Col2"]
          )
          df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])
          df["Y"] = Series(["A"] * 10)
          with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
              _check_plot_works(df.boxplot, by="X")

      def test_boxplot_legacy2_with_ax(self):
          """A single-column grouped boxplot draws on the ``ax`` it was given."""
          df = DataFrame(
              np.random.default_rng(2).random((10, 2)), columns=["Col1", "Col2"]
          )
          df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])
          df["Y"] = Series(["A"] * 10)
          # When ax is supplied and required number of axes is 1,
          # passed ax should be used:
          _, ax = mpl.pyplot.subplots()
          axes = df.boxplot("Col1", by="X", ax=ax)
          ax_axes = ax.axes
          assert ax_axes is axes

      def test_boxplot_legacy2_with_ax_return_type(self):
          """A one-group ``groupby().boxplot`` returns the passed ``ax`` under its key."""
          df = DataFrame(
              np.random.default_rng(2).random((10, 2)), columns=["Col1", "Col2"]
          )
          df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])
          df["Y"] = Series(["A"] * 10)
          _, ax = mpl.pyplot.subplots()
          axes = df.groupby("Y").boxplot(ax=ax, return_type="axes")
          ax_axes = ax.axes
          assert ax_axes is axes["A"]

      def test_boxplot_legacy2_with_multi_col(self):
          """Several columns plotted with ``ax`` end up on that axes' figure."""
          df = DataFrame(
              np.random.default_rng(2).random((10, 2)), columns=["Col1", "Col2"]
          )
          df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])
          df["Y"] = Series(["A"] * 10)
          # Multiple columns with an ax argument should use same figure
          fig, ax = mpl.pyplot.subplots()
          with tm.assert_produces_warning(UserWarning):
              axes = df.boxplot(
                  column=["Col1", "Col2"], by="X", ax=ax, return_type="axes"
              )
          assert axes["Col1"].get_figure() is fig

      def test_boxplot_legacy2_by_none(self):
          """Without ``by`` the returned dict holds as many lines as the axes."""
          df = DataFrame(
              np.random.default_rng(2).random((10, 2)), columns=["Col1", "Col2"]
          )
          df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])
          df["Y"] = Series(["A"] * 10)
          # When by is None, check that all relevant lines are present in the
          # dict
          _, ax = mpl.pyplot.subplots()
          d = df.boxplot(ax=ax, return_type="dict")
          lines = list(itertools.chain.from_iterable(d.values()))
          assert len(ax.get_lines()) == len(lines)

      def test_boxplot_return_type_none(self, hist_df):
          """``boxplot()`` with default arguments returns a matplotlib ``Axes``."""
          # GH 12216; return_type=None & by=None -> axes
          result = hist_df.boxplot()
          assert isinstance(result, mpl.pyplot.Axes)

      def test_boxplot_return_type_legacy(self):
          """An unknown ``return_type`` raises; the default behaves like ``"axes"``."""
          # API change in https://github.com/pandas-dev/pandas/pull/7096

          df = DataFrame(
              np.random.default_rng(2).standard_normal((6, 4)),
              index=list(string.ascii_letters[:6]),
              columns=["one", "two", "three", "four"],
          )
          msg = "return_type must be {'axes', 'dict', 'both'}"
          with pytest.raises(ValueError, match=msg):
              df.boxplot(return_type="NOT_A_TYPE")

          result = df.boxplot()
          _check_box_return_type(result, "axes")

      @pytest.mark.parametrize("return_type", ["dict", "axes", "both"])
      def test_boxplot_return_type_legacy_return_type(self, return_type):
          """Each valid ``return_type`` yields the matching result without warnings."""
          # API change in https://github.com/pandas-dev/pandas/pull/7096

          df = DataFrame(
              np.random.default_rng(2).standard_normal((6, 4)),
              index=list(string.ascii_letters[:6]),
              columns=["one", "two", "three", "four"],
          )
          with tm.assert_produces_warning(False):
              result = df.boxplot(return_type=return_type)
          _check_box_return_type(result, return_type)

      def test_boxplot_axis_limits(self, hist_df):
          """Two grouped columns in one row share y and cover their data ranges."""
          df = hist_df.copy()
          df["age"] = np.random.default_rng(2).integers(1, 20, df.shape[0])
          # One full row
          height_ax, weight_ax = df.boxplot(["height", "weight"], by="category")
          _check_ax_limits(df["height"], height_ax)
          _check_ax_limits(df["weight"], weight_ax)
          assert weight_ax._sharey == height_ax

      def test_boxplot_axis_limits_two_rows(self, hist_df):
          """Three grouped columns share y across rows; the spare axes does not."""
          df = hist_df.copy()
          df["age"] = np.random.default_rng(2).integers(1, 20, df.shape[0])
          # Two rows, one partial
          p = df.boxplot(["height", "weight", "age"], by="category")
          height_ax, weight_ax, age_ax = p[0, 0], p[0, 1], p[1, 0]
          dummy_ax = p[1, 1]

          _check_ax_limits(df["height"], height_ax)
          _check_ax_limits(df["weight"], weight_ax)
          _check_ax_limits(df["age"], age_ax)
          assert weight_ax._sharey == height_ax
          assert age_ax._sharey == height_ax
          assert dummy_ax._sharey is None

      def test_boxplot_empty_column(self):
          """``boxplot`` runs when one column is entirely NaN."""
          df = DataFrame(np.random.default_rng(2).standard_normal((20, 4)))
          df.loc[:, 0] = np.nan
          _check_plot_works(df.boxplot, return_type="axes")

      def test_figsize(self):
          """``figsize`` sets the width and height of the resulting figure."""
          df = DataFrame(
              np.random.default_rng(2).random((10, 5)), columns=["A", "B", "C", "D", "E"]
          )
          result = df.boxplot(return_type="axes", figsize=(12, 8))
          assert result.figure.bbox_inches.width == 12
          assert result.figure.bbox_inches.height == 8

      def test_fontsize(self):
          """``fontsize`` is applied to both x and y tick labels."""
          df = DataFrame({"a": [1, 2, 3, 4, 5, 6]})
          _check_ticks_props(df.boxplot("a", fontsize=16), xlabelsize=16, ylabelsize=16)

      def test_boxplot_numeric_data(self):
          """``plot(kind="box")`` labels only the float columns ``b`` and ``c``."""
          # GH 22799
          df = DataFrame(
              {
                  "a": date_range("2012-01-01", periods=100),
                  "b": np.random.default_rng(2).standard_normal(100),
                  "c": np.random.default_rng(2).standard_normal(100) + 2,
                  "d": date_range("2012-01-01", periods=100).astype(str),
                  "e": date_range("2012-01-01", periods=100, tz="UTC"),
                  "f": timedelta_range("1 days", periods=100),
              }
          )
          ax = df.plot(kind="box")
          assert [x.get_text() for x in ax.get_xticklabels()] == ["b", "c"]

      @pytest.mark.parametrize(
          "colors_kwd, expected",
          [
              (
                  {"boxes": "r", "whiskers": "b", "medians": "g", "caps": "c"},
                  {"boxes": "r", "whiskers": "b", "medians": "g", "caps": "c"},
              ),
              ({"boxes": "r"}, {"boxes": "r"}),
              ("r", {"boxes": "r", "whiskers": "r", "medians": "r", "caps": "r"}),
          ],
      )
      def test_color_kwd(self, colors_kwd, expected):
          """``color`` given as a dict or a single string colours the named artists."""
          # GH: 26214
          df = DataFrame(np.random.default_rng(2).random((10, 2)))
          result = df.boxplot(color=colors_kwd, return_type="dict")
          for k, v in expected.items():
              assert result[k][0].get_color() == v

      @pytest.mark.parametrize(
          "scheme,expected",
          [
              (
                  "dark_background",
                  {
                      "boxes": "#8dd3c7",
                      "whiskers": "#8dd3c7",
                      "medians": "#bfbbd9",
                      "caps": "#8dd3c7",
                  },
              ),
              (
                  "default",
                  {
                      "boxes": "#1f77b4",
                      "whiskers": "#1f77b4",
                      "medians": "#2ca02c",
                      "caps": "#1f77b4",
                  },
              ),
          ],
      )
      def test_colors_in_theme(self, scheme, expected):
          """Box artists take the expected colours after ``plt.style.use(scheme)``."""
          # GH: 40769
          df = DataFrame(np.random.default_rng(2).random((10, 2)))
          plt.style.use(scheme)
          result = df.plot.box(return_type="dict")
          for k, v in expected.items():
              assert result[k][0].get_color() == v

      @pytest.mark.parametrize(
          "dict_colors, msg",
          [({"boxes": "r", "invalid_key": "r"}, "invalid key 'invalid_key'")],
      )
      def test_color_kwd_errors(self, dict_colors, msg):
          """An unknown key in the ``color`` dict raises ``ValueError``."""
          # GH: 26214
          df = DataFrame(np.random.default_rng(2).random((10, 2)))
          with pytest.raises(ValueError, match=msg):
              df.boxplot(color=dict_colors, return_type="dict")

      @pytest.mark.parametrize(
          "props, expected",
          [
              ("boxprops", "boxes"),
              ("whiskerprops", "whiskers"),
              ("capprops", "caps"),
              ("medianprops", "medians"),
          ],
      )
      def test_specified_props_kwd(self, props, expected):
          """A colour given through a ``*props`` keyword reaches the matching artist."""
          # GH 30346
          df = DataFrame({k: np.random.default_rng(2).random(10) for k in "ABC"})
          kwd = {props: {"color": "C1"}}
          result = df.boxplot(return_type="dict", **kwd)

          assert result[expected][0].get_color() == "C1"

      @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning")
      def test_plot_xlabel_ylabel(self, vert):
          """``plot(kind="box")`` applies ``xlabel``/``ylabel`` in either orientation."""
          df = DataFrame(
              {
                  "a": np.random.default_rng(2).standard_normal(10),
                  "b": np.random.default_rng(2).standard_normal(10),
                  "group": np.random.default_rng(2).choice(["group1", "group2"], 10),
              }
          )
          xlabel, ylabel = "x", "y"
          ax = df.plot(kind="box", xlabel=xlabel, ylabel=ylabel, **vert)
          assert ax.get_xlabel() == xlabel
          assert ax.get_ylabel() == ylabel

      @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning")
      def test_plot_box(self, vert):
          """``plot.box`` on two ``sharey`` axes labels each of them."""
          # GH 54941
          rng = np.random.default_rng(2)
          df1 = DataFrame(rng.integers(0, 100, size=(100, 4)), columns=list("ABCD"))
          df2 = DataFrame(rng.integers(0, 100, size=(100, 4)), columns=list("ABCD"))

          xlabel, ylabel = "x", "y"
          _, axs = plt.subplots(ncols=2, figsize=(10, 7), sharey=True)
          df1.plot.box(ax=axs[0], xlabel=xlabel, ylabel=ylabel, **vert)
          df2.plot.box(ax=axs[1], xlabel=xlabel, ylabel=ylabel, **vert)
          for ax in axs:
              assert ax.get_xlabel() == xlabel
              assert ax.get_ylabel() == ylabel
          mpl.pyplot.close()

      @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning")
      def test_boxplot_xlabel_ylabel(self, vert):
          """``boxplot`` applies ``xlabel``/``ylabel`` in either orientation."""
          df = DataFrame(
              {
                  "a": np.random.default_rng(2).standard_normal(10),
                  "b": np.random.default_rng(2).standard_normal(10),
                  "group": np.random.default_rng(2).choice(["group1", "group2"], 10),
              }
          )
          xlabel, ylabel = "x", "y"
          ax = df.boxplot(xlabel=xlabel, ylabel=ylabel, **vert)
          assert ax.get_xlabel() == xlabel
          assert ax.get_ylabel() == ylabel

      @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning")
      def test_boxplot_group_xlabel_ylabel(self, vert):
          """Grouped ``boxplot`` applies ``xlabel``/``ylabel`` to every subplot."""
          df = DataFrame(
              {
                  "a": np.random.default_rng(2).standard_normal(10),
                  "b": np.random.default_rng(2).standard_normal(10),
                  "group": np.random.default_rng(2).choice(["group1", "group2"], 10),
              }
          )
          xlabel, ylabel = "x", "y"
          ax = df.boxplot(by="group", xlabel=xlabel, ylabel=ylabel, **vert)
          for subplot in ax:
              assert subplot.get_xlabel() == xlabel
              assert subplot.get_ylabel() == ylabel
          mpl.pyplot.close()

      @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning")
      def test_boxplot_group_no_xlabel_ylabel(self, vert, request):
          """Without explicit labels the group axis is labelled with the ``by`` key."""
          if Version(mpl.__version__) >= Version("3.10") and vert == {
              "orientation": "horizontal"
          }:
              request.applymarker(
                  pytest.mark.xfail(reason=f"{vert} fails starting with matplotlib 3.10")
              )
          df = DataFrame(
              {
                  "a": np.random.default_rng(2).standard_normal(10),
                  "b": np.random.default_rng(2).standard_normal(10),
                  "group": np.random.default_rng(2).choice(["group1", "group2"], 10),
              }
          )
          ax = df.boxplot(by="group", **vert)
          # The orientation does not change per subplot, so decide once which
          # axis is expected to carry the group label.
          is_vertical = vert in ({"vert": True}, {"orientation": "vertical"})
          for subplot in ax:
              target_label = (
                  subplot.get_xlabel() if is_vertical else subplot.get_ylabel()
              )
              assert target_label == pprint_thing(["group"])
          mpl.pyplot.close()
  class TestDataFrameGroupByPlots:
      """Tests for grouped box plots: ``groupby().boxplot`` and ``boxplot(by=...)``."""

      def test_boxplot_legacy1(self, hist_df):
          """Grouping by ``gender`` gives two axes laid out as 1x2."""
          grouped = hist_df.groupby(by="gender")
          with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
              axes = _check_plot_works(grouped.boxplot, return_type="axes")
          _check_axes_shape(list(axes.values), axes_num=2, layout=(1, 2))

      def test_boxplot_legacy1_return_type(self, hist_df):
          """``subplots=False`` draws all groups on a single axes."""
          grouped = hist_df.groupby(by="gender")
          axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
          _check_axes_shape(axes, axes_num=1, layout=(1, 1))

      @pytest.mark.slow
      def test_boxplot_legacy2(self):
          """Grouping on an index level with ten groups gives ten axes in a 4x3 grid."""
          tuples = zip(string.ascii_letters[:10], range(10))
          df = DataFrame(
              np.random.default_rng(2).random((10, 3)),
              index=MultiIndex.from_tuples(tuples),
          )
          grouped = df.groupby(level=1)
          with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
              axes = _check_plot_works(grouped.boxplot, return_type="axes")
          _check_axes_shape(list(axes.values), axes_num=10, layout=(4, 3))

      @pytest.mark.slow
      def test_boxplot_legacy2_return_type(self):
          """Level grouping with ``subplots=False`` uses a single axes."""
          tuples = zip(string.ascii_letters[:10], range(10))
          df = DataFrame(
              np.random.default_rng(2).random((10, 3)),
              index=MultiIndex.from_tuples(tuples),
          )
          grouped = df.groupby(level=1)
          axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
          _check_axes_shape(axes, axes_num=1, layout=(1, 1))

      @pytest.mark.parametrize(
          "subplots, warn, axes_num, layout",
          [[True, UserWarning, 3, (2, 2)], [False, None, 1, (1, 1)]],
      )
      def test_boxplot_legacy3(self, subplots, warn, axes_num, layout):
          """Column-wise (``axis=1``) grouping plots with the expected axes layout."""
          tuples = zip(string.ascii_letters[:10], range(10))
          df = DataFrame(
              np.random.default_rng(2).random((10, 3)),
              index=MultiIndex.from_tuples(tuples),
          )
          msg = "DataFrame.groupby with axis=1 is deprecated"
          with tm.assert_produces_warning(FutureWarning, match=msg):
              grouped = df.unstack(level=1).groupby(level=0, axis=1)
          with tm.assert_produces_warning(warn, check_stacklevel=False):
              axes = _check_plot_works(
                  grouped.boxplot, subplots=subplots, return_type="axes"
              )
          _check_axes_shape(axes, axes_num=axes_num, layout=layout)

      def test_grouped_plot_fignums(self):
          """``gb.plot()`` opens a figure per group; ``gb.boxplot`` opens one in total."""
          n = 10
          weight = Series(np.random.default_rng(2).normal(166, 20, size=n))
          height = Series(np.random.default_rng(2).normal(60, 10, size=n))
          gender = np.random.default_rng(2).choice(["male", "female"], size=n)
          df = DataFrame({"height": height, "weight": weight, "gender": gender})
          gb = df.groupby("gender")

          res = gb.plot()
          assert len(mpl.pyplot.get_fignums()) == 2
          assert len(res) == 2
          plt.close("all")

          res = gb.boxplot(return_type="axes")
          assert len(mpl.pyplot.get_fignums()) == 1
          assert len(res) == 2

      def test_grouped_plot_fignums_excluded_col(self):
          """``groupby("gender").hist()`` runs with the string grouping column present."""
          n = 10
          weight = Series(np.random.default_rng(2).normal(166, 20, size=n))
          height = Series(np.random.default_rng(2).normal(60, 10, size=n))
          gender = np.random.default_rng(2).choice(["male", "female"], size=n)
          df = DataFrame({"height": height, "weight": weight, "gender": gender})
          # now works with GH 5610 as gender is excluded
          df.groupby("gender").hist()

      @pytest.mark.slow
      def test_grouped_box_return_type(self, hist_df):
          """``boxplot(by=...)`` without ``return_type`` returns an ndarray of axes."""
          df = hist_df

          # old style: return_type=None
          result = df.boxplot(by="gender")
          assert isinstance(result, np.ndarray)
          _check_box_return_type(
              result, None, expected_keys=["height", "weight", "category"]
          )

      @pytest.mark.slow
      def test_grouped_box_return_type_groupby(self, hist_df):
          """``groupby().boxplot(return_type="dict")`` is keyed by group name."""
          df = hist_df
          # now for groupby
          result = df.groupby("gender").boxplot(return_type="dict")
          _check_box_return_type(result, "dict", expected_keys=["Male", "Female"])

      @pytest.mark.slow
      @pytest.mark.parametrize("return_type", ["dict", "axes", "both"])
      def test_grouped_box_return_type_arg(self, hist_df, return_type):
          """Results are keyed by group for ``groupby`` and by column for ``by=``."""
          df = hist_df

          returned = df.groupby("classroom").boxplot(return_type=return_type)
          _check_box_return_type(returned, return_type, expected_keys=["A", "B", "C"])

          returned = df.boxplot(by="classroom", return_type=return_type)
          _check_box_return_type(
              returned, return_type, expected_keys=["height", "weight", "category"]
          )

      @pytest.mark.slow
      @pytest.mark.parametrize("return_type", ["dict", "axes", "both"])
      def test_grouped_box_return_type_arg_duplcate_cats(self, return_type):
          """Keys stay correct when group labels also appear as column names."""
          columns2 = "X B C D A".split()
          df2 = DataFrame(
              np.random.default_rng(2).standard_normal((6, 5)), columns=columns2
          )
          categories2 = "A B".split()
          df2["category"] = categories2 * 3

          returned = df2.groupby("category").boxplot(return_type=return_type)
          _check_box_return_type(returned, return_type, expected_keys=categories2)

          returned = df2.boxplot(by="category", return_type=return_type)
          _check_box_return_type(returned, return_type, expected_keys=columns2)

      @pytest.mark.slow
      def test_grouped_box_layout_too_small(self, hist_df):
          """A layout with fewer cells than required plots raises ``ValueError``."""
          df = hist_df

          msg = "Layout of 1x1 must be larger than required size 2"
          with pytest.raises(ValueError, match=msg):
              df.boxplot(column=["weight", "height"], by=df.gender, layout=(1, 1))

      @pytest.mark.slow
      def test_grouped_box_layout_needs_by(self, hist_df):
          """Passing ``layout`` without ``by`` raises ``ValueError``."""
          df = hist_df
          msg = "The 'layout' keyword is not supported when 'by' is None"
          with pytest.raises(ValueError, match=msg):
              df.boxplot(
                  column=["height", "weight", "category"],
                  layout=(2, 1),
                  return_type="dict",
              )

      @pytest.mark.slow
      def test_grouped_box_layout_positive_layout(self, hist_df):
          """A layout with both dimensions negative raises ``ValueError``."""
          df = hist_df
          msg = "At least one dimension of layout must be positive"
          with pytest.raises(ValueError, match=msg):
              df.boxplot(column=["weight", "height"], by=df.gender, layout=(-1, -1))

      @pytest.mark.slow
      @pytest.mark.parametrize(
          "gb_key, axes_num, rows",
          [["gender", 2, 1], ["category", 4, 2], ["classroom", 3, 2]],
      )
      def test_grouped_box_layout_positive_layout_axes(
          self, hist_df, gb_key, axes_num, rows
      ):
          """The default grouped layout is two columns wide with enough rows."""
          df = hist_df
          # _check_plot_works adds an ax so catch warning. see GH #13188 GH 6769
          with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
              _check_plot_works(
                  df.groupby(gb_key).boxplot, column="height", return_type="dict"
              )
          _check_axes_shape(mpl.pyplot.gcf().axes, axes_num=axes_num, layout=(rows, 2))

      @pytest.mark.slow
      @pytest.mark.parametrize(
          "col, visible", [["height", False], ["weight", True], ["category", True]]
      )
      def test_grouped_box_layout_visible(self, hist_df, col, visible):
          """In the 2x2 grid, x tick labels and x label visibility match ``visible``."""
          df = hist_df
          # GH 5897
          axes = df.boxplot(
              column=["height", "weight", "category"], by="gender", return_type="axes"
          )
          _check_axes_shape(mpl.pyplot.gcf().axes, axes_num=3, layout=(2, 2))
          ax = axes[col]
          _check_visible(ax.get_xticklabels(), visible=visible)
          _check_visible([ax.xaxis.get_label()], visible=visible)

      @pytest.mark.slow
      def test_grouped_box_layout_shape(self, hist_df):
          """Three ``classroom`` groups are laid out on a 2x2 grid."""
          df = hist_df
          df.groupby("classroom").boxplot(
              column=["height", "weight", "category"], return_type="dict"
          )
          _check_axes_shape(mpl.pyplot.gcf().axes, axes_num=3, layout=(2, 2))

      @pytest.mark.slow
      @pytest.mark.parametrize("cols", [2, -1])
      def test_grouped_box_layout_works(self, hist_df, cols):
          """``layout=(3, cols)`` gives a 3x2 grid for both ``cols=2`` and ``cols=-1``."""
          df = hist_df
          with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
              _check_plot_works(
                  df.groupby("category").boxplot,
                  column="height",
                  layout=(3, cols),
                  return_type="dict",
              )
          _check_axes_shape(mpl.pyplot.gcf().axes, axes_num=4, layout=(3, 2))

      @pytest.mark.slow
      @pytest.mark.parametrize("rows, res", [[4, 4], [-1, 3]])
      def test_grouped_box_layout_axes_shape_rows(self, hist_df, rows, res):
          """``layout=(rows, 1)`` gives ``res`` rows, including for ``rows=-1``."""
          df = hist_df
          df.boxplot(
              column=["height", "weight", "category"], by="gender", layout=(rows, 1)
          )
          _check_axes_shape(mpl.pyplot.gcf().axes, axes_num=3, layout=(res, 1))

      @pytest.mark.slow
      @pytest.mark.parametrize("cols, res", [[4, 4], [-1, 3]])
      def test_grouped_box_layout_axes_shape_cols_groupby(self, hist_df, cols, res):
          """``layout=(1, cols)`` gives ``res`` columns, including for ``cols=-1``."""
          df = hist_df
          df.groupby("classroom").boxplot(
              column=["height", "weight", "category"],
              layout=(1, cols),
              return_type="dict",
          )
          _check_axes_shape(mpl.pyplot.gcf().axes, axes_num=3, layout=(1, res))

      @pytest.mark.slow
      def test_grouped_box_multiple_axes(self, hist_df):
          """Passing a 2x2 array of axes to a grouped boxplot warns and fills them."""
          # GH 6970, GH 7069
          df = hist_df

          # check warning to ignore sharex / sharey
          # this check should be done in the first function which
          # passes multiple axes to plot, hist or boxplot
          # location should be changed if other test is added
          # which has earlier alphabetical order
          _, axes = mpl.pyplot.subplots(2, 2)
          # Only the boxplot call sits inside the warning check, as in
          # test_grouped_box_multiple_axes_on_fig.
          with tm.assert_produces_warning(UserWarning):
              df.groupby("category").boxplot(column="height", return_type="axes", ax=axes)
          _check_axes_shape(mpl.pyplot.gcf().axes, axes_num=4, layout=(2, 2))

      @pytest.mark.slow
      def test_grouped_box_multiple_axes_on_fig(self, hist_df):
          """Each row of a 2x3 axes grid can host a separate grouped boxplot."""
          # GH 6970, GH 7069
          df = hist_df
          fig, axes = mpl.pyplot.subplots(2, 3)
          with tm.assert_produces_warning(UserWarning):
              returned = df.boxplot(
                  column=["height", "weight", "category"],
                  by="gender",
                  return_type="axes",
                  ax=axes[0],
              )
          returned = np.array(list(returned.values))
          _check_axes_shape(returned, axes_num=3, layout=(1, 3))
          tm.assert_numpy_array_equal(returned, axes[0])
          assert returned[0].figure is fig

          # draw on second row
          with tm.assert_produces_warning(UserWarning):
              returned = df.groupby("classroom").boxplot(
                  column=["height", "weight", "category"], return_type="axes", ax=axes[1]
              )
          returned = np.array(list(returned.values))
          _check_axes_shape(returned, axes_num=3, layout=(1, 3))
          tm.assert_numpy_array_equal(returned, axes[1])
          assert returned[0].figure is fig

      @pytest.mark.slow
      def test_grouped_box_multiple_axes_ax_error(self, hist_df):
          """Passing six axes where three are required raises ``ValueError``."""
          # GH 6970, GH 7069
          df = hist_df
          msg = "The number of passed axes must be 3, the same as the output plot"
          # Create the axes up front so that only the boxplot call can satisfy
          # the expected ValueError.
          _, axes = mpl.pyplot.subplots(2, 3)
          with pytest.raises(ValueError, match=msg):
              # pass different number of axes from required
              with tm.assert_produces_warning(UserWarning):
                  df.groupby("classroom").boxplot(ax=axes)

      def test_fontsize(self):
          """``fontsize`` is applied to tick labels of a grouped boxplot."""
          df = DataFrame({"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]})
          _check_ticks_props(
              df.boxplot("a", by="b", fontsize=16), xlabelsize=16, ylabelsize=16
          )

      @pytest.mark.parametrize(
          "col, expected_xticklabel",
          [
              ("v", ["(a, v)", "(b, v)", "(c, v)", "(d, v)", "(e, v)"]),
              (["v"], ["(a, v)", "(b, v)", "(c, v)", "(d, v)", "(e, v)"]),
              ("v1", ["(a, v1)", "(b, v1)", "(c, v1)", "(d, v1)", "(e, v1)"]),
              (
                  ["v", "v1"],
                  [
                      "(a, v)",
                      "(a, v1)",
                      "(b, v)",
                      "(b, v1)",
                      "(c, v)",
                      "(c, v1)",
                      "(d, v)",
                      "(d, v1)",
                      "(e, v)",
                      "(e, v1)",
                  ],
              ),
              (
                  None,
                  [
                      "(a, v)",
                      "(a, v1)",
                      "(b, v)",
                      "(b, v1)",
                      "(c, v)",
                      "(c, v1)",
                      "(d, v)",
                      "(d, v1)",
                      "(e, v)",
                      "(e, v1)",
                  ],
              ),
          ],
      )
      def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel):
          """With ``subplots=False`` tick labels are ``(group, column)`` pairs."""
          # GH 16748
          df = DataFrame(
              {
                  "cat": np.random.default_rng(2).choice(list("abcde"), 100),
                  "v": np.random.default_rng(2).random(100),
                  "v1": np.random.default_rng(2).random(100),
              }
          )
          grouped = df.groupby("cat")

          axes = _check_plot_works(
              grouped.boxplot, subplots=False, column=col, return_type="axes"
          )

          result_xticklabel = [x.get_text() for x in axes.get_xticklabels()]
          assert expected_xticklabel == result_xticklabel

      def test_groupby_boxplot_object(self, hist_df):
          """A grouped boxplot on all-object columns raises ``ValueError``."""
          # GH 43480
          df = hist_df.astype("object")
          grouped = df.groupby("gender")
          msg = "boxplot method requires numerical columns, nothing to plot"
          with pytest.raises(ValueError, match=msg):
              _check_plot_works(grouped.boxplot, subplots=False)

      def test_boxplot_multiindex_column(self):
          """MultiIndex column tuples are rendered as ``(level0, level1)`` tick labels."""
          # GH 16748
          arrays = [
              ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
              ["one", "two", "one", "two", "one", "two", "one", "two"],
          ]
          tuples = list(zip(*arrays))
          index = MultiIndex.from_tuples(tuples, names=["first", "second"])
          df = DataFrame(
              np.random.default_rng(2).standard_normal((3, 8)),
              index=["A", "B", "C"],
              columns=index,
          )

          col = [("bar", "one"), ("bar", "two")]
          axes = _check_plot_works(df.boxplot, column=col, return_type="axes")

          expected_xticklabel = ["(bar, one)", "(bar, two)"]
          result_xticklabel = [x.get_text() for x in axes.get_xticklabels()]
          assert expected_xticklabel == result_xticklabel