# test_hist_method.py
  1. """ Test cases for .hist method """
  2. import re
  3. import numpy as np
  4. import pytest
  5. from pandas import (
  6. DataFrame,
  7. Index,
  8. Series,
  9. date_range,
  10. to_datetime,
  11. )
  12. import pandas._testing as tm
  13. from pandas.tests.plotting.common import (
  14. _check_ax_scales,
  15. _check_axes_shape,
  16. _check_colors,
  17. _check_legend_labels,
  18. _check_patches_all_filled,
  19. _check_plot_works,
  20. _check_text_labels,
  21. _check_ticks_props,
  22. get_x_axis,
  23. get_y_axis,
  24. )
  25. mpl = pytest.importorskip("matplotlib")
  26. @pytest.fixture
  27. def ts():
  28. return Series(
  29. np.arange(30, dtype=np.float64),
  30. index=date_range("2020-01-01", periods=30, freq="B"),
  31. name="ts",
  32. )
  33. class TestSeriesPlots:
  34. @pytest.mark.parametrize("kwargs", [{}, {"grid": False}, {"figsize": (8, 10)}])
  35. def test_hist_legacy_kwargs(self, ts, kwargs):
  36. _check_plot_works(ts.hist, **kwargs)
  37. @pytest.mark.parametrize("kwargs", [{}, {"bins": 5}])
  38. def test_hist_legacy_kwargs_warning(self, ts, kwargs):
  39. # _check_plot_works adds an ax so catch warning. see GH #13188
  40. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  41. _check_plot_works(ts.hist, by=ts.index.month, **kwargs)
  42. def test_hist_legacy_ax(self, ts):
  43. fig, ax = mpl.pyplot.subplots(1, 1)
  44. _check_plot_works(ts.hist, ax=ax, default_axes=True)
  45. def test_hist_legacy_ax_and_fig(self, ts):
  46. fig, ax = mpl.pyplot.subplots(1, 1)
  47. _check_plot_works(ts.hist, ax=ax, figure=fig, default_axes=True)
  48. def test_hist_legacy_fig(self, ts):
  49. fig, _ = mpl.pyplot.subplots(1, 1)
  50. _check_plot_works(ts.hist, figure=fig, default_axes=True)
  51. def test_hist_legacy_multi_ax(self, ts):
  52. fig, (ax1, ax2) = mpl.pyplot.subplots(1, 2)
  53. _check_plot_works(ts.hist, figure=fig, ax=ax1, default_axes=True)
  54. _check_plot_works(ts.hist, figure=fig, ax=ax2, default_axes=True)
  55. def test_hist_legacy_by_fig_error(self, ts):
  56. fig, _ = mpl.pyplot.subplots(1, 1)
  57. msg = (
  58. "Cannot pass 'figure' when using the 'by' argument, since a new 'Figure' "
  59. "instance will be created"
  60. )
  61. with pytest.raises(ValueError, match=msg):
  62. ts.hist(by=ts.index, figure=fig)
  63. def test_hist_bins_legacy(self):
  64. df = DataFrame(np.random.default_rng(2).standard_normal((10, 2)))
  65. ax = df.hist(bins=2)[0][0]
  66. assert len(ax.patches) == 2
  67. def test_hist_layout(self, hist_df):
  68. df = hist_df
  69. msg = "The 'layout' keyword is not supported when 'by' is None"
  70. with pytest.raises(ValueError, match=msg):
  71. df.height.hist(layout=(1, 1))
  72. with pytest.raises(ValueError, match=msg):
  73. df.height.hist(layout=[1, 1])
  74. @pytest.mark.slow
  75. @pytest.mark.parametrize(
  76. "by, layout, axes_num, res_layout",
  77. [
  78. ["gender", (2, 1), 2, (2, 1)],
  79. ["gender", (3, -1), 2, (3, 1)],
  80. ["category", (4, 1), 4, (4, 1)],
  81. ["category", (2, -1), 4, (2, 2)],
  82. ["category", (3, -1), 4, (3, 2)],
  83. ["category", (-1, 4), 4, (1, 4)],
  84. ["classroom", (2, 2), 3, (2, 2)],
  85. ],
  86. )
  87. def test_hist_layout_with_by(self, hist_df, by, layout, axes_num, res_layout):
  88. df = hist_df
  89. # _check_plot_works adds an `ax` kwarg to the method call
  90. # so we get a warning about an axis being cleared, even
  91. # though we don't explicing pass one, see GH #13188
  92. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  93. axes = _check_plot_works(df.height.hist, by=getattr(df, by), layout=layout)
  94. _check_axes_shape(axes, axes_num=axes_num, layout=res_layout)
  95. def test_hist_layout_with_by_shape(self, hist_df):
  96. df = hist_df
  97. axes = df.height.hist(by=df.category, layout=(4, 2), figsize=(12, 7))
  98. _check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 7))
  99. def test_hist_no_overlap(self):
  100. from matplotlib.pyplot import (
  101. gcf,
  102. subplot,
  103. )
  104. x = Series(np.random.default_rng(2).standard_normal(2))
  105. y = Series(np.random.default_rng(2).standard_normal(2))
  106. subplot(121)
  107. x.hist()
  108. subplot(122)
  109. y.hist()
  110. fig = gcf()
  111. axes = fig.axes
  112. assert len(axes) == 2
  113. def test_hist_by_no_extra_plots(self, hist_df):
  114. df = hist_df
  115. df.height.hist(by=df.gender)
  116. assert len(mpl.pyplot.get_fignums()) == 1
  117. def test_plot_fails_when_ax_differs_from_figure(self, ts):
  118. from pylab import figure
  119. fig1 = figure()
  120. fig2 = figure()
  121. ax1 = fig1.add_subplot(111)
  122. msg = "passed axis not bound to passed figure"
  123. with pytest.raises(AssertionError, match=msg):
  124. ts.hist(ax=ax1, figure=fig2)
  125. @pytest.mark.parametrize(
  126. "histtype, expected",
  127. [
  128. ("bar", True),
  129. ("barstacked", True),
  130. ("step", False),
  131. ("stepfilled", True),
  132. ],
  133. )
  134. def test_histtype_argument(self, histtype, expected):
  135. # GH23992 Verify functioning of histtype argument
  136. ser = Series(np.random.default_rng(2).integers(1, 10))
  137. ax = ser.hist(histtype=histtype)
  138. _check_patches_all_filled(ax, filled=expected)
  139. @pytest.mark.parametrize(
  140. "by, expected_axes_num, expected_layout", [(None, 1, (1, 1)), ("b", 2, (1, 2))]
  141. )
  142. def test_hist_with_legend(self, by, expected_axes_num, expected_layout):
  143. # GH 6279 - Series histogram can have a legend
  144. index = 15 * ["1"] + 15 * ["2"]
  145. s = Series(np.random.default_rng(2).standard_normal(30), index=index, name="a")
  146. s.index.name = "b"
  147. # Use default_axes=True when plotting method generate subplots itself
  148. axes = _check_plot_works(s.hist, default_axes=True, legend=True, by=by)
  149. _check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout)
  150. _check_legend_labels(axes, "a")
  151. @pytest.mark.parametrize("by", [None, "b"])
  152. def test_hist_with_legend_raises(self, by):
  153. # GH 6279 - Series histogram with legend and label raises
  154. index = 15 * ["1"] + 15 * ["2"]
  155. s = Series(np.random.default_rng(2).standard_normal(30), index=index, name="a")
  156. s.index.name = "b"
  157. with pytest.raises(ValueError, match="Cannot use both legend and label"):
  158. s.hist(legend=True, by=by, label="c")
  159. def test_hist_kwargs(self, ts):
  160. _, ax = mpl.pyplot.subplots()
  161. ax = ts.plot.hist(bins=5, ax=ax)
  162. assert len(ax.patches) == 5
  163. _check_text_labels(ax.yaxis.get_label(), "Frequency")
  164. def test_hist_kwargs_horizontal(self, ts):
  165. _, ax = mpl.pyplot.subplots()
  166. ax = ts.plot.hist(bins=5, ax=ax)
  167. ax = ts.plot.hist(orientation="horizontal", ax=ax)
  168. _check_text_labels(ax.xaxis.get_label(), "Frequency")
  169. def test_hist_kwargs_align(self, ts):
  170. _, ax = mpl.pyplot.subplots()
  171. ax = ts.plot.hist(bins=5, ax=ax)
  172. ax = ts.plot.hist(align="left", stacked=True, ax=ax)
  173. @pytest.mark.xfail(reason="Api changed in 3.6.0")
  174. def test_hist_kde(self, ts):
  175. pytest.importorskip("scipy")
  176. _, ax = mpl.pyplot.subplots()
  177. ax = ts.plot.hist(logy=True, ax=ax)
  178. _check_ax_scales(ax, yaxis="log")
  179. xlabels = ax.get_xticklabels()
  180. # ticks are values, thus ticklabels are blank
  181. _check_text_labels(xlabels, [""] * len(xlabels))
  182. ylabels = ax.get_yticklabels()
  183. _check_text_labels(ylabels, [""] * len(ylabels))
  184. def test_hist_kde_plot_works(self, ts):
  185. pytest.importorskip("scipy")
  186. _check_plot_works(ts.plot.kde)
  187. def test_hist_kde_density_works(self, ts):
  188. pytest.importorskip("scipy")
  189. _check_plot_works(ts.plot.density)
  190. @pytest.mark.xfail(reason="Api changed in 3.6.0")
  191. def test_hist_kde_logy(self, ts):
  192. pytest.importorskip("scipy")
  193. _, ax = mpl.pyplot.subplots()
  194. ax = ts.plot.kde(logy=True, ax=ax)
  195. _check_ax_scales(ax, yaxis="log")
  196. xlabels = ax.get_xticklabels()
  197. _check_text_labels(xlabels, [""] * len(xlabels))
  198. ylabels = ax.get_yticklabels()
  199. _check_text_labels(ylabels, [""] * len(ylabels))
  200. def test_hist_kde_color_bins(self, ts):
  201. pytest.importorskip("scipy")
  202. _, ax = mpl.pyplot.subplots()
  203. ax = ts.plot.hist(logy=True, bins=10, color="b", ax=ax)
  204. _check_ax_scales(ax, yaxis="log")
  205. assert len(ax.patches) == 10
  206. _check_colors(ax.patches, facecolors=["b"] * 10)
  207. def test_hist_kde_color(self, ts):
  208. pytest.importorskip("scipy")
  209. _, ax = mpl.pyplot.subplots()
  210. ax = ts.plot.kde(logy=True, color="r", ax=ax)
  211. _check_ax_scales(ax, yaxis="log")
  212. lines = ax.get_lines()
  213. assert len(lines) == 1
  214. _check_colors(lines, ["r"])
  215. class TestDataFramePlots:
  216. @pytest.mark.slow
  217. def test_hist_df_legacy(self, hist_df):
  218. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  219. _check_plot_works(hist_df.hist)
  220. @pytest.mark.slow
  221. def test_hist_df_legacy_layout(self):
  222. # make sure layout is handled
  223. df = DataFrame(np.random.default_rng(2).standard_normal((10, 2)))
  224. df[2] = to_datetime(
  225. np.random.default_rng(2).integers(
  226. 812419200000000000,
  227. 819331200000000000,
  228. size=10,
  229. dtype=np.int64,
  230. )
  231. )
  232. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  233. axes = _check_plot_works(df.hist, grid=False)
  234. _check_axes_shape(axes, axes_num=3, layout=(2, 2))
  235. assert not axes[1, 1].get_visible()
  236. _check_plot_works(df[[2]].hist)
  237. @pytest.mark.slow
  238. def test_hist_df_legacy_layout2(self):
  239. df = DataFrame(np.random.default_rng(2).standard_normal((10, 1)))
  240. _check_plot_works(df.hist)
  241. @pytest.mark.slow
  242. def test_hist_df_legacy_layout3(self):
  243. # make sure layout is handled
  244. df = DataFrame(np.random.default_rng(2).standard_normal((10, 5)))
  245. df[5] = to_datetime(
  246. np.random.default_rng(2).integers(
  247. 812419200000000000,
  248. 819331200000000000,
  249. size=10,
  250. dtype=np.int64,
  251. )
  252. )
  253. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  254. axes = _check_plot_works(df.hist, layout=(4, 2))
  255. _check_axes_shape(axes, axes_num=6, layout=(4, 2))
  256. @pytest.mark.slow
  257. @pytest.mark.parametrize(
  258. "kwargs", [{"sharex": True, "sharey": True}, {"figsize": (8, 10)}, {"bins": 5}]
  259. )
  260. def test_hist_df_legacy_layout_kwargs(self, kwargs):
  261. df = DataFrame(np.random.default_rng(2).standard_normal((10, 5)))
  262. df[5] = to_datetime(
  263. np.random.default_rng(2).integers(
  264. 812419200000000000,
  265. 819331200000000000,
  266. size=10,
  267. dtype=np.int64,
  268. )
  269. )
  270. # make sure sharex, sharey is handled
  271. # handle figsize arg
  272. # check bins argument
  273. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  274. _check_plot_works(df.hist, **kwargs)
  275. @pytest.mark.slow
  276. def test_hist_df_legacy_layout_labelsize_rot(self, frame_or_series):
  277. # make sure xlabelsize and xrot are handled
  278. obj = frame_or_series(range(10))
  279. xf, yf = 20, 18
  280. xrot, yrot = 30, 40
  281. axes = obj.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
  282. _check_ticks_props(axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
  283. @pytest.mark.slow
  284. def test_hist_df_legacy_rectangles(self):
  285. from matplotlib.patches import Rectangle
  286. ser = Series(range(10))
  287. ax = ser.hist(cumulative=True, bins=4, density=True)
  288. # height of last bin (index 5) must be 1.0
  289. rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
  290. tm.assert_almost_equal(rects[-1].get_height(), 1.0)
  291. @pytest.mark.slow
  292. def test_hist_df_legacy_scale(self):
  293. ser = Series(range(10))
  294. ax = ser.hist(log=True)
  295. # scale of y must be 'log'
  296. _check_ax_scales(ax, yaxis="log")
  297. @pytest.mark.slow
  298. def test_hist_df_legacy_external_error(self):
  299. ser = Series(range(10))
  300. # propagate attr exception from matplotlib.Axes.hist
  301. with tm.external_error_raised(AttributeError):
  302. ser.hist(foo="bar")
  303. def test_hist_non_numerical_or_datetime_raises(self):
  304. # gh-10444, GH32590
  305. df = DataFrame(
  306. {
  307. "a": np.random.default_rng(2).random(10),
  308. "b": np.random.default_rng(2).integers(0, 10, 10),
  309. "c": to_datetime(
  310. np.random.default_rng(2).integers(
  311. 1582800000000000000, 1583500000000000000, 10, dtype=np.int64
  312. )
  313. ),
  314. "d": to_datetime(
  315. np.random.default_rng(2).integers(
  316. 1582800000000000000, 1583500000000000000, 10, dtype=np.int64
  317. ),
  318. utc=True,
  319. ),
  320. }
  321. )
  322. df_o = df.astype(object)
  323. msg = "hist method requires numerical or datetime columns, nothing to plot."
  324. with pytest.raises(ValueError, match=msg):
  325. df_o.hist()
  326. @pytest.mark.parametrize(
  327. "layout_test",
  328. (
  329. {"layout": None, "expected_size": (2, 2)}, # default is 2x2
  330. {"layout": (2, 2), "expected_size": (2, 2)},
  331. {"layout": (4, 1), "expected_size": (4, 1)},
  332. {"layout": (1, 4), "expected_size": (1, 4)},
  333. {"layout": (3, 3), "expected_size": (3, 3)},
  334. {"layout": (-1, 4), "expected_size": (1, 4)},
  335. {"layout": (4, -1), "expected_size": (4, 1)},
  336. {"layout": (-1, 2), "expected_size": (2, 2)},
  337. {"layout": (2, -1), "expected_size": (2, 2)},
  338. ),
  339. )
  340. def test_hist_layout(self, layout_test):
  341. df = DataFrame(np.random.default_rng(2).standard_normal((10, 2)))
  342. df[2] = to_datetime(
  343. np.random.default_rng(2).integers(
  344. 812419200000000000,
  345. 819331200000000000,
  346. size=10,
  347. dtype=np.int64,
  348. )
  349. )
  350. axes = df.hist(layout=layout_test["layout"])
  351. expected = layout_test["expected_size"]
  352. _check_axes_shape(axes, axes_num=3, layout=expected)
  353. def test_hist_layout_error(self):
  354. df = DataFrame(np.random.default_rng(2).standard_normal((10, 2)))
  355. df[2] = to_datetime(
  356. np.random.default_rng(2).integers(
  357. 812419200000000000,
  358. 819331200000000000,
  359. size=10,
  360. dtype=np.int64,
  361. )
  362. )
  363. # layout too small for all 4 plots
  364. msg = "Layout of 1x1 must be larger than required size 3"
  365. with pytest.raises(ValueError, match=msg):
  366. df.hist(layout=(1, 1))
  367. # invalid format for layout
  368. msg = re.escape("Layout must be a tuple of (rows, columns)")
  369. with pytest.raises(ValueError, match=msg):
  370. df.hist(layout=(1,))
  371. msg = "At least one dimension of layout must be positive"
  372. with pytest.raises(ValueError, match=msg):
  373. df.hist(layout=(-1, -1))
  374. # GH 9351
  375. def test_tight_layout(self):
  376. df = DataFrame(np.random.default_rng(2).standard_normal((100, 2)))
  377. df[2] = to_datetime(
  378. np.random.default_rng(2).integers(
  379. 812419200000000000,
  380. 819331200000000000,
  381. size=100,
  382. dtype=np.int64,
  383. )
  384. )
  385. # Use default_axes=True when plotting method generate subplots itself
  386. _check_plot_works(df.hist, default_axes=True)
  387. mpl.pyplot.tight_layout()
  388. def test_hist_subplot_xrot(self):
  389. # GH 30288
  390. df = DataFrame(
  391. {
  392. "length": [1.5, 0.5, 1.2, 0.9, 3],
  393. "animal": ["pig", "rabbit", "pig", "pig", "rabbit"],
  394. }
  395. )
  396. # Use default_axes=True when plotting method generate subplots itself
  397. axes = _check_plot_works(
  398. df.hist,
  399. default_axes=True,
  400. column="length",
  401. by="animal",
  402. bins=5,
  403. xrot=0,
  404. )
  405. _check_ticks_props(axes, xrot=0)
  406. @pytest.mark.parametrize(
  407. "column, expected",
  408. [
  409. (None, ["width", "length", "height"]),
  410. (["length", "width", "height"], ["length", "width", "height"]),
  411. ],
  412. )
  413. def test_hist_column_order_unchanged(self, column, expected):
  414. # GH29235
  415. df = DataFrame(
  416. {
  417. "width": [0.7, 0.2, 0.15, 0.2, 1.1],
  418. "length": [1.5, 0.5, 1.2, 0.9, 3],
  419. "height": [3, 0.5, 3.4, 2, 1],
  420. },
  421. index=["pig", "rabbit", "duck", "chicken", "horse"],
  422. )
  423. # Use default_axes=True when plotting method generate subplots itself
  424. axes = _check_plot_works(
  425. df.hist,
  426. default_axes=True,
  427. column=column,
  428. layout=(1, 3),
  429. )
  430. result = [axes[0, i].get_title() for i in range(3)]
  431. assert result == expected
  432. @pytest.mark.parametrize(
  433. "histtype, expected",
  434. [
  435. ("bar", True),
  436. ("barstacked", True),
  437. ("step", False),
  438. ("stepfilled", True),
  439. ],
  440. )
  441. def test_histtype_argument(self, histtype, expected):
  442. # GH23992 Verify functioning of histtype argument
  443. df = DataFrame(
  444. np.random.default_rng(2).integers(1, 10, size=(100, 2)), columns=["a", "b"]
  445. )
  446. ax = df.hist(histtype=histtype)
  447. _check_patches_all_filled(ax, filled=expected)
  448. @pytest.mark.parametrize("by", [None, "c"])
  449. @pytest.mark.parametrize("column", [None, "b"])
  450. def test_hist_with_legend(self, by, column):
  451. # GH 6279 - DataFrame histogram can have a legend
  452. expected_axes_num = 1 if by is None and column is not None else 2
  453. expected_layout = (1, expected_axes_num)
  454. expected_labels = column or ["a", "b"]
  455. if by is not None:
  456. expected_labels = [expected_labels] * 2
  457. index = Index(15 * ["1"] + 15 * ["2"], name="c")
  458. df = DataFrame(
  459. np.random.default_rng(2).standard_normal((30, 2)),
  460. index=index,
  461. columns=["a", "b"],
  462. )
  463. # Use default_axes=True when plotting method generate subplots itself
  464. axes = _check_plot_works(
  465. df.hist,
  466. default_axes=True,
  467. legend=True,
  468. by=by,
  469. column=column,
  470. )
  471. _check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout)
  472. if by is None and column is None:
  473. axes = axes[0]
  474. for expected_label, ax in zip(expected_labels, axes):
  475. _check_legend_labels(ax, expected_label)
  476. @pytest.mark.parametrize("by", [None, "c"])
  477. @pytest.mark.parametrize("column", [None, "b"])
  478. def test_hist_with_legend_raises(self, by, column):
  479. # GH 6279 - DataFrame histogram with legend and label raises
  480. index = Index(15 * ["1"] + 15 * ["2"], name="c")
  481. df = DataFrame(
  482. np.random.default_rng(2).standard_normal((30, 2)),
  483. index=index,
  484. columns=["a", "b"],
  485. )
  486. with pytest.raises(ValueError, match="Cannot use both legend and label"):
  487. df.hist(legend=True, by=by, column=column, label="d")
  488. def test_hist_df_kwargs(self):
  489. df = DataFrame(np.random.default_rng(2).standard_normal((10, 2)))
  490. _, ax = mpl.pyplot.subplots()
  491. ax = df.plot.hist(bins=5, ax=ax)
  492. assert len(ax.patches) == 10
  493. def test_hist_df_with_nonnumerics(self):
  494. # GH 9853
  495. df = DataFrame(
  496. np.random.default_rng(2).standard_normal((10, 4)),
  497. columns=["A", "B", "C", "D"],
  498. )
  499. df["E"] = ["x", "y"] * 5
  500. _, ax = mpl.pyplot.subplots()
  501. ax = df.plot.hist(bins=5, ax=ax)
  502. assert len(ax.patches) == 20
  503. def test_hist_df_with_nonnumerics_no_bins(self):
  504. # GH 9853
  505. df = DataFrame(
  506. np.random.default_rng(2).standard_normal((10, 4)),
  507. columns=["A", "B", "C", "D"],
  508. )
  509. df["E"] = ["x", "y"] * 5
  510. _, ax = mpl.pyplot.subplots()
  511. ax = df.plot.hist(ax=ax) # bins=10
  512. assert len(ax.patches) == 40
  513. def test_hist_secondary_legend(self):
  514. # GH 9610
  515. df = DataFrame(
  516. np.random.default_rng(2).standard_normal((30, 4)), columns=list("abcd")
  517. )
  518. # primary -> secondary
  519. _, ax = mpl.pyplot.subplots()
  520. ax = df["a"].plot.hist(legend=True, ax=ax)
  521. df["b"].plot.hist(ax=ax, legend=True, secondary_y=True)
  522. # both legends are drawn on left ax
  523. # left and right axis must be visible
  524. _check_legend_labels(ax, labels=["a", "b (right)"])
  525. assert ax.get_yaxis().get_visible()
  526. assert ax.right_ax.get_yaxis().get_visible()
  527. def test_hist_secondary_secondary(self):
  528. # GH 9610
  529. df = DataFrame(
  530. np.random.default_rng(2).standard_normal((30, 4)), columns=list("abcd")
  531. )
  532. # secondary -> secondary
  533. _, ax = mpl.pyplot.subplots()
  534. ax = df["a"].plot.hist(legend=True, secondary_y=True, ax=ax)
  535. df["b"].plot.hist(ax=ax, legend=True, secondary_y=True)
  536. # both legends are draw on left ax
  537. # left axis must be invisible, right axis must be visible
  538. _check_legend_labels(ax.left_ax, labels=["a (right)", "b (right)"])
  539. assert not ax.left_ax.get_yaxis().get_visible()
  540. assert ax.get_yaxis().get_visible()
  541. def test_hist_secondary_primary(self):
  542. # GH 9610
  543. df = DataFrame(
  544. np.random.default_rng(2).standard_normal((30, 4)), columns=list("abcd")
  545. )
  546. # secondary -> primary
  547. _, ax = mpl.pyplot.subplots()
  548. ax = df["a"].plot.hist(legend=True, secondary_y=True, ax=ax)
  549. # right axes is returned
  550. df["b"].plot.hist(ax=ax, legend=True)
  551. # both legends are draw on left ax
  552. # left and right axis must be visible
  553. _check_legend_labels(ax.left_ax, labels=["a (right)", "b"])
  554. assert ax.left_ax.get_yaxis().get_visible()
  555. assert ax.get_yaxis().get_visible()
  556. def test_hist_with_nans_and_weights(self):
  557. # GH 48884
  558. mpl_patches = pytest.importorskip("matplotlib.patches")
  559. df = DataFrame(
  560. [[np.nan, 0.2, 0.3], [0.4, np.nan, np.nan], [0.7, 0.8, 0.9]],
  561. columns=list("abc"),
  562. )
  563. weights = np.array([0.25, 0.3, 0.45])
  564. no_nan_df = DataFrame([[0.4, 0.2, 0.3], [0.7, 0.8, 0.9]], columns=list("abc"))
  565. no_nan_weights = np.array([[0.3, 0.25, 0.25], [0.45, 0.45, 0.45]])
  566. _, ax0 = mpl.pyplot.subplots()
  567. df.plot.hist(ax=ax0, weights=weights)
  568. rects = [x for x in ax0.get_children() if isinstance(x, mpl_patches.Rectangle)]
  569. heights = [rect.get_height() for rect in rects]
  570. _, ax1 = mpl.pyplot.subplots()
  571. no_nan_df.plot.hist(ax=ax1, weights=no_nan_weights)
  572. no_nan_rects = [
  573. x for x in ax1.get_children() if isinstance(x, mpl_patches.Rectangle)
  574. ]
  575. no_nan_heights = [rect.get_height() for rect in no_nan_rects]
  576. assert all(h0 == h1 for h0, h1 in zip(heights, no_nan_heights))
  577. idxerror_weights = np.array([[0.3, 0.25], [0.45, 0.45]])
  578. msg = "weights must have the same shape as data, or be a single column"
  579. with pytest.raises(ValueError, match=msg):
  580. _, ax2 = mpl.pyplot.subplots()
  581. no_nan_df.plot.hist(ax=ax2, weights=idxerror_weights)
  582. class TestDataFrameGroupByPlots:
  583. def test_grouped_hist_legacy(self):
  584. from pandas.plotting._matplotlib.hist import _grouped_hist
  585. rs = np.random.default_rng(10)
  586. df = DataFrame(rs.standard_normal((10, 1)), columns=["A"])
  587. df["B"] = to_datetime(
  588. rs.integers(
  589. 812419200000000000,
  590. 819331200000000000,
  591. size=10,
  592. dtype=np.int64,
  593. )
  594. )
  595. df["C"] = rs.integers(0, 4, 10)
  596. df["D"] = ["X"] * 10
  597. axes = _grouped_hist(df.A, by=df.C)
  598. _check_axes_shape(axes, axes_num=4, layout=(2, 2))
  599. def test_grouped_hist_legacy_axes_shape_no_col(self):
  600. rs = np.random.default_rng(10)
  601. df = DataFrame(rs.standard_normal((10, 1)), columns=["A"])
  602. df["B"] = to_datetime(
  603. rs.integers(
  604. 812419200000000000,
  605. 819331200000000000,
  606. size=10,
  607. dtype=np.int64,
  608. )
  609. )
  610. df["C"] = rs.integers(0, 4, 10)
  611. df["D"] = ["X"] * 10
  612. axes = df.hist(by=df.C)
  613. _check_axes_shape(axes, axes_num=4, layout=(2, 2))
  614. def test_grouped_hist_legacy_single_key(self):
  615. rs = np.random.default_rng(2)
  616. df = DataFrame(rs.standard_normal((10, 1)), columns=["A"])
  617. df["B"] = to_datetime(
  618. rs.integers(
  619. 812419200000000000,
  620. 819331200000000000,
  621. size=10,
  622. dtype=np.int64,
  623. )
  624. )
  625. df["C"] = rs.integers(0, 4, 10)
  626. df["D"] = ["X"] * 10
  627. # group by a key with single value
  628. axes = df.hist(by="D", rot=30)
  629. _check_axes_shape(axes, axes_num=1, layout=(1, 1))
  630. _check_ticks_props(axes, xrot=30)
  631. def test_grouped_hist_legacy_grouped_hist_kwargs(self):
  632. from matplotlib.patches import Rectangle
  633. from pandas.plotting._matplotlib.hist import _grouped_hist
  634. rs = np.random.default_rng(2)
  635. df = DataFrame(rs.standard_normal((10, 1)), columns=["A"])
  636. df["B"] = to_datetime(
  637. rs.integers(
  638. 812419200000000000,
  639. 819331200000000000,
  640. size=10,
  641. dtype=np.int64,
  642. )
  643. )
  644. df["C"] = rs.integers(0, 4, 10)
  645. # make sure kwargs to hist are handled
  646. xf, yf = 20, 18
  647. xrot, yrot = 30, 40
  648. axes = _grouped_hist(
  649. df.A,
  650. by=df.C,
  651. cumulative=True,
  652. bins=4,
  653. xlabelsize=xf,
  654. xrot=xrot,
  655. ylabelsize=yf,
  656. yrot=yrot,
  657. density=True,
  658. )
  659. # height of last bin (index 5) must be 1.0
  660. for ax in axes.ravel():
  661. rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
  662. height = rects[-1].get_height()
  663. tm.assert_almost_equal(height, 1.0)
  664. _check_ticks_props(axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
  665. def test_grouped_hist_legacy_grouped_hist(self):
  666. from pandas.plotting._matplotlib.hist import _grouped_hist
  667. rs = np.random.default_rng(2)
  668. df = DataFrame(rs.standard_normal((10, 1)), columns=["A"])
  669. df["B"] = to_datetime(
  670. rs.integers(
  671. 812419200000000000,
  672. 819331200000000000,
  673. size=10,
  674. dtype=np.int64,
  675. )
  676. )
  677. df["C"] = rs.integers(0, 4, 10)
  678. df["D"] = ["X"] * 10
  679. axes = _grouped_hist(df.A, by=df.C, log=True)
  680. # scale of y must be 'log'
  681. _check_ax_scales(axes, yaxis="log")
  682. def test_grouped_hist_legacy_external_err(self):
  683. from pandas.plotting._matplotlib.hist import _grouped_hist
  684. rs = np.random.default_rng(2)
  685. df = DataFrame(rs.standard_normal((10, 1)), columns=["A"])
  686. df["B"] = to_datetime(
  687. rs.integers(
  688. 812419200000000000,
  689. 819331200000000000,
  690. size=10,
  691. dtype=np.int64,
  692. )
  693. )
  694. df["C"] = rs.integers(0, 4, 10)
  695. df["D"] = ["X"] * 10
  696. # propagate attr exception from matplotlib.Axes.hist
  697. with tm.external_error_raised(AttributeError):
  698. _grouped_hist(df.A, by=df.C, foo="bar")
  699. def test_grouped_hist_legacy_figsize_err(self):
  700. rs = np.random.default_rng(2)
  701. df = DataFrame(rs.standard_normal((10, 1)), columns=["A"])
  702. df["B"] = to_datetime(
  703. rs.integers(
  704. 812419200000000000,
  705. 819331200000000000,
  706. size=10,
  707. dtype=np.int64,
  708. )
  709. )
  710. df["C"] = rs.integers(0, 4, 10)
  711. df["D"] = ["X"] * 10
  712. msg = "Specify figure size by tuple instead"
  713. with pytest.raises(ValueError, match=msg):
  714. df.hist(by="C", figsize="default")
  715. def test_grouped_hist_legacy2(self):
  716. n = 10
  717. weight = Series(np.random.default_rng(2).normal(166, 20, size=n))
  718. height = Series(np.random.default_rng(2).normal(60, 10, size=n))
  719. gender_int = np.random.default_rng(2).choice([0, 1], size=n)
  720. df_int = DataFrame({"height": height, "weight": weight, "gender": gender_int})
  721. gb = df_int.groupby("gender")
  722. axes = gb.hist()
  723. assert len(axes) == 2
  724. assert len(mpl.pyplot.get_fignums()) == 2
  725. @pytest.mark.slow
  726. @pytest.mark.parametrize(
  727. "msg, plot_col, by_col, layout",
  728. [
  729. [
  730. "Layout of 1x1 must be larger than required size 2",
  731. "weight",
  732. "gender",
  733. (1, 1),
  734. ],
  735. [
  736. "Layout of 1x3 must be larger than required size 4",
  737. "height",
  738. "category",
  739. (1, 3),
  740. ],
  741. [
  742. "At least one dimension of layout must be positive",
  743. "height",
  744. "category",
  745. (-1, -1),
  746. ],
  747. ],
  748. )
  749. def test_grouped_hist_layout_error(self, hist_df, msg, plot_col, by_col, layout):
  750. df = hist_df
  751. with pytest.raises(ValueError, match=msg):
  752. df.hist(column=plot_col, by=getattr(df, by_col), layout=layout)
  753. @pytest.mark.slow
  754. def test_grouped_hist_layout_warning(self, hist_df):
  755. df = hist_df
  756. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  757. axes = _check_plot_works(
  758. df.hist, column="height", by=df.gender, layout=(2, 1)
  759. )
  760. _check_axes_shape(axes, axes_num=2, layout=(2, 1))
  @pytest.mark.slow
  @pytest.mark.parametrize(
      "layout, check_layout, figsize",
      [
          ((4, 1), (4, 1), None),
          # a -1 entry is expected to be resolved to the matching dimension
          ((-1, 1), (4, 1), None),
          ((4, 2), (4, 2), (12, 8)),
      ],
  )
  def test_grouped_hist_layout_figsize(self, hist_df, layout, check_layout, figsize):
      """Check the axes shape and figsize produced for each requested layout."""
      grouper = hist_df.category
      result = hist_df.hist(
          column="height", by=grouper, layout=layout, figsize=figsize
      )
      _check_axes_shape(result, axes_num=4, layout=check_layout, figsize=figsize)
  @pytest.mark.slow
  @pytest.mark.parametrize(
      "kwargs",
      [
          {},
          {"column": "height", "layout": (2, 2)},
      ],
  )
  def test_grouped_hist_layout_by_warning(self, hist_df, kwargs):
      """Grouping by ``classroom`` warns and ends up with 3 axes in a (2, 2) grid."""
      # GH 6769
      expect_warning = tm.assert_produces_warning(UserWarning, check_stacklevel=False)
      with expect_warning:
          result = _check_plot_works(hist_df.hist, by="classroom", **kwargs)
      _check_axes_shape(result, axes_num=3, layout=(2, 2))
  @pytest.mark.slow
  @pytest.mark.parametrize(
      "kwargs, axes_num, layout",
      [
          ({"by": "gender", "layout": (3, 5)}, 2, (3, 5)),
          ({"column": ["height", "weight", "category"]}, 3, (2, 2)),
      ],
  )
  def test_grouped_hist_layout_axes(self, hist_df, kwargs, axes_num, layout):
      """Check the number of axes and the grid layout for each ``hist`` call."""
      result = hist_df.hist(**kwargs)
      _check_axes_shape(result, axes_num=axes_num, layout=layout)
  def test_grouped_hist_multiple_axes(self, hist_df):
      """Three columns drawn onto the first row of an existing 2x3 axes grid."""
      # GH 6970, GH 7069
      figure, grid = mpl.pyplot.subplots(2, 3)
      top_row = grid[0]
      drawn = hist_df.hist(column=["height", "weight", "category"], ax=top_row)
      _check_axes_shape(drawn, axes_num=3, layout=(1, 3))
      # The axes handed in must be the ones handed back, on the same figure.
      tm.assert_numpy_array_equal(drawn, top_row)
      assert drawn[0].figure is figure
  def test_grouped_hist_multiple_axes_no_cols(self, hist_df):
      """Grouping by ``classroom`` onto the second row of an existing 2x3 grid."""
      # GH 6970, GH 7069
      figure, grid = mpl.pyplot.subplots(2, 3)
      bottom_row = grid[1]
      drawn = hist_df.hist(by="classroom", ax=bottom_row)
      _check_axes_shape(drawn, axes_num=3, layout=(1, 3))
      # The axes handed in must be the ones handed back, on the same figure.
      tm.assert_numpy_array_equal(drawn, bottom_row)
      assert drawn[0].figure is figure
  def test_grouped_hist_multiple_axes_error(self, hist_df):
      """Passing a 2x3 axes grid for a single-column plot raises ``ValueError``."""
      # GH 6970, GH 7069
      _, grid = mpl.pyplot.subplots(2, 3)
      # pass different number of axes from required
      expected_msg = "The number of passed axes must be 1, the same as the output plot"
      with pytest.raises(ValueError, match=expected_msg):
          hist_df.hist(column="height", ax=grid)
  def test_axis_share_x(self, hist_df):
      """``sharex=True`` joins the x axes of both subplots but not the y axes."""
      # GH4089
      first, second = hist_df.hist(column="height", by=hist_df.gender, sharex=True)
      both = (first, second)

      # share x
      for ax in both:
          assert get_x_axis(ax).joined(first, second)

      # don't share y
      for ax in both:
          assert not get_y_axis(ax).joined(first, second)
  def test_axis_share_y(self, hist_df):
      """``sharey=True`` joins the y axes of both subplots but not the x axes."""
      first, second = hist_df.hist(column="height", by=hist_df.gender, sharey=True)
      both = (first, second)

      # share y
      for ax in both:
          assert get_y_axis(ax).joined(first, second)

      # don't share x
      for ax in both:
          assert not get_x_axis(ax).joined(first, second)
  def test_axis_share_xy(self, hist_df):
      """``sharex=True`` together with ``sharey=True`` joins both axes."""
      first, second = hist_df.hist(
          column="height", by=hist_df.gender, sharex=True, sharey=True
      )

      # share both x and y; x is checked for both subplots before y
      for axis_getter in (get_x_axis, get_y_axis):
          for ax in (first, second):
              assert axis_getter(ax).joined(first, second)
  @pytest.mark.parametrize(
      "histtype, expected",
      [("bar", True), ("barstacked", True), ("step", False), ("stepfilled", True)],
  )
  def test_histtype_argument(self, histtype, expected):
      """Each ``histtype`` yields patches that are filled or unfilled as expected."""
      # GH23992 Verify functioning of histtype argument
      values = np.random.default_rng(2).integers(1, 10, size=(10, 2))
      frame = DataFrame(values, columns=["a", "b"])
      axes = frame.hist(by="a", histtype=histtype)
      _check_patches_all_filled(axes, filled=expected)