_misc.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688
  1. from __future__ import annotations
  2. from contextlib import contextmanager
  3. from typing import (
  4. TYPE_CHECKING,
  5. Any,
  6. )
  7. from pandas.plotting._core import _get_plot_backend
  8. if TYPE_CHECKING:
  9. from collections.abc import (
  10. Generator,
  11. Mapping,
  12. )
  13. from matplotlib.axes import Axes
  14. from matplotlib.colors import Colormap
  15. from matplotlib.figure import Figure
  16. from matplotlib.table import Table
  17. import numpy as np
  18. from pandas import (
  19. DataFrame,
  20. Series,
  21. )
  22. def table(ax: Axes, data: DataFrame | Series, **kwargs) -> Table:
  23. """
  24. Helper function to convert DataFrame and Series to matplotlib.table.
  25. Parameters
  26. ----------
  27. ax : Matplotlib axes object
  28. data : DataFrame or Series
  29. Data for table contents.
  30. **kwargs
  31. Keyword arguments to be passed to matplotlib.table.table.
  32. If `rowLabels` or `colLabels` is not specified, data index or column
  33. name will be used.
  34. Returns
  35. -------
  36. matplotlib table object
  37. Examples
  38. --------
  39. .. plot::
  40. :context: close-figs
  41. >>> import matplotlib.pyplot as plt
  42. >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
  43. >>> fix, ax = plt.subplots()
  44. >>> ax.axis('off')
  45. (0.0, 1.0, 0.0, 1.0)
  46. >>> table = pd.plotting.table(ax, df, loc='center',
  47. ... cellLoc='center', colWidths=list([.2, .2]))
  48. """
  49. plot_backend = _get_plot_backend("matplotlib")
  50. return plot_backend.table(
  51. ax=ax, data=data, rowLabels=None, colLabels=None, **kwargs
  52. )
  53. def register() -> None:
  54. """
  55. Register pandas formatters and converters with matplotlib.
  56. This function modifies the global ``matplotlib.units.registry``
  57. dictionary. pandas adds custom converters for
  58. * pd.Timestamp
  59. * pd.Period
  60. * np.datetime64
  61. * datetime.datetime
  62. * datetime.date
  63. * datetime.time
  64. See Also
  65. --------
  66. deregister_matplotlib_converters : Remove pandas formatters and converters.
  67. Examples
  68. --------
  69. .. plot::
  70. :context: close-figs
  71. The following line is done automatically by pandas so
  72. the plot can be rendered:
  73. >>> pd.plotting.register_matplotlib_converters()
  74. >>> df = pd.DataFrame({'ts': pd.period_range('2020', periods=2, freq='M'),
  75. ... 'y': [1, 2]
  76. ... })
  77. >>> plot = df.plot.line(x='ts', y='y')
  78. Unsetting the register manually an error will be raised:
  79. >>> pd.set_option("plotting.matplotlib.register_converters",
  80. ... False) # doctest: +SKIP
  81. >>> df.plot.line(x='ts', y='y') # doctest: +SKIP
  82. Traceback (most recent call last):
  83. TypeError: float() argument must be a string or a real number, not 'Period'
  84. """
  85. plot_backend = _get_plot_backend("matplotlib")
  86. plot_backend.register()
  87. def deregister() -> None:
  88. """
  89. Remove pandas formatters and converters.
  90. Removes the custom converters added by :func:`register`. This
  91. attempts to set the state of the registry back to the state before
  92. pandas registered its own units. Converters for pandas' own types like
  93. Timestamp and Period are removed completely. Converters for types
  94. pandas overwrites, like ``datetime.datetime``, are restored to their
  95. original value.
  96. See Also
  97. --------
  98. register_matplotlib_converters : Register pandas formatters and converters
  99. with matplotlib.
  100. Examples
  101. --------
  102. .. plot::
  103. :context: close-figs
  104. The following line is done automatically by pandas so
  105. the plot can be rendered:
  106. >>> pd.plotting.register_matplotlib_converters()
  107. >>> df = pd.DataFrame({'ts': pd.period_range('2020', periods=2, freq='M'),
  108. ... 'y': [1, 2]
  109. ... })
  110. >>> plot = df.plot.line(x='ts', y='y')
  111. Unsetting the register manually an error will be raised:
  112. >>> pd.set_option("plotting.matplotlib.register_converters",
  113. ... False) # doctest: +SKIP
  114. >>> df.plot.line(x='ts', y='y') # doctest: +SKIP
  115. Traceback (most recent call last):
  116. TypeError: float() argument must be a string or a real number, not 'Period'
  117. """
  118. plot_backend = _get_plot_backend("matplotlib")
  119. plot_backend.deregister()
  120. def scatter_matrix(
  121. frame: DataFrame,
  122. alpha: float = 0.5,
  123. figsize: tuple[float, float] | None = None,
  124. ax: Axes | None = None,
  125. grid: bool = False,
  126. diagonal: str = "hist",
  127. marker: str = ".",
  128. density_kwds: Mapping[str, Any] | None = None,
  129. hist_kwds: Mapping[str, Any] | None = None,
  130. range_padding: float = 0.05,
  131. **kwargs,
  132. ) -> np.ndarray:
  133. """
  134. Draw a matrix of scatter plots.
  135. Parameters
  136. ----------
  137. frame : DataFrame
  138. alpha : float, optional
  139. Amount of transparency applied.
  140. figsize : (float,float), optional
  141. A tuple (width, height) in inches.
  142. ax : Matplotlib axis object, optional
  143. grid : bool, optional
  144. Setting this to True will show the grid.
  145. diagonal : {'hist', 'kde'}
  146. Pick between 'kde' and 'hist' for either Kernel Density Estimation or
  147. Histogram plot in the diagonal.
  148. marker : str, optional
  149. Matplotlib marker type, default '.'.
  150. density_kwds : keywords
  151. Keyword arguments to be passed to kernel density estimate plot.
  152. hist_kwds : keywords
  153. Keyword arguments to be passed to hist function.
  154. range_padding : float, default 0.05
  155. Relative extension of axis range in x and y with respect to
  156. (x_max - x_min) or (y_max - y_min).
  157. **kwargs
  158. Keyword arguments to be passed to scatter function.
  159. Returns
  160. -------
  161. numpy.ndarray
  162. A matrix of scatter plots.
  163. Examples
  164. --------
  165. .. plot::
  166. :context: close-figs
  167. >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
  168. >>> pd.plotting.scatter_matrix(df, alpha=0.2)
  169. array([[<Axes: xlabel='A', ylabel='A'>, <Axes: xlabel='B', ylabel='A'>,
  170. <Axes: xlabel='C', ylabel='A'>, <Axes: xlabel='D', ylabel='A'>],
  171. [<Axes: xlabel='A', ylabel='B'>, <Axes: xlabel='B', ylabel='B'>,
  172. <Axes: xlabel='C', ylabel='B'>, <Axes: xlabel='D', ylabel='B'>],
  173. [<Axes: xlabel='A', ylabel='C'>, <Axes: xlabel='B', ylabel='C'>,
  174. <Axes: xlabel='C', ylabel='C'>, <Axes: xlabel='D', ylabel='C'>],
  175. [<Axes: xlabel='A', ylabel='D'>, <Axes: xlabel='B', ylabel='D'>,
  176. <Axes: xlabel='C', ylabel='D'>, <Axes: xlabel='D', ylabel='D'>]],
  177. dtype=object)
  178. """
  179. plot_backend = _get_plot_backend("matplotlib")
  180. return plot_backend.scatter_matrix(
  181. frame=frame,
  182. alpha=alpha,
  183. figsize=figsize,
  184. ax=ax,
  185. grid=grid,
  186. diagonal=diagonal,
  187. marker=marker,
  188. density_kwds=density_kwds,
  189. hist_kwds=hist_kwds,
  190. range_padding=range_padding,
  191. **kwargs,
  192. )
  193. def radviz(
  194. frame: DataFrame,
  195. class_column: str,
  196. ax: Axes | None = None,
  197. color: list[str] | tuple[str, ...] | None = None,
  198. colormap: Colormap | str | None = None,
  199. **kwds,
  200. ) -> Axes:
  201. """
  202. Plot a multidimensional dataset in 2D.
  203. Each Series in the DataFrame is represented as a evenly distributed
  204. slice on a circle. Each data point is rendered in the circle according to
  205. the value on each Series. Highly correlated `Series` in the `DataFrame`
  206. are placed closer on the unit circle.
  207. RadViz allow to project a N-dimensional data set into a 2D space where the
  208. influence of each dimension can be interpreted as a balance between the
  209. influence of all dimensions.
  210. More info available at the `original article
  211. <https://doi.org/10.1145/331770.331775>`_
  212. describing RadViz.
  213. Parameters
  214. ----------
  215. frame : `DataFrame`
  216. Object holding the data.
  217. class_column : str
  218. Column name containing the name of the data point category.
  219. ax : :class:`matplotlib.axes.Axes`, optional
  220. A plot instance to which to add the information.
  221. color : list[str] or tuple[str], optional
  222. Assign a color to each category. Example: ['blue', 'green'].
  223. colormap : str or :class:`matplotlib.colors.Colormap`, default None
  224. Colormap to select colors from. If string, load colormap with that
  225. name from matplotlib.
  226. **kwds
  227. Options to pass to matplotlib scatter plotting method.
  228. Returns
  229. -------
  230. :class:`matplotlib.axes.Axes`
  231. See Also
  232. --------
  233. pandas.plotting.andrews_curves : Plot clustering visualization.
  234. Examples
  235. --------
  236. .. plot::
  237. :context: close-figs
  238. >>> df = pd.DataFrame(
  239. ... {
  240. ... 'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, 6.7, 4.6],
  241. ... 'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, 3.3, 3.6],
  242. ... 'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, 5.7, 1.0],
  243. ... 'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, 2.1, 0.2],
  244. ... 'Category': [
  245. ... 'virginica',
  246. ... 'virginica',
  247. ... 'setosa',
  248. ... 'virginica',
  249. ... 'virginica',
  250. ... 'versicolor',
  251. ... 'versicolor',
  252. ... 'setosa',
  253. ... 'virginica',
  254. ... 'setosa'
  255. ... ]
  256. ... }
  257. ... )
  258. >>> pd.plotting.radviz(df, 'Category') # doctest: +SKIP
  259. """
  260. plot_backend = _get_plot_backend("matplotlib")
  261. return plot_backend.radviz(
  262. frame=frame,
  263. class_column=class_column,
  264. ax=ax,
  265. color=color,
  266. colormap=colormap,
  267. **kwds,
  268. )
  269. def andrews_curves(
  270. frame: DataFrame,
  271. class_column: str,
  272. ax: Axes | None = None,
  273. samples: int = 200,
  274. color: list[str] | tuple[str, ...] | None = None,
  275. colormap: Colormap | str | None = None,
  276. **kwargs,
  277. ) -> Axes:
  278. """
  279. Generate a matplotlib plot for visualizing clusters of multivariate data.
  280. Andrews curves have the functional form:
  281. .. math::
  282. f(t) = \\frac{x_1}{\\sqrt{2}} + x_2 \\sin(t) + x_3 \\cos(t) +
  283. x_4 \\sin(2t) + x_5 \\cos(2t) + \\cdots
  284. Where :math:`x` coefficients correspond to the values of each dimension
  285. and :math:`t` is linearly spaced between :math:`-\\pi` and :math:`+\\pi`.
  286. Each row of frame then corresponds to a single curve.
  287. Parameters
  288. ----------
  289. frame : DataFrame
  290. Data to be plotted, preferably normalized to (0.0, 1.0).
  291. class_column : label
  292. Name of the column containing class names.
  293. ax : axes object, default None
  294. Axes to use.
  295. samples : int
  296. Number of points to plot in each curve.
  297. color : str, list[str] or tuple[str], optional
  298. Colors to use for the different classes. Colors can be strings
  299. or 3-element floating point RGB values.
  300. colormap : str or matplotlib colormap object, default None
  301. Colormap to select colors from. If a string, load colormap with that
  302. name from matplotlib.
  303. **kwargs
  304. Options to pass to matplotlib plotting method.
  305. Returns
  306. -------
  307. :class:`matplotlib.axes.Axes`
  308. Examples
  309. --------
  310. .. plot::
  311. :context: close-figs
  312. >>> df = pd.read_csv(
  313. ... 'https://raw.githubusercontent.com/pandas-dev/'
  314. ... 'pandas/main/pandas/tests/io/data/csv/iris.csv'
  315. ... )
  316. >>> pd.plotting.andrews_curves(df, 'Name') # doctest: +SKIP
  317. """
  318. plot_backend = _get_plot_backend("matplotlib")
  319. return plot_backend.andrews_curves(
  320. frame=frame,
  321. class_column=class_column,
  322. ax=ax,
  323. samples=samples,
  324. color=color,
  325. colormap=colormap,
  326. **kwargs,
  327. )
  328. def bootstrap_plot(
  329. series: Series,
  330. fig: Figure | None = None,
  331. size: int = 50,
  332. samples: int = 500,
  333. **kwds,
  334. ) -> Figure:
  335. """
  336. Bootstrap plot on mean, median and mid-range statistics.
  337. The bootstrap plot is used to estimate the uncertainty of a statistic
  338. by relying on random sampling with replacement [1]_. This function will
  339. generate bootstrapping plots for mean, median and mid-range statistics
  340. for the given number of samples of the given size.
  341. .. [1] "Bootstrapping (statistics)" in \
  342. https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29
  343. Parameters
  344. ----------
  345. series : pandas.Series
  346. Series from where to get the samplings for the bootstrapping.
  347. fig : matplotlib.figure.Figure, default None
  348. If given, it will use the `fig` reference for plotting instead of
  349. creating a new one with default parameters.
  350. size : int, default 50
  351. Number of data points to consider during each sampling. It must be
  352. less than or equal to the length of the `series`.
  353. samples : int, default 500
  354. Number of times the bootstrap procedure is performed.
  355. **kwds
  356. Options to pass to matplotlib plotting method.
  357. Returns
  358. -------
  359. matplotlib.figure.Figure
  360. Matplotlib figure.
  361. See Also
  362. --------
  363. pandas.DataFrame.plot : Basic plotting for DataFrame objects.
  364. pandas.Series.plot : Basic plotting for Series objects.
  365. Examples
  366. --------
  367. This example draws a basic bootstrap plot for a Series.
  368. .. plot::
  369. :context: close-figs
  370. >>> s = pd.Series(np.random.uniform(size=100))
  371. >>> pd.plotting.bootstrap_plot(s) # doctest: +SKIP
  372. <Figure size 640x480 with 6 Axes>
  373. """
  374. plot_backend = _get_plot_backend("matplotlib")
  375. return plot_backend.bootstrap_plot(
  376. series=series, fig=fig, size=size, samples=samples, **kwds
  377. )
  378. def parallel_coordinates(
  379. frame: DataFrame,
  380. class_column: str,
  381. cols: list[str] | None = None,
  382. ax: Axes | None = None,
  383. color: list[str] | tuple[str, ...] | None = None,
  384. use_columns: bool = False,
  385. xticks: list | tuple | None = None,
  386. colormap: Colormap | str | None = None,
  387. axvlines: bool = True,
  388. axvlines_kwds: Mapping[str, Any] | None = None,
  389. sort_labels: bool = False,
  390. **kwargs,
  391. ) -> Axes:
  392. """
  393. Parallel coordinates plotting.
  394. Parameters
  395. ----------
  396. frame : DataFrame
  397. class_column : str
  398. Column name containing class names.
  399. cols : list, optional
  400. A list of column names to use.
  401. ax : matplotlib.axis, optional
  402. Matplotlib axis object.
  403. color : list or tuple, optional
  404. Colors to use for the different classes.
  405. use_columns : bool, optional
  406. If true, columns will be used as xticks.
  407. xticks : list or tuple, optional
  408. A list of values to use for xticks.
  409. colormap : str or matplotlib colormap, default None
  410. Colormap to use for line colors.
  411. axvlines : bool, optional
  412. If true, vertical lines will be added at each xtick.
  413. axvlines_kwds : keywords, optional
  414. Options to be passed to axvline method for vertical lines.
  415. sort_labels : bool, default False
  416. Sort class_column labels, useful when assigning colors.
  417. **kwargs
  418. Options to pass to matplotlib plotting method.
  419. Returns
  420. -------
  421. matplotlib.axes.Axes
  422. Examples
  423. --------
  424. .. plot::
  425. :context: close-figs
  426. >>> df = pd.read_csv(
  427. ... 'https://raw.githubusercontent.com/pandas-dev/'
  428. ... 'pandas/main/pandas/tests/io/data/csv/iris.csv'
  429. ... )
  430. >>> pd.plotting.parallel_coordinates(
  431. ... df, 'Name', color=('#556270', '#4ECDC4', '#C7F464')
  432. ... ) # doctest: +SKIP
  433. """
  434. plot_backend = _get_plot_backend("matplotlib")
  435. return plot_backend.parallel_coordinates(
  436. frame=frame,
  437. class_column=class_column,
  438. cols=cols,
  439. ax=ax,
  440. color=color,
  441. use_columns=use_columns,
  442. xticks=xticks,
  443. colormap=colormap,
  444. axvlines=axvlines,
  445. axvlines_kwds=axvlines_kwds,
  446. sort_labels=sort_labels,
  447. **kwargs,
  448. )
  449. def lag_plot(series: Series, lag: int = 1, ax: Axes | None = None, **kwds) -> Axes:
  450. """
  451. Lag plot for time series.
  452. Parameters
  453. ----------
  454. series : Series
  455. The time series to visualize.
  456. lag : int, default 1
  457. Lag length of the scatter plot.
  458. ax : Matplotlib axis object, optional
  459. The matplotlib axis object to use.
  460. **kwds
  461. Matplotlib scatter method keyword arguments.
  462. Returns
  463. -------
  464. matplotlib.axes.Axes
  465. Examples
  466. --------
  467. Lag plots are most commonly used to look for patterns in time series data.
  468. Given the following time series
  469. .. plot::
  470. :context: close-figs
  471. >>> np.random.seed(5)
  472. >>> x = np.cumsum(np.random.normal(loc=1, scale=5, size=50))
  473. >>> s = pd.Series(x)
  474. >>> s.plot() # doctest: +SKIP
  475. A lag plot with ``lag=1`` returns
  476. .. plot::
  477. :context: close-figs
  478. >>> pd.plotting.lag_plot(s, lag=1)
  479. <Axes: xlabel='y(t)', ylabel='y(t + 1)'>
  480. """
  481. plot_backend = _get_plot_backend("matplotlib")
  482. return plot_backend.lag_plot(series=series, lag=lag, ax=ax, **kwds)
  483. def autocorrelation_plot(series: Series, ax: Axes | None = None, **kwargs) -> Axes:
  484. """
  485. Autocorrelation plot for time series.
  486. Parameters
  487. ----------
  488. series : Series
  489. The time series to visualize.
  490. ax : Matplotlib axis object, optional
  491. The matplotlib axis object to use.
  492. **kwargs
  493. Options to pass to matplotlib plotting method.
  494. Returns
  495. -------
  496. matplotlib.axes.Axes
  497. Examples
  498. --------
  499. The horizontal lines in the plot correspond to 95% and 99% confidence bands.
  500. The dashed line is 99% confidence band.
  501. .. plot::
  502. :context: close-figs
  503. >>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
  504. >>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
  505. >>> pd.plotting.autocorrelation_plot(s) # doctest: +SKIP
  506. """
  507. plot_backend = _get_plot_backend("matplotlib")
  508. return plot_backend.autocorrelation_plot(series=series, ax=ax, **kwargs)
  509. class _Options(dict):
  510. """
  511. Stores pandas plotting options.
  512. Allows for parameter aliasing so you can just use parameter names that are
  513. the same as the plot function parameters, but is stored in a canonical
  514. format that makes it easy to breakdown into groups later.
  515. Examples
  516. --------
  517. .. plot::
  518. :context: close-figs
  519. >>> np.random.seed(42)
  520. >>> df = pd.DataFrame({'A': np.random.randn(10),
  521. ... 'B': np.random.randn(10)},
  522. ... index=pd.date_range("1/1/2000",
  523. ... freq='4MS', periods=10))
  524. >>> with pd.plotting.plot_params.use("x_compat", True):
  525. ... _ = df["A"].plot(color="r")
  526. ... _ = df["B"].plot(color="g")
  527. """
  528. # alias so the names are same as plotting method parameter names
  529. _ALIASES = {"x_compat": "xaxis.compat"}
  530. _DEFAULT_KEYS = ["xaxis.compat"]
  531. def __init__(self, deprecated: bool = False) -> None:
  532. self._deprecated = deprecated
  533. super().__setitem__("xaxis.compat", False)
  534. def __getitem__(self, key):
  535. key = self._get_canonical_key(key)
  536. if key not in self:
  537. raise ValueError(f"{key} is not a valid pandas plotting option")
  538. return super().__getitem__(key)
  539. def __setitem__(self, key, value) -> None:
  540. key = self._get_canonical_key(key)
  541. super().__setitem__(key, value)
  542. def __delitem__(self, key) -> None:
  543. key = self._get_canonical_key(key)
  544. if key in self._DEFAULT_KEYS:
  545. raise ValueError(f"Cannot remove default parameter {key}")
  546. super().__delitem__(key)
  547. def __contains__(self, key) -> bool:
  548. key = self._get_canonical_key(key)
  549. return super().__contains__(key)
  550. def reset(self) -> None:
  551. """
  552. Reset the option store to its initial state
  553. Returns
  554. -------
  555. None
  556. """
  557. # error: Cannot access "__init__" directly
  558. self.__init__() # type: ignore[misc]
  559. def _get_canonical_key(self, key):
  560. return self._ALIASES.get(key, key)
  561. @contextmanager
  562. def use(self, key, value) -> Generator[_Options, None, None]:
  563. """
  564. Temporarily set a parameter value using the with statement.
  565. Aliasing allowed.
  566. """
  567. old_value = self[key]
  568. try:
  569. self[key] = value
  570. yield self
  571. finally:
  572. self[key] = old_value
  573. plot_params = _Options()