excel.py 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962
  1. """
  2. Utilities for conversion to writer-agnostic Excel representation.
  3. """
  4. from __future__ import annotations
  5. from collections.abc import (
  6. Hashable,
  7. Iterable,
  8. Mapping,
  9. Sequence,
  10. )
  11. import functools
  12. import itertools
  13. import re
  14. from typing import (
  15. TYPE_CHECKING,
  16. Any,
  17. Callable,
  18. cast,
  19. )
  20. import warnings
  21. import numpy as np
  22. from pandas._libs.lib import is_list_like
  23. from pandas.util._decorators import doc
  24. from pandas.util._exceptions import find_stack_level
  25. from pandas.core.dtypes import missing
  26. from pandas.core.dtypes.common import (
  27. is_float,
  28. is_scalar,
  29. )
  30. from pandas import (
  31. DataFrame,
  32. Index,
  33. MultiIndex,
  34. PeriodIndex,
  35. )
  36. import pandas.core.common as com
  37. from pandas.core.shared_docs import _shared_docs
  38. from pandas.io.formats._color_data import CSS4_COLORS
  39. from pandas.io.formats.css import (
  40. CSSResolver,
  41. CSSWarning,
  42. )
  43. from pandas.io.formats.format import get_level_lengths
  44. from pandas.io.formats.printing import pprint_thing
  45. if TYPE_CHECKING:
  46. from pandas._typing import (
  47. FilePath,
  48. IndexLabel,
  49. StorageOptions,
  50. WriteExcelBuffer,
  51. )
  52. from pandas import ExcelWriter
  53. class ExcelCell:
  54. __fields__ = ("row", "col", "val", "style", "mergestart", "mergeend")
  55. __slots__ = __fields__
  56. def __init__(
  57. self,
  58. row: int,
  59. col: int,
  60. val,
  61. style=None,
  62. mergestart: int | None = None,
  63. mergeend: int | None = None,
  64. ) -> None:
  65. self.row = row
  66. self.col = col
  67. self.val = val
  68. self.style = style
  69. self.mergestart = mergestart
  70. self.mergeend = mergeend
  71. class CssExcelCell(ExcelCell):
  72. def __init__(
  73. self,
  74. row: int,
  75. col: int,
  76. val,
  77. style: dict | None,
  78. css_styles: dict[tuple[int, int], list[tuple[str, Any]]] | None,
  79. css_row: int,
  80. css_col: int,
  81. css_converter: Callable | None,
  82. **kwargs,
  83. ) -> None:
  84. if css_styles and css_converter:
  85. # Use dict to get only one (case-insensitive) declaration per property
  86. declaration_dict = {
  87. prop.lower(): val for prop, val in css_styles[css_row, css_col]
  88. }
  89. # Convert to frozenset for order-invariant caching
  90. unique_declarations = frozenset(declaration_dict.items())
  91. style = css_converter(unique_declarations)
  92. super().__init__(row=row, col=col, val=val, style=style, **kwargs)
  93. class CSSToExcelConverter:
  94. """
  95. A callable for converting CSS declarations to ExcelWriter styles
  96. Supports parts of CSS 2.2, with minimal CSS 3.0 support (e.g. text-shadow),
  97. focusing on font styling, backgrounds, borders and alignment.
  98. Operates by first computing CSS styles in a fairly generic
  99. way (see :meth:`compute_css`) then determining Excel style
  100. properties from CSS properties (see :meth:`build_xlstyle`).
  101. Parameters
  102. ----------
  103. inherited : str, optional
  104. CSS declarations understood to be the containing scope for the
  105. CSS processed by :meth:`__call__`.
  106. """
  107. NAMED_COLORS = CSS4_COLORS
  108. VERTICAL_MAP = {
  109. "top": "top",
  110. "text-top": "top",
  111. "middle": "center",
  112. "baseline": "bottom",
  113. "bottom": "bottom",
  114. "text-bottom": "bottom",
  115. # OpenXML also has 'justify', 'distributed'
  116. }
  117. BOLD_MAP = {
  118. "bold": True,
  119. "bolder": True,
  120. "600": True,
  121. "700": True,
  122. "800": True,
  123. "900": True,
  124. "normal": False,
  125. "lighter": False,
  126. "100": False,
  127. "200": False,
  128. "300": False,
  129. "400": False,
  130. "500": False,
  131. }
  132. ITALIC_MAP = {
  133. "normal": False,
  134. "italic": True,
  135. "oblique": True,
  136. }
  137. FAMILY_MAP = {
  138. "serif": 1, # roman
  139. "sans-serif": 2, # swiss
  140. "cursive": 4, # script
  141. "fantasy": 5, # decorative
  142. }
  143. BORDER_STYLE_MAP = {
  144. style.lower(): style
  145. for style in [
  146. "dashed",
  147. "mediumDashDot",
  148. "dashDotDot",
  149. "hair",
  150. "dotted",
  151. "mediumDashDotDot",
  152. "double",
  153. "dashDot",
  154. "slantDashDot",
  155. "mediumDashed",
  156. ]
  157. }
  158. # NB: Most of the methods here could be classmethods, as only __init__
  159. # and __call__ make use of instance attributes. We leave them as
  160. # instancemethods so that users can easily experiment with extensions
  161. # without monkey-patching.
  162. inherited: dict[str, str] | None
  163. def __init__(self, inherited: str | None = None) -> None:
  164. if inherited is not None:
  165. self.inherited = self.compute_css(inherited)
  166. else:
  167. self.inherited = None
  168. # We should avoid cache on the __call__ method.
  169. # Otherwise once the method __call__ has been called
  170. # garbage collection no longer deletes the instance.
  171. self._call_cached = functools.cache(self._call_uncached)
  172. compute_css = CSSResolver()
  173. def __call__(
  174. self, declarations: str | frozenset[tuple[str, str]]
  175. ) -> dict[str, dict[str, str]]:
  176. """
  177. Convert CSS declarations to ExcelWriter style.
  178. Parameters
  179. ----------
  180. declarations : str | frozenset[tuple[str, str]]
  181. CSS string or set of CSS declaration tuples.
  182. e.g. "font-weight: bold; background: blue" or
  183. {("font-weight", "bold"), ("background", "blue")}
  184. Returns
  185. -------
  186. xlstyle : dict
  187. A style as interpreted by ExcelWriter when found in
  188. ExcelCell.style.
  189. """
  190. return self._call_cached(declarations)
  191. def _call_uncached(
  192. self, declarations: str | frozenset[tuple[str, str]]
  193. ) -> dict[str, dict[str, str]]:
  194. properties = self.compute_css(declarations, self.inherited)
  195. return self.build_xlstyle(properties)
  196. def build_xlstyle(self, props: Mapping[str, str]) -> dict[str, dict[str, str]]:
  197. out = {
  198. "alignment": self.build_alignment(props),
  199. "border": self.build_border(props),
  200. "fill": self.build_fill(props),
  201. "font": self.build_font(props),
  202. "number_format": self.build_number_format(props),
  203. }
  204. # TODO: handle cell width and height: needs support in pandas.io.excel
  205. def remove_none(d: dict[str, str | None]) -> None:
  206. """Remove key where value is None, through nested dicts"""
  207. for k, v in list(d.items()):
  208. if v is None:
  209. del d[k]
  210. elif isinstance(v, dict):
  211. remove_none(v)
  212. if not v:
  213. del d[k]
  214. remove_none(out)
  215. return out
  216. def build_alignment(self, props: Mapping[str, str]) -> dict[str, bool | str | None]:
  217. # TODO: text-indent, padding-left -> alignment.indent
  218. return {
  219. "horizontal": props.get("text-align"),
  220. "vertical": self._get_vertical_alignment(props),
  221. "wrap_text": self._get_is_wrap_text(props),
  222. }
  223. def _get_vertical_alignment(self, props: Mapping[str, str]) -> str | None:
  224. vertical_align = props.get("vertical-align")
  225. if vertical_align:
  226. return self.VERTICAL_MAP.get(vertical_align)
  227. return None
  228. def _get_is_wrap_text(self, props: Mapping[str, str]) -> bool | None:
  229. if props.get("white-space") is None:
  230. return None
  231. return bool(props["white-space"] not in ("nowrap", "pre", "pre-line"))
  232. def build_border(
  233. self, props: Mapping[str, str]
  234. ) -> dict[str, dict[str, str | None]]:
  235. return {
  236. side: {
  237. "style": self._border_style(
  238. props.get(f"border-{side}-style"),
  239. props.get(f"border-{side}-width"),
  240. self.color_to_excel(props.get(f"border-{side}-color")),
  241. ),
  242. "color": self.color_to_excel(props.get(f"border-{side}-color")),
  243. }
  244. for side in ["top", "right", "bottom", "left"]
  245. }
  246. def _border_style(self, style: str | None, width: str | None, color: str | None):
  247. # convert styles and widths to openxml, one of:
  248. # 'dashDot'
  249. # 'dashDotDot'
  250. # 'dashed'
  251. # 'dotted'
  252. # 'double'
  253. # 'hair'
  254. # 'medium'
  255. # 'mediumDashDot'
  256. # 'mediumDashDotDot'
  257. # 'mediumDashed'
  258. # 'slantDashDot'
  259. # 'thick'
  260. # 'thin'
  261. if width is None and style is None and color is None:
  262. # Return None will remove "border" from style dictionary
  263. return None
  264. if width is None and style is None:
  265. # Return "none" will keep "border" in style dictionary
  266. return "none"
  267. if style in ("none", "hidden"):
  268. return "none"
  269. width_name = self._get_width_name(width)
  270. if width_name is None:
  271. return "none"
  272. if style in (None, "groove", "ridge", "inset", "outset", "solid"):
  273. # not handled
  274. return width_name
  275. if style == "double":
  276. return "double"
  277. if style == "dotted":
  278. if width_name in ("hair", "thin"):
  279. return "dotted"
  280. return "mediumDashDotDot"
  281. if style == "dashed":
  282. if width_name in ("hair", "thin"):
  283. return "dashed"
  284. return "mediumDashed"
  285. elif style in self.BORDER_STYLE_MAP:
  286. # Excel-specific styles
  287. return self.BORDER_STYLE_MAP[style]
  288. else:
  289. warnings.warn(
  290. f"Unhandled border style format: {repr(style)}",
  291. CSSWarning,
  292. stacklevel=find_stack_level(),
  293. )
  294. return "none"
  295. def _get_width_name(self, width_input: str | None) -> str | None:
  296. width = self._width_to_float(width_input)
  297. if width < 1e-5:
  298. return None
  299. elif width < 1.3:
  300. return "thin"
  301. elif width < 2.8:
  302. return "medium"
  303. return "thick"
  304. def _width_to_float(self, width: str | None) -> float:
  305. if width is None:
  306. width = "2pt"
  307. return self._pt_to_float(width)
  308. def _pt_to_float(self, pt_string: str) -> float:
  309. assert pt_string.endswith("pt")
  310. return float(pt_string.rstrip("pt"))
  311. def build_fill(self, props: Mapping[str, str]):
  312. # TODO: perhaps allow for special properties
  313. # -excel-pattern-bgcolor and -excel-pattern-type
  314. fill_color = props.get("background-color")
  315. if fill_color not in (None, "transparent", "none"):
  316. return {"fgColor": self.color_to_excel(fill_color), "patternType": "solid"}
  317. def build_number_format(self, props: Mapping[str, str]) -> dict[str, str | None]:
  318. fc = props.get("number-format")
  319. fc = fc.replace("§", ";") if isinstance(fc, str) else fc
  320. return {"format_code": fc}
  321. def build_font(
  322. self, props: Mapping[str, str]
  323. ) -> dict[str, bool | float | str | None]:
  324. font_names = self._get_font_names(props)
  325. decoration = self._get_decoration(props)
  326. return {
  327. "name": font_names[0] if font_names else None,
  328. "family": self._select_font_family(font_names),
  329. "size": self._get_font_size(props),
  330. "bold": self._get_is_bold(props),
  331. "italic": self._get_is_italic(props),
  332. "underline": ("single" if "underline" in decoration else None),
  333. "strike": ("line-through" in decoration) or None,
  334. "color": self.color_to_excel(props.get("color")),
  335. # shadow if nonzero digit before shadow color
  336. "shadow": self._get_shadow(props),
  337. }
  338. def _get_is_bold(self, props: Mapping[str, str]) -> bool | None:
  339. weight = props.get("font-weight")
  340. if weight:
  341. return self.BOLD_MAP.get(weight)
  342. return None
  343. def _get_is_italic(self, props: Mapping[str, str]) -> bool | None:
  344. font_style = props.get("font-style")
  345. if font_style:
  346. return self.ITALIC_MAP.get(font_style)
  347. return None
  348. def _get_decoration(self, props: Mapping[str, str]) -> Sequence[str]:
  349. decoration = props.get("text-decoration")
  350. if decoration is not None:
  351. return decoration.split()
  352. else:
  353. return ()
  354. def _get_underline(self, decoration: Sequence[str]) -> str | None:
  355. if "underline" in decoration:
  356. return "single"
  357. return None
  358. def _get_shadow(self, props: Mapping[str, str]) -> bool | None:
  359. if "text-shadow" in props:
  360. return bool(re.search("^[^#(]*[1-9]", props["text-shadow"]))
  361. return None
  362. def _get_font_names(self, props: Mapping[str, str]) -> Sequence[str]:
  363. font_names_tmp = re.findall(
  364. r"""(?x)
  365. (
  366. "(?:[^"]|\\")+"
  367. |
  368. '(?:[^']|\\')+'
  369. |
  370. [^'",]+
  371. )(?=,|\s*$)
  372. """,
  373. props.get("font-family", ""),
  374. )
  375. font_names = []
  376. for name in font_names_tmp:
  377. if name[:1] == '"':
  378. name = name[1:-1].replace('\\"', '"')
  379. elif name[:1] == "'":
  380. name = name[1:-1].replace("\\'", "'")
  381. else:
  382. name = name.strip()
  383. if name:
  384. font_names.append(name)
  385. return font_names
  386. def _get_font_size(self, props: Mapping[str, str]) -> float | None:
  387. size = props.get("font-size")
  388. if size is None:
  389. return size
  390. return self._pt_to_float(size)
  391. def _select_font_family(self, font_names: Sequence[str]) -> int | None:
  392. family = None
  393. for name in font_names:
  394. family = self.FAMILY_MAP.get(name)
  395. if family:
  396. break
  397. return family
  398. def color_to_excel(self, val: str | None) -> str | None:
  399. if val is None:
  400. return None
  401. if self._is_hex_color(val):
  402. return self._convert_hex_to_excel(val)
  403. try:
  404. return self.NAMED_COLORS[val]
  405. except KeyError:
  406. warnings.warn(
  407. f"Unhandled color format: {repr(val)}",
  408. CSSWarning,
  409. stacklevel=find_stack_level(),
  410. )
  411. return None
  412. def _is_hex_color(self, color_string: str) -> bool:
  413. return bool(color_string.startswith("#"))
  414. def _convert_hex_to_excel(self, color_string: str) -> str:
  415. code = color_string.lstrip("#")
  416. if self._is_shorthand_color(color_string):
  417. return (code[0] * 2 + code[1] * 2 + code[2] * 2).upper()
  418. else:
  419. return code.upper()
  420. def _is_shorthand_color(self, color_string: str) -> bool:
  421. """Check if color code is shorthand.
  422. #FFF is a shorthand as opposed to full #FFFFFF.
  423. """
  424. code = color_string.lstrip("#")
  425. if len(code) == 3:
  426. return True
  427. elif len(code) == 6:
  428. return False
  429. else:
  430. raise ValueError(f"Unexpected color {color_string}")
  431. class ExcelFormatter:
  432. """
  433. Class for formatting a DataFrame to a list of ExcelCells,
  434. Parameters
  435. ----------
  436. df : DataFrame or Styler
  437. na_rep: na representation
  438. float_format : str, default None
  439. Format string for floating point numbers
  440. cols : sequence, optional
  441. Columns to write
  442. header : bool or sequence of str, default True
  443. Write out column names. If a list of string is given it is
  444. assumed to be aliases for the column names
  445. index : bool, default True
  446. output row names (index)
  447. index_label : str or sequence, default None
  448. Column label for index column(s) if desired. If None is given, and
  449. `header` and `index` are True, then the index names are used. A
  450. sequence should be given if the DataFrame uses MultiIndex.
  451. merge_cells : bool, default False
  452. Format MultiIndex and Hierarchical Rows as merged cells.
  453. inf_rep : str, default `'inf'`
  454. representation for np.inf values (which aren't representable in Excel)
  455. A `'-'` sign will be added in front of -inf.
  456. style_converter : callable, optional
  457. This translates Styler styles (CSS) into ExcelWriter styles.
  458. Defaults to ``CSSToExcelConverter()``.
  459. It should have signature css_declarations string -> excel style.
  460. This is only called for body cells.
  461. """
  462. max_rows = 2**20
  463. max_cols = 2**14
  464. def __init__(
  465. self,
  466. df,
  467. na_rep: str = "",
  468. float_format: str | None = None,
  469. cols: Sequence[Hashable] | None = None,
  470. header: Sequence[Hashable] | bool = True,
  471. index: bool = True,
  472. index_label: IndexLabel | None = None,
  473. merge_cells: bool = False,
  474. inf_rep: str = "inf",
  475. style_converter: Callable | None = None,
  476. ) -> None:
  477. self.rowcounter = 0
  478. self.na_rep = na_rep
  479. if not isinstance(df, DataFrame):
  480. self.styler = df
  481. self.styler._compute() # calculate applied styles
  482. df = df.data
  483. if style_converter is None:
  484. style_converter = CSSToExcelConverter()
  485. self.style_converter: Callable | None = style_converter
  486. else:
  487. self.styler = None
  488. self.style_converter = None
  489. self.df = df
  490. if cols is not None:
  491. # all missing, raise
  492. if not len(Index(cols).intersection(df.columns)):
  493. raise KeyError("passes columns are not ALL present dataframe")
  494. if len(Index(cols).intersection(df.columns)) != len(set(cols)):
  495. # Deprecated in GH#17295, enforced in 1.0.0
  496. raise KeyError("Not all names specified in 'columns' are found")
  497. self.df = df.reindex(columns=cols)
  498. self.columns = self.df.columns
  499. self.float_format = float_format
  500. self.index = index
  501. self.index_label = index_label
  502. self.header = header
  503. self.merge_cells = merge_cells
  504. self.inf_rep = inf_rep
  505. @property
  506. def header_style(self) -> dict[str, dict[str, str | bool]]:
  507. return {
  508. "font": {"bold": True},
  509. "borders": {
  510. "top": "thin",
  511. "right": "thin",
  512. "bottom": "thin",
  513. "left": "thin",
  514. },
  515. "alignment": {"horizontal": "center", "vertical": "top"},
  516. }
  517. def _format_value(self, val):
  518. if is_scalar(val) and missing.isna(val):
  519. val = self.na_rep
  520. elif is_float(val):
  521. if missing.isposinf_scalar(val):
  522. val = self.inf_rep
  523. elif missing.isneginf_scalar(val):
  524. val = f"-{self.inf_rep}"
  525. elif self.float_format is not None:
  526. val = float(self.float_format % val)
  527. if getattr(val, "tzinfo", None) is not None:
  528. raise ValueError(
  529. "Excel does not support datetimes with "
  530. "timezones. Please ensure that datetimes "
  531. "are timezone unaware before writing to Excel."
  532. )
  533. return val
  534. def _format_header_mi(self) -> Iterable[ExcelCell]:
  535. if self.columns.nlevels > 1:
  536. if not self.index:
  537. raise NotImplementedError(
  538. "Writing to Excel with MultiIndex columns and no "
  539. "index ('index'=False) is not yet implemented."
  540. )
  541. if not (self._has_aliases or self.header):
  542. return
  543. columns = self.columns
  544. level_strs = columns._format_multi(
  545. sparsify=self.merge_cells, include_names=False
  546. )
  547. level_lengths = get_level_lengths(level_strs)
  548. coloffset = 0
  549. lnum = 0
  550. if self.index and isinstance(self.df.index, MultiIndex):
  551. coloffset = len(self.df.index[0]) - 1
  552. if self.merge_cells:
  553. # Format multi-index as a merged cells.
  554. for lnum, name in enumerate(columns.names):
  555. yield ExcelCell(
  556. row=lnum,
  557. col=coloffset,
  558. val=name,
  559. style=self.header_style,
  560. )
  561. for lnum, (spans, levels, level_codes) in enumerate(
  562. zip(level_lengths, columns.levels, columns.codes)
  563. ):
  564. values = levels.take(level_codes)
  565. for i, span_val in spans.items():
  566. mergestart, mergeend = None, None
  567. if span_val > 1:
  568. mergestart, mergeend = lnum, coloffset + i + span_val
  569. yield CssExcelCell(
  570. row=lnum,
  571. col=coloffset + i + 1,
  572. val=values[i],
  573. style=self.header_style,
  574. css_styles=getattr(self.styler, "ctx_columns", None),
  575. css_row=lnum,
  576. css_col=i,
  577. css_converter=self.style_converter,
  578. mergestart=mergestart,
  579. mergeend=mergeend,
  580. )
  581. else:
  582. # Format in legacy format with dots to indicate levels.
  583. for i, values in enumerate(zip(*level_strs)):
  584. v = ".".join(map(pprint_thing, values))
  585. yield CssExcelCell(
  586. row=lnum,
  587. col=coloffset + i + 1,
  588. val=v,
  589. style=self.header_style,
  590. css_styles=getattr(self.styler, "ctx_columns", None),
  591. css_row=lnum,
  592. css_col=i,
  593. css_converter=self.style_converter,
  594. )
  595. self.rowcounter = lnum
  596. def _format_header_regular(self) -> Iterable[ExcelCell]:
  597. if self._has_aliases or self.header:
  598. coloffset = 0
  599. if self.index:
  600. coloffset = 1
  601. if isinstance(self.df.index, MultiIndex):
  602. coloffset = len(self.df.index.names)
  603. colnames = self.columns
  604. if self._has_aliases:
  605. self.header = cast(Sequence, self.header)
  606. if len(self.header) != len(self.columns):
  607. raise ValueError(
  608. f"Writing {len(self.columns)} cols "
  609. f"but got {len(self.header)} aliases"
  610. )
  611. colnames = self.header
  612. for colindex, colname in enumerate(colnames):
  613. yield CssExcelCell(
  614. row=self.rowcounter,
  615. col=colindex + coloffset,
  616. val=colname,
  617. style=self.header_style,
  618. css_styles=getattr(self.styler, "ctx_columns", None),
  619. css_row=0,
  620. css_col=colindex,
  621. css_converter=self.style_converter,
  622. )
  623. def _format_header(self) -> Iterable[ExcelCell]:
  624. gen: Iterable[ExcelCell]
  625. if isinstance(self.columns, MultiIndex):
  626. gen = self._format_header_mi()
  627. else:
  628. gen = self._format_header_regular()
  629. gen2: Iterable[ExcelCell] = ()
  630. if self.df.index.names:
  631. row = [x if x is not None else "" for x in self.df.index.names] + [
  632. ""
  633. ] * len(self.columns)
  634. if functools.reduce(lambda x, y: x and y, (x != "" for x in row)):
  635. gen2 = (
  636. ExcelCell(self.rowcounter, colindex, val, self.header_style)
  637. for colindex, val in enumerate(row)
  638. )
  639. self.rowcounter += 1
  640. return itertools.chain(gen, gen2)
  641. def _format_body(self) -> Iterable[ExcelCell]:
  642. if isinstance(self.df.index, MultiIndex):
  643. return self._format_hierarchical_rows()
  644. else:
  645. return self._format_regular_rows()
  646. def _format_regular_rows(self) -> Iterable[ExcelCell]:
  647. if self._has_aliases or self.header:
  648. self.rowcounter += 1
  649. # output index and index_label?
  650. if self.index:
  651. # check aliases
  652. # if list only take first as this is not a MultiIndex
  653. if self.index_label and isinstance(
  654. self.index_label, (list, tuple, np.ndarray, Index)
  655. ):
  656. index_label = self.index_label[0]
  657. # if string good to go
  658. elif self.index_label and isinstance(self.index_label, str):
  659. index_label = self.index_label
  660. else:
  661. index_label = self.df.index.names[0]
  662. if isinstance(self.columns, MultiIndex):
  663. self.rowcounter += 1
  664. if index_label and self.header is not False:
  665. yield ExcelCell(self.rowcounter - 1, 0, index_label, self.header_style)
  666. # write index_values
  667. index_values = self.df.index
  668. if isinstance(self.df.index, PeriodIndex):
  669. index_values = self.df.index.to_timestamp()
  670. for idx, idxval in enumerate(index_values):
  671. yield CssExcelCell(
  672. row=self.rowcounter + idx,
  673. col=0,
  674. val=idxval,
  675. style=self.header_style,
  676. css_styles=getattr(self.styler, "ctx_index", None),
  677. css_row=idx,
  678. css_col=0,
  679. css_converter=self.style_converter,
  680. )
  681. coloffset = 1
  682. else:
  683. coloffset = 0
  684. yield from self._generate_body(coloffset)
  685. def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:
  686. if self._has_aliases or self.header:
  687. self.rowcounter += 1
  688. gcolidx = 0
  689. if self.index:
  690. index_labels = self.df.index.names
  691. # check for aliases
  692. if self.index_label and isinstance(
  693. self.index_label, (list, tuple, np.ndarray, Index)
  694. ):
  695. index_labels = self.index_label
  696. # MultiIndex columns require an extra row
  697. # with index names (blank if None) for
  698. # unambiguous round-trip, unless not merging,
  699. # in which case the names all go on one row Issue #11328
  700. if isinstance(self.columns, MultiIndex) and self.merge_cells:
  701. self.rowcounter += 1
  702. # if index labels are not empty go ahead and dump
  703. if com.any_not_none(*index_labels) and self.header is not False:
  704. for cidx, name in enumerate(index_labels):
  705. yield ExcelCell(self.rowcounter - 1, cidx, name, self.header_style)
  706. if self.merge_cells:
  707. # Format hierarchical rows as merged cells.
  708. level_strs = self.df.index._format_multi(
  709. sparsify=True, include_names=False
  710. )
  711. level_lengths = get_level_lengths(level_strs)
  712. for spans, levels, level_codes in zip(
  713. level_lengths, self.df.index.levels, self.df.index.codes
  714. ):
  715. values = levels.take(
  716. level_codes,
  717. allow_fill=levels._can_hold_na,
  718. fill_value=levels._na_value,
  719. )
  720. for i, span_val in spans.items():
  721. mergestart, mergeend = None, None
  722. if span_val > 1:
  723. mergestart = self.rowcounter + i + span_val - 1
  724. mergeend = gcolidx
  725. yield CssExcelCell(
  726. row=self.rowcounter + i,
  727. col=gcolidx,
  728. val=values[i],
  729. style=self.header_style,
  730. css_styles=getattr(self.styler, "ctx_index", None),
  731. css_row=i,
  732. css_col=gcolidx,
  733. css_converter=self.style_converter,
  734. mergestart=mergestart,
  735. mergeend=mergeend,
  736. )
  737. gcolidx += 1
  738. else:
  739. # Format hierarchical rows with non-merged values.
  740. for indexcolvals in zip(*self.df.index):
  741. for idx, indexcolval in enumerate(indexcolvals):
  742. yield CssExcelCell(
  743. row=self.rowcounter + idx,
  744. col=gcolidx,
  745. val=indexcolval,
  746. style=self.header_style,
  747. css_styles=getattr(self.styler, "ctx_index", None),
  748. css_row=idx,
  749. css_col=gcolidx,
  750. css_converter=self.style_converter,
  751. )
  752. gcolidx += 1
  753. yield from self._generate_body(gcolidx)
  754. @property
  755. def _has_aliases(self) -> bool:
  756. """Whether the aliases for column names are present."""
  757. return is_list_like(self.header)
  758. def _generate_body(self, coloffset: int) -> Iterable[ExcelCell]:
  759. # Write the body of the frame data series by series.
  760. for colidx in range(len(self.columns)):
  761. series = self.df.iloc[:, colidx]
  762. for i, val in enumerate(series):
  763. yield CssExcelCell(
  764. row=self.rowcounter + i,
  765. col=colidx + coloffset,
  766. val=val,
  767. style=None,
  768. css_styles=getattr(self.styler, "ctx", None),
  769. css_row=i,
  770. css_col=colidx,
  771. css_converter=self.style_converter,
  772. )
  773. def get_formatted_cells(self) -> Iterable[ExcelCell]:
  774. for cell in itertools.chain(self._format_header(), self._format_body()):
  775. cell.val = self._format_value(cell.val)
  776. yield cell
  777. @doc(storage_options=_shared_docs["storage_options"])
  778. def write(
  779. self,
  780. writer: FilePath | WriteExcelBuffer | ExcelWriter,
  781. sheet_name: str = "Sheet1",
  782. startrow: int = 0,
  783. startcol: int = 0,
  784. freeze_panes: tuple[int, int] | None = None,
  785. engine: str | None = None,
  786. storage_options: StorageOptions | None = None,
  787. engine_kwargs: dict | None = None,
  788. ) -> None:
  789. """
  790. writer : path-like, file-like, or ExcelWriter object
  791. File path or existing ExcelWriter
  792. sheet_name : str, default 'Sheet1'
  793. Name of sheet which will contain DataFrame
  794. startrow :
  795. upper left cell row to dump data frame
  796. startcol :
  797. upper left cell column to dump data frame
  798. freeze_panes : tuple of integer (length 2), default None
  799. Specifies the one-based bottommost row and rightmost column that
  800. is to be frozen
  801. engine : string, default None
  802. write engine to use if writer is a path - you can also set this
  803. via the options ``io.excel.xlsx.writer``,
  804. or ``io.excel.xlsm.writer``.
  805. {storage_options}
  806. engine_kwargs: dict, optional
  807. Arbitrary keyword arguments passed to excel engine.
  808. """
  809. from pandas.io.excel import ExcelWriter
  810. num_rows, num_cols = self.df.shape
  811. if num_rows > self.max_rows or num_cols > self.max_cols:
  812. raise ValueError(
  813. f"This sheet is too large! Your sheet size is: {num_rows}, {num_cols} "
  814. f"Max sheet size is: {self.max_rows}, {self.max_cols}"
  815. )
  816. if engine_kwargs is None:
  817. engine_kwargs = {}
  818. formatted_cells = self.get_formatted_cells()
  819. if isinstance(writer, ExcelWriter):
  820. need_save = False
  821. else:
  822. writer = ExcelWriter(
  823. writer,
  824. engine=engine,
  825. storage_options=storage_options,
  826. engine_kwargs=engine_kwargs,
  827. )
  828. need_save = True
  829. try:
  830. writer._write_cells(
  831. formatted_cells,
  832. sheet_name,
  833. startrow=startrow,
  834. startcol=startcol,
  835. freeze_panes=freeze_panes,
  836. )
  837. finally:
  838. # make sure to close opened file handles
  839. if need_save:
  840. writer.close()