_util.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. from __future__ import annotations
  2. from collections.abc import (
  3. Hashable,
  4. Iterable,
  5. MutableMapping,
  6. Sequence,
  7. )
  8. from typing import (
  9. TYPE_CHECKING,
  10. Any,
  11. Callable,
  12. Literal,
  13. TypeVar,
  14. overload,
  15. )
  16. from pandas.compat._optional import import_optional_dependency
  17. from pandas.core.dtypes.common import (
  18. is_integer,
  19. is_list_like,
  20. )
  21. if TYPE_CHECKING:
  22. from pandas.io.excel._base import ExcelWriter
  23. ExcelWriter_t = type[ExcelWriter]
  24. usecols_func = TypeVar("usecols_func", bound=Callable[[Hashable], object])
  25. _writers: MutableMapping[str, ExcelWriter_t] = {}
  26. def register_writer(klass: ExcelWriter_t) -> None:
  27. """
  28. Add engine to the excel writer registry.io.excel.
  29. You must use this method to integrate with ``to_excel``.
  30. Parameters
  31. ----------
  32. klass : ExcelWriter
  33. """
  34. if not callable(klass):
  35. raise ValueError("Can only register callables as engines")
  36. engine_name = klass._engine
  37. _writers[engine_name] = klass
  38. def get_default_engine(ext: str, mode: Literal["reader", "writer"] = "reader") -> str:
  39. """
  40. Return the default reader/writer for the given extension.
  41. Parameters
  42. ----------
  43. ext : str
  44. The excel file extension for which to get the default engine.
  45. mode : str {'reader', 'writer'}
  46. Whether to get the default engine for reading or writing.
  47. Either 'reader' or 'writer'
  48. Returns
  49. -------
  50. str
  51. The default engine for the extension.
  52. """
  53. _default_readers = {
  54. "xlsx": "openpyxl",
  55. "xlsm": "openpyxl",
  56. "xlsb": "pyxlsb",
  57. "xls": "xlrd",
  58. "ods": "odf",
  59. }
  60. _default_writers = {
  61. "xlsx": "openpyxl",
  62. "xlsm": "openpyxl",
  63. "xlsb": "pyxlsb",
  64. "ods": "odf",
  65. }
  66. assert mode in ["reader", "writer"]
  67. if mode == "writer":
  68. # Prefer xlsxwriter over openpyxl if installed
  69. xlsxwriter = import_optional_dependency("xlsxwriter", errors="warn")
  70. if xlsxwriter:
  71. _default_writers["xlsx"] = "xlsxwriter"
  72. return _default_writers[ext]
  73. else:
  74. return _default_readers[ext]
  75. def get_writer(engine_name: str) -> ExcelWriter_t:
  76. try:
  77. return _writers[engine_name]
  78. except KeyError as err:
  79. raise ValueError(f"No Excel writer '{engine_name}'") from err
  80. def _excel2num(x: str) -> int:
  81. """
  82. Convert Excel column name like 'AB' to 0-based column index.
  83. Parameters
  84. ----------
  85. x : str
  86. The Excel column name to convert to a 0-based column index.
  87. Returns
  88. -------
  89. num : int
  90. The column index corresponding to the name.
  91. Raises
  92. ------
  93. ValueError
  94. Part of the Excel column name was invalid.
  95. """
  96. index = 0
  97. for c in x.upper().strip():
  98. cp = ord(c)
  99. if cp < ord("A") or cp > ord("Z"):
  100. raise ValueError(f"Invalid column name: {x}")
  101. index = index * 26 + cp - ord("A") + 1
  102. return index - 1
  103. def _range2cols(areas: str) -> list[int]:
  104. """
  105. Convert comma separated list of column names and ranges to indices.
  106. Parameters
  107. ----------
  108. areas : str
  109. A string containing a sequence of column ranges (or areas).
  110. Returns
  111. -------
  112. cols : list
  113. A list of 0-based column indices.
  114. Examples
  115. --------
  116. >>> _range2cols('A:E')
  117. [0, 1, 2, 3, 4]
  118. >>> _range2cols('A,C,Z:AB')
  119. [0, 2, 25, 26, 27]
  120. """
  121. cols: list[int] = []
  122. for rng in areas.split(","):
  123. if ":" in rng:
  124. rngs = rng.split(":")
  125. cols.extend(range(_excel2num(rngs[0]), _excel2num(rngs[1]) + 1))
  126. else:
  127. cols.append(_excel2num(rng))
  128. return cols
  129. @overload
  130. def maybe_convert_usecols(usecols: str | list[int]) -> list[int]:
  131. ...
  132. @overload
  133. def maybe_convert_usecols(usecols: list[str]) -> list[str]:
  134. ...
  135. @overload
  136. def maybe_convert_usecols(usecols: usecols_func) -> usecols_func:
  137. ...
  138. @overload
  139. def maybe_convert_usecols(usecols: None) -> None:
  140. ...
  141. def maybe_convert_usecols(
  142. usecols: str | list[int] | list[str] | usecols_func | None,
  143. ) -> None | list[int] | list[str] | usecols_func:
  144. """
  145. Convert `usecols` into a compatible format for parsing in `parsers.py`.
  146. Parameters
  147. ----------
  148. usecols : object
  149. The use-columns object to potentially convert.
  150. Returns
  151. -------
  152. converted : object
  153. The compatible format of `usecols`.
  154. """
  155. if usecols is None:
  156. return usecols
  157. if is_integer(usecols):
  158. raise ValueError(
  159. "Passing an integer for `usecols` is no longer supported. "
  160. "Please pass in a list of int from 0 to `usecols` inclusive instead."
  161. )
  162. if isinstance(usecols, str):
  163. return _range2cols(usecols)
  164. return usecols
  165. @overload
  166. def validate_freeze_panes(freeze_panes: tuple[int, int]) -> Literal[True]:
  167. ...
  168. @overload
  169. def validate_freeze_panes(freeze_panes: None) -> Literal[False]:
  170. ...
  171. def validate_freeze_panes(freeze_panes: tuple[int, int] | None) -> bool:
  172. if freeze_panes is not None:
  173. if len(freeze_panes) == 2 and all(
  174. isinstance(item, int) for item in freeze_panes
  175. ):
  176. return True
  177. raise ValueError(
  178. "freeze_panes must be of form (row, column) "
  179. "where row and column are integers"
  180. )
  181. # freeze_panes wasn't specified, return False so it won't be applied
  182. # to output sheet
  183. return False
  184. def fill_mi_header(
  185. row: list[Hashable], control_row: list[bool]
  186. ) -> tuple[list[Hashable], list[bool]]:
  187. """
  188. Forward fill blank entries in row but only inside the same parent index.
  189. Used for creating headers in Multiindex.
  190. Parameters
  191. ----------
  192. row : list
  193. List of items in a single row.
  194. control_row : list of bool
  195. Helps to determine if particular column is in same parent index as the
  196. previous value. Used to stop propagation of empty cells between
  197. different indexes.
  198. Returns
  199. -------
  200. Returns changed row and control_row
  201. """
  202. last = row[0]
  203. for i in range(1, len(row)):
  204. if not control_row[i]:
  205. last = row[i]
  206. if row[i] == "" or row[i] is None:
  207. row[i] = last
  208. else:
  209. control_row[i] = False
  210. last = row[i]
  211. return row, control_row
  212. def pop_header_name(
  213. row: list[Hashable], index_col: int | Sequence[int]
  214. ) -> tuple[Hashable | None, list[Hashable]]:
  215. """
  216. Pop the header name for MultiIndex parsing.
  217. Parameters
  218. ----------
  219. row : list
  220. The data row to parse for the header name.
  221. index_col : int, list
  222. The index columns for our data. Assumed to be non-null.
  223. Returns
  224. -------
  225. header_name : str
  226. The extracted header name.
  227. trimmed_row : list
  228. The original data row with the header name removed.
  229. """
  230. # Pop out header name and fill w/blank.
  231. if is_list_like(index_col):
  232. assert isinstance(index_col, Iterable)
  233. i = max(index_col)
  234. else:
  235. assert not isinstance(index_col, Iterable)
  236. i = index_col
  237. header_name = row[i]
  238. header_name = None if header_name == "" else header_name
  239. return header_name, row[:i] + [""] + row[i + 1 :]
  240. def combine_kwargs(engine_kwargs: dict[str, Any] | None, kwargs: dict) -> dict:
  241. """
  242. Used to combine two sources of kwargs for the backend engine.
  243. Use of kwargs is deprecated, this function is solely for use in 1.3 and should
  244. be removed in 1.4/2.0. Also _base.ExcelWriter.__new__ ensures either engine_kwargs
  245. or kwargs must be None or empty respectively.
  246. Parameters
  247. ----------
  248. engine_kwargs: dict
  249. kwargs to be passed through to the engine.
  250. kwargs: dict
  251. kwargs to be psased through to the engine (deprecated)
  252. Returns
  253. -------
  254. engine_kwargs combined with kwargs
  255. """
  256. if engine_kwargs is None:
  257. result = {}
  258. else:
  259. result = engine_kwargs.copy()
  260. result.update(kwargs)
  261. return result