test_openpyxl.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432
  1. import contextlib
  2. from pathlib import Path
  3. import re
  4. import numpy as np
  5. import pytest
  6. from pandas.compat import is_platform_windows
  7. import pandas as pd
  8. from pandas import DataFrame
  9. import pandas._testing as tm
  10. from pandas.io.excel import (
  11. ExcelWriter,
  12. _OpenpyxlWriter,
  13. )
  14. from pandas.io.excel._openpyxl import OpenpyxlReader
  15. openpyxl = pytest.importorskip("openpyxl")
  16. if is_platform_windows():
  17. pytestmark = pytest.mark.single_cpu
  18. @pytest.fixture
  19. def ext():
  20. return ".xlsx"
  21. def test_to_excel_styleconverter():
  22. from openpyxl import styles
  23. hstyle = {
  24. "font": {"color": "00FF0000", "bold": True},
  25. "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"},
  26. "alignment": {"horizontal": "center", "vertical": "top"},
  27. "fill": {"patternType": "solid", "fgColor": {"rgb": "006666FF", "tint": 0.3}},
  28. "number_format": {"format_code": "0.00"},
  29. "protection": {"locked": True, "hidden": False},
  30. }
  31. font_color = styles.Color("00FF0000")
  32. font = styles.Font(bold=True, color=font_color)
  33. side = styles.Side(style=styles.borders.BORDER_THIN)
  34. border = styles.Border(top=side, right=side, bottom=side, left=side)
  35. alignment = styles.Alignment(horizontal="center", vertical="top")
  36. fill_color = styles.Color(rgb="006666FF", tint=0.3)
  37. fill = styles.PatternFill(patternType="solid", fgColor=fill_color)
  38. number_format = "0.00"
  39. protection = styles.Protection(locked=True, hidden=False)
  40. kw = _OpenpyxlWriter._convert_to_style_kwargs(hstyle)
  41. assert kw["font"] == font
  42. assert kw["border"] == border
  43. assert kw["alignment"] == alignment
  44. assert kw["fill"] == fill
  45. assert kw["number_format"] == number_format
  46. assert kw["protection"] == protection
  47. def test_write_cells_merge_styled(ext):
  48. from pandas.io.formats.excel import ExcelCell
  49. sheet_name = "merge_styled"
  50. sty_b1 = {"font": {"color": "00FF0000"}}
  51. sty_a2 = {"font": {"color": "0000FF00"}}
  52. initial_cells = [
  53. ExcelCell(col=1, row=0, val=42, style=sty_b1),
  54. ExcelCell(col=0, row=1, val=99, style=sty_a2),
  55. ]
  56. sty_merged = {"font": {"color": "000000FF", "bold": True}}
  57. sty_kwargs = _OpenpyxlWriter._convert_to_style_kwargs(sty_merged)
  58. openpyxl_sty_merged = sty_kwargs["font"]
  59. merge_cells = [
  60. ExcelCell(
  61. col=0, row=0, val="pandas", mergestart=1, mergeend=1, style=sty_merged
  62. )
  63. ]
  64. with tm.ensure_clean(ext) as path:
  65. with _OpenpyxlWriter(path) as writer:
  66. writer._write_cells(initial_cells, sheet_name=sheet_name)
  67. writer._write_cells(merge_cells, sheet_name=sheet_name)
  68. wks = writer.sheets[sheet_name]
  69. xcell_b1 = wks["B1"]
  70. xcell_a2 = wks["A2"]
  71. assert xcell_b1.font == openpyxl_sty_merged
  72. assert xcell_a2.font == openpyxl_sty_merged
  73. @pytest.mark.parametrize("iso_dates", [True, False])
  74. def test_engine_kwargs_write(ext, iso_dates):
  75. # GH 42286 GH 43445
  76. engine_kwargs = {"iso_dates": iso_dates}
  77. with tm.ensure_clean(ext) as f:
  78. with ExcelWriter(f, engine="openpyxl", engine_kwargs=engine_kwargs) as writer:
  79. assert writer.book.iso_dates == iso_dates
  80. # ExcelWriter won't allow us to close without writing something
  81. DataFrame().to_excel(writer)
  82. def test_engine_kwargs_append_invalid(ext):
  83. # GH 43445
  84. # test whether an invalid engine kwargs actually raises
  85. with tm.ensure_clean(ext) as f:
  86. DataFrame(["hello", "world"]).to_excel(f)
  87. with pytest.raises(
  88. TypeError,
  89. match=re.escape(
  90. "load_workbook() got an unexpected keyword argument 'apple_banana'"
  91. ),
  92. ):
  93. with ExcelWriter(
  94. f, engine="openpyxl", mode="a", engine_kwargs={"apple_banana": "fruit"}
  95. ) as writer:
  96. # ExcelWriter needs us to write something to close properly
  97. DataFrame(["good"]).to_excel(writer, sheet_name="Sheet2")
  98. @pytest.mark.parametrize("data_only, expected", [(True, 0), (False, "=1+1")])
  99. def test_engine_kwargs_append_data_only(ext, data_only, expected):
  100. # GH 43445
  101. # tests whether the data_only engine_kwarg actually works well for
  102. # openpyxl's load_workbook
  103. with tm.ensure_clean(ext) as f:
  104. DataFrame(["=1+1"]).to_excel(f)
  105. with ExcelWriter(
  106. f, engine="openpyxl", mode="a", engine_kwargs={"data_only": data_only}
  107. ) as writer:
  108. assert writer.sheets["Sheet1"]["B2"].value == expected
  109. # ExcelWriter needs us to writer something to close properly?
  110. DataFrame().to_excel(writer, sheet_name="Sheet2")
  111. # ensure that data_only also works for reading
  112. # and that formulas/values roundtrip
  113. assert (
  114. pd.read_excel(
  115. f,
  116. sheet_name="Sheet1",
  117. engine="openpyxl",
  118. engine_kwargs={"data_only": data_only},
  119. ).iloc[0, 1]
  120. == expected
  121. )
  122. @pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"])
  123. @pytest.mark.parametrize("kwarg_value", [True, False])
  124. def test_engine_kwargs_append_reader(datapath, ext, kwarg_name, kwarg_value):
  125. # GH 55027
  126. # test that `read_only` and `data_only` can be passed to
  127. # `openpyxl.reader.excel.load_workbook` via `engine_kwargs`
  128. filename = datapath("io", "data", "excel", "test1" + ext)
  129. with contextlib.closing(
  130. OpenpyxlReader(filename, engine_kwargs={kwarg_name: kwarg_value})
  131. ) as reader:
  132. assert getattr(reader.book, kwarg_name) == kwarg_value
  133. @pytest.mark.parametrize(
  134. "mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]
  135. )
  136. def test_write_append_mode(ext, mode, expected):
  137. df = DataFrame([1], columns=["baz"])
  138. with tm.ensure_clean(ext) as f:
  139. wb = openpyxl.Workbook()
  140. wb.worksheets[0].title = "foo"
  141. wb.worksheets[0]["A1"].value = "foo"
  142. wb.create_sheet("bar")
  143. wb.worksheets[1]["A1"].value = "bar"
  144. wb.save(f)
  145. with ExcelWriter(f, engine="openpyxl", mode=mode) as writer:
  146. df.to_excel(writer, sheet_name="baz", index=False)
  147. with contextlib.closing(openpyxl.load_workbook(f)) as wb2:
  148. result = [sheet.title for sheet in wb2.worksheets]
  149. assert result == expected
  150. for index, cell_value in enumerate(expected):
  151. assert wb2.worksheets[index]["A1"].value == cell_value
  152. @pytest.mark.parametrize(
  153. "if_sheet_exists,num_sheets,expected",
  154. [
  155. ("new", 2, ["apple", "banana"]),
  156. ("replace", 1, ["pear"]),
  157. ("overlay", 1, ["pear", "banana"]),
  158. ],
  159. )
  160. def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected):
  161. # GH 40230
  162. df1 = DataFrame({"fruit": ["apple", "banana"]})
  163. df2 = DataFrame({"fruit": ["pear"]})
  164. with tm.ensure_clean(ext) as f:
  165. df1.to_excel(f, engine="openpyxl", sheet_name="foo", index=False)
  166. with ExcelWriter(
  167. f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
  168. ) as writer:
  169. df2.to_excel(writer, sheet_name="foo", index=False)
  170. with contextlib.closing(openpyxl.load_workbook(f)) as wb:
  171. assert len(wb.sheetnames) == num_sheets
  172. assert wb.sheetnames[0] == "foo"
  173. result = pd.read_excel(wb, "foo", engine="openpyxl")
  174. assert list(result["fruit"]) == expected
  175. if len(wb.sheetnames) == 2:
  176. result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
  177. tm.assert_frame_equal(result, df2)
  178. @pytest.mark.parametrize(
  179. "startrow, startcol, greeting, goodbye",
  180. [
  181. (0, 0, ["poop", "world"], ["goodbye", "people"]),
  182. (0, 1, ["hello", "world"], ["poop", "people"]),
  183. (1, 0, ["hello", "poop"], ["goodbye", "people"]),
  184. (1, 1, ["hello", "world"], ["goodbye", "poop"]),
  185. ],
  186. )
  187. def test_append_overlay_startrow_startcol(ext, startrow, startcol, greeting, goodbye):
  188. df1 = DataFrame({"greeting": ["hello", "world"], "goodbye": ["goodbye", "people"]})
  189. df2 = DataFrame(["poop"])
  190. with tm.ensure_clean(ext) as f:
  191. df1.to_excel(f, engine="openpyxl", sheet_name="poo", index=False)
  192. with ExcelWriter(
  193. f, engine="openpyxl", mode="a", if_sheet_exists="overlay"
  194. ) as writer:
  195. # use startrow+1 because we don't have a header
  196. df2.to_excel(
  197. writer,
  198. index=False,
  199. header=False,
  200. startrow=startrow + 1,
  201. startcol=startcol,
  202. sheet_name="poo",
  203. )
  204. result = pd.read_excel(f, sheet_name="poo", engine="openpyxl")
  205. expected = DataFrame({"greeting": greeting, "goodbye": goodbye})
  206. tm.assert_frame_equal(result, expected)
  207. @pytest.mark.parametrize(
  208. "if_sheet_exists,msg",
  209. [
  210. (
  211. "invalid",
  212. "'invalid' is not valid for if_sheet_exists. Valid options "
  213. "are 'error', 'new', 'replace' and 'overlay'.",
  214. ),
  215. (
  216. "error",
  217. "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
  218. ),
  219. (
  220. None,
  221. "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
  222. ),
  223. ],
  224. )
  225. def test_if_sheet_exists_raises(ext, if_sheet_exists, msg):
  226. # GH 40230
  227. df = DataFrame({"fruit": ["pear"]})
  228. with tm.ensure_clean(ext) as f:
  229. with pytest.raises(ValueError, match=re.escape(msg)):
  230. df.to_excel(f, sheet_name="foo", engine="openpyxl")
  231. with ExcelWriter(
  232. f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
  233. ) as writer:
  234. df.to_excel(writer, sheet_name="foo")
  235. def test_to_excel_with_openpyxl_engine(ext):
  236. # GH 29854
  237. with tm.ensure_clean(ext) as filename:
  238. df1 = DataFrame({"A": np.linspace(1, 10, 10)})
  239. df2 = DataFrame({"B": np.linspace(1, 20, 10)})
  240. df = pd.concat([df1, df2], axis=1)
  241. styled = df.style.map(
  242. lambda val: f"color: {'red' if val < 0 else 'black'}"
  243. ).highlight_max()
  244. styled.to_excel(filename, engine="openpyxl")
  245. @pytest.mark.parametrize("read_only", [True, False])
  246. def test_read_workbook(datapath, ext, read_only):
  247. # GH 39528
  248. filename = datapath("io", "data", "excel", "test1" + ext)
  249. with contextlib.closing(
  250. openpyxl.load_workbook(filename, read_only=read_only)
  251. ) as wb:
  252. result = pd.read_excel(wb, engine="openpyxl")
  253. expected = pd.read_excel(filename)
  254. tm.assert_frame_equal(result, expected)
  255. @pytest.mark.parametrize(
  256. "header, expected_data",
  257. [
  258. (
  259. 0,
  260. {
  261. "Title": [np.nan, "A", 1, 2, 3],
  262. "Unnamed: 1": [np.nan, "B", 4, 5, 6],
  263. "Unnamed: 2": [np.nan, "C", 7, 8, 9],
  264. },
  265. ),
  266. (2, {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}),
  267. ],
  268. )
  269. @pytest.mark.parametrize(
  270. "filename", ["dimension_missing", "dimension_small", "dimension_large"]
  271. )
  272. # When read_only is None, use read_excel instead of a workbook
  273. @pytest.mark.parametrize("read_only", [True, False, None])
  274. def test_read_with_bad_dimension(
  275. datapath, ext, header, expected_data, filename, read_only
  276. ):
  277. # GH 38956, 39001 - no/incorrect dimension information
  278. path = datapath("io", "data", "excel", f"{filename}{ext}")
  279. if read_only is None:
  280. result = pd.read_excel(path, header=header)
  281. else:
  282. with contextlib.closing(
  283. openpyxl.load_workbook(path, read_only=read_only)
  284. ) as wb:
  285. result = pd.read_excel(wb, engine="openpyxl", header=header)
  286. expected = DataFrame(expected_data)
  287. tm.assert_frame_equal(result, expected)
  288. def test_append_mode_file(ext):
  289. # GH 39576
  290. df = DataFrame()
  291. with tm.ensure_clean(ext) as f:
  292. df.to_excel(f, engine="openpyxl")
  293. with ExcelWriter(
  294. f, mode="a", engine="openpyxl", if_sheet_exists="new"
  295. ) as writer:
  296. df.to_excel(writer)
  297. # make sure that zip files are not concatenated by making sure that
  298. # "docProps/app.xml" only occurs twice in the file
  299. data = Path(f).read_bytes()
  300. first = data.find(b"docProps/app.xml")
  301. second = data.find(b"docProps/app.xml", first + 1)
  302. third = data.find(b"docProps/app.xml", second + 1)
  303. assert second != -1 and third == -1
  304. # When read_only is None, use read_excel instead of a workbook
  305. @pytest.mark.parametrize("read_only", [True, False, None])
  306. def test_read_with_empty_trailing_rows(datapath, ext, read_only):
  307. # GH 39181
  308. path = datapath("io", "data", "excel", f"empty_trailing_rows{ext}")
  309. if read_only is None:
  310. result = pd.read_excel(path)
  311. else:
  312. with contextlib.closing(
  313. openpyxl.load_workbook(path, read_only=read_only)
  314. ) as wb:
  315. result = pd.read_excel(wb, engine="openpyxl")
  316. expected = DataFrame(
  317. {
  318. "Title": [np.nan, "A", 1, 2, 3],
  319. "Unnamed: 1": [np.nan, "B", 4, 5, 6],
  320. "Unnamed: 2": [np.nan, "C", 7, 8, 9],
  321. }
  322. )
  323. tm.assert_frame_equal(result, expected)
  324. # When read_only is None, use read_excel instead of a workbook
  325. @pytest.mark.parametrize("read_only", [True, False, None])
  326. def test_read_empty_with_blank_row(datapath, ext, read_only):
  327. # GH 39547 - empty excel file with a row that has no data
  328. path = datapath("io", "data", "excel", f"empty_with_blank_row{ext}")
  329. if read_only is None:
  330. result = pd.read_excel(path)
  331. else:
  332. with contextlib.closing(
  333. openpyxl.load_workbook(path, read_only=read_only)
  334. ) as wb:
  335. result = pd.read_excel(wb, engine="openpyxl")
  336. expected = DataFrame()
  337. tm.assert_frame_equal(result, expected)
  338. def test_book_and_sheets_consistent(ext):
  339. # GH#45687 - Ensure sheets is updated if user modifies book
  340. with tm.ensure_clean(ext) as f:
  341. with ExcelWriter(f, engine="openpyxl") as writer:
  342. assert writer.sheets == {}
  343. sheet = writer.book.create_sheet("test_name", 0)
  344. assert writer.sheets == {"test_name": sheet}
  345. def test_ints_spelled_with_decimals(datapath, ext):
  346. # GH 46988 - openpyxl returns this sheet with floats
  347. path = datapath("io", "data", "excel", f"ints_spelled_with_decimals{ext}")
  348. result = pd.read_excel(path)
  349. expected = DataFrame(range(2, 12), columns=[1])
  350. tm.assert_frame_equal(result, expected)
  351. def test_read_multiindex_header_no_index_names(datapath, ext):
  352. # GH#47487
  353. path = datapath("io", "data", "excel", f"multiindex_no_index_names{ext}")
  354. result = pd.read_excel(path, index_col=[0, 1, 2], header=[0, 1, 2])
  355. expected = DataFrame(
  356. [[np.nan, "x", "x", "x"], ["x", np.nan, np.nan, np.nan]],
  357. columns=pd.MultiIndex.from_tuples(
  358. [("X", "Y", "A1"), ("X", "Y", "A2"), ("XX", "YY", "B1"), ("XX", "YY", "B2")]
  359. ),
  360. index=pd.MultiIndex.from_tuples([("A", "AA", "AAA"), ("A", "BB", "BBB")]),
  361. )
  362. tm.assert_frame_equal(result, expected)