test_writers.py 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514
  1. from datetime import (
  2. date,
  3. datetime,
  4. timedelta,
  5. )
  6. from functools import partial
  7. from io import BytesIO
  8. import os
  9. import re
  10. import numpy as np
  11. import pytest
  12. from pandas.compat import is_platform_windows
  13. from pandas.compat._constants import PY310
  14. from pandas.compat._optional import import_optional_dependency
  15. import pandas.util._test_decorators as td
  16. import pandas as pd
  17. from pandas import (
  18. DataFrame,
  19. Index,
  20. MultiIndex,
  21. date_range,
  22. option_context,
  23. )
  24. import pandas._testing as tm
  25. from pandas.io.excel import (
  26. ExcelFile,
  27. ExcelWriter,
  28. _OpenpyxlWriter,
  29. _XlsxWriter,
  30. register_writer,
  31. )
  32. from pandas.io.excel._util import _writers
  33. if is_platform_windows():
  34. pytestmark = pytest.mark.single_cpu
  35. def get_exp_unit(path: str) -> str:
  36. return "ns"
  37. @pytest.fixture
  38. def frame(float_frame):
  39. """
  40. Returns the first ten items in fixture "float_frame".
  41. """
  42. return float_frame[:10]
  43. @pytest.fixture(params=[True, False])
  44. def merge_cells(request):
  45. return request.param
  46. @pytest.fixture
  47. def path(ext):
  48. """
  49. Fixture to open file for use in each test case.
  50. """
  51. with tm.ensure_clean(ext) as file_path:
  52. yield file_path
  53. @pytest.fixture
  54. def set_engine(engine, ext):
  55. """
  56. Fixture to set engine for use in each test case.
  57. Rather than requiring `engine=...` to be provided explicitly as an
  58. argument in each test, this fixture sets a global option to dictate
  59. which engine should be used to write Excel files. After executing
  60. the test it rolls back said change to the global option.
  61. """
  62. option_name = f"io.excel.{ext.strip('.')}.writer"
  63. with option_context(option_name, engine):
  64. yield
  65. @pytest.mark.parametrize(
  66. "ext",
  67. [
  68. pytest.param(".xlsx", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]),
  69. pytest.param(".xlsm", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]),
  70. pytest.param(
  71. ".xlsx", marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")]
  72. ),
  73. pytest.param(".ods", marks=td.skip_if_no("odf")),
  74. ],
  75. )
  76. class TestRoundTrip:
  77. @pytest.mark.parametrize(
  78. "header,expected",
  79. [(None, DataFrame([np.nan] * 4)), (0, DataFrame({"Unnamed: 0": [np.nan] * 3}))],
  80. )
  81. def test_read_one_empty_col_no_header(self, ext, header, expected):
  82. # xref gh-12292
  83. filename = "no_header"
  84. df = DataFrame([["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]])
  85. with tm.ensure_clean(ext) as path:
  86. df.to_excel(path, sheet_name=filename, index=False, header=False)
  87. result = pd.read_excel(
  88. path, sheet_name=filename, usecols=[0], header=header
  89. )
  90. tm.assert_frame_equal(result, expected)
  91. @pytest.mark.parametrize(
  92. "header,expected",
  93. [(None, DataFrame([0] + [np.nan] * 4)), (0, DataFrame([np.nan] * 4))],
  94. )
  95. def test_read_one_empty_col_with_header(self, ext, header, expected):
  96. filename = "with_header"
  97. df = DataFrame([["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]])
  98. with tm.ensure_clean(ext) as path:
  99. df.to_excel(path, sheet_name="with_header", index=False, header=True)
  100. result = pd.read_excel(
  101. path, sheet_name=filename, usecols=[0], header=header
  102. )
  103. tm.assert_frame_equal(result, expected)
  104. def test_set_column_names_in_parameter(self, ext):
  105. # GH 12870 : pass down column names associated with
  106. # keyword argument names
  107. refdf = DataFrame([[1, "foo"], [2, "bar"], [3, "baz"]], columns=["a", "b"])
  108. with tm.ensure_clean(ext) as pth:
  109. with ExcelWriter(pth) as writer:
  110. refdf.to_excel(
  111. writer, sheet_name="Data_no_head", header=False, index=False
  112. )
  113. refdf.to_excel(writer, sheet_name="Data_with_head", index=False)
  114. refdf.columns = ["A", "B"]
  115. with ExcelFile(pth) as reader:
  116. xlsdf_no_head = pd.read_excel(
  117. reader, sheet_name="Data_no_head", header=None, names=["A", "B"]
  118. )
  119. xlsdf_with_head = pd.read_excel(
  120. reader,
  121. sheet_name="Data_with_head",
  122. index_col=None,
  123. names=["A", "B"],
  124. )
  125. tm.assert_frame_equal(xlsdf_no_head, refdf)
  126. tm.assert_frame_equal(xlsdf_with_head, refdf)
  127. def test_creating_and_reading_multiple_sheets(self, ext):
  128. # see gh-9450
  129. #
  130. # Test reading multiple sheets, from a runtime
  131. # created Excel file with multiple sheets.
  132. def tdf(col_sheet_name):
  133. d, i = [11, 22, 33], [1, 2, 3]
  134. return DataFrame(d, i, columns=[col_sheet_name])
  135. sheets = ["AAA", "BBB", "CCC"]
  136. dfs = [tdf(s) for s in sheets]
  137. dfs = dict(zip(sheets, dfs))
  138. with tm.ensure_clean(ext) as pth:
  139. with ExcelWriter(pth) as ew:
  140. for sheetname, df in dfs.items():
  141. df.to_excel(ew, sheet_name=sheetname)
  142. dfs_returned = pd.read_excel(pth, sheet_name=sheets, index_col=0)
  143. for s in sheets:
  144. tm.assert_frame_equal(dfs[s], dfs_returned[s])
  145. def test_read_excel_multiindex_empty_level(self, ext):
  146. # see gh-12453
  147. with tm.ensure_clean(ext) as path:
  148. df = DataFrame(
  149. {
  150. ("One", "x"): {0: 1},
  151. ("Two", "X"): {0: 3},
  152. ("Two", "Y"): {0: 7},
  153. ("Zero", ""): {0: 0},
  154. }
  155. )
  156. expected = DataFrame(
  157. {
  158. ("One", "x"): {0: 1},
  159. ("Two", "X"): {0: 3},
  160. ("Two", "Y"): {0: 7},
  161. ("Zero", "Unnamed: 4_level_1"): {0: 0},
  162. }
  163. )
  164. df.to_excel(path)
  165. actual = pd.read_excel(path, header=[0, 1], index_col=0)
  166. tm.assert_frame_equal(actual, expected)
  167. df = DataFrame(
  168. {
  169. ("Beg", ""): {0: 0},
  170. ("Middle", "x"): {0: 1},
  171. ("Tail", "X"): {0: 3},
  172. ("Tail", "Y"): {0: 7},
  173. }
  174. )
  175. expected = DataFrame(
  176. {
  177. ("Beg", "Unnamed: 1_level_1"): {0: 0},
  178. ("Middle", "x"): {0: 1},
  179. ("Tail", "X"): {0: 3},
  180. ("Tail", "Y"): {0: 7},
  181. }
  182. )
  183. df.to_excel(path)
  184. actual = pd.read_excel(path, header=[0, 1], index_col=0)
  185. tm.assert_frame_equal(actual, expected)
  186. @pytest.mark.parametrize("c_idx_names", ["a", None])
  187. @pytest.mark.parametrize("r_idx_names", ["b", None])
  188. @pytest.mark.parametrize("c_idx_levels", [1, 3])
  189. @pytest.mark.parametrize("r_idx_levels", [1, 3])
  190. def test_excel_multindex_roundtrip(
  191. self, ext, c_idx_names, r_idx_names, c_idx_levels, r_idx_levels, request
  192. ):
  193. # see gh-4679
  194. with tm.ensure_clean(ext) as pth:
  195. # Empty name case current read in as
  196. # unnamed levels, not Nones.
  197. check_names = bool(r_idx_names) or r_idx_levels <= 1
  198. if c_idx_levels == 1:
  199. columns = Index(list("abcde"))
  200. else:
  201. columns = MultiIndex.from_arrays(
  202. [range(5) for _ in range(c_idx_levels)],
  203. names=[f"{c_idx_names}-{i}" for i in range(c_idx_levels)],
  204. )
  205. if r_idx_levels == 1:
  206. index = Index(list("ghijk"))
  207. else:
  208. index = MultiIndex.from_arrays(
  209. [range(5) for _ in range(r_idx_levels)],
  210. names=[f"{r_idx_names}-{i}" for i in range(r_idx_levels)],
  211. )
  212. df = DataFrame(
  213. 1.1 * np.ones((5, 5)),
  214. columns=columns,
  215. index=index,
  216. )
  217. df.to_excel(pth)
  218. act = pd.read_excel(
  219. pth,
  220. index_col=list(range(r_idx_levels)),
  221. header=list(range(c_idx_levels)),
  222. )
  223. tm.assert_frame_equal(df, act, check_names=check_names)
  224. df.iloc[0, :] = np.nan
  225. df.to_excel(pth)
  226. act = pd.read_excel(
  227. pth,
  228. index_col=list(range(r_idx_levels)),
  229. header=list(range(c_idx_levels)),
  230. )
  231. tm.assert_frame_equal(df, act, check_names=check_names)
  232. df.iloc[-1, :] = np.nan
  233. df.to_excel(pth)
  234. act = pd.read_excel(
  235. pth,
  236. index_col=list(range(r_idx_levels)),
  237. header=list(range(c_idx_levels)),
  238. )
  239. tm.assert_frame_equal(df, act, check_names=check_names)
  240. def test_read_excel_parse_dates(self, ext):
  241. # see gh-11544, gh-12051
  242. df = DataFrame(
  243. {"col": [1, 2, 3], "date_strings": date_range("2012-01-01", periods=3)}
  244. )
  245. df2 = df.copy()
  246. df2["date_strings"] = df2["date_strings"].dt.strftime("%m/%d/%Y")
  247. with tm.ensure_clean(ext) as pth:
  248. df2.to_excel(pth)
  249. res = pd.read_excel(pth, index_col=0)
  250. tm.assert_frame_equal(df2, res)
  251. res = pd.read_excel(pth, parse_dates=["date_strings"], index_col=0)
  252. tm.assert_frame_equal(df, res)
  253. date_parser = lambda x: datetime.strptime(x, "%m/%d/%Y")
  254. with tm.assert_produces_warning(
  255. FutureWarning,
  256. match="use 'date_format' instead",
  257. raise_on_extra_warnings=False,
  258. ):
  259. res = pd.read_excel(
  260. pth,
  261. parse_dates=["date_strings"],
  262. date_parser=date_parser,
  263. index_col=0,
  264. )
  265. tm.assert_frame_equal(df, res)
  266. res = pd.read_excel(
  267. pth, parse_dates=["date_strings"], date_format="%m/%d/%Y", index_col=0
  268. )
  269. tm.assert_frame_equal(df, res)
  270. def test_multiindex_interval_datetimes(self, ext):
  271. # GH 30986
  272. midx = MultiIndex.from_arrays(
  273. [
  274. range(4),
  275. pd.interval_range(
  276. start=pd.Timestamp("2020-01-01"), periods=4, freq="6ME"
  277. ),
  278. ]
  279. )
  280. df = DataFrame(range(4), index=midx)
  281. with tm.ensure_clean(ext) as pth:
  282. df.to_excel(pth)
  283. result = pd.read_excel(pth, index_col=[0, 1])
  284. expected = DataFrame(
  285. range(4),
  286. MultiIndex.from_arrays(
  287. [
  288. range(4),
  289. [
  290. "(2020-01-31 00:00:00, 2020-07-31 00:00:00]",
  291. "(2020-07-31 00:00:00, 2021-01-31 00:00:00]",
  292. "(2021-01-31 00:00:00, 2021-07-31 00:00:00]",
  293. "(2021-07-31 00:00:00, 2022-01-31 00:00:00]",
  294. ],
  295. ]
  296. ),
  297. )
  298. tm.assert_frame_equal(result, expected)
  299. @pytest.mark.parametrize(
  300. "engine,ext",
  301. [
  302. pytest.param(
  303. "openpyxl",
  304. ".xlsx",
  305. marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")],
  306. ),
  307. pytest.param(
  308. "openpyxl",
  309. ".xlsm",
  310. marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")],
  311. ),
  312. pytest.param(
  313. "xlsxwriter",
  314. ".xlsx",
  315. marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")],
  316. ),
  317. pytest.param("odf", ".ods", marks=td.skip_if_no("odf")),
  318. ],
  319. )
  320. @pytest.mark.usefixtures("set_engine")
  321. class TestExcelWriter:
  322. def test_excel_sheet_size(self, path):
  323. # GH 26080
  324. breaking_row_count = 2**20 + 1
  325. breaking_col_count = 2**14 + 1
  326. # purposely using two arrays to prevent memory issues while testing
  327. row_arr = np.zeros(shape=(breaking_row_count, 1))
  328. col_arr = np.zeros(shape=(1, breaking_col_count))
  329. row_df = DataFrame(row_arr)
  330. col_df = DataFrame(col_arr)
  331. msg = "sheet is too large"
  332. with pytest.raises(ValueError, match=msg):
  333. row_df.to_excel(path)
  334. with pytest.raises(ValueError, match=msg):
  335. col_df.to_excel(path)
  336. def test_excel_sheet_by_name_raise(self, path):
  337. gt = DataFrame(np.random.default_rng(2).standard_normal((10, 2)))
  338. gt.to_excel(path)
  339. with ExcelFile(path) as xl:
  340. df = pd.read_excel(xl, sheet_name=0, index_col=0)
  341. tm.assert_frame_equal(gt, df)
  342. msg = "Worksheet named '0' not found"
  343. with pytest.raises(ValueError, match=msg):
  344. pd.read_excel(xl, "0")
  345. def test_excel_writer_context_manager(self, frame, path):
  346. with ExcelWriter(path) as writer:
  347. frame.to_excel(writer, sheet_name="Data1")
  348. frame2 = frame.copy()
  349. frame2.columns = frame.columns[::-1]
  350. frame2.to_excel(writer, sheet_name="Data2")
  351. with ExcelFile(path) as reader:
  352. found_df = pd.read_excel(reader, sheet_name="Data1", index_col=0)
  353. found_df2 = pd.read_excel(reader, sheet_name="Data2", index_col=0)
  354. tm.assert_frame_equal(found_df, frame)
  355. tm.assert_frame_equal(found_df2, frame2)
  356. def test_roundtrip(self, frame, path):
  357. frame = frame.copy()
  358. frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
  359. frame.to_excel(path, sheet_name="test1")
  360. frame.to_excel(path, sheet_name="test1", columns=["A", "B"])
  361. frame.to_excel(path, sheet_name="test1", header=False)
  362. frame.to_excel(path, sheet_name="test1", index=False)
  363. # test roundtrip
  364. frame.to_excel(path, sheet_name="test1")
  365. recons = pd.read_excel(path, sheet_name="test1", index_col=0)
  366. tm.assert_frame_equal(frame, recons)
  367. frame.to_excel(path, sheet_name="test1", index=False)
  368. recons = pd.read_excel(path, sheet_name="test1", index_col=None)
  369. recons.index = frame.index
  370. tm.assert_frame_equal(frame, recons)
  371. frame.to_excel(path, sheet_name="test1", na_rep="NA")
  372. recons = pd.read_excel(path, sheet_name="test1", index_col=0, na_values=["NA"])
  373. tm.assert_frame_equal(frame, recons)
  374. # GH 3611
  375. frame.to_excel(path, sheet_name="test1", na_rep="88")
  376. recons = pd.read_excel(path, sheet_name="test1", index_col=0, na_values=["88"])
  377. tm.assert_frame_equal(frame, recons)
  378. frame.to_excel(path, sheet_name="test1", na_rep="88")
  379. recons = pd.read_excel(
  380. path, sheet_name="test1", index_col=0, na_values=[88, 88.0]
  381. )
  382. tm.assert_frame_equal(frame, recons)
  383. # GH 6573
  384. frame.to_excel(path, sheet_name="Sheet1")
  385. recons = pd.read_excel(path, index_col=0)
  386. tm.assert_frame_equal(frame, recons)
  387. frame.to_excel(path, sheet_name="0")
  388. recons = pd.read_excel(path, index_col=0)
  389. tm.assert_frame_equal(frame, recons)
  390. # GH 8825 Pandas Series should provide to_excel method
  391. s = frame["A"]
  392. s.to_excel(path)
  393. recons = pd.read_excel(path, index_col=0)
  394. tm.assert_frame_equal(s.to_frame(), recons)
  395. def test_mixed(self, frame, path):
  396. mixed_frame = frame.copy()
  397. mixed_frame["foo"] = "bar"
  398. mixed_frame.to_excel(path, sheet_name="test1")
  399. with ExcelFile(path) as reader:
  400. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  401. tm.assert_frame_equal(mixed_frame, recons)
  402. def test_ts_frame(self, path):
  403. unit = get_exp_unit(path)
  404. df = DataFrame(
  405. np.random.default_rng(2).standard_normal((5, 4)),
  406. columns=Index(list("ABCD")),
  407. index=date_range("2000-01-01", periods=5, freq="B"),
  408. )
  409. # freq doesn't round-trip
  410. index = pd.DatetimeIndex(np.asarray(df.index), freq=None)
  411. df.index = index
  412. expected = df[:]
  413. expected.index = expected.index.as_unit(unit)
  414. df.to_excel(path, sheet_name="test1")
  415. with ExcelFile(path) as reader:
  416. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  417. tm.assert_frame_equal(expected, recons)
  418. def test_basics_with_nan(self, frame, path):
  419. frame = frame.copy()
  420. frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
  421. frame.to_excel(path, sheet_name="test1")
  422. frame.to_excel(path, sheet_name="test1", columns=["A", "B"])
  423. frame.to_excel(path, sheet_name="test1", header=False)
  424. frame.to_excel(path, sheet_name="test1", index=False)
  425. @pytest.mark.parametrize("np_type", [np.int8, np.int16, np.int32, np.int64])
  426. def test_int_types(self, np_type, path):
  427. # Test np.int values read come back as int
  428. # (rather than float which is Excel's format).
  429. df = DataFrame(
  430. np.random.default_rng(2).integers(-10, 10, size=(10, 2)), dtype=np_type
  431. )
  432. df.to_excel(path, sheet_name="test1")
  433. with ExcelFile(path) as reader:
  434. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  435. int_frame = df.astype(np.int64)
  436. tm.assert_frame_equal(int_frame, recons)
  437. recons2 = pd.read_excel(path, sheet_name="test1", index_col=0)
  438. tm.assert_frame_equal(int_frame, recons2)
  439. @pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64])
  440. def test_float_types(self, np_type, path):
  441. # Test np.float values read come back as float.
  442. df = DataFrame(np.random.default_rng(2).random(10), dtype=np_type)
  443. df.to_excel(path, sheet_name="test1")
  444. with ExcelFile(path) as reader:
  445. recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(
  446. np_type
  447. )
  448. tm.assert_frame_equal(df, recons)
  449. def test_bool_types(self, path):
  450. # Test np.bool_ values read come back as float.
  451. df = DataFrame([1, 0, True, False], dtype=np.bool_)
  452. df.to_excel(path, sheet_name="test1")
  453. with ExcelFile(path) as reader:
  454. recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(
  455. np.bool_
  456. )
  457. tm.assert_frame_equal(df, recons)
  458. def test_inf_roundtrip(self, path):
  459. df = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)])
  460. df.to_excel(path, sheet_name="test1")
  461. with ExcelFile(path) as reader:
  462. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  463. tm.assert_frame_equal(df, recons)
  464. def test_sheets(self, frame, path):
  465. # freq doesn't round-trip
  466. unit = get_exp_unit(path)
  467. tsframe = DataFrame(
  468. np.random.default_rng(2).standard_normal((5, 4)),
  469. columns=Index(list("ABCD")),
  470. index=date_range("2000-01-01", periods=5, freq="B"),
  471. )
  472. index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None)
  473. tsframe.index = index
  474. expected = tsframe[:]
  475. expected.index = expected.index.as_unit(unit)
  476. frame = frame.copy()
  477. frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
  478. frame.to_excel(path, sheet_name="test1")
  479. frame.to_excel(path, sheet_name="test1", columns=["A", "B"])
  480. frame.to_excel(path, sheet_name="test1", header=False)
  481. frame.to_excel(path, sheet_name="test1", index=False)
  482. # Test writing to separate sheets
  483. with ExcelWriter(path) as writer:
  484. frame.to_excel(writer, sheet_name="test1")
  485. tsframe.to_excel(writer, sheet_name="test2")
  486. with ExcelFile(path) as reader:
  487. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  488. tm.assert_frame_equal(frame, recons)
  489. recons = pd.read_excel(reader, sheet_name="test2", index_col=0)
  490. tm.assert_frame_equal(expected, recons)
  491. assert 2 == len(reader.sheet_names)
  492. assert "test1" == reader.sheet_names[0]
  493. assert "test2" == reader.sheet_names[1]
  494. def test_colaliases(self, frame, path):
  495. frame = frame.copy()
  496. frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
  497. frame.to_excel(path, sheet_name="test1")
  498. frame.to_excel(path, sheet_name="test1", columns=["A", "B"])
  499. frame.to_excel(path, sheet_name="test1", header=False)
  500. frame.to_excel(path, sheet_name="test1", index=False)
  501. # column aliases
  502. col_aliases = Index(["AA", "X", "Y", "Z"])
  503. frame.to_excel(path, sheet_name="test1", header=col_aliases)
  504. with ExcelFile(path) as reader:
  505. rs = pd.read_excel(reader, sheet_name="test1", index_col=0)
  506. xp = frame.copy()
  507. xp.columns = col_aliases
  508. tm.assert_frame_equal(xp, rs)
  509. def test_roundtrip_indexlabels(self, merge_cells, frame, path):
  510. frame = frame.copy()
  511. frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
  512. frame.to_excel(path, sheet_name="test1")
  513. frame.to_excel(path, sheet_name="test1", columns=["A", "B"])
  514. frame.to_excel(path, sheet_name="test1", header=False)
  515. frame.to_excel(path, sheet_name="test1", index=False)
  516. # test index_label
  517. df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) >= 0
  518. df.to_excel(
  519. path, sheet_name="test1", index_label=["test"], merge_cells=merge_cells
  520. )
  521. with ExcelFile(path) as reader:
  522. recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(
  523. np.int64
  524. )
  525. df.index.names = ["test"]
  526. assert df.index.names == recons.index.names
  527. df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) >= 0
  528. df.to_excel(
  529. path,
  530. sheet_name="test1",
  531. index_label=["test", "dummy", "dummy2"],
  532. merge_cells=merge_cells,
  533. )
  534. with ExcelFile(path) as reader:
  535. recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(
  536. np.int64
  537. )
  538. df.index.names = ["test"]
  539. assert df.index.names == recons.index.names
  540. df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) >= 0
  541. df.to_excel(
  542. path, sheet_name="test1", index_label="test", merge_cells=merge_cells
  543. )
  544. with ExcelFile(path) as reader:
  545. recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(
  546. np.int64
  547. )
  548. df.index.names = ["test"]
  549. tm.assert_frame_equal(df, recons.astype(bool))
  550. frame.to_excel(
  551. path,
  552. sheet_name="test1",
  553. columns=["A", "B", "C", "D"],
  554. index=False,
  555. merge_cells=merge_cells,
  556. )
  557. # take 'A' and 'B' as indexes (same row as cols 'C', 'D')
  558. df = frame.copy()
  559. df = df.set_index(["A", "B"])
  560. with ExcelFile(path) as reader:
  561. recons = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1])
  562. tm.assert_frame_equal(df, recons)
  563. def test_excel_roundtrip_indexname(self, merge_cells, path):
  564. df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
  565. df.index.name = "foo"
  566. df.to_excel(path, merge_cells=merge_cells)
  567. with ExcelFile(path) as xf:
  568. result = pd.read_excel(xf, sheet_name=xf.sheet_names[0], index_col=0)
  569. tm.assert_frame_equal(result, df)
  570. assert result.index.name == "foo"
  571. def test_excel_roundtrip_datetime(self, merge_cells, path):
  572. # datetime.date, not sure what to test here exactly
  573. unit = get_exp_unit(path)
  574. # freq does not round-trip
  575. tsframe = DataFrame(
  576. np.random.default_rng(2).standard_normal((5, 4)),
  577. columns=Index(list("ABCD")),
  578. index=date_range("2000-01-01", periods=5, freq="B"),
  579. )
  580. index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None)
  581. tsframe.index = index
  582. tsf = tsframe.copy()
  583. tsf.index = [x.date() for x in tsframe.index]
  584. tsf.to_excel(path, sheet_name="test1", merge_cells=merge_cells)
  585. with ExcelFile(path) as reader:
  586. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  587. expected = tsframe[:]
  588. expected.index = expected.index.as_unit(unit)
  589. tm.assert_frame_equal(expected, recons)
  590. def test_excel_date_datetime_format(self, ext, path):
  591. # see gh-4133
  592. #
  593. # Excel output format strings
  594. unit = get_exp_unit(path)
  595. df = DataFrame(
  596. [
  597. [date(2014, 1, 31), date(1999, 9, 24)],
  598. [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)],
  599. ],
  600. index=["DATE", "DATETIME"],
  601. columns=["X", "Y"],
  602. )
  603. df_expected = DataFrame(
  604. [
  605. [datetime(2014, 1, 31), datetime(1999, 9, 24)],
  606. [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)],
  607. ],
  608. index=["DATE", "DATETIME"],
  609. columns=["X", "Y"],
  610. )
  611. df_expected = df_expected.astype(f"M8[{unit}]")
  612. with tm.ensure_clean(ext) as filename2:
  613. with ExcelWriter(path) as writer1:
  614. df.to_excel(writer1, sheet_name="test1")
  615. with ExcelWriter(
  616. filename2,
  617. date_format="DD.MM.YYYY",
  618. datetime_format="DD.MM.YYYY HH-MM-SS",
  619. ) as writer2:
  620. df.to_excel(writer2, sheet_name="test1")
  621. with ExcelFile(path) as reader1:
  622. rs1 = pd.read_excel(reader1, sheet_name="test1", index_col=0)
  623. with ExcelFile(filename2) as reader2:
  624. rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0)
  625. tm.assert_frame_equal(rs1, rs2)
  626. # Since the reader returns a datetime object for dates,
  627. # we need to use df_expected to check the result.
  628. tm.assert_frame_equal(rs2, df_expected)
  629. @pytest.mark.filterwarnings(
  630. "ignore:invalid value encountered in cast:RuntimeWarning"
  631. )
  632. def test_to_excel_interval_no_labels(self, path, using_infer_string):
  633. # see gh-19242
  634. #
  635. # Test writing Interval without labels.
  636. df = DataFrame(
  637. np.random.default_rng(2).integers(-10, 10, size=(20, 1)), dtype=np.int64
  638. )
  639. expected = df.copy()
  640. df["new"] = pd.cut(df[0], 10)
  641. expected["new"] = pd.cut(expected[0], 10).astype(
  642. str if not using_infer_string else "str"
  643. )
  644. df.to_excel(path, sheet_name="test1")
  645. with ExcelFile(path) as reader:
  646. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  647. tm.assert_frame_equal(expected, recons)
  648. def test_to_excel_interval_labels(self, path):
  649. # see gh-19242
  650. #
  651. # Test writing Interval with labels.
  652. df = DataFrame(
  653. np.random.default_rng(2).integers(-10, 10, size=(20, 1)), dtype=np.int64
  654. )
  655. expected = df.copy()
  656. intervals = pd.cut(
  657. df[0], 10, labels=["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"]
  658. )
  659. df["new"] = intervals
  660. expected["new"] = pd.Series(list(intervals))
  661. df.to_excel(path, sheet_name="test1")
  662. with ExcelFile(path) as reader:
  663. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  664. tm.assert_frame_equal(expected, recons)
  665. def test_to_excel_timedelta(self, path):
  666. # see gh-19242, gh-9155
  667. #
  668. # Test writing timedelta to xls.
  669. df = DataFrame(
  670. np.random.default_rng(2).integers(-10, 10, size=(20, 1)),
  671. columns=["A"],
  672. dtype=np.int64,
  673. )
  674. expected = df.copy()
  675. df["new"] = df["A"].apply(lambda x: timedelta(seconds=x))
  676. expected["new"] = expected["A"].apply(
  677. lambda x: timedelta(seconds=x).total_seconds() / 86400
  678. )
  679. df.to_excel(path, sheet_name="test1")
  680. with ExcelFile(path) as reader:
  681. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  682. tm.assert_frame_equal(expected, recons)
  683. def test_to_excel_periodindex(self, path):
  684. # xp has a PeriodIndex
  685. df = DataFrame(
  686. np.random.default_rng(2).standard_normal((5, 4)),
  687. columns=Index(list("ABCD")),
  688. index=date_range("2000-01-01", periods=5, freq="B"),
  689. )
  690. xp = df.resample("ME").mean().to_period("M")
  691. xp.to_excel(path, sheet_name="sht1")
  692. with ExcelFile(path) as reader:
  693. rs = pd.read_excel(reader, sheet_name="sht1", index_col=0)
  694. tm.assert_frame_equal(xp, rs.to_period("M"))
  695. def test_to_excel_multiindex(self, merge_cells, frame, path):
  696. arrays = np.arange(len(frame.index) * 2, dtype=np.int64).reshape(2, -1)
  697. new_index = MultiIndex.from_arrays(arrays, names=["first", "second"])
  698. frame.index = new_index
  699. frame.to_excel(path, sheet_name="test1", header=False)
  700. frame.to_excel(path, sheet_name="test1", columns=["A", "B"])
  701. # round trip
  702. frame.to_excel(path, sheet_name="test1", merge_cells=merge_cells)
  703. with ExcelFile(path) as reader:
  704. df = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1])
  705. tm.assert_frame_equal(frame, df)
  706. # GH13511
  707. def test_to_excel_multiindex_nan_label(self, merge_cells, path):
  708. df = DataFrame(
  709. {
  710. "A": [None, 2, 3],
  711. "B": [10, 20, 30],
  712. "C": np.random.default_rng(2).random(3),
  713. }
  714. )
  715. df = df.set_index(["A", "B"])
  716. df.to_excel(path, merge_cells=merge_cells)
  717. df1 = pd.read_excel(path, index_col=[0, 1])
  718. tm.assert_frame_equal(df, df1)
  719. # Test for Issue 11328. If column indices are integers, make
  720. # sure they are handled correctly for either setting of
  721. # merge_cells
  722. def test_to_excel_multiindex_cols(self, merge_cells, frame, path):
  723. arrays = np.arange(len(frame.index) * 2, dtype=np.int64).reshape(2, -1)
  724. new_index = MultiIndex.from_arrays(arrays, names=["first", "second"])
  725. frame.index = new_index
  726. new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), (50, 1), (50, 2)])
  727. frame.columns = new_cols_index
  728. header = [0, 1]
  729. if not merge_cells:
  730. header = 0
  731. # round trip
  732. frame.to_excel(path, sheet_name="test1", merge_cells=merge_cells)
  733. with ExcelFile(path) as reader:
  734. df = pd.read_excel(
  735. reader, sheet_name="test1", header=header, index_col=[0, 1]
  736. )
  737. if not merge_cells:
  738. fm = frame.columns._format_multi(sparsify=False, include_names=False)
  739. frame.columns = [".".join(map(str, q)) for q in zip(*fm)]
  740. tm.assert_frame_equal(frame, df)
  741. def test_to_excel_multiindex_dates(self, merge_cells, path):
  742. # try multiindex with dates
  743. unit = get_exp_unit(path)
  744. tsframe = DataFrame(
  745. np.random.default_rng(2).standard_normal((5, 4)),
  746. columns=Index(list("ABCD")),
  747. index=date_range("2000-01-01", periods=5, freq="B"),
  748. )
  749. tsframe.index = MultiIndex.from_arrays(
  750. [
  751. tsframe.index.as_unit(unit),
  752. np.arange(len(tsframe.index), dtype=np.int64),
  753. ],
  754. names=["time", "foo"],
  755. )
  756. tsframe.to_excel(path, sheet_name="test1", merge_cells=merge_cells)
  757. with ExcelFile(path) as reader:
  758. recons = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1])
  759. tm.assert_frame_equal(tsframe, recons)
  760. assert recons.index.names == ("time", "foo")
  761. def test_to_excel_multiindex_no_write_index(self, path):
  762. # Test writing and re-reading a MI without the index. GH 5616.
  763. # Initial non-MI frame.
  764. frame1 = DataFrame({"a": [10, 20], "b": [30, 40], "c": [50, 60]})
  765. # Add a MI.
  766. frame2 = frame1.copy()
  767. multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)])
  768. frame2.index = multi_index
  769. # Write out to Excel without the index.
  770. frame2.to_excel(path, sheet_name="test1", index=False)
  771. # Read it back in.
  772. with ExcelFile(path) as reader:
  773. frame3 = pd.read_excel(reader, sheet_name="test1")
  774. # Test that it is the same as the initial frame.
  775. tm.assert_frame_equal(frame1, frame3)
  776. def test_to_excel_empty_multiindex(self, path):
  777. # GH 19543.
  778. expected = DataFrame([], columns=[0, 1, 2])
  779. df = DataFrame([], index=MultiIndex.from_tuples([], names=[0, 1]), columns=[2])
  780. df.to_excel(path, sheet_name="test1")
  781. with ExcelFile(path) as reader:
  782. result = pd.read_excel(reader, sheet_name="test1")
  783. tm.assert_frame_equal(
  784. result, expected, check_index_type=False, check_dtype=False
  785. )
  786. def test_to_excel_float_format(self, path):
  787. df = DataFrame(
  788. [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
  789. index=["A", "B"],
  790. columns=["X", "Y", "Z"],
  791. )
  792. df.to_excel(path, sheet_name="test1", float_format="%.2f")
  793. with ExcelFile(path) as reader:
  794. result = pd.read_excel(reader, sheet_name="test1", index_col=0)
  795. expected = DataFrame(
  796. [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
  797. index=["A", "B"],
  798. columns=["X", "Y", "Z"],
  799. )
  800. tm.assert_frame_equal(result, expected)
  801. def test_to_excel_output_encoding(self, ext):
  802. # Avoid mixed inferred_type.
  803. df = DataFrame(
  804. [["\u0192", "\u0193", "\u0194"], ["\u0195", "\u0196", "\u0197"]],
  805. index=["A\u0192", "B"],
  806. columns=["X\u0193", "Y", "Z"],
  807. )
  808. with tm.ensure_clean("__tmp_to_excel_float_format__." + ext) as filename:
  809. df.to_excel(filename, sheet_name="TestSheet")
  810. result = pd.read_excel(filename, sheet_name="TestSheet", index_col=0)
  811. tm.assert_frame_equal(result, df)
  812. def test_to_excel_unicode_filename(self, ext):
  813. with tm.ensure_clean("\u0192u." + ext) as filename:
  814. try:
  815. with open(filename, "wb"):
  816. pass
  817. except UnicodeEncodeError:
  818. pytest.skip("No unicode file names on this system")
  819. df = DataFrame(
  820. [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
  821. index=["A", "B"],
  822. columns=["X", "Y", "Z"],
  823. )
  824. df.to_excel(filename, sheet_name="test1", float_format="%.2f")
  825. with ExcelFile(filename) as reader:
  826. result = pd.read_excel(reader, sheet_name="test1", index_col=0)
  827. expected = DataFrame(
  828. [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
  829. index=["A", "B"],
  830. columns=["X", "Y", "Z"],
  831. )
  832. tm.assert_frame_equal(result, expected)
  833. @pytest.mark.parametrize("use_headers", [True, False])
  834. @pytest.mark.parametrize("r_idx_nlevels", [1, 2, 3])
  835. @pytest.mark.parametrize("c_idx_nlevels", [1, 2, 3])
  836. def test_excel_010_hemstring(
  837. self, merge_cells, c_idx_nlevels, r_idx_nlevels, use_headers, path
  838. ):
  839. def roundtrip(data, header=True, parser_hdr=0, index=True):
  840. data.to_excel(path, header=header, merge_cells=merge_cells, index=index)
  841. with ExcelFile(path) as xf:
  842. return pd.read_excel(
  843. xf, sheet_name=xf.sheet_names[0], header=parser_hdr
  844. )
  845. # Basic test.
  846. parser_header = 0 if use_headers else None
  847. res = roundtrip(DataFrame([0]), use_headers, parser_header)
  848. assert res.shape == (1, 2)
  849. assert res.iloc[0, 0] is not np.nan
  850. # More complex tests with multi-index.
  851. nrows = 5
  852. ncols = 3
  853. # ensure limited functionality in 0.10
  854. # override of gh-2370 until sorted out in 0.11
  855. if c_idx_nlevels == 1:
  856. columns = Index([f"a-{i}" for i in range(ncols)], dtype=object)
  857. else:
  858. columns = MultiIndex.from_arrays(
  859. [range(ncols) for _ in range(c_idx_nlevels)],
  860. names=[f"i-{i}" for i in range(c_idx_nlevels)],
  861. )
  862. if r_idx_nlevels == 1:
  863. index = Index([f"b-{i}" for i in range(nrows)], dtype=object)
  864. else:
  865. index = MultiIndex.from_arrays(
  866. [range(nrows) for _ in range(r_idx_nlevels)],
  867. names=[f"j-{i}" for i in range(r_idx_nlevels)],
  868. )
  869. df = DataFrame(
  870. np.ones((nrows, ncols)),
  871. columns=columns,
  872. index=index,
  873. )
  874. # This if will be removed once multi-column Excel writing
  875. # is implemented. For now fixing gh-9794.
  876. if c_idx_nlevels > 1:
  877. msg = (
  878. "Writing to Excel with MultiIndex columns and no index "
  879. "\\('index'=False\\) is not yet implemented."
  880. )
  881. with pytest.raises(NotImplementedError, match=msg):
  882. roundtrip(df, use_headers, index=False)
  883. else:
  884. res = roundtrip(df, use_headers)
  885. if use_headers:
  886. assert res.shape == (nrows, ncols + r_idx_nlevels)
  887. else:
  888. # First row taken as columns.
  889. assert res.shape == (nrows - 1, ncols + r_idx_nlevels)
  890. # No NaNs.
  891. for r in range(len(res.index)):
  892. for c in range(len(res.columns)):
  893. assert res.iloc[r, c] is not np.nan
  894. def test_duplicated_columns(self, path):
  895. # see gh-5235
  896. df = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]], columns=["A", "B", "B"])
  897. df.to_excel(path, sheet_name="test1")
  898. expected = DataFrame(
  899. [[1, 2, 3], [1, 2, 3], [1, 2, 3]], columns=["A", "B", "B.1"]
  900. )
  901. # By default, we mangle.
  902. result = pd.read_excel(path, sheet_name="test1", index_col=0)
  903. tm.assert_frame_equal(result, expected)
  904. # see gh-11007, gh-10970
  905. df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A", "B"])
  906. df.to_excel(path, sheet_name="test1")
  907. result = pd.read_excel(path, sheet_name="test1", index_col=0)
  908. expected = DataFrame(
  909. [[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A.1", "B.1"]
  910. )
  911. tm.assert_frame_equal(result, expected)
  912. # see gh-10982
  913. df.to_excel(path, sheet_name="test1", index=False, header=False)
  914. result = pd.read_excel(path, sheet_name="test1", header=None)
  915. expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]])
  916. tm.assert_frame_equal(result, expected)
  917. def test_swapped_columns(self, path):
  918. # Test for issue #5427.
  919. write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]})
  920. write_frame.to_excel(path, sheet_name="test1", columns=["B", "A"])
  921. read_frame = pd.read_excel(path, sheet_name="test1", header=0)
  922. tm.assert_series_equal(write_frame["A"], read_frame["A"])
  923. tm.assert_series_equal(write_frame["B"], read_frame["B"])
  924. def test_invalid_columns(self, path):
  925. # see gh-10982
  926. write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]})
  927. with pytest.raises(KeyError, match="Not all names specified"):
  928. write_frame.to_excel(path, sheet_name="test1", columns=["B", "C"])
  929. with pytest.raises(
  930. KeyError, match="'passes columns are not ALL present dataframe'"
  931. ):
  932. write_frame.to_excel(path, sheet_name="test1", columns=["C", "D"])
  933. @pytest.mark.parametrize(
  934. "to_excel_index,read_excel_index_col",
  935. [
  936. (True, 0), # Include index in write to file
  937. (False, None), # Dont include index in write to file
  938. ],
  939. )
  940. def test_write_subset_columns(self, path, to_excel_index, read_excel_index_col):
  941. # GH 31677
  942. write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2], "C": [3, 3, 3]})
  943. write_frame.to_excel(
  944. path, sheet_name="col_subset_bug", columns=["A", "B"], index=to_excel_index
  945. )
  946. expected = write_frame[["A", "B"]]
  947. read_frame = pd.read_excel(
  948. path, sheet_name="col_subset_bug", index_col=read_excel_index_col
  949. )
  950. tm.assert_frame_equal(expected, read_frame)
  951. def test_comment_arg(self, path):
  952. # see gh-18735
  953. #
  954. # Test the comment argument functionality to pd.read_excel.
  955. # Create file to read in.
  956. df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]})
  957. df.to_excel(path, sheet_name="test_c")
  958. # Read file without comment arg.
  959. result1 = pd.read_excel(path, sheet_name="test_c", index_col=0)
  960. result1.iloc[1, 0] = None
  961. result1.iloc[1, 1] = None
  962. result1.iloc[2, 1] = None
  963. result2 = pd.read_excel(path, sheet_name="test_c", comment="#", index_col=0)
  964. tm.assert_frame_equal(result1, result2)
  965. def test_comment_default(self, path):
  966. # Re issue #18735
  967. # Test the comment argument default to pd.read_excel
  968. # Create file to read in
  969. df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]})
  970. df.to_excel(path, sheet_name="test_c")
  971. # Read file with default and explicit comment=None
  972. result1 = pd.read_excel(path, sheet_name="test_c")
  973. result2 = pd.read_excel(path, sheet_name="test_c", comment=None)
  974. tm.assert_frame_equal(result1, result2)
  975. def test_comment_used(self, path):
  976. # see gh-18735
  977. #
  978. # Test the comment argument is working as expected when used.
  979. # Create file to read in.
  980. df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]})
  981. df.to_excel(path, sheet_name="test_c")
  982. # Test read_frame_comment against manually produced expected output.
  983. expected = DataFrame({"A": ["one", None, "one"], "B": ["two", None, None]})
  984. result = pd.read_excel(path, sheet_name="test_c", comment="#", index_col=0)
  985. tm.assert_frame_equal(result, expected)
  986. def test_comment_empty_line(self, path):
  987. # Re issue #18735
  988. # Test that pd.read_excel ignores commented lines at the end of file
  989. df = DataFrame({"a": ["1", "#2"], "b": ["2", "3"]})
  990. df.to_excel(path, index=False)
  991. # Test that all-comment lines at EoF are ignored
  992. expected = DataFrame({"a": [1], "b": [2]})
  993. result = pd.read_excel(path, comment="#")
  994. tm.assert_frame_equal(result, expected)
  995. def test_datetimes(self, path):
  996. # Test writing and reading datetimes. For issue #9139. (xref #9185)
  997. unit = get_exp_unit(path)
  998. datetimes = [
  999. datetime(2013, 1, 13, 1, 2, 3),
  1000. datetime(2013, 1, 13, 2, 45, 56),
  1001. datetime(2013, 1, 13, 4, 29, 49),
  1002. datetime(2013, 1, 13, 6, 13, 42),
  1003. datetime(2013, 1, 13, 7, 57, 35),
  1004. datetime(2013, 1, 13, 9, 41, 28),
  1005. datetime(2013, 1, 13, 11, 25, 21),
  1006. datetime(2013, 1, 13, 13, 9, 14),
  1007. datetime(2013, 1, 13, 14, 53, 7),
  1008. datetime(2013, 1, 13, 16, 37, 0),
  1009. datetime(2013, 1, 13, 18, 20, 52),
  1010. ]
  1011. write_frame = DataFrame({"A": datetimes})
  1012. write_frame.to_excel(path, sheet_name="Sheet1")
  1013. read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0)
  1014. expected = write_frame.astype(f"M8[{unit}]")
  1015. tm.assert_series_equal(expected["A"], read_frame["A"])
  1016. def test_bytes_io(self, engine):
  1017. # see gh-7074
  1018. with BytesIO() as bio:
  1019. df = DataFrame(np.random.default_rng(2).standard_normal((10, 2)))
  1020. # Pass engine explicitly, as there is no file path to infer from.
  1021. with ExcelWriter(bio, engine=engine) as writer:
  1022. df.to_excel(writer)
  1023. bio.seek(0)
  1024. reread_df = pd.read_excel(bio, index_col=0)
  1025. tm.assert_frame_equal(df, reread_df)
  1026. def test_engine_kwargs(self, engine, path):
  1027. # GH#52368
  1028. df = DataFrame([{"A": 1, "B": 2}, {"A": 3, "B": 4}])
  1029. msgs = {
  1030. "odf": r"OpenDocumentSpreadsheet() got an unexpected keyword "
  1031. r"argument 'foo'",
  1032. "openpyxl": r"__init__() got an unexpected keyword argument 'foo'",
  1033. "xlsxwriter": r"__init__() got an unexpected keyword argument 'foo'",
  1034. }
  1035. if PY310:
  1036. msgs[
  1037. "openpyxl"
  1038. ] = "Workbook.__init__() got an unexpected keyword argument 'foo'"
  1039. msgs[
  1040. "xlsxwriter"
  1041. ] = "Workbook.__init__() got an unexpected keyword argument 'foo'"
  1042. # Handle change in error message for openpyxl (write and append mode)
  1043. if engine == "openpyxl" and not os.path.exists(path):
  1044. msgs[
  1045. "openpyxl"
  1046. ] = r"load_workbook() got an unexpected keyword argument 'foo'"
  1047. with pytest.raises(TypeError, match=re.escape(msgs[engine])):
  1048. df.to_excel(
  1049. path,
  1050. engine=engine,
  1051. engine_kwargs={"foo": "bar"},
  1052. )
  1053. def test_write_lists_dict(self, path):
  1054. # see gh-8188.
  1055. df = DataFrame(
  1056. {
  1057. "mixed": ["a", ["b", "c"], {"d": "e", "f": 2}],
  1058. "numeric": [1, 2, 3.0],
  1059. "str": ["apple", "banana", "cherry"],
  1060. }
  1061. )
  1062. df.to_excel(path, sheet_name="Sheet1")
  1063. read = pd.read_excel(path, sheet_name="Sheet1", header=0, index_col=0)
  1064. expected = df.copy()
  1065. expected.mixed = expected.mixed.apply(str)
  1066. expected.numeric = expected.numeric.astype("int64")
  1067. tm.assert_frame_equal(read, expected)
  1068. def test_render_as_column_name(self, path):
  1069. # see gh-34331
  1070. df = DataFrame({"render": [1, 2], "data": [3, 4]})
  1071. df.to_excel(path, sheet_name="Sheet1")
  1072. read = pd.read_excel(path, "Sheet1", index_col=0)
  1073. expected = df
  1074. tm.assert_frame_equal(read, expected)
  1075. def test_true_and_false_value_options(self, path):
  1076. # see gh-13347
  1077. df = DataFrame([["foo", "bar"]], columns=["col1", "col2"], dtype=object)
  1078. with option_context("future.no_silent_downcasting", True):
  1079. expected = df.replace({"foo": True, "bar": False}).astype("bool")
  1080. df.to_excel(path)
  1081. read_frame = pd.read_excel(
  1082. path, true_values=["foo"], false_values=["bar"], index_col=0
  1083. )
  1084. tm.assert_frame_equal(read_frame, expected)
  1085. def test_freeze_panes(self, path):
  1086. # see gh-15160
  1087. expected = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"])
  1088. expected.to_excel(path, sheet_name="Sheet1", freeze_panes=(1, 1))
  1089. result = pd.read_excel(path, index_col=0)
  1090. tm.assert_frame_equal(result, expected)
  1091. def test_path_path_lib(self, engine, ext):
  1092. df = DataFrame(
  1093. 1.1 * np.arange(120).reshape((30, 4)),
  1094. columns=Index(list("ABCD")),
  1095. index=Index([f"i-{i}" for i in range(30)]),
  1096. )
  1097. writer = partial(df.to_excel, engine=engine)
  1098. reader = partial(pd.read_excel, index_col=0)
  1099. result = tm.round_trip_pathlib(writer, reader, path=f"foo{ext}")
  1100. tm.assert_frame_equal(result, df)
  1101. def test_path_local_path(self, engine, ext):
  1102. df = DataFrame(
  1103. 1.1 * np.arange(120).reshape((30, 4)),
  1104. columns=Index(list("ABCD")),
  1105. index=Index([f"i-{i}" for i in range(30)]),
  1106. )
  1107. writer = partial(df.to_excel, engine=engine)
  1108. reader = partial(pd.read_excel, index_col=0)
  1109. result = tm.round_trip_localpath(writer, reader, path=f"foo{ext}")
  1110. tm.assert_frame_equal(result, df)
  1111. def test_merged_cell_custom_objects(self, path):
  1112. # see GH-27006
  1113. mi = MultiIndex.from_tuples(
  1114. [
  1115. (pd.Period("2018"), pd.Period("2018Q1")),
  1116. (pd.Period("2018"), pd.Period("2018Q2")),
  1117. ]
  1118. )
  1119. expected = DataFrame(np.ones((2, 2), dtype="int64"), columns=mi)
  1120. expected.to_excel(path)
  1121. result = pd.read_excel(path, header=[0, 1], index_col=0)
  1122. # need to convert PeriodIndexes to standard Indexes for assert equal
  1123. expected.columns = expected.columns.set_levels(
  1124. [[str(i) for i in mi.levels[0]], [str(i) for i in mi.levels[1]]],
  1125. level=[0, 1],
  1126. )
  1127. tm.assert_frame_equal(result, expected)
  1128. @pytest.mark.parametrize("dtype", [None, object])
  1129. def test_raise_when_saving_timezones(self, dtype, tz_aware_fixture, path):
  1130. # GH 27008, GH 7056
  1131. tz = tz_aware_fixture
  1132. data = pd.Timestamp("2019", tz=tz)
  1133. df = DataFrame([data], dtype=dtype)
  1134. with pytest.raises(ValueError, match="Excel does not support"):
  1135. df.to_excel(path)
  1136. data = data.to_pydatetime()
  1137. df = DataFrame([data], dtype=dtype)
  1138. with pytest.raises(ValueError, match="Excel does not support"):
  1139. df.to_excel(path)
  1140. def test_excel_duplicate_columns_with_names(self, path):
  1141. # GH#39695
  1142. df = DataFrame({"A": [0, 1], "B": [10, 11]})
  1143. df.to_excel(path, columns=["A", "B", "A"], index=False)
  1144. result = pd.read_excel(path)
  1145. expected = DataFrame([[0, 10, 0], [1, 11, 1]], columns=["A", "B", "A.1"])
  1146. tm.assert_frame_equal(result, expected)
  1147. def test_if_sheet_exists_raises(self, ext):
  1148. # GH 40230
  1149. msg = "if_sheet_exists is only valid in append mode (mode='a')"
  1150. with tm.ensure_clean(ext) as f:
  1151. with pytest.raises(ValueError, match=re.escape(msg)):
  1152. ExcelWriter(f, if_sheet_exists="replace")
  1153. def test_excel_writer_empty_frame(self, engine, ext):
  1154. # GH#45793
  1155. with tm.ensure_clean(ext) as path:
  1156. with ExcelWriter(path, engine=engine) as writer:
  1157. DataFrame().to_excel(writer)
  1158. result = pd.read_excel(path)
  1159. expected = DataFrame()
  1160. tm.assert_frame_equal(result, expected)
  1161. def test_to_excel_empty_frame(self, engine, ext):
  1162. # GH#45793
  1163. with tm.ensure_clean(ext) as path:
  1164. DataFrame().to_excel(path, engine=engine)
  1165. result = pd.read_excel(path)
  1166. expected = DataFrame()
  1167. tm.assert_frame_equal(result, expected)
  1168. class TestExcelWriterEngineTests:
  1169. @pytest.mark.parametrize(
  1170. "klass,ext",
  1171. [
  1172. pytest.param(_XlsxWriter, ".xlsx", marks=td.skip_if_no("xlsxwriter")),
  1173. pytest.param(_OpenpyxlWriter, ".xlsx", marks=td.skip_if_no("openpyxl")),
  1174. ],
  1175. )
  1176. def test_ExcelWriter_dispatch(self, klass, ext):
  1177. with tm.ensure_clean(ext) as path:
  1178. with ExcelWriter(path) as writer:
  1179. if ext == ".xlsx" and bool(
  1180. import_optional_dependency("xlsxwriter", errors="ignore")
  1181. ):
  1182. # xlsxwriter has preference over openpyxl if both installed
  1183. assert isinstance(writer, _XlsxWriter)
  1184. else:
  1185. assert isinstance(writer, klass)
  1186. def test_ExcelWriter_dispatch_raises(self):
  1187. with pytest.raises(ValueError, match="No engine"):
  1188. ExcelWriter("nothing")
  1189. def test_register_writer(self):
  1190. class DummyClass(ExcelWriter):
  1191. called_save = False
  1192. called_write_cells = False
  1193. called_sheets = False
  1194. _supported_extensions = ("xlsx", "xls")
  1195. _engine = "dummy"
  1196. def book(self):
  1197. pass
  1198. def _save(self):
  1199. type(self).called_save = True
  1200. def _write_cells(self, *args, **kwargs):
  1201. type(self).called_write_cells = True
  1202. @property
  1203. def sheets(self):
  1204. type(self).called_sheets = True
  1205. @classmethod
  1206. def assert_called_and_reset(cls):
  1207. assert cls.called_save
  1208. assert cls.called_write_cells
  1209. assert not cls.called_sheets
  1210. cls.called_save = False
  1211. cls.called_write_cells = False
  1212. register_writer(DummyClass)
  1213. with option_context("io.excel.xlsx.writer", "dummy"):
  1214. path = "something.xlsx"
  1215. with tm.ensure_clean(path) as filepath:
  1216. with ExcelWriter(filepath) as writer:
  1217. assert isinstance(writer, DummyClass)
  1218. df = DataFrame(
  1219. ["a"],
  1220. columns=Index(["b"], name="foo"),
  1221. index=Index(["c"], name="bar"),
  1222. )
  1223. df.to_excel(filepath)
  1224. DummyClass.assert_called_and_reset()
  1225. with tm.ensure_clean("something.xls") as filepath:
  1226. df.to_excel(filepath, engine="dummy")
  1227. DummyClass.assert_called_and_reset()
  1228. @td.skip_if_no("xlrd")
  1229. @td.skip_if_no("openpyxl")
  1230. class TestFSPath:
  1231. def test_excelfile_fspath(self):
  1232. with tm.ensure_clean("foo.xlsx") as path:
  1233. df = DataFrame({"A": [1, 2]})
  1234. df.to_excel(path)
  1235. with ExcelFile(path) as xl:
  1236. result = os.fspath(xl)
  1237. assert result == path
  1238. def test_excelwriter_fspath(self):
  1239. with tm.ensure_clean("foo.xlsx") as path:
  1240. with ExcelWriter(path) as writer:
  1241. assert os.fspath(writer) == str(path)
  1242. def test_to_excel_pos_args_deprecation(self):
  1243. # GH-54229
  1244. df = DataFrame({"a": [1, 2, 3]})
  1245. msg = (
  1246. r"Starting with pandas version 3.0 all arguments of to_excel except "
  1247. r"for the argument 'excel_writer' will be keyword-only."
  1248. )
  1249. with tm.assert_produces_warning(FutureWarning, match=msg):
  1250. buf = BytesIO()
  1251. writer = ExcelWriter(buf)
  1252. df.to_excel(writer, "Sheet_name_1")
  1253. @pytest.mark.parametrize("klass", _writers.values())
  1254. def test_subclass_attr(klass):
  1255. # testing that subclasses of ExcelWriter don't have public attributes (issue 49602)
  1256. attrs_base = {name for name in dir(ExcelWriter) if not name.startswith("_")}
  1257. attrs_klass = {name for name in dir(klass) if not name.startswith("_")}
  1258. assert not attrs_base.symmetric_difference(attrs_klass)