test_clipboard.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. from textwrap import dedent
  2. import numpy as np
  3. import pytest
  4. from pandas.errors import (
  5. PyperclipException,
  6. PyperclipWindowsException,
  7. )
  8. import pandas as pd
  9. from pandas import (
  10. NA,
  11. DataFrame,
  12. Series,
  13. get_option,
  14. read_clipboard,
  15. )
  16. import pandas._testing as tm
  17. from pandas.io.clipboard import (
  18. CheckedCall,
  19. _stringifyText,
  20. init_qt_clipboard,
  21. )
  22. def build_kwargs(sep, excel):
  23. kwargs = {}
  24. if excel != "default":
  25. kwargs["excel"] = excel
  26. if sep != "default":
  27. kwargs["sep"] = sep
  28. return kwargs
  29. @pytest.fixture(
  30. params=[
  31. "delims",
  32. "utf8",
  33. "utf16",
  34. "string",
  35. "long",
  36. "nonascii",
  37. "colwidth",
  38. "mixed",
  39. "float",
  40. "int",
  41. ]
  42. )
  43. def df(request):
  44. data_type = request.param
  45. if data_type == "delims":
  46. return DataFrame({"a": ['"a,\t"b|c', "d\tef`"], "b": ["hi'j", "k''lm"]})
  47. elif data_type == "utf8":
  48. return DataFrame({"a": ["µasd", "Ωœ∑`"], "b": ["øπ∆˚¬", "œ∑`®"]})
  49. elif data_type == "utf16":
  50. return DataFrame(
  51. {"a": ["\U0001f44d\U0001f44d", "\U0001f44d\U0001f44d"], "b": ["abc", "def"]}
  52. )
  53. elif data_type == "string":
  54. return DataFrame(
  55. np.array([f"i-{i}" for i in range(15)]).reshape(5, 3), columns=list("abc")
  56. )
  57. elif data_type == "long":
  58. max_rows = get_option("display.max_rows")
  59. return DataFrame(
  60. np.random.default_rng(2).integers(0, 10, size=(max_rows + 1, 3)),
  61. columns=list("abc"),
  62. )
  63. elif data_type == "nonascii":
  64. return DataFrame({"en": "in English".split(), "es": "en español".split()})
  65. elif data_type == "colwidth":
  66. _cw = get_option("display.max_colwidth") + 1
  67. return DataFrame(
  68. np.array(["x" * _cw for _ in range(15)]).reshape(5, 3), columns=list("abc")
  69. )
  70. elif data_type == "mixed":
  71. return DataFrame(
  72. {
  73. "a": np.arange(1.0, 6.0) + 0.01,
  74. "b": np.arange(1, 6).astype(np.int64),
  75. "c": list("abcde"),
  76. }
  77. )
  78. elif data_type == "float":
  79. return DataFrame(np.random.default_rng(2).random((5, 3)), columns=list("abc"))
  80. elif data_type == "int":
  81. return DataFrame(
  82. np.random.default_rng(2).integers(0, 10, (5, 3)), columns=list("abc")
  83. )
  84. else:
  85. raise ValueError
  86. @pytest.fixture
  87. def mock_ctypes(monkeypatch):
  88. """
  89. Mocks WinError to help with testing the clipboard.
  90. """
  91. def _mock_win_error():
  92. return "Window Error"
  93. # Set raising to False because WinError won't exist on non-windows platforms
  94. with monkeypatch.context() as m:
  95. m.setattr("ctypes.WinError", _mock_win_error, raising=False)
  96. yield
  97. @pytest.mark.usefixtures("mock_ctypes")
  98. def test_checked_call_with_bad_call(monkeypatch):
  99. """
  100. Give CheckCall a function that returns a falsey value and
  101. mock get_errno so it returns false so an exception is raised.
  102. """
  103. def _return_false():
  104. return False
  105. monkeypatch.setattr("pandas.io.clipboard.get_errno", lambda: True)
  106. msg = f"Error calling {_return_false.__name__} \\(Window Error\\)"
  107. with pytest.raises(PyperclipWindowsException, match=msg):
  108. CheckedCall(_return_false)()
  109. @pytest.mark.usefixtures("mock_ctypes")
  110. def test_checked_call_with_valid_call(monkeypatch):
  111. """
  112. Give CheckCall a function that returns a truthy value and
  113. mock get_errno so it returns true so an exception is not raised.
  114. The function should return the results from _return_true.
  115. """
  116. def _return_true():
  117. return True
  118. monkeypatch.setattr("pandas.io.clipboard.get_errno", lambda: False)
  119. # Give CheckedCall a callable that returns a truthy value s
  120. checked_call = CheckedCall(_return_true)
  121. assert checked_call() is True
  122. @pytest.mark.parametrize(
  123. "text",
  124. [
  125. "String_test",
  126. True,
  127. 1,
  128. 1.0,
  129. 1j,
  130. ],
  131. )
  132. def test_stringify_text(text):
  133. valid_types = (str, int, float, bool)
  134. if isinstance(text, valid_types):
  135. result = _stringifyText(text)
  136. assert result == str(text)
  137. else:
  138. msg = (
  139. "only str, int, float, and bool values "
  140. f"can be copied to the clipboard, not {type(text).__name__}"
  141. )
  142. with pytest.raises(PyperclipException, match=msg):
  143. _stringifyText(text)
  144. @pytest.fixture
  145. def set_pyqt_clipboard(monkeypatch):
  146. qt_cut, qt_paste = init_qt_clipboard()
  147. with monkeypatch.context() as m:
  148. m.setattr(pd.io.clipboard, "clipboard_set", qt_cut)
  149. m.setattr(pd.io.clipboard, "clipboard_get", qt_paste)
  150. yield
  151. @pytest.fixture
  152. def clipboard(qapp):
  153. clip = qapp.clipboard()
  154. yield clip
  155. clip.clear()
  156. @pytest.mark.single_cpu
  157. @pytest.mark.clipboard
  158. @pytest.mark.usefixtures("set_pyqt_clipboard")
  159. @pytest.mark.usefixtures("clipboard")
  160. class TestClipboard:
  161. # Test that default arguments copy as tab delimited
  162. # Test that explicit delimiters are respected
  163. @pytest.mark.parametrize("sep", [None, "\t", ",", "|"])
  164. @pytest.mark.parametrize("encoding", [None, "UTF-8", "utf-8", "utf8"])
  165. def test_round_trip_frame_sep(self, df, sep, encoding):
  166. df.to_clipboard(excel=None, sep=sep, encoding=encoding)
  167. result = read_clipboard(sep=sep or "\t", index_col=0, encoding=encoding)
  168. tm.assert_frame_equal(df, result)
  169. # Test white space separator
  170. def test_round_trip_frame_string(self, df):
  171. df.to_clipboard(excel=False, sep=None)
  172. result = read_clipboard()
  173. assert df.to_string() == result.to_string()
  174. assert df.shape == result.shape
  175. # Two character separator is not supported in to_clipboard
  176. # Test that multi-character separators are not silently passed
  177. def test_excel_sep_warning(self, df):
  178. with tm.assert_produces_warning(
  179. UserWarning,
  180. match="to_clipboard in excel mode requires a single character separator.",
  181. check_stacklevel=False,
  182. ):
  183. df.to_clipboard(excel=True, sep=r"\t")
  184. # Separator is ignored when excel=False and should produce a warning
  185. def test_copy_delim_warning(self, df):
  186. with tm.assert_produces_warning():
  187. df.to_clipboard(excel=False, sep="\t")
  188. # Tests that the default behavior of to_clipboard is tab
  189. # delimited and excel="True"
  190. @pytest.mark.parametrize("sep", ["\t", None, "default"])
  191. @pytest.mark.parametrize("excel", [True, None, "default"])
  192. def test_clipboard_copy_tabs_default(self, sep, excel, df, clipboard):
  193. kwargs = build_kwargs(sep, excel)
  194. df.to_clipboard(**kwargs)
  195. assert clipboard.text() == df.to_csv(sep="\t")
  196. # Tests reading of white space separated tables
  197. @pytest.mark.parametrize("sep", [None, "default"])
  198. def test_clipboard_copy_strings(self, sep, df):
  199. kwargs = build_kwargs(sep, False)
  200. df.to_clipboard(**kwargs)
  201. result = read_clipboard(sep=r"\s+")
  202. assert result.to_string() == df.to_string()
  203. assert df.shape == result.shape
  204. def test_read_clipboard_infer_excel(self, clipboard):
  205. # gh-19010: avoid warnings
  206. clip_kwargs = {"engine": "python"}
  207. text = dedent(
  208. """
  209. John James\tCharlie Mingus
  210. 1\t2
  211. 4\tHarry Carney
  212. """.strip()
  213. )
  214. clipboard.setText(text)
  215. df = read_clipboard(**clip_kwargs)
  216. # excel data is parsed correctly
  217. assert df.iloc[1, 1] == "Harry Carney"
  218. # having diff tab counts doesn't trigger it
  219. text = dedent(
  220. """
  221. a\t b
  222. 1 2
  223. 3 4
  224. """.strip()
  225. )
  226. clipboard.setText(text)
  227. res = read_clipboard(**clip_kwargs)
  228. text = dedent(
  229. """
  230. a b
  231. 1 2
  232. 3 4
  233. """.strip()
  234. )
  235. clipboard.setText(text)
  236. exp = read_clipboard(**clip_kwargs)
  237. tm.assert_frame_equal(res, exp)
  238. def test_infer_excel_with_nulls(self, clipboard):
  239. # GH41108
  240. text = "col1\tcol2\n1\tred\n\tblue\n2\tgreen"
  241. clipboard.setText(text)
  242. df = read_clipboard()
  243. df_expected = DataFrame(
  244. data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]}
  245. )
  246. # excel data is parsed correctly
  247. tm.assert_frame_equal(df, df_expected)
  248. @pytest.mark.parametrize(
  249. "multiindex",
  250. [
  251. ( # Can't use `dedent` here as it will remove the leading `\t`
  252. "\n".join(
  253. [
  254. "\t\t\tcol1\tcol2",
  255. "A\t0\tTrue\t1\tred",
  256. "A\t1\tTrue\t\tblue",
  257. "B\t0\tFalse\t2\tgreen",
  258. ]
  259. ),
  260. [["A", "A", "B"], [0, 1, 0], [True, True, False]],
  261. ),
  262. (
  263. "\n".join(
  264. ["\t\tcol1\tcol2", "A\t0\t1\tred", "A\t1\t\tblue", "B\t0\t2\tgreen"]
  265. ),
  266. [["A", "A", "B"], [0, 1, 0]],
  267. ),
  268. ],
  269. )
  270. def test_infer_excel_with_multiindex(self, clipboard, multiindex):
  271. # GH41108
  272. clipboard.setText(multiindex[0])
  273. df = read_clipboard()
  274. df_expected = DataFrame(
  275. data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]},
  276. index=multiindex[1],
  277. )
  278. # excel data is parsed correctly
  279. tm.assert_frame_equal(df, df_expected)
  280. def test_invalid_encoding(self, df):
  281. msg = "clipboard only supports utf-8 encoding"
  282. # test case for testing invalid encoding
  283. with pytest.raises(ValueError, match=msg):
  284. df.to_clipboard(encoding="ascii")
  285. with pytest.raises(NotImplementedError, match=msg):
  286. read_clipboard(encoding="ascii")
  287. @pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑`...", "abcd..."])
  288. def test_raw_roundtrip(self, data):
  289. # PR #25040 wide unicode wasn't copied correctly on PY3 on windows
  290. df = DataFrame({"data": [data]})
  291. df.to_clipboard()
  292. result = read_clipboard()
  293. tm.assert_frame_equal(df, result)
  294. @pytest.mark.parametrize("engine", ["c", "python"])
  295. def test_read_clipboard_dtype_backend(
  296. self, clipboard, string_storage, dtype_backend, engine, using_infer_string
  297. ):
  298. # GH#50502
  299. if dtype_backend == "pyarrow":
  300. pa = pytest.importorskip("pyarrow")
  301. if engine == "c" and string_storage == "pyarrow":
  302. # TODO avoid this exception?
  303. string_dtype = pd.ArrowDtype(pa.large_string())
  304. else:
  305. string_dtype = pd.ArrowDtype(pa.string())
  306. else:
  307. string_dtype = pd.StringDtype(string_storage)
  308. text = """a,b,c,d,e,f,g,h,i
  309. x,1,4.0,x,2,4.0,,True,False
  310. y,2,5.0,,,,,False,"""
  311. clipboard.setText(text)
  312. with pd.option_context("mode.string_storage", string_storage):
  313. result = read_clipboard(sep=",", dtype_backend=dtype_backend, engine=engine)
  314. expected = DataFrame(
  315. {
  316. "a": Series(["x", "y"], dtype=string_dtype),
  317. "b": Series([1, 2], dtype="Int64"),
  318. "c": Series([4.0, 5.0], dtype="Float64"),
  319. "d": Series(["x", None], dtype=string_dtype),
  320. "e": Series([2, NA], dtype="Int64"),
  321. "f": Series([4.0, NA], dtype="Float64"),
  322. "g": Series([NA, NA], dtype="Int64"),
  323. "h": Series([True, False], dtype="boolean"),
  324. "i": Series([False, NA], dtype="boolean"),
  325. }
  326. )
  327. if dtype_backend == "pyarrow":
  328. from pandas.arrays import ArrowExtensionArray
  329. expected = DataFrame(
  330. {
  331. col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True))
  332. for col in expected.columns
  333. }
  334. )
  335. expected["g"] = ArrowExtensionArray(pa.array([None, None]))
  336. if using_infer_string:
  337. expected.columns = expected.columns.astype(
  338. pd.StringDtype(string_storage, na_value=np.nan)
  339. )
  340. tm.assert_frame_equal(result, expected)
  341. def test_invalid_dtype_backend(self):
  342. msg = (
  343. "dtype_backend numpy is invalid, only 'numpy_nullable' and "
  344. "'pyarrow' are allowed."
  345. )
  346. with pytest.raises(ValueError, match=msg):
  347. read_clipboard(dtype_backend="numpy")
  348. def test_to_clipboard_pos_args_deprecation(self):
  349. # GH-54229
  350. df = DataFrame({"a": [1, 2, 3]})
  351. msg = (
  352. r"Starting with pandas version 3.0 all arguments of to_clipboard "
  353. r"will be keyword-only."
  354. )
  355. with tm.assert_produces_warning(FutureWarning, match=msg):
  356. df.to_clipboard(True, None)