# test_formats.py
  1. from datetime import (
  2. datetime,
  3. timedelta,
  4. )
  5. import numpy as np
  6. import pytest
  7. import pandas as pd
  8. from pandas import (
  9. Categorical,
  10. DataFrame,
  11. Index,
  12. Series,
  13. date_range,
  14. option_context,
  15. period_range,
  16. timedelta_range,
  17. )
  18. import pandas._testing as tm
  19. class TestSeriesRepr:
  20. def test_multilevel_name_print_0(self):
  21. # GH#55415 None does not get printed, but 0 does
  22. # (matching DataFrame and flat index behavior)
  23. mi = pd.MultiIndex.from_product([range(2, 3), range(3, 4)], names=[0, None])
  24. ser = Series(1.5, index=mi)
  25. res = repr(ser)
  26. expected = "0 \n2 3 1.5\ndtype: float64"
  27. assert res == expected
  28. def test_multilevel_name_print(self, lexsorted_two_level_string_multiindex):
  29. index = lexsorted_two_level_string_multiindex
  30. ser = Series(range(len(index)), index=index, name="sth")
  31. expected = [
  32. "first second",
  33. "foo one 0",
  34. " two 1",
  35. " three 2",
  36. "bar one 3",
  37. " two 4",
  38. "baz two 5",
  39. " three 6",
  40. "qux one 7",
  41. " two 8",
  42. " three 9",
  43. "Name: sth, dtype: int64",
  44. ]
  45. expected = "\n".join(expected)
  46. assert repr(ser) == expected
  47. def test_small_name_printing(self):
  48. # Test small Series.
  49. s = Series([0, 1, 2])
  50. s.name = "test"
  51. assert "Name: test" in repr(s)
  52. s.name = None
  53. assert "Name:" not in repr(s)
  54. def test_big_name_printing(self):
  55. # Test big Series (diff code path).
  56. s = Series(range(1000))
  57. s.name = "test"
  58. assert "Name: test" in repr(s)
  59. s.name = None
  60. assert "Name:" not in repr(s)
  61. def test_empty_name_printing(self):
  62. s = Series(index=date_range("20010101", "20020101"), name="test", dtype=object)
  63. assert "Name: test" in repr(s)
  64. @pytest.mark.parametrize("args", [(), (0, -1)])
  65. def test_float_range(self, args):
  66. str(
  67. Series(
  68. np.random.default_rng(2).standard_normal(1000),
  69. index=np.arange(1000, *args),
  70. )
  71. )
  72. def test_empty_object(self):
  73. # empty
  74. str(Series(dtype=object))
  75. def test_string(self, string_series):
  76. str(string_series)
  77. str(string_series.astype(int))
  78. # with NaNs
  79. string_series[5:7] = np.nan
  80. str(string_series)
  81. def test_object(self, object_series):
  82. str(object_series)
  83. def test_datetime(self, datetime_series):
  84. str(datetime_series)
  85. # with Nones
  86. ots = datetime_series.astype("O")
  87. ots[::2] = None
  88. repr(ots)
  89. @pytest.mark.parametrize(
  90. "name",
  91. [
  92. "",
  93. 1,
  94. 1.2,
  95. "foo",
  96. "\u03B1\u03B2\u03B3",
  97. "loooooooooooooooooooooooooooooooooooooooooooooooooooong",
  98. ("foo", "bar", "baz"),
  99. (1, 2),
  100. ("foo", 1, 2.3),
  101. ("\u03B1", "\u03B2", "\u03B3"),
  102. ("\u03B1", "bar"),
  103. ],
  104. )
  105. def test_various_names(self, name, string_series):
  106. # various names
  107. string_series.name = name
  108. repr(string_series)
  109. def test_tuple_name(self):
  110. biggie = Series(
  111. np.random.default_rng(2).standard_normal(1000),
  112. index=np.arange(1000),
  113. name=("foo", "bar", "baz"),
  114. )
  115. repr(biggie)
  116. @pytest.mark.parametrize("arg", [100, 1001])
  117. def test_tidy_repr_name_0(self, arg):
  118. # tidy repr
  119. ser = Series(np.random.default_rng(2).standard_normal(arg), name=0)
  120. rep_str = repr(ser)
  121. assert "Name: 0" in rep_str
  122. def test_newline(self, any_string_dtype):
  123. ser = Series(
  124. ["a\n\r\tb"],
  125. name="a\n\r\td",
  126. index=Index(["a\n\r\tf"], dtype=any_string_dtype),
  127. dtype=any_string_dtype,
  128. )
  129. assert "\t" not in repr(ser)
  130. assert "\r" not in repr(ser)
  131. assert "a\n" not in repr(ser)
  132. @pytest.mark.parametrize(
  133. "name, expected",
  134. [
  135. ["foo", "Series([], Name: foo, dtype: int64)"],
  136. [None, "Series([], dtype: int64)"],
  137. ],
  138. )
  139. def test_empty_int64(self, name, expected):
  140. # with empty series (#4651)
  141. s = Series([], dtype=np.int64, name=name)
  142. assert repr(s) == expected
  143. def test_repr_bool_fails(self, capsys):
  144. s = Series(
  145. [
  146. DataFrame(np.random.default_rng(2).standard_normal((2, 2)))
  147. for i in range(5)
  148. ]
  149. )
  150. # It works (with no Cython exception barf)!
  151. repr(s)
  152. captured = capsys.readouterr()
  153. assert captured.err == ""
  154. def test_repr_name_iterable_indexable(self):
  155. s = Series([1, 2, 3], name=np.int64(3))
  156. # it works!
  157. repr(s)
  158. s.name = ("\u05d0",) * 2
  159. repr(s)
  160. def test_repr_max_rows(self):
  161. # GH 6863
  162. with option_context("display.max_rows", None):
  163. str(Series(range(1001))) # should not raise exception
  164. def test_unicode_string_with_unicode(self):
  165. df = Series(["\u05d0"], name="\u05d1")
  166. str(df)
  167. ser = Series(["\u03c3"] * 10)
  168. repr(ser)
  169. ser2 = Series(["\u05d0"] * 1000)
  170. ser2.name = "title1"
  171. repr(ser2)
  172. def test_str_to_bytes_raises(self):
  173. # GH 26447
  174. df = Series(["abc"], name="abc")
  175. msg = "^'str' object cannot be interpreted as an integer$"
  176. with pytest.raises(TypeError, match=msg):
  177. bytes(df)
  178. def test_timeseries_repr_object_dtype(self):
  179. index = Index(
  180. [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], dtype=object
  181. )
  182. ts = Series(np.random.default_rng(2).standard_normal(len(index)), index)
  183. repr(ts)
  184. ts = Series(
  185. np.arange(20, dtype=np.float64), index=date_range("2020-01-01", periods=20)
  186. )
  187. assert repr(ts).splitlines()[-1].startswith("Freq:")
  188. ts2 = ts.iloc[np.random.default_rng(2).integers(0, len(ts) - 1, 400)]
  189. repr(ts2).splitlines()[-1]
  190. def test_latex_repr(self):
  191. pytest.importorskip("jinja2") # uses Styler implementation
  192. result = r"""\begin{tabular}{ll}
  193. \toprule
  194. & 0 \\
  195. \midrule
  196. 0 & $\alpha$ \\
  197. 1 & b \\
  198. 2 & c \\
  199. \bottomrule
  200. \end{tabular}
  201. """
  202. with option_context(
  203. "styler.format.escape", None, "styler.render.repr", "latex"
  204. ):
  205. s = Series([r"$\alpha$", "b", "c"])
  206. assert result == s._repr_latex_()
  207. assert s._repr_latex_() is None
  208. def test_index_repr_in_frame_with_nan(self):
  209. # see gh-25061
  210. i = Index([1, np.nan])
  211. s = Series([1, 2], index=i)
  212. exp = """1.0 1\nNaN 2\ndtype: int64"""
  213. assert repr(s) == exp
  214. def test_format_pre_1900_dates(self):
  215. rng = date_range("1/1/1850", "1/1/1950", freq="YE-DEC")
  216. msg = "DatetimeIndex.format is deprecated"
  217. with tm.assert_produces_warning(FutureWarning, match=msg):
  218. rng.format()
  219. ts = Series(1, index=rng)
  220. repr(ts)
  221. def test_series_repr_nat(self):
  222. series = Series([0, 1000, 2000, pd.NaT._value], dtype="M8[ns]")
  223. result = repr(series)
  224. expected = (
  225. "0 1970-01-01 00:00:00.000000\n"
  226. "1 1970-01-01 00:00:00.000001\n"
  227. "2 1970-01-01 00:00:00.000002\n"
  228. "3 NaT\n"
  229. "dtype: datetime64[ns]"
  230. )
  231. assert result == expected
  232. def test_float_repr(self):
  233. # GH#35603
  234. # check float format when cast to object
  235. ser = Series([1.0]).astype(object)
  236. expected = "0 1.0\ndtype: object"
  237. assert repr(ser) == expected
  238. def test_different_null_objects(self):
  239. # GH#45263
  240. ser = Series([1, 2, 3, 4], [True, None, np.nan, pd.NaT])
  241. result = repr(ser)
  242. expected = "True 1\nNone 2\nNaN 3\nNaT 4\ndtype: int64"
  243. assert result == expected
  244. class TestCategoricalRepr:
  245. def test_categorical_repr_unicode(self):
  246. # see gh-21002
  247. class County:
  248. name = "San Sebastián"
  249. state = "PR"
  250. def __repr__(self) -> str:
  251. return self.name + ", " + self.state
  252. cat = Categorical([County() for _ in range(61)])
  253. idx = Index(cat)
  254. ser = idx.to_series()
  255. repr(ser)
  256. str(ser)
  257. def test_categorical_repr(self, using_infer_string):
  258. a = Series(Categorical([1, 2, 3, 4]))
  259. exp = (
  260. "0 1\n1 2\n2 3\n3 4\n"
  261. "dtype: category\nCategories (4, int64): [1, 2, 3, 4]"
  262. )
  263. assert exp == a.__str__()
  264. a = Series(Categorical(["a", "b"] * 25))
  265. exp = (
  266. "0 a\n1 b\n"
  267. " ..\n"
  268. "48 a\n49 b\n"
  269. "Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
  270. )
  271. if using_infer_string:
  272. exp = exp.replace("object", "str")
  273. with option_context("display.max_rows", 5):
  274. assert exp == repr(a)
  275. levs = list("abcdefghijklmnopqrstuvwxyz")
  276. a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
  277. exp = (
  278. "0 a\n1 b\n"
  279. "dtype: category\n"
  280. "Categories (26, object): ['a' < 'b' < 'c' < 'd' ... "
  281. "'w' < 'x' < 'y' < 'z']"
  282. )
  283. if using_infer_string:
  284. exp = exp.replace("object", "str")
  285. assert exp == a.__str__()
  286. def test_categorical_series_repr(self):
  287. s = Series(Categorical([1, 2, 3]))
  288. exp = """0 1
  289. 1 2
  290. 2 3
  291. dtype: category
  292. Categories (3, int64): [1, 2, 3]"""
  293. assert repr(s) == exp
  294. s = Series(Categorical(np.arange(10)))
  295. exp = f"""0 0
  296. 1 1
  297. 2 2
  298. 3 3
  299. 4 4
  300. 5 5
  301. 6 6
  302. 7 7
  303. 8 8
  304. 9 9
  305. dtype: category
  306. Categories (10, {np.dtype(int)}): [0, 1, 2, 3, ..., 6, 7, 8, 9]"""
  307. assert repr(s) == exp
  308. def test_categorical_series_repr_ordered(self):
  309. s = Series(Categorical([1, 2, 3], ordered=True))
  310. exp = """0 1
  311. 1 2
  312. 2 3
  313. dtype: category
  314. Categories (3, int64): [1 < 2 < 3]"""
  315. assert repr(s) == exp
  316. s = Series(Categorical(np.arange(10), ordered=True))
  317. exp = f"""0 0
  318. 1 1
  319. 2 2
  320. 3 3
  321. 4 4
  322. 5 5
  323. 6 6
  324. 7 7
  325. 8 8
  326. 9 9
  327. dtype: category
  328. Categories (10, {np.dtype(int)}): [0 < 1 < 2 < 3 ... 6 < 7 < 8 < 9]"""
  329. assert repr(s) == exp
  330. def test_categorical_series_repr_datetime(self):
  331. idx = date_range("2011-01-01 09:00", freq="h", periods=5)
  332. s = Series(Categorical(idx))
  333. exp = """0 2011-01-01 09:00:00
  334. 1 2011-01-01 10:00:00
  335. 2 2011-01-01 11:00:00
  336. 3 2011-01-01 12:00:00
  337. 4 2011-01-01 13:00:00
  338. dtype: category
  339. Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,
  340. 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" # noqa: E501
  341. assert repr(s) == exp
  342. idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern")
  343. s = Series(Categorical(idx))
  344. exp = """0 2011-01-01 09:00:00-05:00
  345. 1 2011-01-01 10:00:00-05:00
  346. 2 2011-01-01 11:00:00-05:00
  347. 3 2011-01-01 12:00:00-05:00
  348. 4 2011-01-01 13:00:00-05:00
  349. dtype: category
  350. Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,
  351. 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,
  352. 2011-01-01 13:00:00-05:00]""" # noqa: E501
  353. assert repr(s) == exp
  354. def test_categorical_series_repr_datetime_ordered(self):
  355. idx = date_range("2011-01-01 09:00", freq="h", periods=5)
  356. s = Series(Categorical(idx, ordered=True))
  357. exp = """0 2011-01-01 09:00:00
  358. 1 2011-01-01 10:00:00
  359. 2 2011-01-01 11:00:00
  360. 3 2011-01-01 12:00:00
  361. 4 2011-01-01 13:00:00
  362. dtype: category
  363. Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
  364. 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa: E501
  365. assert repr(s) == exp
  366. idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern")
  367. s = Series(Categorical(idx, ordered=True))
  368. exp = """0 2011-01-01 09:00:00-05:00
  369. 1 2011-01-01 10:00:00-05:00
  370. 2 2011-01-01 11:00:00-05:00
  371. 3 2011-01-01 12:00:00-05:00
  372. 4 2011-01-01 13:00:00-05:00
  373. dtype: category
  374. Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
  375. 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
  376. 2011-01-01 13:00:00-05:00]""" # noqa: E501
  377. assert repr(s) == exp
  378. def test_categorical_series_repr_period(self):
  379. idx = period_range("2011-01-01 09:00", freq="h", periods=5)
  380. s = Series(Categorical(idx))
  381. exp = """0 2011-01-01 09:00
  382. 1 2011-01-01 10:00
  383. 2 2011-01-01 11:00
  384. 3 2011-01-01 12:00
  385. 4 2011-01-01 13:00
  386. dtype: category
  387. Categories (5, period[h]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
  388. 2011-01-01 13:00]""" # noqa: E501
  389. assert repr(s) == exp
  390. idx = period_range("2011-01", freq="M", periods=5)
  391. s = Series(Categorical(idx))
  392. exp = """0 2011-01
  393. 1 2011-02
  394. 2 2011-03
  395. 3 2011-04
  396. 4 2011-05
  397. dtype: category
  398. Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
  399. assert repr(s) == exp
  400. def test_categorical_series_repr_period_ordered(self):
  401. idx = period_range("2011-01-01 09:00", freq="h", periods=5)
  402. s = Series(Categorical(idx, ordered=True))
  403. exp = """0 2011-01-01 09:00
  404. 1 2011-01-01 10:00
  405. 2 2011-01-01 11:00
  406. 3 2011-01-01 12:00
  407. 4 2011-01-01 13:00
  408. dtype: category
  409. Categories (5, period[h]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
  410. 2011-01-01 13:00]""" # noqa: E501
  411. assert repr(s) == exp
  412. idx = period_range("2011-01", freq="M", periods=5)
  413. s = Series(Categorical(idx, ordered=True))
  414. exp = """0 2011-01
  415. 1 2011-02
  416. 2 2011-03
  417. 3 2011-04
  418. 4 2011-05
  419. dtype: category
  420. Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
  421. assert repr(s) == exp
  422. def test_categorical_series_repr_timedelta(self):
  423. idx = timedelta_range("1 days", periods=5)
  424. s = Series(Categorical(idx))
  425. exp = """0 1 days
  426. 1 2 days
  427. 2 3 days
  428. 3 4 days
  429. 4 5 days
  430. dtype: category
  431. Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""
  432. assert repr(s) == exp
  433. idx = timedelta_range("1 hours", periods=10)
  434. s = Series(Categorical(idx))
  435. exp = """0 0 days 01:00:00
  436. 1 1 days 01:00:00
  437. 2 2 days 01:00:00
  438. 3 3 days 01:00:00
  439. 4 4 days 01:00:00
  440. 5 5 days 01:00:00
  441. 6 6 days 01:00:00
  442. 7 7 days 01:00:00
  443. 8 8 days 01:00:00
  444. 9 9 days 01:00:00
  445. dtype: category
  446. Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
  447. 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00,
  448. 8 days 01:00:00, 9 days 01:00:00]""" # noqa: E501
  449. assert repr(s) == exp
  450. def test_categorical_series_repr_timedelta_ordered(self):
  451. idx = timedelta_range("1 days", periods=5)
  452. s = Series(Categorical(idx, ordered=True))
  453. exp = """0 1 days
  454. 1 2 days
  455. 2 3 days
  456. 3 4 days
  457. 4 5 days
  458. dtype: category
  459. Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""
  460. assert repr(s) == exp
  461. idx = timedelta_range("1 hours", periods=10)
  462. s = Series(Categorical(idx, ordered=True))
  463. exp = """0 0 days 01:00:00
  464. 1 1 days 01:00:00
  465. 2 2 days 01:00:00
  466. 3 3 days 01:00:00
  467. 4 4 days 01:00:00
  468. 5 5 days 01:00:00
  469. 6 6 days 01:00:00
  470. 7 7 days 01:00:00
  471. 8 8 days 01:00:00
  472. 9 9 days 01:00:00
  473. dtype: category
  474. Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
  475. 3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 <
  476. 8 days 01:00:00 < 9 days 01:00:00]""" # noqa: E501
  477. assert repr(s) == exp