# test_case_justify.py (13 KB)
  1. from datetime import datetime
  2. import operator
  3. import numpy as np
  4. import pytest
  5. from pandas import (
  6. Series,
  7. _testing as tm,
  8. )
  9. def test_title(any_string_dtype):
  10. s = Series(["FOO", "BAR", np.nan, "Blah", "blurg"], dtype=any_string_dtype)
  11. result = s.str.title()
  12. expected = Series(["Foo", "Bar", np.nan, "Blah", "Blurg"], dtype=any_string_dtype)
  13. tm.assert_series_equal(result, expected)
  14. def test_title_mixed_object():
  15. s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0])
  16. result = s.str.title()
  17. expected = Series(
  18. ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", None, np.nan, np.nan],
  19. dtype=object,
  20. )
  21. tm.assert_almost_equal(result, expected)
  22. def test_lower_upper(any_string_dtype):
  23. s = Series(["om", np.nan, "nom", "nom"], dtype=any_string_dtype)
  24. result = s.str.upper()
  25. expected = Series(["OM", np.nan, "NOM", "NOM"], dtype=any_string_dtype)
  26. tm.assert_series_equal(result, expected)
  27. result = result.str.lower()
  28. tm.assert_series_equal(result, s)
  29. def test_lower_upper_mixed_object():
  30. s = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0])
  31. result = s.str.upper()
  32. expected = Series(
  33. ["A", np.nan, "B", np.nan, np.nan, "FOO", None, np.nan, np.nan], dtype=object
  34. )
  35. tm.assert_series_equal(result, expected)
  36. result = s.str.lower()
  37. expected = Series(
  38. ["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan], dtype=object
  39. )
  40. tm.assert_series_equal(result, expected)
  41. @pytest.mark.parametrize(
  42. "data, expected",
  43. [
  44. (
  45. ["FOO", "BAR", np.nan, "Blah", "blurg"],
  46. ["Foo", "Bar", np.nan, "Blah", "Blurg"],
  47. ),
  48. (["a", "b", "c"], ["A", "B", "C"]),
  49. (["a b", "a bc. de"], ["A b", "A bc. de"]),
  50. ],
  51. )
  52. def test_capitalize(data, expected, any_string_dtype):
  53. s = Series(data, dtype=any_string_dtype)
  54. result = s.str.capitalize()
  55. expected = Series(expected, dtype=any_string_dtype)
  56. tm.assert_series_equal(result, expected)
  57. def test_capitalize_mixed_object():
  58. s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0])
  59. result = s.str.capitalize()
  60. expected = Series(
  61. ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", None, np.nan, np.nan],
  62. dtype=object,
  63. )
  64. tm.assert_series_equal(result, expected)
  65. def test_swapcase(any_string_dtype):
  66. s = Series(["FOO", "BAR", np.nan, "Blah", "blurg"], dtype=any_string_dtype)
  67. result = s.str.swapcase()
  68. expected = Series(["foo", "bar", np.nan, "bLAH", "BLURG"], dtype=any_string_dtype)
  69. tm.assert_series_equal(result, expected)
  70. def test_swapcase_mixed_object():
  71. s = Series(["FOO", np.nan, "bar", True, datetime.today(), "Blah", None, 1, 2.0])
  72. result = s.str.swapcase()
  73. expected = Series(
  74. ["foo", np.nan, "BAR", np.nan, np.nan, "bLAH", None, np.nan, np.nan],
  75. dtype=object,
  76. )
  77. tm.assert_series_equal(result, expected)
  78. def test_casefold():
  79. # GH25405
  80. expected = Series(["ss", np.nan, "case", "ssd"])
  81. s = Series(["ß", np.nan, "case", "ßd"])
  82. result = s.str.casefold()
  83. tm.assert_series_equal(result, expected)
  84. def test_casemethods(any_string_dtype):
  85. values = ["aaa", "bbb", "CCC", "Dddd", "eEEE"]
  86. s = Series(values, dtype=any_string_dtype)
  87. assert s.str.lower().tolist() == [v.lower() for v in values]
  88. assert s.str.upper().tolist() == [v.upper() for v in values]
  89. assert s.str.title().tolist() == [v.title() for v in values]
  90. assert s.str.capitalize().tolist() == [v.capitalize() for v in values]
  91. assert s.str.swapcase().tolist() == [v.swapcase() for v in values]
  92. def test_pad(any_string_dtype):
  93. s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
  94. result = s.str.pad(5, side="left")
  95. expected = Series(
  96. [" a", " b", np.nan, " c", np.nan, "eeeeee"], dtype=any_string_dtype
  97. )
  98. tm.assert_series_equal(result, expected)
  99. result = s.str.pad(5, side="right")
  100. expected = Series(
  101. ["a ", "b ", np.nan, "c ", np.nan, "eeeeee"], dtype=any_string_dtype
  102. )
  103. tm.assert_series_equal(result, expected)
  104. result = s.str.pad(5, side="both")
  105. expected = Series(
  106. [" a ", " b ", np.nan, " c ", np.nan, "eeeeee"], dtype=any_string_dtype
  107. )
  108. tm.assert_series_equal(result, expected)
  109. def test_pad_mixed_object():
  110. s = Series(["a", np.nan, "b", True, datetime.today(), "ee", None, 1, 2.0])
  111. result = s.str.pad(5, side="left")
  112. expected = Series(
  113. [" a", np.nan, " b", np.nan, np.nan, " ee", None, np.nan, np.nan],
  114. dtype=object,
  115. )
  116. tm.assert_series_equal(result, expected)
  117. result = s.str.pad(5, side="right")
  118. expected = Series(
  119. ["a ", np.nan, "b ", np.nan, np.nan, "ee ", None, np.nan, np.nan],
  120. dtype=object,
  121. )
  122. tm.assert_series_equal(result, expected)
  123. result = s.str.pad(5, side="both")
  124. expected = Series(
  125. [" a ", np.nan, " b ", np.nan, np.nan, " ee ", None, np.nan, np.nan],
  126. dtype=object,
  127. )
  128. tm.assert_series_equal(result, expected)
  129. def test_pad_fillchar(any_string_dtype):
  130. s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
  131. result = s.str.pad(5, side="left", fillchar="X")
  132. expected = Series(
  133. ["XXXXa", "XXXXb", np.nan, "XXXXc", np.nan, "eeeeee"], dtype=any_string_dtype
  134. )
  135. tm.assert_series_equal(result, expected)
  136. result = s.str.pad(5, side="right", fillchar="X")
  137. expected = Series(
  138. ["aXXXX", "bXXXX", np.nan, "cXXXX", np.nan, "eeeeee"], dtype=any_string_dtype
  139. )
  140. tm.assert_series_equal(result, expected)
  141. result = s.str.pad(5, side="both", fillchar="X")
  142. expected = Series(
  143. ["XXaXX", "XXbXX", np.nan, "XXcXX", np.nan, "eeeeee"], dtype=any_string_dtype
  144. )
  145. tm.assert_series_equal(result, expected)
  146. def test_pad_fillchar_bad_arg_raises(any_string_dtype):
  147. s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
  148. msg = "fillchar must be a character, not str"
  149. with pytest.raises(TypeError, match=msg):
  150. s.str.pad(5, fillchar="XY")
  151. msg = "fillchar must be a character, not int"
  152. with pytest.raises(TypeError, match=msg):
  153. s.str.pad(5, fillchar=5)
  154. @pytest.mark.parametrize("method_name", ["center", "ljust", "rjust", "zfill", "pad"])
  155. def test_pad_width_bad_arg_raises(method_name, any_string_dtype):
  156. # see gh-13598
  157. s = Series(["1", "22", "a", "bb"], dtype=any_string_dtype)
  158. op = operator.methodcaller(method_name, "f")
  159. msg = "width must be of integer type, not str"
  160. with pytest.raises(TypeError, match=msg):
  161. op(s.str)
  162. def test_center_ljust_rjust(any_string_dtype):
  163. s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
  164. result = s.str.center(5)
  165. expected = Series(
  166. [" a ", " b ", np.nan, " c ", np.nan, "eeeeee"], dtype=any_string_dtype
  167. )
  168. tm.assert_series_equal(result, expected)
  169. result = s.str.ljust(5)
  170. expected = Series(
  171. ["a ", "b ", np.nan, "c ", np.nan, "eeeeee"], dtype=any_string_dtype
  172. )
  173. tm.assert_series_equal(result, expected)
  174. result = s.str.rjust(5)
  175. expected = Series(
  176. [" a", " b", np.nan, " c", np.nan, "eeeeee"], dtype=any_string_dtype
  177. )
  178. tm.assert_series_equal(result, expected)
  179. def test_center_ljust_rjust_mixed_object():
  180. s = Series(["a", np.nan, "b", True, datetime.today(), "c", "eee", None, 1, 2.0])
  181. result = s.str.center(5)
  182. expected = Series(
  183. [
  184. " a ",
  185. np.nan,
  186. " b ",
  187. np.nan,
  188. np.nan,
  189. " c ",
  190. " eee ",
  191. None,
  192. np.nan,
  193. np.nan,
  194. ],
  195. dtype=object,
  196. )
  197. tm.assert_series_equal(result, expected)
  198. result = s.str.ljust(5)
  199. expected = Series(
  200. [
  201. "a ",
  202. np.nan,
  203. "b ",
  204. np.nan,
  205. np.nan,
  206. "c ",
  207. "eee ",
  208. None,
  209. np.nan,
  210. np.nan,
  211. ],
  212. dtype=object,
  213. )
  214. tm.assert_series_equal(result, expected)
  215. result = s.str.rjust(5)
  216. expected = Series(
  217. [
  218. " a",
  219. np.nan,
  220. " b",
  221. np.nan,
  222. np.nan,
  223. " c",
  224. " eee",
  225. None,
  226. np.nan,
  227. np.nan,
  228. ],
  229. dtype=object,
  230. )
  231. tm.assert_series_equal(result, expected)
  232. def test_center_ljust_rjust_fillchar(any_string_dtype):
  233. # GH#54533, GH#54792
  234. s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)
  235. result = s.str.center(5, fillchar="X")
  236. expected = Series(
  237. ["XXaXX", "XXbbX", "Xcccc", "ddddd", "eeeeee"], dtype=any_string_dtype
  238. )
  239. tm.assert_series_equal(result, expected)
  240. expected = np.array([v.center(5, "X") for v in np.array(s)], dtype=np.object_)
  241. tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
  242. result = s.str.ljust(5, fillchar="X")
  243. expected = Series(
  244. ["aXXXX", "bbXXX", "ccccX", "ddddd", "eeeeee"], dtype=any_string_dtype
  245. )
  246. tm.assert_series_equal(result, expected)
  247. expected = np.array([v.ljust(5, "X") for v in np.array(s)], dtype=np.object_)
  248. tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
  249. result = s.str.rjust(5, fillchar="X")
  250. expected = Series(
  251. ["XXXXa", "XXXbb", "Xcccc", "ddddd", "eeeeee"], dtype=any_string_dtype
  252. )
  253. tm.assert_series_equal(result, expected)
  254. expected = np.array([v.rjust(5, "X") for v in np.array(s)], dtype=np.object_)
  255. tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
  256. def test_center_ljust_rjust_fillchar_bad_arg_raises(any_string_dtype):
  257. s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)
  258. # If fillchar is not a character, normal str raises TypeError
  259. # 'aaa'.ljust(5, 'XY')
  260. # TypeError: must be char, not str
  261. template = "fillchar must be a character, not {dtype}"
  262. with pytest.raises(TypeError, match=template.format(dtype="str")):
  263. s.str.center(5, fillchar="XY")
  264. with pytest.raises(TypeError, match=template.format(dtype="str")):
  265. s.str.ljust(5, fillchar="XY")
  266. with pytest.raises(TypeError, match=template.format(dtype="str")):
  267. s.str.rjust(5, fillchar="XY")
  268. with pytest.raises(TypeError, match=template.format(dtype="int")):
  269. s.str.center(5, fillchar=1)
  270. with pytest.raises(TypeError, match=template.format(dtype="int")):
  271. s.str.ljust(5, fillchar=1)
  272. with pytest.raises(TypeError, match=template.format(dtype="int")):
  273. s.str.rjust(5, fillchar=1)
  274. def test_zfill(any_string_dtype):
  275. s = Series(["1", "22", "aaa", "333", "45678"], dtype=any_string_dtype)
  276. result = s.str.zfill(5)
  277. expected = Series(
  278. ["00001", "00022", "00aaa", "00333", "45678"], dtype=any_string_dtype
  279. )
  280. tm.assert_series_equal(result, expected)
  281. expected = np.array([v.zfill(5) for v in np.array(s)], dtype=np.object_)
  282. tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
  283. result = s.str.zfill(3)
  284. expected = Series(["001", "022", "aaa", "333", "45678"], dtype=any_string_dtype)
  285. tm.assert_series_equal(result, expected)
  286. expected = np.array([v.zfill(3) for v in np.array(s)], dtype=np.object_)
  287. tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), expected)
  288. s = Series(["1", np.nan, "aaa", np.nan, "45678"], dtype=any_string_dtype)
  289. result = s.str.zfill(5)
  290. expected = Series(
  291. ["00001", np.nan, "00aaa", np.nan, "45678"], dtype=any_string_dtype
  292. )
  293. tm.assert_series_equal(result, expected)
  294. def test_wrap(any_string_dtype):
  295. # test values are: two words less than width, two words equal to width,
  296. # two words greater than width, one word less than width, one word
  297. # equal to width, one word greater than width, multiple tokens with
  298. # trailing whitespace equal to width
  299. s = Series(
  300. [
  301. "hello world",
  302. "hello world!",
  303. "hello world!!",
  304. "abcdefabcde",
  305. "abcdefabcdef",
  306. "abcdefabcdefa",
  307. "ab ab ab ab ",
  308. "ab ab ab ab a",
  309. "\t",
  310. ],
  311. dtype=any_string_dtype,
  312. )
  313. # expected values
  314. expected = Series(
  315. [
  316. "hello world",
  317. "hello world!",
  318. "hello\nworld!!",
  319. "abcdefabcde",
  320. "abcdefabcdef",
  321. "abcdefabcdef\na",
  322. "ab ab ab ab",
  323. "ab ab ab ab\na",
  324. "",
  325. ],
  326. dtype=any_string_dtype,
  327. )
  328. result = s.str.wrap(12, break_long_words=True)
  329. tm.assert_series_equal(result, expected)
  330. def test_wrap_unicode(any_string_dtype):
  331. # test with pre and post whitespace (non-unicode), NaN, and non-ascii Unicode
  332. s = Series(
  333. [" pre ", np.nan, "\xac\u20ac\U00008000 abadcafe"], dtype=any_string_dtype
  334. )
  335. expected = Series(
  336. [" pre", np.nan, "\xac\u20ac\U00008000 ab\nadcafe"], dtype=any_string_dtype
  337. )
  338. result = s.str.wrap(6)
  339. tm.assert_series_equal(result, expected)