test_rank.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563
  1. from itertools import chain
  2. import operator
  3. import numpy as np
  4. import pytest
  5. from pandas._libs.algos import (
  6. Infinity,
  7. NegInfinity,
  8. )
  9. import pandas.util._test_decorators as td
  10. from pandas import (
  11. NA,
  12. NaT,
  13. Series,
  14. Timestamp,
  15. date_range,
  16. )
  17. import pandas._testing as tm
  18. from pandas.api.types import CategoricalDtype
  19. @pytest.fixture
  20. def ser():
  21. return Series([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3])
  22. @pytest.fixture(
  23. params=[
  24. ["average", np.array([1.5, 5.5, 7.0, 3.5, np.nan, 3.5, 1.5, 8.0, np.nan, 5.5])],
  25. ["min", np.array([1, 5, 7, 3, np.nan, 3, 1, 8, np.nan, 5])],
  26. ["max", np.array([2, 6, 7, 4, np.nan, 4, 2, 8, np.nan, 6])],
  27. ["first", np.array([1, 5, 7, 3, np.nan, 4, 2, 8, np.nan, 6])],
  28. ["dense", np.array([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3])],
  29. ],
  30. ids=lambda x: x[0],
  31. )
  32. def results(request):
  33. return request.param
  34. @pytest.fixture(
  35. params=[
  36. "object",
  37. "float64",
  38. "int64",
  39. "Float64",
  40. "Int64",
  41. pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow")),
  42. pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")),
  43. pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
  44. "string[python]",
  45. "str",
  46. ]
  47. )
  48. def dtype(request):
  49. return request.param
  50. def expected_dtype(dtype, method, pct=False):
  51. exp_dtype = "float64"
  52. # elif dtype in ["Int64", "Float64", "string[pyarrow]", "string[python]"]:
  53. if dtype in ["string[pyarrow]"]:
  54. exp_dtype = "Float64"
  55. elif dtype in ["float64[pyarrow]", "int64[pyarrow]"]:
  56. if method == "average" or pct:
  57. exp_dtype = "double[pyarrow]"
  58. else:
  59. exp_dtype = "uint64[pyarrow]"
  60. return exp_dtype
  61. class TestSeriesRank:
  62. def test_rank(self, datetime_series):
  63. sp_stats = pytest.importorskip("scipy.stats")
  64. datetime_series[::2] = np.nan
  65. datetime_series[:10:3] = 4.0
  66. ranks = datetime_series.rank()
  67. oranks = datetime_series.astype("O").rank()
  68. tm.assert_series_equal(ranks, oranks)
  69. mask = np.isnan(datetime_series)
  70. filled = datetime_series.fillna(np.inf)
  71. # rankdata returns a ndarray
  72. exp = Series(sp_stats.rankdata(filled), index=filled.index, name="ts")
  73. exp[mask] = np.nan
  74. tm.assert_series_equal(ranks, exp)
  75. iseries = Series(np.arange(5).repeat(2))
  76. iranks = iseries.rank()
  77. exp = iseries.astype(float).rank()
  78. tm.assert_series_equal(iranks, exp)
  79. iseries = Series(np.arange(5)) + 1.0
  80. exp = iseries / 5.0
  81. iranks = iseries.rank(pct=True)
  82. tm.assert_series_equal(iranks, exp)
  83. iseries = Series(np.repeat(1, 100))
  84. exp = Series(np.repeat(0.505, 100))
  85. iranks = iseries.rank(pct=True)
  86. tm.assert_series_equal(iranks, exp)
  87. # Explicit cast to float to avoid implicit cast when setting nan
  88. iseries = iseries.astype("float")
  89. iseries[1] = np.nan
  90. exp = Series(np.repeat(50.0 / 99.0, 100))
  91. exp[1] = np.nan
  92. iranks = iseries.rank(pct=True)
  93. tm.assert_series_equal(iranks, exp)
  94. iseries = Series(np.arange(5)) + 1.0
  95. iseries[4] = np.nan
  96. exp = iseries / 4.0
  97. iranks = iseries.rank(pct=True)
  98. tm.assert_series_equal(iranks, exp)
  99. iseries = Series(np.repeat(np.nan, 100))
  100. exp = iseries.copy()
  101. iranks = iseries.rank(pct=True)
  102. tm.assert_series_equal(iranks, exp)
  103. # Explicit cast to float to avoid implicit cast when setting nan
  104. iseries = Series(np.arange(5), dtype="float") + 1
  105. iseries[4] = np.nan
  106. exp = iseries / 4.0
  107. iranks = iseries.rank(pct=True)
  108. tm.assert_series_equal(iranks, exp)
  109. rng = date_range("1/1/1990", periods=5)
  110. # Explicit cast to float to avoid implicit cast when setting nan
  111. iseries = Series(np.arange(5), rng, dtype="float") + 1
  112. iseries.iloc[4] = np.nan
  113. exp = iseries / 4.0
  114. iranks = iseries.rank(pct=True)
  115. tm.assert_series_equal(iranks, exp)
  116. iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1])
  117. exp = Series([2, 1, 3, 5, 4, 6.0])
  118. iranks = iseries.rank()
  119. tm.assert_series_equal(iranks, exp)
  120. # GH 5968
  121. iseries = Series(["3 day", "1 day 10m", "-2 day", NaT], dtype="m8[ns]")
  122. exp = Series([3, 2, 1, np.nan])
  123. iranks = iseries.rank()
  124. tm.assert_series_equal(iranks, exp)
  125. values = np.array(
  126. [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40],
  127. dtype="float64",
  128. )
  129. random_order = np.random.default_rng(2).permutation(len(values))
  130. iseries = Series(values[random_order])
  131. exp = Series(random_order + 1.0, dtype="float64")
  132. iranks = iseries.rank()
  133. tm.assert_series_equal(iranks, exp)
  134. def test_rank_categorical(self):
  135. # GH issue #15420 rank incorrectly orders ordered categories
  136. # Test ascending/descending ranking for ordered categoricals
  137. exp = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
  138. exp_desc = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0])
  139. ordered = Series(
  140. ["first", "second", "third", "fourth", "fifth", "sixth"]
  141. ).astype(
  142. CategoricalDtype(
  143. categories=["first", "second", "third", "fourth", "fifth", "sixth"],
  144. ordered=True,
  145. )
  146. )
  147. tm.assert_series_equal(ordered.rank(), exp)
  148. tm.assert_series_equal(ordered.rank(ascending=False), exp_desc)
  149. # Unordered categoricals should be ranked as objects
  150. unordered = Series(
  151. ["first", "second", "third", "fourth", "fifth", "sixth"]
  152. ).astype(
  153. CategoricalDtype(
  154. categories=["first", "second", "third", "fourth", "fifth", "sixth"],
  155. ordered=False,
  156. )
  157. )
  158. exp_unordered = Series([2.0, 4.0, 6.0, 3.0, 1.0, 5.0])
  159. res = unordered.rank()
  160. tm.assert_series_equal(res, exp_unordered)
  161. unordered1 = Series([1, 2, 3, 4, 5, 6]).astype(
  162. CategoricalDtype([1, 2, 3, 4, 5, 6], False)
  163. )
  164. exp_unordered1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
  165. res1 = unordered1.rank()
  166. tm.assert_series_equal(res1, exp_unordered1)
  167. # Test na_option for rank data
  168. na_ser = Series(
  169. ["first", "second", "third", "fourth", "fifth", "sixth", np.nan]
  170. ).astype(
  171. CategoricalDtype(
  172. ["first", "second", "third", "fourth", "fifth", "sixth", "seventh"],
  173. True,
  174. )
  175. )
  176. exp_top = Series([2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 1.0])
  177. exp_bot = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
  178. exp_keep = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.nan])
  179. tm.assert_series_equal(na_ser.rank(na_option="top"), exp_top)
  180. tm.assert_series_equal(na_ser.rank(na_option="bottom"), exp_bot)
  181. tm.assert_series_equal(na_ser.rank(na_option="keep"), exp_keep)
  182. # Test na_option for rank data with ascending False
  183. exp_top = Series([7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0])
  184. exp_bot = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 7.0])
  185. exp_keep = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, np.nan])
  186. tm.assert_series_equal(na_ser.rank(na_option="top", ascending=False), exp_top)
  187. tm.assert_series_equal(
  188. na_ser.rank(na_option="bottom", ascending=False), exp_bot
  189. )
  190. tm.assert_series_equal(na_ser.rank(na_option="keep", ascending=False), exp_keep)
  191. # Test invalid values for na_option
  192. msg = "na_option must be one of 'keep', 'top', or 'bottom'"
  193. with pytest.raises(ValueError, match=msg):
  194. na_ser.rank(na_option="bad", ascending=False)
  195. # invalid type
  196. with pytest.raises(ValueError, match=msg):
  197. na_ser.rank(na_option=True, ascending=False)
  198. # Test with pct=True
  199. na_ser = Series(["first", "second", "third", "fourth", np.nan]).astype(
  200. CategoricalDtype(["first", "second", "third", "fourth"], True)
  201. )
  202. exp_top = Series([0.4, 0.6, 0.8, 1.0, 0.2])
  203. exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.0])
  204. exp_keep = Series([0.25, 0.5, 0.75, 1.0, np.nan])
  205. tm.assert_series_equal(na_ser.rank(na_option="top", pct=True), exp_top)
  206. tm.assert_series_equal(na_ser.rank(na_option="bottom", pct=True), exp_bot)
  207. tm.assert_series_equal(na_ser.rank(na_option="keep", pct=True), exp_keep)
  208. def test_rank_signature(self):
  209. s = Series([0, 1])
  210. s.rank(method="average")
  211. msg = "No axis named average for object type Series"
  212. with pytest.raises(ValueError, match=msg):
  213. s.rank("average")
  214. def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
  215. method, exp = results
  216. if (
  217. dtype == "int64"
  218. or dtype == "Int64"
  219. or (not using_infer_string and dtype == "str")
  220. ):
  221. pytest.skip("int64/str does not support NaN")
  222. ser = ser if dtype is None else ser.astype(dtype)
  223. result = ser.rank(method=method)
  224. tm.assert_series_equal(result, Series(exp, dtype=expected_dtype(dtype, method)))
  225. @pytest.mark.parametrize("ascending", [True, False])
  226. @pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"])
  227. @pytest.mark.parametrize("na_option", ["top", "bottom", "keep"])
  228. @pytest.mark.parametrize(
  229. "dtype, na_value, pos_inf, neg_inf",
  230. [
  231. ("object", None, Infinity(), NegInfinity()),
  232. ("float64", np.nan, np.inf, -np.inf),
  233. ("Float64", NA, np.inf, -np.inf),
  234. pytest.param(
  235. "float64[pyarrow]",
  236. NA,
  237. np.inf,
  238. -np.inf,
  239. marks=td.skip_if_no("pyarrow"),
  240. ),
  241. ],
  242. )
  243. def test_rank_tie_methods_on_infs_nans(
  244. self, method, na_option, ascending, dtype, na_value, pos_inf, neg_inf
  245. ):
  246. pytest.importorskip("scipy")
  247. if dtype == "float64[pyarrow]":
  248. if method == "average":
  249. exp_dtype = "float64[pyarrow]"
  250. else:
  251. exp_dtype = "uint64[pyarrow]"
  252. else:
  253. exp_dtype = "float64"
  254. chunk = 3
  255. in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk
  256. iseries = Series(in_arr, dtype=dtype)
  257. exp_ranks = {
  258. "average": ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
  259. "min": ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
  260. "max": ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
  261. "first": ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
  262. "dense": ([1, 1, 1], [2, 2, 2], [3, 3, 3]),
  263. }
  264. ranks = exp_ranks[method]
  265. if na_option == "top":
  266. order = [ranks[1], ranks[0], ranks[2]]
  267. elif na_option == "bottom":
  268. order = [ranks[0], ranks[2], ranks[1]]
  269. else:
  270. order = [ranks[0], [np.nan] * chunk, ranks[1]]
  271. expected = order if ascending else order[::-1]
  272. expected = list(chain.from_iterable(expected))
  273. result = iseries.rank(method=method, na_option=na_option, ascending=ascending)
  274. tm.assert_series_equal(result, Series(expected, dtype=exp_dtype))
  275. def test_rank_desc_mix_nans_infs(self):
  276. # GH 19538
  277. # check descending ranking when mix nans and infs
  278. iseries = Series([1, np.nan, np.inf, -np.inf, 25])
  279. result = iseries.rank(ascending=False)
  280. exp = Series([3, np.nan, 1, 4, 2], dtype="float64")
  281. tm.assert_series_equal(result, exp)
  282. @pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"])
  283. @pytest.mark.parametrize(
  284. "op, value",
  285. [
  286. [operator.add, 0],
  287. [operator.add, 1e6],
  288. [operator.mul, 1e-6],
  289. ],
  290. )
  291. def test_rank_methods_series(self, method, op, value):
  292. sp_stats = pytest.importorskip("scipy.stats")
  293. xs = np.random.default_rng(2).standard_normal(9)
  294. xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates
  295. np.random.default_rng(2).shuffle(xs)
  296. index = [chr(ord("a") + i) for i in range(len(xs))]
  297. vals = op(xs, value)
  298. ts = Series(vals, index=index)
  299. result = ts.rank(method=method)
  300. sprank = sp_stats.rankdata(vals, method if method != "first" else "ordinal")
  301. expected = Series(sprank, index=index).astype("float64")
  302. tm.assert_series_equal(result, expected)
  303. @pytest.mark.parametrize(
  304. "ser, exp",
  305. [
  306. ([1], [1]),
  307. ([2], [1]),
  308. ([0], [1]),
  309. ([2, 2], [1, 1]),
  310. ([1, 2, 3], [1, 2, 3]),
  311. ([4, 2, 1], [3, 2, 1]),
  312. ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]),
  313. ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5]),
  314. ],
  315. )
  316. def test_rank_dense_method(self, dtype, ser, exp):
  317. if ser[0] < 0 and dtype.startswith("str"):
  318. exp = exp[::-1]
  319. s = Series(ser).astype(dtype)
  320. result = s.rank(method="dense")
  321. expected = Series(exp).astype(expected_dtype(dtype, "dense"))
  322. tm.assert_series_equal(result, expected)
  323. def test_rank_descending(self, ser, results, dtype, using_infer_string):
  324. method, _ = results
  325. if dtype == "int64" or (not using_infer_string and dtype == "str"):
  326. s = ser.dropna()
  327. else:
  328. s = ser.astype(dtype)
  329. res = s.rank(ascending=False)
  330. if dtype.startswith("str"):
  331. expected = (s.astype("float64").max() - s.astype("float64")).rank()
  332. else:
  333. expected = (s.max() - s).rank()
  334. tm.assert_series_equal(res, expected.astype(expected_dtype(dtype, "average")))
  335. if dtype.startswith("str"):
  336. expected = (s.astype("float64").max() - s.astype("float64")).rank(
  337. method=method
  338. )
  339. else:
  340. expected = (s.max() - s).rank(method=method)
  341. res2 = s.rank(method=method, ascending=False)
  342. tm.assert_series_equal(res2, expected.astype(expected_dtype(dtype, method)))
  343. def test_rank_int(self, ser, results):
  344. method, exp = results
  345. s = ser.dropna().astype("i8")
  346. result = s.rank(method=method)
  347. expected = Series(exp).dropna()
  348. expected.index = result.index
  349. tm.assert_series_equal(result, expected)
  350. def test_rank_object_bug(self):
  351. # GH 13445
  352. # smoke tests
  353. Series([np.nan] * 32).astype(object).rank(ascending=True)
  354. Series([np.nan] * 32).astype(object).rank(ascending=False)
  355. def test_rank_modify_inplace(self):
  356. # GH 18521
  357. # Check rank does not mutate series
  358. s = Series([Timestamp("2017-01-05 10:20:27.569000"), NaT])
  359. expected = s.copy()
  360. s.rank()
  361. result = s
  362. tm.assert_series_equal(result, expected)
  363. def test_rank_ea_small_values(self):
  364. # GH#52471
  365. ser = Series(
  366. [5.4954145e29, -9.791984e-21, 9.3715776e-26, NA, 1.8790257e-28],
  367. dtype="Float64",
  368. )
  369. result = ser.rank(method="min")
  370. expected = Series([4, 1, 3, np.nan, 2])
  371. tm.assert_series_equal(result, expected)
  372. # GH15630, pct should be on 100% basis when method='dense'
  373. @pytest.mark.parametrize(
  374. "ser, exp",
  375. [
  376. ([1], [1.0]),
  377. ([1, 2], [1.0 / 2, 2.0 / 2]),
  378. ([2, 2], [1.0, 1.0]),
  379. ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
  380. ([1, 2, 2], [1.0 / 2, 2.0 / 2, 2.0 / 2]),
  381. ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
  382. ([1, 1, 5, 5, 3], [1.0 / 3, 1.0 / 3, 3.0 / 3, 3.0 / 3, 2.0 / 3]),
  383. ([1, 1, 3, 3, 5, 5], [1.0 / 3, 1.0 / 3, 2.0 / 3, 2.0 / 3, 3.0 / 3, 3.0 / 3]),
  384. ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
  385. ],
  386. )
  387. def test_rank_dense_pct(dtype, ser, exp):
  388. if ser[0] < 0 and dtype.startswith("str"):
  389. exp = exp[::-1]
  390. s = Series(ser).astype(dtype)
  391. result = s.rank(method="dense", pct=True)
  392. expected = Series(exp).astype(expected_dtype(dtype, "dense", pct=True))
  393. tm.assert_series_equal(result, expected)
  394. @pytest.mark.parametrize(
  395. "ser, exp",
  396. [
  397. ([1], [1.0]),
  398. ([1, 2], [1.0 / 2, 2.0 / 2]),
  399. ([2, 2], [1.0 / 2, 1.0 / 2]),
  400. ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
  401. ([1, 2, 2], [1.0 / 3, 2.0 / 3, 2.0 / 3]),
  402. ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
  403. ([1, 1, 5, 5, 3], [1.0 / 5, 1.0 / 5, 4.0 / 5, 4.0 / 5, 3.0 / 5]),
  404. ([1, 1, 3, 3, 5, 5], [1.0 / 6, 1.0 / 6, 3.0 / 6, 3.0 / 6, 5.0 / 6, 5.0 / 6]),
  405. ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
  406. ],
  407. )
  408. def test_rank_min_pct(dtype, ser, exp):
  409. if ser[0] < 0 and dtype.startswith("str"):
  410. exp = exp[::-1]
  411. s = Series(ser).astype(dtype)
  412. result = s.rank(method="min", pct=True)
  413. expected = Series(exp).astype(expected_dtype(dtype, "min", pct=True))
  414. tm.assert_series_equal(result, expected)
  415. @pytest.mark.parametrize(
  416. "ser, exp",
  417. [
  418. ([1], [1.0]),
  419. ([1, 2], [1.0 / 2, 2.0 / 2]),
  420. ([2, 2], [1.0, 1.0]),
  421. ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
  422. ([1, 2, 2], [1.0 / 3, 3.0 / 3, 3.0 / 3]),
  423. ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
  424. ([1, 1, 5, 5, 3], [2.0 / 5, 2.0 / 5, 5.0 / 5, 5.0 / 5, 3.0 / 5]),
  425. ([1, 1, 3, 3, 5, 5], [2.0 / 6, 2.0 / 6, 4.0 / 6, 4.0 / 6, 6.0 / 6, 6.0 / 6]),
  426. ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
  427. ],
  428. )
  429. def test_rank_max_pct(dtype, ser, exp):
  430. if ser[0] < 0 and dtype.startswith("str"):
  431. exp = exp[::-1]
  432. s = Series(ser).astype(dtype)
  433. result = s.rank(method="max", pct=True)
  434. expected = Series(exp).astype(expected_dtype(dtype, "max", pct=True))
  435. tm.assert_series_equal(result, expected)
  436. @pytest.mark.parametrize(
  437. "ser, exp",
  438. [
  439. ([1], [1.0]),
  440. ([1, 2], [1.0 / 2, 2.0 / 2]),
  441. ([2, 2], [1.5 / 2, 1.5 / 2]),
  442. ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
  443. ([1, 2, 2], [1.0 / 3, 2.5 / 3, 2.5 / 3]),
  444. ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
  445. ([1, 1, 5, 5, 3], [1.5 / 5, 1.5 / 5, 4.5 / 5, 4.5 / 5, 3.0 / 5]),
  446. ([1, 1, 3, 3, 5, 5], [1.5 / 6, 1.5 / 6, 3.5 / 6, 3.5 / 6, 5.5 / 6, 5.5 / 6]),
  447. ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
  448. ],
  449. )
  450. def test_rank_average_pct(dtype, ser, exp):
  451. if ser[0] < 0 and dtype.startswith("str"):
  452. exp = exp[::-1]
  453. s = Series(ser).astype(dtype)
  454. result = s.rank(method="average", pct=True)
  455. expected = Series(exp).astype(expected_dtype(dtype, "average", pct=True))
  456. tm.assert_series_equal(result, expected)
  457. @pytest.mark.parametrize(
  458. "ser, exp",
  459. [
  460. ([1], [1.0]),
  461. ([1, 2], [1.0 / 2, 2.0 / 2]),
  462. ([2, 2], [1.0 / 2, 2.0 / 2.0]),
  463. ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
  464. ([1, 2, 2], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
  465. ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
  466. ([1, 1, 5, 5, 3], [1.0 / 5, 2.0 / 5, 4.0 / 5, 5.0 / 5, 3.0 / 5]),
  467. ([1, 1, 3, 3, 5, 5], [1.0 / 6, 2.0 / 6, 3.0 / 6, 4.0 / 6, 5.0 / 6, 6.0 / 6]),
  468. ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
  469. ],
  470. )
  471. def test_rank_first_pct(dtype, ser, exp):
  472. if ser[0] < 0 and dtype.startswith("str"):
  473. exp = exp[::-1]
  474. s = Series(ser).astype(dtype)
  475. result = s.rank(method="first", pct=True)
  476. expected = Series(exp).astype(expected_dtype(dtype, "first", pct=True))
  477. tm.assert_series_equal(result, expected)
  478. @pytest.mark.single_cpu
  479. def test_pct_max_many_rows():
  480. # GH 18271
  481. s = Series(np.arange(2**24 + 1))
  482. result = s.rank(pct=True).max()
  483. assert result == 1