test_reductions.py 58 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722
  1. from datetime import (
  2. datetime,
  3. timedelta,
  4. )
  5. from decimal import Decimal
  6. import numpy as np
  7. import pytest
  8. import pandas as pd
  9. from pandas import (
  10. Categorical,
  11. DataFrame,
  12. DatetimeIndex,
  13. Index,
  14. NaT,
  15. Period,
  16. PeriodIndex,
  17. RangeIndex,
  18. Series,
  19. Timedelta,
  20. TimedeltaIndex,
  21. Timestamp,
  22. date_range,
  23. isna,
  24. period_range,
  25. timedelta_range,
  26. to_timedelta,
  27. )
  28. import pandas._testing as tm
  29. from pandas.core import nanops
  30. from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics
  31. def get_objs():
  32. indexes = [
  33. Index([True, False] * 5, name="a"),
  34. Index(np.arange(10), dtype=np.int64, name="a"),
  35. Index(np.arange(10), dtype=np.float64, name="a"),
  36. DatetimeIndex(date_range("2020-01-01", periods=10), name="a"),
  37. DatetimeIndex(date_range("2020-01-01", periods=10), name="a").tz_localize(
  38. tz="US/Eastern"
  39. ),
  40. PeriodIndex(period_range("2020-01-01", periods=10, freq="D"), name="a"),
  41. Index([str(i) for i in range(10)], name="a"),
  42. ]
  43. arr = np.random.default_rng(2).standard_normal(10)
  44. series = [Series(arr, index=idx, name="a") for idx in indexes]
  45. objs = indexes + series
  46. return objs
  47. class TestReductions:
  48. @pytest.mark.filterwarnings(
  49. "ignore:Period with BDay freq is deprecated:FutureWarning"
  50. )
  51. @pytest.mark.parametrize("opname", ["max", "min"])
  52. @pytest.mark.parametrize("obj", get_objs())
  53. def test_ops(self, opname, obj):
  54. result = getattr(obj, opname)()
  55. if not isinstance(obj, PeriodIndex):
  56. if isinstance(obj.values, ArrowStringArrayNumpySemantics):
  57. # max not on the interface
  58. expected = getattr(np.array(obj.values), opname)()
  59. else:
  60. expected = getattr(obj.values, opname)()
  61. else:
  62. expected = Period(ordinal=getattr(obj.asi8, opname)(), freq=obj.freq)
  63. if getattr(obj, "tz", None) is not None:
  64. # We need to de-localize before comparing to the numpy-produced result
  65. expected = expected.astype("M8[ns]").astype("int64")
  66. assert result._value == expected
  67. else:
  68. assert result == expected
  69. @pytest.mark.parametrize("opname", ["max", "min"])
  70. @pytest.mark.parametrize(
  71. "dtype, val",
  72. [
  73. ("object", 2.0),
  74. ("float64", 2.0),
  75. ("datetime64[ns]", datetime(2011, 11, 1)),
  76. ("Int64", 2),
  77. ("boolean", True),
  78. ],
  79. )
  80. def test_nanminmax(self, opname, dtype, val, index_or_series):
  81. # GH#7261
  82. klass = index_or_series
  83. def check_missing(res):
  84. if dtype == "datetime64[ns]":
  85. return res is NaT
  86. elif dtype in ["Int64", "boolean"]:
  87. return res is pd.NA
  88. else:
  89. return isna(res)
  90. obj = klass([None], dtype=dtype)
  91. assert check_missing(getattr(obj, opname)())
  92. assert check_missing(getattr(obj, opname)(skipna=False))
  93. obj = klass([], dtype=dtype)
  94. assert check_missing(getattr(obj, opname)())
  95. assert check_missing(getattr(obj, opname)(skipna=False))
  96. if dtype == "object":
  97. # generic test with object only works for empty / all NaN
  98. return
  99. obj = klass([None, val], dtype=dtype)
  100. assert getattr(obj, opname)() == val
  101. assert check_missing(getattr(obj, opname)(skipna=False))
  102. obj = klass([None, val, None], dtype=dtype)
  103. assert getattr(obj, opname)() == val
  104. assert check_missing(getattr(obj, opname)(skipna=False))
  105. @pytest.mark.parametrize("opname", ["max", "min"])
  106. def test_nanargminmax(self, opname, index_or_series):
  107. # GH#7261
  108. klass = index_or_series
  109. arg_op = "arg" + opname if klass is Index else "idx" + opname
  110. obj = klass([NaT, datetime(2011, 11, 1)])
  111. assert getattr(obj, arg_op)() == 1
  112. msg = (
  113. "The behavior of (DatetimeIndex|Series).argmax/argmin with "
  114. "skipna=False and NAs"
  115. )
  116. if klass is Series:
  117. msg = "The behavior of Series.(idxmax|idxmin) with all-NA"
  118. with tm.assert_produces_warning(FutureWarning, match=msg):
  119. result = getattr(obj, arg_op)(skipna=False)
  120. if klass is Series:
  121. assert np.isnan(result)
  122. else:
  123. assert result == -1
  124. obj = klass([NaT, datetime(2011, 11, 1), NaT])
  125. # check DatetimeIndex non-monotonic path
  126. assert getattr(obj, arg_op)() == 1
  127. with tm.assert_produces_warning(FutureWarning, match=msg):
  128. result = getattr(obj, arg_op)(skipna=False)
  129. if klass is Series:
  130. assert np.isnan(result)
  131. else:
  132. assert result == -1
  133. @pytest.mark.parametrize("opname", ["max", "min"])
  134. @pytest.mark.parametrize("dtype", ["M8[ns]", "datetime64[ns, UTC]"])
  135. def test_nanops_empty_object(self, opname, index_or_series, dtype):
  136. klass = index_or_series
  137. arg_op = "arg" + opname if klass is Index else "idx" + opname
  138. obj = klass([], dtype=dtype)
  139. assert getattr(obj, opname)() is NaT
  140. assert getattr(obj, opname)(skipna=False) is NaT
  141. with pytest.raises(ValueError, match="empty sequence"):
  142. getattr(obj, arg_op)()
  143. with pytest.raises(ValueError, match="empty sequence"):
  144. getattr(obj, arg_op)(skipna=False)
  145. def test_argminmax(self):
  146. obj = Index(np.arange(5, dtype="int64"))
  147. assert obj.argmin() == 0
  148. assert obj.argmax() == 4
  149. obj = Index([np.nan, 1, np.nan, 2])
  150. assert obj.argmin() == 1
  151. assert obj.argmax() == 3
  152. msg = "The behavior of Index.argmax/argmin with skipna=False and NAs"
  153. with tm.assert_produces_warning(FutureWarning, match=msg):
  154. assert obj.argmin(skipna=False) == -1
  155. with tm.assert_produces_warning(FutureWarning, match=msg):
  156. assert obj.argmax(skipna=False) == -1
  157. obj = Index([np.nan])
  158. with tm.assert_produces_warning(FutureWarning, match=msg):
  159. assert obj.argmin() == -1
  160. with tm.assert_produces_warning(FutureWarning, match=msg):
  161. assert obj.argmax() == -1
  162. with tm.assert_produces_warning(FutureWarning, match=msg):
  163. assert obj.argmin(skipna=False) == -1
  164. with tm.assert_produces_warning(FutureWarning, match=msg):
  165. assert obj.argmax(skipna=False) == -1
  166. msg = "The behavior of DatetimeIndex.argmax/argmin with skipna=False and NAs"
  167. obj = Index([NaT, datetime(2011, 11, 1), datetime(2011, 11, 2), NaT])
  168. assert obj.argmin() == 1
  169. assert obj.argmax() == 2
  170. with tm.assert_produces_warning(FutureWarning, match=msg):
  171. assert obj.argmin(skipna=False) == -1
  172. with tm.assert_produces_warning(FutureWarning, match=msg):
  173. assert obj.argmax(skipna=False) == -1
  174. obj = Index([NaT])
  175. with tm.assert_produces_warning(FutureWarning, match=msg):
  176. assert obj.argmin() == -1
  177. with tm.assert_produces_warning(FutureWarning, match=msg):
  178. assert obj.argmax() == -1
  179. with tm.assert_produces_warning(FutureWarning, match=msg):
  180. assert obj.argmin(skipna=False) == -1
  181. with tm.assert_produces_warning(FutureWarning, match=msg):
  182. assert obj.argmax(skipna=False) == -1
  183. @pytest.mark.parametrize("op, expected_col", [["max", "a"], ["min", "b"]])
  184. def test_same_tz_min_max_axis_1(self, op, expected_col):
  185. # GH 10390
  186. df = DataFrame(
  187. date_range("2016-01-01 00:00:00", periods=3, tz="UTC"), columns=["a"]
  188. )
  189. df["b"] = df.a.subtract(Timedelta(seconds=3600))
  190. result = getattr(df, op)(axis=1)
  191. expected = df[expected_col].rename(None)
  192. tm.assert_series_equal(result, expected)
  193. @pytest.mark.parametrize("func", ["maximum", "minimum"])
  194. def test_numpy_reduction_with_tz_aware_dtype(self, tz_aware_fixture, func):
  195. # GH 15552
  196. tz = tz_aware_fixture
  197. arg = pd.to_datetime(["2019"]).tz_localize(tz)
  198. expected = Series(arg)
  199. result = getattr(np, func)(expected, expected)
  200. tm.assert_series_equal(result, expected)
  201. def test_nan_int_timedelta_sum(self):
  202. # GH 27185
  203. df = DataFrame(
  204. {
  205. "A": Series([1, 2, NaT], dtype="timedelta64[ns]"),
  206. "B": Series([1, 2, np.nan], dtype="Int64"),
  207. }
  208. )
  209. expected = Series({"A": Timedelta(3), "B": 3})
  210. result = df.sum()
  211. tm.assert_series_equal(result, expected)
  212. class TestIndexReductions:
  213. # Note: the name TestIndexReductions indicates these tests
  214. # were moved from a Index-specific test file, _not_ that these tests are
  215. # intended long-term to be Index-specific
  216. @pytest.mark.parametrize(
  217. "start,stop,step",
  218. [
  219. (0, 400, 3),
  220. (500, 0, -6),
  221. (-(10**6), 10**6, 4),
  222. (10**6, -(10**6), -4),
  223. (0, 10, 20),
  224. ],
  225. )
  226. def test_max_min_range(self, start, stop, step):
  227. # GH#17607
  228. idx = RangeIndex(start, stop, step)
  229. expected = idx._values.max()
  230. result = idx.max()
  231. assert result == expected
  232. # skipna should be irrelevant since RangeIndex should never have NAs
  233. result2 = idx.max(skipna=False)
  234. assert result2 == expected
  235. expected = idx._values.min()
  236. result = idx.min()
  237. assert result == expected
  238. # skipna should be irrelevant since RangeIndex should never have NAs
  239. result2 = idx.min(skipna=False)
  240. assert result2 == expected
  241. # empty
  242. idx = RangeIndex(start, stop, -step)
  243. assert isna(idx.max())
  244. assert isna(idx.min())
  245. def test_minmax_timedelta64(self):
  246. # monotonic
  247. idx1 = TimedeltaIndex(["1 days", "2 days", "3 days"])
  248. assert idx1.is_monotonic_increasing
  249. # non-monotonic
  250. idx2 = TimedeltaIndex(["1 days", np.nan, "3 days", "NaT"])
  251. assert not idx2.is_monotonic_increasing
  252. for idx in [idx1, idx2]:
  253. assert idx.min() == Timedelta("1 days")
  254. assert idx.max() == Timedelta("3 days")
  255. assert idx.argmin() == 0
  256. assert idx.argmax() == 2
  257. @pytest.mark.parametrize("op", ["min", "max"])
  258. def test_minmax_timedelta_empty_or_na(self, op):
  259. # Return NaT
  260. obj = TimedeltaIndex([])
  261. assert getattr(obj, op)() is NaT
  262. obj = TimedeltaIndex([NaT])
  263. assert getattr(obj, op)() is NaT
  264. obj = TimedeltaIndex([NaT, NaT, NaT])
  265. assert getattr(obj, op)() is NaT
  266. def test_numpy_minmax_timedelta64(self):
  267. td = timedelta_range("16815 days", "16820 days", freq="D")
  268. assert np.min(td) == Timedelta("16815 days")
  269. assert np.max(td) == Timedelta("16820 days")
  270. errmsg = "the 'out' parameter is not supported"
  271. with pytest.raises(ValueError, match=errmsg):
  272. np.min(td, out=0)
  273. with pytest.raises(ValueError, match=errmsg):
  274. np.max(td, out=0)
  275. assert np.argmin(td) == 0
  276. assert np.argmax(td) == 5
  277. errmsg = "the 'out' parameter is not supported"
  278. with pytest.raises(ValueError, match=errmsg):
  279. np.argmin(td, out=0)
  280. with pytest.raises(ValueError, match=errmsg):
  281. np.argmax(td, out=0)
  282. def test_timedelta_ops(self):
  283. # GH#4984
  284. # make sure ops return Timedelta
  285. s = Series(
  286. [Timestamp("20130101") + timedelta(seconds=i * i) for i in range(10)]
  287. )
  288. td = s.diff()
  289. result = td.mean()
  290. expected = to_timedelta(timedelta(seconds=9))
  291. assert result == expected
  292. result = td.to_frame().mean()
  293. assert result[0] == expected
  294. result = td.quantile(0.1)
  295. expected = Timedelta(np.timedelta64(2600, "ms"))
  296. assert result == expected
  297. result = td.median()
  298. expected = to_timedelta("00:00:09")
  299. assert result == expected
  300. result = td.to_frame().median()
  301. assert result[0] == expected
  302. # GH#6462
  303. # consistency in returned values for sum
  304. result = td.sum()
  305. expected = to_timedelta("00:01:21")
  306. assert result == expected
  307. result = td.to_frame().sum()
  308. assert result[0] == expected
  309. # std
  310. result = td.std()
  311. expected = to_timedelta(Series(td.dropna().values).std())
  312. assert result == expected
  313. result = td.to_frame().std()
  314. assert result[0] == expected
  315. # GH#10040
  316. # make sure NaT is properly handled by median()
  317. s = Series([Timestamp("2015-02-03"), Timestamp("2015-02-07")])
  318. assert s.diff().median() == timedelta(days=4)
  319. s = Series(
  320. [Timestamp("2015-02-03"), Timestamp("2015-02-07"), Timestamp("2015-02-15")]
  321. )
  322. assert s.diff().median() == timedelta(days=6)
  323. @pytest.mark.parametrize("opname", ["skew", "kurt", "sem", "prod", "var"])
  324. def test_invalid_td64_reductions(self, opname):
  325. s = Series(
  326. [Timestamp("20130101") + timedelta(seconds=i * i) for i in range(10)]
  327. )
  328. td = s.diff()
  329. msg = "|".join(
  330. [
  331. f"reduction operation '{opname}' not allowed for this dtype",
  332. rf"cannot perform {opname} with type timedelta64\[ns\]",
  333. f"does not support reduction '{opname}'",
  334. ]
  335. )
  336. with pytest.raises(TypeError, match=msg):
  337. getattr(td, opname)()
  338. with pytest.raises(TypeError, match=msg):
  339. getattr(td.to_frame(), opname)(numeric_only=False)
  340. def test_minmax_tz(self, tz_naive_fixture):
  341. tz = tz_naive_fixture
  342. # monotonic
  343. idx1 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz=tz)
  344. assert idx1.is_monotonic_increasing
  345. # non-monotonic
  346. idx2 = DatetimeIndex(
  347. ["2011-01-01", NaT, "2011-01-03", "2011-01-02", NaT], tz=tz
  348. )
  349. assert not idx2.is_monotonic_increasing
  350. for idx in [idx1, idx2]:
  351. assert idx.min() == Timestamp("2011-01-01", tz=tz)
  352. assert idx.max() == Timestamp("2011-01-03", tz=tz)
  353. assert idx.argmin() == 0
  354. assert idx.argmax() == 2
  355. @pytest.mark.parametrize("op", ["min", "max"])
  356. def test_minmax_nat_datetime64(self, op):
  357. # Return NaT
  358. obj = DatetimeIndex([])
  359. assert isna(getattr(obj, op)())
  360. obj = DatetimeIndex([NaT])
  361. assert isna(getattr(obj, op)())
  362. obj = DatetimeIndex([NaT, NaT, NaT])
  363. assert isna(getattr(obj, op)())
  364. def test_numpy_minmax_integer(self):
  365. # GH#26125
  366. idx = Index([1, 2, 3])
  367. expected = idx.values.max()
  368. result = np.max(idx)
  369. assert result == expected
  370. expected = idx.values.min()
  371. result = np.min(idx)
  372. assert result == expected
  373. errmsg = "the 'out' parameter is not supported"
  374. with pytest.raises(ValueError, match=errmsg):
  375. np.min(idx, out=0)
  376. with pytest.raises(ValueError, match=errmsg):
  377. np.max(idx, out=0)
  378. expected = idx.values.argmax()
  379. result = np.argmax(idx)
  380. assert result == expected
  381. expected = idx.values.argmin()
  382. result = np.argmin(idx)
  383. assert result == expected
  384. errmsg = "the 'out' parameter is not supported"
  385. with pytest.raises(ValueError, match=errmsg):
  386. np.argmin(idx, out=0)
  387. with pytest.raises(ValueError, match=errmsg):
  388. np.argmax(idx, out=0)
  389. def test_numpy_minmax_range(self):
  390. # GH#26125
  391. idx = RangeIndex(0, 10, 3)
  392. result = np.max(idx)
  393. assert result == 9
  394. result = np.min(idx)
  395. assert result == 0
  396. errmsg = "the 'out' parameter is not supported"
  397. with pytest.raises(ValueError, match=errmsg):
  398. np.min(idx, out=0)
  399. with pytest.raises(ValueError, match=errmsg):
  400. np.max(idx, out=0)
  401. # No need to test again argmax/argmin compat since the implementation
  402. # is the same as basic integer index
  403. def test_numpy_minmax_datetime64(self):
  404. dr = date_range(start="2016-01-15", end="2016-01-20")
  405. assert np.min(dr) == Timestamp("2016-01-15 00:00:00")
  406. assert np.max(dr) == Timestamp("2016-01-20 00:00:00")
  407. errmsg = "the 'out' parameter is not supported"
  408. with pytest.raises(ValueError, match=errmsg):
  409. np.min(dr, out=0)
  410. with pytest.raises(ValueError, match=errmsg):
  411. np.max(dr, out=0)
  412. assert np.argmin(dr) == 0
  413. assert np.argmax(dr) == 5
  414. errmsg = "the 'out' parameter is not supported"
  415. with pytest.raises(ValueError, match=errmsg):
  416. np.argmin(dr, out=0)
  417. with pytest.raises(ValueError, match=errmsg):
  418. np.argmax(dr, out=0)
  419. def test_minmax_period(self):
  420. # monotonic
  421. idx1 = PeriodIndex([NaT, "2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
  422. assert not idx1.is_monotonic_increasing
  423. assert idx1[1:].is_monotonic_increasing
  424. # non-monotonic
  425. idx2 = PeriodIndex(
  426. ["2011-01-01", NaT, "2011-01-03", "2011-01-02", NaT], freq="D"
  427. )
  428. assert not idx2.is_monotonic_increasing
  429. for idx in [idx1, idx2]:
  430. assert idx.min() == Period("2011-01-01", freq="D")
  431. assert idx.max() == Period("2011-01-03", freq="D")
  432. assert idx1.argmin() == 1
  433. assert idx2.argmin() == 0
  434. assert idx1.argmax() == 3
  435. assert idx2.argmax() == 2
  436. @pytest.mark.parametrize("op", ["min", "max"])
  437. @pytest.mark.parametrize("data", [[], [NaT], [NaT, NaT, NaT]])
  438. def test_minmax_period_empty_nat(self, op, data):
  439. # Return NaT
  440. obj = PeriodIndex(data, freq="M")
  441. result = getattr(obj, op)()
  442. assert result is NaT
  443. def test_numpy_minmax_period(self):
  444. pr = period_range(start="2016-01-15", end="2016-01-20")
  445. assert np.min(pr) == Period("2016-01-15", freq="D")
  446. assert np.max(pr) == Period("2016-01-20", freq="D")
  447. errmsg = "the 'out' parameter is not supported"
  448. with pytest.raises(ValueError, match=errmsg):
  449. np.min(pr, out=0)
  450. with pytest.raises(ValueError, match=errmsg):
  451. np.max(pr, out=0)
  452. assert np.argmin(pr) == 0
  453. assert np.argmax(pr) == 5
  454. errmsg = "the 'out' parameter is not supported"
  455. with pytest.raises(ValueError, match=errmsg):
  456. np.argmin(pr, out=0)
  457. with pytest.raises(ValueError, match=errmsg):
  458. np.argmax(pr, out=0)
  459. def test_min_max_categorical(self):
  460. ci = pd.CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
  461. msg = (
  462. r"Categorical is not ordered for operation min\n"
  463. r"you can use .as_ordered\(\) to change the Categorical to an ordered one\n"
  464. )
  465. with pytest.raises(TypeError, match=msg):
  466. ci.min()
  467. msg = (
  468. r"Categorical is not ordered for operation max\n"
  469. r"you can use .as_ordered\(\) to change the Categorical to an ordered one\n"
  470. )
  471. with pytest.raises(TypeError, match=msg):
  472. ci.max()
  473. ci = pd.CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=True)
  474. assert ci.min() == "c"
  475. assert ci.max() == "b"
  476. class TestSeriesReductions:
  477. # Note: the name TestSeriesReductions indicates these tests
  478. # were moved from a series-specific test file, _not_ that these tests are
  479. # intended long-term to be series-specific
  480. def test_sum_inf(self):
  481. s = Series(np.random.default_rng(2).standard_normal(10))
  482. s2 = s.copy()
  483. s[5:8] = np.inf
  484. s2[5:8] = np.nan
  485. assert np.isinf(s.sum())
  486. arr = np.random.default_rng(2).standard_normal((100, 100)).astype("f4")
  487. arr[:, 2] = np.inf
  488. msg = "use_inf_as_na option is deprecated"
  489. with tm.assert_produces_warning(FutureWarning, match=msg):
  490. with pd.option_context("mode.use_inf_as_na", True):
  491. tm.assert_almost_equal(s.sum(), s2.sum())
  492. res = nanops.nansum(arr, axis=1)
  493. assert np.isinf(res).all()
  494. @pytest.mark.parametrize(
  495. "dtype", ["float64", "Float32", "Int64", "boolean", "object"]
  496. )
  497. @pytest.mark.parametrize("use_bottleneck", [True, False])
  498. @pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)])
  499. def test_empty(self, method, unit, use_bottleneck, dtype):
  500. with pd.option_context("use_bottleneck", use_bottleneck):
  501. # GH#9422 / GH#18921
  502. # Entirely empty
  503. s = Series([], dtype=dtype)
  504. # NA by default
  505. result = getattr(s, method)()
  506. assert result == unit
  507. # Explicit
  508. result = getattr(s, method)(min_count=0)
  509. assert result == unit
  510. result = getattr(s, method)(min_count=1)
  511. assert isna(result)
  512. # Skipna, default
  513. result = getattr(s, method)(skipna=True)
  514. result == unit
  515. # Skipna, explicit
  516. result = getattr(s, method)(skipna=True, min_count=0)
  517. assert result == unit
  518. result = getattr(s, method)(skipna=True, min_count=1)
  519. assert isna(result)
  520. result = getattr(s, method)(skipna=False, min_count=0)
  521. assert result == unit
  522. result = getattr(s, method)(skipna=False, min_count=1)
  523. assert isna(result)
  524. # All-NA
  525. s = Series([np.nan], dtype=dtype)
  526. # NA by default
  527. result = getattr(s, method)()
  528. assert result == unit
  529. # Explicit
  530. result = getattr(s, method)(min_count=0)
  531. assert result == unit
  532. result = getattr(s, method)(min_count=1)
  533. assert isna(result)
  534. # Skipna, default
  535. result = getattr(s, method)(skipna=True)
  536. result == unit
  537. # skipna, explicit
  538. result = getattr(s, method)(skipna=True, min_count=0)
  539. assert result == unit
  540. result = getattr(s, method)(skipna=True, min_count=1)
  541. assert isna(result)
  542. # Mix of valid, empty
  543. s = Series([np.nan, 1], dtype=dtype)
  544. # Default
  545. result = getattr(s, method)()
  546. assert result == 1.0
  547. # Explicit
  548. result = getattr(s, method)(min_count=0)
  549. assert result == 1.0
  550. result = getattr(s, method)(min_count=1)
  551. assert result == 1.0
  552. # Skipna
  553. result = getattr(s, method)(skipna=True)
  554. assert result == 1.0
  555. result = getattr(s, method)(skipna=True, min_count=0)
  556. assert result == 1.0
  557. # GH#844 (changed in GH#9422)
  558. df = DataFrame(np.empty((10, 0)), dtype=dtype)
  559. assert (getattr(df, method)(1) == unit).all()
  560. s = Series([1], dtype=dtype)
  561. result = getattr(s, method)(min_count=2)
  562. assert isna(result)
  563. result = getattr(s, method)(skipna=False, min_count=2)
  564. assert isna(result)
  565. s = Series([np.nan], dtype=dtype)
  566. result = getattr(s, method)(min_count=2)
  567. assert isna(result)
  568. s = Series([np.nan, 1], dtype=dtype)
  569. result = getattr(s, method)(min_count=2)
  570. assert isna(result)
  571. @pytest.mark.parametrize("method", ["mean", "var"])
  572. @pytest.mark.parametrize("dtype", ["Float64", "Int64", "boolean"])
  573. def test_ops_consistency_on_empty_nullable(self, method, dtype):
  574. # GH#34814
  575. # consistency for nullable dtypes on empty or ALL-NA mean
  576. # empty series
  577. eser = Series([], dtype=dtype)
  578. result = getattr(eser, method)()
  579. assert result is pd.NA
  580. # ALL-NA series
  581. nser = Series([np.nan], dtype=dtype)
  582. result = getattr(nser, method)()
  583. assert result is pd.NA
  584. @pytest.mark.parametrize("method", ["mean", "median", "std", "var"])
  585. def test_ops_consistency_on_empty(self, method):
  586. # GH#7869
  587. # consistency on empty
  588. # float
  589. result = getattr(Series(dtype=float), method)()
  590. assert isna(result)
  591. # timedelta64[ns]
  592. tdser = Series([], dtype="m8[ns]")
  593. if method == "var":
  594. msg = "|".join(
  595. [
  596. "operation 'var' not allowed",
  597. r"cannot perform var with type timedelta64\[ns\]",
  598. "does not support reduction 'var'",
  599. ]
  600. )
  601. with pytest.raises(TypeError, match=msg):
  602. getattr(tdser, method)()
  603. else:
  604. result = getattr(tdser, method)()
  605. assert result is NaT
  606. def test_nansum_buglet(self):
  607. ser = Series([1.0, np.nan], index=[0, 1])
  608. result = np.nansum(ser)
  609. tm.assert_almost_equal(result, 1)
  610. @pytest.mark.parametrize("use_bottleneck", [True, False])
  611. @pytest.mark.parametrize("dtype", ["int32", "int64"])
  612. def test_sum_overflow_int(self, use_bottleneck, dtype):
  613. with pd.option_context("use_bottleneck", use_bottleneck):
  614. # GH#6915
  615. # overflowing on the smaller int dtypes
  616. v = np.arange(5000000, dtype=dtype)
  617. s = Series(v)
  618. result = s.sum(skipna=False)
  619. assert int(result) == v.sum(dtype="int64")
  620. result = s.min(skipna=False)
  621. assert int(result) == 0
  622. result = s.max(skipna=False)
  623. assert int(result) == v[-1]
  624. @pytest.mark.parametrize("use_bottleneck", [True, False])
  625. @pytest.mark.parametrize("dtype", ["float32", "float64"])
  626. def test_sum_overflow_float(self, use_bottleneck, dtype):
  627. with pd.option_context("use_bottleneck", use_bottleneck):
  628. v = np.arange(5000000, dtype=dtype)
  629. s = Series(v)
  630. result = s.sum(skipna=False)
  631. assert result == v.sum(dtype=dtype)
  632. result = s.min(skipna=False)
  633. assert np.allclose(float(result), 0.0)
  634. result = s.max(skipna=False)
  635. assert np.allclose(float(result), v[-1])
  636. def test_mean_masked_overflow(self):
  637. # GH#48378
  638. val = 100_000_000_000_000_000
  639. n_elements = 100
  640. na = np.array([val] * n_elements)
  641. ser = Series([val] * n_elements, dtype="Int64")
  642. result_numpy = np.mean(na)
  643. result_masked = ser.mean()
  644. assert result_masked - result_numpy == 0
  645. assert result_masked == 1e17
  646. @pytest.mark.parametrize("ddof, exp", [(1, 2.5), (0, 2.0)])
  647. def test_var_masked_array(self, ddof, exp):
  648. # GH#48379
  649. ser = Series([1, 2, 3, 4, 5], dtype="Int64")
  650. ser_numpy_dtype = Series([1, 2, 3, 4, 5], dtype="int64")
  651. result = ser.var(ddof=ddof)
  652. result_numpy_dtype = ser_numpy_dtype.var(ddof=ddof)
  653. assert result == result_numpy_dtype
  654. assert result == exp
  655. @pytest.mark.parametrize("dtype", ("m8[ns]", "m8[ns]", "M8[ns]", "M8[ns, UTC]"))
  656. @pytest.mark.parametrize("skipna", [True, False])
  657. def test_empty_timeseries_reductions_return_nat(self, dtype, skipna):
  658. # covers GH#11245
  659. assert Series([], dtype=dtype).min(skipna=skipna) is NaT
  660. assert Series([], dtype=dtype).max(skipna=skipna) is NaT
  661. def test_numpy_argmin(self):
  662. # See GH#16830
  663. data = np.arange(1, 11)
  664. s = Series(data, index=data)
  665. result = np.argmin(s)
  666. expected = np.argmin(data)
  667. assert result == expected
  668. result = s.argmin()
  669. assert result == expected
  670. msg = "the 'out' parameter is not supported"
  671. with pytest.raises(ValueError, match=msg):
  672. np.argmin(s, out=data)
  673. def test_numpy_argmax(self):
  674. # See GH#16830
  675. data = np.arange(1, 11)
  676. ser = Series(data, index=data)
  677. result = np.argmax(ser)
  678. expected = np.argmax(data)
  679. assert result == expected
  680. result = ser.argmax()
  681. assert result == expected
  682. msg = "the 'out' parameter is not supported"
  683. with pytest.raises(ValueError, match=msg):
  684. np.argmax(ser, out=data)
  685. def test_idxmin_dt64index(self, unit):
  686. # GH#43587 should have NaT instead of NaN
  687. dti = DatetimeIndex(["NaT", "2015-02-08", "NaT"]).as_unit(unit)
  688. ser = Series([1.0, 2.0, np.nan], index=dti)
  689. msg = "The behavior of Series.idxmin with all-NA values"
  690. with tm.assert_produces_warning(FutureWarning, match=msg):
  691. res = ser.idxmin(skipna=False)
  692. assert res is NaT
  693. msg = "The behavior of Series.idxmax with all-NA values"
  694. with tm.assert_produces_warning(FutureWarning, match=msg):
  695. res = ser.idxmax(skipna=False)
  696. assert res is NaT
  697. df = ser.to_frame()
  698. msg = "The behavior of DataFrame.idxmin with all-NA values"
  699. with tm.assert_produces_warning(FutureWarning, match=msg):
  700. res = df.idxmin(skipna=False)
  701. assert res.dtype == f"M8[{unit}]"
  702. assert res.isna().all()
  703. msg = "The behavior of DataFrame.idxmax with all-NA values"
  704. with tm.assert_produces_warning(FutureWarning, match=msg):
  705. res = df.idxmax(skipna=False)
  706. assert res.dtype == f"M8[{unit}]"
  707. assert res.isna().all()
  708. def test_idxmin(self):
  709. # test idxmin
  710. # _check_stat_op approach can not be used here because of isna check.
  711. string_series = Series(range(20), dtype=np.float64, name="series")
  712. # add some NaNs
  713. string_series[5:15] = np.nan
  714. # skipna or no
  715. assert string_series[string_series.idxmin()] == string_series.min()
  716. msg = "The behavior of Series.idxmin"
  717. with tm.assert_produces_warning(FutureWarning, match=msg):
  718. assert isna(string_series.idxmin(skipna=False))
  719. # no NaNs
  720. nona = string_series.dropna()
  721. assert nona[nona.idxmin()] == nona.min()
  722. assert nona.index.values.tolist().index(nona.idxmin()) == nona.values.argmin()
  723. # all NaNs
  724. allna = string_series * np.nan
  725. with tm.assert_produces_warning(FutureWarning, match=msg):
  726. assert isna(allna.idxmin())
  727. # datetime64[ns]
  728. s = Series(date_range("20130102", periods=6))
  729. result = s.idxmin()
  730. assert result == 0
  731. s[0] = np.nan
  732. result = s.idxmin()
  733. assert result == 1
  734. def test_idxmax(self):
  735. # test idxmax
  736. # _check_stat_op approach can not be used here because of isna check.
  737. string_series = Series(range(20), dtype=np.float64, name="series")
  738. # add some NaNs
  739. string_series[5:15] = np.nan
  740. # skipna or no
  741. assert string_series[string_series.idxmax()] == string_series.max()
  742. msg = "The behavior of Series.idxmax with all-NA values"
  743. with tm.assert_produces_warning(FutureWarning, match=msg):
  744. assert isna(string_series.idxmax(skipna=False))
  745. # no NaNs
  746. nona = string_series.dropna()
  747. assert nona[nona.idxmax()] == nona.max()
  748. assert nona.index.values.tolist().index(nona.idxmax()) == nona.values.argmax()
  749. # all NaNs
  750. allna = string_series * np.nan
  751. msg = "The behavior of Series.idxmax with all-NA values"
  752. with tm.assert_produces_warning(FutureWarning, match=msg):
  753. assert isna(allna.idxmax())
  754. s = Series(date_range("20130102", periods=6))
  755. result = s.idxmax()
  756. assert result == 5
  757. s[5] = np.nan
  758. result = s.idxmax()
  759. assert result == 4
  760. # Index with float64 dtype
  761. # GH#5914
  762. s = Series([1, 2, 3], [1.1, 2.1, 3.1])
  763. result = s.idxmax()
  764. assert result == 3.1
  765. result = s.idxmin()
  766. assert result == 1.1
  767. s = Series(s.index, s.index)
  768. result = s.idxmax()
  769. assert result == 3.1
  770. result = s.idxmin()
  771. assert result == 1.1
  772. def test_all_any(self):
  773. ts = Series(
  774. np.arange(10, dtype=np.float64),
  775. index=date_range("2020-01-01", periods=10),
  776. name="ts",
  777. )
  778. bool_series = ts > 0
  779. assert not bool_series.all()
  780. assert bool_series.any()
  781. # Alternative types, with implicit 'object' dtype.
  782. s = Series(["abc", True])
  783. assert s.any()
  784. def test_numpy_all_any(self, index_or_series):
  785. # GH#40180
  786. idx = index_or_series([0, 1, 2])
  787. assert not np.all(idx)
  788. assert np.any(idx)
  789. idx = Index([1, 2, 3])
  790. assert np.all(idx)
  791. def test_all_any_skipna(self):
  792. # Check skipna, with implicit 'object' dtype.
  793. s1 = Series([np.nan, True])
  794. s2 = Series([np.nan, False])
  795. assert s1.all(skipna=False) # nan && True => True
  796. assert s1.all(skipna=True)
  797. assert s2.any(skipna=False)
  798. assert not s2.any(skipna=True)
  799. def test_all_any_bool_only(self):
  800. s = Series([False, False, True, True, False, True], index=[0, 0, 1, 1, 2, 2])
  801. # GH#47500 - test bool_only works
  802. assert s.any(bool_only=True)
  803. assert not s.all(bool_only=True)
  804. @pytest.mark.parametrize("bool_agg_func", ["any", "all"])
  805. @pytest.mark.parametrize("skipna", [True, False])
  806. def test_any_all_object_dtype(self, bool_agg_func, skipna):
  807. # GH#12863
  808. ser = Series(["a", "b", "c", "d", "e"], dtype=object)
  809. result = getattr(ser, bool_agg_func)(skipna=skipna)
  810. expected = True
  811. assert result == expected
  812. @pytest.mark.parametrize("bool_agg_func", ["any", "all"])
  813. @pytest.mark.parametrize(
  814. "data", [[False, None], [None, False], [False, np.nan], [np.nan, False]]
  815. )
  816. def test_any_all_object_dtype_missing(self, data, bool_agg_func):
  817. # GH#27709
  818. ser = Series(data)
  819. result = getattr(ser, bool_agg_func)(skipna=False)
  820. # None is treated is False, but np.nan is treated as True
  821. expected = bool_agg_func == "any" and None not in data
  822. assert result == expected
  823. @pytest.mark.parametrize("dtype", ["boolean", "Int64", "UInt64", "Float64"])
  824. @pytest.mark.parametrize("bool_agg_func", ["any", "all"])
  825. @pytest.mark.parametrize("skipna", [True, False])
  826. @pytest.mark.parametrize(
  827. # expected_data indexed as [[skipna=False/any, skipna=False/all],
  828. # [skipna=True/any, skipna=True/all]]
  829. "data,expected_data",
  830. [
  831. ([0, 0, 0], [[False, False], [False, False]]),
  832. ([1, 1, 1], [[True, True], [True, True]]),
  833. ([pd.NA, pd.NA, pd.NA], [[pd.NA, pd.NA], [False, True]]),
  834. ([0, pd.NA, 0], [[pd.NA, False], [False, False]]),
  835. ([1, pd.NA, 1], [[True, pd.NA], [True, True]]),
  836. ([1, pd.NA, 0], [[True, False], [True, False]]),
  837. ],
  838. )
  839. def test_any_all_nullable_kleene_logic(
  840. self, bool_agg_func, skipna, data, dtype, expected_data
  841. ):
  842. # GH-37506, GH-41967
  843. ser = Series(data, dtype=dtype)
  844. expected = expected_data[skipna][bool_agg_func == "all"]
  845. result = getattr(ser, bool_agg_func)(skipna=skipna)
  846. assert (result is pd.NA and expected is pd.NA) or result == expected
  847. def test_any_axis1_bool_only(self):
  848. # GH#32432
  849. df = DataFrame({"A": [True, False], "B": [1, 2]})
  850. result = df.any(axis=1, bool_only=True)
  851. expected = Series([True, False])
  852. tm.assert_series_equal(result, expected)
  853. def test_any_all_datetimelike(self):
  854. # GH#38723 these may not be the desired long-term behavior (GH#34479)
  855. # but in the interim should be internally consistent
  856. dta = date_range("1995-01-02", periods=3)._data
  857. ser = Series(dta)
  858. df = DataFrame(ser)
  859. msg = "'(any|all)' with datetime64 dtypes is deprecated"
  860. with tm.assert_produces_warning(FutureWarning, match=msg):
  861. # GH#34479
  862. assert dta.all()
  863. assert dta.any()
  864. assert ser.all()
  865. assert ser.any()
  866. assert df.any().all()
  867. assert df.all().all()
  868. dta = dta.tz_localize("UTC")
  869. ser = Series(dta)
  870. df = DataFrame(ser)
  871. with tm.assert_produces_warning(FutureWarning, match=msg):
  872. # GH#34479
  873. assert dta.all()
  874. assert dta.any()
  875. assert ser.all()
  876. assert ser.any()
  877. assert df.any().all()
  878. assert df.all().all()
  879. tda = dta - dta[0]
  880. ser = Series(tda)
  881. df = DataFrame(ser)
  882. assert tda.any()
  883. assert not tda.all()
  884. assert ser.any()
  885. assert not ser.all()
  886. assert df.any().all()
  887. assert not df.all().any()
  888. def test_any_all_string_dtype(self, any_string_dtype):
  889. # GH#54591
  890. if (
  891. isinstance(any_string_dtype, pd.StringDtype)
  892. and any_string_dtype.na_value is pd.NA
  893. ):
  894. # the nullable string dtype currently still raise an error
  895. # https://github.com/pandas-dev/pandas/issues/51939
  896. ser = Series(["a", "b"], dtype=any_string_dtype)
  897. with pytest.raises(TypeError):
  898. ser.any()
  899. with pytest.raises(TypeError):
  900. ser.all()
  901. return
  902. ser = Series(["", "a"], dtype=any_string_dtype)
  903. assert ser.any()
  904. assert not ser.all()
  905. assert ser.any(skipna=False)
  906. assert not ser.all(skipna=False)
  907. ser = Series([np.nan, "a"], dtype=any_string_dtype)
  908. assert ser.any()
  909. assert ser.all()
  910. assert ser.any(skipna=False)
  911. assert ser.all(skipna=False) # NaN is considered truthy
  912. ser = Series([np.nan, ""], dtype=any_string_dtype)
  913. assert not ser.any()
  914. assert not ser.all()
  915. assert ser.any(skipna=False) # NaN is considered truthy
  916. assert not ser.all(skipna=False)
  917. ser = Series(["a", "b"], dtype=any_string_dtype)
  918. assert ser.any()
  919. assert ser.all()
  920. assert ser.any(skipna=False)
  921. assert ser.all(skipna=False)
  922. ser = Series([], dtype=any_string_dtype)
  923. assert not ser.any()
  924. assert ser.all()
  925. assert not ser.any(skipna=False)
  926. assert ser.all(skipna=False)
  927. ser = Series([""], dtype=any_string_dtype)
  928. assert not ser.any()
  929. assert not ser.all()
  930. assert not ser.any(skipna=False)
  931. assert not ser.all(skipna=False)
  932. ser = Series([np.nan], dtype=any_string_dtype)
  933. assert not ser.any()
  934. assert ser.all()
  935. assert ser.any(skipna=False) # NaN is considered truthy
  936. assert ser.all(skipna=False) # NaN is considered truthy
  937. def test_timedelta64_analytics(self):
  938. # index min/max
  939. dti = date_range("2012-1-1", periods=3, freq="D")
  940. td = Series(dti) - Timestamp("20120101")
  941. result = td.idxmin()
  942. assert result == 0
  943. result = td.idxmax()
  944. assert result == 2
  945. # GH#2982
  946. # with NaT
  947. td[0] = np.nan
  948. result = td.idxmin()
  949. assert result == 1
  950. result = td.idxmax()
  951. assert result == 2
  952. # abs
  953. s1 = Series(date_range("20120101", periods=3))
  954. s2 = Series(date_range("20120102", periods=3))
  955. expected = Series(s2 - s1)
  956. result = np.abs(s1 - s2)
  957. tm.assert_series_equal(result, expected)
  958. result = (s1 - s2).abs()
  959. tm.assert_series_equal(result, expected)
  960. # max/min
  961. result = td.max()
  962. expected = Timedelta("2 days")
  963. assert result == expected
  964. result = td.min()
  965. expected = Timedelta("1 days")
  966. assert result == expected
  967. @pytest.mark.parametrize(
  968. "test_input,error_type",
  969. [
  970. (Series([], dtype="float64"), ValueError),
  971. # For strings, or any Series with dtype 'O'
  972. (Series(["foo", "bar", "baz"]), TypeError),
  973. (Series([(1,), (2,)]), TypeError),
  974. # For mixed data types
  975. (Series(["foo", "foo", "bar", "bar", None, np.nan, "baz"]), TypeError),
  976. ],
  977. )
  978. def test_assert_idxminmax_empty_raises(self, test_input, error_type):
  979. """
  980. Cases where ``Series.argmax`` and related should raise an exception
  981. """
  982. test_input = Series([], dtype="float64")
  983. msg = "attempt to get argmin of an empty sequence"
  984. with pytest.raises(ValueError, match=msg):
  985. test_input.idxmin()
  986. with pytest.raises(ValueError, match=msg):
  987. test_input.idxmin(skipna=False)
  988. msg = "attempt to get argmax of an empty sequence"
  989. with pytest.raises(ValueError, match=msg):
  990. test_input.idxmax()
  991. with pytest.raises(ValueError, match=msg):
  992. test_input.idxmax(skipna=False)
  993. def test_idxminmax_object_dtype(self, using_infer_string):
  994. # pre-2.1 object-dtype was disallowed for argmin/max
  995. ser = Series(["foo", "bar", "baz"])
  996. assert ser.idxmax() == 0
  997. assert ser.idxmax(skipna=False) == 0
  998. assert ser.idxmin() == 1
  999. assert ser.idxmin(skipna=False) == 1
  1000. ser2 = Series([(1,), (2,)])
  1001. assert ser2.idxmax() == 1
  1002. assert ser2.idxmax(skipna=False) == 1
  1003. assert ser2.idxmin() == 0
  1004. assert ser2.idxmin(skipna=False) == 0
  1005. if not using_infer_string:
  1006. # attempting to compare np.nan with string raises
  1007. ser3 = Series(["foo", "foo", "bar", "bar", None, np.nan, "baz"])
  1008. msg = "'>' not supported between instances of 'float' and 'str'"
  1009. with pytest.raises(TypeError, match=msg):
  1010. ser3.idxmax()
  1011. with pytest.raises(TypeError, match=msg):
  1012. ser3.idxmax(skipna=False)
  1013. msg = "'<' not supported between instances of 'float' and 'str'"
  1014. with pytest.raises(TypeError, match=msg):
  1015. ser3.idxmin()
  1016. with pytest.raises(TypeError, match=msg):
  1017. ser3.idxmin(skipna=False)
  1018. def test_idxminmax_object_frame(self):
  1019. # GH#4279
  1020. df = DataFrame([["zimm", 2.5], ["biff", 1.0], ["bid", 12.0]])
  1021. res = df.idxmax()
  1022. exp = Series([0, 2])
  1023. tm.assert_series_equal(res, exp)
  1024. def test_idxminmax_object_tuples(self):
  1025. # GH#43697
  1026. ser = Series([(1, 3), (2, 2), (3, 1)])
  1027. assert ser.idxmax() == 2
  1028. assert ser.idxmin() == 0
  1029. assert ser.idxmax(skipna=False) == 2
  1030. assert ser.idxmin(skipna=False) == 0
  1031. def test_idxminmax_object_decimals(self):
  1032. # GH#40685
  1033. df = DataFrame(
  1034. {
  1035. "idx": [0, 1],
  1036. "x": [Decimal("8.68"), Decimal("42.23")],
  1037. "y": [Decimal("7.11"), Decimal("79.61")],
  1038. }
  1039. )
  1040. res = df.idxmax()
  1041. exp = Series({"idx": 1, "x": 1, "y": 1})
  1042. tm.assert_series_equal(res, exp)
  1043. res2 = df.idxmin()
  1044. exp2 = exp - 1
  1045. tm.assert_series_equal(res2, exp2)
  1046. def test_argminmax_object_ints(self):
  1047. # GH#18021
  1048. ser = Series([0, 1], dtype="object")
  1049. assert ser.argmax() == 1
  1050. assert ser.argmin() == 0
  1051. assert ser.argmax(skipna=False) == 1
  1052. assert ser.argmin(skipna=False) == 0
  1053. def test_idxminmax_with_inf(self):
  1054. # For numeric data with NA and Inf (GH #13595)
  1055. s = Series([0, -np.inf, np.inf, np.nan])
  1056. assert s.idxmin() == 1
  1057. msg = "The behavior of Series.idxmin with all-NA values"
  1058. with tm.assert_produces_warning(FutureWarning, match=msg):
  1059. assert np.isnan(s.idxmin(skipna=False))
  1060. assert s.idxmax() == 2
  1061. msg = "The behavior of Series.idxmax with all-NA values"
  1062. with tm.assert_produces_warning(FutureWarning, match=msg):
  1063. assert np.isnan(s.idxmax(skipna=False))
  1064. msg = "use_inf_as_na option is deprecated"
  1065. with tm.assert_produces_warning(FutureWarning, match=msg):
  1066. # Using old-style behavior that treats floating point nan, -inf, and
  1067. # +inf as missing
  1068. with pd.option_context("mode.use_inf_as_na", True):
  1069. assert s.idxmin() == 0
  1070. assert np.isnan(s.idxmin(skipna=False))
  1071. assert s.idxmax() == 0
  1072. np.isnan(s.idxmax(skipna=False))
  1073. def test_sum_uint64(self):
  1074. # GH 53401
  1075. s = Series([10000000000000000000], dtype="uint64")
  1076. result = s.sum()
  1077. expected = np.uint64(10000000000000000000)
  1078. tm.assert_almost_equal(result, expected)
  1079. class TestDatetime64SeriesReductions:
  1080. # Note: the name TestDatetime64SeriesReductions indicates these tests
  1081. # were moved from a series-specific test file, _not_ that these tests are
  1082. # intended long-term to be series-specific
  1083. @pytest.mark.parametrize(
  1084. "nat_ser",
  1085. [
  1086. Series([NaT, NaT]),
  1087. Series([NaT, Timedelta("nat")]),
  1088. Series([Timedelta("nat"), Timedelta("nat")]),
  1089. ],
  1090. )
  1091. def test_minmax_nat_series(self, nat_ser):
  1092. # GH#23282
  1093. assert nat_ser.min() is NaT
  1094. assert nat_ser.max() is NaT
  1095. assert nat_ser.min(skipna=False) is NaT
  1096. assert nat_ser.max(skipna=False) is NaT
  1097. @pytest.mark.parametrize(
  1098. "nat_df",
  1099. [
  1100. DataFrame([NaT, NaT]),
  1101. DataFrame([NaT, Timedelta("nat")]),
  1102. DataFrame([Timedelta("nat"), Timedelta("nat")]),
  1103. ],
  1104. )
  1105. def test_minmax_nat_dataframe(self, nat_df):
  1106. # GH#23282
  1107. assert nat_df.min()[0] is NaT
  1108. assert nat_df.max()[0] is NaT
  1109. assert nat_df.min(skipna=False)[0] is NaT
  1110. assert nat_df.max(skipna=False)[0] is NaT
  1111. def test_min_max(self):
  1112. rng = date_range("1/1/2000", "12/31/2000")
  1113. rng2 = rng.take(np.random.default_rng(2).permutation(len(rng)))
  1114. the_min = rng2.min()
  1115. the_max = rng2.max()
  1116. assert isinstance(the_min, Timestamp)
  1117. assert isinstance(the_max, Timestamp)
  1118. assert the_min == rng[0]
  1119. assert the_max == rng[-1]
  1120. assert rng.min() == rng[0]
  1121. assert rng.max() == rng[-1]
  1122. def test_min_max_series(self):
  1123. rng = date_range("1/1/2000", periods=10, freq="4h")
  1124. lvls = ["A", "A", "A", "B", "B", "B", "C", "C", "C", "C"]
  1125. df = DataFrame(
  1126. {
  1127. "TS": rng,
  1128. "V": np.random.default_rng(2).standard_normal(len(rng)),
  1129. "L": lvls,
  1130. }
  1131. )
  1132. result = df.TS.max()
  1133. exp = Timestamp(df.TS.iat[-1])
  1134. assert isinstance(result, Timestamp)
  1135. assert result == exp
  1136. result = df.TS.min()
  1137. exp = Timestamp(df.TS.iat[0])
  1138. assert isinstance(result, Timestamp)
  1139. assert result == exp
  1140. class TestCategoricalSeriesReductions:
  1141. # Note: the name TestCategoricalSeriesReductions indicates these tests
  1142. # were moved from a series-specific test file, _not_ that these tests are
  1143. # intended long-term to be series-specific
  1144. @pytest.mark.parametrize("function", ["min", "max"])
  1145. def test_min_max_unordered_raises(self, function):
  1146. # unordered cats have no min/max
  1147. cat = Series(Categorical(["a", "b", "c", "d"], ordered=False))
  1148. msg = f"Categorical is not ordered for operation {function}"
  1149. with pytest.raises(TypeError, match=msg):
  1150. getattr(cat, function)()
  1151. @pytest.mark.parametrize(
  1152. "values, categories",
  1153. [
  1154. (list("abc"), list("abc")),
  1155. (list("abc"), list("cba")),
  1156. (list("abc") + [np.nan], list("cba")),
  1157. ([1, 2, 3], [3, 2, 1]),
  1158. ([1, 2, 3, np.nan], [3, 2, 1]),
  1159. ],
  1160. )
  1161. @pytest.mark.parametrize("function", ["min", "max"])
  1162. def test_min_max_ordered(self, values, categories, function):
  1163. # GH 25303
  1164. cat = Series(Categorical(values, categories=categories, ordered=True))
  1165. result = getattr(cat, function)(skipna=True)
  1166. expected = categories[0] if function == "min" else categories[2]
  1167. assert result == expected
  1168. @pytest.mark.parametrize("function", ["min", "max"])
  1169. @pytest.mark.parametrize("skipna", [True, False])
  1170. def test_min_max_ordered_with_nan_only(self, function, skipna):
  1171. # https://github.com/pandas-dev/pandas/issues/33450
  1172. cat = Series(Categorical([np.nan], categories=[1, 2], ordered=True))
  1173. result = getattr(cat, function)(skipna=skipna)
  1174. assert result is np.nan
  1175. @pytest.mark.parametrize("function", ["min", "max"])
  1176. @pytest.mark.parametrize("skipna", [True, False])
  1177. def test_min_max_skipna(self, function, skipna):
  1178. cat = Series(
  1179. Categorical(["a", "b", np.nan, "a"], categories=["b", "a"], ordered=True)
  1180. )
  1181. result = getattr(cat, function)(skipna=skipna)
  1182. if skipna is True:
  1183. expected = "b" if function == "min" else "a"
  1184. assert result == expected
  1185. else:
  1186. assert result is np.nan
  1187. class TestSeriesMode:
  1188. # Note: the name TestSeriesMode indicates these tests
  1189. # were moved from a series-specific test file, _not_ that these tests are
  1190. # intended long-term to be series-specific
  1191. @pytest.mark.parametrize(
  1192. "dropna, expected",
  1193. [(True, Series([], dtype=np.float64)), (False, Series([], dtype=np.float64))],
  1194. )
  1195. def test_mode_empty(self, dropna, expected):
  1196. s = Series([], dtype=np.float64)
  1197. result = s.mode(dropna)
  1198. tm.assert_series_equal(result, expected)
  1199. @pytest.mark.parametrize(
  1200. "dropna, data, expected",
  1201. [
  1202. (True, [1, 1, 1, 2], [1]),
  1203. (True, [1, 1, 1, 2, 3, 3, 3], [1, 3]),
  1204. (False, [1, 1, 1, 2], [1]),
  1205. (False, [1, 1, 1, 2, 3, 3, 3], [1, 3]),
  1206. ],
  1207. )
  1208. @pytest.mark.parametrize(
  1209. "dt", list(np.typecodes["AllInteger"] + np.typecodes["Float"])
  1210. )
  1211. def test_mode_numerical(self, dropna, data, expected, dt):
  1212. s = Series(data, dtype=dt)
  1213. result = s.mode(dropna)
  1214. expected = Series(expected, dtype=dt)
  1215. tm.assert_series_equal(result, expected)
  1216. @pytest.mark.parametrize("dropna, expected", [(True, [1.0]), (False, [1, np.nan])])
  1217. def test_mode_numerical_nan(self, dropna, expected):
  1218. s = Series([1, 1, 2, np.nan, np.nan])
  1219. result = s.mode(dropna)
  1220. expected = Series(expected)
  1221. tm.assert_series_equal(result, expected)
  1222. @pytest.mark.parametrize(
  1223. "dropna, expected1, expected2",
  1224. [
  1225. (True, ["b"], ["bar"]),
  1226. (False, ["b"], [np.nan]),
  1227. ],
  1228. )
  1229. def test_mode_object(self, dropna, expected1, expected2):
  1230. # Test string and object types.
  1231. data = ["a"] * 2 + ["b"] * 3
  1232. s = Series(data, dtype="c")
  1233. result = s.mode(dropna)
  1234. expected1 = Series(expected1, dtype="c")
  1235. tm.assert_series_equal(result, expected1)
  1236. data = ["foo", "bar", "bar", np.nan, np.nan, np.nan]
  1237. s = Series(data, dtype=object)
  1238. result = s.mode(dropna)
  1239. expected2 = Series(expected2, dtype=object)
  1240. tm.assert_series_equal(result, expected2)
  1241. @pytest.mark.parametrize(
  1242. "dropna, expected1, expected2",
  1243. [
  1244. (True, ["b"], ["bar"]),
  1245. (False, ["b"], [np.nan]),
  1246. ],
  1247. )
  1248. def test_mode_string(self, dropna, expected1, expected2, any_string_dtype):
  1249. # Test string and object types.
  1250. data = ["a"] * 2 + ["b"] * 3
  1251. s = Series(data, dtype=any_string_dtype)
  1252. result = s.mode(dropna)
  1253. expected1 = Series(expected1, dtype=any_string_dtype)
  1254. tm.assert_series_equal(result, expected1)
  1255. data = ["foo", "bar", "bar", np.nan, np.nan, np.nan]
  1256. s = Series(data, dtype=any_string_dtype)
  1257. result = s.mode(dropna)
  1258. expected2 = Series(expected2, dtype=any_string_dtype)
  1259. tm.assert_series_equal(result, expected2)
  1260. @pytest.mark.parametrize(
  1261. "dropna, expected1, expected2",
  1262. [(True, ["foo"], ["foo"]), (False, ["foo"], [np.nan])],
  1263. )
  1264. def test_mode_mixeddtype(self, dropna, expected1, expected2):
  1265. s = Series([1, "foo", "foo"])
  1266. result = s.mode(dropna)
  1267. expected = Series(expected1, dtype=object)
  1268. tm.assert_series_equal(result, expected)
  1269. s = Series([1, "foo", "foo", np.nan, np.nan, np.nan])
  1270. result = s.mode(dropna)
  1271. expected = Series(expected2, dtype=object)
  1272. tm.assert_series_equal(result, expected)
  1273. @pytest.mark.parametrize(
  1274. "dropna, expected1, expected2",
  1275. [
  1276. (
  1277. True,
  1278. ["1900-05-03", "2011-01-03", "2013-01-02"],
  1279. ["2011-01-03", "2013-01-02"],
  1280. ),
  1281. (False, [np.nan], [np.nan, "2011-01-03", "2013-01-02"]),
  1282. ],
  1283. )
  1284. def test_mode_datetime(self, dropna, expected1, expected2):
  1285. s = Series(
  1286. ["2011-01-03", "2013-01-02", "1900-05-03", "nan", "nan"], dtype="M8[ns]"
  1287. )
  1288. result = s.mode(dropna)
  1289. expected1 = Series(expected1, dtype="M8[ns]")
  1290. tm.assert_series_equal(result, expected1)
  1291. s = Series(
  1292. [
  1293. "2011-01-03",
  1294. "2013-01-02",
  1295. "1900-05-03",
  1296. "2011-01-03",
  1297. "2013-01-02",
  1298. "nan",
  1299. "nan",
  1300. ],
  1301. dtype="M8[ns]",
  1302. )
  1303. result = s.mode(dropna)
  1304. expected2 = Series(expected2, dtype="M8[ns]")
  1305. tm.assert_series_equal(result, expected2)
  1306. @pytest.mark.parametrize(
  1307. "dropna, expected1, expected2",
  1308. [
  1309. (True, ["-1 days", "0 days", "1 days"], ["2 min", "1 day"]),
  1310. (False, [np.nan], [np.nan, "2 min", "1 day"]),
  1311. ],
  1312. )
  1313. def test_mode_timedelta(self, dropna, expected1, expected2):
  1314. # gh-5986: Test timedelta types.
  1315. s = Series(
  1316. ["1 days", "-1 days", "0 days", "nan", "nan"], dtype="timedelta64[ns]"
  1317. )
  1318. result = s.mode(dropna)
  1319. expected1 = Series(expected1, dtype="timedelta64[ns]")
  1320. tm.assert_series_equal(result, expected1)
  1321. s = Series(
  1322. [
  1323. "1 day",
  1324. "1 day",
  1325. "-1 day",
  1326. "-1 day 2 min",
  1327. "2 min",
  1328. "2 min",
  1329. "nan",
  1330. "nan",
  1331. ],
  1332. dtype="timedelta64[ns]",
  1333. )
  1334. result = s.mode(dropna)
  1335. expected2 = Series(expected2, dtype="timedelta64[ns]")
  1336. tm.assert_series_equal(result, expected2)
  1337. @pytest.mark.parametrize(
  1338. "dropna, expected1, expected2, expected3",
  1339. [
  1340. (
  1341. True,
  1342. Categorical([1, 2], categories=[1, 2]),
  1343. Categorical(["a"], categories=[1, "a"]),
  1344. Categorical([3, 1], categories=[3, 2, 1], ordered=True),
  1345. ),
  1346. (
  1347. False,
  1348. Categorical([np.nan], categories=[1, 2]),
  1349. Categorical([np.nan, "a"], categories=[1, "a"]),
  1350. Categorical([np.nan, 3, 1], categories=[3, 2, 1], ordered=True),
  1351. ),
  1352. ],
  1353. )
  1354. def test_mode_category(self, dropna, expected1, expected2, expected3):
  1355. s = Series(Categorical([1, 2, np.nan, np.nan]))
  1356. result = s.mode(dropna)
  1357. expected1 = Series(expected1, dtype="category")
  1358. tm.assert_series_equal(result, expected1)
  1359. s = Series(Categorical([1, "a", "a", np.nan, np.nan]))
  1360. result = s.mode(dropna)
  1361. expected2 = Series(expected2, dtype="category")
  1362. tm.assert_series_equal(result, expected2)
  1363. s = Series(
  1364. Categorical(
  1365. [1, 1, 2, 3, 3, np.nan, np.nan], categories=[3, 2, 1], ordered=True
  1366. )
  1367. )
  1368. result = s.mode(dropna)
  1369. expected3 = Series(expected3, dtype="category")
  1370. tm.assert_series_equal(result, expected3)
  1371. @pytest.mark.parametrize(
  1372. "dropna, expected1, expected2",
  1373. [(True, [2**63], [1, 2**63]), (False, [2**63], [1, 2**63])],
  1374. )
  1375. def test_mode_intoverflow(self, dropna, expected1, expected2):
  1376. # Test for uint64 overflow.
  1377. s = Series([1, 2**63, 2**63], dtype=np.uint64)
  1378. result = s.mode(dropna)
  1379. expected1 = Series(expected1, dtype=np.uint64)
  1380. tm.assert_series_equal(result, expected1)
  1381. s = Series([1, 2**63], dtype=np.uint64)
  1382. result = s.mode(dropna)
  1383. expected2 = Series(expected2, dtype=np.uint64)
  1384. tm.assert_series_equal(result, expected2)
  1385. def test_mode_sort_with_na(self):
  1386. s = Series([1, "foo", "foo", np.nan, np.nan])
  1387. expected = Series(["foo", np.nan], dtype=object)
  1388. result = s.mode(dropna=False)
  1389. tm.assert_series_equal(result, expected)
  1390. def test_mode_boolean_with_na(self):
  1391. # GH#42107
  1392. ser = Series([True, False, True, pd.NA], dtype="boolean")
  1393. result = ser.mode()
  1394. expected = Series({0: True}, dtype="boolean")
  1395. tm.assert_series_equal(result, expected)
  1396. @pytest.mark.parametrize(
  1397. "array,expected,dtype",
  1398. [
  1399. (
  1400. [0, 1j, 1, 1, 1 + 1j, 1 + 2j],
  1401. Series([1], dtype=np.complex128),
  1402. np.complex128,
  1403. ),
  1404. (
  1405. [0, 1j, 1, 1, 1 + 1j, 1 + 2j],
  1406. Series([1], dtype=np.complex64),
  1407. np.complex64,
  1408. ),
  1409. (
  1410. [1 + 1j, 2j, 1 + 1j],
  1411. Series([1 + 1j], dtype=np.complex128),
  1412. np.complex128,
  1413. ),
  1414. ],
  1415. )
  1416. def test_single_mode_value_complex(self, array, expected, dtype):
  1417. result = Series(array, dtype=dtype).mode()
  1418. tm.assert_series_equal(result, expected)
  1419. @pytest.mark.parametrize(
  1420. "array,expected,dtype",
  1421. [
  1422. (
  1423. # no modes
  1424. [0, 1j, 1, 1 + 1j, 1 + 2j],
  1425. Series([0j, 1j, 1 + 0j, 1 + 1j, 1 + 2j], dtype=np.complex128),
  1426. np.complex128,
  1427. ),
  1428. (
  1429. [1 + 1j, 2j, 1 + 1j, 2j, 3],
  1430. Series([2j, 1 + 1j], dtype=np.complex64),
  1431. np.complex64,
  1432. ),
  1433. ],
  1434. )
  1435. def test_multimode_complex(self, array, expected, dtype):
  1436. # GH 17927
  1437. # mode tries to sort multimodal series.
  1438. # Complex numbers are sorted by their magnitude
  1439. result = Series(array, dtype=dtype).mode()
  1440. tm.assert_series_equal(result, expected)