test_nanops.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274
  1. from functools import partial
  2. import numpy as np
  3. import pytest
  4. import pandas.util._test_decorators as td
  5. from pandas.core.dtypes.common import is_integer_dtype
  6. import pandas as pd
  7. from pandas import (
  8. Series,
  9. isna,
  10. )
  11. import pandas._testing as tm
  12. from pandas.core import nanops
# Snapshot of the bottleneck switch taken at import time; the class-based tests
# set nanops._USE_BOTTLENECK to False and assign this value back on teardown.
use_bn = nanops._USE_BOTTLENECK
  14. @pytest.fixture
  15. def disable_bottleneck(monkeypatch):
  16. with monkeypatch.context() as m:
  17. m.setattr(nanops, "_USE_BOTTLENECK", False)
  18. yield
  19. @pytest.fixture
  20. def arr_shape():
  21. return 11, 7
  22. @pytest.fixture
  23. def arr_float(arr_shape):
  24. return np.random.default_rng(2).standard_normal(arr_shape)
  25. @pytest.fixture
  26. def arr_complex(arr_float):
  27. return arr_float + arr_float * 1j
  28. @pytest.fixture
  29. def arr_int(arr_shape):
  30. return np.random.default_rng(2).integers(-10, 10, arr_shape)
  31. @pytest.fixture
  32. def arr_bool(arr_shape):
  33. return np.random.default_rng(2).integers(0, 2, arr_shape) == 0
  34. @pytest.fixture
  35. def arr_str(arr_float):
  36. return np.abs(arr_float).astype("S")
  37. @pytest.fixture
  38. def arr_utf(arr_float):
  39. return np.abs(arr_float).astype("U")
  40. @pytest.fixture
  41. def arr_date(arr_shape):
  42. return np.random.default_rng(2).integers(0, 20000, arr_shape).astype("M8[ns]")
  43. @pytest.fixture
  44. def arr_tdelta(arr_shape):
  45. return np.random.default_rng(2).integers(0, 20000, arr_shape).astype("m8[ns]")
  46. @pytest.fixture
  47. def arr_nan(arr_shape):
  48. return np.tile(np.nan, arr_shape)
  49. @pytest.fixture
  50. def arr_float_nan(arr_float, arr_nan):
  51. return np.vstack([arr_float, arr_nan])
  52. @pytest.fixture
  53. def arr_nan_float1(arr_nan, arr_float):
  54. return np.vstack([arr_nan, arr_float])
  55. @pytest.fixture
  56. def arr_nan_nan(arr_nan):
  57. return np.vstack([arr_nan, arr_nan])
  58. @pytest.fixture
  59. def arr_inf(arr_float):
  60. return arr_float * np.inf
  61. @pytest.fixture
  62. def arr_float_inf(arr_float, arr_inf):
  63. return np.vstack([arr_float, arr_inf])
  64. @pytest.fixture
  65. def arr_nan_inf(arr_nan, arr_inf):
  66. return np.vstack([arr_nan, arr_inf])
  67. @pytest.fixture
  68. def arr_float_nan_inf(arr_float, arr_nan, arr_inf):
  69. return np.vstack([arr_float, arr_nan, arr_inf])
  70. @pytest.fixture
  71. def arr_nan_nan_inf(arr_nan, arr_inf):
  72. return np.vstack([arr_nan, arr_nan, arr_inf])
  73. @pytest.fixture
  74. def arr_obj(
  75. arr_float, arr_int, arr_bool, arr_complex, arr_str, arr_utf, arr_date, arr_tdelta
  76. ):
  77. return np.vstack(
  78. [
  79. arr_float.astype("O"),
  80. arr_int.astype("O"),
  81. arr_bool.astype("O"),
  82. arr_complex.astype("O"),
  83. arr_str.astype("O"),
  84. arr_utf.astype("O"),
  85. arr_date.astype("O"),
  86. arr_tdelta.astype("O"),
  87. ]
  88. )
  89. @pytest.fixture
  90. def arr_nan_nanj(arr_nan):
  91. with np.errstate(invalid="ignore"):
  92. return arr_nan + arr_nan * 1j
  93. @pytest.fixture
  94. def arr_complex_nan(arr_complex, arr_nan_nanj):
  95. with np.errstate(invalid="ignore"):
  96. return np.vstack([arr_complex, arr_nan_nanj])
  97. @pytest.fixture
  98. def arr_nan_infj(arr_inf):
  99. with np.errstate(invalid="ignore"):
  100. return arr_inf * 1j
  101. @pytest.fixture
  102. def arr_complex_nan_infj(arr_complex, arr_nan_infj):
  103. with np.errstate(invalid="ignore"):
  104. return np.vstack([arr_complex, arr_nan_infj])
  105. @pytest.fixture
  106. def arr_float_1d(arr_float):
  107. return arr_float[:, 0]
  108. @pytest.fixture
  109. def arr_nan_1d(arr_nan):
  110. return arr_nan[:, 0]
  111. @pytest.fixture
  112. def arr_float_nan_1d(arr_float_nan):
  113. return arr_float_nan[:, 0]
  114. @pytest.fixture
  115. def arr_float1_nan_1d(arr_float1_nan):
  116. return arr_float1_nan[:, 0]
  117. @pytest.fixture
  118. def arr_nan_float1_1d(arr_nan_float1):
  119. return arr_nan_float1[:, 0]
  120. class TestnanopsDataFrame:
  121. def setup_method(self):
  122. nanops._USE_BOTTLENECK = False
  123. arr_shape = (11, 7)
  124. self.arr_float = np.random.default_rng(2).standard_normal(arr_shape)
  125. self.arr_float1 = np.random.default_rng(2).standard_normal(arr_shape)
  126. self.arr_complex = self.arr_float + self.arr_float1 * 1j
  127. self.arr_int = np.random.default_rng(2).integers(-10, 10, arr_shape)
  128. self.arr_bool = np.random.default_rng(2).integers(0, 2, arr_shape) == 0
  129. self.arr_str = np.abs(self.arr_float).astype("S")
  130. self.arr_utf = np.abs(self.arr_float).astype("U")
  131. self.arr_date = (
  132. np.random.default_rng(2).integers(0, 20000, arr_shape).astype("M8[ns]")
  133. )
  134. self.arr_tdelta = (
  135. np.random.default_rng(2).integers(0, 20000, arr_shape).astype("m8[ns]")
  136. )
  137. self.arr_nan = np.tile(np.nan, arr_shape)
  138. self.arr_float_nan = np.vstack([self.arr_float, self.arr_nan])
  139. self.arr_float1_nan = np.vstack([self.arr_float1, self.arr_nan])
  140. self.arr_nan_float1 = np.vstack([self.arr_nan, self.arr_float1])
  141. self.arr_nan_nan = np.vstack([self.arr_nan, self.arr_nan])
  142. self.arr_inf = self.arr_float * np.inf
  143. self.arr_float_inf = np.vstack([self.arr_float, self.arr_inf])
  144. self.arr_nan_inf = np.vstack([self.arr_nan, self.arr_inf])
  145. self.arr_float_nan_inf = np.vstack([self.arr_float, self.arr_nan, self.arr_inf])
  146. self.arr_nan_nan_inf = np.vstack([self.arr_nan, self.arr_nan, self.arr_inf])
  147. self.arr_obj = np.vstack(
  148. [
  149. self.arr_float.astype("O"),
  150. self.arr_int.astype("O"),
  151. self.arr_bool.astype("O"),
  152. self.arr_complex.astype("O"),
  153. self.arr_str.astype("O"),
  154. self.arr_utf.astype("O"),
  155. self.arr_date.astype("O"),
  156. self.arr_tdelta.astype("O"),
  157. ]
  158. )
  159. with np.errstate(invalid="ignore"):
  160. self.arr_nan_nanj = self.arr_nan + self.arr_nan * 1j
  161. self.arr_complex_nan = np.vstack([self.arr_complex, self.arr_nan_nanj])
  162. self.arr_nan_infj = self.arr_inf * 1j
  163. self.arr_complex_nan_infj = np.vstack([self.arr_complex, self.arr_nan_infj])
  164. self.arr_float_2d = self.arr_float
  165. self.arr_float1_2d = self.arr_float1
  166. self.arr_nan_2d = self.arr_nan
  167. self.arr_float_nan_2d = self.arr_float_nan
  168. self.arr_float1_nan_2d = self.arr_float1_nan
  169. self.arr_nan_float1_2d = self.arr_nan_float1
  170. self.arr_float_1d = self.arr_float[:, 0]
  171. self.arr_float1_1d = self.arr_float1[:, 0]
  172. self.arr_nan_1d = self.arr_nan[:, 0]
  173. self.arr_float_nan_1d = self.arr_float_nan[:, 0]
  174. self.arr_float1_nan_1d = self.arr_float1_nan[:, 0]
  175. self.arr_nan_float1_1d = self.arr_nan_float1[:, 0]
  176. def teardown_method(self):
  177. nanops._USE_BOTTLENECK = use_bn
  178. def check_results(self, targ, res, axis, check_dtype=True):
  179. res = getattr(res, "asm8", res)
  180. if (
  181. axis != 0
  182. and hasattr(targ, "shape")
  183. and targ.ndim
  184. and targ.shape != res.shape
  185. ):
  186. res = np.split(res, [targ.shape[0]], axis=0)[0]
  187. try:
  188. tm.assert_almost_equal(targ, res, check_dtype=check_dtype)
  189. except AssertionError:
  190. # handle timedelta dtypes
  191. if hasattr(targ, "dtype") and targ.dtype == "m8[ns]":
  192. raise
  193. # There are sometimes rounding errors with
  194. # complex and object dtypes.
  195. # If it isn't one of those, re-raise the error.
  196. if not hasattr(res, "dtype") or res.dtype.kind not in ["c", "O"]:
  197. raise
  198. # convert object dtypes to something that can be split into
  199. # real and imaginary parts
  200. if res.dtype.kind == "O":
  201. if targ.dtype.kind != "O":
  202. res = res.astype(targ.dtype)
  203. else:
  204. cast_dtype = "c16" if hasattr(np, "complex128") else "f8"
  205. res = res.astype(cast_dtype)
  206. targ = targ.astype(cast_dtype)
  207. # there should never be a case where numpy returns an object
  208. # but nanops doesn't, so make that an exception
  209. elif targ.dtype.kind == "O":
  210. raise
  211. tm.assert_almost_equal(np.real(targ), np.real(res), check_dtype=check_dtype)
  212. tm.assert_almost_equal(np.imag(targ), np.imag(res), check_dtype=check_dtype)
  213. def check_fun_data(
  214. self,
  215. testfunc,
  216. targfunc,
  217. testarval,
  218. targarval,
  219. skipna,
  220. check_dtype=True,
  221. empty_targfunc=None,
  222. **kwargs,
  223. ):
  224. for axis in list(range(targarval.ndim)) + [None]:
  225. targartempval = targarval if skipna else testarval
  226. if skipna and empty_targfunc and isna(targartempval).all():
  227. targ = empty_targfunc(targartempval, axis=axis, **kwargs)
  228. else:
  229. targ = targfunc(targartempval, axis=axis, **kwargs)
  230. if targartempval.dtype == object and (
  231. targfunc is np.any or targfunc is np.all
  232. ):
  233. # GH#12863 the numpy functions will retain e.g. floatiness
  234. if isinstance(targ, np.ndarray):
  235. targ = targ.astype(bool)
  236. else:
  237. targ = bool(targ)
  238. res = testfunc(testarval, axis=axis, skipna=skipna, **kwargs)
  239. if (
  240. isinstance(targ, np.complex128)
  241. and isinstance(res, float)
  242. and np.isnan(targ)
  243. and np.isnan(res)
  244. ):
  245. # GH#18463
  246. targ = res
  247. self.check_results(targ, res, axis, check_dtype=check_dtype)
  248. if skipna:
  249. res = testfunc(testarval, axis=axis, **kwargs)
  250. self.check_results(targ, res, axis, check_dtype=check_dtype)
  251. if axis is None:
  252. res = testfunc(testarval, skipna=skipna, **kwargs)
  253. self.check_results(targ, res, axis, check_dtype=check_dtype)
  254. if skipna and axis is None:
  255. res = testfunc(testarval, **kwargs)
  256. self.check_results(targ, res, axis, check_dtype=check_dtype)
  257. if testarval.ndim <= 1:
  258. return
  259. # Recurse on lower-dimension
  260. testarval2 = np.take(testarval, 0, axis=-1)
  261. targarval2 = np.take(targarval, 0, axis=-1)
  262. self.check_fun_data(
  263. testfunc,
  264. targfunc,
  265. testarval2,
  266. targarval2,
  267. skipna=skipna,
  268. check_dtype=check_dtype,
  269. empty_targfunc=empty_targfunc,
  270. **kwargs,
  271. )
  272. def check_fun(
  273. self, testfunc, targfunc, testar, skipna, empty_targfunc=None, **kwargs
  274. ):
  275. targar = testar
  276. if testar.endswith("_nan") and hasattr(self, testar[:-4]):
  277. targar = testar[:-4]
  278. testarval = getattr(self, testar)
  279. targarval = getattr(self, targar)
  280. self.check_fun_data(
  281. testfunc,
  282. targfunc,
  283. testarval,
  284. targarval,
  285. skipna=skipna,
  286. empty_targfunc=empty_targfunc,
  287. **kwargs,
  288. )
  289. def check_funs(
  290. self,
  291. testfunc,
  292. targfunc,
  293. skipna,
  294. allow_complex=True,
  295. allow_all_nan=True,
  296. allow_date=True,
  297. allow_tdelta=True,
  298. allow_obj=True,
  299. **kwargs,
  300. ):
  301. self.check_fun(testfunc, targfunc, "arr_float", skipna, **kwargs)
  302. self.check_fun(testfunc, targfunc, "arr_float_nan", skipna, **kwargs)
  303. self.check_fun(testfunc, targfunc, "arr_int", skipna, **kwargs)
  304. self.check_fun(testfunc, targfunc, "arr_bool", skipna, **kwargs)
  305. objs = [
  306. self.arr_float.astype("O"),
  307. self.arr_int.astype("O"),
  308. self.arr_bool.astype("O"),
  309. ]
  310. if allow_all_nan:
  311. self.check_fun(testfunc, targfunc, "arr_nan", skipna, **kwargs)
  312. if allow_complex:
  313. self.check_fun(testfunc, targfunc, "arr_complex", skipna, **kwargs)
  314. self.check_fun(testfunc, targfunc, "arr_complex_nan", skipna, **kwargs)
  315. if allow_all_nan:
  316. self.check_fun(testfunc, targfunc, "arr_nan_nanj", skipna, **kwargs)
  317. objs += [self.arr_complex.astype("O")]
  318. if allow_date:
  319. targfunc(self.arr_date)
  320. self.check_fun(testfunc, targfunc, "arr_date", skipna, **kwargs)
  321. objs += [self.arr_date.astype("O")]
  322. if allow_tdelta:
  323. try:
  324. targfunc(self.arr_tdelta)
  325. except TypeError:
  326. pass
  327. else:
  328. self.check_fun(testfunc, targfunc, "arr_tdelta", skipna, **kwargs)
  329. objs += [self.arr_tdelta.astype("O")]
  330. if allow_obj:
  331. self.arr_obj = np.vstack(objs)
  332. # some nanops handle object dtypes better than their numpy
  333. # counterparts, so the numpy functions need to be given something
  334. # else
  335. if allow_obj == "convert":
  336. targfunc = partial(
  337. self._badobj_wrap, func=targfunc, allow_complex=allow_complex
  338. )
  339. self.check_fun(testfunc, targfunc, "arr_obj", skipna, **kwargs)
  340. def _badobj_wrap(self, value, func, allow_complex=True, **kwargs):
  341. if value.dtype.kind == "O":
  342. if allow_complex:
  343. value = value.astype("c16")
  344. else:
  345. value = value.astype("f8")
  346. return func(value, **kwargs)
  347. @pytest.mark.parametrize(
  348. "nan_op,np_op", [(nanops.nanany, np.any), (nanops.nanall, np.all)]
  349. )
  350. def test_nan_funcs(self, nan_op, np_op, skipna):
  351. self.check_funs(nan_op, np_op, skipna, allow_all_nan=False, allow_date=False)
  352. def test_nansum(self, skipna):
  353. self.check_funs(
  354. nanops.nansum,
  355. np.sum,
  356. skipna,
  357. allow_date=False,
  358. check_dtype=False,
  359. empty_targfunc=np.nansum,
  360. )
  361. def test_nanmean(self, skipna):
  362. self.check_funs(
  363. nanops.nanmean, np.mean, skipna, allow_obj=False, allow_date=False
  364. )
  365. @pytest.mark.filterwarnings("ignore::RuntimeWarning")
  366. def test_nanmedian(self, skipna):
  367. self.check_funs(
  368. nanops.nanmedian,
  369. np.median,
  370. skipna,
  371. allow_complex=False,
  372. allow_date=False,
  373. allow_obj="convert",
  374. )
  375. @pytest.mark.parametrize("ddof", range(3))
  376. def test_nanvar(self, ddof, skipna):
  377. self.check_funs(
  378. nanops.nanvar,
  379. np.var,
  380. skipna,
  381. allow_complex=False,
  382. allow_date=False,
  383. allow_obj="convert",
  384. ddof=ddof,
  385. )
  386. @pytest.mark.parametrize("ddof", range(3))
  387. def test_nanstd(self, ddof, skipna):
  388. self.check_funs(
  389. nanops.nanstd,
  390. np.std,
  391. skipna,
  392. allow_complex=False,
  393. allow_date=False,
  394. allow_obj="convert",
  395. ddof=ddof,
  396. )
  397. @pytest.mark.parametrize("ddof", range(3))
  398. def test_nansem(self, ddof, skipna):
  399. sp_stats = pytest.importorskip("scipy.stats")
  400. with np.errstate(invalid="ignore"):
  401. self.check_funs(
  402. nanops.nansem,
  403. sp_stats.sem,
  404. skipna,
  405. allow_complex=False,
  406. allow_date=False,
  407. allow_tdelta=False,
  408. allow_obj="convert",
  409. ddof=ddof,
  410. )
  411. @pytest.mark.filterwarnings("ignore::RuntimeWarning")
  412. @pytest.mark.parametrize(
  413. "nan_op,np_op", [(nanops.nanmin, np.min), (nanops.nanmax, np.max)]
  414. )
  415. def test_nanops_with_warnings(self, nan_op, np_op, skipna):
  416. self.check_funs(nan_op, np_op, skipna, allow_obj=False)
  417. def _argminmax_wrap(self, value, axis=None, func=None):
  418. res = func(value, axis)
  419. nans = np.min(value, axis)
  420. nullnan = isna(nans)
  421. if res.ndim:
  422. res[nullnan] = -1
  423. elif (
  424. hasattr(nullnan, "all")
  425. and nullnan.all()
  426. or not hasattr(nullnan, "all")
  427. and nullnan
  428. ):
  429. res = -1
  430. return res
  431. @pytest.mark.filterwarnings("ignore::RuntimeWarning")
  432. def test_nanargmax(self, skipna):
  433. func = partial(self._argminmax_wrap, func=np.argmax)
  434. self.check_funs(nanops.nanargmax, func, skipna, allow_obj=False)
  435. @pytest.mark.filterwarnings("ignore::RuntimeWarning")
  436. def test_nanargmin(self, skipna):
  437. func = partial(self._argminmax_wrap, func=np.argmin)
  438. self.check_funs(nanops.nanargmin, func, skipna, allow_obj=False)
  439. def _skew_kurt_wrap(self, values, axis=None, func=None):
  440. if not isinstance(values.dtype.type, np.floating):
  441. values = values.astype("f8")
  442. result = func(values, axis=axis, bias=False)
  443. # fix for handling cases where all elements in an axis are the same
  444. if isinstance(result, np.ndarray):
  445. result[np.max(values, axis=axis) == np.min(values, axis=axis)] = 0
  446. return result
  447. elif np.max(values) == np.min(values):
  448. return 0.0
  449. return result
  450. def test_nanskew(self, skipna):
  451. sp_stats = pytest.importorskip("scipy.stats")
  452. func = partial(self._skew_kurt_wrap, func=sp_stats.skew)
  453. with np.errstate(invalid="ignore"):
  454. self.check_funs(
  455. nanops.nanskew,
  456. func,
  457. skipna,
  458. allow_complex=False,
  459. allow_date=False,
  460. allow_tdelta=False,
  461. )
  462. def test_nankurt(self, skipna):
  463. sp_stats = pytest.importorskip("scipy.stats")
  464. func1 = partial(sp_stats.kurtosis, fisher=True)
  465. func = partial(self._skew_kurt_wrap, func=func1)
  466. with np.errstate(invalid="ignore"):
  467. self.check_funs(
  468. nanops.nankurt,
  469. func,
  470. skipna,
  471. allow_complex=False,
  472. allow_date=False,
  473. allow_tdelta=False,
  474. )
  475. def test_nanprod(self, skipna):
  476. self.check_funs(
  477. nanops.nanprod,
  478. np.prod,
  479. skipna,
  480. allow_date=False,
  481. allow_tdelta=False,
  482. empty_targfunc=np.nanprod,
  483. )
  484. def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs):
  485. res00 = checkfun(self.arr_float_2d, self.arr_float1_2d, **kwargs)
  486. res01 = checkfun(
  487. self.arr_float_2d,
  488. self.arr_float1_2d,
  489. min_periods=len(self.arr_float_2d) - 1,
  490. **kwargs,
  491. )
  492. tm.assert_almost_equal(targ0, res00)
  493. tm.assert_almost_equal(targ0, res01)
  494. res10 = checkfun(self.arr_float_nan_2d, self.arr_float1_nan_2d, **kwargs)
  495. res11 = checkfun(
  496. self.arr_float_nan_2d,
  497. self.arr_float1_nan_2d,
  498. min_periods=len(self.arr_float_2d) - 1,
  499. **kwargs,
  500. )
  501. tm.assert_almost_equal(targ1, res10)
  502. tm.assert_almost_equal(targ1, res11)
  503. targ2 = np.nan
  504. res20 = checkfun(self.arr_nan_2d, self.arr_float1_2d, **kwargs)
  505. res21 = checkfun(self.arr_float_2d, self.arr_nan_2d, **kwargs)
  506. res22 = checkfun(self.arr_nan_2d, self.arr_nan_2d, **kwargs)
  507. res23 = checkfun(self.arr_float_nan_2d, self.arr_nan_float1_2d, **kwargs)
  508. res24 = checkfun(
  509. self.arr_float_nan_2d,
  510. self.arr_nan_float1_2d,
  511. min_periods=len(self.arr_float_2d) - 1,
  512. **kwargs,
  513. )
  514. res25 = checkfun(
  515. self.arr_float_2d,
  516. self.arr_float1_2d,
  517. min_periods=len(self.arr_float_2d) + 1,
  518. **kwargs,
  519. )
  520. tm.assert_almost_equal(targ2, res20)
  521. tm.assert_almost_equal(targ2, res21)
  522. tm.assert_almost_equal(targ2, res22)
  523. tm.assert_almost_equal(targ2, res23)
  524. tm.assert_almost_equal(targ2, res24)
  525. tm.assert_almost_equal(targ2, res25)
  526. def check_nancorr_nancov_1d(self, checkfun, targ0, targ1, **kwargs):
  527. res00 = checkfun(self.arr_float_1d, self.arr_float1_1d, **kwargs)
  528. res01 = checkfun(
  529. self.arr_float_1d,
  530. self.arr_float1_1d,
  531. min_periods=len(self.arr_float_1d) - 1,
  532. **kwargs,
  533. )
  534. tm.assert_almost_equal(targ0, res00)
  535. tm.assert_almost_equal(targ0, res01)
  536. res10 = checkfun(self.arr_float_nan_1d, self.arr_float1_nan_1d, **kwargs)
  537. res11 = checkfun(
  538. self.arr_float_nan_1d,
  539. self.arr_float1_nan_1d,
  540. min_periods=len(self.arr_float_1d) - 1,
  541. **kwargs,
  542. )
  543. tm.assert_almost_equal(targ1, res10)
  544. tm.assert_almost_equal(targ1, res11)
  545. targ2 = np.nan
  546. res20 = checkfun(self.arr_nan_1d, self.arr_float1_1d, **kwargs)
  547. res21 = checkfun(self.arr_float_1d, self.arr_nan_1d, **kwargs)
  548. res22 = checkfun(self.arr_nan_1d, self.arr_nan_1d, **kwargs)
  549. res23 = checkfun(self.arr_float_nan_1d, self.arr_nan_float1_1d, **kwargs)
  550. res24 = checkfun(
  551. self.arr_float_nan_1d,
  552. self.arr_nan_float1_1d,
  553. min_periods=len(self.arr_float_1d) - 1,
  554. **kwargs,
  555. )
  556. res25 = checkfun(
  557. self.arr_float_1d,
  558. self.arr_float1_1d,
  559. min_periods=len(self.arr_float_1d) + 1,
  560. **kwargs,
  561. )
  562. tm.assert_almost_equal(targ2, res20)
  563. tm.assert_almost_equal(targ2, res21)
  564. tm.assert_almost_equal(targ2, res22)
  565. tm.assert_almost_equal(targ2, res23)
  566. tm.assert_almost_equal(targ2, res24)
  567. tm.assert_almost_equal(targ2, res25)
  568. def test_nancorr(self):
  569. targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
  570. targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
  571. self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1)
  572. targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
  573. targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
  574. self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson")
  575. def test_nancorr_pearson(self):
  576. targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
  577. targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
  578. self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="pearson")
  579. targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
  580. targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
  581. self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson")
  582. def test_nancorr_kendall(self):
  583. sp_stats = pytest.importorskip("scipy.stats")
  584. targ0 = sp_stats.kendalltau(self.arr_float_2d, self.arr_float1_2d)[0]
  585. targ1 = sp_stats.kendalltau(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0]
  586. self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="kendall")
  587. targ0 = sp_stats.kendalltau(self.arr_float_1d, self.arr_float1_1d)[0]
  588. targ1 = sp_stats.kendalltau(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0]
  589. self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="kendall")
  590. def test_nancorr_spearman(self):
  591. sp_stats = pytest.importorskip("scipy.stats")
  592. targ0 = sp_stats.spearmanr(self.arr_float_2d, self.arr_float1_2d)[0]
  593. targ1 = sp_stats.spearmanr(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0]
  594. self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="spearman")
  595. targ0 = sp_stats.spearmanr(self.arr_float_1d, self.arr_float1_1d)[0]
  596. targ1 = sp_stats.spearmanr(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0]
  597. self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="spearman")
  598. def test_invalid_method(self):
  599. pytest.importorskip("scipy")
  600. targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
  601. targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
  602. msg = "Unknown method 'foo', expected one of 'kendall', 'spearman'"
  603. with pytest.raises(ValueError, match=msg):
  604. self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="foo")
  605. def test_nancov(self):
  606. targ0 = np.cov(self.arr_float_2d, self.arr_float1_2d)[0, 1]
  607. targ1 = np.cov(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
  608. self.check_nancorr_nancov_2d(nanops.nancov, targ0, targ1)
  609. targ0 = np.cov(self.arr_float_1d, self.arr_float1_1d)[0, 1]
  610. targ1 = np.cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
  611. self.check_nancorr_nancov_1d(nanops.nancov, targ0, targ1)
  612. @pytest.mark.parametrize(
  613. "arr, correct",
  614. [
  615. ("arr_complex", False),
  616. ("arr_int", False),
  617. ("arr_bool", False),
  618. ("arr_str", False),
  619. ("arr_utf", False),
  620. ("arr_complex", False),
  621. ("arr_complex_nan", False),
  622. ("arr_nan_nanj", False),
  623. ("arr_nan_infj", True),
  624. ("arr_complex_nan_infj", True),
  625. ],
  626. )
  627. def test_has_infs_non_float(request, arr, correct, disable_bottleneck):
  628. val = request.getfixturevalue(arr)
  629. while getattr(val, "ndim", True):
  630. res0 = nanops._has_infs(val)
  631. if correct:
  632. assert res0
  633. else:
  634. assert not res0
  635. if not hasattr(val, "ndim"):
  636. break
  637. # Reduce dimension for next step in the loop
  638. val = np.take(val, 0, axis=-1)
  639. @pytest.mark.parametrize(
  640. "arr, correct",
  641. [
  642. ("arr_float", False),
  643. ("arr_nan", False),
  644. ("arr_float_nan", False),
  645. ("arr_nan_nan", False),
  646. ("arr_float_inf", True),
  647. ("arr_inf", True),
  648. ("arr_nan_inf", True),
  649. ("arr_float_nan_inf", True),
  650. ("arr_nan_nan_inf", True),
  651. ],
  652. )
  653. @pytest.mark.parametrize("astype", [None, "f4", "f2"])
  654. def test_has_infs_floats(request, arr, correct, astype, disable_bottleneck):
  655. val = request.getfixturevalue(arr)
  656. if astype is not None:
  657. val = val.astype(astype)
  658. while getattr(val, "ndim", True):
  659. res0 = nanops._has_infs(val)
  660. if correct:
  661. assert res0
  662. else:
  663. assert not res0
  664. if not hasattr(val, "ndim"):
  665. break
  666. # Reduce dimension for next step in the loop
  667. val = np.take(val, 0, axis=-1)
  668. @pytest.mark.parametrize(
  669. "fixture", ["arr_float", "arr_complex", "arr_int", "arr_bool", "arr_str", "arr_utf"]
  670. )
  671. def test_bn_ok_dtype(fixture, request, disable_bottleneck):
  672. obj = request.getfixturevalue(fixture)
  673. assert nanops._bn_ok_dtype(obj.dtype, "test")
  674. @pytest.mark.parametrize(
  675. "fixture",
  676. [
  677. "arr_date",
  678. "arr_tdelta",
  679. "arr_obj",
  680. ],
  681. )
  682. def test_bn_not_ok_dtype(fixture, request, disable_bottleneck):
  683. obj = request.getfixturevalue(fixture)
  684. assert not nanops._bn_ok_dtype(obj.dtype, "test")
  685. class TestEnsureNumeric:
  686. def test_numeric_values(self):
  687. # Test integer
  688. assert nanops._ensure_numeric(1) == 1
  689. # Test float
  690. assert nanops._ensure_numeric(1.1) == 1.1
  691. # Test complex
  692. assert nanops._ensure_numeric(1 + 2j) == 1 + 2j
  693. def test_ndarray(self):
  694. # Test numeric ndarray
  695. values = np.array([1, 2, 3])
  696. assert np.allclose(nanops._ensure_numeric(values), values)
  697. # Test object ndarray
  698. o_values = values.astype(object)
  699. assert np.allclose(nanops._ensure_numeric(o_values), values)
  700. # Test convertible string ndarray
  701. s_values = np.array(["1", "2", "3"], dtype=object)
  702. msg = r"Could not convert \['1' '2' '3'\] to numeric"
  703. with pytest.raises(TypeError, match=msg):
  704. nanops._ensure_numeric(s_values)
  705. # Test non-convertible string ndarray
  706. s_values = np.array(["foo", "bar", "baz"], dtype=object)
  707. msg = r"Could not convert .* to numeric"
  708. with pytest.raises(TypeError, match=msg):
  709. nanops._ensure_numeric(s_values)
  710. def test_convertable_values(self):
  711. with pytest.raises(TypeError, match="Could not convert string '1' to numeric"):
  712. nanops._ensure_numeric("1")
  713. with pytest.raises(
  714. TypeError, match="Could not convert string '1.1' to numeric"
  715. ):
  716. nanops._ensure_numeric("1.1")
  717. with pytest.raises(
  718. TypeError, match=r"Could not convert string '1\+1j' to numeric"
  719. ):
  720. nanops._ensure_numeric("1+1j")
  721. def test_non_convertable_values(self):
  722. msg = "Could not convert string 'foo' to numeric"
  723. with pytest.raises(TypeError, match=msg):
  724. nanops._ensure_numeric("foo")
  725. # with the wrong type, python raises TypeError for us
  726. msg = "argument must be a string or a number"
  727. with pytest.raises(TypeError, match=msg):
  728. nanops._ensure_numeric({})
  729. with pytest.raises(TypeError, match=msg):
  730. nanops._ensure_numeric([])
  class TestNanvarFixedValues:
      """Check ``nanops.nanvar`` / ``nanops.nanstd`` against known variances.

      xref GH10242
      """

      # Samples from a normal distribution.
      @pytest.fixture
      def variance(self):
          """Variance of the normal distribution the samples are drawn from."""
          return 3.0

      @pytest.fixture
      def samples(self, variance):
          """100000 normal draws whose scale is the square root of ``variance``."""
          return self.prng.normal(scale=variance**0.5, size=100000)

      def test_nanvar_all_finite(self, samples, variance):
          actual_variance = nanops.nanvar(samples)
          tm.assert_almost_equal(actual_variance, variance, rtol=1e-2)

      def test_nanvar_nans(self, samples, variance):
          # Interleave the samples with NaN: every odd position stays NaN.
          samples_test = np.nan * np.ones(2 * samples.shape[0])
          samples_test[::2] = samples

          actual_variance = nanops.nanvar(samples_test, skipna=True)
          tm.assert_almost_equal(actual_variance, variance, rtol=1e-2)

          actual_variance = nanops.nanvar(samples_test, skipna=False)
          tm.assert_almost_equal(actual_variance, np.nan, rtol=1e-2)

      def test_nanstd_nans(self, samples, variance):
          # Interleave the samples with NaN: every odd position stays NaN.
          samples_test = np.nan * np.ones(2 * samples.shape[0])
          samples_test[::2] = samples

          actual_std = nanops.nanstd(samples_test, skipna=True)
          tm.assert_almost_equal(actual_std, variance**0.5, rtol=1e-2)

          # Use nanstd (not nanvar) so the skipna=False path of the function
          # this test is named after is the one being checked.
          actual_std = nanops.nanstd(samples_test, skipna=False)
          tm.assert_almost_equal(actual_std, np.nan, rtol=1e-2)

      def test_nanvar_axis(self, samples, variance):
          # Generate some sample data.
          samples_unif = self.prng.uniform(size=samples.shape[0])
          samples = np.vstack([samples, samples_unif])

          actual_variance = nanops.nanvar(samples, axis=1)
          # 1/12 is the variance of the uniform distribution on [0, 1).
          tm.assert_almost_equal(
              actual_variance, np.array([variance, 1.0 / 12]), rtol=1e-2
          )

      def test_nanvar_ddof(self):
          n = 5
          samples = self.prng.uniform(size=(10000, n + 1))
          samples[:, -1] = np.nan  # Force use of our own algorithm.

          variance_0 = nanops.nanvar(samples, axis=1, skipna=True, ddof=0).mean()
          variance_1 = nanops.nanvar(samples, axis=1, skipna=True, ddof=1).mean()
          variance_2 = nanops.nanvar(samples, axis=1, skipna=True, ddof=2).mean()

          # The unbiased estimate.
          var = 1.0 / 12
          tm.assert_almost_equal(variance_1, var, rtol=1e-2)

          # The underestimated variance.
          tm.assert_almost_equal(variance_0, (n - 1.0) / n * var, rtol=1e-2)

          # The overestimated variance.
          tm.assert_almost_equal(variance_2, (n - 1.0) / (n - 2.0) * var, rtol=1e-2)

      @pytest.mark.parametrize("axis", range(2))
      @pytest.mark.parametrize("ddof", range(3))
      def test_ground_truth(self, axis, ddof):
          # Test against values that were precomputed with Numpy.
          samples = np.empty((4, 4))
          samples[:3, :3] = np.array(
              [
                  [0.97303362, 0.21869576, 0.55560287],
                  [0.72980153, 0.03109364, 0.99155171],
                  [0.09317602, 0.60078248, 0.15871292],
              ]
          )
          # Last row and last column are all-NaN.
          samples[3] = samples[:, 3] = np.nan

          # Actual variances along axis=0, 1 for ddof=0, 1, 2
          variance = np.array(
              [
                  [
                      [0.13762259, 0.05619224, 0.11568816],
                      [0.20643388, 0.08428837, 0.17353224],
                      [0.41286776, 0.16857673, 0.34706449],
                  ],
                  [
                      [0.09519783, 0.16435395, 0.05082054],
                      [0.14279674, 0.24653093, 0.07623082],
                      [0.28559348, 0.49306186, 0.15246163],
                  ],
              ]
          )

          # Test nanvar.
          var = nanops.nanvar(samples, skipna=True, axis=axis, ddof=ddof)
          tm.assert_almost_equal(var[:3], variance[axis, ddof])
          assert np.isnan(var[3])

          # Test nanstd.
          std = nanops.nanstd(samples, skipna=True, axis=axis, ddof=ddof)
          tm.assert_almost_equal(std[:3], variance[axis, ddof] ** 0.5)
          assert np.isnan(std[3])

      @pytest.mark.parametrize("ddof", range(3))
      def test_nanstd_roundoff(self, ddof):
          # Regression test for GH 10242 (test data taken from GH 10489). Ensure
          # that variance is stable.
          data = Series(766897346 * np.ones(10))
          result = data.std(ddof=ddof)
          assert result == 0.0

      @property
      def prng(self):
          # Every access builds a new generator seeded with 2.
          return np.random.default_rng(2)
  class TestNanskewFixedValues:
      """Fixed-value checks for ``nanops.nanskew`` (xref GH 11974)."""

      @property
      def prng(self):
          # Every access builds a new generator seeded with 2.
          return np.random.default_rng(2)

      # Test data + skewness value (computed with scipy.stats.skew)
      @pytest.fixture
      def samples(self):
          grid = np.linspace(0, 1, 200)
          return np.sin(grid)

      @pytest.fixture
      def actual_skew(self):
          return -0.1875895205961754

      @pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5])
      def test_constant_series(self, val):
          # xref GH 11974
          constant = val * np.ones(300)
          assert nanops.nanskew(constant) == 0.0

      def test_all_finite(self):
          # ``self.prng`` is read once per draw on purpose: each read yields a
          # freshly seeded generator.
          left_tailed = self.prng.beta(0.3, 0.1, size=100)
          assert nanops.nanskew(left_tailed) < 0

          right_tailed = self.prng.beta(0.1, 0.3, size=100)
          assert nanops.nanskew(right_tailed) > 0

      def test_ground_truth(self, samples, actual_skew):
          tm.assert_almost_equal(nanops.nanskew(samples), actual_skew)

      def test_axis(self, samples, actual_skew):
          # Second row is all-NaN, so its skew is expected to be NaN.
          nan_row = np.nan * np.ones(len(samples))
          stacked = np.vstack([samples, nan_row])
          row_skews = nanops.nanskew(stacked, axis=1)
          tm.assert_almost_equal(row_skews, np.array([actual_skew, np.nan]))

      def test_nans(self, samples):
          with_nan = np.hstack([samples, np.nan])
          assert np.isnan(nanops.nanskew(with_nan, skipna=False))

      def test_nans_skipna(self, samples, actual_skew):
          with_nan = np.hstack([samples, np.nan])
          tm.assert_almost_equal(nanops.nanskew(with_nan, skipna=True), actual_skew)
  class TestNankurtFixedValues:
      """Fixed-value checks for ``nanops.nankurt`` (xref GH 11974)."""

      @property
      def prng(self):
          # Every access builds a new generator seeded with 2.
          return np.random.default_rng(2)

      # Test data + kurtosis value (computed with scipy.stats.kurtosis)
      @pytest.fixture
      def samples(self):
          grid = np.linspace(0, 1, 200)
          return np.sin(grid)

      @pytest.fixture
      def actual_kurt(self):
          return -1.2058303433799713

      @pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5])
      def test_constant_series(self, val):
          # xref GH 11974
          constant = val * np.ones(300)
          assert nanops.nankurt(constant) == 0.0

      def test_all_finite(self):
          # ``self.prng`` is read once per draw on purpose: each read yields a
          # freshly seeded generator.
          left_tailed = self.prng.beta(0.3, 0.1, size=100)
          assert nanops.nankurt(left_tailed) < 2

          right_tailed = self.prng.beta(0.1, 0.3, size=100)
          assert nanops.nankurt(right_tailed) < 0

      def test_ground_truth(self, samples, actual_kurt):
          tm.assert_almost_equal(nanops.nankurt(samples), actual_kurt)

      def test_axis(self, samples, actual_kurt):
          # Second row is all-NaN, so its kurtosis is expected to be NaN.
          nan_row = np.nan * np.ones(len(samples))
          stacked = np.vstack([samples, nan_row])
          row_kurts = nanops.nankurt(stacked, axis=1)
          tm.assert_almost_equal(row_kurts, np.array([actual_kurt, np.nan]))

      def test_nans(self, samples):
          with_nan = np.hstack([samples, np.nan])
          assert np.isnan(nanops.nankurt(with_nan, skipna=False))

      def test_nans_skipna(self, samples, actual_kurt):
          with_nan = np.hstack([samples, np.nan])
          tm.assert_almost_equal(nanops.nankurt(with_nan, skipna=True), actual_kurt)
  class TestDatetime64NaNOps:
      """``nanops.nanmean`` checks on datetime64/timedelta64 data per unit."""

      @pytest.fixture(params=["s", "ms", "us", "ns"])
      def unit(self, request):
          return request.param

      # Enabling mean changes the behavior of DataFrame.mean
      # See https://github.com/pandas-dev/pandas/issues/24752
      def test_nanmean(self, unit):
          dti = pd.date_range("2016-01-01", periods=3).as_unit(unit)
          # Same index with a NaT inserted at position 1.
          dti_with_nat = dti.insert(1, pd.NaT)
          middle = dti[1]

          # Both the index and its backing ``_data`` are checked, first
          # without and then with the NaT entry.
          for obj in [dti, dti._data, dti_with_nat, dti_with_nat._data]:
              assert nanops.nanmean(obj) == middle

      @pytest.mark.parametrize("constructor", ["M8", "m8"])
      def test_nanmean_skipna_false(self, constructor, unit):
          dtype = f"{constructor}[{unit}]"
          arr = np.arange(12).astype(np.int64).view(dtype).reshape(4, 3)
          arr[-1, -1] = "NaT"

          # Full reduction: the single NaT makes the overall mean NaT.
          overall = nanops.nanmean(arr, skipna=False)
          assert np.isnat(overall)
          assert overall.dtype == dtype

          # Column-wise: only the last column contains the NaT.
          by_column = nanops.nanmean(arr, axis=0, skipna=False)
          tm.assert_numpy_array_equal(
              by_column, np.array([4, 5, "NaT"], dtype=arr.dtype)
          )

          # Row-wise: the expected values are each row's middle element,
          # except the last row, which is NaT.
          by_row = nanops.nanmean(arr, axis=1, skipna=False)
          tm.assert_numpy_array_equal(
              by_row, np.array([arr[0, 1], arr[1, 1], arr[2, 1], arr[-1, -1]])
          )
  def test_use_bottleneck():
      """Check that the ``use_bottleneck`` option can be toggled via a context."""
      if not nanops._BOTTLENECK_INSTALLED:
          # Nothing is checked when bottleneck is not installed.
          return

      with pd.option_context("use_bottleneck", True):
          assert pd.get_option("use_bottleneck")

      with pd.option_context("use_bottleneck", False):
          assert not pd.get_option("use_bottleneck")
  @pytest.mark.parametrize(
      "numpy_op, expected",
      [
          (np.sum, 10),
          (np.nansum, 10),
          (np.mean, 2.5),
          (np.nanmean, 2.5),
          (np.median, 2.5),
          (np.nanmedian, 2.5),
          (np.min, 1),
          (np.max, 4),
          (np.nanmin, 1),
          (np.nanmax, 4),
      ],
  )
  def test_numpy_ops(numpy_op, expected):
      """Apply a NumPy reduction directly to a Series and check the value."""
      # GH8383
      ser = Series([1, 2, 3, 4])
      assert numpy_op(ser) == expected
  @pytest.mark.parametrize(
      "operation",
      [
          nanops.nanany,
          nanops.nanall,
          nanops.nansum,
          nanops.nanmean,
          nanops.nanmedian,
          nanops.nanstd,
          nanops.nanvar,
          nanops.nansem,
          nanops.nanargmax,
          nanops.nanargmin,
          nanops.nanmax,
          nanops.nanmin,
          nanops.nanskew,
          nanops.nankurt,
          nanops.nanprod,
      ],
  )
  def test_nanops_independent_of_mask_param(operation):
      """Check each op gives the same result with and without an explicit mask."""
      # GH22764
      ser = Series([1, 2, np.nan, 3, np.nan, 4])
      na_mask = ser.isna()

      without_mask = operation(ser._values)
      with_mask = operation(ser._values, mask=na_mask)
      assert without_mask == with_mask
  @pytest.mark.parametrize("min_count", [-1, 0])
  def test_check_below_min_count_negative_or_zero_min_count(min_count):
      """Check ``check_below_min_count`` is False for a non-positive min_count."""
      # GH35227
      shape = (21, 37)
      expected = False
      assert nanops.check_below_min_count(shape, None, min_count) == expected
  @pytest.mark.parametrize(
      "mask", [None, np.array([False, False, True]), np.array([True] + 9 * [False])]
  )
  @pytest.mark.parametrize("min_count, expected_result", [(1, False), (101, True)])
  def test_check_below_min_count_positive_min_count(mask, min_count, expected_result):
      """Check ``check_below_min_count`` on a (10, 10) shape for several masks."""
      # GH35227
      is_below = nanops.check_below_min_count((10, 10), mask, min_count)
      assert is_below == expected_result
  @td.skip_if_windows
  @td.skip_if_32bit
  @pytest.mark.parametrize("min_count, expected_result", [(1, False), (2812191852, True)])
  def test_check_below_min_count_large_shape(min_count, expected_result):
      """Check ``check_below_min_count`` with a shape of 2244367 x 1253."""
      # GH35227 large shape used to show that the issue is fixed
      large_shape = (2244367, 1253)
      is_below = nanops.check_below_min_count(
          large_shape, mask=None, min_count=min_count
      )
      assert is_below == expected_result
  @pytest.mark.parametrize("func", ["nanmean", "nansum"])
  def test_check_bottleneck_disallow(any_real_numpy_dtype, func):
      """Check ``_bn_ok_dtype`` is falsy for nanmean/nansum on real dtypes."""
      # GH 42878 bottleneck sometimes produces unreliable results for mean and sum
      scalar_type = np.dtype(any_real_numpy_dtype).type
      assert not nanops._bn_ok_dtype(scalar_type, func)
  @pytest.mark.parametrize("val", [2**55, -(2**55), 20150515061816532])
  def test_nanmean_overflow(disable_bottleneck, val):
      """Check the mean of a constant int64 Series equals the constant."""
      # GH 10155
      # In the previous implementation mean can overflow for int dtypes, it
      # is now consistent with numpy
      ser = Series(val, index=range(500), dtype=np.int64)

      pandas_mean = ser.mean()
      numpy_mean = ser.values.mean()

      assert pandas_mean == val
      assert pandas_mean == numpy_mean
      assert pandas_mean.dtype == np.float64
  @pytest.mark.parametrize(
      "dtype",
      [
          np.int16,
          np.int32,
          np.int64,
          np.float32,
          np.float64,
          getattr(np, "float128", None),
      ],
  )
  @pytest.mark.parametrize("method", ["mean", "std", "var", "skew", "kurt", "min", "max"])
  def test_returned_dtype(disable_bottleneck, dtype, method):
      """Check the dtype of a Series reduction result for each input dtype."""
      if dtype is None:
          # ``getattr(np, "float128", None)`` gave None, i.e. no such attribute.
          pytest.skip("np.float128 not available")

      ser = Series(range(10), dtype=dtype)
      reduced = getattr(ser, method)()

      # Integer input yields float64, except for min/max; every other case
      # keeps the input dtype.
      promotes_to_float = is_integer_dtype(dtype) and method not in ["min", "max"]
      expected_dtype = np.float64 if promotes_to_float else dtype
      assert reduced.dtype == expected_dtype