# test_nanops.py
  1. from functools import partial
  2. import numpy as np
  3. import pytest
  4. import pandas.util._test_decorators as td
  5. from pandas.core.dtypes.common import is_integer_dtype
  6. import pandas as pd
  7. from pandas import (
  8. Series,
  9. isna,
  10. )
  11. import pandas._testing as tm
  12. from pandas.core import nanops
# Remember the bottleneck setting found at import time so that tests which
# switch ``nanops._USE_BOTTLENECK`` off can put the original value back.
use_bn = nanops._USE_BOTTLENECK
  14. @pytest.fixture
  15. def disable_bottleneck(monkeypatch):
  16. with monkeypatch.context() as m:
  17. m.setattr(nanops, "_USE_BOTTLENECK", False)
  18. yield
  19. @pytest.fixture
  20. def arr_shape():
  21. return 11, 7
  22. @pytest.fixture
  23. def arr_float(arr_shape):
  24. return np.random.default_rng(2).standard_normal(arr_shape)
  25. @pytest.fixture
  26. def arr_complex(arr_float):
  27. return arr_float + arr_float * 1j
  28. @pytest.fixture
  29. def arr_int(arr_shape):
  30. return np.random.default_rng(2).integers(-10, 10, arr_shape)
  31. @pytest.fixture
  32. def arr_bool(arr_shape):
  33. return np.random.default_rng(2).integers(0, 2, arr_shape) == 0
  34. @pytest.fixture
  35. def arr_str(arr_float):
  36. return np.abs(arr_float).astype("S")
  37. @pytest.fixture
  38. def arr_utf(arr_float):
  39. return np.abs(arr_float).astype("U")
  40. @pytest.fixture
  41. def arr_date(arr_shape):
  42. return np.random.default_rng(2).integers(0, 20000, arr_shape).astype("M8[ns]")
  43. @pytest.fixture
  44. def arr_tdelta(arr_shape):
  45. return np.random.default_rng(2).integers(0, 20000, arr_shape).astype("m8[ns]")
  46. @pytest.fixture
  47. def arr_nan(arr_shape):
  48. return np.tile(np.nan, arr_shape)
  49. @pytest.fixture
  50. def arr_float_nan(arr_float, arr_nan):
  51. return np.vstack([arr_float, arr_nan])
  52. @pytest.fixture
  53. def arr_nan_float1(arr_nan, arr_float):
  54. return np.vstack([arr_nan, arr_float])
  55. @pytest.fixture
  56. def arr_nan_nan(arr_nan):
  57. return np.vstack([arr_nan, arr_nan])
  58. @pytest.fixture
  59. def arr_inf(arr_float):
  60. return arr_float * np.inf
  61. @pytest.fixture
  62. def arr_float_inf(arr_float, arr_inf):
  63. return np.vstack([arr_float, arr_inf])
  64. @pytest.fixture
  65. def arr_nan_inf(arr_nan, arr_inf):
  66. return np.vstack([arr_nan, arr_inf])
  67. @pytest.fixture
  68. def arr_float_nan_inf(arr_float, arr_nan, arr_inf):
  69. return np.vstack([arr_float, arr_nan, arr_inf])
  70. @pytest.fixture
  71. def arr_nan_nan_inf(arr_nan, arr_inf):
  72. return np.vstack([arr_nan, arr_nan, arr_inf])
  73. @pytest.fixture
  74. def arr_obj(
  75. arr_float, arr_int, arr_bool, arr_complex, arr_str, arr_utf, arr_date, arr_tdelta
  76. ):
  77. return np.vstack(
  78. [
  79. arr_float.astype("O"),
  80. arr_int.astype("O"),
  81. arr_bool.astype("O"),
  82. arr_complex.astype("O"),
  83. arr_str.astype("O"),
  84. arr_utf.astype("O"),
  85. arr_date.astype("O"),
  86. arr_tdelta.astype("O"),
  87. ]
  88. )
  89. @pytest.fixture
  90. def arr_nan_nanj(arr_nan):
  91. with np.errstate(invalid="ignore"):
  92. return arr_nan + arr_nan * 1j
  93. @pytest.fixture
  94. def arr_complex_nan(arr_complex, arr_nan_nanj):
  95. with np.errstate(invalid="ignore"):
  96. return np.vstack([arr_complex, arr_nan_nanj])
  97. @pytest.fixture
  98. def arr_nan_infj(arr_inf):
  99. with np.errstate(invalid="ignore"):
  100. return arr_inf * 1j
  101. @pytest.fixture
  102. def arr_complex_nan_infj(arr_complex, arr_nan_infj):
  103. with np.errstate(invalid="ignore"):
  104. return np.vstack([arr_complex, arr_nan_infj])
  105. @pytest.fixture
  106. def arr_float_1d(arr_float):
  107. return arr_float[:, 0]
  108. @pytest.fixture
  109. def arr_nan_1d(arr_nan):
  110. return arr_nan[:, 0]
  111. @pytest.fixture
  112. def arr_float_nan_1d(arr_float_nan):
  113. return arr_float_nan[:, 0]
  114. @pytest.fixture
  115. def arr_float1_nan_1d(arr_float1_nan):
  116. return arr_float1_nan[:, 0]
  117. @pytest.fixture
  118. def arr_nan_float1_1d(arr_nan_float1):
  119. return arr_nan_float1[:, 0]
  120. class TestnanopsDataFrame:
  121. def setup_method(self):
  122. nanops._USE_BOTTLENECK = False
  123. arr_shape = (11, 7)
  124. self.arr_float = np.random.default_rng(2).standard_normal(arr_shape)
  125. self.arr_float1 = np.random.default_rng(2).standard_normal(arr_shape)
  126. self.arr_complex = self.arr_float + self.arr_float1 * 1j
  127. self.arr_int = np.random.default_rng(2).integers(-10, 10, arr_shape)
  128. self.arr_bool = np.random.default_rng(2).integers(0, 2, arr_shape) == 0
  129. self.arr_str = np.abs(self.arr_float).astype("S")
  130. self.arr_utf = np.abs(self.arr_float).astype("U")
  131. self.arr_date = (
  132. np.random.default_rng(2).integers(0, 20000, arr_shape).astype("M8[ns]")
  133. )
  134. self.arr_tdelta = (
  135. np.random.default_rng(2).integers(0, 20000, arr_shape).astype("m8[ns]")
  136. )
  137. self.arr_nan = np.tile(np.nan, arr_shape)
  138. self.arr_float_nan = np.vstack([self.arr_float, self.arr_nan])
  139. self.arr_float1_nan = np.vstack([self.arr_float1, self.arr_nan])
  140. self.arr_nan_float1 = np.vstack([self.arr_nan, self.arr_float1])
  141. self.arr_nan_nan = np.vstack([self.arr_nan, self.arr_nan])
  142. self.arr_inf = self.arr_float * np.inf
  143. self.arr_float_inf = np.vstack([self.arr_float, self.arr_inf])
  144. self.arr_nan_inf = np.vstack([self.arr_nan, self.arr_inf])
  145. self.arr_float_nan_inf = np.vstack([self.arr_float, self.arr_nan, self.arr_inf])
  146. self.arr_nan_nan_inf = np.vstack([self.arr_nan, self.arr_nan, self.arr_inf])
  147. self.arr_obj = np.vstack(
  148. [
  149. self.arr_float.astype("O"),
  150. self.arr_int.astype("O"),
  151. self.arr_bool.astype("O"),
  152. self.arr_complex.astype("O"),
  153. self.arr_str.astype("O"),
  154. self.arr_utf.astype("O"),
  155. self.arr_date.astype("O"),
  156. self.arr_tdelta.astype("O"),
  157. ]
  158. )
  159. with np.errstate(invalid="ignore"):
  160. self.arr_nan_nanj = self.arr_nan + self.arr_nan * 1j
  161. self.arr_complex_nan = np.vstack([self.arr_complex, self.arr_nan_nanj])
  162. self.arr_nan_infj = self.arr_inf * 1j
  163. self.arr_complex_nan_infj = np.vstack([self.arr_complex, self.arr_nan_infj])
  164. self.arr_float_2d = self.arr_float
  165. self.arr_float1_2d = self.arr_float1
  166. self.arr_nan_2d = self.arr_nan
  167. self.arr_float_nan_2d = self.arr_float_nan
  168. self.arr_float1_nan_2d = self.arr_float1_nan
  169. self.arr_nan_float1_2d = self.arr_nan_float1
  170. self.arr_float_1d = self.arr_float[:, 0]
  171. self.arr_float1_1d = self.arr_float1[:, 0]
  172. self.arr_nan_1d = self.arr_nan[:, 0]
  173. self.arr_float_nan_1d = self.arr_float_nan[:, 0]
  174. self.arr_float1_nan_1d = self.arr_float1_nan[:, 0]
  175. self.arr_nan_float1_1d = self.arr_nan_float1[:, 0]
  176. def teardown_method(self):
  177. nanops._USE_BOTTLENECK = use_bn
  178. def check_results(self, targ, res, axis, check_dtype=True):
  179. res = getattr(res, "asm8", res)
  180. if (
  181. axis != 0
  182. and hasattr(targ, "shape")
  183. and targ.ndim
  184. and targ.shape != res.shape
  185. ):
  186. res = np.split(res, [targ.shape[0]], axis=0)[0]
  187. try:
  188. tm.assert_almost_equal(targ, res, check_dtype=check_dtype)
  189. except AssertionError:
  190. # handle timedelta dtypes
  191. if hasattr(targ, "dtype") and targ.dtype == "m8[ns]":
  192. raise
  193. # There are sometimes rounding errors with
  194. # complex and object dtypes.
  195. # If it isn't one of those, re-raise the error.
  196. if not hasattr(res, "dtype") or res.dtype.kind not in ["c", "O"]:
  197. raise
  198. # convert object dtypes to something that can be split into
  199. # real and imaginary parts
  200. if res.dtype.kind == "O":
  201. if targ.dtype.kind != "O":
  202. res = res.astype(targ.dtype)
  203. else:
  204. cast_dtype = "c16" if hasattr(np, "complex128") else "f8"
  205. res = res.astype(cast_dtype)
  206. targ = targ.astype(cast_dtype)
  207. # there should never be a case where numpy returns an object
  208. # but nanops doesn't, so make that an exception
  209. elif targ.dtype.kind == "O":
  210. raise
  211. tm.assert_almost_equal(np.real(targ), np.real(res), check_dtype=check_dtype)
  212. tm.assert_almost_equal(np.imag(targ), np.imag(res), check_dtype=check_dtype)
  213. def check_fun_data(
  214. self,
  215. testfunc,
  216. targfunc,
  217. testar,
  218. testarval,
  219. targarval,
  220. skipna,
  221. check_dtype=True,
  222. empty_targfunc=None,
  223. **kwargs,
  224. ):
  225. for axis in [*list(range(targarval.ndim)), None]:
  226. targartempval = targarval if skipna else testarval
  227. if skipna and empty_targfunc and isna(targartempval).all():
  228. targ = empty_targfunc(targartempval, axis=axis, **kwargs)
  229. else:
  230. targ = targfunc(targartempval, axis=axis, **kwargs)
  231. if targartempval.dtype == object and (
  232. targfunc is np.any or targfunc is np.all
  233. ):
  234. # GH#12863 the numpy functions will retain e.g. floatiness
  235. if isinstance(targ, np.ndarray):
  236. targ = targ.astype(bool)
  237. else:
  238. targ = bool(targ)
  239. if testfunc.__name__ in ["nanargmax", "nanargmin"] and (
  240. testar.startswith("arr_nan")
  241. or (testar.endswith("nan") and (not skipna or axis == 1))
  242. ):
  243. with pytest.raises(ValueError, match="Encountered .* NA value"):
  244. testfunc(testarval, axis=axis, skipna=skipna, **kwargs)
  245. return
  246. res = testfunc(testarval, axis=axis, skipna=skipna, **kwargs)
  247. if (
  248. isinstance(targ, np.complex128)
  249. and isinstance(res, float)
  250. and np.isnan(targ)
  251. and np.isnan(res)
  252. ):
  253. # GH#18463
  254. targ = res
  255. self.check_results(targ, res, axis, check_dtype=check_dtype)
  256. if skipna:
  257. res = testfunc(testarval, axis=axis, **kwargs)
  258. self.check_results(targ, res, axis, check_dtype=check_dtype)
  259. if axis is None:
  260. res = testfunc(testarval, skipna=skipna, **kwargs)
  261. self.check_results(targ, res, axis, check_dtype=check_dtype)
  262. if skipna and axis is None:
  263. res = testfunc(testarval, **kwargs)
  264. self.check_results(targ, res, axis, check_dtype=check_dtype)
  265. if testarval.ndim <= 1:
  266. return
  267. # Recurse on lower-dimension
  268. testarval2 = np.take(testarval, 0, axis=-1)
  269. targarval2 = np.take(targarval, 0, axis=-1)
  270. self.check_fun_data(
  271. testfunc,
  272. targfunc,
  273. testar,
  274. testarval2,
  275. targarval2,
  276. skipna=skipna,
  277. check_dtype=check_dtype,
  278. empty_targfunc=empty_targfunc,
  279. **kwargs,
  280. )
  281. def check_fun(
  282. self, testfunc, targfunc, testar, skipna, empty_targfunc=None, **kwargs
  283. ):
  284. targar = testar
  285. if testar.endswith("_nan") and hasattr(self, testar[:-4]):
  286. targar = testar[:-4]
  287. testarval = getattr(self, testar)
  288. targarval = getattr(self, targar)
  289. self.check_fun_data(
  290. testfunc,
  291. targfunc,
  292. testar,
  293. testarval,
  294. targarval,
  295. skipna=skipna,
  296. empty_targfunc=empty_targfunc,
  297. **kwargs,
  298. )
  299. def check_funs(
  300. self,
  301. testfunc,
  302. targfunc,
  303. skipna,
  304. allow_complex=True,
  305. allow_all_nan=True,
  306. allow_date=True,
  307. allow_tdelta=True,
  308. allow_obj=True,
  309. **kwargs,
  310. ):
  311. self.check_fun(testfunc, targfunc, "arr_float", skipna, **kwargs)
  312. self.check_fun(testfunc, targfunc, "arr_float_nan", skipna, **kwargs)
  313. self.check_fun(testfunc, targfunc, "arr_int", skipna, **kwargs)
  314. self.check_fun(testfunc, targfunc, "arr_bool", skipna, **kwargs)
  315. objs = [
  316. self.arr_float.astype("O"),
  317. self.arr_int.astype("O"),
  318. self.arr_bool.astype("O"),
  319. ]
  320. if allow_all_nan:
  321. self.check_fun(testfunc, targfunc, "arr_nan", skipna, **kwargs)
  322. if allow_complex:
  323. self.check_fun(testfunc, targfunc, "arr_complex", skipna, **kwargs)
  324. self.check_fun(testfunc, targfunc, "arr_complex_nan", skipna, **kwargs)
  325. if allow_all_nan:
  326. self.check_fun(testfunc, targfunc, "arr_nan_nanj", skipna, **kwargs)
  327. objs += [self.arr_complex.astype("O")]
  328. if allow_date:
  329. targfunc(self.arr_date)
  330. self.check_fun(testfunc, targfunc, "arr_date", skipna, **kwargs)
  331. objs += [self.arr_date.astype("O")]
  332. if allow_tdelta:
  333. try:
  334. targfunc(self.arr_tdelta)
  335. except TypeError:
  336. pass
  337. else:
  338. self.check_fun(testfunc, targfunc, "arr_tdelta", skipna, **kwargs)
  339. objs += [self.arr_tdelta.astype("O")]
  340. if allow_obj:
  341. self.arr_obj = np.vstack(objs)
  342. # some nanops handle object dtypes better than their numpy
  343. # counterparts, so the numpy functions need to be given something
  344. # else
  345. if allow_obj == "convert":
  346. targfunc = partial(
  347. self._badobj_wrap, func=targfunc, allow_complex=allow_complex
  348. )
  349. self.check_fun(testfunc, targfunc, "arr_obj", skipna, **kwargs)
  350. def _badobj_wrap(self, value, func, allow_complex=True, **kwargs):
  351. if value.dtype.kind == "O":
  352. if allow_complex:
  353. value = value.astype("c16")
  354. else:
  355. value = value.astype("f8")
  356. return func(value, **kwargs)
  357. @pytest.mark.parametrize(
  358. "nan_op,np_op", [(nanops.nanany, np.any), (nanops.nanall, np.all)]
  359. )
  360. def test_nan_funcs(self, nan_op, np_op, skipna):
  361. self.check_funs(nan_op, np_op, skipna, allow_all_nan=False, allow_date=False)
  362. def test_nansum(self, skipna):
  363. self.check_funs(
  364. nanops.nansum,
  365. np.sum,
  366. skipna,
  367. allow_date=False,
  368. check_dtype=False,
  369. empty_targfunc=np.nansum,
  370. )
  371. def test_nanmean(self, skipna):
  372. self.check_funs(
  373. nanops.nanmean, np.mean, skipna, allow_obj=False, allow_date=False
  374. )
  375. @pytest.mark.filterwarnings("ignore::RuntimeWarning")
  376. def test_nanmedian(self, skipna):
  377. self.check_funs(
  378. nanops.nanmedian,
  379. np.median,
  380. skipna,
  381. allow_complex=False,
  382. allow_date=False,
  383. allow_obj="convert",
  384. )
  385. @pytest.mark.parametrize("ddof", range(3))
  386. def test_nanvar(self, ddof, skipna):
  387. self.check_funs(
  388. nanops.nanvar,
  389. np.var,
  390. skipna,
  391. allow_complex=False,
  392. allow_date=False,
  393. allow_obj="convert",
  394. ddof=ddof,
  395. )
  396. @pytest.mark.parametrize("ddof", range(3))
  397. def test_nanstd(self, ddof, skipna):
  398. self.check_funs(
  399. nanops.nanstd,
  400. np.std,
  401. skipna,
  402. allow_complex=False,
  403. allow_date=False,
  404. allow_obj="convert",
  405. ddof=ddof,
  406. )
  407. @pytest.mark.parametrize("ddof", range(3))
  408. def test_nansem(self, ddof, skipna):
  409. sp_stats = pytest.importorskip("scipy.stats")
  410. with np.errstate(invalid="ignore"):
  411. self.check_funs(
  412. nanops.nansem,
  413. sp_stats.sem,
  414. skipna,
  415. allow_complex=False,
  416. allow_date=False,
  417. allow_tdelta=False,
  418. allow_obj="convert",
  419. ddof=ddof,
  420. )
  421. @pytest.mark.filterwarnings("ignore::RuntimeWarning")
  422. @pytest.mark.parametrize(
  423. "nan_op,np_op", [(nanops.nanmin, np.min), (nanops.nanmax, np.max)]
  424. )
  425. def test_nanops_with_warnings(self, nan_op, np_op, skipna):
  426. self.check_funs(nan_op, np_op, skipna, allow_obj=False)
  427. def _argminmax_wrap(self, value, axis=None, func=None):
  428. res = func(value, axis)
  429. nans = np.min(value, axis)
  430. nullnan = isna(nans)
  431. if res.ndim:
  432. res[nullnan] = -1
  433. elif (hasattr(nullnan, "all") and nullnan.all()) or (
  434. not hasattr(nullnan, "all") and nullnan
  435. ):
  436. res = -1
  437. return res
  438. @pytest.mark.filterwarnings("ignore::RuntimeWarning")
  439. def test_nanargmax(self, skipna):
  440. func = partial(self._argminmax_wrap, func=np.argmax)
  441. self.check_funs(nanops.nanargmax, func, skipna, allow_obj=False)
  442. @pytest.mark.filterwarnings("ignore::RuntimeWarning")
  443. def test_nanargmin(self, skipna):
  444. func = partial(self._argminmax_wrap, func=np.argmin)
  445. self.check_funs(nanops.nanargmin, func, skipna, allow_obj=False)
  446. def _skew_kurt_wrap(self, values, axis=None, func=None):
  447. if not isinstance(values.dtype.type, np.floating):
  448. values = values.astype("f8")
  449. result = func(values, axis=axis, bias=False)
  450. # fix for handling cases where all elements in an axis are the same
  451. if isinstance(result, np.ndarray):
  452. result[np.max(values, axis=axis) == np.min(values, axis=axis)] = 0
  453. return result
  454. elif np.max(values) == np.min(values):
  455. return 0.0
  456. return result
  457. def test_nanskew(self, skipna):
  458. sp_stats = pytest.importorskip("scipy.stats")
  459. func = partial(self._skew_kurt_wrap, func=sp_stats.skew)
  460. with np.errstate(invalid="ignore"):
  461. self.check_funs(
  462. nanops.nanskew,
  463. func,
  464. skipna,
  465. allow_complex=False,
  466. allow_date=False,
  467. allow_tdelta=False,
  468. )
  469. def test_nankurt(self, skipna):
  470. sp_stats = pytest.importorskip("scipy.stats")
  471. func1 = partial(sp_stats.kurtosis, fisher=True)
  472. func = partial(self._skew_kurt_wrap, func=func1)
  473. with np.errstate(invalid="ignore"):
  474. self.check_funs(
  475. nanops.nankurt,
  476. func,
  477. skipna,
  478. allow_complex=False,
  479. allow_date=False,
  480. allow_tdelta=False,
  481. )
  482. def test_nanprod(self, skipna):
  483. self.check_funs(
  484. nanops.nanprod,
  485. np.prod,
  486. skipna,
  487. allow_date=False,
  488. allow_tdelta=False,
  489. empty_targfunc=np.nanprod,
  490. )
  491. def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs):
  492. res00 = checkfun(self.arr_float_2d, self.arr_float1_2d, **kwargs)
  493. res01 = checkfun(
  494. self.arr_float_2d,
  495. self.arr_float1_2d,
  496. min_periods=len(self.arr_float_2d) - 1,
  497. **kwargs,
  498. )
  499. tm.assert_almost_equal(targ0, res00)
  500. tm.assert_almost_equal(targ0, res01)
  501. res10 = checkfun(self.arr_float_nan_2d, self.arr_float1_nan_2d, **kwargs)
  502. res11 = checkfun(
  503. self.arr_float_nan_2d,
  504. self.arr_float1_nan_2d,
  505. min_periods=len(self.arr_float_2d) - 1,
  506. **kwargs,
  507. )
  508. tm.assert_almost_equal(targ1, res10)
  509. tm.assert_almost_equal(targ1, res11)
  510. targ2 = np.nan
  511. res20 = checkfun(self.arr_nan_2d, self.arr_float1_2d, **kwargs)
  512. res21 = checkfun(self.arr_float_2d, self.arr_nan_2d, **kwargs)
  513. res22 = checkfun(self.arr_nan_2d, self.arr_nan_2d, **kwargs)
  514. res23 = checkfun(self.arr_float_nan_2d, self.arr_nan_float1_2d, **kwargs)
  515. res24 = checkfun(
  516. self.arr_float_nan_2d,
  517. self.arr_nan_float1_2d,
  518. min_periods=len(self.arr_float_2d) - 1,
  519. **kwargs,
  520. )
  521. res25 = checkfun(
  522. self.arr_float_2d,
  523. self.arr_float1_2d,
  524. min_periods=len(self.arr_float_2d) + 1,
  525. **kwargs,
  526. )
  527. tm.assert_almost_equal(targ2, res20)
  528. tm.assert_almost_equal(targ2, res21)
  529. tm.assert_almost_equal(targ2, res22)
  530. tm.assert_almost_equal(targ2, res23)
  531. tm.assert_almost_equal(targ2, res24)
  532. tm.assert_almost_equal(targ2, res25)
  533. def check_nancorr_nancov_1d(self, checkfun, targ0, targ1, **kwargs):
  534. res00 = checkfun(self.arr_float_1d, self.arr_float1_1d, **kwargs)
  535. res01 = checkfun(
  536. self.arr_float_1d,
  537. self.arr_float1_1d,
  538. min_periods=len(self.arr_float_1d) - 1,
  539. **kwargs,
  540. )
  541. tm.assert_almost_equal(targ0, res00)
  542. tm.assert_almost_equal(targ0, res01)
  543. res10 = checkfun(self.arr_float_nan_1d, self.arr_float1_nan_1d, **kwargs)
  544. res11 = checkfun(
  545. self.arr_float_nan_1d,
  546. self.arr_float1_nan_1d,
  547. min_periods=len(self.arr_float_1d) - 1,
  548. **kwargs,
  549. )
  550. tm.assert_almost_equal(targ1, res10)
  551. tm.assert_almost_equal(targ1, res11)
  552. targ2 = np.nan
  553. res20 = checkfun(self.arr_nan_1d, self.arr_float1_1d, **kwargs)
  554. res21 = checkfun(self.arr_float_1d, self.arr_nan_1d, **kwargs)
  555. res22 = checkfun(self.arr_nan_1d, self.arr_nan_1d, **kwargs)
  556. res23 = checkfun(self.arr_float_nan_1d, self.arr_nan_float1_1d, **kwargs)
  557. res24 = checkfun(
  558. self.arr_float_nan_1d,
  559. self.arr_nan_float1_1d,
  560. min_periods=len(self.arr_float_1d) - 1,
  561. **kwargs,
  562. )
  563. res25 = checkfun(
  564. self.arr_float_1d,
  565. self.arr_float1_1d,
  566. min_periods=len(self.arr_float_1d) + 1,
  567. **kwargs,
  568. )
  569. tm.assert_almost_equal(targ2, res20)
  570. tm.assert_almost_equal(targ2, res21)
  571. tm.assert_almost_equal(targ2, res22)
  572. tm.assert_almost_equal(targ2, res23)
  573. tm.assert_almost_equal(targ2, res24)
  574. tm.assert_almost_equal(targ2, res25)
  575. def test_nancorr(self):
  576. targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
  577. targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
  578. self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1)
  579. targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
  580. targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
  581. self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson")
  582. def test_nancorr_pearson(self):
  583. targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
  584. targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
  585. self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="pearson")
  586. targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
  587. targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
  588. self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson")
  589. def test_nancorr_kendall(self):
  590. sp_stats = pytest.importorskip("scipy.stats")
  591. targ0 = sp_stats.kendalltau(self.arr_float_2d, self.arr_float1_2d)[0]
  592. targ1 = sp_stats.kendalltau(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0]
  593. self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="kendall")
  594. targ0 = sp_stats.kendalltau(self.arr_float_1d, self.arr_float1_1d)[0]
  595. targ1 = sp_stats.kendalltau(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0]
  596. self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="kendall")
  597. def test_nancorr_spearman(self):
  598. sp_stats = pytest.importorskip("scipy.stats")
  599. targ0 = sp_stats.spearmanr(self.arr_float_2d, self.arr_float1_2d)[0]
  600. targ1 = sp_stats.spearmanr(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0]
  601. self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="spearman")
  602. targ0 = sp_stats.spearmanr(self.arr_float_1d, self.arr_float1_1d)[0]
  603. targ1 = sp_stats.spearmanr(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0]
  604. self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="spearman")
  605. def test_invalid_method(self):
  606. pytest.importorskip("scipy")
  607. targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
  608. targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
  609. msg = "Unknown method 'foo', expected one of 'kendall', 'spearman'"
  610. with pytest.raises(ValueError, match=msg):
  611. self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="foo")
  612. def test_nancov(self):
  613. targ0 = np.cov(self.arr_float_2d, self.arr_float1_2d)[0, 1]
  614. targ1 = np.cov(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
  615. self.check_nancorr_nancov_2d(nanops.nancov, targ0, targ1)
  616. targ0 = np.cov(self.arr_float_1d, self.arr_float1_1d)[0, 1]
  617. targ1 = np.cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
  618. self.check_nancorr_nancov_1d(nanops.nancov, targ0, targ1)
  619. @pytest.mark.parametrize(
  620. "arr, correct",
  621. [
  622. ("arr_complex", False),
  623. ("arr_int", False),
  624. ("arr_bool", False),
  625. ("arr_str", False),
  626. ("arr_utf", False),
  627. ("arr_complex_nan", False),
  628. ("arr_nan_nanj", False),
  629. ("arr_nan_infj", True),
  630. ("arr_complex_nan_infj", True),
  631. ],
  632. )
  633. def test_has_infs_non_float(request, arr, correct, disable_bottleneck):
  634. val = request.getfixturevalue(arr)
  635. while getattr(val, "ndim", True):
  636. res0 = nanops._has_infs(val)
  637. if correct:
  638. assert res0
  639. else:
  640. assert not res0
  641. if not hasattr(val, "ndim"):
  642. break
  643. # Reduce dimension for next step in the loop
  644. val = np.take(val, 0, axis=-1)
  645. @pytest.mark.parametrize(
  646. "arr, correct",
  647. [
  648. ("arr_float", False),
  649. ("arr_nan", False),
  650. ("arr_float_nan", False),
  651. ("arr_nan_nan", False),
  652. ("arr_float_inf", True),
  653. ("arr_inf", True),
  654. ("arr_nan_inf", True),
  655. ("arr_float_nan_inf", True),
  656. ("arr_nan_nan_inf", True),
  657. ],
  658. )
  659. @pytest.mark.parametrize("astype", [None, "f4", "f2"])
  660. def test_has_infs_floats(request, arr, correct, astype, disable_bottleneck):
  661. val = request.getfixturevalue(arr)
  662. if astype is not None:
  663. val = val.astype(astype)
  664. while getattr(val, "ndim", True):
  665. res0 = nanops._has_infs(val)
  666. if correct:
  667. assert res0
  668. else:
  669. assert not res0
  670. if not hasattr(val, "ndim"):
  671. break
  672. # Reduce dimension for next step in the loop
  673. val = np.take(val, 0, axis=-1)
  674. @pytest.mark.parametrize(
  675. "fixture", ["arr_float", "arr_complex", "arr_int", "arr_bool", "arr_str", "arr_utf"]
  676. )
  677. def test_bn_ok_dtype(fixture, request, disable_bottleneck):
  678. obj = request.getfixturevalue(fixture)
  679. assert nanops._bn_ok_dtype(obj.dtype, "test")
  680. @pytest.mark.parametrize(
  681. "fixture",
  682. [
  683. "arr_date",
  684. "arr_tdelta",
  685. "arr_obj",
  686. ],
  687. )
  688. def test_bn_not_ok_dtype(fixture, request, disable_bottleneck):
  689. obj = request.getfixturevalue(fixture)
  690. assert not nanops._bn_ok_dtype(obj.dtype, "test")
  691. class TestEnsureNumeric:
  692. def test_numeric_values(self):
  693. # Test integer
  694. assert nanops._ensure_numeric(1) == 1
  695. # Test float
  696. assert nanops._ensure_numeric(1.1) == 1.1
  697. # Test complex
  698. assert nanops._ensure_numeric(1 + 2j) == 1 + 2j
  699. def test_ndarray(self):
  700. # Test numeric ndarray
  701. values = np.array([1, 2, 3])
  702. assert np.allclose(nanops._ensure_numeric(values), values)
  703. # Test object ndarray
  704. o_values = values.astype(object)
  705. assert np.allclose(nanops._ensure_numeric(o_values), values)
  706. # Test convertible string ndarray
  707. s_values = np.array(["1", "2", "3"], dtype=object)
  708. msg = r"Could not convert \['1' '2' '3'\] to numeric"
  709. with pytest.raises(TypeError, match=msg):
  710. nanops._ensure_numeric(s_values)
  711. # Test non-convertible string ndarray
  712. s_values = np.array(["foo", "bar", "baz"], dtype=object)
  713. msg = r"Could not convert .* to numeric"
  714. with pytest.raises(TypeError, match=msg):
  715. nanops._ensure_numeric(s_values)
  716. def test_convertable_values(self):
  717. with pytest.raises(TypeError, match="Could not convert string '1' to numeric"):
  718. nanops._ensure_numeric("1")
  719. with pytest.raises(
  720. TypeError, match="Could not convert string '1.1' to numeric"
  721. ):
  722. nanops._ensure_numeric("1.1")
  723. with pytest.raises(
  724. TypeError, match=r"Could not convert string '1\+1j' to numeric"
  725. ):
  726. nanops._ensure_numeric("1+1j")
  727. def test_non_convertable_values(self):
  728. msg = "Could not convert string 'foo' to numeric"
  729. with pytest.raises(TypeError, match=msg):
  730. nanops._ensure_numeric("foo")
  731. # with the wrong type, python raises TypeError for us
  732. msg = "argument must be a string or a number"
  733. with pytest.raises(TypeError, match=msg):
  734. nanops._ensure_numeric({})
  735. with pytest.raises(TypeError, match=msg):
  736. nanops._ensure_numeric([])
  737. class TestNanvarFixedValues:
  738. # xref GH10242
  739. # Samples from a normal distribution.
  740. @pytest.fixture
  741. def variance(self):
  742. return 3.0
  743. @pytest.fixture
  744. def samples(self, variance):
  745. return self.prng.normal(scale=variance**0.5, size=100000)
  746. def test_nanvar_all_finite(self, samples, variance):
  747. actual_variance = nanops.nanvar(samples)
  748. tm.assert_almost_equal(actual_variance, variance, rtol=1e-2)
  749. def test_nanvar_nans(self, samples, variance):
  750. samples_test = np.nan * np.ones(2 * samples.shape[0])
  751. samples_test[::2] = samples
  752. actual_variance = nanops.nanvar(samples_test, skipna=True)
  753. tm.assert_almost_equal(actual_variance, variance, rtol=1e-2)
  754. actual_variance = nanops.nanvar(samples_test, skipna=False)
  755. tm.assert_almost_equal(actual_variance, np.nan, rtol=1e-2)
  756. def test_nanstd_nans(self, samples, variance):
  757. samples_test = np.nan * np.ones(2 * samples.shape[0])
  758. samples_test[::2] = samples
  759. actual_std = nanops.nanstd(samples_test, skipna=True)
  760. tm.assert_almost_equal(actual_std, variance**0.5, rtol=1e-2)
  761. actual_std = nanops.nanvar(samples_test, skipna=False)
  762. tm.assert_almost_equal(actual_std, np.nan, rtol=1e-2)
  763. def test_nanvar_axis(self, samples, variance):
  764. # Generate some sample data.
  765. samples_unif = self.prng.uniform(size=samples.shape[0])
  766. samples = np.vstack([samples, samples_unif])
  767. actual_variance = nanops.nanvar(samples, axis=1)
  768. tm.assert_almost_equal(
  769. actual_variance, np.array([variance, 1.0 / 12]), rtol=1e-2
  770. )
  771. def test_nanvar_ddof(self):
  772. n = 5
  773. samples = self.prng.uniform(size=(10000, n + 1))
  774. samples[:, -1] = np.nan # Force use of our own algorithm.
  775. variance_0 = nanops.nanvar(samples, axis=1, skipna=True, ddof=0).mean()
  776. variance_1 = nanops.nanvar(samples, axis=1, skipna=True, ddof=1).mean()
  777. variance_2 = nanops.nanvar(samples, axis=1, skipna=True, ddof=2).mean()
  778. # The unbiased estimate.
  779. var = 1.0 / 12
  780. tm.assert_almost_equal(variance_1, var, rtol=1e-2)
  781. # The underestimated variance.
  782. tm.assert_almost_equal(variance_0, (n - 1.0) / n * var, rtol=1e-2)
  783. # The overestimated variance.
  784. tm.assert_almost_equal(variance_2, (n - 1.0) / (n - 2.0) * var, rtol=1e-2)
  785. @pytest.mark.parametrize("axis", range(2))
  786. @pytest.mark.parametrize("ddof", range(3))
  787. def test_ground_truth(self, axis, ddof):
  788. # Test against values that were precomputed with Numpy.
  789. samples = np.empty((4, 4))
  790. samples[:3, :3] = np.array(
  791. [
  792. [0.97303362, 0.21869576, 0.55560287],
  793. [0.72980153, 0.03109364, 0.99155171],
  794. [0.09317602, 0.60078248, 0.15871292],
  795. ]
  796. )
  797. samples[3] = samples[:, 3] = np.nan
  798. # Actual variances along axis=0, 1 for ddof=0, 1, 2
  799. variance = np.array(
  800. [
  801. [
  802. [0.13762259, 0.05619224, 0.11568816],
  803. [0.20643388, 0.08428837, 0.17353224],
  804. [0.41286776, 0.16857673, 0.34706449],
  805. ],
  806. [
  807. [0.09519783, 0.16435395, 0.05082054],
  808. [0.14279674, 0.24653093, 0.07623082],
  809. [0.28559348, 0.49306186, 0.15246163],
  810. ],
  811. ]
  812. )
  813. # Test nanvar.
  814. var = nanops.nanvar(samples, skipna=True, axis=axis, ddof=ddof)
  815. tm.assert_almost_equal(var[:3], variance[axis, ddof])
  816. assert np.isnan(var[3])
  817. # Test nanstd.
  818. std = nanops.nanstd(samples, skipna=True, axis=axis, ddof=ddof)
  819. tm.assert_almost_equal(std[:3], variance[axis, ddof] ** 0.5)
  820. assert np.isnan(std[3])
  821. @pytest.mark.parametrize("ddof", range(3))
  822. def test_nanstd_roundoff(self, ddof):
  823. # Regression test for GH 10242 (test data taken from GH 10489). Ensure
  824. # that variance is stable.
  825. data = Series(766897346 * np.ones(10))
  826. result = data.std(ddof=ddof)
  827. assert result == 0.0
  828. @property
  829. def prng(self):
  830. return np.random.default_rng(2)
  831. class TestNanskewFixedValues:
  832. # xref GH 11974
  833. # Test data + skewness value (computed with scipy.stats.skew)
  834. @pytest.fixture
  835. def samples(self):
  836. return np.sin(np.linspace(0, 1, 200))
  837. @pytest.fixture
  838. def actual_skew(self):
  839. return -0.1875895205961754
  840. @pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5])
  841. def test_constant_series(self, val):
  842. # xref GH 11974
  843. data = val * np.ones(300)
  844. skew = nanops.nanskew(data)
  845. assert skew == 0.0
  846. def test_all_finite(self):
  847. alpha, beta = 0.3, 0.1
  848. left_tailed = self.prng.beta(alpha, beta, size=100)
  849. assert nanops.nanskew(left_tailed) < 0
  850. alpha, beta = 0.1, 0.3
  851. right_tailed = self.prng.beta(alpha, beta, size=100)
  852. assert nanops.nanskew(right_tailed) > 0
  853. def test_ground_truth(self, samples, actual_skew):
  854. skew = nanops.nanskew(samples)
  855. tm.assert_almost_equal(skew, actual_skew)
  856. def test_axis(self, samples, actual_skew):
  857. samples = np.vstack([samples, np.nan * np.ones(len(samples))])
  858. skew = nanops.nanskew(samples, axis=1)
  859. tm.assert_almost_equal(skew, np.array([actual_skew, np.nan]))
  860. def test_nans(self, samples):
  861. samples = np.hstack([samples, np.nan])
  862. skew = nanops.nanskew(samples, skipna=False)
  863. assert np.isnan(skew)
  864. def test_nans_skipna(self, samples, actual_skew):
  865. samples = np.hstack([samples, np.nan])
  866. skew = nanops.nanskew(samples, skipna=True)
  867. tm.assert_almost_equal(skew, actual_skew)
  868. @pytest.mark.parametrize(
  869. "initial_data, nobs",
  870. [
  871. ([-2.05191341e-05, -4.10391103e-05], 27),
  872. ([-2.05191341e-10, -4.10391103e-10], 27),
  873. ([-2.05191341e-05, -4.10391103e-05], 10_000),
  874. ([-2.05191341e-10, -4.10391103e-10], 10_000),
  875. ],
  876. )
  877. def test_low_variance(self, initial_data, nobs):
  878. st = pytest.importorskip("scipy.stats")
  879. data = np.zeros((nobs,), dtype=np.float64)
  880. data[: len(initial_data)] = initial_data
  881. skew = nanops.nanskew(data)
  882. expected = st.skew(data, bias=False)
  883. tm.assert_almost_equal(skew, expected)
  884. @property
  885. def prng(self):
  886. return np.random.default_rng(2)
  887. class TestNankurtFixedValues:
  888. # xref GH 11974
  889. # Test data + kurtosis value (computed with scipy.stats.kurtosis)
  890. @pytest.fixture
  891. def samples(self):
  892. return np.sin(np.linspace(0, 1, 200))
  893. @pytest.fixture
  894. def actual_kurt(self):
  895. return -1.2058303433799713
  896. @pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5])
  897. def test_constant_series(self, val):
  898. # xref GH 11974
  899. data = val * np.ones(300)
  900. kurt = nanops.nankurt(data)
  901. tm.assert_equal(kurt, 0.0)
  902. def test_all_finite(self):
  903. alpha, beta = 0.3, 0.1
  904. left_tailed = self.prng.beta(alpha, beta, size=100)
  905. assert nanops.nankurt(left_tailed) < 2
  906. alpha, beta = 0.1, 0.3
  907. right_tailed = self.prng.beta(alpha, beta, size=100)
  908. assert nanops.nankurt(right_tailed) < 0
  909. def test_ground_truth(self, samples, actual_kurt):
  910. kurt = nanops.nankurt(samples)
  911. tm.assert_almost_equal(kurt, actual_kurt)
  912. def test_axis(self, samples, actual_kurt):
  913. samples = np.vstack([samples, np.nan * np.ones(len(samples))])
  914. kurt = nanops.nankurt(samples, axis=1)
  915. tm.assert_almost_equal(kurt, np.array([actual_kurt, np.nan]))
  916. def test_nans(self, samples):
  917. samples = np.hstack([samples, np.nan])
  918. kurt = nanops.nankurt(samples, skipna=False)
  919. assert np.isnan(kurt)
  920. def test_nans_skipna(self, samples, actual_kurt):
  921. samples = np.hstack([samples, np.nan])
  922. kurt = nanops.nankurt(samples, skipna=True)
  923. tm.assert_almost_equal(kurt, actual_kurt)
  924. @pytest.mark.parametrize(
  925. "initial_data, nobs",
  926. [
  927. ([-2.05191341e-05, -4.10391103e-05], 27),
  928. ([-2.05191341e-10, -4.10391103e-10], 27),
  929. ([-2.05191341e-05, -4.10391103e-05], 10_000),
  930. ([-2.05191341e-10, -4.10391103e-10], 10_000),
  931. ],
  932. )
  933. def test_low_variance(self, initial_data, nobs):
  934. # GH#57972
  935. st = pytest.importorskip("scipy.stats")
  936. data = np.zeros((nobs,), dtype=np.float64)
  937. data[: len(initial_data)] = initial_data
  938. kurt = nanops.nankurt(data)
  939. expected = st.kurtosis(data, bias=False)
  940. tm.assert_almost_equal(kurt, expected)
  941. @property
  942. def prng(self):
  943. return np.random.default_rng(2)
  944. class TestDatetime64NaNOps:
  945. # Enabling mean changes the behavior of DataFrame.mean
  946. # See https://github.com/pandas-dev/pandas/issues/24752
  947. def test_nanmean(self, unit):
  948. dti = pd.date_range("2016-01-01", periods=3).as_unit(unit)
  949. expected = dti[1]
  950. for obj in [dti, dti._data]:
  951. result = nanops.nanmean(obj)
  952. assert result == expected
  953. dti2 = dti.insert(1, pd.NaT)
  954. for obj in [dti2, dti2._data]:
  955. result = nanops.nanmean(obj)
  956. assert result == expected
  957. @pytest.mark.parametrize("constructor", ["M8", "m8"])
  958. def test_nanmean_skipna_false(self, constructor, unit):
  959. dtype = f"{constructor}[{unit}]"
  960. arr = np.arange(12).astype(np.int64).view(dtype).reshape(4, 3)
  961. arr[-1, -1] = "NaT"
  962. result = nanops.nanmean(arr, skipna=False)
  963. assert np.isnat(result)
  964. assert result.dtype == dtype
  965. result = nanops.nanmean(arr, axis=0, skipna=False)
  966. expected = np.array([4, 5, "NaT"], dtype=arr.dtype)
  967. tm.assert_numpy_array_equal(result, expected)
  968. result = nanops.nanmean(arr, axis=1, skipna=False)
  969. expected = np.array([arr[0, 1], arr[1, 1], arr[2, 1], arr[-1, -1]])
  970. tm.assert_numpy_array_equal(result, expected)
  971. def test_use_bottleneck():
  972. if nanops._BOTTLENECK_INSTALLED:
  973. with pd.option_context("use_bottleneck", True):
  974. assert pd.get_option("use_bottleneck")
  975. with pd.option_context("use_bottleneck", False):
  976. assert not pd.get_option("use_bottleneck")
  977. @pytest.mark.parametrize(
  978. "numpy_op, expected",
  979. [
  980. (np.sum, 10),
  981. (np.nansum, 10),
  982. (np.mean, 2.5),
  983. (np.nanmean, 2.5),
  984. (np.median, 2.5),
  985. (np.nanmedian, 2.5),
  986. (np.min, 1),
  987. (np.max, 4),
  988. (np.nanmin, 1),
  989. (np.nanmax, 4),
  990. ],
  991. )
  992. def test_numpy_ops(numpy_op, expected):
  993. # GH8383
  994. result = numpy_op(Series([1, 2, 3, 4]))
  995. assert result == expected
  996. @pytest.mark.parametrize(
  997. "operation",
  998. [
  999. nanops.nanany,
  1000. nanops.nanall,
  1001. nanops.nansum,
  1002. nanops.nanmean,
  1003. nanops.nanmedian,
  1004. nanops.nanstd,
  1005. nanops.nanvar,
  1006. nanops.nansem,
  1007. nanops.nanargmax,
  1008. nanops.nanargmin,
  1009. nanops.nanmax,
  1010. nanops.nanmin,
  1011. nanops.nanskew,
  1012. nanops.nankurt,
  1013. nanops.nanprod,
  1014. ],
  1015. )
  1016. def test_nanops_independent_of_mask_param(operation):
  1017. # GH22764
  1018. ser = Series([1, 2, np.nan, 3, np.nan, 4])
  1019. mask = ser.isna()
  1020. median_expected = operation(ser._values)
  1021. median_result = operation(ser._values, mask=mask)
  1022. assert median_expected == median_result
  1023. @pytest.mark.parametrize("min_count", [-1, 0])
  1024. def test_check_below_min_count_negative_or_zero_min_count(min_count):
  1025. # GH35227
  1026. result = nanops.check_below_min_count((21, 37), None, min_count)
  1027. expected_result = False
  1028. assert result == expected_result
  1029. @pytest.mark.parametrize(
  1030. "mask", [None, np.array([False, False, True]), np.array([True] + 9 * [False])]
  1031. )
  1032. @pytest.mark.parametrize("min_count, expected_result", [(1, False), (101, True)])
  1033. def test_check_below_min_count_positive_min_count(mask, min_count, expected_result):
  1034. # GH35227
  1035. shape = (10, 10)
  1036. result = nanops.check_below_min_count(shape, mask, min_count)
  1037. assert result == expected_result
  1038. @td.skip_if_windows
  1039. @td.skip_if_32bit
  1040. @pytest.mark.parametrize("min_count, expected_result", [(1, False), (2812191852, True)])
  1041. def test_check_below_min_count_large_shape(min_count, expected_result):
  1042. # GH35227 large shape used to show that the issue is fixed
  1043. shape = (2244367, 1253)
  1044. result = nanops.check_below_min_count(shape, mask=None, min_count=min_count)
  1045. assert result == expected_result
  1046. @pytest.mark.parametrize("func", ["nanmean", "nansum"])
  1047. def test_check_bottleneck_disallow(any_real_numpy_dtype, func):
  1048. # GH 42878 bottleneck sometimes produces unreliable results for mean and sum
  1049. assert not nanops._bn_ok_dtype(np.dtype(any_real_numpy_dtype).type, func)
  1050. @pytest.mark.parametrize("val", [2**55, -(2**55), 20150515061816532])
  1051. def test_nanmean_overflow(disable_bottleneck, val, using_python_scalars):
  1052. # GH 10155
  1053. # In the previous implementation mean can overflow for int dtypes, it
  1054. # is now consistent with numpy
  1055. ser = Series(val, index=range(500), dtype=np.int64)
  1056. result = ser.mean()
  1057. assert result == val
  1058. if using_python_scalars:
  1059. assert type(result) == float
  1060. else:
  1061. np_result = ser.values.mean()
  1062. assert result == np_result
  1063. assert result.dtype == np.float64
  1064. @pytest.mark.parametrize(
  1065. "dtype",
  1066. [
  1067. np.int16,
  1068. np.int32,
  1069. np.int64,
  1070. np.float32,
  1071. np.float64,
  1072. getattr(np, "float128", None),
  1073. ],
  1074. )
  1075. @pytest.mark.parametrize("method", ["mean", "std", "var", "skew", "kurt", "min", "max"])
  1076. def test_returned_dtype(disable_bottleneck, dtype, method, using_python_scalars):
  1077. if dtype is None:
  1078. pytest.skip("np.float128 not available")
  1079. ser = Series(range(10), dtype=dtype)
  1080. result = getattr(ser, method)()
  1081. if using_python_scalars:
  1082. if is_integer_dtype(dtype) and method in ["min", "max"]:
  1083. assert isinstance(result, int)
  1084. else:
  1085. assert type(result) == float
  1086. elif is_integer_dtype(dtype) and method not in ["min", "max"]:
  1087. assert result.dtype == np.float64
  1088. else:
  1089. assert result.dtype == dtype