conftest.py 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159
  """
  This file is very long and growing, but it was decided to not split it yet, as
  it's still manageable (2020-03-17, ~1.1k LoC). See gh-31989

  Instead of splitting it was decided to define sections here:
  - Configuration / Settings
  - Autouse fixtures
  - Common arguments
  - Missing values & co.
  - Classes
  - Indices
  - Series'
  - DataFrames
  - Operators & Operations
  - Data sets/files
  - Time zones
  - Dtypes
  - Misc
  """
  19. from __future__ import annotations
  20. from collections import abc
  21. from datetime import (
  22. date,
  23. datetime,
  24. time,
  25. timedelta,
  26. timezone,
  27. )
  28. from decimal import Decimal
  29. import gc
  30. import operator
  31. import os
  32. from typing import (
  33. TYPE_CHECKING,
  34. Any,
  35. )
  36. import uuid
  37. from dateutil.tz import (
  38. tzlocal,
  39. tzutc,
  40. )
  41. import hypothesis
  42. from hypothesis import strategies as st
  43. import numpy as np
  44. import pytest
  45. from pandas.compat._optional import import_optional_dependency
  46. import pandas.util._test_decorators as td
  47. from pandas.core.dtypes.dtypes import (
  48. DatetimeTZDtype,
  49. IntervalDtype,
  50. )
  51. import pandas as pd
  52. from pandas import (
  53. CategoricalIndex,
  54. DataFrame,
  55. Interval,
  56. IntervalIndex,
  57. Period,
  58. RangeIndex,
  59. Series,
  60. Timedelta,
  61. Timestamp,
  62. date_range,
  63. period_range,
  64. timedelta_range,
  65. )
  66. import pandas._testing as tm
  67. from pandas.core import ops
  68. from pandas.core.indexes.api import (
  69. Index,
  70. MultiIndex,
  71. )
  72. if TYPE_CHECKING:
  73. from collections.abc import (
  74. Callable,
  75. Hashable,
  76. Iterator,
  77. )
  # Record whether pyarrow can be imported. The module object itself is not
  # needed here, so the temporary name is dropped again on success.
  try:
      import pyarrow as pa
  except ImportError:
      has_pyarrow = False
  else:
      del pa
      has_pyarrow = True

  # pytz is an optional dependency; errors="ignore" asks the helper not to raise
  # when it is missing.
  pytz = import_optional_dependency("pytz", errors="ignore")
  # ----------------------------------------------------------------
  # Configuration / Settings
  # ----------------------------------------------------------------
  # pytest
  def pytest_addoption(parser) -> None:
      """
      Register the pandas-specific command line option with pytest.

      Parameters
      ----------
      parser
          Option parser handed in by pytest; only ``addoption`` is used.
      """
      parser.addoption(
          "--no-strict-data-files",
          action="store_false",
          help="Don't fail if a test is skipped for missing data file.",
      )
  def pytest_sessionstart(session):
      """
      Patch ``doctest.DocTestFinder._from_module`` so that doctests on methods
      are collected even when the method's ``__module__`` differs from the
      module of the class it lives on.

      The patch is applied at most once per process; calling this hook again
      leaves the already-installed wrapper in place.

      Parameters
      ----------
      session
          The pytest session object (unused).
      """
      import doctest
      import inspect

      # https://github.com/pandas-dev/pandas/pull/62988
      # When we modify the __module__ of a class, the __module__ on the methods
      # of that class do not change. When these two disagree, doctests would not
      # typically run. We hack `DocTestFinder` to avoid this.
      orig = doctest.DocTestFinder._from_module  # type: ignore[attr-defined]
      if getattr(orig, "_pandas_method_patch", False):
          # Already patched by an earlier session start in this process;
          # wrapping again would only stack redundant layers.
          return

      def _from_module(self, module, object):
          # When . is in __qualname__, object is a method of a class.
          if inspect.isfunction(object) and "." in object.__qualname__:
              # We only get here when the class that the method is on is from the
              # appropriate module. So ignore checking the __module__ of the method
              # itself and run the doctest.
              return True
          return orig(self, module, object)

      # Marker consulted by the guard above on later calls.
      _from_module._pandas_method_patch = True  # type: ignore[attr-defined]
      doctest.DocTestFinder._from_module = _from_module  # type: ignore[attr-defined]
  def ignore_doctest_warning(item: pytest.Item, path: str, message: str) -> None:
      """Ignore doctest warning.

      Parameters
      ----------
      item : pytest.Item
          pytest test item.
      path : str
          Module path to Python object, e.g. "pandas.DataFrame.append". A
          warning will be filtered when item.name ends with the given path, so
          it is sufficient to specify e.g. "DataFrame.append".
      message : str
          Message to be filtered.
      """
      if item.name.endswith(path):
          item.add_marker(pytest.mark.filterwarnings(f"ignore:{message}"))
  def pytest_collection_modifyitems(items, config) -> None:
      """
      Attach warning filters to collected doctest items.

      Nothing is done unless the run was started with ``--doctest-modules`` or
      ``--doctest-cython``.

      Parameters
      ----------
      items
          Collected test items; each is passed to ``ignore_doctest_warning``.
      config
          pytest config object, queried through ``getoption``.
      """
      is_doctest = config.getoption("--doctest-modules") or config.getoption(
          "--doctest-cython", default=False
      )
      if not is_doctest:
          # Regular test runs need none of the filters below.
          return

      # Warnings from doctests that can be ignored; place reason in comment above.
      # Each entry specifies (path, message) - see the ignore_doctest_warning function
      ignored_doctest_warnings = [
          ("api.interchange.from_dataframe", "The DataFrame Interchange Protocol"),
          ("is_int64_dtype", "is_int64_dtype is deprecated"),
          ("is_interval_dtype", "is_interval_dtype is deprecated"),
          ("is_period_dtype", "is_period_dtype is deprecated"),
          ("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"),
          ("is_categorical_dtype", "is_categorical_dtype is deprecated"),
          ("is_sparse", "is_sparse is deprecated"),
          ("CategoricalDtype._from_values_or_dtype", "Constructing a Categorical"),
          ("DataFrame.__dataframe__", "The DataFrame Interchange Protocol"),
          ("DataFrameGroupBy.fillna", "DataFrameGroupBy.fillna is deprecated"),
          ("DataFrameGroupBy.corrwith", "DataFrameGroupBy.corrwith is deprecated"),
          ("NDFrame.replace", "Series.replace without 'value'"),
          ("NDFrame.clip", "Downcasting behavior in Series and DataFrame methods"),
          ("Series.idxmin", "The behavior of Series.idxmin"),
          ("Series.idxmax", "The behavior of Series.idxmax"),
          ("SeriesGroupBy.fillna", "SeriesGroupBy.fillna is deprecated"),
          ("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"),
          ("SeriesGroupBy.idxmax", "The behavior of Series.idxmax"),
          ("to_pytimedelta", "The behavior of TimedeltaProperties.to_pytimedelta"),
          ("NDFrame.reindex_like", "keyword argument 'method' is deprecated"),
          # Docstring divides by zero to show behavior difference
          ("missing.mask_zero_div_zero", "divide by zero encountered"),
          (
              "pandas.core.generic.NDFrame.first",
              "first is deprecated and will be removed in a future version. "
              "Please create a mask and filter using `.loc` instead",
          ),
          (
              "Resampler.fillna",
              "DatetimeIndexResampler.fillna is deprecated",
          ),
          (
              "DataFrameGroupBy.fillna",
              "DataFrameGroupBy.fillna with 'method' is deprecated",
          ),
          ("read_parquet", "Passing a BlockManager to DataFrame is deprecated"),
      ]

      for item in items:
          for path, message in ignored_doctest_warnings:
              ignore_doctest_warning(item, path, message)
  # Similar to "ci" config in
  # https://hypothesis.readthedocs.io/en/latest/reference/api.html#built-in-profiles
  hypothesis.settings.register_profile(
      "pandas_ci",
      database=None,
      deadline=None,
      max_examples=15,
      suppress_health_check=(
          hypothesis.HealthCheck.too_slow,
          hypothesis.HealthCheck.differing_executors,
      ),
  )
  hypothesis.settings.load_profile("pandas_ci")

  # Registering these strategies makes them globally available via st.from_type,
  # which is used for offsets in tests/tseries/offsets/test_offsets_properties.py
  for name in "MonthBegin MonthEnd BMonthBegin BMonthEnd".split():
      cls = getattr(pd.tseries.offsets, name)
      st.register_type_strategy(
          cls, st.builds(cls, n=st.integers(-99, 99), normalize=st.booleans())
      )

  # Year-based offsets additionally take a ``month`` argument.
  for name in "YearBegin YearEnd BYearBegin BYearEnd".split():
      cls = getattr(pd.tseries.offsets, name)
      st.register_type_strategy(
          cls,
          st.builds(
              cls,
              n=st.integers(-5, 5),
              normalize=st.booleans(),
              month=st.integers(min_value=1, max_value=12),
          ),
      )

  # Quarter-based offsets additionally take a ``startingMonth`` argument.
  for name in "QuarterBegin QuarterEnd BQuarterBegin BQuarterEnd".split():
      cls = getattr(pd.tseries.offsets, name)
      st.register_type_strategy(
          cls,
          st.builds(
              cls,
              n=st.integers(-24, 24),
              normalize=st.booleans(),
              startingMonth=st.integers(min_value=1, max_value=12),
          ),
      )
  # ----------------------------------------------------------------
  # Autouse fixtures
  # ----------------------------------------------------------------
  # https://github.com/pytest-dev/pytest/issues/11873
  # Would like to avoid autouse=True, but cannot as of pytest 8.0.0
  @pytest.fixture(autouse=True)
  def add_doctest_imports(doctest_namespace) -> None:
      """
      Make `np` and `pd` names available for doctests.
      """
      doctest_namespace["np"] = np
      doctest_namespace["pd"] = pd
  @pytest.fixture(autouse=True)
  def configure_tests() -> None:
      """
      Configure settings for all tests and test modules.

      Sets the pandas option "chained_assignment" to "raise".
      """
      pd.set_option("chained_assignment", "raise")
  # ----------------------------------------------------------------
  # Common arguments
  # ----------------------------------------------------------------
  @pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: f"axis={x!r}")
  def axis(request):
      """
      Fixture for returning the axis numbers of a DataFrame.

      Parametrized over both the integer and the string spelling of each axis.
      """
      return request.param
  @pytest.fixture(params=[True, False])
  def observed(request):
      """
      Pass in the observed keyword to groupby for [True, False]

      This indicates whether categoricals should return values for
      values which are not in the grouper [False], or only values which
      appear in the grouper [True].
      """
      return request.param
  @pytest.fixture(params=[True, False, None])
  def ordered(request):
      """
      Boolean 'ordered' parameter for Categorical.

      Parametrized over True, False and None.
      """
      return request.param
  @pytest.fixture(params=[True, False])
  def dropna(request):
      """
      Boolean 'dropna' parameter.
      """
      return request.param
  @pytest.fixture(params=[True, False])
  def sort(request):
      """
      Boolean 'sort' parameter.
      """
      return request.param
  @pytest.fixture(params=[True, False])
  def skipna(request):
      """
      Boolean 'skipna' parameter.
      """
      return request.param
  @pytest.fixture(params=["first", "last", False])
  def keep(request):
      """
      Valid values for the 'keep' parameter used in
      .duplicated or .drop_duplicates
      """
      return request.param
  @pytest.fixture(params=["both", "neither", "left", "right"])
  def inclusive_endpoints_fixture(request):
      """
      Fixture for trying all interval 'inclusive' parameters.
      """
      return request.param
  @pytest.fixture(params=["left", "right", "both", "neither"])
  def closed(request):
      """
      Fixture for trying all interval closed parameters.
      """
      return request.param
  @pytest.fixture(params=["left", "right", "both", "neither"])
  def other_closed(request):
      """
      Secondary closed fixture to allow parametrizing over all pairs of closed.
      """
      return request.param
  @pytest.fixture(
      params=[
          None,
          "gzip",
          "bz2",
          "zip",
          "xz",
          "tar",
          pytest.param("zstd", marks=td.skip_if_no("zstandard")),
      ]
  )
  def compression(request):
      """
      Fixture for trying common compression types in compression tests.

      Includes None for the uncompressed case; the "zstd" case carries a
      ``td.skip_if_no("zstandard")`` mark.
      """
      return request.param
  @pytest.fixture(
      params=[
          "gzip",
          "bz2",
          "zip",
          "xz",
          "tar",
          pytest.param("zstd", marks=td.skip_if_no("zstandard")),
      ]
  )
  def compression_only(request):
      """
      Fixture for trying common compression types in compression tests excluding
      uncompressed case.
      """
      return request.param
  @pytest.fixture(params=[True, False])
  def writable(request):
      """
      Fixture that an array is writable.
      """
      return request.param
  @pytest.fixture(params=["inner", "outer", "left", "right"])
  def join_type(request):
      """
      Fixture for trying all types of join operations.
      """
      return request.param
  @pytest.fixture(params=["nlargest", "nsmallest"])
  def nselect_method(request):
      """
      Fixture for trying all nselect methods.
      """
      return request.param
  @pytest.fixture(params=[None, "ignore"])
  def na_action(request):
      """
      Fixture for 'na_action' argument in map.
      """
      return request.param
  @pytest.fixture(params=[True, False])
  def ascending(request):
      """
      Fixture for 'ascending' argument in sort_values/sort_index/rank.
      """
      return request.param
  @pytest.fixture(params=["average", "min", "max", "first", "dense"])
  def rank_method(request):
      """
      Fixture for 'method' argument in rank.
      """
      return request.param
  @pytest.fixture(params=[True, False])
  def as_index(request):
      """
      Fixture for 'as_index' argument in groupby.
      """
      return request.param
  @pytest.fixture(params=[True, False])
  def cache(request):
      """
      Fixture for 'cache' argument in to_datetime.
      """
      return request.param
  @pytest.fixture(params=[True, False])
  def parallel(request):
      """
      Fixture for parallel keyword argument for numba.jit.
      """
      return request.param
  # Can parameterize nogil & nopython over True | False, but limiting per
  # https://github.com/pandas-dev/pandas/pull/41971#issuecomment-860607472
  @pytest.fixture(params=[False])
  def nogil(request):
      """
      Fixture for nogil keyword argument for numba.jit.

      Currently parametrized over False only.
      """
      return request.param
  @pytest.fixture(params=[True])
  def nopython(request):
      """
      Fixture for nopython keyword argument for numba.jit.

      Currently parametrized over True only.
      """
      return request.param
  # ----------------------------------------------------------------
  # Missing values & co.
  # ----------------------------------------------------------------
  @pytest.fixture(params=tm.NULL_OBJECTS, ids=lambda x: type(x).__name__)
  def nulls_fixture(request):
      """
      Fixture for each null type in pandas.

      Test ids are the type name of each null object.
      """
      return request.param


  nulls_fixture2 = nulls_fixture  # Generate cartesian product of nulls_fixture
  @pytest.fixture(params=[None, np.nan, pd.NaT])
  def unique_nulls_fixture(request):
      """
      Fixture for each null type in pandas, each null type exactly once.
      """
      return request.param


  # Generate cartesian product of unique_nulls_fixture:
  unique_nulls_fixture2 = unique_nulls_fixture
  @pytest.fixture(params=tm.NP_NAT_OBJECTS, ids=lambda x: type(x).__name__)
  def np_nat_fixture(request):
      """
      Fixture for each NaT type in numpy.

      Test ids are the type name of each NaT object.
      """
      return request.param


  # Generate cartesian product of np_nat_fixture:
  np_nat_fixture2 = np_nat_fixture
  # ----------------------------------------------------------------
  # Classes
  # ----------------------------------------------------------------
  @pytest.fixture(params=[DataFrame, Series])
  def frame_or_series(request):
      """
      Fixture to parametrize over DataFrame and Series.

      Yields the class itself, not an instance.
      """
      return request.param
  @pytest.fixture(params=[Index, Series], ids=["index", "series"])
  def index_or_series(request):
      """
      Fixture to parametrize over Index and Series, made necessary by a mypy
      bug, giving an error:

      List item 0 has incompatible type "Type[Series]"; expected "Type[PandasObject]"

      See GH#29725
      """
      return request.param
  @pytest.fixture(params=[Index, Series, pd.array], ids=["index", "series", "array"])
  def index_or_series_or_array(request):
      """
      Fixture to parametrize over Index, Series, and ExtensionArray

      The array case is the ``pd.array`` constructor function.
      """
      return request.param
  @pytest.fixture(params=[Index, Series, DataFrame, pd.array], ids=lambda x: x.__name__)
  def box_with_array(request):
      """
      Fixture to test behavior for Index, Series, DataFrame, and pandas Array
      classes
      """
      return request.param


  # Second handle on the same fixture, for parametrizing over pairs of boxes.
  box_with_array2 = box_with_array
  @pytest.fixture
  def dict_subclass() -> type[dict]:
      """
      Fixture for a dictionary subclass.

      Returns the class, not an instance.
      """

      class TestSubDict(dict):
          def __init__(self, *args, **kwargs) -> None:
              dict.__init__(self, *args, **kwargs)

      return TestSubDict
  @pytest.fixture
  def non_dict_mapping_subclass() -> type[abc.Mapping]:
      """
      Fixture for a Mapping subclass that is not a dict subclass.

      Returns the class, not an instance. Instances wrap the object passed to
      the constructor and forward item access, iteration and length to it.
      """

      class TestNonDictMapping(abc.Mapping):
          def __init__(self, underlying_dict) -> None:
              self._data = underlying_dict

          def __getitem__(self, key):
              return self._data.__getitem__(key)

          def __iter__(self) -> Iterator:
              return self._data.__iter__()

          def __len__(self) -> int:
              return self._data.__len__()

      return TestNonDictMapping
  # ----------------------------------------------------------------
  # Indices
  # ----------------------------------------------------------------
  @pytest.fixture
  def multiindex_year_month_day_dataframe_random_data():
      """
      DataFrame with 3 level MultiIndex (year, month, day) covering
      first 100 business days from 2000-01-01 with random data
      """
      tdf = DataFrame(
          np.random.default_rng(2).standard_normal((100, 4)),
          columns=Index(list("ABCD")),
          index=date_range("2000-01-01", periods=100, freq="B"),
      )
      # Group the business-day index by its year, month and day components.
      ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
      # use int64 Index, to make sure things work
      ymd.index = ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels])
      ymd.index.set_names(["year", "month", "day"], inplace=True)
      return ymd
  @pytest.fixture
  def lexsorted_two_level_string_multiindex() -> MultiIndex:
      """
      2-level MultiIndex, lexsorted, with string names.

      The levels are named "first" and "second" and the index has 10 entries.
      """
      return MultiIndex(
          levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
          codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
          names=["first", "second"],
      )
  @pytest.fixture
  def multiindex_dataframe_random_data(
      lexsorted_two_level_string_multiindex,
  ) -> DataFrame:
      """
      DataFrame with 2 level MultiIndex with random data

      The frame has 10 rows and columns "A", "B", "C"; the columns Index is
      named "exp".
      """
      index = lexsorted_two_level_string_multiindex
      return DataFrame(
          np.random.default_rng(2).standard_normal((10, 3)),
          index=index,
          columns=Index(["A", "B", "C"], name="exp"),
      )
  527. def _create_multiindex():
  528. """
  529. MultiIndex used to test the general functionality of this object
  530. """
  531. # See Also: tests.multi.conftest.idx
  532. major_axis = Index(["foo", "bar", "baz", "qux"])
  533. minor_axis = Index(["one", "two"])
  534. major_codes = np.array([0, 0, 1, 2, 3, 3])
  535. minor_codes = np.array([0, 1, 0, 1, 0, 1])
  536. index_names = ["first", "second"]
  537. return MultiIndex(
  538. levels=[major_axis, minor_axis],
  539. codes=[major_codes, minor_codes],
  540. names=index_names,
  541. verify_integrity=False,
  542. )
  543. def _create_mi_with_dt64tz_level():
  544. """
  545. MultiIndex with a level that is a tzaware DatetimeIndex.
  546. """
  547. # GH#8367 round trip with pickle
  548. return MultiIndex.from_product(
  549. [[1, 2], ["a", "b"], date_range("20130101", periods=3, tz="US/Eastern")],
  550. names=["one", "two", "three"],
  551. )
  # Registry of named sample indexes; the keys are used as fixture params below.
  indices_dict = {
      "object": Index([f"pandas_{i}" for i in range(10)], dtype=object),
      "string": Index([f"pandas_{i}" for i in range(10)], dtype="str"),
      "datetime": date_range("2020-01-01", periods=10),
      "datetime-tz": date_range("2020-01-01", periods=10, tz="US/Pacific"),
      "period": period_range("2020-01-01", periods=10, freq="D"),
      "timedelta": timedelta_range(start="1 day", periods=10, freq="D"),
      "range": RangeIndex(10),
      "int8": Index(np.arange(10), dtype="int8"),
      "int16": Index(np.arange(10), dtype="int16"),
      "int32": Index(np.arange(10), dtype="int32"),
      "int64": Index(np.arange(10), dtype="int64"),
      "uint8": Index(np.arange(10), dtype="uint8"),
      "uint16": Index(np.arange(10), dtype="uint16"),
      "uint32": Index(np.arange(10), dtype="uint32"),
      "uint64": Index(np.arange(10), dtype="uint64"),
      "float32": Index(np.arange(10), dtype="float32"),
      "float64": Index(np.arange(10), dtype="float64"),
      "bool-object": Index([True, False] * 5, dtype=object),
      "bool-dtype": Index([True, False] * 5, dtype=bool),
      "complex64": Index(
          np.arange(10, dtype="complex64") + 1.0j * np.arange(10, dtype="complex64")
      ),
      "complex128": Index(
          np.arange(10, dtype="complex128") + 1.0j * np.arange(10, dtype="complex128")
      ),
      "categorical": CategoricalIndex(list("abcd") * 2),
      "interval": IntervalIndex.from_breaks(np.linspace(0, 100, num=11)),
      "empty": Index([]),
      "tuples": MultiIndex.from_tuples(
          zip(["foo", "bar", "baz"], [1, 2, 3], strict=True)
      ),
      "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(),
      "multi": _create_multiindex(),
      "repeats": Index([0, 0, 1, 1, 2, 2]),
      "nullable_int": Index(np.arange(10), dtype="Int64"),
      "nullable_uint": Index(np.arange(10), dtype="UInt16"),
      "nullable_float": Index(np.arange(10), dtype="Float32"),
      "nullable_bool": Index(np.arange(10).astype(bool), dtype="boolean"),
      "string-python": Index(
          pd.array([f"pandas_{i}" for i in range(10)], dtype="string[python]")
      ),
  }
  # The pyarrow-backed string index is only added when pyarrow is importable.
  if has_pyarrow:
      idx = Index(pd.array([f"pandas_{i}" for i in range(10)], dtype="string[pyarrow]"))
      indices_dict["string-pyarrow"] = idx
  @pytest.fixture(params=indices_dict.keys())
  def index(request):
      """
      Fixture for many "simple" kinds of indices.

      These indices are unlikely to cover corner cases, e.g.
          - no names
          - no NaTs/NaNs
          - no values near implementation bounds
          - ...
      """
      # copy to avoid mutation, e.g. setting .name
      return indices_dict[request.param].copy(deep=False)
  @pytest.fixture(
      params=[
          key for key, value in indices_dict.items() if not isinstance(value, MultiIndex)
      ]
  )
  def index_flat(request):
      """
      index fixture, but excluding MultiIndex cases.

      Returns a shallow copy of the registered index.
      """
      key = request.param
      return indices_dict[key].copy(deep=False)
  @pytest.fixture(
      params=[
          key
          for key, value in indices_dict.items()
          if not (
              # NOTE(review): the "int" prefix also matches the "interval" key,
              # so that entry is filtered out here as well - confirm intended.
              key.startswith(("int", "uint", "float"))
              or key in ["range", "empty", "repeats", "bool-dtype"]
          )
          and not isinstance(value, MultiIndex)
      ]
  )
  def index_with_missing(request):
      """
      Fixture for indices with missing values.

      Integer-dtype and empty cases are excluded because they cannot hold missing
      values.

      MultiIndex is excluded because isna() is not defined for MultiIndex.

      The first and last values of a copy of the underlying values are set to
      None, and a new index of the same type is built from the result.
      """
      # MultiIndex entries never reach this point (they are filtered out of
      # ``params`` above), so only the flat-index path is needed.
      ind = indices_dict[request.param]
      vals = ind.values.copy()
      vals[0] = None
      vals[-1] = None
      return type(ind)(vals, copy=False)
  # ----------------------------------------------------------------
  # Series'
  # ----------------------------------------------------------------
  @pytest.fixture
  def string_series() -> Series:
      """
      Fixture for Series of floats with Index of unique strings

      The Series has 30 entries and is named "series".
      """
      return Series(
          np.arange(30, dtype=np.float64) * 1.1,
          index=Index([f"i_{i}" for i in range(30)]),
          name="series",
      )
  664. @pytest.fixture
  665. def object_series() -> Series:
  666. """
  667. Fixture for Series of dtype object with Index of unique strings
  668. """
  669. data = [f"foo_{i}" for i in range(30)]
  670. index = Index([f"bar_{i}" for i in range(30)])
  671. return Series(data, index=index, name="objects", dtype=object)
  672. @pytest.fixture
  673. def datetime_series() -> Series:
  674. """
  675. Fixture for Series of floats with DatetimeIndex
  676. """
  677. return Series(
  678. np.random.default_rng(2).standard_normal(30),
  679. index=date_range("2000-01-01", periods=30, freq="B"),
  680. name="ts",
  681. )
  682. def _create_series(index):
  683. """Helper for the _series dict"""
  684. size = len(index)
  685. data = np.random.default_rng(2).standard_normal(size)
  686. return Series(data, index=index, name="a", copy=False)
# One Series (built by _create_series) per entry of indices_dict, keyed
# "series-with-<index_id>-index".
_series = {
    f"series-with-{index_id}-index": _create_series(index)
    for index_id, index in indices_dict.items()
}
@pytest.fixture
def series_with_simple_index(index) -> Series:
    """
    Fixture for tests on series with changing types of indices.

    Depends on the ``index`` fixture and wraps each index it provides in a
    Series via ``_create_series``.
    """
    return _create_series(index)
# One Series named "a" per dtype in tm.NARROW_NP_DTYPES, holding range(30)
# with string labels "i-0".."i-29"; keyed "<dtype name>-series".
_narrow_series = {
    f"{dtype.__name__}-series": Series(
        range(30), index=[f"i-{i}" for i in range(30)], name="a", dtype=dtype
    )
    for dtype in tm.NARROW_NP_DTYPES
}
  703. _index_or_series_objs = {**indices_dict, **_series, **_narrow_series}
  704. @pytest.fixture(params=_index_or_series_objs.keys())
  705. def index_or_series_obj(request):
  706. """
  707. Fixture for tests on indexes, series and series with a narrow dtype
  708. copy to avoid mutation, e.g. setting .name
  709. """
  710. return _index_or_series_objs[request.param].copy(deep=False)
# One Series per entry of tm.PYTHON_DATA_TYPES, keyed "<type name>-series".
# NOTE(review): the type object itself is passed as the first positional
# argument of Series here - confirm this is the intended input.
_typ_objects_series = {
    f"{dtype.__name__}-series": Series(dtype) for dtype in tm.PYTHON_DATA_TYPES
}
  714. _index_or_series_memory_objs = {
  715. **indices_dict,
  716. **_series,
  717. **_narrow_series,
  718. **_typ_objects_series,
  719. }
  720. @pytest.fixture(params=_index_or_series_memory_objs.keys())
  721. def index_or_series_memory_obj(request):
  722. """
  723. Fixture for tests on indexes, series, series with a narrow dtype and
  724. series with empty objects type
  725. copy to avoid mutation, e.g. setting .name
  726. """
  727. return _index_or_series_memory_objs[request.param].copy(deep=False)
  728. # ----------------------------------------------------------------
  729. # DataFrames
  730. # ----------------------------------------------------------------
  731. @pytest.fixture
  732. def int_frame() -> DataFrame:
  733. """
  734. Fixture for DataFrame of ints with index of unique strings
  735. Columns are ['A', 'B', 'C', 'D']
  736. """
  737. return DataFrame(
  738. np.ones((30, 4), dtype=np.int64),
  739. index=Index([f"foo_{i}" for i in range(30)]),
  740. columns=Index(list("ABCD")),
  741. )
  742. @pytest.fixture
  743. def float_frame() -> DataFrame:
  744. """
  745. Fixture for DataFrame of floats with index of unique strings
  746. Columns are ['A', 'B', 'C', 'D'].
  747. """
  748. return DataFrame(
  749. np.random.default_rng(2).standard_normal((30, 4)),
  750. index=Index([f"foo_{i}" for i in range(30)]),
  751. columns=Index(list("ABCD")),
  752. )
  753. @pytest.fixture
  754. def rand_series_with_duplicate_datetimeindex() -> Series:
  755. """
  756. Fixture for Series with a DatetimeIndex that has duplicates.
  757. """
  758. dates = [
  759. datetime(2000, 1, 2),
  760. datetime(2000, 1, 2),
  761. datetime(2000, 1, 2),
  762. datetime(2000, 1, 3),
  763. datetime(2000, 1, 3),
  764. datetime(2000, 1, 3),
  765. datetime(2000, 1, 4),
  766. datetime(2000, 1, 4),
  767. datetime(2000, 1, 4),
  768. datetime(2000, 1, 5),
  769. ]
  770. return Series(np.random.default_rng(2).standard_normal(len(dates)), index=dates)
  771. # ----------------------------------------------------------------
  772. # Scalars
  773. # ----------------------------------------------------------------
@pytest.fixture(
    params=[
        (Interval(left=0, right=5), IntervalDtype("int64", "right")),
        (Interval(left=0.1, right=0.5), IntervalDtype("float64", "right")),
        (Period("2012-01", freq="M"), "period[M]"),
        (Period("2012-02-01", freq="D"), "period[D]"),
        (
            Timestamp("2011-01-01", tz="US/Eastern").as_unit("s"),
            DatetimeTZDtype(unit="s", tz="US/Eastern"),
        ),
        (Timedelta(seconds=500), "timedelta64[us]"),
    ]
)
def ea_scalar_and_dtype(request):
    """
    Fixture of ``(scalar, dtype)`` pairs.

    Covers Interval (int and float bounds), Period (monthly and daily),
    a tz-aware Timestamp and a Timedelta scalar, each paired with a dtype
    object or dtype string.
    """
    return request.param
  792. # ----------------------------------------------------------------
  793. # Operators & Operations
  794. # ----------------------------------------------------------------
@pytest.fixture(params=tm.arithmetic_dunder_methods)
def all_arithmetic_operators(request):
    """
    Fixture for dunder names for common arithmetic operations.

    Parametrized over ``tm.arithmetic_dunder_methods``.
    """
    return request.param
@pytest.fixture(
    params=[
        operator.add,
        ops.radd,
        operator.sub,
        ops.rsub,
        operator.mul,
        ops.rmul,
        operator.truediv,
        ops.rtruediv,
        operator.floordiv,
        ops.rfloordiv,
        operator.mod,
        ops.rmod,
        operator.pow,
        ops.rpow,
        operator.eq,
        operator.ne,
        operator.lt,
        operator.le,
        operator.gt,
        operator.ge,
        operator.and_,
        ops.rand_,
        operator.xor,
        ops.rxor,
        operator.or_,
        ops.ror_,
    ]
)
def all_binary_operators(request):
    """
    Fixture for operator and roperator arithmetic, comparison, and logical ops.

    Arithmetic and logical ops come in forward (``operator``) and reversed
    (``ops.r*``) form; comparisons only in forward form.
    """
    return request.param
@pytest.fixture(
    params=[
        operator.add,
        ops.radd,
        operator.sub,
        ops.rsub,
        operator.mul,
        ops.rmul,
        operator.truediv,
        ops.rtruediv,
        operator.floordiv,
        ops.rfloordiv,
        operator.mod,
        ops.rmod,
        operator.pow,
        ops.rpow,
    ]
)
def all_arithmetic_functions(request):
    """
    Fixture for operator and roperator arithmetic functions.

    Covers add, sub, mul, truediv, floordiv, mod and pow, each in forward
    and reversed form.

    Notes
    -----
    NOTE(review): this docstring previously stated that divmod and rdivmod
    are included; the parameter list contains neither.
    """
    return request.param
# Method names of the numeric reductions exercised by the fixtures below.
_all_numeric_reductions = [
    "count",
    "sum",
    "max",
    "min",
    "mean",
    "prod",
    "std",
    "var",
    "median",
    "kurt",
    "skew",
    "sem",
]


@pytest.fixture(params=_all_numeric_reductions)
def all_numeric_reductions(request):
    """
    Fixture for numeric reduction names (one string per test).
    """
    return request.param
# Method names of the boolean reductions.
_all_boolean_reductions = ["all", "any"]


@pytest.fixture(params=_all_boolean_reductions)
def all_boolean_reductions(request):
    """
    Fixture for boolean reduction names ("all" and "any").
    """
    return request.param
# Numeric reduction names followed by the boolean ones.
_all_reductions = _all_numeric_reductions + _all_boolean_reductions


@pytest.fixture(params=_all_reductions)
def all_reductions(request):
    """
    Fixture for all (boolean + numeric) reduction names.
    """
    return request.param
@pytest.fixture(
    params=[
        operator.eq,
        operator.ne,
        operator.gt,
        operator.ge,
        operator.lt,
        operator.le,
    ]
)
def comparison_op(request):
    """
    Fixture for operator module comparison functions
    (eq, ne, gt, ge, lt, le).
    """
    return request.param
@pytest.fixture(params=["__le__", "__lt__", "__ge__", "__gt__"])
def compare_operators_no_eq_ne(request):
    """
    Fixture for dunder names for compare operations except == and !=

    * >=
    * >
    * <
    * <=
    """
    return request.param
@pytest.fixture(
    params=["__and__", "__rand__", "__or__", "__ror__", "__xor__", "__rxor__"]
)
def all_logical_operators(request):
    """
    Fixture for dunder names for common logical operations, in forward and
    reversed form.

    * |
    * &
    * ^
    """
    return request.param
# Method names of the cumulative operations.
_all_numeric_accumulations = ["cumsum", "cumprod", "cummin", "cummax"]


@pytest.fixture(params=_all_numeric_accumulations)
def all_numeric_accumulations(request):
    """
    Fixture for numeric accumulation names
    ("cumsum", "cumprod", "cummin", "cummax").
    """
    return request.param
  940. # ----------------------------------------------------------------
  941. # Data sets/files
  942. # ----------------------------------------------------------------
@pytest.fixture
def strict_data_files(pytestconfig):
    """
    Returns the configuration for the test setting `--no-strict-data-files`.

    The value is whatever ``pytestconfig.getoption`` reports for that option;
    ``datapath`` treats it as a truthy/falsy flag.
    """
    return pytestconfig.getoption("--no-strict-data-files")
  949. @pytest.fixture
  950. def datapath(strict_data_files: str) -> Callable[..., str]:
  951. """
  952. Get the path to a data file.
  953. Parameters
  954. ----------
  955. path : str
  956. Path to the file, relative to ``pandas/tests/``
  957. Returns
  958. -------
  959. path including ``pandas/tests``.
  960. Raises
  961. ------
  962. ValueError
  963. If the path doesn't exist and the --no-strict-data-files option is not set.
  964. """
  965. BASE_PATH = os.path.join(os.path.dirname(__file__), "tests")
  966. def deco(*args):
  967. path = os.path.join(BASE_PATH, *args)
  968. if not os.path.exists(path):
  969. if strict_data_files:
  970. raise ValueError(
  971. f"Could not find file {path} and --no-strict-data-files is not set."
  972. )
  973. pytest.skip(f"Could not find {path}.")
  974. return path
  975. return deco
  976. # ----------------------------------------------------------------
  977. # Time zones
  978. # ----------------------------------------------------------------
  979. TIMEZONES = [
  980. None,
  981. "UTC",
  982. "US/Eastern",
  983. "Asia/Tokyo",
  984. "dateutil/US/Pacific",
  985. "dateutil/Asia/Singapore",
  986. "+01:15",
  987. "-02:15",
  988. "UTC+01:15",
  989. "UTC-02:15",
  990. tzutc(),
  991. tzlocal(),
  992. timezone.utc,
  993. timezone(timedelta(hours=1)),
  994. timezone(timedelta(hours=-1), name="foo"),
  995. ]
  996. if pytz is not None:
  997. TIMEZONES.extend(
  998. (
  999. pytz.FixedOffset(300),
  1000. pytz.FixedOffset(0),
  1001. pytz.FixedOffset(-300),
  1002. pytz.timezone("US/Pacific"),
  1003. pytz.timezone("UTC"),
  1004. )
  1005. )
  1006. TIMEZONE_IDS = [repr(i) for i in TIMEZONES]
# The docstring below is a format template: td.parametrize_fixture_doc is
# called with str(TIMEZONE_IDS), presumably substituted for the {0}
# placeholder - keep the placeholder and avoid other braces in the docstring.
@td.parametrize_fixture_doc(str(TIMEZONE_IDS))
@pytest.fixture(params=TIMEZONES, ids=TIMEZONE_IDS)
def tz_naive_fixture(request):
    """
    Fixture for trying timezones including default (None): {0}
    """
    return request.param
# Same entries as TIMEZONES minus the leading None. The docstring is a
# format template: td.parametrize_fixture_doc is called with
# str(TIMEZONE_IDS[1:]), presumably substituted for the {0} placeholder.
@td.parametrize_fixture_doc(str(TIMEZONE_IDS[1:]))
@pytest.fixture(params=TIMEZONES[1:], ids=TIMEZONE_IDS[1:])
def tz_aware_fixture(request):
    """
    Fixture for trying explicit timezones: {0}
    """
    return request.param
# Different spellings of UTC: strings, dateutil and stdlib tzinfo objects,
# plus pytz.utc when pytz is available.
_UTCS = ["utc", "dateutil/UTC", tzutc(), timezone.utc]
if pytz is not None:
    _UTCS.append(pytz.utc)


@pytest.fixture(params=_UTCS)
def utc_fixture(request):
    """
    Fixture to provide variants of UTC timezone strings and tzinfo objects.
    """
    return request.param


# Second name bound to the same fixture - presumably so a test can request
# two UTC variants at once; verify against callers.
utc_fixture2 = utc_fixture
@pytest.fixture(params=["s", "ms", "us", "ns"])
def unit(request):
    """
    datetime64 units we support ("s", "ms", "us", "ns").
    """
    return request.param


# Second name bound to the same fixture - presumably so a test can request
# two units at once; verify against callers.
unit2 = unit
  1038. # ----------------------------------------------------------------
  1039. # Dtypes
  1040. # ----------------------------------------------------------------
@pytest.fixture(params=tm.STRING_DTYPES)
def string_dtype(request):
    """
    Parametrized fixture for string dtypes (``tm.STRING_DTYPES``).

    * str
    * 'str'
    * 'U'
    """
    return request.param
  1050. @pytest.fixture(
  1051. params=[
  1052. ("python", pd.NA),
  1053. pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
  1054. pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
  1055. ("python", np.nan),
  1056. ],
  1057. ids=[
  1058. "string=string[python]",
  1059. "string=string[pyarrow]",
  1060. "string=str[pyarrow]",
  1061. "string=str[python]",
  1062. ],
  1063. )
  1064. def string_dtype_no_object(request):
  1065. """
  1066. Parametrized fixture for string dtypes.
  1067. * 'string[python]' (NA variant)
  1068. * 'string[pyarrow]' (NA variant)
  1069. * 'str' (NaN variant, with pyarrow)
  1070. * 'str' (NaN variant, without pyarrow)
  1071. """
  1072. # need to instantiate the StringDtype here instead of in the params
  1073. # to avoid importing pyarrow during test collection
  1074. storage, na_value = request.param
  1075. return pd.StringDtype(storage, na_value)
@pytest.fixture(
    params=[
        "string[python]",
        pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
    ]
)
def nullable_string_dtype(request):
    """
    Parametrized fixture for string dtypes, given as dtype strings.

    * 'string[python]'
    * 'string[pyarrow]'
    """
    return request.param
  1089. @pytest.fixture(
  1090. params=[
  1091. pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
  1092. pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
  1093. ]
  1094. )
  1095. def pyarrow_string_dtype(request):
  1096. """
  1097. Parametrized fixture for string dtypes backed by Pyarrow.
  1098. * 'str[pyarrow]'
  1099. * 'string[pyarrow]'
  1100. """
  1101. return pd.StringDtype(*request.param)
@pytest.fixture(
    params=[
        "python",
        pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")),
    ]
)
def string_storage(request):
    """
    Parametrized fixture for pd.options.mode.string_storage.

    * 'python'
    * 'pyarrow'
    """
    return request.param
@pytest.fixture(
    params=[
        ("python", pd.NA),
        pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
        pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
        ("python", np.nan),
    ],
    ids=[
        "string=string[python]",
        "string=string[pyarrow]",
        "string=str[pyarrow]",
        "string=str[python]",
    ],
)
def string_dtype_arguments(request):
    """
    Parametrized fixture for StringDtype storage and na_value.

    Returns the ``(storage, na_value)`` tuple itself, not a dtype instance.

    * 'python' + pd.NA
    * 'pyarrow' + pd.NA
    * 'pyarrow' + np.nan
    * 'python' + np.nan
    """
    return request.param
@pytest.fixture(
    params=[
        "numpy_nullable",
        pytest.param("pyarrow", marks=td.skip_if_no("pyarrow")),
    ]
)
def dtype_backend(request):
    """
    Parametrized fixture for dtype backend names.

    * 'numpy_nullable'
    * 'pyarrow'
    """
    return request.param
# Aliases so that a single test can request two independent instances of
# these fixtures and run over the cartesian product of their parameters.
string_storage2 = string_storage
string_dtype_arguments2 = string_dtype_arguments
@pytest.fixture(params=tm.BYTES_DTYPES)
def bytes_dtype(request):
    """
    Parametrized fixture for bytes dtypes (``tm.BYTES_DTYPES``).

    * bytes
    * 'bytes'
    """
    return request.param
@pytest.fixture(params=tm.OBJECT_DTYPES)
def object_dtype(request):
    """
    Parametrized fixture for object dtypes (``tm.OBJECT_DTYPES``).

    * object
    * 'object'
    """
    return request.param
  1169. @pytest.fixture(
  1170. params=[
  1171. np.dtype("object"),
  1172. ("python", pd.NA),
  1173. pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
  1174. pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
  1175. ("python", np.nan),
  1176. ],
  1177. ids=[
  1178. "string=object",
  1179. "string=string[python]",
  1180. "string=string[pyarrow]",
  1181. "string=str[pyarrow]",
  1182. "string=str[python]",
  1183. ],
  1184. )
  1185. def any_string_dtype(request):
  1186. """
  1187. Parametrized fixture for string dtypes.
  1188. * 'object'
  1189. * 'string[python]' (NA variant)
  1190. * 'string[pyarrow]' (NA variant)
  1191. * 'str' (NaN variant, with pyarrow)
  1192. * 'str' (NaN variant, without pyarrow)
  1193. """
  1194. if isinstance(request.param, np.dtype):
  1195. return request.param
  1196. else:
  1197. # need to instantiate the StringDtype here instead of in the params
  1198. # to avoid importing pyarrow during test collection
  1199. storage, na_value = request.param
  1200. return pd.StringDtype(storage, na_value)
  1201. any_string_dtype2 = any_string_dtype
@pytest.fixture(params=tm.DATETIME64_DTYPES)
def datetime64_dtype(request):
    """
    Parametrized fixture for datetime64 dtypes (``tm.DATETIME64_DTYPES``).

    * 'datetime64[ns]'
    * 'M8[ns]'
    """
    return request.param
@pytest.fixture(params=tm.TIMEDELTA64_DTYPES)
def timedelta64_dtype(request):
    """
    Parametrized fixture for timedelta64 dtypes (``tm.TIMEDELTA64_DTYPES``).

    * 'timedelta64[ns]'
    * 'm8[ns]'
    """
    return request.param
@pytest.fixture
def fixed_now_ts() -> Timestamp:
    """
    Fixture emitting a fixed Timestamp to stand in for ``Timestamp.now()``:
    2021-01-01 12:04:13 plus 22 microseconds.
    """
    return Timestamp(  # pyright: ignore[reportReturnType]
        year=2021, month=1, day=1, hour=12, minute=4, second=13, microsecond=22
    )
@pytest.fixture(params=tm.FLOAT_NUMPY_DTYPES)
def float_numpy_dtype(request):
    """
    Parameterized fixture for numpy float dtypes (``tm.FLOAT_NUMPY_DTYPES``).

    * float
    * 'float32'
    * 'float64'
    """
    return request.param
@pytest.fixture(params=tm.FLOAT_EA_DTYPES)
def float_ea_dtype(request):
    """
    Parameterized fixture for extension-array float dtypes
    (``tm.FLOAT_EA_DTYPES``).

    * 'Float32'
    * 'Float64'
    """
    return request.param
@pytest.fixture(params=tm.ALL_FLOAT_DTYPES)
def any_float_dtype(request):
    """
    Parameterized fixture for all float dtypes (``tm.ALL_FLOAT_DTYPES``).

    * float
    * 'float32'
    * 'float64'
    * 'Float32'
    * 'Float64'
    """
    return request.param
@pytest.fixture(params=tm.COMPLEX_DTYPES)
def complex_dtype(request):
    """
    Parameterized fixture for complex dtypes (``tm.COMPLEX_DTYPES``).

    * complex
    * 'complex64'
    * 'complex128'
    """
    return request.param
@pytest.fixture(params=tm.COMPLEX_FLOAT_DTYPES)
def complex_or_float_dtype(request):
    """
    Parameterized fixture for complex and numpy float dtypes
    (``tm.COMPLEX_FLOAT_DTYPES``).

    * complex
    * 'complex64'
    * 'complex128'
    * float
    * 'float32'
    * 'float64'
    """
    return request.param
@pytest.fixture(params=tm.SIGNED_INT_NUMPY_DTYPES)
def any_signed_int_numpy_dtype(request):
    """
    Parameterized fixture for signed numpy integer dtypes
    (``tm.SIGNED_INT_NUMPY_DTYPES``).

    * int
    * 'int8'
    * 'int16'
    * 'int32'
    * 'int64'
    """
    return request.param
@pytest.fixture(params=tm.UNSIGNED_INT_NUMPY_DTYPES)
def any_unsigned_int_numpy_dtype(request):
    """
    Parameterized fixture for unsigned numpy integer dtypes
    (``tm.UNSIGNED_INT_NUMPY_DTYPES``).

    * 'uint8'
    * 'uint16'
    * 'uint32'
    * 'uint64'
    """
    return request.param
@pytest.fixture(params=tm.ALL_INT_NUMPY_DTYPES)
def any_int_numpy_dtype(request):
    """
    Parameterized fixture for any numpy integer dtype
    (``tm.ALL_INT_NUMPY_DTYPES``).

    * int
    * 'int8'
    * 'uint8'
    * 'int16'
    * 'uint16'
    * 'int32'
    * 'uint32'
    * 'int64'
    * 'uint64'
    """
    return request.param
@pytest.fixture(params=tm.ALL_INT_EA_DTYPES)
def any_int_ea_dtype(request):
    """
    Parameterized fixture for any nullable integer dtype
    (``tm.ALL_INT_EA_DTYPES``).

    * 'UInt8'
    * 'Int8'
    * 'UInt16'
    * 'Int16'
    * 'UInt32'
    * 'Int32'
    * 'UInt64'
    * 'Int64'
    """
    return request.param
@pytest.fixture(params=tm.ALL_INT_DTYPES)
def any_int_dtype(request):
    """
    Parameterized fixture for any integer dtype, numpy or nullable
    (``tm.ALL_INT_DTYPES``).

    * int
    * 'int8'
    * 'uint8'
    * 'int16'
    * 'uint16'
    * 'int32'
    * 'uint32'
    * 'int64'
    * 'uint64'
    * 'UInt8'
    * 'Int8'
    * 'UInt16'
    * 'Int16'
    * 'UInt32'
    * 'Int32'
    * 'UInt64'
    * 'Int64'
    """
    return request.param
@pytest.fixture(params=tm.ALL_INT_EA_DTYPES + tm.FLOAT_EA_DTYPES)
def any_numeric_ea_dtype(request):
    """
    Parameterized fixture for any nullable integer dtype and
    any float ea dtypes.

    * 'UInt8'
    * 'Int8'
    * 'UInt16'
    * 'Int16'
    * 'UInt32'
    * 'Int32'
    * 'UInt64'
    * 'Int64'
    * 'Float32'
    * 'Float64'
    """
    return request.param
# The type-ignore below silences this type-checker complaint:
# Unsupported operand types for + ("List[Union[str, ExtensionDtype, dtype[Any],
# Type[object]]]" and "List[str]")
@pytest.fixture(
    params=tm.ALL_INT_EA_DTYPES
    + tm.FLOAT_EA_DTYPES
    + tm.ALL_INT_PYARROW_DTYPES_STR_REPR
    + tm.FLOAT_PYARROW_DTYPES_STR_REPR  # type: ignore[operator]
)
def any_numeric_ea_and_arrow_dtype(request):
    """
    Parameterized fixture for any nullable integer dtype, any float ea
    dtype, and the pyarrow integer and float dtype strings.

    * 'UInt8'
    * 'Int8'
    * 'UInt16'
    * 'Int16'
    * 'UInt32'
    * 'Int32'
    * 'UInt64'
    * 'Int64'
    * 'Float32'
    * 'Float64'
    * 'uint8[pyarrow]'
    * 'int8[pyarrow]'
    * 'uint16[pyarrow]'
    * 'int16[pyarrow]'
    * 'uint32[pyarrow]'
    * 'int32[pyarrow]'
    * 'uint64[pyarrow]'
    * 'int64[pyarrow]'
    * 'float32[pyarrow]'
    * 'float64[pyarrow]'
    """
    return request.param
@pytest.fixture(params=tm.SIGNED_INT_EA_DTYPES)
def any_signed_int_ea_dtype(request):
    """
    Parameterized fixture for any signed nullable integer dtype
    (``tm.SIGNED_INT_EA_DTYPES``).

    * 'Int8'
    * 'Int16'
    * 'Int32'
    * 'Int64'
    """
    return request.param
@pytest.fixture(params=tm.ALL_REAL_NUMPY_DTYPES)
def any_real_numpy_dtype(request):
    """
    Parameterized fixture for any (purely) real numeric numpy dtype
    (``tm.ALL_REAL_NUMPY_DTYPES``).

    * int
    * 'int8'
    * 'uint8'
    * 'int16'
    * 'uint16'
    * 'int32'
    * 'uint32'
    * 'int64'
    * 'uint64'
    * float
    * 'float32'
    * 'float64'
    """
    return request.param
@pytest.fixture(params=tm.ALL_REAL_DTYPES)
def any_real_numeric_dtype(request):
    """
    Parameterized fixture for any (purely) real numeric dtype
    (``tm.ALL_REAL_DTYPES``).

    * int
    * 'int8'
    * 'uint8'
    * 'int16'
    * 'uint16'
    * 'int32'
    * 'uint32'
    * 'int64'
    * 'uint64'
    * float
    * 'float32'
    * 'float64'

    and associated ea dtypes.
    """
    return request.param
@pytest.fixture(params=tm.ALL_NUMPY_DTYPES)
def any_numpy_dtype(request):
    """
    Parameterized fixture for all numpy dtypes (``tm.ALL_NUMPY_DTYPES``).

    * bool
    * 'bool'
    * int
    * 'int8'
    * 'uint8'
    * 'int16'
    * 'uint16'
    * 'int32'
    * 'uint32'
    * 'int64'
    * 'uint64'
    * float
    * 'float32'
    * 'float64'
    * complex
    * 'complex64'
    * 'complex128'
    * str
    * 'str'
    * 'U'
    * bytes
    * 'bytes'
    * 'datetime64[ns]'
    * 'M8[ns]'
    * 'timedelta64[ns]'
    * 'm8[ns]'
    * object
    * 'object'
    """
    return request.param
@pytest.fixture(params=tm.ALL_REAL_NULLABLE_DTYPES)
def any_real_nullable_dtype(request):
    """
    Parameterized fixture for all real dtypes that can hold NA
    (``tm.ALL_REAL_NULLABLE_DTYPES``).

    * float
    * 'float32'
    * 'float64'
    * 'Float32'
    * 'Float64'
    * 'UInt8'
    * 'UInt16'
    * 'UInt32'
    * 'UInt64'
    * 'Int8'
    * 'Int16'
    * 'Int32'
    * 'Int64'
    * 'uint8[pyarrow]'
    * 'uint16[pyarrow]'
    * 'uint32[pyarrow]'
    * 'uint64[pyarrow]'
    * 'int8[pyarrow]'
    * 'int16[pyarrow]'
    * 'int32[pyarrow]'
    * 'int64[pyarrow]'
    * 'float[pyarrow]'
    * 'double[pyarrow]'
    """
    return request.param
@pytest.fixture(params=tm.ALL_NUMERIC_DTYPES)
def any_numeric_dtype(request):
    """
    Parameterized fixture for all numeric dtypes (``tm.ALL_NUMERIC_DTYPES``).

    * int
    * 'int8'
    * 'uint8'
    * 'int16'
    * 'uint16'
    * 'int32'
    * 'uint32'
    * 'int64'
    * 'uint64'
    * float
    * 'float32'
    * 'float64'
    * complex
    * 'complex64'
    * 'complex128'
    * 'UInt8'
    * 'Int8'
    * 'UInt16'
    * 'Int16'
    * 'UInt32'
    * 'Int32'
    * 'UInt64'
    * 'Int64'
    * 'Float32'
    * 'Float64'
    """
    return request.param
  1540. # categoricals are handled separately
  1541. _any_skipna_inferred_dtype = [
  1542. ("string", ["a", np.nan, "c"]),
  1543. ("string", ["a", pd.NA, "c"]),
  1544. ("mixed", ["a", pd.NaT, "c"]), # pd.NaT not considered valid by is_string_array
  1545. ("bytes", [b"a", np.nan, b"c"]),
  1546. ("empty", [np.nan, np.nan, np.nan]),
  1547. ("empty", []),
  1548. ("mixed-integer", ["a", np.nan, 2]),
  1549. ("mixed", ["a", np.nan, 2.0]),
  1550. ("floating", [1.0, np.nan, 2.0]),
  1551. ("integer", [1, np.nan, 2]),
  1552. ("mixed-integer-float", [1, np.nan, 2.0]),
  1553. ("decimal", [Decimal(1), np.nan, Decimal(2)]),
  1554. ("boolean", [True, np.nan, False]),
  1555. ("boolean", [True, pd.NA, False]),
  1556. ("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]),
  1557. ("datetime", [Timestamp("20130101"), np.nan, Timestamp("20180101")]),
  1558. ("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
  1559. ("complex", [1 + 1j, np.nan, 2 + 2j]),
  1560. # The following dtype is commented out due to GH 23554
  1561. # ('timedelta64', [np.timedelta64(1, 'D'),
  1562. # np.nan, np.timedelta64(2, 'D')]),
  1563. ("timedelta", [timedelta(1), np.nan, timedelta(2)]),
  1564. ("time", [time(1), np.nan, time(2)]),
  1565. ("period", [Period(2013), pd.NaT, Period(2018)]),
  1566. ("interval", [Interval(0, 1), np.nan, Interval(0, 2)]),
  1567. ]
  1568. ids = [
  1569. pair[0] for pair in _any_skipna_inferred_dtype
  1570. ] # use inferred type as fixture-id
  1571. @pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids)
  1572. def any_skipna_inferred_dtype(request):
  1573. """
  1574. Fixture for all inferred dtypes from _libs.lib.infer_dtype
  1575. The covered (inferred) types are:
  1576. * 'string'
  1577. * 'empty'
  1578. * 'bytes'
  1579. * 'mixed'
  1580. * 'mixed-integer'
  1581. * 'mixed-integer-float'
  1582. * 'floating'
  1583. * 'integer'
  1584. * 'decimal'
  1585. * 'boolean'
  1586. * 'datetime64'
  1587. * 'datetime'
  1588. * 'date'
  1589. * 'timedelta'
  1590. * 'time'
  1591. * 'period'
  1592. * 'interval'
  1593. Returns
  1594. -------
  1595. inferred_dtype : str
  1596. The string for the inferred dtype from _libs.lib.infer_dtype
  1597. values : np.ndarray
  1598. An array of object dtype that will be inferred to have
  1599. `inferred_dtype`
  1600. Examples
  1601. --------
  1602. >>> from pandas._libs import lib
  1603. >>>
  1604. >>> def test_something(any_skipna_inferred_dtype):
  1605. ... inferred_dtype, values = any_skipna_inferred_dtype
  1606. ... # will pass
  1607. ... assert lib.infer_dtype(values, skipna=True) == inferred_dtype
  1608. """
  1609. inferred_dtype, values = request.param
  1610. values = np.array(values, dtype=object) # object dtype to avoid casting
  1611. # correctness of inference tested in tests/dtypes/test_inference.py
  1612. return inferred_dtype, values
  1613. # ----------------------------------------------------------------
  1614. # Misc
  1615. # ----------------------------------------------------------------
  1616. @pytest.fixture
  1617. def ip():
  1618. """
  1619. Get an instance of IPython.InteractiveShell.
  1620. Will raise a skip if IPython is not installed.
  1621. """
  1622. pytest.importorskip("IPython", minversion="6.0.0")
  1623. from IPython.core.interactiveshell import InteractiveShell
  1624. # GH#35711 make sure sqlite history file handle is not leaked
  1625. from traitlets.config import Config # isort:skip
  1626. c = Config()
  1627. c.HistoryManager.hist_file = ":memory:"
  1628. return InteractiveShell(config=c)
@pytest.fixture
def mpl_cleanup():
    """
    Ensure Matplotlib is cleaned up around a test.

    The test is skipped if matplotlib cannot be imported.

    Before a test is run:

    1) Set the backend to "template" to avoid requiring a GUI.

    After a test is run:

    1) Reset units registry
    2) Reset rc_context
    3) Close all figures

    See matplotlib/testing/decorators.py#L24.
    """
    mpl = pytest.importorskip("matplotlib")
    mpl_units = pytest.importorskip("matplotlib.units")
    plt = pytest.importorskip("matplotlib.pyplot")
    # Snapshot the units registry so it can be restored after the test.
    orig_units_registry = mpl_units.registry.copy()
    try:
        # rc_context restores rcParams when the block exits.
        with mpl.rc_context():
            mpl.use("template")
            yield
    finally:
        # Runs even if the test fails: restore the registry in place.
        mpl_units.registry.clear()
        mpl_units.registry.update(orig_units_registry)
        plt.close("all")
        # https://matplotlib.org/stable/users/prev_whats_new/whats_new_3.6.0.html#garbage-collection-is-no-longer-run-on-figure-close  # noqa: E501
        gc.collect(1)
@pytest.fixture(
    params=[
        getattr(pd.offsets, o)
        for o in pd.offsets.__all__
        if issubclass(getattr(pd.offsets, o), pd.offsets.Tick) and o != "Tick"
    ]
)
def tick_classes(request):
    """
    Fixture for Tick based datetime offsets available for a time series.

    Parametrized over every name in ``pd.offsets.__all__`` that is a
    subclass of ``pd.offsets.Tick``, excluding ``Tick`` itself.
    """
    return request.param
@pytest.fixture(params=[None, lambda x: x])
def sort_by_key(request):
    """
    Simple fixture for testing keys in sorting methods.

    Tests None (no key) and the identity key.
    """
    return request.param
@pytest.fixture(
    params=[
        ("foo", None, None),
        ("Egon", "Venkman", None),
        ("NCC1701D", "NCC1701D", "NCC1701D"),
        # possibly-matching NAs
        (np.nan, np.nan, np.nan),
        (np.nan, pd.NaT, None),
        (np.nan, pd.NA, None),
        (pd.NA, pd.NA, pd.NA),
    ]
)
def names(request) -> tuple[Hashable, Hashable, Hashable]:
    """
    A 3-tuple of names, the first two for operands, the last for a result.

    Cases cover string names, None, and NA-like names (np.nan, pd.NaT,
    pd.NA).
    """
    return request.param
@pytest.fixture(params=[tm.setitem, tm.loc, tm.iloc])
def indexer_sli(request):
    """
    Parametrize over __setitem__, loc.__setitem__, iloc.__setitem__

    The parameters are the ``tm.setitem``, ``tm.loc`` and ``tm.iloc``
    helpers.
    """
    return request.param
@pytest.fixture(params=[tm.loc, tm.iloc])
def indexer_li(request):
    """
    Parametrize over loc.__getitem__, iloc.__getitem__

    The parameters are the ``tm.loc`` and ``tm.iloc`` helpers.
    """
    return request.param
  1703. @pytest.fixture(params=[tm.setitem, tm.iloc])
  1704. def indexer_si(request):
  1705. """
  1706. Parametrize over __setitem__, iloc.__setitem__
  1707. """
  1708. return request.param
  1709. @pytest.fixture(params=[tm.setitem, tm.loc])
  1710. def indexer_sl(request):
  1711. """
  1712. Parametrize over __setitem__, loc.__setitem__
  1713. """
  1714. return request.param
  1715. @pytest.fixture(params=[tm.at, tm.loc])
  1716. def indexer_al(request):
  1717. """
  1718. Parametrize over at.__setitem__, loc.__setitem__
  1719. """
  1720. return request.param
  1721. @pytest.fixture(params=[tm.iat, tm.iloc])
  1722. def indexer_ial(request):
  1723. """
  1724. Parametrize over iat.__setitem__, iloc.__setitem__
  1725. """
  1726. return request.param
  1727. @pytest.fixture(params=[True, False])
  1728. def performance_warning(request) -> Iterator[bool | type[Warning]]:
  1729. """
  1730. Fixture to check if performance warnings are enabled. Either produces
  1731. ``PerformanceWarning`` if they are enabled, otherwise ``False``.
  1732. """
  1733. with pd.option_context("mode.performance_warnings", request.param):
  1734. yield pd.errors.PerformanceWarning if request.param else False
  1735. @pytest.fixture
  1736. def using_infer_string() -> bool:
  1737. """
  1738. Fixture to check if infer string option is enabled.
  1739. """
  1740. return pd.options.future.infer_string is True
  1741. @pytest.fixture
  1742. def using_python_scalars() -> bool:
  1743. return pd.options.future.python_scalars is True
  1744. _warsaws: list[Any] = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]
  1745. if pytz is not None:
  1746. _warsaws.append(pytz.timezone("Europe/Warsaw"))
  1747. @pytest.fixture(params=_warsaws)
  1748. def warsaw(request) -> str:
  1749. """
  1750. tzinfo for Europe/Warsaw using pytz, dateutil, or zoneinfo.
  1751. """
  1752. return request.param
  1753. @pytest.fixture
  1754. def temp_file(tmp_path):
  1755. """
  1756. Generate a unique file for testing use. See link for removal policy.
  1757. https://docs.pytest.org/en/7.1.x/how-to/tmp_path.html#the-default-base-temporary-directory
  1758. """
  1759. file_path = tmp_path / str(uuid.uuid4())
  1760. file_path.touch()
  1761. return file_path
  1762. @pytest.fixture(scope="session")
  1763. def monkeysession():
  1764. with pytest.MonkeyPatch.context() as mp:
  1765. yield mp
  1766. @pytest.fixture(params=[True, False])
  1767. def using_nan_is_na(request):
  1768. opt = request.param
  1769. with pd.option_context("future.distinguish_nan_and_na", not opt):
  1770. yield opt