| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370 |
- """
- Testing that we work in the downstream packages
- """
- import array
- import subprocess
- import sys
- import numpy as np
- import pytest
- from pandas.errors import IntCastingNaNError
- import pandas.util._test_decorators as td
- import pandas as pd
- from pandas import (
- DataFrame,
- DatetimeIndex,
- Series,
- TimedeltaIndex,
- )
- import pandas._testing as tm
- from pandas.core.arrays import (
- DatetimeArray,
- TimedeltaArray,
- )
- from pandas.util.version import Version
- @pytest.fixture
- def df():
- return DataFrame({"A": [1, 2, 3]})
- def test_dask(df):
- # dask sets "compute.use_numexpr" to False, so catch the current value
- # and ensure to reset it afterwards to avoid impacting other tests
- olduse = pd.get_option("compute.use_numexpr")
- try:
- pytest.importorskip("toolz")
- dd = pytest.importorskip("dask.dataframe")
- ddf = dd.from_pandas(df, npartitions=3)
- assert ddf.A is not None
- assert ddf.compute() is not None
- finally:
- pd.set_option("compute.use_numexpr", olduse)
- def test_dask_ufunc():
- # dask sets "compute.use_numexpr" to False, so catch the current value
- # and ensure to reset it afterwards to avoid impacting other tests
- olduse = pd.get_option("compute.use_numexpr")
- try:
- da = pytest.importorskip("dask.array")
- dd = pytest.importorskip("dask.dataframe")
- s = Series([1.5, 2.3, 3.7, 4.0])
- ds = dd.from_pandas(s, npartitions=2)
- result = da.fix(ds).compute()
- expected = np.fix(s)
- tm.assert_series_equal(result, expected)
- finally:
- pd.set_option("compute.use_numexpr", olduse)
- def test_construct_dask_float_array_int_dtype_match_ndarray():
- # GH#40110 make sure we treat a float-dtype dask array with the same
- # rules we would for an ndarray
- dd = pytest.importorskip("dask.dataframe")
- arr = np.array([1, 2.5, 3])
- darr = dd.from_array(arr)
- res = Series(darr)
- expected = Series(arr)
- tm.assert_series_equal(res, expected)
- # GH#49599 in 2.0 we raise instead of silently ignoring the dtype
- msg = "Trying to coerce float values to integers"
- with pytest.raises(ValueError, match=msg):
- Series(darr, dtype="i8")
- msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
- arr[2] = np.nan
- with pytest.raises(IntCastingNaNError, match=msg):
- Series(darr, dtype="i8")
- # which is the same as we get with a numpy input
- with pytest.raises(IntCastingNaNError, match=msg):
- Series(arr, dtype="i8")
- def test_xarray(df):
- pytest.importorskip("xarray")
- assert df.to_xarray() is not None
- def test_xarray_cftimeindex_nearest():
- # https://github.com/pydata/xarray/issues/3751
- cftime = pytest.importorskip("cftime")
- xarray = pytest.importorskip("xarray")
- times = xarray.cftime_range("0001", periods=2)
- key = cftime.DatetimeGregorian(2000, 1, 1)
- result = times.get_indexer([key], method="nearest")
- expected = 1
- assert result == expected
- @pytest.mark.single_cpu
- def test_oo_optimizable():
- # GH 21071
- subprocess.check_call([sys.executable, "-OO", "-c", "import pandas"])
- @pytest.mark.single_cpu
- def test_oo_optimized_datetime_index_unpickle():
- # GH 42866
- subprocess.check_call(
- [
- sys.executable,
- "-OO",
- "-c",
- (
- "import pandas as pd, pickle; "
- "pickle.loads(pickle.dumps(pd.date_range('2021-01-01', periods=1)))"
- ),
- ]
- )
- def test_statsmodels():
- smf = pytest.importorskip("statsmodels.formula.api")
- df = DataFrame(
- {"Lottery": range(5), "Literacy": range(5), "Pop1831": range(100, 105)}
- )
- smf.ols("Lottery ~ Literacy + np.log(Pop1831)", data=df).fit()
- def test_scikit_learn():
- pytest.importorskip("sklearn")
- from sklearn import (
- datasets,
- svm,
- )
- digits = datasets.load_digits()
- clf = svm.SVC(gamma=0.001, C=100.0)
- clf.fit(digits.data[:-1], digits.target[:-1])
- clf.predict(digits.data[-1:])
- def test_seaborn():
- seaborn = pytest.importorskip("seaborn")
- tips = DataFrame(
- {"day": pd.date_range("2023", freq="D", periods=5), "total_bill": range(5)}
- )
- seaborn.stripplot(x="day", y="total_bill", data=tips)
- def test_pandas_datareader():
- pytest.importorskip("pandas_datareader")
- @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
- def test_pyarrow(df):
- pyarrow = pytest.importorskip("pyarrow")
- table = pyarrow.Table.from_pandas(df)
- result = table.to_pandas()
- tm.assert_frame_equal(result, df)
- def test_yaml_dump(df):
- # GH#42748
- yaml = pytest.importorskip("yaml")
- dumped = yaml.dump(df)
- loaded = yaml.load(dumped, Loader=yaml.Loader)
- tm.assert_frame_equal(df, loaded)
- loaded2 = yaml.load(dumped, Loader=yaml.UnsafeLoader)
- tm.assert_frame_equal(df, loaded2)
- @pytest.mark.single_cpu
- def test_missing_required_dependency():
- # GH 23868
- # To ensure proper isolation, we pass these flags
- # -S : disable site-packages
- # -s : disable user site-packages
- # -E : disable PYTHON* env vars, especially PYTHONPATH
- # https://github.com/MacPython/pandas-wheels/pull/50
- pyexe = sys.executable.replace("\\", "/")
- # We skip this test if pandas is installed as a site package. We first
- # import the package normally and check the path to the module before
- # executing the test which imports pandas with site packages disabled.
- call = [pyexe, "-c", "import pandas;print(pandas.__file__)"]
- output = subprocess.check_output(call).decode()
- if "site-packages" in output:
- pytest.skip("pandas installed as site package")
- # This test will fail if pandas is installed as a site package. The flags
- # prevent pandas being imported and the test will report Failed: DID NOT
- # RAISE <class 'subprocess.CalledProcessError'>
- call = [pyexe, "-sSE", "-c", "import pandas"]
- msg = (
- rf"Command '\['{pyexe}', '-sSE', '-c', 'import pandas'\]' "
- "returned non-zero exit status 1."
- )
- with pytest.raises(subprocess.CalledProcessError, match=msg) as exc:
- subprocess.check_output(call, stderr=subprocess.STDOUT)
- output = exc.value.stdout.decode()
- for name in ["numpy", "pytz", "dateutil"]:
- assert name in output
- def test_frame_setitem_dask_array_into_new_col(request):
- # GH#47128
- # dask sets "compute.use_numexpr" to False, so catch the current value
- # and ensure to reset it afterwards to avoid impacting other tests
- olduse = pd.get_option("compute.use_numexpr")
- try:
- dask = pytest.importorskip("dask")
- da = pytest.importorskip("dask.array")
- if Version(dask.__version__) <= Version("2025.1.0") and Version(
- np.__version__
- ) >= Version("2.1"):
- request.applymarker(
- pytest.mark.xfail(reason="loc.__setitem__ incorrectly mutated column c")
- )
- dda = da.array([1, 2])
- df = DataFrame({"a": ["a", "b"]})
- df["b"] = dda
- df["c"] = dda
- df.loc[[False, True], "b"] = 100
- result = df.loc[[1], :]
- expected = DataFrame({"a": ["b"], "b": [100], "c": [2]}, index=[1])
- tm.assert_frame_equal(result, expected)
- finally:
- pd.set_option("compute.use_numexpr", olduse)
- def test_pandas_priority():
- # GH#48347
- class MyClass:
- __pandas_priority__ = 5000
- def __radd__(self, other):
- return self
- left = MyClass()
- right = Series(range(3))
- assert right.__add__(left) is NotImplemented
- assert right + left is left
- @pytest.fixture(
- params=[
- "memoryview",
- "array",
- pytest.param("dask", marks=td.skip_if_no("dask.array")),
- pytest.param("xarray", marks=td.skip_if_no("xarray")),
- ]
- )
- def array_likes(request):
- """
- Fixture giving a numpy array and a parametrized 'data' object, which can
- be a memoryview, array, dask or xarray object created from the numpy array.
- """
- # GH#24539 recognize e.g xarray, dask, ...
- arr = np.array([1, 2, 3], dtype=np.int64)
- name = request.param
- if name == "memoryview":
- data = memoryview(arr)
- elif name == "array":
- data = array.array("i", arr)
- elif name == "dask":
- import dask.array
- data = dask.array.array(arr)
- elif name == "xarray":
- import xarray as xr
- data = xr.DataArray(arr)
- return arr, data
- @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
- def test_from_obscure_array(dtype, array_likes):
- # GH#24539 recognize e.g xarray, dask, ...
- # Note: we dont do this for PeriodArray bc _from_sequence won't accept
- # an array of integers
- # TODO: could check with arraylike of Period objects
- arr, data = array_likes
- cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype]
- depr_msg = f"{cls.__name__}.__init__ is deprecated"
- with tm.assert_produces_warning(FutureWarning, match=depr_msg):
- expected = cls(arr)
- result = cls._from_sequence(data, dtype=dtype)
- tm.assert_extension_array_equal(result, expected)
- if not isinstance(data, memoryview):
- # FIXME(GH#44431) these raise on memoryview and attempted fix
- # fails on py3.10
- func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype]
- result = func(arr).array
- expected = func(data).array
- tm.assert_equal(result, expected)
- # Let's check the Indexes while we're here
- idx_cls = {"M8[ns]": DatetimeIndex, "m8[ns]": TimedeltaIndex}[dtype]
- result = idx_cls(arr)
- expected = idx_cls(data)
- tm.assert_index_equal(result, expected)
- def test_dataframe_consortium() -> None:
- """
- Test some basic methods of the dataframe consortium standard.
- Full testing is done at https://github.com/data-apis/dataframe-api-compat,
- this is just to check that the entry point works as expected.
- """
- pytest.importorskip("dataframe_api_compat")
- df_pd = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
- df = df_pd.__dataframe_consortium_standard__()
- result_1 = df.get_column_names()
- expected_1 = ["a", "b"]
- assert result_1 == expected_1
- ser = Series([1, 2, 3], name="a")
- col = ser.__column_consortium_standard__()
- assert col.name == "a"
- def test_xarray_coerce_unit():
- # GH44053
- xr = pytest.importorskip("xarray")
- arr = xr.DataArray([1, 2, 3])
- result = pd.to_datetime(arr, unit="ns")
- expected = DatetimeIndex(
- [
- "1970-01-01 00:00:00.000000001",
- "1970-01-01 00:00:00.000000002",
- "1970-01-01 00:00:00.000000003",
- ],
- dtype="datetime64[ns]",
- freq=None,
- )
- tm.assert_index_equal(result, expected)
|