| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
2771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503 |
- from __future__ import annotations
- import operator
- from typing import (
- TYPE_CHECKING,
- Literal,
- NoReturn,
- cast,
- )
- import warnings
- import numpy as np
- from pandas._libs import lib
- from pandas._libs.missing import is_matching_na
- from pandas._libs.sparse import SparseIndex
- import pandas._libs.testing as _testing
- from pandas._libs.tslibs.np_datetime import compare_mismatched_resolutions
- from pandas.errors import Pandas4Warning
- from pandas.util._decorators import (
- deprecate_kwarg,
- set_module,
- )
- from pandas.core.dtypes.common import (
- is_bool,
- is_float_dtype,
- is_integer_dtype,
- is_number,
- is_numeric_dtype,
- needs_i8_conversion,
- )
- from pandas.core.dtypes.dtypes import (
- CategoricalDtype,
- DatetimeTZDtype,
- ExtensionDtype,
- NumpyEADtype,
- )
- from pandas.core.dtypes.missing import array_equivalent
- import pandas as pd
- from pandas import (
- Categorical,
- DataFrame,
- DatetimeIndex,
- Index,
- IntervalDtype,
- IntervalIndex,
- MultiIndex,
- PeriodIndex,
- RangeIndex,
- Series,
- TimedeltaIndex,
- )
- from pandas.core.arrays import (
- DatetimeArray,
- ExtensionArray,
- IntervalArray,
- PeriodArray,
- TimedeltaArray,
- )
- from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
- from pandas.core.arrays.string_ import StringDtype
- from pandas.core.indexes.api import safe_sort_index
- from pandas.io.formats.printing import pprint_thing
- if TYPE_CHECKING:
- from pandas._typing import DtypeObj
- def assert_almost_equal(
- left,
- right,
- check_dtype: bool | Literal["equiv"] = "equiv",
- rtol: float = 1.0e-5,
- atol: float = 1.0e-8,
- **kwargs,
- ) -> None:
- """
- Check that the left and right objects are approximately equal.
- By approximately equal, we refer to objects that are numbers or that
- contain numbers which may be equivalent to specific levels of precision.
- Parameters
- ----------
- left : object
- right : object
- check_dtype : bool or {'equiv'}, default 'equiv'
- Check dtype if both a and b are the same type. If 'equiv' is passed in,
- then `RangeIndex` and `Index` with int64 dtype are also considered
- equivalent when doing type checking.
- rtol : float, default 1e-5
- Relative tolerance.
- atol : float, default 1e-8
- Absolute tolerance.
- """
- if isinstance(left, Index):
- assert_index_equal(
- left,
- right,
- check_exact=False,
- exact=check_dtype,
- rtol=rtol,
- atol=atol,
- **kwargs,
- )
- elif isinstance(left, Series):
- assert_series_equal(
- left,
- right,
- check_exact=False,
- check_dtype=check_dtype,
- rtol=rtol,
- atol=atol,
- **kwargs,
- )
- elif isinstance(left, DataFrame):
- assert_frame_equal(
- left,
- right,
- check_exact=False,
- check_dtype=check_dtype,
- rtol=rtol,
- atol=atol,
- **kwargs,
- )
- else:
- # Other sequences.
- if check_dtype:
- if is_number(left) and is_number(right):
- # Do not compare numeric classes, like np.float64 and float.
- pass
- elif is_bool(left) and is_bool(right):
- # Do not compare bool classes, like np.bool_ and bool.
- pass
- else:
- if isinstance(left, np.ndarray) or isinstance(right, np.ndarray):
- obj = "numpy array"
- else:
- obj = "Input"
- assert_class_equal(left, right, obj=obj)
- # if we have "equiv", this becomes True
- _testing.assert_almost_equal(
- left, right, check_dtype=bool(check_dtype), rtol=rtol, atol=atol, **kwargs
- )
def _check_isinstance(left, right, cls) -> None:
    """
    Verify that both compared objects are instances of ``cls``.

    Shared guard for the ``assert_*`` helpers, run before the actual
    comparison starts.

    Parameters
    ----------
    left : The first object being compared.
    right : The second object being compared.
    cls : The class type to check against.

    Raises
    ------
    AssertionError : Either `left` or `right` is not an instance of `cls`.
        ``left`` is checked first.
    """
    cls_name = cls.__name__

    for candidate in (left, right):
        if isinstance(candidate, cls):
            continue
        raise AssertionError(
            f"{cls_name} Expected type {cls}, found {type(candidate)} instead"
        )
def assert_dict_equal(left, right, compare_keys: bool = True) -> None:
    """
    Assert that two dicts are equal.

    Both arguments are first validated to be ``dict`` instances; the
    comparison itself is delegated to ``_testing.assert_dict_equal``.

    Parameters
    ----------
    left : dict
        The first dict to compare.
    right : dict
        The second dict to compare.
    compare_keys : bool, default True
        Forwarded unchanged to ``_testing.assert_dict_equal``.

    Raises
    ------
    AssertionError
        If either argument is not a ``dict``.
    """
    # Type guard first so a non-dict produces a descriptive message.
    _check_isinstance(left, right, dict)
    _testing.assert_dict_equal(left, right, compare_keys=compare_keys)
@set_module("pandas.testing")
def assert_index_equal(
    left: Index,
    right: Index,
    exact: bool | str = "equiv",
    check_names: bool = True,
    check_exact: bool = True,
    check_categorical: bool = True,
    check_order: bool = True,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str | None = None,
) -> None:
    """
    Check that left and right Index are equal.

    The checks run in this order: instance validation, class/dtype
    comparison, number of levels, length, values (per level for a
    ``MultiIndex``), and finally metadata (names, period dtype, interval
    arrays, categoricals).

    Parameters
    ----------
    left : Index
        The first index to compare.
    right : Index
        The second index to compare.
    exact : bool or {'equiv'}, default 'equiv'
        Whether to check the Index class, dtype and inferred_type
        are identical. If 'equiv', then RangeIndex can be substituted for
        Index with an int64 dtype as well.
    check_names : bool, default True
        Whether to check the names attribute.
    check_exact : bool, default True
        Whether to compare number exactly.
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly.
    check_order : bool, default True
        Whether to compare the order of index entries as well as their values.
        If True, both indexes must contain the same elements, in the same order.
        If False, both indexes must contain the same elements, but in any order.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'Index' or 'MultiIndex'
        Specify object name being compared, internally used to show appropriate
        assertion message.

    Raises
    ------
    AssertionError
        If either argument is not an ``Index`` or any enabled check fails.

    See Also
    --------
    testing.assert_series_equal : Check that two Series are equal.
    testing.assert_frame_equal : Check that two DataFrames are equal.

    Examples
    --------
    >>> from pandas import testing as tm
    >>> a = pd.Index([1, 2, 3])
    >>> b = pd.Index([1, 2, 3])
    >>> tm.assert_index_equal(a, b)
    """
    __tracebackhide__ = True

    # Pick the label used in assertion messages from the type of `left`.
    if obj is None:
        obj = "MultiIndex" if isinstance(left, MultiIndex) else "Index"

    def _check_types(left, right, obj: str = "Index") -> None:
        # Closure over `exact` and `check_categorical` of the enclosing call.
        # A falsy `exact` disables every class / dtype check below.
        if not exact:
            return

        assert_class_equal(left, right, exact=exact, obj=obj)
        assert_attr_equal("inferred_type", left, right, obj=obj)

        # Skip exact dtype checking when `check_categorical` is False
        if isinstance(left.dtype, CategoricalDtype) and isinstance(
            right.dtype, CategoricalDtype
        ):
            if check_categorical:
                assert_attr_equal("dtype", left, right, obj=obj)
                assert_index_equal(left.categories, right.categories, exact=exact)
            # Both categorical: the generic dtype check below is not reached.
            return

        assert_attr_equal("dtype", left, right, obj=obj)

    # instance validation
    _check_isinstance(left, right, Index)

    # class / dtype comparison
    _check_types(left, right, obj=obj)

    # level comparison
    if left.nlevels != right.nlevels:
        msg1 = f"{obj} levels are different"
        msg2 = f"{left.nlevels}, {left}"
        msg3 = f"{right.nlevels}, {right}"
        raise_assert_detail(obj, msg1, msg2, msg3)

    # length comparison
    if len(left) != len(right):
        msg1 = f"{obj} length are different"
        msg2 = f"{len(left)}, {left}"
        msg3 = f"{len(right)}, {right}"
        raise_assert_detail(obj, msg1, msg2, msg3)

    # If order doesn't matter then sort the index entries
    if not check_order:
        left = safe_sort_index(left)
        right = safe_sort_index(right)

    # MultiIndex special comparison for little-friendly error messages
    if isinstance(left, MultiIndex):
        right = cast(MultiIndex, right)

        for level in range(left.nlevels):
            lobj = f"{obj} level [{level}]"
            try:
                # try comparison on levels/codes to avoid densifying MultiIndex
                assert_index_equal(
                    left.levels[level],
                    right.levels[level],
                    exact=exact,
                    check_names=check_names,
                    check_exact=check_exact,
                    check_categorical=check_categorical,
                    rtol=rtol,
                    atol=atol,
                    obj=lobj,
                )
                assert_numpy_array_equal(left.codes[level], right.codes[level])
            except AssertionError:
                # Levels or codes differ; fall back to comparing the level
                # values themselves, which is what decides the outcome.
                llevel = left.get_level_values(level)
                rlevel = right.get_level_values(level)

                assert_index_equal(
                    llevel,
                    rlevel,
                    exact=exact,
                    check_names=check_names,
                    check_exact=check_exact,
                    check_categorical=check_categorical,
                    rtol=rtol,
                    atol=atol,
                    obj=lobj,
                )
            # get_level_values may change dtype
            _check_types(left.levels[level], right.levels[level], obj=lobj)

    # skip exact index checking when `check_categorical` is False
    elif check_exact and check_categorical:
        if not left.equals(right):
            # _values compare can raise TypeError (non-comparable
            # categoricals (GH#61935)
            try:
                mismatch = left._values != right._values
            except TypeError:
                raise_assert_detail(
                    obj,
                    "types are not comparable (non-matching categorical categories)",
                    left,
                    right,
                )

            # A non-ndarray comparison result has its missing entries filled
            # with True, so they are counted as mismatches.
            if not isinstance(mismatch, np.ndarray):
                mismatch = cast("ExtensionArray", mismatch).fillna(True)

            # Percentage of positions that differ, reported in the message.
            diff = np.sum(mismatch.astype(int)) * 100.0 / len(left)
            msg = f"{obj} values are different ({np.round(diff, 5)} %)"
            raise_assert_detail(obj, msg, left, right)
    else:
        # Tolerance-based comparison. Also taken when `check_categorical` is
        # False, regardless of `check_exact`.
        # if we have "equiv", this becomes True
        exact_bool = bool(exact)
        _testing.assert_almost_equal(
            left.values,
            right.values,
            rtol=rtol,
            atol=atol,
            check_dtype=exact_bool,
            obj=obj,
            lobj=left,
            robj=right,
        )

    # metadata comparison
    if check_names:
        assert_attr_equal("names", left, right, obj=obj)
    if isinstance(left, PeriodIndex) or isinstance(right, PeriodIndex):
        assert_attr_equal("dtype", left, right, obj=obj)
    if isinstance(left, IntervalIndex) or isinstance(right, IntervalIndex):
        assert_interval_array_equal(left._values, right._values)

    if check_categorical:
        # Runs when either side is categorical; assert_categorical_equal then
        # rejects a pair in which only one side is a Categorical.
        if isinstance(left.dtype, CategoricalDtype) or isinstance(
            right.dtype, CategoricalDtype
        ):
            assert_categorical_equal(left._values, right._values, obj=f"{obj} category")
- def assert_class_equal(
- left, right, exact: bool | str = True, obj: str = "Input"
- ) -> None:
- """
- Checks classes are equal.
- """
- __tracebackhide__ = True
- def repr_class(x):
- if isinstance(x, Index):
- # return Index as it is to include values in the error message
- return x
- return type(x).__name__
- def is_class_equiv(idx: Index) -> bool:
- """Classes that are a RangeIndex (sub-)instance or exactly an `Index` .
- This only checks class equivalence. There is a separate check that the
- dtype is int64.
- """
- return type(idx) is Index or isinstance(idx, RangeIndex)
- if type(left) == type(right):
- return
- if exact == "equiv":
- if is_class_equiv(left) and is_class_equiv(right):
- return
- msg = f"{obj} classes are different"
- raise_assert_detail(obj, msg, repr_class(left), repr_class(right))
def assert_attr_equal(attr: str, left, right, obj: str = "Attributes") -> None:
    """
    Assert that an attribute has equal values on two objects.

    Both objects must have the attribute. Identical objects and matching
    missing-value sentinels count as equal; otherwise ``==`` is used and an
    element-wise result is reduced with ``.all()``.

    Parameters
    ----------
    attr : str
        Attribute name being compared.
    left : object
    right : object
    obj : str, default 'Attributes'
        Specify object name being compared, internally used to show appropriate
        assertion message

    Raises
    ------
    AssertionError
        If the attribute values are different.
    """
    __tracebackhide__ = True

    lhs = getattr(left, attr)
    rhs = getattr(right, attr)

    # e.g. both np.nan, both NaT, both pd.NA, ...
    if lhs is rhs or is_matching_na(lhs, rhs):
        return None

    try:
        equal = lhs == rhs
    except TypeError:
        # datetimetz on rhs may raise TypeError
        equal = False

    only_one_is_na = (lhs is pd.NA) != (rhs is pd.NA)
    if only_one_is_na:
        equal = False
    elif not isinstance(equal, bool):
        # Element-wise comparison result: every element has to match.
        equal = equal.all()

    if equal:
        return None

    raise_assert_detail(obj, f'Attribute "{attr}" are different', lhs, rhs)
def assert_is_sorted(seq) -> None:
    """
    Assert that the sequence is sorted.

    ``Index`` and ``Series`` inputs are unwrapped to their ``.values``
    first. The values are then compared against a sorted copy of themselves.
    """
    values = seq.values if isinstance(seq, (Index, Series)) else seq

    if not isinstance(values, np.ndarray):
        # Not a numpy array: reorder through argsort and compare as an
        # extension array.
        assert_extension_array_equal(values, values[values.argsort()])
        return

    # sorting does not change precisions
    assert_numpy_array_equal(values, np.sort(np.array(values)))
def assert_categorical_equal(
    left,
    right,
    check_dtype: bool = True,
    check_category_order: bool = True,
    obj: str = "Categorical",
) -> None:
    """
    Assert that two Categoricals are equivalent.

    Parameters
    ----------
    left : Categorical
    right : Categorical
    check_dtype : bool, default True
        Check that integer dtype of the codes are the same.
    check_category_order : bool, default True
        Whether the order of the categories should be compared, which
        implies identical integer codes. If False, only the resulting
        values are compared. The ordered attribute is
        checked regardless.
    obj : str, default 'Categorical'
        Specify object name being compared, internally used to show appropriate
        assertion message.

    Raises
    ------
    AssertionError
        If either input is not a ``Categorical`` or a comparison fails.
    """
    _check_isinstance(left, right, Categorical)

    left_cats = left.categories
    right_cats = right.categories

    # A RangeIndex on either side relaxes the class check to "equiv";
    # otherwise an exact match of the Index class is still required.
    any_range = isinstance(left_cats, RangeIndex) or isinstance(
        right_cats, RangeIndex
    )
    exact: bool | str = "equiv" if any_range else True

    cats_label = f"{obj}.categories"

    if not check_category_order:
        try:
            sorted_left = left_cats.sort_values()
            sorted_right = right_cats.sort_values()
        except TypeError:
            # e.g. '<' not supported between instances of 'int' and 'str'
            sorted_left, sorted_right = left_cats, right_cats
        assert_index_equal(sorted_left, sorted_right, obj=cats_label, exact=exact)
        # Compare the values each code maps to instead of the raw codes.
        assert_index_equal(
            left_cats.take(left.codes),
            right_cats.take(right.codes),
            obj=f"{obj}.values",
            exact=exact,
        )
    else:
        assert_index_equal(left_cats, right_cats, obj=cats_label, exact=exact)
        assert_numpy_array_equal(
            left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes"
        )

    assert_attr_equal("ordered", left, right, obj=obj)
- def assert_interval_array_equal(
- left, right, exact: bool | Literal["equiv"] = "equiv", obj: str = "IntervalArray"
- ) -> None:
- """
- Test that two IntervalArrays are equivalent.
- Parameters
- ----------
- left, right : IntervalArray
- The IntervalArrays to compare.
- exact : bool or {'equiv'}, default 'equiv'
- Whether to check the Index class, dtype and inferred_type
- are identical. If 'equiv', then RangeIndex can be substituted for
- Index with an int64 dtype as well.
- obj : str, default 'IntervalArray'
- Specify object name being compared, internally used to show appropriate
- assertion message
- """
- _check_isinstance(left, right, IntervalArray)
- kwargs = {}
- if left._left.dtype.kind in "mM":
- # We have a DatetimeArray or TimedeltaArray
- kwargs["check_freq"] = False
- assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs)
- assert_equal(left._right, right._right, obj=f"{obj}.right", **kwargs)
- assert_attr_equal("closed", left, right, obj=obj)
def assert_period_array_equal(left, right, obj: str = "PeriodArray") -> None:
    """
    Assert that two PeriodArrays are equal.

    The backing ``_ndarray`` values are compared first, then the ``dtype``
    attribute.

    Parameters
    ----------
    left, right : PeriodArray
        The arrays to compare.
    obj : str, default 'PeriodArray'
        Object name used in assertion messages.

    Raises
    ------
    AssertionError
        If either input is not a ``PeriodArray`` or a comparison fails.
    """
    _check_isinstance(left, right, PeriodArray)

    ndarray_label = f"{obj}._ndarray"
    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=ndarray_label)
    assert_attr_equal("dtype", left, right, obj=obj)
def assert_datetime_array_equal(
    left, right, obj: str = "DatetimeArray", check_freq: bool = True
) -> None:
    """
    Assert that two DatetimeArrays are equal.

    The backing ``_ndarray`` values are compared first, followed by the
    ``freq`` attribute (optional) and the ``tz`` attribute.

    Parameters
    ----------
    left, right : DatetimeArray
        The arrays to compare.
    obj : str, default 'DatetimeArray'
        Object name used in assertion messages.
    check_freq : bool, default True
        Whether to compare the ``freq`` attribute.

    Raises
    ------
    AssertionError
        If either input is not a ``DatetimeArray`` or a comparison fails.
    """
    __tracebackhide__ = True
    _check_isinstance(left, right, DatetimeArray)

    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray")

    # "tz" is always compared; "freq" comes first when requested.
    attrs_to_compare = ("freq", "tz") if check_freq else ("tz",)
    for attr_name in attrs_to_compare:
        assert_attr_equal(attr_name, left, right, obj=obj)
def assert_timedelta_array_equal(
    left, right, obj: str = "TimedeltaArray", check_freq: bool = True
) -> None:
    """
    Assert that two TimedeltaArrays are equal.

    The backing ``_ndarray`` values are compared first, optionally followed
    by the ``freq`` attribute.

    Parameters
    ----------
    left, right : TimedeltaArray
        The arrays to compare.
    obj : str, default 'TimedeltaArray'
        Object name used in assertion messages.
    check_freq : bool, default True
        Whether to compare the ``freq`` attribute.

    Raises
    ------
    AssertionError
        If either input is not a ``TimedeltaArray`` or a comparison fails.
    """
    __tracebackhide__ = True
    _check_isinstance(left, right, TimedeltaArray)

    values_label = f"{obj}._ndarray"
    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=values_label)

    if not check_freq:
        return
    assert_attr_equal("freq", left, right, obj=obj)
- def raise_assert_detail(
- obj, message, left, right, diff=None, first_diff=None, index_values=None
- ) -> NoReturn:
- __tracebackhide__ = True
- msg = f"""{obj} are different
- {message}"""
- if isinstance(index_values, Index):
- index_values = np.asarray(index_values)
- if isinstance(index_values, np.ndarray):
- msg += f"\n[index]: {pprint_thing(index_values)}"
- if isinstance(left, np.ndarray):
- left = pprint_thing(left)
- elif isinstance(left, (CategoricalDtype, StringDtype, NumpyEADtype)):
- left = repr(left)
- if isinstance(right, np.ndarray):
- right = pprint_thing(right)
- elif isinstance(right, (CategoricalDtype, StringDtype, NumpyEADtype)):
- right = repr(right)
- msg += f"""
- [left]: {left}
- [right]: {right}"""
- if diff is not None:
- msg += f"\n[diff]: {diff}"
- if first_diff is not None:
- msg += f"\n{first_diff}"
- raise AssertionError(msg)
- def assert_numpy_array_equal(
- left,
- right,
- strict_nan: bool = False,
- check_dtype: bool | Literal["equiv"] = True,
- err_msg=None,
- check_same=None,
- obj: str = "numpy array",
- index_values=None,
- ) -> None:
- """
- Check that 'np.ndarray' is equivalent.
- Parameters
- ----------
- left, right : numpy.ndarray or iterable
- The two arrays to be compared.
- strict_nan : bool, default False
- If True, consider NaN and None to be different.
- check_dtype : bool, default True
- Check dtype if both a and b are np.ndarray.
- err_msg : str, default None
- If provided, used as assertion message.
- check_same : None|'copy'|'same', default None
- Ensure left and right refer/do not refer to the same memory area.
- obj : str, default 'numpy array'
- Specify object name being compared, internally used to show appropriate
- assertion message.
- index_values : Index | numpy.ndarray, default None
- optional index (shared by both left and right), used in output.
- """
- __tracebackhide__ = True
- # instance validation
- # Show a detailed error message when classes are different
- assert_class_equal(left, right, obj=obj)
- # both classes must be an np.ndarray
- _check_isinstance(left, right, np.ndarray)
- def _get_base(obj):
- return obj.base if getattr(obj, "base", None) is not None else obj
- left_base = _get_base(left)
- right_base = _get_base(right)
- if check_same == "same":
- if left_base is not right_base:
- raise AssertionError(f"{left_base!r} is not {right_base!r}")
- elif check_same == "copy":
- if left_base is right_base:
- raise AssertionError(f"{left_base!r} is {right_base!r}")
- def _raise(left, right, err_msg) -> NoReturn:
- if err_msg is None:
- if left.shape != right.shape:
- raise_assert_detail(
- obj, f"{obj} shapes are different", left.shape, right.shape
- )
- diff = 0
- for left_arr, right_arr in zip(left, right, strict=True):
- # count up differences
- if not array_equivalent(left_arr, right_arr, strict_nan=strict_nan):
- diff += 1
- diff = diff * 100.0 / left.size
- msg = f"{obj} values are different ({np.round(diff, 5)} %)"
- raise_assert_detail(obj, msg, left, right, index_values=index_values)
- raise AssertionError(err_msg)
- # compare shape and values
- if not array_equivalent(left, right, strict_nan=strict_nan):
- _raise(left, right, err_msg)
- if check_dtype:
- if isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
- assert_attr_equal("dtype", left, right, obj=obj)
- @set_module("pandas.testing")
- def assert_extension_array_equal(
- left,
- right,
- check_dtype: bool | Literal["equiv"] = True,
- index_values=None,
- check_exact: bool | lib.NoDefault = lib.no_default,
- rtol: float | lib.NoDefault = lib.no_default,
- atol: float | lib.NoDefault = lib.no_default,
- obj: str = "ExtensionArray",
- ) -> None:
- """
- Check that left and right ExtensionArrays are equal.
- This method compares two ``ExtensionArray`` instances for equality,
- including checks for missing values, the dtype of the arrays, and
- the exactness of the comparison (or tolerance when comparing floats).
- Parameters
- ----------
- left, right : ExtensionArray
- The two arrays to compare.
- check_dtype : bool, default True
- Whether to check if the ExtensionArray dtypes are identical.
- index_values : Index | numpy.ndarray, default None
- Optional index (shared by both left and right), used in output.
- check_exact : bool, default False
- Whether to compare number exactly.
- .. versionchanged:: 2.2.0
- Defaults to True for integer dtypes if none of
- ``check_exact``, ``rtol`` and ``atol`` are specified.
- rtol : float, default 1e-5
- Relative tolerance. Only used when check_exact is False.
- atol : float, default 1e-8
- Absolute tolerance. Only used when check_exact is False.
- obj : str, default 'ExtensionArray'
- Specify object name being compared, internally used to show appropriate
- assertion message.
- .. versionadded:: 2.0.0
- See Also
- --------
- testing.assert_series_equal : Check that left and right ``Series`` are equal.
- testing.assert_frame_equal : Check that left and right ``DataFrame`` are equal.
- testing.assert_index_equal : Check that left and right ``Index`` are equal.
- Notes
- -----
- Missing values are checked separately from valid values.
- A mask of missing values is computed for each and checked to match.
- The remaining all-valid values are cast to object dtype and checked.
- Examples
- --------
- >>> from pandas import testing as tm
- >>> a = pd.Series([1, 2, 3, 4])
- >>> b, c = a.array, a.array
- >>> tm.assert_extension_array_equal(b, c)
- """
- if (
- check_exact is lib.no_default
- and rtol is lib.no_default
- and atol is lib.no_default
- ):
- check_exact = (
- is_numeric_dtype(left.dtype) and not is_float_dtype(left.dtype)
- ) or (is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype))
- elif check_exact is lib.no_default:
- check_exact = False
- rtol = rtol if rtol is not lib.no_default else 1.0e-5
- atol = atol if atol is not lib.no_default else 1.0e-8
- assert isinstance(left, ExtensionArray), "left is not an ExtensionArray"
- assert isinstance(right, ExtensionArray), "right is not an ExtensionArray"
- if check_dtype:
- assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
- if (
- isinstance(left, DatetimeLikeArrayMixin)
- and isinstance(right, DatetimeLikeArrayMixin)
- and type(right) == type(left)
- ):
- # GH 52449
- if not check_dtype and left.dtype.kind in "mM":
- if not isinstance(left.dtype, np.dtype):
- l_unit = cast(DatetimeTZDtype, left.dtype).unit
- else:
- l_unit = np.datetime_data(left.dtype)[0]
- if not isinstance(right.dtype, np.dtype):
- r_unit = cast(DatetimeTZDtype, right.dtype).unit
- else:
- r_unit = np.datetime_data(right.dtype)[0]
- if (
- l_unit != r_unit
- and compare_mismatched_resolutions(
- left._ndarray, right._ndarray, operator.eq
- ).all()
- ):
- return
- # Avoid slow object-dtype comparisons
- # np.asarray for case where we have an np.MaskedArray
- assert_numpy_array_equal(
- np.asarray(left.asi8),
- np.asarray(right.asi8),
- index_values=index_values,
- obj=obj,
- )
- return
- left_na = np.asarray(left.isna())
- right_na = np.asarray(right.isna())
- assert_numpy_array_equal(
- left_na, right_na, obj=f"{obj} NA mask", index_values=index_values
- )
- # Specifically for StringArrayNumpySemantics, validate here we have a valid array
- if (
- isinstance(left.dtype, StringDtype)
- and left.dtype.storage == "python"
- and left.dtype.na_value is np.nan
- ):
- assert np.all(
- [np.isnan(val) for val in left._ndarray[left_na]] # type: ignore[attr-defined]
- ), "wrong missing value sentinels"
- if (
- isinstance(right.dtype, StringDtype)
- and right.dtype.storage == "python"
- and right.dtype.na_value is np.nan
- ):
- assert np.all(
- [np.isnan(val) for val in right._ndarray[right_na]] # type: ignore[attr-defined]
- ), "wrong missing value sentinels"
- left_valid = left[~left_na].to_numpy(dtype=object)
- right_valid = right[~right_na].to_numpy(dtype=object)
- if check_exact:
- assert_numpy_array_equal(
- left_valid, right_valid, obj=obj, index_values=index_values
- )
- else:
- _testing.assert_almost_equal(
- left_valid,
- right_valid,
- check_dtype=bool(check_dtype),
- rtol=rtol,
- atol=atol,
- obj=obj,
- index_values=index_values,
- )
- # This could be refactored to use the NDFrame.equals method
- @set_module("pandas.testing")
- @deprecate_kwarg(Pandas4Warning, "check_datetimelike_compat", new_arg_name=None)
- def assert_series_equal(
- left,
- right,
- check_dtype: bool | Literal["equiv"] = True,
- check_index_type: bool | Literal["equiv"] = "equiv",
- check_series_type: bool = True,
- check_names: bool = True,
- check_exact: bool | lib.NoDefault = lib.no_default,
- check_datetimelike_compat: bool = False,
- check_categorical: bool = True,
- check_category_order: bool = True,
- check_freq: bool = True,
- check_flags: bool = True,
- rtol: float | lib.NoDefault = lib.no_default,
- atol: float | lib.NoDefault = lib.no_default,
- obj: str = "Series",
- *,
- check_index: bool = True,
- check_like: bool = False,
- ) -> None:
- """
- Check that left and right Series are equal.
- Parameters
- ----------
- left : Series
- First Series to compare.
- right : Series
- Second Series to compare.
- check_dtype : bool, default True
- Whether to check the Series dtype is identical.
- check_index_type : bool or {'equiv'}, default 'equiv'
- Whether to check the Index class, dtype and inferred_type
- are identical.
- check_series_type : bool, default True
- Whether to check the Series class is identical.
- check_names : bool, default True
- Whether to check the Series and Index names attribute.
- check_exact : bool, default False
- Whether to compare number exactly. This also applies when checking
- Index equivalence.
- .. versionchanged:: 2.2.0
- Defaults to True for integer dtypes if none of
- ``check_exact``, ``rtol`` and ``atol`` are specified.
- .. versionchanged:: 3.0.0
- check_exact for comparing the Indexes defaults to True by
- checking if an Index is of integer dtypes.
- check_datetimelike_compat : bool, default False
- Compare datetime-like which is comparable ignoring dtype.
- .. deprecated:: 3.0
- check_categorical : bool, default True
- Whether to compare internal Categorical exactly.
- check_category_order : bool, default True
- Whether to compare category order of internal Categoricals.
- check_freq : bool, default True
- Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex.
- check_flags : bool, default True
- Whether to check the `flags` attribute.
- rtol : float, default 1e-5
- Relative tolerance. Only used when check_exact is False.
- atol : float, default 1e-8
- Absolute tolerance. Only used when check_exact is False.
- obj : str, default 'Series'
- Specify object name being compared, internally used to show appropriate
- assertion message.
- check_index : bool, default True
- Whether to check index equivalence. If False, then compare only values.
- check_like : bool, default False
- If True, ignore the order of the index. Must be False if check_index is False.
- Note: same labels must be with the same data.
- See Also
- --------
- testing.assert_index_equal : Check that two Indexes are equal.
- testing.assert_frame_equal : Check that two DataFrames are equal.
- Examples
- --------
- >>> from pandas import testing as tm
- >>> a = pd.Series([1, 2, 3, 4])
- >>> b = pd.Series([1, 2, 3, 4])
- >>> tm.assert_series_equal(a, b)
- """
- __tracebackhide__ = True
- if (
- check_exact is lib.no_default
- and rtol is lib.no_default
- and atol is lib.no_default
- ):
- check_exact = (
- is_numeric_dtype(left.dtype) and not is_float_dtype(left.dtype)
- ) or (is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype))
- left_index_dtypes = (
- [left.index.dtype] if left.index.nlevels == 1 else left.index.dtypes
- )
- right_index_dtypes = (
- [right.index.dtype] if right.index.nlevels == 1 else right.index.dtypes
- )
- check_exact_index = all(
- dtype.kind in "iu" for dtype in left_index_dtypes
- ) or all(dtype.kind in "iu" for dtype in right_index_dtypes)
- elif check_exact is lib.no_default:
- check_exact = False
- check_exact_index = False
- else:
- check_exact_index = check_exact
- rtol = rtol if rtol is not lib.no_default else 1.0e-5
- atol = atol if atol is not lib.no_default else 1.0e-8
- if not check_index and check_like:
- raise ValueError("check_like must be False if check_index is False")
- # instance validation
- _check_isinstance(left, right, Series)
- if check_series_type:
- assert_class_equal(left, right, obj=obj)
- # length comparison
- if len(left) != len(right):
- msg1 = f"{len(left)}, {left.index}"
- msg2 = f"{len(right)}, {right.index}"
- raise_assert_detail(obj, "Series length are different", msg1, msg2)
- if check_flags:
- assert left.flags == right.flags, f"{left.flags!r} != {right.flags!r}"
- if check_index:
- # GH #38183
- assert_index_equal(
- left.index,
- right.index,
- exact=check_index_type,
- check_names=check_names,
- check_exact=check_exact_index,
- check_categorical=check_categorical,
- check_order=not check_like,
- rtol=rtol,
- atol=atol,
- obj=f"{obj}.index",
- )
- if check_like:
- left = left.reindex_like(right)
- if check_freq and isinstance(left.index, (DatetimeIndex, TimedeltaIndex)):
- lidx = left.index
- ridx = right.index
- assert lidx.freq == ridx.freq, (lidx.freq, ridx.freq)
- if check_dtype:
- # We want to skip exact dtype checking when `check_categorical`
- # is False. We'll still raise if only one is a `Categorical`,
- # regardless of `check_categorical`
- if (
- isinstance(left.dtype, CategoricalDtype)
- and isinstance(right.dtype, CategoricalDtype)
- and not check_categorical
- ):
- pass
- else:
- assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
- if check_exact:
- left_values = left._values
- right_values = right._values
- # Only check exact if dtype is numeric
- if isinstance(left_values, ExtensionArray) and isinstance(
- right_values, ExtensionArray
- ):
- assert_extension_array_equal(
- left_values,
- right_values,
- check_dtype=check_dtype,
- index_values=left.index,
- obj=str(obj),
- )
- else:
- # convert both to NumPy if not, check_dtype would raise earlier
- lv, rv = left_values, right_values
- if isinstance(left_values, ExtensionArray):
- lv = left_values.to_numpy()
- if isinstance(right_values, ExtensionArray):
- rv = right_values.to_numpy()
- assert_numpy_array_equal(
- lv,
- rv,
- check_dtype=check_dtype,
- obj=str(obj),
- index_values=left.index,
- )
- elif check_datetimelike_compat and (
- needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype)
- ):
- # we want to check only if we have compat dtypes
- # e.g. integer and M|m are NOT compat, but we can simply check
- # the values in that case
- # datetimelike may have different objects (e.g. datetime.datetime
- # vs Timestamp) but will compare equal
- if not Index(left._values).equals(Index(right._values)):
- msg = (
- f"[datetimelike_compat=True] {left._values} "
- f"is not equal to {right._values}."
- )
- raise AssertionError(msg)
- elif isinstance(left.dtype, IntervalDtype) and isinstance(
- right.dtype, IntervalDtype
- ):
- assert_interval_array_equal(left.array, right.array)
- elif isinstance(left.dtype, CategoricalDtype) or isinstance(
- right.dtype, CategoricalDtype
- ):
- _testing.assert_almost_equal(
- left._values,
- right._values,
- rtol=rtol,
- atol=atol,
- check_dtype=bool(check_dtype),
- obj=str(obj),
- index_values=left.index,
- )
- elif isinstance(left.dtype, ExtensionDtype) and isinstance(
- right.dtype, ExtensionDtype
- ):
- assert_extension_array_equal(
- left._values,
- right._values,
- rtol=rtol,
- atol=atol,
- check_dtype=check_dtype,
- index_values=left.index,
- obj=str(obj),
- )
- elif is_extension_array_dtype_and_needs_i8_conversion(
- left.dtype, right.dtype
- ) or is_extension_array_dtype_and_needs_i8_conversion(right.dtype, left.dtype):
- assert_extension_array_equal(
- left._values,
- right._values,
- check_dtype=check_dtype,
- index_values=left.index,
- obj=str(obj),
- )
- elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype):
- # DatetimeArray or TimedeltaArray
- assert_extension_array_equal(
- left._values,
- right._values,
- check_dtype=check_dtype,
- index_values=left.index,
- obj=str(obj),
- )
- else:
- _testing.assert_almost_equal(
- left._values,
- right._values,
- rtol=rtol,
- atol=atol,
- check_dtype=bool(check_dtype),
- obj=str(obj),
- index_values=left.index,
- )
- # metadata comparison
- if check_names:
- assert_attr_equal("name", left, right, obj=obj)
- if check_categorical:
- if isinstance(left.dtype, CategoricalDtype) or isinstance(
- right.dtype, CategoricalDtype
- ):
- assert_categorical_equal(
- left._values,
- right._values,
- obj=f"{obj} category",
- check_category_order=check_category_order,
- )
- # This could be refactored to use the NDFrame.equals method
@set_module("pandas.testing")
@deprecate_kwarg(Pandas4Warning, "check_datetimelike_compat", new_arg_name=None)
def assert_frame_equal(
    left,
    right,
    check_dtype: bool | Literal["equiv"] = True,
    check_index_type: bool | Literal["equiv"] = "equiv",
    check_column_type: bool | Literal["equiv"] = "equiv",
    check_frame_type: bool = True,
    check_names: bool = True,
    by_blocks: bool = False,
    check_exact: bool | lib.NoDefault = lib.no_default,
    check_datetimelike_compat: bool = False,
    check_categorical: bool = True,
    check_like: bool = False,
    check_freq: bool = True,
    check_flags: bool = True,
    rtol: float | lib.NoDefault = lib.no_default,
    atol: float | lib.NoDefault = lib.no_default,
    obj: str = "DataFrame",
) -> None:
    """
    Assert that two DataFrames are equal, describing the difference if not.

    The function is primarily meant for unit tests. The keyword arguments
    make individual parts of the comparison stricter or more lenient.

    Parameters
    ----------
    left : DataFrame
        First DataFrame to compare.
    right : DataFrame
        Second DataFrame to compare.
    check_dtype : bool, default True
        Whether to check the DataFrame dtype is identical.
    check_index_type : bool or {'equiv'}, default 'equiv'
        Whether to check the Index class, dtype and inferred_type
        are identical.
    check_column_type : bool or {'equiv'}, default 'equiv'
        Whether to check the columns class, dtype and inferred_type
        are identical. Is passed as the ``exact`` argument of
        :func:`assert_index_equal`.
    check_frame_type : bool, default True
        Whether to check that ``left`` is an instance of the class of
        ``right``.
    check_names : bool, default True
        Whether to check that the `names` attribute for both the `index`
        and `column` attributes of the DataFrame is identical.
    by_blocks : bool, default False
        Specify how to compare internal data. If False, compare by columns.
        If True, compare by blocks; only ``check_dtype`` and ``obj`` are
        forwarded to the per-block comparison.
    check_exact : bool, default False
        Whether to compare number exactly. If False, the comparison uses the
        relative tolerance (``rtol``) and absolute tolerance (``atol``)
        parameters to determine if two values are considered close,
        according to the formula: ``|a - b| <= (atol + rtol * |b|)``.

        .. versionchanged:: 2.2.0

            Defaults to True for integer dtypes if none of
            ``check_exact``, ``rtol`` and ``atol`` are specified.
    check_datetimelike_compat : bool, default False
        Compare datetime-like which is comparable ignoring dtype.

        .. deprecated:: 3.0
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly.
    check_like : bool, default False
        If True, ignore the order of index & columns.
        Note: index labels must match their respective rows
        (same as in columns) - same labels must be with the same data.
    check_freq : bool, default True
        Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex.
    check_flags : bool, default True
        Whether to check the `flags` attribute.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'DataFrame'
        Specify object name being compared, internally used to show appropriate
        assertion message.

    See Also
    --------
    assert_series_equal : Equivalent method for asserting Series equality.
    DataFrame.equals : Check DataFrame equality.

    Examples
    --------
    This example shows comparing two DataFrames that are equal
    but with columns of differing dtypes.

    >>> from pandas.testing import assert_frame_equal
    >>> df1 = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
    >>> df2 = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})

    df1 equals itself.

    >>> assert_frame_equal(df1, df1)

    df1 differs from df2 as column 'b' is of a different type.

    >>> assert_frame_equal(df1, df2)
    Traceback (most recent call last):
    ...
    AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="b") are different

    Attribute "dtype" are different
    [left]:  int64
    [right]: float64

    Ignore differing dtypes in columns with check_dtype.

    >>> assert_frame_equal(df1, df2, check_dtype=False)
    """
    __tracebackhide__ = True

    # The axis (index/columns) comparisons get concrete settings. The
    # per-column comparison further down is deliberately handed the *raw*
    # ``check_exact``/``rtol``/``atol`` so that assert_series_equal can resolve
    # unspecified values itself.
    axis_rtol = 1.0e-5 if rtol is lib.no_default else rtol
    axis_atol = 1.0e-8 if atol is lib.no_default else atol
    axis_check_exact = False if check_exact is lib.no_default else check_exact

    # instance validation
    _check_isinstance(left, right, DataFrame)

    if check_frame_type:
        assert isinstance(left, type(right))

    # shape comparison
    if left.shape != right.shape:
        raise_assert_detail(
            obj, f"{obj} shape mismatch", f"{left.shape!r}", f"{right.shape!r}"
        )

    if check_flags:
        assert left.flags == right.flags, f"{left.flags!r} != {right.flags!r}"

    # index comparison first, then column comparison; the two calls differ only
    # in the axis inspected and in which ``check_*_type`` flag is used as ``exact``
    for axis_name, exact in (
        ("index", check_index_type),
        ("columns", check_column_type),
    ):
        assert_index_equal(
            getattr(left, axis_name),
            getattr(right, axis_name),
            exact=exact,
            check_names=check_names,
            check_exact=axis_check_exact,
            check_categorical=check_categorical,
            check_order=not check_like,
            rtol=axis_rtol,
            atol=axis_atol,
            obj=f"{obj}.{axis_name}",
        )

    if check_like:
        # Labels were just compared ignoring order; align ``left`` to ``right``
        # so the data can be compared positionally.
        left = left.reindex_like(right)

    if by_blocks:
        # compare by blocks
        right_blocks = right._to_dict_of_blocks()
        left_blocks = left._to_dict_of_blocks()
        block_keys = set([*left_blocks.keys(), *right_blocks.keys()])
        for key in block_keys:
            assert key in left_blocks
            assert key in right_blocks
            assert_frame_equal(
                left_blocks[key], right_blocks[key], check_dtype=check_dtype, obj=obj
            )
        return

    # compare by columns
    # GH #38183
    # check_index=False because the index was already compared once for the
    # whole frame above; there is no need to repeat it for every column.
    column_kwargs = {
        "check_dtype": check_dtype,
        "check_index_type": check_index_type,
        "check_exact": check_exact,
        "check_names": check_names,
        "check_datetimelike_compat": check_datetimelike_compat,
        "check_categorical": check_categorical,
        "check_freq": check_freq,
        "rtol": rtol,
        "atol": atol,
        "check_index": False,
        "check_flags": False,
    }
    for position, label in enumerate(left.columns):
        # Columns were already checked to match, so fast location-based
        # lookups can be used on both frames.
        left_column = left._ixs(position, axis=1)
        right_column = right._ixs(position, axis=1)

        with warnings.catch_warnings():
            # Silence the warning matching this keyword's message for the
            # internal call, which always passes the keyword explicitly.
            warnings.filterwarnings(
                "ignore",
                message="the 'check_datetimelike_compat' keyword",
                category=Pandas4Warning,
            )
            assert_series_equal(
                left_column,
                right_column,
                obj=f'{obj}.iloc[:, {position}] (column name="{label}")',
                **column_kwargs,
            )
def assert_equal(left, right, **kwargs) -> None:
    """
    Dispatch to the tm.assert_*_equal function that matches the type of ``left``.

    Parameters
    ----------
    left, right : Index, Series, DataFrame, ExtensionArray, or np.ndarray
        The two items to be compared. Only the type of ``left`` is used to
        pick the comparison function.
    **kwargs
        All keyword arguments are passed through to the underlying assert method.
        No keyword arguments may be given when ``left`` is a ``str`` or any
        type not listed above.
    """
    __tracebackhide__ = True

    if isinstance(left, Index):
        assert_index_equal(left, right, **kwargs)
        # freq is additionally compared here for datetime-like indexes
        if isinstance(left, (DatetimeIndex, TimedeltaIndex)):
            assert left.freq == right.freq, (left.freq, right.freq)
        return

    # The first matching entry wins, so the specific array classes are listed
    # ahead of the generic ExtensionArray entry.
    dispatch = (
        (Series, assert_series_equal),
        (DataFrame, assert_frame_equal),
        (IntervalArray, assert_interval_array_equal),
        (PeriodArray, assert_period_array_equal),
        (DatetimeArray, assert_datetime_array_equal),
        (TimedeltaArray, assert_timedelta_array_equal),
        (ExtensionArray, assert_extension_array_equal),
        (np.ndarray, assert_numpy_array_equal),
    )
    for klass, asserter in dispatch:
        if isinstance(left, klass):
            asserter(left, right, **kwargs)
            return

    # Remaining inputs are compared without any options.
    assert kwargs == {}
    if isinstance(left, str):
        assert left == right
    else:
        assert_almost_equal(left, right)
def assert_sp_array_equal(left, right) -> None:
    """
    Check that the left and right SparseArray are equal.

    The stored sparse values, the sparse index, the ``fill_value`` and
    ``dtype`` attributes and finally the dense representations are compared,
    in that order.

    Parameters
    ----------
    left : SparseArray
    right : SparseArray
    """
    _check_isinstance(left, right, pd.arrays.SparseArray)

    assert_numpy_array_equal(left.sp_values, right.sp_values)

    # SparseIndex comparison
    left_index = left.sp_index
    assert isinstance(left_index, SparseIndex)
    right_index = right.sp_index
    assert isinstance(right_index, SparseIndex)

    if not left_index.equals(right_index):
        raise_assert_detail(
            "SparseArray.index", "index are not equal", left_index, right_index
        )

    for attr in ("fill_value", "dtype"):
        assert_attr_equal(attr, left, right)

    assert_numpy_array_equal(left.to_dense(), right.to_dense())
def assert_contains_all(iterable, dic) -> None:
    """
    Assert that every item produced by ``iterable`` is contained in ``dic``.

    Parameters
    ----------
    iterable : iterable
        Items to look up.
    dic : container
        Any object supporting the ``in`` operator.

    Raises
    ------
    AssertionError
        For the first item that is not found; the message includes its repr.
    """
    for item in iterable:
        present = item in dic
        assert present, f"Did not contain item: {item!r}"
- def assert_copy(iter1, iter2, **eql_kwargs) -> None:
- """
- iter1, iter2: iterables that produce elements
- comparable with assert_almost_equal
- Checks that the elements are equal, but not
- the same object. (Does not check that items
- in sequences are also not the same object)
- """
- for elem1, elem2 in zip(iter1, iter2, strict=True):
- assert_almost_equal(elem1, elem2, **eql_kwargs)
- msg = (
- f"Expected object {type(elem1)!r} and object {type(elem2)!r} to be "
- "different objects, but they were the same object."
- )
- assert elem1 is not elem2, msg
def is_extension_array_dtype_and_needs_i8_conversion(
    left_dtype: DtypeObj, right_dtype: DtypeObj
) -> bool:
    """
    Report whether an extension dtype is paired with an i8-convertible dtype.

    Parameters
    ----------
    left_dtype : dtype
        Must be an ``ExtensionDtype`` instance for the result to be True.
    right_dtype : dtype
        Tested with ``needs_i8_conversion``.

    Returns
    -------
    bool

    Notes
    -----
    The check is not symmetric in its arguments; callers that do not care
    which side is which need to call it both ways.

    Related to issue #37609
    """
    if not isinstance(left_dtype, ExtensionDtype):
        return False
    return needs_i8_conversion(right_dtype)
def assert_indexing_slices_equivalent(ser: Series, l_slc: slice, i_slc: slice) -> None:
    """
    Assert that a label slice and a positional slice select the same data.

    ``ser.loc[l_slc]`` must equal ``ser.iloc[i_slc]``. When the index does not
    have an integer dtype, ``ser[l_slc]`` must equal it as well.
    """
    by_position = ser.iloc[i_slc]
    by_label = ser.loc[l_slc]
    assert_series_equal(by_label, by_position)

    if is_integer_dtype(ser.index):
        # The plain ``ser[l_slc]`` comparison is skipped for integer-dtype
        # indexes.
        return

    assert_series_equal(ser[l_slc], by_position)
- def assert_metadata_equivalent(
- left: DataFrame | Series, right: DataFrame | Series | None = None
- ) -> None:
- """
- Check that ._metadata attributes are equivalent.
- """
- for attr in left._metadata:
- val = getattr(left, attr, None)
- if right is None:
- assert val is None
- else:
- assert val == getattr(right, attr, None)
|