test_misc.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. import sys
  2. import numpy as np
  3. import pytest
  4. from pandas._config import using_string_dtype
  5. from pandas.compat import PYPY
  6. from pandas.core.dtypes.common import (
  7. is_dtype_equal,
  8. is_object_dtype,
  9. )
  10. import pandas as pd
  11. from pandas import (
  12. Index,
  13. Series,
  14. )
  15. import pandas._testing as tm
  16. def test_isnull_notnull_docstrings():
  17. # GH#41855 make sure its clear these are aliases
  18. doc = pd.DataFrame.notnull.__doc__
  19. assert doc.startswith("\nDataFrame.notnull is an alias for DataFrame.notna.\n")
  20. doc = pd.DataFrame.isnull.__doc__
  21. assert doc.startswith("\nDataFrame.isnull is an alias for DataFrame.isna.\n")
  22. doc = Series.notnull.__doc__
  23. assert doc.startswith("\nSeries.notnull is an alias for Series.notna.\n")
  24. doc = Series.isnull.__doc__
  25. assert doc.startswith("\nSeries.isnull is an alias for Series.isna.\n")
  26. @pytest.mark.parametrize(
  27. "op_name, op",
  28. [
  29. ("add", "+"),
  30. ("sub", "-"),
  31. ("mul", "*"),
  32. ("mod", "%"),
  33. ("pow", "**"),
  34. ("truediv", "/"),
  35. ("floordiv", "//"),
  36. ],
  37. )
  38. def test_binary_ops_docstring(frame_or_series, op_name, op):
  39. # not using the all_arithmetic_functions fixture with _get_opstr
  40. # as _get_opstr is used internally in the dynamic implementation of the docstring
  41. klass = frame_or_series
  42. operand1 = klass.__name__.lower()
  43. operand2 = "other"
  44. expected_str = " ".join([operand1, op, operand2])
  45. assert expected_str in getattr(klass, op_name).__doc__
  46. # reverse version of the binary ops
  47. expected_str = " ".join([operand2, op, operand1])
  48. assert expected_str in getattr(klass, "r" + op_name).__doc__
  49. def test_ndarray_compat_properties(index_or_series_obj):
  50. obj = index_or_series_obj
  51. # Check that we work.
  52. for p in ["shape", "dtype", "T", "nbytes"]:
  53. assert getattr(obj, p, None) is not None
  54. # deprecated properties
  55. for p in ["strides", "itemsize", "base", "data"]:
  56. assert not hasattr(obj, p)
  57. msg = "can only convert an array of size 1 to a Python scalar"
  58. with pytest.raises(ValueError, match=msg):
  59. obj.item() # len > 1
  60. assert obj.ndim == 1
  61. assert obj.size == len(obj)
  62. assert Index([1]).item() == 1
  63. assert Series([1]).item() == 1
  64. @pytest.mark.skipif(
  65. PYPY or using_string_dtype(),
  66. reason="not relevant for PyPy doesn't work properly for arrow strings",
  67. )
  68. def test_memory_usage(index_or_series_memory_obj):
  69. obj = index_or_series_memory_obj
  70. # Clear index caches so that len(obj) == 0 report 0 memory usage
  71. if isinstance(obj, Series):
  72. is_ser = True
  73. obj.index._engine.clear_mapping()
  74. else:
  75. is_ser = False
  76. obj._engine.clear_mapping()
  77. res = obj.memory_usage()
  78. res_deep = obj.memory_usage(deep=True)
  79. is_object = is_object_dtype(obj) or (is_ser and is_object_dtype(obj.index))
  80. is_categorical = isinstance(obj.dtype, pd.CategoricalDtype) or (
  81. is_ser and isinstance(obj.index.dtype, pd.CategoricalDtype)
  82. )
  83. is_object_string = is_dtype_equal(obj, "string[python]") or (
  84. is_ser and is_dtype_equal(obj.index.dtype, "string[python]")
  85. )
  86. if len(obj) == 0:
  87. expected = 0
  88. assert res_deep == res == expected
  89. elif is_object or is_categorical or is_object_string:
  90. # only deep will pick them up
  91. assert res_deep > res
  92. else:
  93. assert res == res_deep
  94. # sys.getsizeof will call the .memory_usage with
  95. # deep=True, and add on some GC overhead
  96. diff = res_deep - sys.getsizeof(obj)
  97. assert abs(diff) < 100
  98. def test_memory_usage_components_series(series_with_simple_index):
  99. series = series_with_simple_index
  100. total_usage = series.memory_usage(index=True)
  101. non_index_usage = series.memory_usage(index=False)
  102. index_usage = series.index.memory_usage()
  103. assert total_usage == non_index_usage + index_usage
  104. @pytest.mark.parametrize("dtype", tm.NARROW_NP_DTYPES)
  105. def test_memory_usage_components_narrow_series(dtype):
  106. series = Series(range(5), dtype=dtype, index=[f"i-{i}" for i in range(5)], name="a")
  107. total_usage = series.memory_usage(index=True)
  108. non_index_usage = series.memory_usage(index=False)
  109. index_usage = series.index.memory_usage()
  110. assert total_usage == non_index_usage + index_usage
  111. def test_searchsorted(request, index_or_series_obj):
  112. # numpy.searchsorted calls obj.searchsorted under the hood.
  113. # See gh-12238
  114. obj = index_or_series_obj
  115. if isinstance(obj, pd.MultiIndex):
  116. # See gh-14833
  117. request.applymarker(
  118. pytest.mark.xfail(
  119. reason="np.searchsorted doesn't work on pd.MultiIndex: GH 14833"
  120. )
  121. )
  122. elif obj.dtype.kind == "c" and isinstance(obj, Index):
  123. # TODO: Should Series cases also raise? Looks like they use numpy
  124. # comparison semantics https://github.com/numpy/numpy/issues/15981
  125. mark = pytest.mark.xfail(reason="complex objects are not comparable")
  126. request.applymarker(mark)
  127. max_obj = max(obj, default=0)
  128. index = np.searchsorted(obj, max_obj)
  129. assert 0 <= index <= len(obj)
  130. index = np.searchsorted(obj, max_obj, sorter=range(len(obj)))
  131. assert 0 <= index <= len(obj)
  132. @pytest.mark.filterwarnings(r"ignore:Dtype inference:FutureWarning")
  133. def test_access_by_position(index_flat):
  134. index = index_flat
  135. if len(index) == 0:
  136. pytest.skip("Test doesn't make sense on empty data")
  137. series = Series(index)
  138. assert index[0] == series.iloc[0]
  139. assert index[5] == series.iloc[5]
  140. assert index[-1] == series.iloc[-1]
  141. size = len(index)
  142. assert index[-1] == index[size - 1]
  143. msg = f"index {size} is out of bounds for axis 0 with size {size}"
  144. if isinstance(index.dtype, pd.StringDtype) and index.dtype.storage == "pyarrow":
  145. msg = "index out of bounds"
  146. with pytest.raises(IndexError, match=msg):
  147. index[size]
  148. msg = "single positional indexer is out-of-bounds"
  149. with pytest.raises(IndexError, match=msg):
  150. series.iloc[size]