# test_info.py: tests for Series.info() output and Series.memory_usage().
from io import StringIO
from string import ascii_uppercase
import textwrap

import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import (
    HAS_PYARROW,
    PYPY,
)

from pandas import (
    CategoricalIndex,
    Index,
    MultiIndex,
    Series,
    date_range,
)
  18. def test_info_categorical_column_just_works():
  19. n = 2500
  20. data = np.array(list("abcdefghij")).take(
  21. np.random.default_rng(2).integers(0, 10, size=n, dtype=int)
  22. )
  23. s = Series(data).astype("category")
  24. s.isna()
  25. buf = StringIO()
  26. s.info(buf=buf)
  27. s2 = s[s == "d"]
  28. buf = StringIO()
  29. s2.info(buf=buf)
  30. def test_info_categorical():
  31. # GH14298
  32. idx = CategoricalIndex(["a", "b"])
  33. s = Series(np.zeros(2), index=idx)
  34. buf = StringIO()
  35. s.info(buf=buf)
  36. @pytest.mark.parametrize("verbose", [True, False])
  37. def test_info_series(
  38. lexsorted_two_level_string_multiindex, verbose, using_infer_string
  39. ):
  40. index = lexsorted_two_level_string_multiindex
  41. ser = Series(range(len(index)), index=index, name="sth")
  42. buf = StringIO()
  43. ser.info(verbose=verbose, buf=buf)
  44. result = buf.getvalue()
  45. expected = textwrap.dedent(
  46. """\
  47. <class 'pandas.core.series.Series'>
  48. MultiIndex: 10 entries, ('foo', 'one') to ('qux', 'three')
  49. """
  50. )
  51. if verbose:
  52. expected += textwrap.dedent(
  53. """\
  54. Series name: sth
  55. Non-Null Count Dtype
  56. -------------- -----
  57. 10 non-null int64
  58. """
  59. )
  60. qualifier = "" if using_infer_string and HAS_PYARROW else "+"
  61. expected += textwrap.dedent(
  62. f"""\
  63. dtypes: int64(1)
  64. memory usage: {ser.memory_usage()}.0{qualifier} bytes
  65. """
  66. )
  67. assert result == expected
  68. def test_info_memory():
  69. s = Series([1, 2], dtype="i8")
  70. buf = StringIO()
  71. s.info(buf=buf)
  72. result = buf.getvalue()
  73. memory_bytes = float(s.memory_usage())
  74. expected = textwrap.dedent(
  75. f"""\
  76. <class 'pandas.core.series.Series'>
  77. RangeIndex: 2 entries, 0 to 1
  78. Series name: None
  79. Non-Null Count Dtype
  80. -------------- -----
  81. 2 non-null int64
  82. dtypes: int64(1)
  83. memory usage: {memory_bytes} bytes
  84. """
  85. )
  86. assert result == expected
  87. def test_info_wide():
  88. s = Series(np.random.default_rng(2).standard_normal(101))
  89. msg = "Argument `max_cols` can only be passed in DataFrame.info, not Series.info"
  90. with pytest.raises(ValueError, match=msg):
  91. s.info(max_cols=1)
  92. def test_info_shows_dtypes():
  93. dtypes = [
  94. "int64",
  95. "float64",
  96. "datetime64[ns]",
  97. "timedelta64[ns]",
  98. "complex128",
  99. "object",
  100. "bool",
  101. ]
  102. n = 10
  103. for dtype in dtypes:
  104. s = Series(np.random.default_rng(2).integers(2, size=n).astype(dtype))
  105. buf = StringIO()
  106. s.info(buf=buf)
  107. res = buf.getvalue()
  108. name = f"{n:d} non-null {dtype}"
  109. assert name in res
  110. @pytest.mark.xfail(PYPY, reason="on PyPy deep=True doesn't change result")
  111. def test_info_memory_usage_deep_not_pypy():
  112. s_with_object_index = Series({"a": [1]}, index=["foo"])
  113. assert s_with_object_index.memory_usage(
  114. index=True, deep=True
  115. ) > s_with_object_index.memory_usage(index=True)
  116. s_object = Series({"a": ["a"]})
  117. assert s_object.memory_usage(deep=True) > s_object.memory_usage()
  118. @pytest.mark.xfail(not PYPY, reason="on PyPy deep=True does not change result")
  119. def test_info_memory_usage_deep_pypy():
  120. s_with_object_index = Series({"a": [1]}, index=["foo"])
  121. assert s_with_object_index.memory_usage(
  122. index=True, deep=True
  123. ) == s_with_object_index.memory_usage(index=True)
  124. s_object = Series({"a": ["a"]})
  125. assert s_object.memory_usage(deep=True) == s_object.memory_usage()
  126. @pytest.mark.parametrize(
  127. "index, plus",
  128. [
  129. ([1, 2, 3], False),
  130. (Index(list("ABC"), dtype="str"), not (using_string_dtype() and HAS_PYARROW)),
  131. (Index(list("ABC"), dtype=object), True),
  132. (MultiIndex.from_product([range(3), range(3)]), False),
  133. (
  134. MultiIndex.from_product([range(3), ["foo", "bar"]]),
  135. not (using_string_dtype() and HAS_PYARROW),
  136. ),
  137. ],
  138. )
  139. def test_info_memory_usage_qualified(index, plus):
  140. series = Series(1, index=index)
  141. buf = StringIO()
  142. series.info(buf=buf)
  143. if plus:
  144. assert "+" in buf.getvalue()
  145. else:
  146. assert "+" not in buf.getvalue()
  147. def test_info_memory_usage_bug_on_multiindex():
  148. # GH 14308
  149. # memory usage introspection should not materialize .values
  150. N = 100
  151. M = len(ascii_uppercase)
  152. index = MultiIndex.from_product(
  153. [list(ascii_uppercase), date_range("20160101", periods=N)],
  154. names=["id", "date"],
  155. )
  156. s = Series(np.random.default_rng(2).standard_normal(N * M), index=index)
  157. unstacked = s.unstack("id")
  158. assert s.values.nbytes == unstacked.values.nbytes
  159. assert s.memory_usage(deep=True) > unstacked.memory_usage(deep=True).sum()
  160. # high upper bound
  161. diff = unstacked.memory_usage(deep=True).sum() - s.memory_usage(deep=True)
  162. assert diff < 2000