# test_internals.py
  1. import numpy as np
  2. import pytest
  3. import pandas.util._test_decorators as td
  4. import pandas as pd
  5. from pandas import (
  6. DataFrame,
  7. Series,
  8. )
  9. import pandas._testing as tm
  10. from pandas.tests.copy_view.util import get_array
  11. @td.skip_array_manager_invalid_test
  12. def test_consolidate(using_copy_on_write):
  13. # create unconsolidated DataFrame
  14. df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
  15. df["c"] = [4, 5, 6]
  16. # take a viewing subset
  17. subset = df[:]
  18. # each block of subset references a block of df
  19. assert all(blk.refs.has_reference() for blk in subset._mgr.blocks)
  20. # consolidate the two int64 blocks
  21. subset._consolidate_inplace()
  22. # the float64 block still references the parent one because it still a view
  23. assert subset._mgr.blocks[0].refs.has_reference()
  24. # equivalent of assert np.shares_memory(df["b"].values, subset["b"].values)
  25. # but avoids caching df["b"]
  26. assert np.shares_memory(get_array(df, "b"), get_array(subset, "b"))
  27. # the new consolidated int64 block does not reference another
  28. assert not subset._mgr.blocks[1].refs.has_reference()
  29. # the parent dataframe now also only is linked for the float column
  30. assert not df._mgr.blocks[0].refs.has_reference()
  31. assert df._mgr.blocks[1].refs.has_reference()
  32. assert not df._mgr.blocks[2].refs.has_reference()
  33. # and modifying subset still doesn't modify parent
  34. if using_copy_on_write:
  35. subset.iloc[0, 1] = 0.0
  36. assert not df._mgr.blocks[1].refs.has_reference()
  37. assert df.loc[0, "b"] == 0.1
  38. @pytest.mark.single_cpu
  39. @td.skip_array_manager_invalid_test
  40. def test_switch_options():
  41. # ensure we can switch the value of the option within one session
  42. # (assuming data is constructed after switching)
  43. # using the option_context to ensure we set back to global option value
  44. # after running the test
  45. with pd.option_context("mode.copy_on_write", False):
  46. df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
  47. subset = df[:]
  48. subset.iloc[0, 0] = 0
  49. # df updated with CoW disabled
  50. assert df.iloc[0, 0] == 0
  51. pd.options.mode.copy_on_write = True
  52. df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
  53. subset = df[:]
  54. subset.iloc[0, 0] = 0
  55. # df not updated with CoW enabled
  56. assert df.iloc[0, 0] == 1
  57. pd.options.mode.copy_on_write = False
  58. df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
  59. subset = df[:]
  60. subset.iloc[0, 0] = 0
  61. # df updated with CoW disabled
  62. assert df.iloc[0, 0] == 0
  63. @td.skip_array_manager_invalid_test
  64. @pytest.mark.parametrize("dtype", [np.intp, np.int8])
  65. @pytest.mark.parametrize(
  66. "locs, arr",
  67. [
  68. ([0], np.array([-1, -2, -3])),
  69. ([1], np.array([-1, -2, -3])),
  70. ([5], np.array([-1, -2, -3])),
  71. ([0, 1], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
  72. ([0, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
  73. ([0, 1, 2], np.array([[-1, -2, -3], [-4, -5, -6], [-4, -5, -6]]).T),
  74. ([1, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
  75. ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
  76. ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
  77. ],
  78. )
  79. def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype):
  80. # Nothing currently calls iset with
  81. # more than 1 loc with inplace=True (only happens with inplace=False)
  82. # but ensure that it works
  83. df = DataFrame(
  84. {
  85. "a": [1, 2, 3],
  86. "b": [4, 5, 6],
  87. "c": [7, 8, 9],
  88. "d": [10, 11, 12],
  89. "e": [13, 14, 15],
  90. "f": Series(["a", "b", "c"], dtype=object),
  91. },
  92. )
  93. arr = arr.astype(dtype)
  94. df_orig = df.copy()
  95. df2 = df.copy(deep=None) # Trigger a CoW (if enabled, otherwise makes copy)
  96. df2._mgr.iset(locs, arr, inplace=True)
  97. tm.assert_frame_equal(df, df_orig)
  98. if using_copy_on_write:
  99. for i, col in enumerate(df.columns):
  100. if i not in locs:
  101. assert np.shares_memory(get_array(df, col), get_array(df2, col))
  102. else:
  103. for col in df.columns:
  104. assert not np.shares_memory(get_array(df, col), get_array(df2, col))
  105. def test_exponential_backoff():
  106. # GH#55518
  107. df = DataFrame({"a": [1, 2, 3]})
  108. for i in range(490):
  109. df.copy(deep=False)
  110. assert len(df._mgr.blocks[0].refs.referenced_blocks) == 491
  111. df = DataFrame({"a": [1, 2, 3]})
  112. dfs = [df.copy(deep=False) for i in range(510)]
  113. for i in range(20):
  114. df.copy(deep=False)
  115. assert len(df._mgr.blocks[0].refs.referenced_blocks) == 531
  116. assert df._mgr.blocks[0].refs.clear_counter == 1000
  117. for i in range(500):
  118. df.copy(deep=False)
  119. # Don't reduce since we still have over 500 objects alive
  120. assert df._mgr.blocks[0].refs.clear_counter == 1000
  121. dfs = dfs[:300]
  122. for i in range(500):
  123. df.copy(deep=False)
  124. # Reduce since there are less than 500 objects alive
  125. assert df._mgr.blocks[0].refs.clear_counter == 500