test_setitem.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. import numpy as np
  2. from pandas import (
  3. DataFrame,
  4. Index,
  5. MultiIndex,
  6. RangeIndex,
  7. Series,
  8. )
  9. import pandas._testing as tm
  10. from pandas.tests.copy_view.util import get_array
  11. # -----------------------------------------------------------------------------
  12. # Copy/view behaviour for the values that are set in a DataFrame
  13. def test_set_column_with_array():
  14. # Case: setting an array as a new column (df[col] = arr) copies that data
  15. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  16. arr = np.array([1, 2, 3], dtype="int64")
  17. df["c"] = arr
  18. # the array data is copied
  19. assert not np.shares_memory(get_array(df, "c"), arr)
  20. # and thus modifying the array does not modify the DataFrame
  21. arr[0] = 0
  22. tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
  23. def test_set_column_with_series(using_copy_on_write):
  24. # Case: setting a series as a new column (df[col] = s) copies that data
  25. # (with delayed copy with CoW)
  26. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  27. ser = Series([1, 2, 3])
  28. df["c"] = ser
  29. if using_copy_on_write:
  30. assert np.shares_memory(get_array(df, "c"), get_array(ser))
  31. else:
  32. # the series data is copied
  33. assert not np.shares_memory(get_array(df, "c"), get_array(ser))
  34. # and modifying the series does not modify the DataFrame
  35. ser.iloc[0] = 0
  36. assert ser.iloc[0] == 0
  37. tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
  38. def test_set_column_with_index(using_copy_on_write):
  39. # Case: setting an index as a new column (df[col] = idx) copies that data
  40. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  41. idx = Index([1, 2, 3])
  42. df["c"] = idx
  43. # the index data is copied
  44. assert not np.shares_memory(get_array(df, "c"), idx.values)
  45. idx = RangeIndex(1, 4)
  46. arr = idx.values
  47. df["d"] = idx
  48. assert not np.shares_memory(get_array(df, "d"), arr)
  49. def test_set_columns_with_dataframe(using_copy_on_write):
  50. # Case: setting a DataFrame as new columns copies that data
  51. # (with delayed copy with CoW)
  52. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  53. df2 = DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})
  54. df[["c", "d"]] = df2
  55. if using_copy_on_write:
  56. assert np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
  57. else:
  58. # the data is copied
  59. assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
  60. # and modifying the set DataFrame does not modify the original DataFrame
  61. df2.iloc[0, 0] = 0
  62. tm.assert_series_equal(df["c"], Series([7, 8, 9], name="c"))
  63. def test_setitem_series_no_copy(using_copy_on_write):
  64. # Case: setting a Series as column into a DataFrame can delay copying that data
  65. df = DataFrame({"a": [1, 2, 3]})
  66. rhs = Series([4, 5, 6])
  67. rhs_orig = rhs.copy()
  68. # adding a new column
  69. df["b"] = rhs
  70. if using_copy_on_write:
  71. assert np.shares_memory(get_array(rhs), get_array(df, "b"))
  72. df.iloc[0, 1] = 100
  73. tm.assert_series_equal(rhs, rhs_orig)
  74. def test_setitem_series_no_copy_single_block(using_copy_on_write):
  75. # Overwriting an existing column that is a single block
  76. df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
  77. rhs = Series([4, 5, 6])
  78. rhs_orig = rhs.copy()
  79. df["a"] = rhs
  80. if using_copy_on_write:
  81. assert np.shares_memory(get_array(rhs), get_array(df, "a"))
  82. df.iloc[0, 0] = 100
  83. tm.assert_series_equal(rhs, rhs_orig)
  84. def test_setitem_series_no_copy_split_block(using_copy_on_write):
  85. # Overwriting an existing column that is part of a larger block
  86. df = DataFrame({"a": [1, 2, 3], "b": 1})
  87. rhs = Series([4, 5, 6])
  88. rhs_orig = rhs.copy()
  89. df["b"] = rhs
  90. if using_copy_on_write:
  91. assert np.shares_memory(get_array(rhs), get_array(df, "b"))
  92. df.iloc[0, 1] = 100
  93. tm.assert_series_equal(rhs, rhs_orig)
  94. def test_setitem_series_column_midx_broadcasting(using_copy_on_write):
  95. # Setting a Series to multiple columns will repeat the data
  96. # (currently copying the data eagerly)
  97. df = DataFrame(
  98. [[1, 2, 3], [3, 4, 5]],
  99. columns=MultiIndex.from_arrays([["a", "a", "b"], [1, 2, 3]]),
  100. )
  101. rhs = Series([10, 11])
  102. df["a"] = rhs
  103. assert not np.shares_memory(get_array(rhs), df._get_column_array(0))
  104. if using_copy_on_write:
  105. assert df._mgr._has_no_reference(0)
  106. def test_set_column_with_inplace_operator(using_copy_on_write, warn_copy_on_write):
  107. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  108. # this should not raise any warning
  109. with tm.assert_produces_warning(None):
  110. df["a"] += 1
  111. # when it is not in a chain, then it should produce a warning
  112. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  113. ser = df["a"]
  114. with tm.assert_cow_warning(warn_copy_on_write):
  115. ser += 1