| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156 |
- import numpy as np
- from pandas import (
- DataFrame,
- Index,
- MultiIndex,
- RangeIndex,
- Series,
- )
- import pandas._testing as tm
- from pandas.tests.copy_view.util import get_array
- # -----------------------------------------------------------------------------
- # Copy/view behaviour for the values that are set in a DataFrame
def test_set_column_with_array():
    """Check that ``df[col] = ndarray`` stores a copy of the array."""
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    values = np.array([1, 2, 3], dtype="int64")

    frame["c"] = values

    # the new column must not be backed by the caller's buffer ...
    stored = get_array(frame, "c")
    assert not np.shares_memory(stored, values)

    # ... so a later write to the array leaves the DataFrame untouched
    values[0] = 0
    expected = Series([1, 2, 3], name="c")
    tm.assert_series_equal(frame["c"], expected)
def test_set_column_with_series(using_copy_on_write):
    """Check that ``df[col] = series`` does not tie the column to the Series.

    With Copy-on-Write the copy is delayed, so both objects are expected to
    share memory right after the assignment; otherwise the data is expected
    to be copied immediately.
    """
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    source = Series([1, 2, 3])

    frame["c"] = source

    shares_buffer = np.shares_memory(get_array(frame, "c"), get_array(source))
    # CoW: still the same buffer (delayed copy); non-CoW: already a copy
    assert shares_buffer if using_copy_on_write else not shares_buffer

    # writing to the Series must stay local to the Series
    source.iloc[0] = 0
    assert source.iloc[0] == 0
    tm.assert_series_equal(frame["c"], Series([1, 2, 3], name="c"))
def test_set_column_with_index(using_copy_on_write):
    """Check that ``df[col] = index`` stores a copy of the index data.

    Exercised for both a plain ``Index`` and a ``RangeIndex``. The
    ``using_copy_on_write`` fixture is requested but not read in the body.
    """
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    # plain Index: the column must not alias the index values
    plain_index = Index([1, 2, 3])
    frame["c"] = plain_index
    assert not np.shares_memory(get_array(frame, "c"), plain_index.values)

    # RangeIndex: grab its values before the assignment, then compare
    range_index = RangeIndex(1, 4)
    range_values = range_index.values
    frame["d"] = range_index
    assert not np.shares_memory(get_array(frame, "d"), range_values)
def test_set_columns_with_dataframe(using_copy_on_write):
    """Check that ``df[[cols]] = other_df`` does not tie the columns to ``other_df``.

    With Copy-on-Write the copy is delayed, so column "c" is expected to
    share memory with the source right after the assignment; otherwise the
    data is expected to be copied immediately.
    """
    target = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    source = DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})

    target[["c", "d"]] = source

    shares_buffer = np.shares_memory(
        get_array(target, "c"), get_array(source, "c")
    )
    # CoW: still the same buffer (delayed copy); non-CoW: already a copy
    assert shares_buffer if using_copy_on_write else not shares_buffer

    # writing to the source frame must not leak into the target frame
    source.iloc[0, 0] = 0
    tm.assert_series_equal(target["c"], Series([7, 8, 9], name="c"))
def test_setitem_series_no_copy(using_copy_on_write):
    """Check that adding a Series as a new column can delay copying its data.

    Under Copy-on-Write the new column is expected to share memory with the
    Series; in either mode a later write to the DataFrame must leave the
    Series unchanged.
    """
    frame = DataFrame({"a": [1, 2, 3]})
    new_col = Series([4, 5, 6])
    snapshot = new_col.copy()

    # add a brand-new column
    frame["b"] = new_col
    if using_copy_on_write:
        assert np.shares_memory(get_array(new_col), get_array(frame, "b"))

    # mutate the frame and confirm the Series kept its original values
    frame.iloc[0, 1] = 100
    tm.assert_series_equal(new_col, snapshot)
def test_setitem_series_no_copy_single_block(using_copy_on_write):
    """Check delayed copying when overwriting a column that is a single block.

    Under Copy-on-Write the overwritten column is expected to share memory
    with the Series; in either mode a later write to the DataFrame must
    leave the Series unchanged.
    """
    frame = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
    replacement = Series([4, 5, 6])
    snapshot = replacement.copy()

    # overwrite the existing column "a"
    frame["a"] = replacement
    if using_copy_on_write:
        assert np.shares_memory(get_array(replacement), get_array(frame, "a"))

    # mutate the frame and confirm the Series kept its original values
    frame.iloc[0, 0] = 100
    tm.assert_series_equal(replacement, snapshot)
def test_setitem_series_no_copy_split_block(using_copy_on_write):
    """Check delayed copying when overwriting a column inside a larger block.

    Under Copy-on-Write the overwritten column is expected to share memory
    with the Series; in either mode a later write to the DataFrame must
    leave the Series unchanged.
    """
    frame = DataFrame({"a": [1, 2, 3], "b": 1})
    replacement = Series([4, 5, 6])
    snapshot = replacement.copy()

    # overwrite the existing column "b"
    frame["b"] = replacement
    if using_copy_on_write:
        assert np.shares_memory(get_array(replacement), get_array(frame, "b"))

    # mutate the frame and confirm the Series kept its original values
    frame.iloc[0, 1] = 100
    tm.assert_series_equal(replacement, snapshot)
def test_setitem_series_column_midx_broadcasting(using_copy_on_write):
    """Check that a Series broadcast over several MultiIndex columns is copied.

    Assigning to the top-level key "a" targets two columns, so the Series
    data is repeated (currently copied eagerly). Under Copy-on-Write the
    first column is additionally expected to hold no references.
    """
    columns = MultiIndex.from_arrays([["a", "a", "b"], [1, 2, 3]])
    frame = DataFrame([[1, 2, 3], [3, 4, 5]], columns=columns)
    rhs = Series([10, 11])

    frame["a"] = rhs

    # the first column must not alias the Series buffer
    first_column = frame._get_column_array(0)
    assert not np.shares_memory(get_array(rhs), first_column)
    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)
def test_set_column_with_inplace_operator(using_copy_on_write, warn_copy_on_write):
    """Check warning behaviour of in-place operators on a DataFrame column.

    ``df[col] += 1`` must not emit any warning. Applying the operator to a
    column that was first bound to its own variable is checked through
    ``tm.assert_cow_warning``, which receives the ``warn_copy_on_write``
    flag. The ``using_copy_on_write`` fixture is requested but not read in
    the body.
    """
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    # operator applied directly on the indexing expression: no warning
    with tm.assert_produces_warning(None):
        frame["a"] += 1

    # operator applied to a separately held column: a warning is expected
    # when warn_copy_on_write is set
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    column = frame["a"]
    with tm.assert_cow_warning(warn_copy_on_write):
        column += 1
|