test_array.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. import numpy as np
  2. import pytest
  3. from pandas.compat.numpy import np_version_gt2
  4. from pandas import (
  5. DataFrame,
  6. Series,
  7. date_range,
  8. )
  9. import pandas._testing as tm
  10. from pandas.tests.copy_view.util import get_array
  11. # -----------------------------------------------------------------------------
  12. # Copy/view behaviour for accessing underlying array of Series/DataFrame
  13. @pytest.mark.parametrize(
  14. "method",
  15. [
  16. lambda ser: ser.values,
  17. lambda ser: np.asarray(ser),
  18. lambda ser: np.array(ser, copy=False),
  19. ],
  20. ids=["values", "asarray", "array"],
  21. )
  22. def test_series_values(using_copy_on_write, method):
  23. ser = Series([1, 2, 3], name="name")
  24. ser_orig = ser.copy()
  25. arr = method(ser)
  26. if using_copy_on_write:
  27. # .values still gives a view but is read-only
  28. assert np.shares_memory(arr, get_array(ser, "name"))
  29. assert arr.flags.writeable is False
  30. # mutating series through arr therefore doesn't work
  31. with pytest.raises(ValueError, match="read-only"):
  32. arr[0] = 0
  33. tm.assert_series_equal(ser, ser_orig)
  34. # mutating the series itself still works
  35. ser.iloc[0] = 0
  36. assert ser.values[0] == 0
  37. else:
  38. assert arr.flags.writeable is True
  39. arr[0] = 0
  40. assert ser.iloc[0] == 0
  41. @pytest.mark.parametrize(
  42. "method",
  43. [
  44. lambda df: df.values,
  45. lambda df: np.asarray(df),
  46. lambda ser: np.array(ser, copy=False),
  47. ],
  48. ids=["values", "asarray", "array"],
  49. )
  50. def test_dataframe_values(using_copy_on_write, using_array_manager, method):
  51. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  52. df_orig = df.copy()
  53. arr = method(df)
  54. if using_copy_on_write:
  55. # .values still gives a view but is read-only
  56. assert np.shares_memory(arr, get_array(df, "a"))
  57. assert arr.flags.writeable is False
  58. # mutating series through arr therefore doesn't work
  59. with pytest.raises(ValueError, match="read-only"):
  60. arr[0, 0] = 0
  61. tm.assert_frame_equal(df, df_orig)
  62. # mutating the series itself still works
  63. df.iloc[0, 0] = 0
  64. assert df.values[0, 0] == 0
  65. else:
  66. assert arr.flags.writeable is True
  67. arr[0, 0] = 0
  68. if not using_array_manager:
  69. assert df.iloc[0, 0] == 0
  70. else:
  71. tm.assert_frame_equal(df, df_orig)
  72. def test_series_to_numpy(using_copy_on_write):
  73. ser = Series([1, 2, 3], name="name")
  74. ser_orig = ser.copy()
  75. # default: copy=False, no dtype or NAs
  76. arr = ser.to_numpy()
  77. if using_copy_on_write:
  78. # to_numpy still gives a view but is read-only
  79. assert np.shares_memory(arr, get_array(ser, "name"))
  80. assert arr.flags.writeable is False
  81. # mutating series through arr therefore doesn't work
  82. with pytest.raises(ValueError, match="read-only"):
  83. arr[0] = 0
  84. tm.assert_series_equal(ser, ser_orig)
  85. # mutating the series itself still works
  86. ser.iloc[0] = 0
  87. assert ser.values[0] == 0
  88. else:
  89. assert arr.flags.writeable is True
  90. arr[0] = 0
  91. assert ser.iloc[0] == 0
  92. # specify copy=True gives a writeable array
  93. ser = Series([1, 2, 3], name="name")
  94. arr = ser.to_numpy(copy=True)
  95. assert not np.shares_memory(arr, get_array(ser, "name"))
  96. assert arr.flags.writeable is True
  97. # specifying a dtype that already causes a copy also gives a writeable array
  98. ser = Series([1, 2, 3], name="name")
  99. arr = ser.to_numpy(dtype="float64")
  100. assert not np.shares_memory(arr, get_array(ser, "name"))
  101. assert arr.flags.writeable is True
  102. @pytest.mark.parametrize("order", ["F", "C"])
  103. def test_ravel_read_only(using_copy_on_write, order):
  104. ser = Series([1, 2, 3])
  105. with tm.assert_produces_warning(FutureWarning, match="is deprecated"):
  106. arr = ser.ravel(order=order)
  107. if using_copy_on_write:
  108. assert arr.flags.writeable is False
  109. assert np.shares_memory(get_array(ser), arr)
  110. def test_series_array_ea_dtypes(using_copy_on_write):
  111. ser = Series([1, 2, 3], dtype="Int64")
  112. arr = np.asarray(ser, dtype="int64")
  113. assert np.shares_memory(arr, get_array(ser))
  114. if using_copy_on_write:
  115. assert arr.flags.writeable is False
  116. else:
  117. assert arr.flags.writeable is True
  118. arr = np.asarray(ser)
  119. assert np.shares_memory(arr, get_array(ser))
  120. if using_copy_on_write:
  121. assert arr.flags.writeable is False
  122. else:
  123. assert arr.flags.writeable is True
  124. def test_dataframe_array_ea_dtypes(using_copy_on_write):
  125. df = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
  126. arr = np.asarray(df, dtype="int64")
  127. assert np.shares_memory(arr, get_array(df, "a"))
  128. if using_copy_on_write:
  129. assert arr.flags.writeable is False
  130. else:
  131. assert arr.flags.writeable is True
  132. arr = np.asarray(df)
  133. assert np.shares_memory(arr, get_array(df, "a"))
  134. if using_copy_on_write:
  135. assert arr.flags.writeable is False
  136. else:
  137. assert arr.flags.writeable is True
  138. def test_dataframe_array_string_dtype(using_copy_on_write, using_array_manager):
  139. df = DataFrame({"a": ["a", "b"]}, dtype="string")
  140. arr = np.asarray(df)
  141. if not using_array_manager:
  142. assert np.shares_memory(arr, get_array(df, "a"))
  143. if using_copy_on_write:
  144. assert arr.flags.writeable is False
  145. else:
  146. assert arr.flags.writeable is True
  147. def test_dataframe_multiple_numpy_dtypes():
  148. df = DataFrame({"a": [1, 2, 3], "b": 1.5})
  149. arr = np.asarray(df)
  150. assert not np.shares_memory(arr, get_array(df, "a"))
  151. assert arr.flags.writeable is True
  152. if np_version_gt2:
  153. # copy=False semantics are only supported in NumPy>=2.
  154. msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed"
  155. with pytest.raises(FutureWarning, match=msg):
  156. arr = np.array(df, copy=False)
  157. arr = np.array(df, copy=True)
  158. assert arr.flags.writeable is True
  159. def test_dataframe_single_block_copy_true():
  160. # the copy=False/None cases are tested above in test_dataframe_values
  161. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  162. arr = np.array(df, copy=True)
  163. assert not np.shares_memory(arr, get_array(df, "a"))
  164. assert arr.flags.writeable is True
  165. def test_values_is_ea(using_copy_on_write):
  166. df = DataFrame({"a": date_range("2012-01-01", periods=3)})
  167. arr = np.asarray(df)
  168. if using_copy_on_write:
  169. assert arr.flags.writeable is False
  170. else:
  171. assert arr.flags.writeable is True
  172. def test_empty_dataframe():
  173. df = DataFrame()
  174. arr = np.asarray(df)
  175. assert arr.flags.writeable is True