test_values.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. import numpy as np
  2. import pytest
  3. import pandas.util._test_decorators as td
  4. from pandas import (
  5. DataFrame,
  6. NaT,
  7. Series,
  8. Timestamp,
  9. date_range,
  10. period_range,
  11. )
  12. import pandas._testing as tm
  13. class TestDataFrameValues:
  14. @td.skip_array_manager_invalid_test
  15. def test_values(self, float_frame, using_copy_on_write):
  16. if using_copy_on_write:
  17. with pytest.raises(ValueError, match="read-only"):
  18. float_frame.values[:, 0] = 5.0
  19. assert (float_frame.values[:, 0] != 5).all()
  20. else:
  21. float_frame.values[:, 0] = 5.0
  22. assert (float_frame.values[:, 0] == 5).all()
  23. def test_more_values(self, float_string_frame):
  24. values = float_string_frame.values
  25. assert values.shape[1] == len(float_string_frame.columns)
  26. def test_values_mixed_dtypes(self, float_frame, float_string_frame):
  27. frame = float_frame
  28. arr = frame.values
  29. frame_cols = frame.columns
  30. for i, row in enumerate(arr):
  31. for j, value in enumerate(row):
  32. col = frame_cols[j]
  33. if np.isnan(value):
  34. assert np.isnan(frame[col].iloc[i])
  35. else:
  36. assert value == frame[col].iloc[i]
  37. # mixed type
  38. arr = float_string_frame[["foo", "A"]].values
  39. assert arr[0, 0] == "bar"
  40. df = DataFrame({"complex": [1j, 2j, 3j], "real": [1, 2, 3]})
  41. arr = df.values
  42. assert arr[0, 0] == 1j
  43. def test_values_duplicates(self):
  44. df = DataFrame(
  45. [[1, 2, "a", "b"], [1, 2, "a", "b"]], columns=["one", "one", "two", "two"]
  46. )
  47. result = df.values
  48. expected = np.array([[1, 2, "a", "b"], [1, 2, "a", "b"]], dtype=object)
  49. tm.assert_numpy_array_equal(result, expected)
  50. def test_values_with_duplicate_columns(self):
  51. df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=["x", "x"])
  52. result = df.values
  53. expected = np.array([[1, 2.5], [3, 4.5]])
  54. assert (result == expected).all().all()
  55. @pytest.mark.parametrize("constructor", [date_range, period_range])
  56. def test_values_casts_datetimelike_to_object(self, constructor):
  57. series = Series(constructor("2000-01-01", periods=10, freq="D"))
  58. expected = series.astype("object")
  59. df = DataFrame(
  60. {"a": series, "b": np.random.default_rng(2).standard_normal(len(series))}
  61. )
  62. result = df.values.squeeze()
  63. assert (result[:, 0] == expected.values).all()
  64. df = DataFrame({"a": series, "b": ["foo"] * len(series)})
  65. result = df.values.squeeze()
  66. assert (result[:, 0] == expected.values).all()
  67. def test_frame_values_with_tz(self):
  68. tz = "US/Central"
  69. df = DataFrame({"A": date_range("2000", periods=4, tz=tz)})
  70. result = df.values
  71. expected = np.array(
  72. [
  73. [Timestamp("2000-01-01", tz=tz)],
  74. [Timestamp("2000-01-02", tz=tz)],
  75. [Timestamp("2000-01-03", tz=tz)],
  76. [Timestamp("2000-01-04", tz=tz)],
  77. ]
  78. )
  79. tm.assert_numpy_array_equal(result, expected)
  80. # two columns, homogeneous
  81. df["B"] = df["A"]
  82. result = df.values
  83. expected = np.concatenate([expected, expected], axis=1)
  84. tm.assert_numpy_array_equal(result, expected)
  85. # three columns, heterogeneous
  86. est = "US/Eastern"
  87. df["C"] = df["A"].dt.tz_convert(est)
  88. new = np.array(
  89. [
  90. [Timestamp("2000-01-01T01:00:00", tz=est)],
  91. [Timestamp("2000-01-02T01:00:00", tz=est)],
  92. [Timestamp("2000-01-03T01:00:00", tz=est)],
  93. [Timestamp("2000-01-04T01:00:00", tz=est)],
  94. ]
  95. )
  96. expected = np.concatenate([expected, new], axis=1)
  97. result = df.values
  98. tm.assert_numpy_array_equal(result, expected)
  99. def test_interleave_with_tzaware(self, timezone_frame):
  100. # interleave with object
  101. result = timezone_frame.assign(D="foo").values
  102. expected = np.array(
  103. [
  104. [
  105. Timestamp("2013-01-01 00:00:00"),
  106. Timestamp("2013-01-02 00:00:00"),
  107. Timestamp("2013-01-03 00:00:00"),
  108. ],
  109. [
  110. Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"),
  111. NaT,
  112. Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"),
  113. ],
  114. [
  115. Timestamp("2013-01-01 00:00:00+0100", tz="CET"),
  116. NaT,
  117. Timestamp("2013-01-03 00:00:00+0100", tz="CET"),
  118. ],
  119. ["foo", "foo", "foo"],
  120. ],
  121. dtype=object,
  122. ).T
  123. tm.assert_numpy_array_equal(result, expected)
  124. # interleave with only datetime64[ns]
  125. result = timezone_frame.values
  126. expected = np.array(
  127. [
  128. [
  129. Timestamp("2013-01-01 00:00:00"),
  130. Timestamp("2013-01-02 00:00:00"),
  131. Timestamp("2013-01-03 00:00:00"),
  132. ],
  133. [
  134. Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"),
  135. NaT,
  136. Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"),
  137. ],
  138. [
  139. Timestamp("2013-01-01 00:00:00+0100", tz="CET"),
  140. NaT,
  141. Timestamp("2013-01-03 00:00:00+0100", tz="CET"),
  142. ],
  143. ],
  144. dtype=object,
  145. ).T
  146. tm.assert_numpy_array_equal(result, expected)
  147. def test_values_interleave_non_unique_cols(self):
  148. df = DataFrame(
  149. [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]],
  150. columns=["x", "x"],
  151. index=[1, 2],
  152. )
  153. df_unique = df.copy()
  154. df_unique.columns = ["x", "y"]
  155. assert df_unique.values.shape == df.values.shape
  156. tm.assert_numpy_array_equal(df_unique.values[0], df.values[0])
  157. tm.assert_numpy_array_equal(df_unique.values[1], df.values[1])
  158. def test_values_numeric_cols(self, float_frame):
  159. float_frame["foo"] = "bar"
  160. values = float_frame[["A", "B", "C", "D"]].values
  161. assert values.dtype == np.float64
  162. def test_values_lcd(self, mixed_float_frame, mixed_int_frame):
  163. # mixed lcd
  164. values = mixed_float_frame[["A", "B", "C", "D"]].values
  165. assert values.dtype == np.float64
  166. values = mixed_float_frame[["A", "B", "C"]].values
  167. assert values.dtype == np.float32
  168. values = mixed_float_frame[["C"]].values
  169. assert values.dtype == np.float16
  170. # GH#10364
  171. # B uint64 forces float because there are other signed int types
  172. values = mixed_int_frame[["A", "B", "C", "D"]].values
  173. assert values.dtype == np.float64
  174. values = mixed_int_frame[["A", "D"]].values
  175. assert values.dtype == np.int64
  176. # B uint64 forces float because there are other signed int types
  177. values = mixed_int_frame[["A", "B", "C"]].values
  178. assert values.dtype == np.float64
  179. # as B and C are both unsigned, no forcing to float is needed
  180. values = mixed_int_frame[["B", "C"]].values
  181. assert values.dtype == np.uint64
  182. values = mixed_int_frame[["A", "C"]].values
  183. assert values.dtype == np.int32
  184. values = mixed_int_frame[["C", "D"]].values
  185. assert values.dtype == np.int64
  186. values = mixed_int_frame[["A"]].values
  187. assert values.dtype == np.int32
  188. values = mixed_int_frame[["C"]].values
  189. assert values.dtype == np.uint8
  190. class TestPrivateValues:
  191. @td.skip_array_manager_invalid_test
  192. def test_private_values_dt64tz(self, using_copy_on_write):
  193. dta = date_range("2000", periods=4, tz="US/Central")._data.reshape(-1, 1)
  194. df = DataFrame(dta, columns=["A"])
  195. tm.assert_equal(df._values, dta)
  196. if using_copy_on_write:
  197. assert not np.shares_memory(df._values._ndarray, dta._ndarray)
  198. else:
  199. # we have a view
  200. assert np.shares_memory(df._values._ndarray, dta._ndarray)
  201. # TimedeltaArray
  202. tda = dta - dta
  203. df2 = df - df
  204. tm.assert_equal(df2._values, tda)
  205. @td.skip_array_manager_invalid_test
  206. def test_private_values_dt64tz_multicol(self, using_copy_on_write):
  207. dta = date_range("2000", periods=8, tz="US/Central")._data.reshape(-1, 2)
  208. df = DataFrame(dta, columns=["A", "B"])
  209. tm.assert_equal(df._values, dta)
  210. if using_copy_on_write:
  211. assert not np.shares_memory(df._values._ndarray, dta._ndarray)
  212. else:
  213. # we have a view
  214. assert np.shares_memory(df._values._ndarray, dta._ndarray)
  215. # TimedeltaArray
  216. tda = dta - dta
  217. df2 = df - df
  218. tm.assert_equal(df2._values, tda)
  219. def test_private_values_dt64_multiblock(self):
  220. dta = date_range("2000", periods=8)._data
  221. df = DataFrame({"A": dta[:4]}, copy=False)
  222. df["B"] = dta[4:]
  223. assert len(df._mgr.arrays) == 2
  224. result = df._values
  225. expected = dta.reshape(2, 4).T
  226. tm.assert_equal(result, expected)