# test_update.py: tests for DataFrame.update.
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    DataFrame,
    Series,
    date_range,
)
import pandas._testing as tm


  11. class TestDataFrameUpdate:
  12. def test_update_nan(self):
  13. # #15593 #15617
  14. # test 1
  15. df1 = DataFrame({"A": [1.0, 2, 3], "B": date_range("2000", periods=3)})
  16. df2 = DataFrame({"A": [None, 2, 3]})
  17. expected = df1.copy()
  18. df1.update(df2, overwrite=False)
  19. tm.assert_frame_equal(df1, expected)
  20. # test 2
  21. df1 = DataFrame({"A": [1.0, None, 3], "B": date_range("2000", periods=3)})
  22. df2 = DataFrame({"A": [None, 2, 3]})
  23. expected = DataFrame({"A": [1.0, 2, 3], "B": date_range("2000", periods=3)})
  24. df1.update(df2, overwrite=False)
  25. tm.assert_frame_equal(df1, expected)
  26. def test_update(self):
  27. df = DataFrame(
  28. [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]]
  29. )
  30. other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3])
  31. df.update(other)
  32. expected = DataFrame(
  33. [[1.5, np.nan, 3], [3.6, 2, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]]
  34. )
  35. tm.assert_frame_equal(df, expected)
  36. def test_update_dtypes(self):
  37. # gh 3016
  38. df = DataFrame(
  39. [[1.0, 2.0, 1, False, True], [4.0, 5.0, 2, True, False]],
  40. columns=["A", "B", "int", "bool1", "bool2"],
  41. )
  42. other = DataFrame(
  43. [[45, 45, 3, True]], index=[0], columns=["A", "B", "int", "bool1"]
  44. )
  45. df.update(other)
  46. expected = DataFrame(
  47. [[45.0, 45.0, 3, True, True], [4.0, 5.0, 2, True, False]],
  48. columns=["A", "B", "int", "bool1", "bool2"],
  49. )
  50. tm.assert_frame_equal(df, expected)
  51. def test_update_nooverwrite(self):
  52. df = DataFrame(
  53. [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]]
  54. )
  55. other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3])
  56. df.update(other, overwrite=False)
  57. expected = DataFrame(
  58. [[1.5, np.nan, 3], [1.5, 2, 3], [1.5, np.nan, 3], [1.5, np.nan, 3.0]]
  59. )
  60. tm.assert_frame_equal(df, expected)
  61. def test_update_filtered(self):
  62. df = DataFrame(
  63. [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]]
  64. )
  65. other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3])
  66. df.update(other, filter_func=lambda x: x > 2)
  67. expected = DataFrame(
  68. [[1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]]
  69. )
  70. tm.assert_frame_equal(df, expected)
  71. @pytest.mark.parametrize(
  72. "bad_kwarg, exception, msg",
  73. [
  74. # errors must be 'ignore' or 'raise'
  75. ({"errors": "something"}, ValueError, "The parameter errors must.*"),
  76. ({"join": "inner"}, NotImplementedError, "Only left join is supported"),
  77. ],
  78. )
  79. def test_update_raise_bad_parameter(self, bad_kwarg, exception, msg):
  80. df = DataFrame([[1.5, 1, 3.0]])
  81. with pytest.raises(exception, match=msg):
  82. df.update(df, **bad_kwarg)
  83. def test_update_raise_on_overlap(self):
  84. df = DataFrame(
  85. [[1.5, 1, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]]
  86. )
  87. other = DataFrame([[2.0, np.nan], [np.nan, 7]], index=[1, 3], columns=[1, 2])
  88. with pytest.raises(ValueError, match="Data overlaps"):
  89. df.update(other, errors="raise")
  90. def test_update_from_non_df(self):
  91. d = {"a": Series([1, 2, 3, 4]), "b": Series([5, 6, 7, 8])}
  92. df = DataFrame(d)
  93. d["a"] = Series([5, 6, 7, 8])
  94. df.update(d)
  95. expected = DataFrame(d)
  96. tm.assert_frame_equal(df, expected)
  97. d = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
  98. df = DataFrame(d)
  99. d["a"] = [5, 6, 7, 8]
  100. df.update(d)
  101. expected = DataFrame(d)
  102. tm.assert_frame_equal(df, expected)
  103. def test_update_datetime_tz(self):
  104. # GH 25807
  105. result = DataFrame([pd.Timestamp("2019", tz="UTC")])
  106. with tm.assert_produces_warning(None):
  107. result.update(result)
  108. expected = DataFrame([pd.Timestamp("2019", tz="UTC")])
  109. tm.assert_frame_equal(result, expected)
  110. def test_update_datetime_tz_in_place(self, using_copy_on_write, warn_copy_on_write):
  111. # https://github.com/pandas-dev/pandas/issues/56227
  112. result = DataFrame([pd.Timestamp("2019", tz="UTC")])
  113. orig = result.copy()
  114. view = result[:]
  115. with tm.assert_produces_warning(
  116. FutureWarning if warn_copy_on_write else None, match="Setting a value"
  117. ):
  118. result.update(result + pd.Timedelta(days=1))
  119. expected = DataFrame([pd.Timestamp("2019-01-02", tz="UTC")])
  120. tm.assert_frame_equal(result, expected)
  121. if not using_copy_on_write:
  122. tm.assert_frame_equal(view, expected)
  123. else:
  124. tm.assert_frame_equal(view, orig)
  125. def test_update_with_different_dtype(self, using_copy_on_write):
  126. # GH#3217
  127. df = DataFrame({"a": [1, 3], "b": [np.nan, 2]})
  128. df["c"] = np.nan
  129. with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
  130. df.update({"c": Series(["foo"], index=[0])})
  131. expected = DataFrame(
  132. {
  133. "a": [1, 3],
  134. "b": [np.nan, 2],
  135. "c": Series(["foo", np.nan]),
  136. }
  137. )
  138. tm.assert_frame_equal(df, expected)
  139. @td.skip_array_manager_invalid_test
  140. def test_update_modify_view(
  141. self, using_copy_on_write, warn_copy_on_write, using_infer_string
  142. ):
  143. # GH#47188
  144. df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]})
  145. df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]})
  146. df2_orig = df2.copy()
  147. result_view = df2[:]
  148. # TODO(CoW-warn) better warning message
  149. with tm.assert_cow_warning(warn_copy_on_write):
  150. df2.update(df)
  151. expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]})
  152. tm.assert_frame_equal(df2, expected)
  153. if using_copy_on_write or using_infer_string:
  154. tm.assert_frame_equal(result_view, df2_orig)
  155. else:
  156. tm.assert_frame_equal(result_view, expected)
  157. def test_update_dt_column_with_NaT_create_column(self):
  158. # GH#16713
  159. df = DataFrame({"A": [1, None], "B": [pd.NaT, pd.to_datetime("2016-01-01")]})
  160. df2 = DataFrame({"A": [2, 3]})
  161. df.update(df2, overwrite=False)
  162. expected = DataFrame(
  163. {"A": [1.0, 3.0], "B": [pd.NaT, pd.to_datetime("2016-01-01")]}
  164. )
  165. tm.assert_frame_equal(df, expected)