test_map.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. from datetime import datetime
  2. import numpy as np
  3. import pytest
  4. import pandas as pd
  5. from pandas import (
  6. DataFrame,
  7. Series,
  8. Timestamp,
  9. date_range,
  10. )
  11. import pandas._testing as tm
  12. from pandas.tseries.offsets import BDay
  13. def test_map(float_frame):
  14. result = float_frame.map(lambda x: x * 2)
  15. tm.assert_frame_equal(result, float_frame * 2)
  16. float_frame.map(type)
  17. # GH 465: function returning tuples
  18. result = float_frame.map(lambda x: (x, x))["A"].iloc[0]
  19. assert isinstance(result, tuple)
  20. @pytest.mark.parametrize("val", [1, 1.0])
  21. def test_map_float_object_conversion(val):
  22. # GH 2909: object conversion to float in constructor?
  23. df = DataFrame(data=[val, "a"])
  24. result = df.map(lambda x: x).dtypes[0]
  25. assert result == object
  26. @pytest.mark.parametrize("na_action", [None, "ignore"])
  27. def test_map_keeps_dtype(na_action):
  28. # GH52219
  29. arr = Series(["a", np.nan, "b"])
  30. sparse_arr = arr.astype(pd.SparseDtype(object))
  31. df = DataFrame(data={"a": arr, "b": sparse_arr})
  32. def func(x):
  33. return str.upper(x) if not pd.isna(x) else x
  34. result = df.map(func, na_action=na_action)
  35. expected_sparse = pd.array(["A", np.nan, "B"], dtype=pd.SparseDtype(object))
  36. expected_arr = expected_sparse.astype(object)
  37. expected = DataFrame({"a": expected_arr, "b": expected_sparse})
  38. tm.assert_frame_equal(result, expected)
  39. result_empty = df.iloc[:0, :].map(func, na_action=na_action)
  40. expected_empty = expected.iloc[:0, :]
  41. tm.assert_frame_equal(result_empty, expected_empty)
  42. def test_map_str():
  43. # GH 2786
  44. df = DataFrame(np.random.default_rng(2).random((3, 4)))
  45. df2 = df.copy()
  46. cols = ["a", "a", "a", "a"]
  47. df.columns = cols
  48. expected = df2.map(str)
  49. expected.columns = cols
  50. result = df.map(str)
  51. tm.assert_frame_equal(result, expected)
  52. @pytest.mark.parametrize(
  53. "col, val",
  54. [["datetime", Timestamp("20130101")], ["timedelta", pd.Timedelta("1 min")]],
  55. )
  56. def test_map_datetimelike(col, val):
  57. # datetime/timedelta
  58. df = DataFrame(np.random.default_rng(2).random((3, 4)))
  59. df[col] = val
  60. result = df.map(str)
  61. assert result.loc[0, col] == str(df.loc[0, col])
  62. @pytest.mark.parametrize(
  63. "expected",
  64. [
  65. DataFrame(),
  66. DataFrame(columns=list("ABC")),
  67. DataFrame(index=list("ABC")),
  68. DataFrame({"A": [], "B": [], "C": []}),
  69. ],
  70. )
  71. @pytest.mark.parametrize("func", [round, lambda x: x])
  72. def test_map_empty(expected, func):
  73. # GH 8222
  74. result = expected.map(func)
  75. tm.assert_frame_equal(result, expected)
  76. def test_map_kwargs():
  77. # GH 40652
  78. result = DataFrame([[1, 2], [3, 4]]).map(lambda x, y: x + y, y=2)
  79. expected = DataFrame([[3, 4], [5, 6]])
  80. tm.assert_frame_equal(result, expected)
  81. def test_map_na_ignore(float_frame):
  82. # GH 23803
  83. strlen_frame = float_frame.map(lambda x: len(str(x)))
  84. float_frame_with_na = float_frame.copy()
  85. mask = np.random.default_rng(2).integers(0, 2, size=float_frame.shape, dtype=bool)
  86. float_frame_with_na[mask] = pd.NA
  87. strlen_frame_na_ignore = float_frame_with_na.map(
  88. lambda x: len(str(x)), na_action="ignore"
  89. )
  90. # Set float64 type to avoid upcast when setting NA below
  91. strlen_frame_with_na = strlen_frame.copy().astype("float64")
  92. strlen_frame_with_na[mask] = pd.NA
  93. tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na)
  94. def test_map_box_timestamps():
  95. # GH 2689, GH 2627
  96. ser = Series(date_range("1/1/2000", periods=10))
  97. def func(x):
  98. return (x.hour, x.day, x.month)
  99. # it works!
  100. DataFrame(ser).map(func)
  101. def test_map_box():
  102. # ufunc will not be boxed. Same test cases as the test_map_box
  103. df = DataFrame(
  104. {
  105. "a": [Timestamp("2011-01-01"), Timestamp("2011-01-02")],
  106. "b": [
  107. Timestamp("2011-01-01", tz="US/Eastern"),
  108. Timestamp("2011-01-02", tz="US/Eastern"),
  109. ],
  110. "c": [pd.Timedelta("1 days"), pd.Timedelta("2 days")],
  111. "d": [
  112. pd.Period("2011-01-01", freq="M"),
  113. pd.Period("2011-01-02", freq="M"),
  114. ],
  115. }
  116. )
  117. result = df.map(lambda x: type(x).__name__)
  118. expected = DataFrame(
  119. {
  120. "a": ["Timestamp", "Timestamp"],
  121. "b": ["Timestamp", "Timestamp"],
  122. "c": ["Timedelta", "Timedelta"],
  123. "d": ["Period", "Period"],
  124. }
  125. )
  126. tm.assert_frame_equal(result, expected)
  127. def test_frame_map_dont_convert_datetime64():
  128. df = DataFrame({"x1": [datetime(1996, 1, 1)]})
  129. df = df.map(lambda x: x + BDay())
  130. df = df.map(lambda x: x + BDay())
  131. result = df.x1.dtype
  132. assert result == "M8[ns]"
  133. def test_map_function_runs_once():
  134. df = DataFrame({"a": [1, 2, 3]})
  135. values = [] # Save values function is applied to
  136. def reducing_function(val):
  137. values.append(val)
  138. def non_reducing_function(val):
  139. values.append(val)
  140. return val
  141. for func in [reducing_function, non_reducing_function]:
  142. del values[:]
  143. df.map(func)
  144. assert values == df.a.to_list()
  145. def test_map_type():
  146. # GH 46719
  147. df = DataFrame(
  148. {"col1": [3, "string", float], "col2": [0.25, datetime(2020, 1, 1), np.nan]},
  149. index=["a", "b", "c"],
  150. )
  151. result = df.map(type)
  152. expected = DataFrame(
  153. {"col1": [int, str, type], "col2": [float, datetime, float]},
  154. index=["a", "b", "c"],
  155. )
  156. tm.assert_frame_equal(result, expected)
  157. def test_map_invalid_na_action(float_frame):
  158. # GH 23803
  159. with pytest.raises(ValueError, match="na_action must be .*Got 'abc'"):
  160. float_frame.map(lambda x: len(str(x)), na_action="abc")
  161. def test_applymap_deprecated():
  162. # GH52353
  163. df = DataFrame({"a": [1, 2, 3]})
  164. msg = "DataFrame.applymap has been deprecated. Use DataFrame.map instead."
  165. with tm.assert_produces_warning(FutureWarning, match=msg):
  166. df.applymap(lambda x: x)