| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216 |
- from datetime import datetime
- import numpy as np
- import pytest
- import pandas as pd
- from pandas import (
- DataFrame,
- Series,
- Timestamp,
- date_range,
- )
- import pandas._testing as tm
- from pandas.tseries.offsets import BDay
- def test_map(float_frame):
- result = float_frame.map(lambda x: x * 2)
- tm.assert_frame_equal(result, float_frame * 2)
- float_frame.map(type)
- # GH 465: function returning tuples
- result = float_frame.map(lambda x: (x, x))["A"].iloc[0]
- assert isinstance(result, tuple)
- @pytest.mark.parametrize("val", [1, 1.0])
- def test_map_float_object_conversion(val):
- # GH 2909: object conversion to float in constructor?
- df = DataFrame(data=[val, "a"])
- result = df.map(lambda x: x).dtypes[0]
- assert result == object
- @pytest.mark.parametrize("na_action", [None, "ignore"])
- def test_map_keeps_dtype(na_action):
- # GH52219
- arr = Series(["a", np.nan, "b"])
- sparse_arr = arr.astype(pd.SparseDtype(object))
- df = DataFrame(data={"a": arr, "b": sparse_arr})
- def func(x):
- return str.upper(x) if not pd.isna(x) else x
- result = df.map(func, na_action=na_action)
- expected_sparse = pd.array(["A", np.nan, "B"], dtype=pd.SparseDtype(object))
- expected_arr = expected_sparse.astype(object)
- expected = DataFrame({"a": expected_arr, "b": expected_sparse})
- tm.assert_frame_equal(result, expected)
- result_empty = df.iloc[:0, :].map(func, na_action=na_action)
- expected_empty = expected.iloc[:0, :]
- tm.assert_frame_equal(result_empty, expected_empty)
- def test_map_str():
- # GH 2786
- df = DataFrame(np.random.default_rng(2).random((3, 4)))
- df2 = df.copy()
- cols = ["a", "a", "a", "a"]
- df.columns = cols
- expected = df2.map(str)
- expected.columns = cols
- result = df.map(str)
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize(
- "col, val",
- [["datetime", Timestamp("20130101")], ["timedelta", pd.Timedelta("1 min")]],
- )
- def test_map_datetimelike(col, val):
- # datetime/timedelta
- df = DataFrame(np.random.default_rng(2).random((3, 4)))
- df[col] = val
- result = df.map(str)
- assert result.loc[0, col] == str(df.loc[0, col])
- @pytest.mark.parametrize(
- "expected",
- [
- DataFrame(),
- DataFrame(columns=list("ABC")),
- DataFrame(index=list("ABC")),
- DataFrame({"A": [], "B": [], "C": []}),
- ],
- )
- @pytest.mark.parametrize("func", [round, lambda x: x])
- def test_map_empty(expected, func):
- # GH 8222
- result = expected.map(func)
- tm.assert_frame_equal(result, expected)
- def test_map_kwargs():
- # GH 40652
- result = DataFrame([[1, 2], [3, 4]]).map(lambda x, y: x + y, y=2)
- expected = DataFrame([[3, 4], [5, 6]])
- tm.assert_frame_equal(result, expected)
- def test_map_na_ignore(float_frame):
- # GH 23803
- strlen_frame = float_frame.map(lambda x: len(str(x)))
- float_frame_with_na = float_frame.copy()
- mask = np.random.default_rng(2).integers(0, 2, size=float_frame.shape, dtype=bool)
- float_frame_with_na[mask] = pd.NA
- strlen_frame_na_ignore = float_frame_with_na.map(
- lambda x: len(str(x)), na_action="ignore"
- )
- # Set float64 type to avoid upcast when setting NA below
- strlen_frame_with_na = strlen_frame.copy().astype("float64")
- strlen_frame_with_na[mask] = pd.NA
- tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na)
- def test_map_box_timestamps():
- # GH 2689, GH 2627
- ser = Series(date_range("1/1/2000", periods=10))
- def func(x):
- return (x.hour, x.day, x.month)
- # it works!
- DataFrame(ser).map(func)
- def test_map_box():
- # ufunc will not be boxed. Same test cases as the test_map_box
- df = DataFrame(
- {
- "a": [Timestamp("2011-01-01"), Timestamp("2011-01-02")],
- "b": [
- Timestamp("2011-01-01", tz="US/Eastern"),
- Timestamp("2011-01-02", tz="US/Eastern"),
- ],
- "c": [pd.Timedelta("1 days"), pd.Timedelta("2 days")],
- "d": [
- pd.Period("2011-01-01", freq="M"),
- pd.Period("2011-01-02", freq="M"),
- ],
- }
- )
- result = df.map(lambda x: type(x).__name__)
- expected = DataFrame(
- {
- "a": ["Timestamp", "Timestamp"],
- "b": ["Timestamp", "Timestamp"],
- "c": ["Timedelta", "Timedelta"],
- "d": ["Period", "Period"],
- }
- )
- tm.assert_frame_equal(result, expected)
- def test_frame_map_dont_convert_datetime64():
- df = DataFrame({"x1": [datetime(1996, 1, 1)]})
- df = df.map(lambda x: x + BDay())
- df = df.map(lambda x: x + BDay())
- result = df.x1.dtype
- assert result == "M8[ns]"
- def test_map_function_runs_once():
- df = DataFrame({"a": [1, 2, 3]})
- values = [] # Save values function is applied to
- def reducing_function(val):
- values.append(val)
- def non_reducing_function(val):
- values.append(val)
- return val
- for func in [reducing_function, non_reducing_function]:
- del values[:]
- df.map(func)
- assert values == df.a.to_list()
- def test_map_type():
- # GH 46719
- df = DataFrame(
- {"col1": [3, "string", float], "col2": [0.25, datetime(2020, 1, 1), np.nan]},
- index=["a", "b", "c"],
- )
- result = df.map(type)
- expected = DataFrame(
- {"col1": [int, str, type], "col2": [float, datetime, float]},
- index=["a", "b", "c"],
- )
- tm.assert_frame_equal(result, expected)
- def test_map_invalid_na_action(float_frame):
- # GH 23803
- with pytest.raises(ValueError, match="na_action must be .*Got 'abc'"):
- float_frame.map(lambda x: len(str(x)), na_action="abc")
- def test_applymap_deprecated():
- # GH52353
- df = DataFrame({"a": [1, 2, 3]})
- msg = "DataFrame.applymap has been deprecated. Use DataFrame.map instead."
- with tm.assert_produces_warning(FutureWarning, match=msg):
- df.applymap(lambda x: x)
|