missing.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. import pandas._testing as tm
  5. class BaseMissingTests:
  6. def test_isna(self, data_missing):
  7. expected = np.array([True, False])
  8. result = pd.isna(data_missing)
  9. tm.assert_numpy_array_equal(result, expected)
  10. result = pd.Series(data_missing).isna()
  11. expected = pd.Series(expected)
  12. tm.assert_series_equal(result, expected)
  13. # GH 21189
  14. result = pd.Series(data_missing).drop([0, 1]).isna()
  15. expected = pd.Series([], dtype=bool)
  16. tm.assert_series_equal(result, expected)
  17. @pytest.mark.parametrize("na_func", ["isna", "notna"])
  18. def test_isna_returns_copy(self, data_missing, na_func):
  19. result = pd.Series(data_missing)
  20. expected = result.copy()
  21. mask = getattr(result, na_func)()
  22. if isinstance(mask.dtype, pd.SparseDtype):
  23. # TODO: GH 57739
  24. mask = np.array(mask)
  25. mask.flags.writeable = True
  26. mask[:] = True
  27. tm.assert_series_equal(result, expected)
  28. def test_dropna_array(self, data_missing):
  29. result = data_missing.dropna()
  30. expected = data_missing[[1]]
  31. tm.assert_extension_array_equal(result, expected)
  32. def test_dropna_series(self, data_missing):
  33. ser = pd.Series(data_missing)
  34. result = ser.dropna()
  35. expected = ser.iloc[[1]]
  36. tm.assert_series_equal(result, expected)
  37. def test_dropna_frame(self, data_missing):
  38. df = pd.DataFrame({"A": data_missing}, columns=pd.Index(["A"], dtype=object))
  39. # defaults
  40. result = df.dropna()
  41. expected = df.iloc[[1]]
  42. tm.assert_frame_equal(result, expected)
  43. # axis = 1
  44. result = df.dropna(axis="columns")
  45. expected = pd.DataFrame(index=pd.RangeIndex(2), columns=pd.Index([]))
  46. tm.assert_frame_equal(result, expected)
  47. # multiple
  48. df = pd.DataFrame({"A": data_missing, "B": [1, np.nan]})
  49. result = df.dropna()
  50. expected = df.iloc[:0]
  51. tm.assert_frame_equal(result, expected)
  52. def test_fillna_scalar(self, data_missing):
  53. valid = data_missing[1]
  54. result = data_missing.fillna(valid)
  55. expected = data_missing.fillna(valid)
  56. tm.assert_extension_array_equal(result, expected)
  57. @pytest.mark.filterwarnings(
  58. "ignore:Series.fillna with 'method' is deprecated:FutureWarning"
  59. )
  60. def test_fillna_limit_pad(self, data_missing):
  61. arr = data_missing.take([1, 0, 0, 0, 1])
  62. result = pd.Series(arr).ffill(limit=2)
  63. expected = pd.Series(data_missing.take([1, 1, 1, 0, 1]))
  64. tm.assert_series_equal(result, expected)
  65. @pytest.mark.parametrize(
  66. "limit_area, input_ilocs, expected_ilocs",
  67. [
  68. ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]),
  69. ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]),
  70. ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]),
  71. ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]),
  72. ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]),
  73. ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]),
  74. ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]),
  75. ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]),
  76. ],
  77. )
  78. def test_ffill_limit_area(
  79. self, data_missing, limit_area, input_ilocs, expected_ilocs
  80. ):
  81. # GH#56616
  82. arr = data_missing.take(input_ilocs)
  83. result = pd.Series(arr).ffill(limit_area=limit_area)
  84. expected = pd.Series(data_missing.take(expected_ilocs))
  85. tm.assert_series_equal(result, expected)
  86. @pytest.mark.filterwarnings(
  87. "ignore:Series.fillna with 'method' is deprecated:FutureWarning"
  88. )
  89. def test_fillna_limit_backfill(self, data_missing):
  90. arr = data_missing.take([1, 0, 0, 0, 1])
  91. result = pd.Series(arr).fillna(method="backfill", limit=2)
  92. expected = pd.Series(data_missing.take([1, 0, 1, 1, 1]))
  93. tm.assert_series_equal(result, expected)
  94. def test_fillna_no_op_returns_copy(self, data):
  95. data = data[~data.isna()]
  96. valid = data[0]
  97. result = data.fillna(valid)
  98. assert result is not data
  99. tm.assert_extension_array_equal(result, data)
  100. result = data._pad_or_backfill(method="backfill")
  101. assert result is not data
  102. tm.assert_extension_array_equal(result, data)
  103. def test_fillna_series(self, data_missing):
  104. fill_value = data_missing[1]
  105. ser = pd.Series(data_missing)
  106. result = ser.fillna(fill_value)
  107. expected = pd.Series(
  108. data_missing._from_sequence(
  109. [fill_value, fill_value], dtype=data_missing.dtype
  110. )
  111. )
  112. tm.assert_series_equal(result, expected)
  113. # Fill with a series
  114. result = ser.fillna(expected)
  115. tm.assert_series_equal(result, expected)
  116. # Fill with a series not affecting the missing values
  117. result = ser.fillna(ser)
  118. tm.assert_series_equal(result, ser)
  119. def test_fillna_series_method(self, data_missing, fillna_method):
  120. fill_value = data_missing[1]
  121. if fillna_method == "ffill":
  122. data_missing = data_missing[::-1]
  123. result = getattr(pd.Series(data_missing), fillna_method)()
  124. expected = pd.Series(
  125. data_missing._from_sequence(
  126. [fill_value, fill_value], dtype=data_missing.dtype
  127. )
  128. )
  129. tm.assert_series_equal(result, expected)
  130. def test_fillna_frame(self, data_missing):
  131. fill_value = data_missing[1]
  132. result = pd.DataFrame({"A": data_missing, "B": [1, 2]}).fillna(fill_value)
  133. expected = pd.DataFrame(
  134. {
  135. "A": data_missing._from_sequence(
  136. [fill_value, fill_value], dtype=data_missing.dtype
  137. ),
  138. "B": [1, 2],
  139. }
  140. )
  141. tm.assert_frame_equal(result, expected)
  142. def test_fillna_fill_other(self, data):
  143. result = pd.DataFrame({"A": data, "B": [np.nan] * len(data)}).fillna({"B": 0.0})
  144. expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)})
  145. tm.assert_frame_equal(result, expected)
  146. def test_use_inf_as_na_no_effect(self, data_missing):
  147. ser = pd.Series(data_missing)
  148. expected = ser.isna()
  149. msg = "use_inf_as_na option is deprecated"
  150. with tm.assert_produces_warning(FutureWarning, match=msg):
  151. with pd.option_context("mode.use_inf_as_na", True):
  152. result = ser.isna()
  153. tm.assert_series_equal(result, expected)