test_string_array.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. import numpy as np
  2. import pytest
  3. from pandas._libs import lib
  4. from pandas import (
  5. NA,
  6. DataFrame,
  7. Series,
  8. _testing as tm,
  9. option_context,
  10. )
  11. def test_string_array(nullable_string_dtype, any_string_method):
  12. method_name, args, kwargs = any_string_method
  13. data = ["a", "bb", np.nan, "ccc"]
  14. a = Series(data, dtype=object)
  15. b = Series(data, dtype=nullable_string_dtype)
  16. if method_name == "decode":
  17. with pytest.raises(TypeError, match="a bytes-like object is required"):
  18. getattr(b.str, method_name)(*args, **kwargs)
  19. return
  20. expected = getattr(a.str, method_name)(*args, **kwargs)
  21. result = getattr(b.str, method_name)(*args, **kwargs)
  22. if isinstance(expected, Series):
  23. if expected.dtype == "object" and lib.is_string_array(
  24. expected.dropna().values,
  25. ):
  26. assert result.dtype == nullable_string_dtype
  27. result = result.astype(object)
  28. elif expected.dtype == "object" and lib.is_bool_array(
  29. expected.values, skipna=True
  30. ):
  31. assert result.dtype == "boolean"
  32. expected = expected.astype("boolean")
  33. elif expected.dtype == "bool":
  34. assert result.dtype == "boolean"
  35. result = result.astype("bool")
  36. elif expected.dtype == "float" and expected.isna().any():
  37. assert result.dtype == "Int64"
  38. result = result.astype("float")
  39. if expected.dtype == object:
  40. # GH#18463
  41. expected[expected.isna()] = NA
  42. elif isinstance(expected, DataFrame):
  43. columns = expected.select_dtypes(include="object").columns
  44. assert all(result[columns].dtypes == nullable_string_dtype)
  45. result[columns] = result[columns].astype(object)
  46. with option_context("future.no_silent_downcasting", True):
  47. expected[columns] = expected[columns].fillna(NA) # GH#18463
  48. tm.assert_equal(result, expected)
  49. @pytest.mark.parametrize(
  50. "method,expected",
  51. [
  52. ("count", [2, None]),
  53. ("find", [0, None]),
  54. ("index", [0, None]),
  55. ("rindex", [2, None]),
  56. ],
  57. )
  58. def test_string_array_numeric_integer_array(nullable_string_dtype, method, expected):
  59. s = Series(["aba", None], dtype=nullable_string_dtype)
  60. result = getattr(s.str, method)("a")
  61. expected = Series(expected, dtype="Int64")
  62. tm.assert_series_equal(result, expected)
  63. @pytest.mark.parametrize(
  64. "method,expected",
  65. [
  66. ("isdigit", [False, None, True]),
  67. ("isalpha", [True, None, False]),
  68. ("isalnum", [True, None, True]),
  69. ("isnumeric", [False, None, True]),
  70. ],
  71. )
  72. def test_string_array_boolean_array(nullable_string_dtype, method, expected):
  73. s = Series(["a", None, "1"], dtype=nullable_string_dtype)
  74. result = getattr(s.str, method)()
  75. expected = Series(expected, dtype="boolean")
  76. tm.assert_series_equal(result, expected)
  77. def test_string_array_extract(nullable_string_dtype):
  78. # https://github.com/pandas-dev/pandas/issues/30969
  79. # Only expand=False & multiple groups was failing
  80. a = Series(["a1", "b2", "cc"], dtype=nullable_string_dtype)
  81. b = Series(["a1", "b2", "cc"], dtype="object")
  82. pat = r"(\w)(\d)"
  83. result = a.str.extract(pat, expand=False)
  84. expected = b.str.extract(pat, expand=False)
  85. expected = expected.fillna(NA) # GH#18463
  86. assert all(result.dtypes == nullable_string_dtype)
  87. result = result.astype(object)
  88. tm.assert_equal(result, expected)