test_upcast.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. import numpy as np
  2. import pytest
  3. from pandas._libs.parsers import (
  4. _maybe_upcast,
  5. na_values,
  6. )
  7. import pandas as pd
  8. from pandas import NA
  9. import pandas._testing as tm
  10. from pandas.core.arrays import (
  11. ArrowStringArray,
  12. BooleanArray,
  13. FloatingArray,
  14. IntegerArray,
  15. StringArray,
  16. )
  17. def test_maybe_upcast(any_real_numpy_dtype):
  18. # GH#36712
  19. dtype = np.dtype(any_real_numpy_dtype)
  20. na_value = na_values[dtype]
  21. arr = np.array([1, 2, na_value], dtype=dtype)
  22. result = _maybe_upcast(arr, use_dtype_backend=True)
  23. expected_mask = np.array([False, False, True])
  24. if issubclass(dtype.type, np.integer):
  25. expected = IntegerArray(arr, mask=expected_mask)
  26. else:
  27. expected = FloatingArray(arr, mask=expected_mask)
  28. tm.assert_extension_array_equal(result, expected)
  29. def test_maybe_upcast_no_na(any_real_numpy_dtype):
  30. # GH#36712
  31. arr = np.array([1, 2, 3], dtype=any_real_numpy_dtype)
  32. result = _maybe_upcast(arr, use_dtype_backend=True)
  33. expected_mask = np.array([False, False, False])
  34. if issubclass(np.dtype(any_real_numpy_dtype).type, np.integer):
  35. expected = IntegerArray(arr, mask=expected_mask)
  36. else:
  37. expected = FloatingArray(arr, mask=expected_mask)
  38. tm.assert_extension_array_equal(result, expected)
  39. def test_maybe_upcaste_bool():
  40. # GH#36712
  41. dtype = np.bool_
  42. na_value = na_values[dtype]
  43. arr = np.array([True, False, na_value], dtype="uint8").view(dtype)
  44. result = _maybe_upcast(arr, use_dtype_backend=True)
  45. expected_mask = np.array([False, False, True])
  46. expected = BooleanArray(arr, mask=expected_mask)
  47. tm.assert_extension_array_equal(result, expected)
  48. def test_maybe_upcaste_bool_no_nan():
  49. # GH#36712
  50. dtype = np.bool_
  51. arr = np.array([True, False, False], dtype="uint8").view(dtype)
  52. result = _maybe_upcast(arr, use_dtype_backend=True)
  53. expected_mask = np.array([False, False, False])
  54. expected = BooleanArray(arr, mask=expected_mask)
  55. tm.assert_extension_array_equal(result, expected)
  56. def test_maybe_upcaste_all_nan():
  57. # GH#36712
  58. dtype = np.int64
  59. na_value = na_values[dtype]
  60. arr = np.array([na_value, na_value], dtype=dtype)
  61. result = _maybe_upcast(arr, use_dtype_backend=True)
  62. expected_mask = np.array([True, True])
  63. expected = IntegerArray(arr, mask=expected_mask)
  64. tm.assert_extension_array_equal(result, expected)
  65. @pytest.mark.parametrize("val", [na_values[np.object_], "c"])
  66. def test_maybe_upcast_object(val, string_storage):
  67. # GH#36712
  68. pa = pytest.importorskip("pyarrow")
  69. with pd.option_context("mode.string_storage", string_storage):
  70. arr = np.array(["a", "b", val], dtype=np.object_)
  71. result = _maybe_upcast(arr, use_dtype_backend=True)
  72. if string_storage == "python":
  73. exp_val = "c" if val == "c" else NA
  74. expected = StringArray(np.array(["a", "b", exp_val], dtype=np.object_))
  75. else:
  76. exp_val = "c" if val == "c" else None
  77. expected = ArrowStringArray(pa.array(["a", "b", exp_val]))
  78. tm.assert_extension_array_equal(result, expected)