test_utils.py 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas.core.interchange.utils import dtype_to_arrow_c_fmt
# TODO: use ArrowSchema to get the reference C format string.
# At the time of writing, there is no way to access the ArrowSchema that holds a
# type's format string from Python. The only way to reach it is to export the
# structure to a C pointer; see the DataType._export_to_c() method defined in
# https://github.com/apache/arrow/blob/master/python/pyarrow/types.pxi
  10. @pytest.mark.parametrize(
  11. "pandas_dtype, c_string",
  12. [
  13. (np.dtype("bool"), "b"),
  14. (np.dtype("int8"), "c"),
  15. (np.dtype("uint8"), "C"),
  16. (np.dtype("int16"), "s"),
  17. (np.dtype("uint16"), "S"),
  18. (np.dtype("int32"), "i"),
  19. (np.dtype("uint32"), "I"),
  20. (np.dtype("int64"), "l"),
  21. (np.dtype("uint64"), "L"),
  22. (np.dtype("float16"), "e"),
  23. (np.dtype("float32"), "f"),
  24. (np.dtype("float64"), "g"),
  25. (pd.Series(["a"]).dtype, "u"),
  26. (
  27. pd.Series([0]).astype("datetime64[ns]").dtype,
  28. "tsn:",
  29. ),
  30. (pd.CategoricalDtype(["a"]), "l"),
  31. (np.dtype("O"), "u"),
  32. ],
  33. )
  34. def test_dtype_to_arrow_c_fmt(pandas_dtype, c_string): # PR01
  35. """Test ``dtype_to_arrow_c_fmt`` utility function."""
  36. assert dtype_to_arrow_c_fmt(pandas_dtype) == c_string
  37. @pytest.mark.parametrize(
  38. "pa_dtype, args_kwargs, c_string",
  39. [
  40. ["null", {}, "n"],
  41. ["bool_", {}, "b"],
  42. ["uint8", {}, "C"],
  43. ["uint16", {}, "S"],
  44. ["uint32", {}, "I"],
  45. ["uint64", {}, "L"],
  46. ["int8", {}, "c"],
  47. ["int16", {}, "S"],
  48. ["int32", {}, "i"],
  49. ["int64", {}, "l"],
  50. ["float16", {}, "e"],
  51. ["float32", {}, "f"],
  52. ["float64", {}, "g"],
  53. ["string", {}, "u"],
  54. ["binary", {}, "z"],
  55. ["time32", ("s",), "tts"],
  56. ["time32", ("ms",), "ttm"],
  57. ["time64", ("us",), "ttu"],
  58. ["time64", ("ns",), "ttn"],
  59. ["date32", {}, "tdD"],
  60. ["date64", {}, "tdm"],
  61. ["timestamp", {"unit": "s"}, "tss:"],
  62. ["timestamp", {"unit": "ms"}, "tsm:"],
  63. ["timestamp", {"unit": "us"}, "tsu:"],
  64. ["timestamp", {"unit": "ns"}, "tsn:"],
  65. ["timestamp", {"unit": "ns", "tz": "UTC"}, "tsn:UTC"],
  66. ["duration", ("s",), "tDs"],
  67. ["duration", ("ms",), "tDm"],
  68. ["duration", ("us",), "tDu"],
  69. ["duration", ("ns",), "tDn"],
  70. ["decimal128", {"precision": 4, "scale": 2}, "d:4,2"],
  71. ],
  72. )
  73. def test_dtype_to_arrow_c_fmt_arrowdtype(pa_dtype, args_kwargs, c_string):
  74. # GH 52323
  75. pa = pytest.importorskip("pyarrow")
  76. if not args_kwargs:
  77. pa_type = getattr(pa, pa_dtype)()
  78. elif isinstance(args_kwargs, tuple):
  79. pa_type = getattr(pa, pa_dtype)(*args_kwargs)
  80. else:
  81. pa_type = getattr(pa, pa_dtype)(**args_kwargs)
  82. arrow_type = pd.ArrowDtype(pa_type)
  83. assert dtype_to_arrow_c_fmt(arrow_type) == c_string