test_arrow_interface.py 1.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. import ctypes
  2. import pytest
  3. import pandas.util._test_decorators as td
  4. import pandas as pd
  5. pa = pytest.importorskip("pyarrow")
  6. @td.skip_if_no("pyarrow", min_version="14.0")
  7. def test_dataframe_arrow_interface(using_infer_string):
  8. df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
  9. capsule = df.__arrow_c_stream__()
  10. assert (
  11. ctypes.pythonapi.PyCapsule_IsValid(
  12. ctypes.py_object(capsule), b"arrow_array_stream"
  13. )
  14. == 1
  15. )
  16. table = pa.table(df)
  17. string_type = pa.large_string() if using_infer_string else pa.string()
  18. expected = pa.table({"a": [1, 2, 3], "b": pa.array(["a", "b", "c"], string_type)})
  19. assert table.equals(expected)
  20. schema = pa.schema([("a", pa.int8()), ("b", pa.string())])
  21. table = pa.table(df, schema=schema)
  22. expected = expected.cast(schema)
  23. assert table.equals(expected)
  24. @td.skip_if_no("pyarrow", min_version="15.0")
  25. def test_dataframe_to_arrow(using_infer_string):
  26. df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
  27. table = pa.RecordBatchReader.from_stream(df).read_all()
  28. string_type = pa.large_string() if using_infer_string else pa.string()
  29. expected = pa.table({"a": [1, 2, 3], "b": pa.array(["a", "b", "c"], string_type)})
  30. assert table.equals(expected)
  31. schema = pa.schema([("a", pa.int8()), ("b", pa.string())])
  32. table = pa.RecordBatchReader.from_stream(df, schema=schema).read_all()
  33. expected = expected.cast(schema)
  34. assert table.equals(expected)