feather_format.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. """ feather-format compat """
  2. from __future__ import annotations
  3. from typing import (
  4. TYPE_CHECKING,
  5. Any,
  6. )
  7. from pandas._config import using_string_dtype
  8. from pandas._libs import lib
  9. from pandas.compat._optional import import_optional_dependency
  10. from pandas.util._decorators import doc
  11. from pandas.util._validators import check_dtype_backend
  12. from pandas.core.api import DataFrame
  13. from pandas.core.shared_docs import _shared_docs
  14. from pandas.io._util import arrow_table_to_pandas
  15. from pandas.io.common import get_handle
  16. if TYPE_CHECKING:
  17. from collections.abc import (
  18. Hashable,
  19. Sequence,
  20. )
  21. from pandas._typing import (
  22. DtypeBackend,
  23. FilePath,
  24. ReadBuffer,
  25. StorageOptions,
  26. WriteBuffer,
  27. )
  28. @doc(storage_options=_shared_docs["storage_options"])
  29. def to_feather(
  30. df: DataFrame,
  31. path: FilePath | WriteBuffer[bytes],
  32. storage_options: StorageOptions | None = None,
  33. **kwargs: Any,
  34. ) -> None:
  35. """
  36. Write a DataFrame to the binary Feather format.
  37. Parameters
  38. ----------
  39. df : DataFrame
  40. path : str, path object, or file-like object
  41. {storage_options}
  42. **kwargs :
  43. Additional keywords passed to `pyarrow.feather.write_feather`.
  44. """
  45. import_optional_dependency("pyarrow")
  46. from pyarrow import feather
  47. if not isinstance(df, DataFrame):
  48. raise ValueError("feather only support IO with DataFrames")
  49. with get_handle(
  50. path, "wb", storage_options=storage_options, is_text=False
  51. ) as handles:
  52. feather.write_feather(df, handles.handle, **kwargs)
  53. @doc(storage_options=_shared_docs["storage_options"])
  54. def read_feather(
  55. path: FilePath | ReadBuffer[bytes],
  56. columns: Sequence[Hashable] | None = None,
  57. use_threads: bool = True,
  58. storage_options: StorageOptions | None = None,
  59. dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
  60. ) -> DataFrame:
  61. """
  62. Load a feather-format object from the file path.
  63. Parameters
  64. ----------
  65. path : str, path object, or file-like object
  66. String, path object (implementing ``os.PathLike[str]``), or file-like
  67. object implementing a binary ``read()`` function. The string could be a URL.
  68. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
  69. expected. A local file could be: ``file://localhost/path/to/table.feather``.
  70. columns : sequence, default None
  71. If not provided, all columns are read.
  72. use_threads : bool, default True
  73. Whether to parallelize reading using multiple threads.
  74. {storage_options}
  75. dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
  76. Back-end data type applied to the resultant :class:`DataFrame`
  77. (still experimental). Behaviour is as follows:
  78. * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
  79. (default).
  80. * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
  81. DataFrame.
  82. .. versionadded:: 2.0
  83. Returns
  84. -------
  85. type of object stored in file
  86. Examples
  87. --------
  88. >>> df = pd.read_feather("path/to/file.feather") # doctest: +SKIP
  89. """
  90. import_optional_dependency("pyarrow")
  91. from pyarrow import feather
  92. # import utils to register the pyarrow extension types
  93. import pandas.core.arrays.arrow.extension_types # pyright: ignore[reportUnusedImport] # noqa: F401
  94. check_dtype_backend(dtype_backend)
  95. with get_handle(
  96. path, "rb", storage_options=storage_options, is_text=False
  97. ) as handles:
  98. if dtype_backend is lib.no_default and not using_string_dtype():
  99. return feather.read_feather(
  100. handles.handle, columns=columns, use_threads=bool(use_threads)
  101. )
  102. pa_table = feather.read_table(
  103. handles.handle, columns=columns, use_threads=bool(use_threads)
  104. )
  105. return arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend)