pickle.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. """ pickle compat """
  2. from __future__ import annotations
  3. import pickle
  4. from typing import (
  5. TYPE_CHECKING,
  6. Any,
  7. )
  8. import warnings
  9. from pandas.compat import pickle_compat as pc
  10. from pandas.util._decorators import doc
  11. from pandas.core.shared_docs import _shared_docs
  12. from pandas.io.common import get_handle
  13. if TYPE_CHECKING:
  14. from pandas._typing import (
  15. CompressionOptions,
  16. FilePath,
  17. ReadPickleBuffer,
  18. StorageOptions,
  19. WriteBuffer,
  20. )
  21. from pandas import (
  22. DataFrame,
  23. Series,
  24. )
  25. @doc(
  26. storage_options=_shared_docs["storage_options"],
  27. compression_options=_shared_docs["compression_options"] % "filepath_or_buffer",
  28. )
  29. def to_pickle(
  30. obj: Any,
  31. filepath_or_buffer: FilePath | WriteBuffer[bytes],
  32. compression: CompressionOptions = "infer",
  33. protocol: int = pickle.HIGHEST_PROTOCOL,
  34. storage_options: StorageOptions | None = None,
  35. ) -> None:
  36. """
  37. Pickle (serialize) object to file.
  38. Parameters
  39. ----------
  40. obj : any object
  41. Any python object.
  42. filepath_or_buffer : str, path object, or file-like object
  43. String, path object (implementing ``os.PathLike[str]``), or file-like
  44. object implementing a binary ``write()`` function.
  45. Also accepts URL. URL has to be of S3 or GCS.
  46. {compression_options}
  47. .. versionchanged:: 1.4.0 Zstandard support.
  48. protocol : int
  49. Int which indicates which protocol should be used by the pickler,
  50. default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
  51. values for this parameter depend on the version of Python. For Python
  52. 2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
  53. For Python >= 3.4, 4 is a valid value. A negative value for the
  54. protocol parameter is equivalent to setting its value to
  55. HIGHEST_PROTOCOL.
  56. {storage_options}
  57. .. [1] https://docs.python.org/3/library/pickle.html
  58. See Also
  59. --------
  60. read_pickle : Load pickled pandas object (or any object) from file.
  61. DataFrame.to_hdf : Write DataFrame to an HDF5 file.
  62. DataFrame.to_sql : Write DataFrame to a SQL database.
  63. DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
  64. Examples
  65. --------
  66. >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP
  67. >>> original_df # doctest: +SKIP
  68. foo bar
  69. 0 0 5
  70. 1 1 6
  71. 2 2 7
  72. 3 3 8
  73. 4 4 9
  74. >>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP
  75. >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP
  76. >>> unpickled_df # doctest: +SKIP
  77. foo bar
  78. 0 0 5
  79. 1 1 6
  80. 2 2 7
  81. 3 3 8
  82. 4 4 9
  83. """ # noqa: E501
  84. if protocol < 0:
  85. protocol = pickle.HIGHEST_PROTOCOL
  86. with get_handle(
  87. filepath_or_buffer,
  88. "wb",
  89. compression=compression,
  90. is_text=False,
  91. storage_options=storage_options,
  92. ) as handles:
  93. # letting pickle write directly to the buffer is more memory-efficient
  94. pickle.dump(obj, handles.handle, protocol=protocol)
  95. @doc(
  96. storage_options=_shared_docs["storage_options"],
  97. decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer",
  98. )
  99. def read_pickle(
  100. filepath_or_buffer: FilePath | ReadPickleBuffer,
  101. compression: CompressionOptions = "infer",
  102. storage_options: StorageOptions | None = None,
  103. ) -> DataFrame | Series:
  104. """
  105. Load pickled pandas object (or any object) from file.
  106. .. warning::
  107. Loading pickled data received from untrusted sources can be
  108. unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.
  109. Parameters
  110. ----------
  111. filepath_or_buffer : str, path object, or file-like object
  112. String, path object (implementing ``os.PathLike[str]``), or file-like
  113. object implementing a binary ``readlines()`` function.
  114. Also accepts URL. URL is not limited to S3 and GCS.
  115. {decompression_options}
  116. .. versionchanged:: 1.4.0 Zstandard support.
  117. {storage_options}
  118. Returns
  119. -------
  120. same type as object stored in file
  121. See Also
  122. --------
  123. DataFrame.to_pickle : Pickle (serialize) DataFrame object to file.
  124. Series.to_pickle : Pickle (serialize) Series object to file.
  125. read_hdf : Read HDF5 file into a DataFrame.
  126. read_sql : Read SQL query or database table into a DataFrame.
  127. read_parquet : Load a parquet object, returning a DataFrame.
  128. Notes
  129. -----
  130. read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3
  131. provided the object was serialized with to_pickle.
  132. Examples
  133. --------
  134. >>> original_df = pd.DataFrame(
  135. ... {{"foo": range(5), "bar": range(5, 10)}}
  136. ... ) # doctest: +SKIP
  137. >>> original_df # doctest: +SKIP
  138. foo bar
  139. 0 0 5
  140. 1 1 6
  141. 2 2 7
  142. 3 3 8
  143. 4 4 9
  144. >>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP
  145. >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP
  146. >>> unpickled_df # doctest: +SKIP
  147. foo bar
  148. 0 0 5
  149. 1 1 6
  150. 2 2 7
  151. 3 3 8
  152. 4 4 9
  153. """
  154. excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError, TypeError)
  155. with get_handle(
  156. filepath_or_buffer,
  157. "rb",
  158. compression=compression,
  159. is_text=False,
  160. storage_options=storage_options,
  161. ) as handles:
  162. # 1) try standard library Pickle
  163. # 2) try pickle_compat (older pandas version) to handle subclass changes
  164. # 3) try pickle_compat with latin-1 encoding upon a UnicodeDecodeError
  165. try:
  166. # TypeError for Cython complaints about object.__new__ vs Tick.__new__
  167. try:
  168. with warnings.catch_warnings(record=True):
  169. # We want to silence any warnings about, e.g. moved modules.
  170. warnings.simplefilter("ignore", Warning)
  171. return pickle.load(handles.handle)
  172. except excs_to_catch:
  173. # e.g.
  174. # "No module named 'pandas.core.sparse.series'"
  175. # "Can't get attribute '__nat_unpickle' on <module 'pandas._libs.tslib"
  176. return pc.load(handles.handle, encoding=None)
  177. except UnicodeDecodeError:
  178. # e.g. can occur for files written in py27; see GH#28645 and GH#31988
  179. return pc.load(handles.handle, encoding="latin-1")