# pickle_compat.py (7.5 KB)
  1. """
  2. Support pre-0.12 series pickle compatibility.
  3. """
  4. from __future__ import annotations
  5. import contextlib
  6. import copy
  7. import io
  8. import pickle as pkl
  9. from typing import TYPE_CHECKING
  10. import numpy as np
  11. from pandas._libs.arrays import NDArrayBacked
  12. from pandas._libs.tslibs import BaseOffset
  13. from pandas import Index
  14. from pandas.core.arrays import (
  15. DatetimeArray,
  16. PeriodArray,
  17. TimedeltaArray,
  18. )
  19. from pandas.core.internals import BlockManager
  20. if TYPE_CHECKING:
  21. from collections.abc import Generator
  22. def load_reduce(self) -> None:
  23. stack = self.stack
  24. args = stack.pop()
  25. func = stack[-1]
  26. try:
  27. stack[-1] = func(*args)
  28. return
  29. except TypeError as err:
  30. # If we have a deprecated function,
  31. # try to replace and try again.
  32. msg = "_reconstruct: First argument must be a sub-type of ndarray"
  33. if msg in str(err):
  34. try:
  35. cls = args[0]
  36. stack[-1] = object.__new__(cls)
  37. return
  38. except TypeError:
  39. pass
  40. elif args and isinstance(args[0], type) and issubclass(args[0], BaseOffset):
  41. # TypeError: object.__new__(Day) is not safe, use Day.__new__()
  42. cls = args[0]
  43. stack[-1] = cls.__new__(*args)
  44. return
  45. elif args and issubclass(args[0], PeriodArray):
  46. cls = args[0]
  47. stack[-1] = NDArrayBacked.__new__(*args)
  48. return
  49. raise
  50. # If classes are moved, provide compat here.
  51. _class_locations_map = {
  52. ("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"),
  53. # 15477
  54. ("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"),
  55. # Re-routing unpickle block logic to go through _unpickle_block instead
  56. # for pandas <= 1.3.5
  57. ("pandas.core.internals.blocks", "new_block"): (
  58. "pandas._libs.internals",
  59. "_unpickle_block",
  60. ),
  61. ("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"),
  62. ("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"),
  63. # 10890
  64. ("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"),
  65. ("pandas.sparse.series", "SparseTimeSeries"): (
  66. "pandas.core.sparse.series",
  67. "SparseSeries",
  68. ),
  69. # 12588, extensions moving
  70. ("pandas._sparse", "BlockIndex"): ("pandas._libs.sparse", "BlockIndex"),
  71. ("pandas.tslib", "Timestamp"): ("pandas._libs.tslib", "Timestamp"),
  72. # 18543 moving period
  73. ("pandas._period", "Period"): ("pandas._libs.tslibs.period", "Period"),
  74. ("pandas._libs.period", "Period"): ("pandas._libs.tslibs.period", "Period"),
  75. # 18014 moved __nat_unpickle from _libs.tslib-->_libs.tslibs.nattype
  76. ("pandas.tslib", "__nat_unpickle"): (
  77. "pandas._libs.tslibs.nattype",
  78. "__nat_unpickle",
  79. ),
  80. ("pandas._libs.tslib", "__nat_unpickle"): (
  81. "pandas._libs.tslibs.nattype",
  82. "__nat_unpickle",
  83. ),
  84. # 15998 top-level dirs moving
  85. ("pandas.sparse.array", "SparseArray"): (
  86. "pandas.core.arrays.sparse",
  87. "SparseArray",
  88. ),
  89. ("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"),
  90. ("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"),
  91. ("pandas.indexes.numeric", "Int64Index"): (
  92. "pandas.core.indexes.base",
  93. "Index", # updated in 50775
  94. ),
  95. ("pandas.indexes.range", "RangeIndex"): ("pandas.core.indexes.range", "RangeIndex"),
  96. ("pandas.indexes.multi", "MultiIndex"): ("pandas.core.indexes.multi", "MultiIndex"),
  97. ("pandas.tseries.index", "_new_DatetimeIndex"): (
  98. "pandas.core.indexes.datetimes",
  99. "_new_DatetimeIndex",
  100. ),
  101. ("pandas.tseries.index", "DatetimeIndex"): (
  102. "pandas.core.indexes.datetimes",
  103. "DatetimeIndex",
  104. ),
  105. ("pandas.tseries.period", "PeriodIndex"): (
  106. "pandas.core.indexes.period",
  107. "PeriodIndex",
  108. ),
  109. # 19269, arrays moving
  110. ("pandas.core.categorical", "Categorical"): ("pandas.core.arrays", "Categorical"),
  111. # 19939, add timedeltaindex, float64index compat from 15998 move
  112. ("pandas.tseries.tdi", "TimedeltaIndex"): (
  113. "pandas.core.indexes.timedeltas",
  114. "TimedeltaIndex",
  115. ),
  116. ("pandas.indexes.numeric", "Float64Index"): (
  117. "pandas.core.indexes.base",
  118. "Index", # updated in 50775
  119. ),
  120. # 50775, remove Int64Index, UInt64Index & Float64Index from codabase
  121. ("pandas.core.indexes.numeric", "Int64Index"): (
  122. "pandas.core.indexes.base",
  123. "Index",
  124. ),
  125. ("pandas.core.indexes.numeric", "UInt64Index"): (
  126. "pandas.core.indexes.base",
  127. "Index",
  128. ),
  129. ("pandas.core.indexes.numeric", "Float64Index"): (
  130. "pandas.core.indexes.base",
  131. "Index",
  132. ),
  133. ("pandas.core.arrays.sparse.dtype", "SparseDtype"): (
  134. "pandas.core.dtypes.dtypes",
  135. "SparseDtype",
  136. ),
  137. }
  138. # our Unpickler sub-class to override methods and some dispatcher
  139. # functions for compat and uses a non-public class of the pickle module.
  140. class Unpickler(pkl._Unpickler):
  141. def find_class(self, module, name):
  142. # override superclass
  143. key = (module, name)
  144. module, name = _class_locations_map.get(key, key)
  145. return super().find_class(module, name)
  146. Unpickler.dispatch = copy.copy(Unpickler.dispatch)
  147. Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce
  148. def load_newobj(self) -> None:
  149. args = self.stack.pop()
  150. cls = self.stack[-1]
  151. # compat
  152. if issubclass(cls, Index):
  153. obj = object.__new__(cls)
  154. elif issubclass(cls, DatetimeArray) and not args:
  155. arr = np.array([], dtype="M8[ns]")
  156. obj = cls.__new__(cls, arr, arr.dtype)
  157. elif issubclass(cls, TimedeltaArray) and not args:
  158. arr = np.array([], dtype="m8[ns]")
  159. obj = cls.__new__(cls, arr, arr.dtype)
  160. elif cls is BlockManager and not args:
  161. obj = cls.__new__(cls, (), [], False)
  162. else:
  163. obj = cls.__new__(cls, *args)
  164. self.stack[-1] = obj
  165. Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj
  166. def load_newobj_ex(self) -> None:
  167. kwargs = self.stack.pop()
  168. args = self.stack.pop()
  169. cls = self.stack.pop()
  170. # compat
  171. if issubclass(cls, Index):
  172. obj = object.__new__(cls)
  173. else:
  174. obj = cls.__new__(cls, *args, **kwargs)
  175. self.append(obj)
  176. try:
  177. Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex
  178. except (AttributeError, KeyError):
  179. pass
  180. def load(fh, encoding: str | None = None, is_verbose: bool = False):
  181. """
  182. Load a pickle, with a provided encoding,
  183. Parameters
  184. ----------
  185. fh : a filelike object
  186. encoding : an optional encoding
  187. is_verbose : show exception output
  188. """
  189. try:
  190. fh.seek(0)
  191. if encoding is not None:
  192. up = Unpickler(fh, encoding=encoding)
  193. else:
  194. up = Unpickler(fh)
  195. # "Unpickler" has no attribute "is_verbose" [attr-defined]
  196. up.is_verbose = is_verbose # type: ignore[attr-defined]
  197. return up.load()
  198. except (ValueError, TypeError):
  199. raise
  200. def loads(
  201. bytes_object: bytes,
  202. *,
  203. fix_imports: bool = True,
  204. encoding: str = "ASCII",
  205. errors: str = "strict",
  206. ):
  207. """
  208. Analogous to pickle._loads.
  209. """
  210. fd = io.BytesIO(bytes_object)
  211. return Unpickler(
  212. fd, fix_imports=fix_imports, encoding=encoding, errors=errors
  213. ).load()
  214. @contextlib.contextmanager
  215. def patch_pickle() -> Generator[None, None, None]:
  216. """
  217. Temporarily patch pickle to use our unpickler.
  218. """
  219. orig_loads = pkl.loads
  220. try:
  221. setattr(pkl, "loads", loads)
  222. yield
  223. finally:
  224. setattr(pkl, "loads", orig_loads)