# compressors.py
"""
Patched ``BZ2File`` and ``LZMAFile`` classes that handle pickle protocol 5.
"""
  4. from __future__ import annotations
  5. from pickle import PickleBuffer
  6. from pandas.compat._constants import PY310
  7. try:
  8. import bz2
  9. has_bz2 = True
  10. except ImportError:
  11. has_bz2 = False
  12. try:
  13. import lzma
  14. has_lzma = True
  15. except ImportError:
  16. has_lzma = False
  17. def flatten_buffer(
  18. b: bytes | bytearray | memoryview | PickleBuffer,
  19. ) -> bytes | bytearray | memoryview:
  20. """
  21. Return some 1-D `uint8` typed buffer.
  22. Coerces anything that does not match that description to one that does
  23. without copying if possible (otherwise will copy).
  24. """
  25. if isinstance(b, (bytes, bytearray)):
  26. return b
  27. if not isinstance(b, PickleBuffer):
  28. b = PickleBuffer(b)
  29. try:
  30. # coerce to 1-D `uint8` C-contiguous `memoryview` zero-copy
  31. return b.raw()
  32. except BufferError:
  33. # perform in-memory copy if buffer is not contiguous
  34. return memoryview(b).tobytes("A")
  35. if has_bz2:
  36. class BZ2File(bz2.BZ2File):
  37. if not PY310:
  38. def write(self, b) -> int:
  39. # Workaround issue where `bz2.BZ2File` expects `len`
  40. # to return the number of bytes in `b` by converting
  41. # `b` into something that meets that constraint with
  42. # minimal copying.
  43. #
  44. # Note: This is fixed in Python 3.10.
  45. return super().write(flatten_buffer(b))
  46. if has_lzma:
  47. class LZMAFile(lzma.LZMAFile):
  48. if not PY310:
  49. def write(self, b) -> int:
  50. # Workaround issue where `lzma.LZMAFile` expects `len`
  51. # to return the number of bytes in `b` by converting
  52. # `b` into something that meets that constraint with
  53. # minimal copying.
  54. #
  55. # Note: This is fixed in Python 3.10.
  56. return super().write(flatten_buffer(b))