internals.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. import numpy as np
  2. from pandas._typing import ArrayLike
  3. from pandas import (
  4. DataFrame,
  5. Index,
  6. )
  7. from pandas.core.internals.api import _make_block
  8. from pandas.core.internals.managers import BlockManager as _BlockManager
  9. def create_dataframe_from_blocks(
  10. blocks: list[tuple[ArrayLike, np.ndarray]], index: Index, columns: Index
  11. ) -> DataFrame:
  12. """
  13. Low-level function to create a DataFrame from arrays as they are
  14. representing the block structure of the resulting DataFrame.
  15. Attention: this is an advanced, low-level function that should only be
  16. used if you know that the below-mentioned assumptions are guaranteed.
  17. If passing data that do not follow those assumptions, subsequent
  18. subsequent operations on the resulting DataFrame might lead to strange
  19. errors.
  20. For almost all use cases, you should use the standard pd.DataFrame(..)
  21. constructor instead. If you are planning to use this function, let us
  22. know by opening an issue at https://github.com/pandas-dev/pandas/issues.
  23. Assumptions:
  24. - The block arrays are either a 2D numpy array or a pandas ExtensionArray
  25. - In case of a numpy array, it is assumed to already be in the expected
  26. shape for Blocks (2D, (cols, rows), i.e. transposed compared to the
  27. DataFrame columns).
  28. - All arrays are taken as is (no type inference) and expected to have the
  29. correct size.
  30. - The placement arrays have the correct length (equalling the number of
  31. columns that its equivalent block array represents), and all placement
  32. arrays together form a complete set of 0 to n_columns - 1.
  33. Parameters
  34. ----------
  35. blocks : list of tuples of (block_array, block_placement)
  36. This should be a list of tuples existing of (block_array, block_placement),
  37. where:
  38. - block_array is a 2D numpy array or a 1D ExtensionArray, following the
  39. requirements listed above.
  40. - block_placement is a 1D integer numpy array
  41. index : Index
  42. The Index object for the `index` of the resulting DataFrame.
  43. columns : Index
  44. The Index object for the `columns` of the resulting DataFrame.
  45. Returns
  46. -------
  47. DataFrame
  48. """
  49. block_objs = [_make_block(*block) for block in blocks]
  50. axes = [columns, index]
  51. mgr = _BlockManager(block_objs, axes)
  52. return DataFrame._from_mgr(mgr, mgr.axes)