string.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. """
  2. Module for formatting output data in console (to string).
  3. """
  4. from __future__ import annotations
  5. from shutil import get_terminal_size
  6. from typing import TYPE_CHECKING
  7. import numpy as np
  8. from pandas.io.formats.printing import pprint_thing
  9. if TYPE_CHECKING:
  10. from collections.abc import Iterable
  11. from pandas.io.formats.format import DataFrameFormatter
  12. class StringFormatter:
  13. """Formatter for string representation of a dataframe."""
  14. def __init__(self, fmt: DataFrameFormatter, line_width: int | None = None) -> None:
  15. self.fmt = fmt
  16. self.adj = fmt.adj
  17. self.frame = fmt.frame
  18. self.line_width = line_width
  19. def to_string(self) -> str:
  20. text = self._get_string_representation()
  21. if self.fmt.should_show_dimensions:
  22. text = f"{text}{self.fmt.dimensions_info}"
  23. return text
  24. def _get_strcols(self) -> list[list[str]]:
  25. strcols = self.fmt.get_strcols()
  26. if self.fmt.is_truncated:
  27. strcols = self._insert_dot_separators(strcols)
  28. return strcols
  29. def _get_string_representation(self) -> str:
  30. if self.fmt.frame.empty:
  31. return self._empty_info_line
  32. strcols = self._get_strcols()
  33. if self.line_width is None:
  34. # no need to wrap around just print the whole frame
  35. return self.adj.adjoin(1, *strcols)
  36. if self._need_to_wrap_around:
  37. return self._join_multiline(strcols)
  38. return self._fit_strcols_to_terminal_width(strcols)
  39. @property
  40. def _empty_info_line(self) -> str:
  41. return (
  42. f"Empty {type(self.frame).__name__}\n"
  43. f"Columns: {pprint_thing(self.frame.columns)}\n"
  44. f"Index: {pprint_thing(self.frame.index)}"
  45. )
  46. @property
  47. def _need_to_wrap_around(self) -> bool:
  48. return bool(self.fmt.max_cols is None or self.fmt.max_cols > 0)
  49. def _insert_dot_separators(self, strcols: list[list[str]]) -> list[list[str]]:
  50. str_index = self.fmt._get_formatted_index(self.fmt.tr_frame)
  51. index_length = len(str_index)
  52. if self.fmt.is_truncated_horizontally:
  53. strcols = self._insert_dot_separator_horizontal(strcols, index_length)
  54. if self.fmt.is_truncated_vertically:
  55. strcols = self._insert_dot_separator_vertical(strcols, index_length)
  56. return strcols
  57. @property
  58. def _adjusted_tr_col_num(self) -> int:
  59. return self.fmt.tr_col_num + 1 if self.fmt.index else self.fmt.tr_col_num
  60. def _insert_dot_separator_horizontal(
  61. self, strcols: list[list[str]], index_length: int
  62. ) -> list[list[str]]:
  63. strcols.insert(self._adjusted_tr_col_num, [" ..."] * index_length)
  64. return strcols
  65. def _insert_dot_separator_vertical(
  66. self, strcols: list[list[str]], index_length: int
  67. ) -> list[list[str]]:
  68. n_header_rows = index_length - len(self.fmt.tr_frame)
  69. row_num = self.fmt.tr_row_num
  70. for ix, col in enumerate(strcols):
  71. cwidth = self.adj.len(col[row_num])
  72. if self.fmt.is_truncated_horizontally:
  73. is_dot_col = ix == self._adjusted_tr_col_num
  74. else:
  75. is_dot_col = False
  76. if cwidth > 3 or is_dot_col:
  77. dots = "..."
  78. else:
  79. dots = ".."
  80. if ix == 0 and self.fmt.index:
  81. dot_mode = "left"
  82. elif is_dot_col:
  83. cwidth = 4
  84. dot_mode = "right"
  85. else:
  86. dot_mode = "right"
  87. dot_str = self.adj.justify([dots], cwidth, mode=dot_mode)[0]
  88. col.insert(row_num + n_header_rows, dot_str)
  89. return strcols
  90. def _join_multiline(self, strcols_input: Iterable[list[str]]) -> str:
  91. lwidth = self.line_width
  92. adjoin_width = 1
  93. strcols = list(strcols_input)
  94. if self.fmt.index:
  95. idx = strcols.pop(0)
  96. lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width
  97. col_widths = [
  98. np.array([self.adj.len(x) for x in col]).max() if len(col) > 0 else 0
  99. for col in strcols
  100. ]
  101. assert lwidth is not None
  102. col_bins = _binify(col_widths, lwidth)
  103. nbins = len(col_bins)
  104. str_lst = []
  105. start = 0
  106. for i, end in enumerate(col_bins):
  107. row = strcols[start:end]
  108. if self.fmt.index:
  109. row.insert(0, idx)
  110. if nbins > 1:
  111. nrows = len(row[-1])
  112. if end <= len(strcols) and i < nbins - 1:
  113. row.append([" \\"] + [" "] * (nrows - 1))
  114. else:
  115. row.append([" "] * nrows)
  116. str_lst.append(self.adj.adjoin(adjoin_width, *row))
  117. start = end
  118. return "\n\n".join(str_lst)
  119. def _fit_strcols_to_terminal_width(self, strcols: list[list[str]]) -> str:
  120. from pandas import Series
  121. lines = self.adj.adjoin(1, *strcols).split("\n")
  122. max_len = Series(lines).str.len().max()
  123. # plus truncate dot col
  124. width, _ = get_terminal_size()
  125. dif = max_len - width
  126. # '+ 1' to avoid too wide repr (GH PR #17023)
  127. adj_dif = dif + 1
  128. col_lens = Series([Series(ele).str.len().max() for ele in strcols])
  129. n_cols = len(col_lens)
  130. counter = 0
  131. while adj_dif > 0 and n_cols > 1:
  132. counter += 1
  133. mid = round(n_cols / 2)
  134. mid_ix = col_lens.index[mid]
  135. col_len = col_lens[mid_ix]
  136. # adjoin adds one
  137. adj_dif -= col_len + 1
  138. col_lens = col_lens.drop(mid_ix)
  139. n_cols = len(col_lens)
  140. # subtract index column
  141. max_cols_fitted = n_cols - self.fmt.index
  142. # GH-21180. Ensure that we print at least two.
  143. max_cols_fitted = max(max_cols_fitted, 2)
  144. self.fmt.max_cols_fitted = max_cols_fitted
  145. # Call again _truncate to cut frame appropriately
  146. # and then generate string representation
  147. self.fmt.truncate()
  148. strcols = self._get_strcols()
  149. return self.adj.adjoin(1, *strcols)
  150. def _binify(cols: list[int], line_width: int) -> list[int]:
  151. adjoin_width = 1
  152. bins = []
  153. curr_width = 0
  154. i_last_column = len(cols) - 1
  155. for i, w in enumerate(cols):
  156. w_adjoined = w + adjoin_width
  157. curr_width += w_adjoined
  158. if i_last_column == i:
  159. wrap = curr_width + 1 > line_width and i > 0
  160. else:
  161. wrap = curr_width + 2 > line_width and i > 0
  162. if wrap:
  163. bins.append(i)
  164. curr_width = w_adjoined
  165. bins.append(len(cols))
  166. return bins