style.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. # This is where we handle translating css styles into openpyxl styles
  2. # and cascading those from parent to child in the dom.
  3. try:
  4. from openpyxl.cell import cell
  5. from openpyxl.styles import (
  6. Font,
  7. Alignment,
  8. PatternFill,
  9. NamedStyle,
  10. Border,
  11. Side,
  12. Color,
  13. )
  14. from openpyxl.styles.fills import FILL_SOLID
  15. from openpyxl.styles.numbers import FORMAT_CURRENCY_USD_SIMPLE, FORMAT_PERCENTAGE
  16. from openpyxl.styles.colors import BLACK
  17. except:
  18. import warnings
  19. warnings.warn(
  20. "Can not import openpyxl, some functions in the ppstructure may not work. Please manually install openpyxl before using ppstructure."
  21. )
  22. FORMAT_DATE_MMDDYYYY = "mm/dd/yyyy"
  23. def colormap(color):
  24. """
  25. Convenience for looking up known colors
  26. """
  27. cmap = {"black": BLACK}
  28. return cmap.get(color, color)
  29. def style_string_to_dict(style):
  30. """
  31. Convert css style string to a python dictionary
  32. """
  33. def clean_split(string, delim):
  34. return (s.strip() for s in string.split(delim))
  35. styles = [clean_split(s, ":") for s in style.split(";") if ":" in s]
  36. return dict(styles)
  37. def get_side(style, name):
  38. return {
  39. "border_style": style.get("border-{}-style".format(name)),
  40. "color": colormap(style.get("border-{}-color".format(name))),
  41. }
  42. known_styles = {}
  43. def style_dict_to_named_style(style_dict, number_format=None):
  44. """
  45. Change css style (stored in a python dictionary) to openpyxl NamedStyle
  46. """
  47. style_and_format_string = str(
  48. {
  49. "style_dict": style_dict,
  50. "parent": style_dict.parent,
  51. "number_format": number_format,
  52. }
  53. )
  54. if style_and_format_string not in known_styles:
  55. # Font
  56. font = Font(
  57. bold=style_dict.get("font-weight") == "bold",
  58. color=style_dict.get_color("color", None),
  59. size=style_dict.get("font-size"),
  60. )
  61. # Alignment
  62. alignment = Alignment(
  63. horizontal=style_dict.get("text-align", "general"),
  64. vertical=style_dict.get("vertical-align"),
  65. wrap_text=style_dict.get("white-space", "nowrap") == "normal",
  66. )
  67. # Fill
  68. bg_color = style_dict.get_color("background-color")
  69. fg_color = style_dict.get_color("foreground-color", Color())
  70. fill_type = style_dict.get("fill-type")
  71. if bg_color and bg_color != "transparent":
  72. fill = PatternFill(
  73. fill_type=fill_type or FILL_SOLID,
  74. start_color=bg_color,
  75. end_color=fg_color,
  76. )
  77. else:
  78. fill = PatternFill()
  79. # Border
  80. border = Border(
  81. left=Side(**get_side(style_dict, "left")),
  82. right=Side(**get_side(style_dict, "right")),
  83. top=Side(**get_side(style_dict, "top")),
  84. bottom=Side(**get_side(style_dict, "bottom")),
  85. diagonal=Side(**get_side(style_dict, "diagonal")),
  86. diagonal_direction=None,
  87. outline=Side(**get_side(style_dict, "outline")),
  88. vertical=None,
  89. horizontal=None,
  90. )
  91. name = "Style {}".format(len(known_styles) + 1)
  92. pyxl_style = NamedStyle(
  93. name=name,
  94. font=font,
  95. fill=fill,
  96. alignment=alignment,
  97. border=border,
  98. number_format=number_format,
  99. )
  100. known_styles[style_and_format_string] = pyxl_style
  101. return known_styles[style_and_format_string]
  102. class StyleDict(dict):
  103. """
  104. It's like a dictionary, but it looks for items in the parent dictionary
  105. """
  106. def __init__(self, *args, **kwargs):
  107. self.parent = kwargs.pop("parent", None)
  108. super(StyleDict, self).__init__(*args, **kwargs)
  109. def __getitem__(self, item):
  110. if item in self:
  111. return super(StyleDict, self).__getitem__(item)
  112. elif self.parent:
  113. return self.parent[item]
  114. else:
  115. raise KeyError("{} not found".format(item))
  116. def __hash__(self):
  117. return hash(tuple([(k, self.get(k)) for k in self._keys()]))
  118. # Yielding the keys avoids creating unnecessary data structures
  119. # and happily works with both python2 and python3 where the
  120. # .keys() method is a dictionary_view in python3 and a list in python2.
  121. def _keys(self):
  122. yielded = set()
  123. for k in self.keys():
  124. yielded.add(k)
  125. yield k
  126. if self.parent:
  127. for k in self.parent._keys():
  128. if k not in yielded:
  129. yielded.add(k)
  130. yield k
  131. def get(self, k, d=None):
  132. try:
  133. return self[k]
  134. except KeyError:
  135. return d
  136. def get_color(self, k, d=None):
  137. """
  138. Strip leading # off colors if necessary
  139. """
  140. color = self.get(k, d)
  141. if hasattr(color, "startswith") and color.startswith("#"):
  142. color = color[1:]
  143. if (
  144. len(color) == 3
  145. ): # Premailers reduces colors like #00ff00 to #0f0, openpyxl doesn't like that
  146. color = "".join(2 * c for c in color)
  147. return color
  148. class Element(object):
  149. """
  150. Our base class for representing an html element along with a cascading style.
  151. The element is created along with a parent so that the StyleDict that we store
  152. can point to the parent's StyleDict.
  153. """
  154. def __init__(self, element, parent=None):
  155. self.element = element
  156. self.number_format = None
  157. parent_style = parent.style_dict if parent else None
  158. self.style_dict = StyleDict(
  159. style_string_to_dict(element.get("style", "")), parent=parent_style
  160. )
  161. self._style_cache = None
  162. def style(self):
  163. """
  164. Turn the css styles for this element into an openpyxl NamedStyle.
  165. """
  166. if not self._style_cache:
  167. self._style_cache = style_dict_to_named_style(
  168. self.style_dict, number_format=self.number_format
  169. )
  170. return self._style_cache
  171. def get_dimension(self, dimension_key):
  172. """
  173. Extracts the dimension from the style dict of the Element and returns it as a float.
  174. """
  175. dimension = self.style_dict.get(dimension_key)
  176. if dimension:
  177. if dimension[-2:] in ["px", "em", "pt", "in", "cm"]:
  178. dimension = dimension[:-2]
  179. dimension = float(dimension)
  180. return dimension
  181. class Table(Element):
  182. """
  183. The concrete implementations of Elements are semantically named for the types of elements we are interested in.
  184. This defines a very concrete tree structure for html tables that we expect to deal with. I prefer this compared to
  185. allowing Element to have an arbitrary number of children and dealing with an abstract element tree.
  186. """
  187. def __init__(self, table):
  188. """
  189. takes an html table object (from lxml)
  190. """
  191. super(Table, self).__init__(table)
  192. table_head = table.find("thead")
  193. self.head = (
  194. TableHead(table_head, parent=self) if table_head is not None else None
  195. )
  196. table_body = table.find("tbody")
  197. self.body = TableBody(
  198. table_body if table_body is not None else table, parent=self
  199. )
  200. class TableHead(Element):
  201. """
  202. This class maps to the `<th>` element of the html table.
  203. """
  204. def __init__(self, head, parent=None):
  205. super(TableHead, self).__init__(head, parent=parent)
  206. self.rows = [TableRow(tr, parent=self) for tr in head.findall("tr")]
  207. class TableBody(Element):
  208. """
  209. This class maps to the `<tbody>` element of the html table.
  210. """
  211. def __init__(self, body, parent=None):
  212. super(TableBody, self).__init__(body, parent=parent)
  213. self.rows = [TableRow(tr, parent=self) for tr in body.findall("tr")]
  214. class TableRow(Element):
  215. """
  216. This class maps to the `<tr>` element of the html table.
  217. """
  218. def __init__(self, tr, parent=None):
  219. super(TableRow, self).__init__(tr, parent=parent)
  220. self.cells = [
  221. TableCell(cell, parent=self) for cell in tr.findall("th") + tr.findall("td")
  222. ]
  223. def element_to_string(el):
  224. return _element_to_string(el).strip()
  225. def _element_to_string(el):
  226. string = ""
  227. for x in el.iterchildren():
  228. string += "\n" + _element_to_string(x)
  229. text = el.text.strip() if el.text else ""
  230. tail = el.tail.strip() if el.tail else ""
  231. return text + string + "\n" + tail
  232. class TableCell(Element):
  233. """
  234. This class maps to the `<td>` element of the html table.
  235. """
  236. CELL_TYPES = {
  237. "TYPE_STRING",
  238. "TYPE_FORMULA",
  239. "TYPE_NUMERIC",
  240. "TYPE_BOOL",
  241. "TYPE_CURRENCY",
  242. "TYPE_PERCENTAGE",
  243. "TYPE_NULL",
  244. "TYPE_INLINE",
  245. "TYPE_ERROR",
  246. "TYPE_FORMULA_CACHE_STRING",
  247. "TYPE_INTEGER",
  248. }
  249. def __init__(self, cell, parent=None):
  250. super(TableCell, self).__init__(cell, parent=parent)
  251. self.value = element_to_string(cell)
  252. self.number_format = self.get_number_format()
  253. def data_type(self):
  254. cell_types = self.CELL_TYPES & set(self.element.get("class", "").split())
  255. if cell_types:
  256. if "TYPE_FORMULA" in cell_types:
  257. # Make sure TYPE_FORMULA takes precedence over the other classes in the set.
  258. cell_type = "TYPE_FORMULA"
  259. elif cell_types & {"TYPE_CURRENCY", "TYPE_INTEGER", "TYPE_PERCENTAGE"}:
  260. cell_type = "TYPE_NUMERIC"
  261. else:
  262. cell_type = cell_types.pop()
  263. else:
  264. cell_type = "TYPE_STRING"
  265. return getattr(cell, cell_type)
  266. def get_number_format(self):
  267. if "TYPE_CURRENCY" in self.element.get("class", "").split():
  268. return FORMAT_CURRENCY_USD_SIMPLE
  269. if "TYPE_INTEGER" in self.element.get("class", "").split():
  270. return "#,##0"
  271. if "TYPE_PERCENTAGE" in self.element.get("class", "").split():
  272. return FORMAT_PERCENTAGE
  273. if "TYPE_DATE" in self.element.get("class", "").split():
  274. return FORMAT_DATE_MMDDYYYY
  275. if self.data_type() == cell.TYPE_NUMERIC:
  276. try:
  277. int(self.value)
  278. except ValueError:
  279. return "#,##0.##"
  280. else:
  281. return "#,##0"
  282. def format(self, cell):
  283. cell.style = self.style()
  284. data_type = self.data_type()
  285. if data_type:
  286. cell.data_type = data_type