render.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. # SPDX-FileCopyrightText: 2026 geisserml <geisserml@gmail.com>
  2. # SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
  3. import os
  4. import math
  5. import types
  6. import logging
  7. import colorsys
  8. import functools
  9. from pathlib import Path
  10. import multiprocessing as mp
  11. import concurrent.futures as ft
  12. from importlib.util import find_spec
  13. import pypdfium2._helpers as pdfium
  14. import pypdfium2.internal as pdfium_i
  15. import pypdfium2.raw as pdfium_c
  16. from pypdfium2._cli._parsers import (
  17. add_input, get_input,
  18. setup_logging,
  19. iterator_hasvalue,
  20. BooleanOptionalAction,
  21. )
# Availability flags for the optional imaging backends.
# find_spec() only looks the module up; it does not import it.
have_pil = find_spec("PIL") is not None
have_cv2 = find_spec("cv2") is not None

# Module-level logger shared by all functions and engines in this file.
logger = logging.getLogger(__name__)
  25. def _bitmap_wrapper_foreign_simple(width, height, format, *args, **kwargs):
  26. if format == pdfium_c.FPDFBitmap_BGRx:
  27. use_alpha = False
  28. elif format == pdfium_c.FPDFBitmap_BGRA:
  29. use_alpha = True
  30. else:
  31. raise RuntimeError(f"Cannot create foreign_simple bitmap with bitmap type {pdfium_i.BitmapTypeToStr[format]}.")
  32. return pdfium.PdfBitmap.new_foreign_simple(width, height, use_alpha, *args, **kwargs)
  33. BitmapMakers = dict(
  34. native = pdfium.PdfBitmap.new_native,
  35. foreign = pdfium.PdfBitmap.new_foreign,
  36. foreign_packed = functools.partial(pdfium.PdfBitmap.new_foreign, force_packed=True),
  37. foreign_simple = _bitmap_wrapper_foreign_simple,
  38. )
  39. ColorSchemeFields = ("path_fill", "path_stroke", "text_fill", "text_stroke")
  40. ColorOpts = dict(metavar="C", nargs=4, type=int)
  41. SampleTheme = dict(
  42. # TODO improve colors - currently it's just some random colors to distinguish the different drawings
  43. path_fill = (170, 100, 0, 255), # dark orange
  44. path_stroke = (0, 150, 255, 255), # sky blue
  45. text_fill = (255, 255, 255, 255), # white
  46. text_stroke = (150, 255, 0, 255), # green
  47. )
  48. def attach(parser):
  49. add_input(parser, pages=True)
  50. parser.add_argument(
  51. "--output", "-o",
  52. type = lambda p: Path(p).expanduser().resolve(),
  53. required = True,
  54. help = "Output directory where the serially numbered images shall be placed.",
  55. )
  56. parser.add_argument(
  57. "--prefix",
  58. help = "Custom prefix for the images. Defaults to the input filename's stem.",
  59. )
  60. parser.add_argument(
  61. "--format", "-f",
  62. type = str.lower,
  63. help = "The image format to use (default: conditional).",
  64. )
  65. engines_map = {"pil": PILEngine, "numpy+pil": NumpyPILEngine, "numpy+cv2": NumpyCV2Engine}
  66. parser.add_argument(
  67. "--engine",
  68. dest = "engine_cls",
  69. type = lambda k: engines_map[k.lower()],
  70. help = f"The saver engine to use {tuple(engines_map.keys())}",
  71. )
  72. parser.add_argument(
  73. "--scale",
  74. default = 1,
  75. type = float,
  76. help = "Define the resolution of the output images. By default, one PDF point (1/72in) is rendered to 1x1 pixel. This factor scales the number of pixels that represent one point.",
  77. )
  78. parser.add_argument(
  79. "--rotation",
  80. default = 0,
  81. type = int,
  82. choices = (0, 90, 180, 270),
  83. help = "Rotate pages by 90, 180 or 270 degrees.",
  84. )
  85. parser.add_argument(
  86. "--fill-color",
  87. help = "Color the bitmap will be filled with before rendering. Shall be given in RGBA format as a sequence of integers ranging from 0 to 255. Defaults to white.",
  88. **ColorOpts,
  89. )
  90. parser.add_argument(
  91. "--optimize-mode",
  92. choices = ("lcd", "print"),
  93. help = "The rendering optimisation mode. None if not given.",
  94. )
  95. parser.add_argument(
  96. "--crop",
  97. metavar="C", nargs=4, type=float,
  98. default = (0, 0, 0, 0),
  99. help = "Amount to crop from (left, bottom, right, top).",
  100. )
  101. parser.add_argument(
  102. "--draw-annots",
  103. action = BooleanOptionalAction,
  104. default = True,
  105. help = "Whether annotations may be shown (default: true).",
  106. )
  107. parser.add_argument(
  108. "--draw-forms",
  109. action = BooleanOptionalAction,
  110. default = True,
  111. help = "Whether forms may be shown (default: true).",
  112. )
  113. parser.add_argument(
  114. "--no-antialias",
  115. nargs = "+",
  116. default = [],
  117. choices = ("text", "image", "path"),
  118. type = str.lower,
  119. help = "Item types that shall not be smoothed.",
  120. )
  121. parser.add_argument(
  122. "--force-halftone",
  123. action = "store_true",
  124. help = "Always use halftone for image stretching.",
  125. )
  126. bitmap = parser.add_argument_group(
  127. title = "Bitmap options",
  128. description = "Bitmap config, including pixel format.",
  129. )
  130. bitmap.add_argument(
  131. "--bitmap-maker",
  132. choices = BitmapMakers.keys(),
  133. default = "native",
  134. help = "The bitmap maker to use.",
  135. type = str.lower,
  136. )
  137. bitmap.add_argument(
  138. "--grayscale",
  139. action = "store_true",
  140. help = "Whether to render in grayscale mode (no colors).",
  141. )
  142. bitmap.add_argument(
  143. "--byteorder",
  144. dest = "rev_byteorder",
  145. type = lambda v: {"bgr": False, "rgb": True}[v.lower()],
  146. help = "Whether to use BGR or RGB byteorder (default: conditional).",
  147. )
  148. bitmap.add_argument(
  149. "--x-channel",
  150. dest = "prefer_bgrx",
  151. action = BooleanOptionalAction,
  152. help = "Whether to prefer BGRx/RGBx over BGR/RGB (default: conditional).",
  153. )
  154. bitmap.add_argument(
  155. "--maybe-alpha",
  156. action = BooleanOptionalAction,
  157. help = "Whether to use BGRA if page content has transparency. Note, this makes format selection page-dependent. As this behavior can be confusing, it is not currently the default, but recommended for performance in these cases.",
  158. )
  159. # TODO expose force_bitmap_format
  160. parallel = parser.add_argument_group(
  161. title = "Parallelization",
  162. description = "Options for rendering with multiple processes.",
  163. )
  164. parallel.add_argument(
  165. "--linear",
  166. nargs = "?",
  167. type = int,
  168. const = math.inf,
  169. help = "Render non-parallel if page count is less or equal to the specified value (default: 4). If this flag is given without a value, then render linear regardless of document length.",
  170. )
  171. parallel.add_argument(
  172. "--processes",
  173. default = os.cpu_count(),
  174. type = int,
  175. help = "The maximum number of parallel rendering processes. Defaults to the number of CPU cores.",
  176. )
  177. parallel.add_argument(
  178. "--parallel-strategy",
  179. choices = ("spawn", "forkserver", "fork"),
  180. default = "spawn",
  181. type = str.lower,
  182. help = "The process start method to use. ('fork' is discouraged due to stability issues.)",
  183. )
  184. parallel.add_argument(
  185. "--parallel-lib",
  186. choices = ("mp", "ft"),
  187. default = "mp",
  188. type = str.lower,
  189. help = "The parallelization module to use (mp = multiprocessing, ft = concurrent.futures).",
  190. )
  191. parallel.add_argument(
  192. "--parallel-map",
  193. type = str.lower,
  194. help = "The map function to use (backend specific, the default is an iterative map)."
  195. )
  196. color_scheme = parser.add_argument_group(
  197. title = "Flat color scheme",
  198. description = "Options for using pdfium's color scheme renderer. Note that this may flatten different colors into one, so the usability of this is limited. Alternatively, consider post-processing with lightness inversion (see below).",
  199. )
  200. color_scheme.add_argument(
  201. "--sample-theme",
  202. action = "store_true",
  203. help = "Use a dark background sample theme as base. Explicit color params override selectively."
  204. )
  205. color_scheme.add_argument("--path-fill", **ColorOpts)
  206. color_scheme.add_argument("--path-stroke", **ColorOpts)
  207. color_scheme.add_argument("--text-fill", **ColorOpts)
  208. color_scheme.add_argument("--text-stroke", **ColorOpts)
  209. color_scheme.add_argument(
  210. "--fill-to-stroke",
  211. action = "store_true",
  212. help = "When rendering with custom color scheme, only draw borders around fill areas using the `path_stroke` color, instead of filling with the `path_fill` color. This is actually recommended, since with a single fill color for paths the boundaries of adjacent fill paths are less visible.",
  213. )
  214. postproc = parser.add_argument_group(
  215. title = "Post processing",
  216. description = "Options to post-process rendered images. Note, this may have a strongly negative impact on performance.",
  217. )
  218. postproc.add_argument(
  219. "--invert-lightness",
  220. action = "store_true",
  221. help = "Invert lightness using the HLS color space (e.g. white<->black, dark_blue<->light_blue). The intent is to achieve a dark theme for documents with light background, while providing better visual results than classical color inversion or a flat pdfium color scheme. However, note that --optimize-mode lcd is not recommendable when inverting lightness.",
  222. )
  223. postproc.add_argument(
  224. "--exclude-images",
  225. action = "store_true",
  226. help = "Whether to exclude PDF images from lightness inversion.",
  227. )
  228. class SavingEngine:
  229. def __init__(self, saver_args, postproc_kwargs):
  230. self.args = saver_args
  231. self.postproc_kwargs = postproc_kwargs
  232. def _get_path(self, i, ext):
  233. args = self.args
  234. return args.output_dir / f"{args.prefix}{i+1:0{args.n_digits}d}.{ext}"
  235. def __call__(self, i, bitmap, page):
  236. if self.args.maybe_alpha and self.args.format in ("jpg", "jpeg") and pdfium_c.FPDFPage_HasTransparency(page):
  237. # alternatively, we could perhaps convert to RGB
  238. logger.info("Page has transparency - overriding output format to PNG.")
  239. ext = "png"
  240. else:
  241. ext = self.args.format
  242. out_path = self._get_path(i, ext)
  243. self._saving_hook(out_path, bitmap, page, self.postproc_kwargs)
  244. logger.info(f"Wrote page {i+1} as {out_path.name}")
  245. class PILEngine (SavingEngine):
  246. def do_imports(self):
  247. if not self.postproc_kwargs["invert_lightness"]:
  248. return
  249. logger.debug("PIL engine imports for post-processing")
  250. global PIL
  251. import PIL.Image
  252. import PIL.ImageOps
  253. import PIL.ImageFilter
  254. import PIL.ImageDraw
  255. _to_pil_hook = staticmethod(pdfium.PdfBitmap.to_pil)
  256. def _saving_hook(self, out_path, bitmap, page, postproc_kwargs):
  257. posconv = bitmap.get_posconv(page)
  258. pil_image = self._to_pil_hook(bitmap)
  259. pil_image = self.postprocess(pil_image, page, posconv, **postproc_kwargs)
  260. pil_image.save(out_path)
  261. @staticmethod
  262. def _invert_px_lightness(r, g, b):
  263. h, l, s = colorsys.rgb_to_hls(r, g, b)
  264. l = 1 - l
  265. return colorsys.hls_to_rgb(h, l, s)
  266. LINV_LUT_SIZE = 17
  267. @classmethod
  268. @functools.lru_cache(maxsize=1)
  269. def _get_linv_lut(cls):
  270. return PIL.ImageFilter.Color3DLUT.generate(cls.LINV_LUT_SIZE, cls._invert_px_lightness)
  271. @classmethod
  272. def postprocess(cls, src_image, page, posconv, invert_lightness, exclude_images):
  273. dst_image = src_image
  274. if invert_lightness:
  275. if src_image.mode == "L":
  276. dst_image = PIL.ImageOps.invert(src_image)
  277. else:
  278. dst_image = dst_image.filter(cls._get_linv_lut())
  279. if exclude_images:
  280. # FIXME pdfium does not seem to provide APIs to translate XObject to page coordinates, so not sure how to handle images nested in XObjects.
  281. # FIXME we'd also like to take alpha masks into account, but this may be difficult as long as pdfium does not expose them directly.
  282. have_images, obj_walker = iterator_hasvalue( page.get_objects([pdfium_c.FPDF_PAGEOBJ_IMAGE], max_depth=1) )
  283. if have_images:
  284. mask = PIL.Image.new("1", src_image.size)
  285. draw = PIL.ImageDraw.Draw(mask)
  286. for obj in obj_walker:
  287. qpoints = [posconv.to_bitmap(x, y) for x, y in obj.get_quad_points()]
  288. draw.polygon(qpoints, fill=1)
  289. dst_image.paste(src_image, mask=mask)
  290. return dst_image
  291. class NumpyPILEngine (PILEngine):
  292. def do_imports(self):
  293. logger.debug("NumPy+PIL engine imports")
  294. global PIL
  295. import PIL.Image
  296. super().do_imports()
  297. @staticmethod
  298. def _to_pil_hook(bitmap):
  299. return PIL.Image.fromarray(bitmap.to_numpy(), bitmap.mode)
  300. class NumpyCV2Engine (SavingEngine):
  301. def do_imports(self):
  302. logger.debug("NumPy+cv2 engine imports")
  303. global cv2, np
  304. import cv2
  305. if self.postproc_kwargs["exclude_images"]:
  306. import numpy as np
  307. def _saving_hook(self, out_path, bitmap, page, postproc_kwargs):
  308. np_array = bitmap.to_numpy()
  309. np_array = self.postprocess(np_array, bitmap, page, **postproc_kwargs)
  310. cv2.imwrite(str(out_path), np_array)
  311. @classmethod
  312. def postprocess(cls, src_image, bitmap, page, invert_lightness, exclude_images):
  313. dst_image = src_image
  314. if invert_lightness:
  315. if bitmap.format == pdfium_c.FPDFBitmap_Gray:
  316. dst_image = ~src_image
  317. else:
  318. convert_to, convert_from = (cv2.COLOR_RGB2HLS, cv2.COLOR_HLS2RGB) if bitmap.rev_byteorder else (cv2.COLOR_BGR2HLS, cv2.COLOR_HLS2BGR)
  319. dst_image = cv2.cvtColor(dst_image, convert_to)
  320. h, l, s = cv2.split(dst_image)
  321. l = ~l
  322. dst_image = cv2.merge([h, l, s])
  323. dst_image = cv2.cvtColor(dst_image, convert_from)
  324. if exclude_images:
  325. assert bitmap.format != pdfium_c.FPDFBitmap_BGRx, "Not sure how to paste with mask on {RGB,BGR}X image using cv2" # FIXME?
  326. posconv = bitmap.get_posconv(page)
  327. have_images, obj_walker = iterator_hasvalue( page.get_objects([pdfium_c.FPDF_PAGEOBJ_IMAGE], max_depth=1) )
  328. if have_images:
  329. mask = np.zeros((bitmap.height, bitmap.width, 1), np.uint8)
  330. for obj in obj_walker:
  331. qpoints = np.array([posconv.to_bitmap(x, y) for x, y in obj.get_quad_points()], np.int32)
  332. cv2.fillPoly(mask, [qpoints], 1)
  333. dst_image = cv2.copyTo(src_image, mask=mask, dst=dst_image)
  334. return dst_image
  335. def _render_parallel_init(logging_init, engine_init, input, password, may_init_forms, kwargs, engine):
  336. logging_init()
  337. logger.info(f"Initializing data for process {os.getpid()}")
  338. engine_init()
  339. pdf = pdfium.PdfDocument(input, password=password, autoclose=True)
  340. if may_init_forms:
  341. pdf.init_forms()
  342. global ProcObjs
  343. ProcObjs = (pdf, kwargs, engine)
  344. def _render_job(i, pdf, kwargs, engine):
  345. # logger.info(f"Started page {i+1} ...")
  346. page = pdf[i]
  347. bitmap = page.render(**kwargs)
  348. engine(i, bitmap, page)
  349. def _render_parallel_job(i):
  350. global ProcObjs
  351. _render_job(i, *ProcObjs)
  352. def _do_nothing(): pass
  353. # TODO turn into a python-usable API yielding output paths as they are written
  354. def main(args):
  355. if not args.output.is_dir():
  356. # make sure the output directory exists (PIL throws an error if it doesn't, but cv2 may silently skip)
  357. raise ValueError(f"Output path is not an existing directory: {args.output!r}")
  358. pdf = get_input(args, init_forms=args.draw_forms)
  359. pdf_len = len(pdf)
  360. if not all(0 <= i < pdf_len for i in args.pages):
  361. raise ValueError("Out-of-bounds page indices are prohibited.")
  362. if len(args.pages) != len(set(args.pages)):
  363. raise ValueError("Duplicate page indices are prohibited.")
  364. if args.prefix is None:
  365. args.prefix = f"{args.input.stem}_"
  366. if args.fill_color is None:
  367. args.fill_color = (0, 0, 0, 255) if args.sample_theme else (255, 255, 255, 255)
  368. if args.format is None:
  369. # can't use jpeg with transparency rsp. when there is an alpha channel
  370. args.format = "jpg" if args.fill_color[3] == 255 else "png"
  371. if args.linear is None:
  372. args.linear = 4
  373. # numpy+cv2 is much faster for PNG, and PIL faster for JPG, but this might simply be due to different encoding defaults
  374. if args.engine_cls is None:
  375. assert have_pil or have_cv2, "Either pillow or numpy+cv2 must be installed for rendering CLI."
  376. if (not have_pil) or (have_cv2 and args.format == "png"):
  377. args.engine_cls = NumpyCV2Engine
  378. else:
  379. args.engine_cls = PILEngine
  380. # PIL is faster with rev_byteorder and prefer_bgrx = True, as this achieves a natively supported pixel format. For numpy+cv2 there doesn't seem to be a difference.
  381. if args.rev_byteorder is None:
  382. args.rev_byteorder = issubclass(args.engine_cls, PILEngine)
  383. if args.prefer_bgrx is None:
  384. # PIL can't save BGRX as PNG
  385. args.prefer_bgrx = issubclass(args.engine_cls, PILEngine) and args.format != "png"
  386. cs_kwargs = dict()
  387. if args.sample_theme:
  388. cs_kwargs.update(**SampleTheme)
  389. cs_kwargs.update(**{f: getattr(args, f) for f in ColorSchemeFields if getattr(args, f)})
  390. color_scheme = pdfium.PdfColorScheme(**cs_kwargs) if cs_kwargs else None
  391. kwargs = dict(
  392. scale = args.scale,
  393. rotation = args.rotation,
  394. crop = args.crop,
  395. grayscale = args.grayscale,
  396. fill_color = args.fill_color,
  397. optimize_mode = args.optimize_mode,
  398. draw_annots = args.draw_annots,
  399. may_draw_forms = args.draw_forms,
  400. force_halftone = args.force_halftone,
  401. rev_byteorder = args.rev_byteorder,
  402. prefer_bgrx = args.prefer_bgrx,
  403. maybe_alpha = args.maybe_alpha,
  404. bitmap_maker = BitmapMakers[args.bitmap_maker],
  405. color_scheme = color_scheme,
  406. fill_to_stroke = args.fill_to_stroke,
  407. )
  408. for type in args.no_antialias:
  409. kwargs[f"no_smooth{type}"] = True
  410. saver_args = types.SimpleNamespace(
  411. output_dir = args.output,
  412. prefix = args.prefix,
  413. n_digits = len(str(pdf_len)),
  414. format = args.format,
  415. maybe_alpha = args.maybe_alpha,
  416. )
  417. postproc_kwargs = dict(
  418. invert_lightness = args.invert_lightness,
  419. exclude_images = args.exclude_images,
  420. )
  421. if args.invert_lightness and args.optimize_mode == "lcd":
  422. logger.warning("LCD optimization clashes with lightness inversion, as post-processing colors defeats the idea of subpixel rendering.")
  423. print_args = vars(args).copy()
  424. del print_args["subcommand"], print_args["pages"]
  425. if print_args["password"]:
  426. print_args["password"] = "<obfuscated>"
  427. logger.debug(f"{print_args}") # TODO prettier?
  428. if color_scheme:
  429. logger.debug(f"{color_scheme}")
  430. engine = args.engine_cls(saver_args, postproc_kwargs)
  431. if len(args.pages) <= args.linear:
  432. logger.info("Linear rendering ...")
  433. engine.do_imports()
  434. for i in args.pages:
  435. _render_job(i, pdf, kwargs, engine)
  436. else:
  437. logger.info("Parallel rendering ...")
  438. ctx = mp.get_context(args.parallel_strategy)
  439. pool_backends = dict(
  440. mp = (ctx.Pool, "imap"),
  441. ft = (functools.partial(ft.ProcessPoolExecutor, mp_context=ctx), "map"),
  442. )
  443. pool_ctor, map_attr = pool_backends[args.parallel_lib]
  444. if args.parallel_map:
  445. map_attr = args.parallel_map
  446. if args.parallel_strategy == "fork":
  447. logging_init, engine_init = _do_nothing, _do_nothing
  448. engine.do_imports()
  449. else:
  450. logging_init, engine_init = setup_logging, engine.do_imports
  451. pool_kwargs = dict(
  452. initializer = _render_parallel_init,
  453. initargs = (logging_init, engine_init, pdf._input, args.password, args.draw_forms, kwargs, engine),
  454. )
  455. n_procs = min(args.processes, len(args.pages))
  456. with pool_ctor(n_procs, **pool_kwargs) as pool:
  457. map_func = getattr(pool, map_attr)
  458. for _ in map_func(_render_parallel_job, args.pages):
  459. pass