imgtopdf.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. # SPDX-FileCopyrightText: 2026 geisserml <geisserml@gmail.com>
  2. # SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
  3. # TODO test-cover converting non-jpeg format
  4. from pathlib import Path
  5. import pypdfium2._helpers as pdfium
  6. from pypdfium2._lazy import Lazy
  7. def attach(parser):
  8. parser.add_argument(
  9. "images",
  10. nargs = "+",
  11. help = "Input images",
  12. type = Path,
  13. )
  14. parser.add_argument(
  15. "--output", "-o",
  16. required = True,
  17. type = Path,
  18. help = "Target path for the new PDF"
  19. )
  20. parser.add_argument(
  21. "--inline",
  22. action = "store_true",
  23. help = "If JPEG, whether to use PDFium's inline loading function."
  24. )
  25. def main(args):
  26. # Rudimentary image to PDF conversion (testing / proof of concept)
  27. # Due to limitations in PDFium's public API, this function may be inefficient/lossy for non-JPEG input.
  28. # The technically best available open-source tool for image to PDF conversion is probably img2pdf (although its code style can be regarded as displeasing).
  29. pdf = pdfium.PdfDocument.new()
  30. for fp in args.images:
  31. image_obj = pdfium.PdfImage.new(pdf)
  32. # Simple check whether the file is a JPEG image - a better implementation could use mimetypes, python-magic, or PIL
  33. if fp.suffix.lower() in (".jpg", ".jpeg"):
  34. image_obj.load_jpeg(fp, inline=args.inline)
  35. else:
  36. pil_image = Lazy.PIL_Image.open(fp)
  37. bitmap = pdfium.PdfBitmap.from_pil(pil_image)
  38. pil_image.close()
  39. image_obj.set_bitmap(bitmap)
  40. bitmap.close()
  41. w, h = image_obj.get_px_size()
  42. image_obj.set_matrix( pdfium.PdfMatrix().scale(w, h) )
  43. page = pdf.new_page(w, h)
  44. page.insert_obj(image_obj)
  45. page.gen_content()
  46. image_obj.close() # no-op
  47. page.close()
  48. pdf.save(args.output)
  49. pdf.close()