# pdfinfo.py (1.8 KB)
  1. # SPDX-FileCopyrightText: 2026 geisserml <geisserml@gmail.com>
  2. # SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
  3. import pypdfium2.raw as pdfium_c
  4. import pypdfium2.internal as pdfium_i
  5. from pypdfium2._cli._parsers import (
  6. add_input,
  7. add_n_digits,
  8. get_input,
  9. round_list,
  10. )
  11. def attach(parser):
  12. add_input(parser)
  13. add_n_digits(parser)
  14. def main(args):
  15. pdf = get_input(args)
  16. print(f"Page Count: {len(pdf)}")
  17. print(f"PDF Version: {pdf.get_version() / 10}")
  18. id_permanent = pdf.get_identifier(pdfium_c.FILEIDTYPE_PERMANENT)
  19. id_changing = pdf.get_identifier(pdfium_c.FILEIDTYPE_CHANGING)
  20. print(f"ID (permanent): {id_permanent}")
  21. print(f"ID (changing): {id_changing}")
  22. print(f"ID match? - {id_permanent == id_changing}")
  23. print(f"Tagged? - {pdf.is_tagged()}")
  24. pagemode = pdf.get_pagemode()
  25. if pagemode != pdfium_c.PAGEMODE_USENONE:
  26. print(f"Page Mode: {pdfium_i.PageModeToStr.get(pagemode)}")
  27. formtype = pdf.get_formtype()
  28. if formtype != pdfium_c.FORMTYPE_NONE:
  29. print(f"Form Type: {pdfium_i.FormTypeToStr.get(formtype)}")
  30. metadata = pdf.get_metadata_dict(skip_empty=True)
  31. if len(metadata) > 0:
  32. print("Metadata:")
  33. for key, value in metadata.items():
  34. print(f" {key}: {value}")
  35. for i in args.pages:
  36. print(f"\n# Page {i+1}")
  37. page = pdf[i]
  38. print(f"Size: {round_list(page.get_size(), args.n_digits)}")
  39. print(f"Rotation: {page.get_rotation()}")
  40. print(f"Bounding Box: {round_list(page.get_bbox(), args.n_digits)}")
  41. for box_name in ("media", "crop", "bleed", "trim", "art"):
  42. box = getattr(page, f"get_{box_name.lower()}box")(fallback_ok=False)
  43. if box:
  44. print(f"{box_name.capitalize()}Box: {round_list(box, args.n_digits)}")