pdfinfo.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. # SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com>
  2. # SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
  3. import pypdfium2.raw as pdfium_c
  4. import pypdfium2.internal as pdfium_i
  5. # TODO? consider dotted access
  6. from pypdfium2._cli._parsers import (
  7. add_input,
  8. add_n_digits,
  9. get_input,
  10. round_list,
  11. )
  12. def attach(parser):
  13. add_input(parser)
  14. add_n_digits(parser)
  15. def main(args):
  16. pdf = get_input(args)
  17. print(f"Page Count: {len(pdf)}")
  18. print(f"PDF Version: {pdf.get_version() / 10}")
  19. id_permanent = pdf.get_identifier(pdfium_c.FILEIDTYPE_PERMANENT)
  20. id_changing = pdf.get_identifier(pdfium_c.FILEIDTYPE_CHANGING)
  21. print(f"ID (permanent): {id_permanent}")
  22. print(f"ID (changing): {id_changing}")
  23. print(f"ID match? - {id_permanent == id_changing}")
  24. print(f"Tagged? - {pdf.is_tagged()}")
  25. pagemode = pdf.get_pagemode()
  26. if pagemode != pdfium_c.PAGEMODE_USENONE:
  27. print(f"Page Mode: {pdfium_i.PageModeToStr.get(pagemode)}")
  28. formtype = pdf.get_formtype()
  29. if formtype != pdfium_c.FORMTYPE_NONE:
  30. print(f"Form Type: {pdfium_i.FormTypeToStr.get(formtype)}")
  31. metadata = pdf.get_metadata_dict(skip_empty=True)
  32. if len(metadata) > 0:
  33. print("Metadata:")
  34. for key, value in metadata.items():
  35. print(f" {key}: {value}")
  36. for i in args.pages:
  37. print(f"\n# Page {i+1}")
  38. page = pdf[i]
  39. print(f"Size: {round_list(page.get_size(), args.n_digits)}")
  40. print(f"Rotation: {page.get_rotation()}")
  41. print(f"Bounding Box: {round_list(page.get_bbox(), args.n_digits)}")
  42. for box_name in ("media", "crop", "bleed", "trim", "art"):
  43. box = getattr(page, f"get_{box_name.lower()}box")(fallback_ok=False)
  44. if box:
  45. print(f"{box_name.capitalize()}Box: {round_list(box, args.n_digits)}")