imgtopdf.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. # SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com>
  2. # SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
  3. # TODO test-cover converting non-jpeg format
  4. from pathlib import Path
  5. import pypdfium2._helpers as pdfium
  6. try:
  7. import PIL.Image
  8. except ImportError:
  9. PIL = None
  10. def attach(parser):
  11. parser.add_argument(
  12. "images",
  13. nargs = "+",
  14. help = "Input images",
  15. type = Path,
  16. )
  17. parser.add_argument(
  18. "--output", "-o",
  19. required = True,
  20. type = Path,
  21. help = "Target path for the new PDF"
  22. )
  23. parser.add_argument(
  24. "--inline",
  25. action = "store_true",
  26. help = "If JPEG, whether to use PDFium's inline loading function."
  27. )
  28. def main(args):
  29. # Rudimentary image to PDF conversion (testing / proof of concept)
  30. # Due to limitations in PDFium's public API, this function may be inefficient/lossy for non-JPEG input.
  31. # The technically best available open-source tool for image to PDF conversion is probably img2pdf (although its code style can be regarded as displeasing).
  32. # Development note: We are closing objects explicitly because loading JPEGs non-inline binds file handles to the PDF, which need to be released as soon as possible. Without this, we have already run into "OSError: Too many open files" while testing.
  33. pdf = pdfium.PdfDocument.new()
  34. for fp in args.images:
  35. image_obj = pdfium.PdfImage.new(pdf)
  36. # Simple check whether the file is a JPEG image - a better implementation could use mimetypes, python-magic, or PIL
  37. if fp.suffix.lower() in (".jpg", ".jpeg"):
  38. image_obj.load_jpeg(fp, inline=args.inline)
  39. else:
  40. pil_image = PIL.Image.open(fp)
  41. bitmap = pdfium.PdfBitmap.from_pil(pil_image)
  42. pil_image.close()
  43. image_obj.set_bitmap(bitmap)
  44. bitmap.close()
  45. w, h = image_obj.get_size()
  46. image_obj.set_matrix( pdfium.PdfMatrix().scale(w, h) )
  47. page = pdf.new_page(w, h)
  48. page.insert_obj(image_obj)
  49. page.gen_content()
  50. image_obj.close() # no-op
  51. page.close()
  52. pdf.save(args.output)
  53. pdf.close()