arrange.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. # SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com>
  2. # SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
  3. import pypdfium2._helpers as pdfium
  4. # TODO? consider dotted access
  5. from pypdfium2._cli._parsers import parse_numtext
  6. def attach(parser):
  7. parser.add_argument(
  8. "inputs",
  9. nargs = "+",
  10. help = "Sequence of PDF files.",
  11. )
  12. parser.add_argument(
  13. "--pages",
  14. nargs = "+",
  15. default = [],
  16. help = "Sequence of page texts, definig the pages to include from each PDF. Use '_' as placeholder for all pages."
  17. )
  18. parser.add_argument(
  19. "--passwords",
  20. nargs = "+",
  21. default = [],
  22. help = "Passwords to unlock encrypted PDFs. Any placeholder may be used for non-encrypted documents.",
  23. )
  24. parser.add_argument(
  25. "--output", "-o",
  26. required = True,
  27. help = "Target path for the output document",
  28. )
  29. def main(args):
  30. args.pages = [None if p == "_" else parse_numtext(p) for p in args.pages]
  31. for _ in range(len(args.inputs) - len(args.pages)):
  32. args.pages.append(None)
  33. for _ in range(len(args.inputs) - len(args.passwords)):
  34. args.passwords.append(None)
  35. dest_pdf = pdfium.PdfDocument.new()
  36. index = 0
  37. for in_path, pages, password in zip(args.inputs, args.pages, args.passwords):
  38. src_pdf = pdfium.PdfDocument(in_path, password=password)
  39. dest_pdf.import_pages(src_pdf, pages=pages)
  40. index += len(src_pdf)
  41. dest_pdf.save(args.output)