bitmap.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. # SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com>
  2. # SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
  3. __all__ = ("PdfBitmap", "PdfBitmapInfo")
  4. import ctypes
  5. import logging
  6. import weakref
  7. from collections import namedtuple
  8. import pypdfium2.raw as pdfium_c
  9. import pypdfium2.internal as pdfium_i
  10. from pypdfium2._helpers.misc import PdfiumError
  11. logger = logging.getLogger(__name__)
  12. try:
  13. import PIL.Image
  14. except ImportError:
  15. PIL = None
  16. try:
  17. import numpy
  18. except ImportError:
  19. numpy = None
  20. class PdfBitmap (pdfium_i.AutoCloseable):
  21. """
  22. Bitmap helper class.
  23. Hint:
  24. This class provides built-in converters (e. g. :meth:`.to_pil`, :meth:`.to_numpy`) that may be used to create a different representation of the bitmap.
  25. Converters can be applied on :class:`.PdfBitmap` objects either as bound method (``bitmap.to_*()``), or as function (``PdfBitmap.to_*(bitmap)``)
  26. The second pattern is useful for API methods that need to apply a caller-provided converter (e. g. :meth:`.PdfDocument.render`)
  27. .. _PIL Modes: https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes
  28. Note:
  29. All attributes of :class:`.PdfBitmapInfo` are available in this class as well.
  30. Warning:
  31. ``bitmap.close()``, which frees the buffer of foreign bitmaps, is not validated for safety.
  32. A bitmap must not be closed when other objects still depend on its buffer!
  33. Attributes:
  34. raw (FPDF_BITMAP):
  35. The underlying PDFium bitmap handle.
  36. buffer (~ctypes.c_ubyte):
  37. A ctypes array representation of the pixel data (each item is an unsigned byte, i. e. a number ranging from 0 to 255).
  38. """
  39. def __init__(self, raw, buffer, width, height, stride, format, rev_byteorder, needs_free):
  40. self.raw, self.buffer, self.width, self.height = raw, buffer, width, height
  41. self.stride, self.format, self.rev_byteorder = stride, format, rev_byteorder
  42. self.n_channels = pdfium_i.BitmapTypeToNChannels[self.format]
  43. self.mode = (pdfium_i.BitmapTypeToStrReverse if self.rev_byteorder else pdfium_i.BitmapTypeToStr)[self.format]
  44. super().__init__(pdfium_c.FPDFBitmap_Destroy, needs_free=needs_free, obj=self.buffer)
  45. @property
  46. def parent(self): # AutoCloseable hook
  47. return None
  48. def get_info(self):
  49. """
  50. Returns:
  51. PdfBitmapInfo: A namedtuple describing the bitmap.
  52. """
  53. return PdfBitmapInfo(
  54. width=self.width, height=self.height, stride=self.stride, format=self.format,
  55. rev_byteorder=self.rev_byteorder, n_channels=self.n_channels, mode=self.mode,
  56. )
  57. @classmethod
  58. def from_raw(cls, raw, rev_byteorder=False, ex_buffer=None):
  59. """
  60. Construct a :class:`.PdfBitmap` wrapper around a raw PDFium bitmap handle.
  61. Parameters:
  62. raw (FPDF_BITMAP):
  63. PDFium bitmap handle.
  64. rev_byteorder (bool):
  65. Whether the bitmap uses reverse byte order.
  66. ex_buffer (~ctypes.c_ubyte | None):
  67. If the bitmap was created from a buffer allocated by Python/ctypes, pass in the ctypes array to keep it referenced.
  68. """
  69. width = pdfium_c.FPDFBitmap_GetWidth(raw)
  70. height = pdfium_c.FPDFBitmap_GetHeight(raw)
  71. format = pdfium_c.FPDFBitmap_GetFormat(raw)
  72. stride = pdfium_c.FPDFBitmap_GetStride(raw)
  73. if ex_buffer is None:
  74. needs_free = True
  75. buffer_ptr = pdfium_c.FPDFBitmap_GetBuffer(raw)
  76. if buffer_ptr is None:
  77. raise PdfiumError("Failed to get bitmap buffer (null pointer returned)")
  78. buffer = ctypes.cast(buffer_ptr, ctypes.POINTER(ctypes.c_ubyte * (stride * height))).contents
  79. else:
  80. needs_free = False
  81. buffer = ex_buffer
  82. return cls(
  83. raw=raw, buffer=buffer, width=width, height=height, stride=stride,
  84. format=format, rev_byteorder=rev_byteorder, needs_free=needs_free,
  85. )
  86. # TODO support setting stride if external buffer is provided
  87. @classmethod
  88. def new_native(cls, width, height, format, rev_byteorder=False, buffer=None):
  89. """
  90. Create a new bitmap using :func:`FPDFBitmap_CreateEx`, with a buffer allocated by Python/ctypes.
  91. Bitmaps created by this function are always packed (no unused bytes at line end).
  92. """
  93. stride = width * pdfium_i.BitmapTypeToNChannels[format]
  94. if buffer is None:
  95. buffer = (ctypes.c_ubyte * (stride * height))()
  96. raw = pdfium_c.FPDFBitmap_CreateEx(width, height, format, buffer, stride)
  97. # alternatively, we could call the constructor directly with the information from above
  98. return cls.from_raw(raw, rev_byteorder, buffer)
  99. @classmethod
  100. def new_foreign(cls, width, height, format, rev_byteorder=False, force_packed=False):
  101. """
  102. Create a new bitmap using :func:`FPDFBitmap_CreateEx`, with a buffer allocated by PDFium.
  103. Using this method is discouraged. Prefer :meth:`.new_native` instead.
  104. """
  105. stride = width * pdfium_i.BitmapTypeToNChannels[format] if force_packed else 0
  106. raw = pdfium_c.FPDFBitmap_CreateEx(width, height, format, None, stride)
  107. return cls.from_raw(raw, rev_byteorder)
  108. @classmethod
  109. def new_foreign_simple(cls, width, height, use_alpha, rev_byteorder=False):
  110. """
  111. Create a new bitmap using :func:`FPDFBitmap_Create`. The buffer is allocated by PDFium.
  112. The resulting bitmap is supposed to be packed (i. e. no gap of unused bytes between lines).
  113. Using this method is discouraged. Prefer :meth:`.new_native` instead.
  114. """
  115. raw = pdfium_c.FPDFBitmap_Create(width, height, use_alpha)
  116. return cls.from_raw(raw, rev_byteorder)
  117. def fill_rect(self, left, top, width, height, color):
  118. """
  119. Fill a rectangle on the bitmap with the given color.
  120. The coordinate system starts at the top left corner of the image.
  121. Note:
  122. This function replaces the color values in the given rectangle. It does not perform alpha compositing.
  123. Parameters:
  124. color (tuple[int, int, int, int]):
  125. RGBA fill color (a tuple of 4 integers ranging from 0 to 255).
  126. """
  127. c_color = pdfium_i.color_tohex(color, self.rev_byteorder)
  128. pdfium_c.FPDFBitmap_FillRect(self, left, top, width, height, c_color)
  129. # Requirement: If the result is a view of the buffer (not a copy), it keeps the referenced memory valid.
  130. #
  131. # Note that memory management differs between native and foreign bitmap buffers:
  132. # - With native bitmaps, the memory is allocated by python on creation of the buffer object (transparent).
  133. # - With foreign bitmaps, the buffer object is merely a view of memory allocated by pdfium and will be freed by finalizer (opaque).
  134. #
  135. # It is necessary that receivers correctly handle both cases, e.g. by keeping the buffer object itself alive.
  136. # As of May 2023, this seems to hold true for NumPy and PIL. New converters should be carefully tested.
  137. #
  138. # We could consider attaching a buffer keep-alive finalizer to any converted objects referencing the buffer,
  139. # but then we'd have to rely on third parties to actually create a reference at all times, otherwise we would unnecessarily delay releasing memory.
  140. def to_numpy(self):
  141. """
  142. Convert the bitmap to a :mod:`numpy` array.
  143. The array contains as many rows as the bitmap is high.
  144. Each row contains as many pixels as the bitmap is wide.
  145. The length of each pixel corresponds to the number of channels.
  146. The resulting array is supposed to share memory with the original bitmap buffer,
  147. so changes to the buffer should be reflected in the array, and vice versa.
  148. Returns:
  149. numpy.ndarray: NumPy array (representation of the bitmap buffer).
  150. """
  151. # https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html#numpy.ndarray
  152. array = numpy.ndarray(
  153. # layout: row major
  154. shape = (self.height, self.width, self.n_channels),
  155. dtype = ctypes.c_ubyte,
  156. buffer = self.buffer,
  157. # number of bytes per item for each nesting level (outer->inner, i. e. row, pixel, value)
  158. strides = (self.stride, self.n_channels, 1),
  159. )
  160. return array
  161. def to_pil(self):
  162. """
  163. Convert the bitmap to a :mod:`PIL` image, using :func:`PIL.Image.frombuffer`.
  164. For ``RGBA``, ``RGBX`` and ``L`` buffers, PIL is supposed to share memory with
  165. the original bitmap buffer, so changes to the buffer should be reflected in the image, and vice versa.
  166. Otherwise, PIL will make a copy of the data.
  167. Returns:
  168. PIL.Image.Image: PIL image (representation or copy of the bitmap buffer).
  169. .. versionchanged:: 4.16 Set ``image.readonly = False`` so that changes to the image are also reflected in the buffer.
  170. """
  171. # https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.frombuffer
  172. # https://pillow.readthedocs.io/en/stable/handbook/writing-your-own-image-plugin.html#the-raw-decoder
  173. dest_mode = pdfium_i.BitmapTypeToStrReverse[self.format]
  174. image = PIL.Image.frombuffer(
  175. dest_mode, # target color format
  176. (self.width, self.height), # size
  177. self.buffer, # buffer
  178. "raw", # decoder
  179. self.mode, # input color format
  180. self.stride, # bytes per line
  181. 1, # orientation (top->bottom)
  182. )
  183. image.readonly = False
  184. return image
  185. @classmethod
  186. def from_pil(cls, pil_image, recopy=False):
  187. """
  188. Convert a :mod:`PIL` image to a PDFium bitmap.
  189. Due to the restricted number of color formats and bit depths supported by PDFium's
  190. bitmap implementation, this may be a lossy operation.
  191. Bitmaps returned by this function should be treated as immutable (i.e. don't call :meth:`.fill_rect`).
  192. Parameters:
  193. pil_image (PIL.Image.Image):
  194. The image.
  195. Returns:
  196. PdfBitmap: PDFium bitmap (with a copy of the PIL image's data).
  197. .. deprecated:: 4.25
  198. The *recopy* parameter has been deprecated.
  199. """
  200. if pil_image.mode in pdfium_i.BitmapStrToConst:
  201. # PIL always seems to represent BGR(A/X) input as RGB(A/X), so this code passage is probably only hit for L
  202. format = pdfium_i.BitmapStrToConst[pil_image.mode]
  203. else:
  204. pil_image = _pil_convert_for_pdfium(pil_image)
  205. format = pdfium_i.BitmapStrReverseToConst[pil_image.mode]
  206. py_buffer = pil_image.tobytes()
  207. if recopy:
  208. buffer = (ctypes.c_ubyte * len(py_buffer)).from_buffer_copy(py_buffer)
  209. else:
  210. buffer = py_buffer
  211. w, h = pil_image.size
  212. return cls.new_native(w, h, format, rev_byteorder=False, buffer=buffer)
  213. # TODO implement from_numpy()
  214. def _pil_convert_for_pdfium(pil_image):
  215. # FIXME? convoluted / hard to understand; improve control flow
  216. if pil_image.mode == "1":
  217. pil_image = pil_image.convert("L")
  218. elif pil_image.mode.startswith("RGB"):
  219. pass
  220. elif "A" in pil_image.mode:
  221. pil_image = pil_image.convert("RGBA")
  222. else:
  223. pil_image = pil_image.convert("RGB")
  224. # convert RGB(A/X) to BGR(A) for PDFium
  225. if pil_image.mode == "RGB":
  226. r, g, b = pil_image.split()
  227. pil_image = PIL.Image.merge("RGB", (b, g, r))
  228. elif pil_image.mode == "RGBA":
  229. r, g, b, a = pil_image.split()
  230. pil_image = PIL.Image.merge("RGBA", (b, g, r, a))
  231. elif pil_image.mode == "RGBX":
  232. # technically the x channel may be unnecessary, but preserve what the caller passes in
  233. r, g, b, x = pil_image.split()
  234. pil_image = PIL.Image.merge("RGBX", (b, g, r, x))
  235. return pil_image
  236. PdfBitmapInfo = namedtuple("PdfBitmapInfo", "width height stride format rev_byteorder n_channels mode")
  237. """
  238. Attributes:
  239. width (int):
  240. Width of the bitmap (horizontal size).
  241. height (int):
  242. Height of the bitmap (vertical size).
  243. stride (int):
  244. Number of bytes per line in the bitmap buffer.
  245. Depending on how the bitmap was created, there may be a padding of unused bytes at the end of each line, so this value can be greater than ``width * n_channels``.
  246. format (int):
  247. PDFium bitmap format constant (:attr:`FPDFBitmap_*`)
  248. rev_byteorder (bool):
  249. Whether the bitmap is using reverse byte order.
  250. n_channels (int):
  251. Number of channels per pixel.
  252. mode (str):
  253. The bitmap format as string (see `PIL Modes`_).
  254. """