| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748 |
- """Python implementation of ASCII85/ASCIIHex decoder (Adobe version)."""
- import re
- from base64 import a85decode
- from binascii import unhexlify
- start_re = re.compile(rb"^\s*<?\s*~\s*")
- end_re = re.compile(rb"\s*~\s*>?\s*$")
- def ascii85decode(data: bytes) -> bytes:
- """In ASCII85 encoding, every four bytes are encoded with five ASCII
- letters, using 85 different types of characters (as 256**4 < 85**5).
- When the length of the original bytes is not a multiple of 4, a special
- rule is used for round up.
- Adobe's ASCII85 implementation expects the input to be terminated
- by `b"~>"`, and (though this is absent from the PDF spec) it can
- also begin with `b"<~"`. We can't reliably expect this to be the
- case, and there can be off-by-one errors in stream lengths which
- mean we only see `~` at the end. Worse yet, `<` and `>` are
- ASCII85 digits, so we can't strip them. We settle on a compromise
- where we strip leading `<~` or `~` and trailing `~` or `~>`.
- """
- data = start_re.sub(b"", data)
- data = end_re.sub(b"", data)
- return a85decode(data)
- bws_re = re.compile(rb"\s")
- def asciihexdecode(data: bytes) -> bytes:
- """ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1
- For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
- ASCIIHexDecode filter produces one byte of binary data. All white-space
- characters are ignored. A right angle bracket character (>) indicates
- EOD. Any other characters will cause an error. If the filter encounters
- the EOD marker after reading an odd number of hexadecimal digits, it
- will behave as if a 0 followed the last digit.
- """
- data = bws_re.sub(b"", data)
- idx = data.find(b">")
- if idx != -1:
- data = data[:idx]
- if idx % 2 == 1:
- data += b"0"
- return unhexlify(data)
|