ccitt.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614
# CCITT Fax decoder
#
# Bugs: uncompressed mode untested.
#
# cf.
#  ITU-T Recommendation T.4
#    "Standardization of Group 3 facsimile terminals
#    for document transmission"
#  ITU-T Recommendation T.6
#    "FACSIMILE CODING SCHEMES AND CODING CONTROL FUNCTIONS
#    FOR GROUP 4 FACSIMILE APPARATUS"
  12. import array
  13. from typing import (
  14. Any,
  15. Callable,
  16. Dict,
  17. Iterator,
  18. List,
  19. MutableSequence,
  20. Optional,
  21. Sequence,
  22. Union,
  23. cast,
  24. )
  25. from pdfminer.pdfexceptions import PDFException, PDFValueError
  26. def get_bytes(data: bytes) -> Iterator[int]:
  27. yield from data
# Workaround https://github.com/python/mypy/issues/731
# A node of the prefix-code tree built by BitParser.add(): a mutable
# sequence indexed by bit value (0 or 1).  Each slot holds a nested node,
# a decoded value (int or str), or None when no code has been assigned.
BitParserState = MutableSequence[Any]
# A better definition (not supported by mypy) would be:
# BitParserState = MutableSequence[Union["BitParserState", int, str, None]]
  32. class BitParser:
  33. _state: BitParserState
  34. # _accept is declared Optional solely as a workaround for
  35. # https://github.com/python/mypy/issues/708
  36. _accept: Optional[Callable[[Any], BitParserState]]
  37. def __init__(self) -> None:
  38. self._pos = 0
  39. @classmethod
  40. def add(cls, root: BitParserState, v: Union[int, str], bits: str) -> None:
  41. p: BitParserState = root
  42. b = None
  43. for i in range(len(bits)):
  44. if i > 0:
  45. assert b is not None
  46. if p[b] is None:
  47. p[b] = [None, None]
  48. p = p[b]
  49. if bits[i] == "1":
  50. b = 1
  51. else:
  52. b = 0
  53. assert b is not None
  54. p[b] = v
  55. def feedbytes(self, data: bytes) -> None:
  56. for byte in get_bytes(data):
  57. for m in (128, 64, 32, 16, 8, 4, 2, 1):
  58. self._parse_bit(byte & m)
  59. def _parse_bit(self, x: object) -> None:
  60. if x:
  61. v = self._state[1]
  62. else:
  63. v = self._state[0]
  64. self._pos += 1
  65. if isinstance(v, list):
  66. self._state = v
  67. else:
  68. assert self._accept is not None
  69. self._state = self._accept(v)
  70. class CCITTG4Parser(BitParser):
  71. MODE = [None, None]
  72. BitParser.add(MODE, 0, "1")
  73. BitParser.add(MODE, +1, "011")
  74. BitParser.add(MODE, -1, "010")
  75. BitParser.add(MODE, "h", "001")
  76. BitParser.add(MODE, "p", "0001")
  77. BitParser.add(MODE, +2, "000011")
  78. BitParser.add(MODE, -2, "000010")
  79. BitParser.add(MODE, +3, "0000011")
  80. BitParser.add(MODE, -3, "0000010")
  81. BitParser.add(MODE, "u", "0000001111")
  82. BitParser.add(MODE, "x1", "0000001000")
  83. BitParser.add(MODE, "x2", "0000001001")
  84. BitParser.add(MODE, "x3", "0000001010")
  85. BitParser.add(MODE, "x4", "0000001011")
  86. BitParser.add(MODE, "x5", "0000001100")
  87. BitParser.add(MODE, "x6", "0000001101")
  88. BitParser.add(MODE, "x7", "0000001110")
  89. BitParser.add(MODE, "e", "000000000001000000000001")
  90. WHITE = [None, None]
  91. BitParser.add(WHITE, 0, "00110101")
  92. BitParser.add(WHITE, 1, "000111")
  93. BitParser.add(WHITE, 2, "0111")
  94. BitParser.add(WHITE, 3, "1000")
  95. BitParser.add(WHITE, 4, "1011")
  96. BitParser.add(WHITE, 5, "1100")
  97. BitParser.add(WHITE, 6, "1110")
  98. BitParser.add(WHITE, 7, "1111")
  99. BitParser.add(WHITE, 8, "10011")
  100. BitParser.add(WHITE, 9, "10100")
  101. BitParser.add(WHITE, 10, "00111")
  102. BitParser.add(WHITE, 11, "01000")
  103. BitParser.add(WHITE, 12, "001000")
  104. BitParser.add(WHITE, 13, "000011")
  105. BitParser.add(WHITE, 14, "110100")
  106. BitParser.add(WHITE, 15, "110101")
  107. BitParser.add(WHITE, 16, "101010")
  108. BitParser.add(WHITE, 17, "101011")
  109. BitParser.add(WHITE, 18, "0100111")
  110. BitParser.add(WHITE, 19, "0001100")
  111. BitParser.add(WHITE, 20, "0001000")
  112. BitParser.add(WHITE, 21, "0010111")
  113. BitParser.add(WHITE, 22, "0000011")
  114. BitParser.add(WHITE, 23, "0000100")
  115. BitParser.add(WHITE, 24, "0101000")
  116. BitParser.add(WHITE, 25, "0101011")
  117. BitParser.add(WHITE, 26, "0010011")
  118. BitParser.add(WHITE, 27, "0100100")
  119. BitParser.add(WHITE, 28, "0011000")
  120. BitParser.add(WHITE, 29, "00000010")
  121. BitParser.add(WHITE, 30, "00000011")
  122. BitParser.add(WHITE, 31, "00011010")
  123. BitParser.add(WHITE, 32, "00011011")
  124. BitParser.add(WHITE, 33, "00010010")
  125. BitParser.add(WHITE, 34, "00010011")
  126. BitParser.add(WHITE, 35, "00010100")
  127. BitParser.add(WHITE, 36, "00010101")
  128. BitParser.add(WHITE, 37, "00010110")
  129. BitParser.add(WHITE, 38, "00010111")
  130. BitParser.add(WHITE, 39, "00101000")
  131. BitParser.add(WHITE, 40, "00101001")
  132. BitParser.add(WHITE, 41, "00101010")
  133. BitParser.add(WHITE, 42, "00101011")
  134. BitParser.add(WHITE, 43, "00101100")
  135. BitParser.add(WHITE, 44, "00101101")
  136. BitParser.add(WHITE, 45, "00000100")
  137. BitParser.add(WHITE, 46, "00000101")
  138. BitParser.add(WHITE, 47, "00001010")
  139. BitParser.add(WHITE, 48, "00001011")
  140. BitParser.add(WHITE, 49, "01010010")
  141. BitParser.add(WHITE, 50, "01010011")
  142. BitParser.add(WHITE, 51, "01010100")
  143. BitParser.add(WHITE, 52, "01010101")
  144. BitParser.add(WHITE, 53, "00100100")
  145. BitParser.add(WHITE, 54, "00100101")
  146. BitParser.add(WHITE, 55, "01011000")
  147. BitParser.add(WHITE, 56, "01011001")
  148. BitParser.add(WHITE, 57, "01011010")
  149. BitParser.add(WHITE, 58, "01011011")
  150. BitParser.add(WHITE, 59, "01001010")
  151. BitParser.add(WHITE, 60, "01001011")
  152. BitParser.add(WHITE, 61, "00110010")
  153. BitParser.add(WHITE, 62, "00110011")
  154. BitParser.add(WHITE, 63, "00110100")
  155. BitParser.add(WHITE, 64, "11011")
  156. BitParser.add(WHITE, 128, "10010")
  157. BitParser.add(WHITE, 192, "010111")
  158. BitParser.add(WHITE, 256, "0110111")
  159. BitParser.add(WHITE, 320, "00110110")
  160. BitParser.add(WHITE, 384, "00110111")
  161. BitParser.add(WHITE, 448, "01100100")
  162. BitParser.add(WHITE, 512, "01100101")
  163. BitParser.add(WHITE, 576, "01101000")
  164. BitParser.add(WHITE, 640, "01100111")
  165. BitParser.add(WHITE, 704, "011001100")
  166. BitParser.add(WHITE, 768, "011001101")
  167. BitParser.add(WHITE, 832, "011010010")
  168. BitParser.add(WHITE, 896, "011010011")
  169. BitParser.add(WHITE, 960, "011010100")
  170. BitParser.add(WHITE, 1024, "011010101")
  171. BitParser.add(WHITE, 1088, "011010110")
  172. BitParser.add(WHITE, 1152, "011010111")
  173. BitParser.add(WHITE, 1216, "011011000")
  174. BitParser.add(WHITE, 1280, "011011001")
  175. BitParser.add(WHITE, 1344, "011011010")
  176. BitParser.add(WHITE, 1408, "011011011")
  177. BitParser.add(WHITE, 1472, "010011000")
  178. BitParser.add(WHITE, 1536, "010011001")
  179. BitParser.add(WHITE, 1600, "010011010")
  180. BitParser.add(WHITE, 1664, "011000")
  181. BitParser.add(WHITE, 1728, "010011011")
  182. BitParser.add(WHITE, 1792, "00000001000")
  183. BitParser.add(WHITE, 1856, "00000001100")
  184. BitParser.add(WHITE, 1920, "00000001101")
  185. BitParser.add(WHITE, 1984, "000000010010")
  186. BitParser.add(WHITE, 2048, "000000010011")
  187. BitParser.add(WHITE, 2112, "000000010100")
  188. BitParser.add(WHITE, 2176, "000000010101")
  189. BitParser.add(WHITE, 2240, "000000010110")
  190. BitParser.add(WHITE, 2304, "000000010111")
  191. BitParser.add(WHITE, 2368, "000000011100")
  192. BitParser.add(WHITE, 2432, "000000011101")
  193. BitParser.add(WHITE, 2496, "000000011110")
  194. BitParser.add(WHITE, 2560, "000000011111")
  195. BLACK = [None, None]
  196. BitParser.add(BLACK, 0, "0000110111")
  197. BitParser.add(BLACK, 1, "010")
  198. BitParser.add(BLACK, 2, "11")
  199. BitParser.add(BLACK, 3, "10")
  200. BitParser.add(BLACK, 4, "011")
  201. BitParser.add(BLACK, 5, "0011")
  202. BitParser.add(BLACK, 6, "0010")
  203. BitParser.add(BLACK, 7, "00011")
  204. BitParser.add(BLACK, 8, "000101")
  205. BitParser.add(BLACK, 9, "000100")
  206. BitParser.add(BLACK, 10, "0000100")
  207. BitParser.add(BLACK, 11, "0000101")
  208. BitParser.add(BLACK, 12, "0000111")
  209. BitParser.add(BLACK, 13, "00000100")
  210. BitParser.add(BLACK, 14, "00000111")
  211. BitParser.add(BLACK, 15, "000011000")
  212. BitParser.add(BLACK, 16, "0000010111")
  213. BitParser.add(BLACK, 17, "0000011000")
  214. BitParser.add(BLACK, 18, "0000001000")
  215. BitParser.add(BLACK, 19, "00001100111")
  216. BitParser.add(BLACK, 20, "00001101000")
  217. BitParser.add(BLACK, 21, "00001101100")
  218. BitParser.add(BLACK, 22, "00000110111")
  219. BitParser.add(BLACK, 23, "00000101000")
  220. BitParser.add(BLACK, 24, "00000010111")
  221. BitParser.add(BLACK, 25, "00000011000")
  222. BitParser.add(BLACK, 26, "000011001010")
  223. BitParser.add(BLACK, 27, "000011001011")
  224. BitParser.add(BLACK, 28, "000011001100")
  225. BitParser.add(BLACK, 29, "000011001101")
  226. BitParser.add(BLACK, 30, "000001101000")
  227. BitParser.add(BLACK, 31, "000001101001")
  228. BitParser.add(BLACK, 32, "000001101010")
  229. BitParser.add(BLACK, 33, "000001101011")
  230. BitParser.add(BLACK, 34, "000011010010")
  231. BitParser.add(BLACK, 35, "000011010011")
  232. BitParser.add(BLACK, 36, "000011010100")
  233. BitParser.add(BLACK, 37, "000011010101")
  234. BitParser.add(BLACK, 38, "000011010110")
  235. BitParser.add(BLACK, 39, "000011010111")
  236. BitParser.add(BLACK, 40, "000001101100")
  237. BitParser.add(BLACK, 41, "000001101101")
  238. BitParser.add(BLACK, 42, "000011011010")
  239. BitParser.add(BLACK, 43, "000011011011")
  240. BitParser.add(BLACK, 44, "000001010100")
  241. BitParser.add(BLACK, 45, "000001010101")
  242. BitParser.add(BLACK, 46, "000001010110")
  243. BitParser.add(BLACK, 47, "000001010111")
  244. BitParser.add(BLACK, 48, "000001100100")
  245. BitParser.add(BLACK, 49, "000001100101")
  246. BitParser.add(BLACK, 50, "000001010010")
  247. BitParser.add(BLACK, 51, "000001010011")
  248. BitParser.add(BLACK, 52, "000000100100")
  249. BitParser.add(BLACK, 53, "000000110111")
  250. BitParser.add(BLACK, 54, "000000111000")
  251. BitParser.add(BLACK, 55, "000000100111")
  252. BitParser.add(BLACK, 56, "000000101000")
  253. BitParser.add(BLACK, 57, "000001011000")
  254. BitParser.add(BLACK, 58, "000001011001")
  255. BitParser.add(BLACK, 59, "000000101011")
  256. BitParser.add(BLACK, 60, "000000101100")
  257. BitParser.add(BLACK, 61, "000001011010")
  258. BitParser.add(BLACK, 62, "000001100110")
  259. BitParser.add(BLACK, 63, "000001100111")
  260. BitParser.add(BLACK, 64, "0000001111")
  261. BitParser.add(BLACK, 128, "000011001000")
  262. BitParser.add(BLACK, 192, "000011001001")
  263. BitParser.add(BLACK, 256, "000001011011")
  264. BitParser.add(BLACK, 320, "000000110011")
  265. BitParser.add(BLACK, 384, "000000110100")
  266. BitParser.add(BLACK, 448, "000000110101")
  267. BitParser.add(BLACK, 512, "0000001101100")
  268. BitParser.add(BLACK, 576, "0000001101101")
  269. BitParser.add(BLACK, 640, "0000001001010")
  270. BitParser.add(BLACK, 704, "0000001001011")
  271. BitParser.add(BLACK, 768, "0000001001100")
  272. BitParser.add(BLACK, 832, "0000001001101")
  273. BitParser.add(BLACK, 896, "0000001110010")
  274. BitParser.add(BLACK, 960, "0000001110011")
  275. BitParser.add(BLACK, 1024, "0000001110100")
  276. BitParser.add(BLACK, 1088, "0000001110101")
  277. BitParser.add(BLACK, 1152, "0000001110110")
  278. BitParser.add(BLACK, 1216, "0000001110111")
  279. BitParser.add(BLACK, 1280, "0000001010010")
  280. BitParser.add(BLACK, 1344, "0000001010011")
  281. BitParser.add(BLACK, 1408, "0000001010100")
  282. BitParser.add(BLACK, 1472, "0000001010101")
  283. BitParser.add(BLACK, 1536, "0000001011010")
  284. BitParser.add(BLACK, 1600, "0000001011011")
  285. BitParser.add(BLACK, 1664, "0000001100100")
  286. BitParser.add(BLACK, 1728, "0000001100101")
  287. BitParser.add(BLACK, 1792, "00000001000")
  288. BitParser.add(BLACK, 1856, "00000001100")
  289. BitParser.add(BLACK, 1920, "00000001101")
  290. BitParser.add(BLACK, 1984, "000000010010")
  291. BitParser.add(BLACK, 2048, "000000010011")
  292. BitParser.add(BLACK, 2112, "000000010100")
  293. BitParser.add(BLACK, 2176, "000000010101")
  294. BitParser.add(BLACK, 2240, "000000010110")
  295. BitParser.add(BLACK, 2304, "000000010111")
  296. BitParser.add(BLACK, 2368, "000000011100")
  297. BitParser.add(BLACK, 2432, "000000011101")
  298. BitParser.add(BLACK, 2496, "000000011110")
  299. BitParser.add(BLACK, 2560, "000000011111")
  300. UNCOMPRESSED = [None, None]
  301. BitParser.add(UNCOMPRESSED, "1", "1")
  302. BitParser.add(UNCOMPRESSED, "01", "01")
  303. BitParser.add(UNCOMPRESSED, "001", "001")
  304. BitParser.add(UNCOMPRESSED, "0001", "0001")
  305. BitParser.add(UNCOMPRESSED, "00001", "00001")
  306. BitParser.add(UNCOMPRESSED, "00000", "000001")
  307. BitParser.add(UNCOMPRESSED, "T00", "00000011")
  308. BitParser.add(UNCOMPRESSED, "T10", "00000010")
  309. BitParser.add(UNCOMPRESSED, "T000", "000000011")
  310. BitParser.add(UNCOMPRESSED, "T100", "000000010")
  311. BitParser.add(UNCOMPRESSED, "T0000", "0000000011")
  312. BitParser.add(UNCOMPRESSED, "T1000", "0000000010")
  313. BitParser.add(UNCOMPRESSED, "T00000", "00000000011")
  314. BitParser.add(UNCOMPRESSED, "T10000", "00000000010")
  315. class CCITTException(PDFException):
  316. pass
  317. class EOFB(CCITTException):
  318. pass
  319. class InvalidData(CCITTException):
  320. pass
  321. class ByteSkip(CCITTException):
  322. pass
  323. _color: int
  324. def __init__(self, width: int, bytealign: bool = False) -> None:
  325. BitParser.__init__(self)
  326. self.width = width
  327. self.bytealign = bytealign
  328. self.reset()
  329. def feedbytes(self, data: bytes) -> None:
  330. for byte in get_bytes(data):
  331. try:
  332. for m in (128, 64, 32, 16, 8, 4, 2, 1):
  333. self._parse_bit(byte & m)
  334. except self.ByteSkip:
  335. self._accept = self._parse_mode
  336. self._state = self.MODE
  337. except self.EOFB:
  338. break
  339. def _parse_mode(self, mode: object) -> BitParserState:
  340. if mode == "p":
  341. self._do_pass()
  342. self._flush_line()
  343. return self.MODE
  344. elif mode == "h":
  345. self._n1 = 0
  346. self._accept = self._parse_horiz1
  347. if self._color:
  348. return self.WHITE
  349. else:
  350. return self.BLACK
  351. elif mode == "u":
  352. self._accept = self._parse_uncompressed
  353. return self.UNCOMPRESSED
  354. elif mode == "e":
  355. raise self.EOFB
  356. elif isinstance(mode, int):
  357. self._do_vertical(mode)
  358. self._flush_line()
  359. return self.MODE
  360. else:
  361. raise self.InvalidData(mode)
  362. def _parse_horiz1(self, n: Any) -> BitParserState:
  363. if n is None:
  364. raise self.InvalidData
  365. self._n1 += n
  366. if n < 64:
  367. self._n2 = 0
  368. self._color = 1 - self._color
  369. self._accept = self._parse_horiz2
  370. if self._color:
  371. return self.WHITE
  372. else:
  373. return self.BLACK
  374. def _parse_horiz2(self, n: Any) -> BitParserState:
  375. if n is None:
  376. raise self.InvalidData
  377. self._n2 += n
  378. if n < 64:
  379. self._color = 1 - self._color
  380. self._accept = self._parse_mode
  381. self._do_horizontal(self._n1, self._n2)
  382. self._flush_line()
  383. return self.MODE
  384. elif self._color:
  385. return self.WHITE
  386. else:
  387. return self.BLACK
  388. def _parse_uncompressed(self, bits: Optional[str]) -> BitParserState:
  389. if not bits:
  390. raise self.InvalidData
  391. if bits.startswith("T"):
  392. self._accept = self._parse_mode
  393. self._color = int(bits[1])
  394. self._do_uncompressed(bits[2:])
  395. return self.MODE
  396. else:
  397. self._do_uncompressed(bits)
  398. return self.UNCOMPRESSED
  399. def _get_bits(self) -> str:
  400. return "".join(str(b) for b in self._curline[: self._curpos])
  401. def _get_refline(self, i: int) -> str:
  402. if i < 0:
  403. return "[]" + "".join(str(b) for b in self._refline)
  404. elif len(self._refline) <= i:
  405. return "".join(str(b) for b in self._refline) + "[]"
  406. else:
  407. return (
  408. "".join(str(b) for b in self._refline[:i])
  409. + "["
  410. + str(self._refline[i])
  411. + "]"
  412. + "".join(str(b) for b in self._refline[i + 1 :])
  413. )
  414. def reset(self) -> None:
  415. self._y = 0
  416. self._curline = array.array("b", [1] * self.width)
  417. self._reset_line()
  418. self._accept = self._parse_mode
  419. self._state = self.MODE
  420. def output_line(self, y: int, bits: Sequence[int]) -> None:
  421. print(y, "".join(str(b) for b in bits))
  422. def _reset_line(self) -> None:
  423. self._refline = self._curline
  424. self._curline = array.array("b", [1] * self.width)
  425. self._curpos = -1
  426. self._color = 1
  427. def _flush_line(self) -> None:
  428. if self.width <= self._curpos:
  429. self.output_line(self._y, self._curline)
  430. self._y += 1
  431. self._reset_line()
  432. if self.bytealign:
  433. raise self.ByteSkip
  434. def _do_vertical(self, dx: int) -> None:
  435. x1 = self._curpos + 1
  436. while 1:
  437. if x1 == 0:
  438. if self._color == 1 and self._refline[x1] != self._color:
  439. break
  440. elif x1 == len(self._refline) or (
  441. self._refline[x1 - 1] == self._color
  442. and self._refline[x1] != self._color
  443. ):
  444. break
  445. x1 += 1
  446. x1 += dx
  447. x0 = max(0, self._curpos)
  448. x1 = max(0, min(self.width, x1))
  449. if x1 < x0:
  450. for x in range(x1, x0):
  451. self._curline[x] = self._color
  452. elif x0 < x1:
  453. for x in range(x0, x1):
  454. self._curline[x] = self._color
  455. self._curpos = x1
  456. self._color = 1 - self._color
  457. def _do_pass(self) -> None:
  458. x1 = self._curpos + 1
  459. while 1:
  460. if x1 == 0:
  461. if self._color == 1 and self._refline[x1] != self._color:
  462. break
  463. elif x1 == len(self._refline) or (
  464. self._refline[x1 - 1] == self._color
  465. and self._refline[x1] != self._color
  466. ):
  467. break
  468. x1 += 1
  469. while 1:
  470. if x1 == 0:
  471. if self._color == 0 and self._refline[x1] == self._color:
  472. break
  473. elif x1 == len(self._refline) or (
  474. self._refline[x1 - 1] != self._color
  475. and self._refline[x1] == self._color
  476. ):
  477. break
  478. x1 += 1
  479. for x in range(self._curpos, x1):
  480. self._curline[x] = self._color
  481. self._curpos = x1
  482. def _do_horizontal(self, n1: int, n2: int) -> None:
  483. if self._curpos < 0:
  484. self._curpos = 0
  485. x = self._curpos
  486. for _ in range(n1):
  487. if len(self._curline) <= x:
  488. break
  489. self._curline[x] = self._color
  490. x += 1
  491. for _ in range(n2):
  492. if len(self._curline) <= x:
  493. break
  494. self._curline[x] = 1 - self._color
  495. x += 1
  496. self._curpos = x
  497. def _do_uncompressed(self, bits: str) -> None:
  498. for c in bits:
  499. self._curline[self._curpos] = int(c)
  500. self._curpos += 1
  501. self._flush_line()
  502. class CCITTFaxDecoder(CCITTG4Parser):
  503. def __init__(
  504. self,
  505. width: int,
  506. bytealign: bool = False,
  507. reversed: bool = False,
  508. ) -> None:
  509. CCITTG4Parser.__init__(self, width, bytealign=bytealign)
  510. self.reversed = reversed
  511. self._buf = b""
  512. def close(self) -> bytes:
  513. return self._buf
  514. def output_line(self, y: int, bits: Sequence[int]) -> None:
  515. arr = array.array("B", [0] * ((len(bits) + 7) // 8))
  516. if self.reversed:
  517. bits = [1 - b for b in bits]
  518. for i, b in enumerate(bits):
  519. if b:
  520. arr[i // 8] += (128, 64, 32, 16, 8, 4, 2, 1)[i % 8]
  521. self._buf += arr.tobytes()
  522. def ccittfaxdecode(data: bytes, params: Dict[str, object]) -> bytes:
  523. K = params.get("K")
  524. if K == -1:
  525. cols = cast(int, params.get("Columns"))
  526. bytealign = cast(bool, params.get("EncodedByteAlign"))
  527. reversed = cast(bool, params.get("BlackIs1"))
  528. parser = CCITTFaxDecoder(cols, bytealign=bytealign, reversed=reversed)
  529. else:
  530. raise PDFValueError(K)
  531. parser.feedbytes(data)
  532. return parser.close()
  533. # test
  534. def main(argv: List[str]) -> None:
  535. if not argv[1:]:
  536. import unittest
  537. unittest.main()
  538. return
  539. class Parser(CCITTG4Parser):
  540. def __init__(self, width: int, bytealign: bool = False) -> None:
  541. import pygame # type: ignore[import]
  542. CCITTG4Parser.__init__(self, width, bytealign=bytealign)
  543. self.img = pygame.Surface((self.width, 1000))
  544. def output_line(self, y: int, bits: Sequence[int]) -> None:
  545. for x, b in enumerate(bits):
  546. if b:
  547. self.img.set_at((x, y), (255, 255, 255))
  548. else:
  549. self.img.set_at((x, y), (0, 0, 0))
  550. def close(self) -> None:
  551. import pygame
  552. pygame.image.save(self.img, "out.bmp")
  553. for path in argv[1:]:
  554. fp = open(path, "rb")
  555. (_, _, k, w, h, _) = path.split(".")
  556. parser = Parser(int(w))
  557. parser.feedbytes(fp.read())
  558. parser.close()
  559. fp.close()