# latin_enc.py

  1. """Standard encoding tables used in PDF.
  2. This table is extracted from PDF Reference Manual 1.6, pp.925
  3. "D.1 Latin Character Set and Encodings"
  4. """
  5. from typing import List, Optional, Tuple
  6. EncodingRow = Tuple[str, Optional[int], Optional[int], Optional[int], Optional[int]]
  7. ENCODING: List[EncodingRow] = [
  8. # (name, std, mac, win, pdf)
  9. ("A", 65, 65, 65, 65),
  10. ("AE", 225, 174, 198, 198),
  11. ("Aacute", None, 231, 193, 193),
  12. ("Acircumflex", None, 229, 194, 194),
  13. ("Adieresis", None, 128, 196, 196),
  14. ("Agrave", None, 203, 192, 192),
  15. ("Aring", None, 129, 197, 197),
  16. ("Atilde", None, 204, 195, 195),
  17. ("B", 66, 66, 66, 66),
  18. ("C", 67, 67, 67, 67),
  19. ("Ccedilla", None, 130, 199, 199),
  20. ("D", 68, 68, 68, 68),
  21. ("E", 69, 69, 69, 69),
  22. ("Eacute", None, 131, 201, 201),
  23. ("Ecircumflex", None, 230, 202, 202),
  24. ("Edieresis", None, 232, 203, 203),
  25. ("Egrave", None, 233, 200, 200),
  26. ("Eth", None, None, 208, 208),
  27. ("Euro", None, None, 128, 160),
  28. ("F", 70, 70, 70, 70),
  29. ("G", 71, 71, 71, 71),
  30. ("H", 72, 72, 72, 72),
  31. ("I", 73, 73, 73, 73),
  32. ("Iacute", None, 234, 205, 205),
  33. ("Icircumflex", None, 235, 206, 206),
  34. ("Idieresis", None, 236, 207, 207),
  35. ("Igrave", None, 237, 204, 204),
  36. ("J", 74, 74, 74, 74),
  37. ("K", 75, 75, 75, 75),
  38. ("L", 76, 76, 76, 76),
  39. ("Lslash", 232, None, None, 149),
  40. ("M", 77, 77, 77, 77),
  41. ("N", 78, 78, 78, 78),
  42. ("Ntilde", None, 132, 209, 209),
  43. ("O", 79, 79, 79, 79),
  44. ("OE", 234, 206, 140, 150),
  45. ("Oacute", None, 238, 211, 211),
  46. ("Ocircumflex", None, 239, 212, 212),
  47. ("Odieresis", None, 133, 214, 214),
  48. ("Ograve", None, 241, 210, 210),
  49. ("Oslash", 233, 175, 216, 216),
  50. ("Otilde", None, 205, 213, 213),
  51. ("P", 80, 80, 80, 80),
  52. ("Q", 81, 81, 81, 81),
  53. ("R", 82, 82, 82, 82),
  54. ("S", 83, 83, 83, 83),
  55. ("Scaron", None, None, 138, 151),
  56. ("T", 84, 84, 84, 84),
  57. ("Thorn", None, None, 222, 222),
  58. ("U", 85, 85, 85, 85),
  59. ("Uacute", None, 242, 218, 218),
  60. ("Ucircumflex", None, 243, 219, 219),
  61. ("Udieresis", None, 134, 220, 220),
  62. ("Ugrave", None, 244, 217, 217),
  63. ("V", 86, 86, 86, 86),
  64. ("W", 87, 87, 87, 87),
  65. ("X", 88, 88, 88, 88),
  66. ("Y", 89, 89, 89, 89),
  67. ("Yacute", None, None, 221, 221),
  68. ("Ydieresis", None, 217, 159, 152),
  69. ("Z", 90, 90, 90, 90),
  70. ("Zcaron", None, None, 142, 153),
  71. ("a", 97, 97, 97, 97),
  72. ("aacute", None, 135, 225, 225),
  73. ("acircumflex", None, 137, 226, 226),
  74. ("acute", 194, 171, 180, 180),
  75. ("adieresis", None, 138, 228, 228),
  76. ("ae", 241, 190, 230, 230),
  77. ("agrave", None, 136, 224, 224),
  78. ("ampersand", 38, 38, 38, 38),
  79. ("aring", None, 140, 229, 229),
  80. ("asciicircum", 94, 94, 94, 94),
  81. ("asciitilde", 126, 126, 126, 126),
  82. ("asterisk", 42, 42, 42, 42),
  83. ("at", 64, 64, 64, 64),
  84. ("atilde", None, 139, 227, 227),
  85. ("b", 98, 98, 98, 98),
  86. ("backslash", 92, 92, 92, 92),
  87. ("bar", 124, 124, 124, 124),
  88. ("braceleft", 123, 123, 123, 123),
  89. ("braceright", 125, 125, 125, 125),
  90. ("bracketleft", 91, 91, 91, 91),
  91. ("bracketright", 93, 93, 93, 93),
  92. ("breve", 198, 249, None, 24),
  93. ("brokenbar", None, None, 166, 166),
  94. ("bullet", 183, 165, 149, 128),
  95. ("c", 99, 99, 99, 99),
  96. ("caron", 207, 255, None, 25),
  97. ("ccedilla", None, 141, 231, 231),
  98. ("cedilla", 203, 252, 184, 184),
  99. ("cent", 162, 162, 162, 162),
  100. ("circumflex", 195, 246, 136, 26),
  101. ("colon", 58, 58, 58, 58),
  102. ("comma", 44, 44, 44, 44),
  103. ("copyright", None, 169, 169, 169),
  104. ("currency", 168, 219, 164, 164),
  105. ("d", 100, 100, 100, 100),
  106. ("dagger", 178, 160, 134, 129),
  107. ("daggerdbl", 179, 224, 135, 130),
  108. ("degree", None, 161, 176, 176),
  109. ("dieresis", 200, 172, 168, 168),
  110. ("divide", None, 214, 247, 247),
  111. ("dollar", 36, 36, 36, 36),
  112. ("dotaccent", 199, 250, None, 27),
  113. ("dotlessi", 245, 245, None, 154),
  114. ("e", 101, 101, 101, 101),
  115. ("eacute", None, 142, 233, 233),
  116. ("ecircumflex", None, 144, 234, 234),
  117. ("edieresis", None, 145, 235, 235),
  118. ("egrave", None, 143, 232, 232),
  119. ("eight", 56, 56, 56, 56),
  120. ("ellipsis", 188, 201, 133, 131),
  121. ("emdash", 208, 209, 151, 132),
  122. ("endash", 177, 208, 150, 133),
  123. ("equal", 61, 61, 61, 61),
  124. ("eth", None, None, 240, 240),
  125. ("exclam", 33, 33, 33, 33),
  126. ("exclamdown", 161, 193, 161, 161),
  127. ("f", 102, 102, 102, 102),
  128. ("fi", 174, 222, None, 147),
  129. ("five", 53, 53, 53, 53),
  130. ("fl", 175, 223, None, 148),
  131. ("florin", 166, 196, 131, 134),
  132. ("four", 52, 52, 52, 52),
  133. ("fraction", 164, 218, None, 135),
  134. ("g", 103, 103, 103, 103),
  135. ("germandbls", 251, 167, 223, 223),
  136. ("grave", 193, 96, 96, 96),
  137. ("greater", 62, 62, 62, 62),
  138. ("guillemotleft", 171, 199, 171, 171),
  139. ("guillemotright", 187, 200, 187, 187),
  140. ("guilsinglleft", 172, 220, 139, 136),
  141. ("guilsinglright", 173, 221, 155, 137),
  142. ("h", 104, 104, 104, 104),
  143. ("hungarumlaut", 205, 253, None, 28),
  144. ("hyphen", 45, 45, 45, 45),
  145. ("i", 105, 105, 105, 105),
  146. ("iacute", None, 146, 237, 237),
  147. ("icircumflex", None, 148, 238, 238),
  148. ("idieresis", None, 149, 239, 239),
  149. ("igrave", None, 147, 236, 236),
  150. ("j", 106, 106, 106, 106),
  151. ("k", 107, 107, 107, 107),
  152. ("l", 108, 108, 108, 108),
  153. ("less", 60, 60, 60, 60),
  154. ("logicalnot", None, 194, 172, 172),
  155. ("lslash", 248, None, None, 155),
  156. ("m", 109, 109, 109, 109),
  157. ("macron", 197, 248, 175, 175),
  158. ("minus", None, None, None, 138),
  159. ("mu", None, 181, 181, 181),
  160. ("multiply", None, None, 215, 215),
  161. ("n", 110, 110, 110, 110),
  162. ("nbspace", None, 202, 160, None),
  163. ("nine", 57, 57, 57, 57),
  164. ("ntilde", None, 150, 241, 241),
  165. ("numbersign", 35, 35, 35, 35),
  166. ("o", 111, 111, 111, 111),
  167. ("oacute", None, 151, 243, 243),
  168. ("ocircumflex", None, 153, 244, 244),
  169. ("odieresis", None, 154, 246, 246),
  170. ("oe", 250, 207, 156, 156),
  171. ("ogonek", 206, 254, None, 29),
  172. ("ograve", None, 152, 242, 242),
  173. ("one", 49, 49, 49, 49),
  174. ("onehalf", None, None, 189, 189),
  175. ("onequarter", None, None, 188, 188),
  176. ("onesuperior", None, None, 185, 185),
  177. ("ordfeminine", 227, 187, 170, 170),
  178. ("ordmasculine", 235, 188, 186, 186),
  179. ("oslash", 249, 191, 248, 248),
  180. ("otilde", None, 155, 245, 245),
  181. ("p", 112, 112, 112, 112),
  182. ("paragraph", 182, 166, 182, 182),
  183. ("parenleft", 40, 40, 40, 40),
  184. ("parenright", 41, 41, 41, 41),
  185. ("percent", 37, 37, 37, 37),
  186. ("period", 46, 46, 46, 46),
  187. ("periodcentered", 180, 225, 183, 183),
  188. ("perthousand", 189, 228, 137, 139),
  189. ("plus", 43, 43, 43, 43),
  190. ("plusminus", None, 177, 177, 177),
  191. ("q", 113, 113, 113, 113),
  192. ("question", 63, 63, 63, 63),
  193. ("questiondown", 191, 192, 191, 191),
  194. ("quotedbl", 34, 34, 34, 34),
  195. ("quotedblbase", 185, 227, 132, 140),
  196. ("quotedblleft", 170, 210, 147, 141),
  197. ("quotedblright", 186, 211, 148, 142),
  198. ("quoteleft", 96, 212, 145, 143),
  199. ("quoteright", 39, 213, 146, 144),
  200. ("quotesinglbase", 184, 226, 130, 145),
  201. ("quotesingle", 169, 39, 39, 39),
  202. ("r", 114, 114, 114, 114),
  203. ("registered", None, 168, 174, 174),
  204. ("ring", 202, 251, None, 30),
  205. ("s", 115, 115, 115, 115),
  206. ("scaron", None, None, 154, 157),
  207. ("section", 167, 164, 167, 167),
  208. ("semicolon", 59, 59, 59, 59),
  209. ("seven", 55, 55, 55, 55),
  210. ("six", 54, 54, 54, 54),
  211. ("slash", 47, 47, 47, 47),
  212. ("space", 32, 32, 32, 32),
  213. ("space", None, 202, 160, None),
  214. ("space", None, 202, 173, None),
  215. ("sterling", 163, 163, 163, 163),
  216. ("t", 116, 116, 116, 116),
  217. ("thorn", None, None, 254, 254),
  218. ("three", 51, 51, 51, 51),
  219. ("threequarters", None, None, 190, 190),
  220. ("threesuperior", None, None, 179, 179),
  221. ("tilde", 196, 247, 152, 31),
  222. ("trademark", None, 170, 153, 146),
  223. ("two", 50, 50, 50, 50),
  224. ("twosuperior", None, None, 178, 178),
  225. ("u", 117, 117, 117, 117),
  226. ("uacute", None, 156, 250, 250),
  227. ("ucircumflex", None, 158, 251, 251),
  228. ("udieresis", None, 159, 252, 252),
  229. ("ugrave", None, 157, 249, 249),
  230. ("underscore", 95, 95, 95, 95),
  231. ("v", 118, 118, 118, 118),
  232. ("w", 119, 119, 119, 119),
  233. ("x", 120, 120, 120, 120),
  234. ("y", 121, 121, 121, 121),
  235. ("yacute", None, None, 253, 253),
  236. ("ydieresis", None, 216, 255, 255),
  237. ("yen", 165, 180, 165, 165),
  238. ("z", 122, 122, 122, 122),
  239. ("zcaron", None, None, 158, 158),
  240. ("zero", 48, 48, 48, 48),
  241. ]