hpack.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654
  1. """
  2. Implements the HPACK header compression algorithm as detailed by RFC 7541.
  3. """
  4. from __future__ import annotations
  5. import logging
  6. from typing import TYPE_CHECKING, Any
  7. from .exceptions import HPACKDecodingError, InvalidTableSizeError, OversizedHeaderListError
  8. from .huffman import HuffmanEncoder
  9. from .huffman_constants import REQUEST_CODES, REQUEST_CODES_LENGTH
  10. from .huffman_table import decode_huffman
  11. from .struct import HeaderTuple, HeaderWeaklyTyped, NeverIndexedHeaderTuple
  12. from .table import HeaderTable, table_entry_size
  13. if TYPE_CHECKING:
  14. from collections.abc import Iterable # pragma: no cover
  15. log = logging.getLogger(__name__)
  16. INDEX_NONE = b"\x00"
  17. INDEX_NEVER = b"\x10"
  18. INDEX_INCREMENTAL = b"\x40"
  19. # Precompute 2^i for 1-8 for use in prefix calcs.
  20. # Zero index is not used but there to save a subtraction
  21. # as prefix numbers are not zero indexed.
  22. _PREFIX_BIT_MAX_NUMBERS = [(2 ** i) - 1 for i in range(9)]
  23. # We default the maximum header list we're willing to accept to 64kB. That's a
  24. # lot of headers, but if applications want to raise it they can do.
  25. DEFAULT_MAX_HEADER_LIST_SIZE = 2 ** 16
  26. def _unicode_if_needed(header: HeaderWeaklyTyped, raw: bool) -> HeaderTuple:
  27. """
  28. Provides a header as a unicode string if raw is False, otherwise returns
  29. it as a bytestring.
  30. """
  31. name = bytes(header[0]) # type: ignore
  32. value = bytes(header[1]) # type: ignore
  33. if not raw:
  34. return header.__class__(name.decode("utf-8"), value.decode("utf-8")) # type: ignore
  35. return header.__class__(name, value) # type: ignore
  36. def encode_integer(integer: int, prefix_bits: int) -> bytearray:
  37. """
  38. Encodes an integer according to the wacky integer encoding rules
  39. defined in the HPACK spec.
  40. """
  41. log.debug("Encoding %d with %d bits", integer, prefix_bits)
  42. if integer < 0:
  43. msg = f"Can only encode positive integers, got {integer}"
  44. raise ValueError(msg)
  45. if prefix_bits < 1 or prefix_bits > 8:
  46. msg = f"Prefix bits must be between 1 and 8, got {prefix_bits}"
  47. raise ValueError(msg)
  48. max_number = _PREFIX_BIT_MAX_NUMBERS[prefix_bits]
  49. if integer < max_number:
  50. return bytearray([integer]) # Seriously?
  51. elements = [max_number]
  52. integer -= max_number
  53. while integer >= 128:
  54. elements.append((integer & 127) + 128)
  55. integer >>= 7
  56. elements.append(integer)
  57. return bytearray(elements)
  58. def decode_integer(data: bytes, prefix_bits: int) -> tuple[int, int]:
  59. """
  60. Decodes an integer according to the wacky integer encoding rules
  61. defined in the HPACK spec. Returns a tuple of the decoded integer and the
  62. number of bytes that were consumed from ``data`` in order to get that
  63. integer.
  64. """
  65. if prefix_bits < 1 or prefix_bits > 8:
  66. msg = f"Prefix bits must be between 1 and 8, got {prefix_bits}"
  67. raise ValueError(msg)
  68. max_number = _PREFIX_BIT_MAX_NUMBERS[prefix_bits]
  69. index = 1
  70. shift = 0
  71. mask = (0xFF >> (8 - prefix_bits))
  72. try:
  73. number = data[0] & mask
  74. if number == max_number:
  75. while True:
  76. next_byte = data[index]
  77. index += 1
  78. if next_byte >= 128:
  79. number += (next_byte - 128) << shift
  80. else:
  81. number += next_byte << shift
  82. break
  83. shift += 7
  84. except IndexError as err:
  85. msg = f"Unable to decode HPACK integer representation from {data!r}"
  86. raise HPACKDecodingError(msg) from err
  87. log.debug("Decoded %d, consumed %d bytes", number, index)
  88. return number, index
  89. def _dict_to_iterable(header_dict: dict[bytes | str, bytes | str]) \
  90. -> Iterable[tuple[bytes | str, bytes | str]]:
  91. """
  92. Converts a dictionary to an iterable of key-value tuples. This is a
  93. HPACK-specific function because it pulls "special-headers" out first and
  94. then emits them.
  95. """
  96. if not isinstance(header_dict, dict): # pragma: no cover
  97. msg = f"header_dict not a dict, but {type(header_dict)}"
  98. raise TypeError(msg)
  99. keys = sorted(
  100. header_dict.keys(),
  101. key=lambda k: not _to_bytes(k).startswith(b":"),
  102. )
  103. for key in keys:
  104. yield key, header_dict[key]
  105. def _to_bytes(value: bytes | str | Any) -> bytes:
  106. """
  107. Convert anything to bytes through a UTF-8 encoded string
  108. """
  109. t = type(value)
  110. if t is bytes:
  111. return value # type: ignore
  112. if t is not str:
  113. value = str(value)
  114. return value.encode("utf-8") # type: ignore
  115. class Encoder:
  116. """
  117. An HPACK encoder object. This object takes HTTP headers and emits encoded
  118. HTTP/2 header blocks.
  119. """
  120. def __init__(self) -> None:
  121. self.header_table = HeaderTable()
  122. self.huffman_coder = HuffmanEncoder(
  123. REQUEST_CODES, REQUEST_CODES_LENGTH,
  124. )
  125. self.table_size_changes: list[int] = []
  126. @property
  127. def header_table_size(self) -> int:
  128. """
  129. Controls the size of the HPACK header table.
  130. """
  131. return self.header_table.maxsize
  132. @header_table_size.setter
  133. def header_table_size(self, value: int) -> None:
  134. self.header_table.maxsize = value
  135. if self.header_table.resized:
  136. self.table_size_changes.append(value)
  137. def encode(self,
  138. headers: Iterable[\
  139. HeaderTuple | \
  140. tuple[bytes | str, bytes | str] | \
  141. tuple[bytes | str, bytes | str, bool | None]] | \
  142. dict[bytes | str, bytes | str],
  143. huffman: bool = True) -> bytes:
  144. """
  145. Takes a set of headers and encodes them into a HPACK-encoded header
  146. block.
  147. :param headers: The headers to encode. Must be either an iterable of
  148. tuples, an iterable of :class:`HeaderTuple
  149. <hpack.HeaderTuple>`, or a ``dict``.
  150. If an iterable of tuples, the tuples may be either
  151. two-tuples or three-tuples. If they are two-tuples, the
  152. tuples must be of the format ``(name, value)``. If they
  153. are three-tuples, they must be of the format
  154. ``(name, value, sensitive)``, where ``sensitive`` is a
  155. boolean value indicating whether the header should be
  156. added to header tables anywhere. If not present,
  157. ``sensitive`` defaults to ``False``.
  158. If an iterable of :class:`HeaderTuple
  159. <hpack.HeaderTuple>`, the tuples must always be
  160. two-tuples. Instead of using ``sensitive`` as a third
  161. tuple entry, use :class:`NeverIndexedHeaderTuple
  162. <hpack.NeverIndexedHeaderTuple>` to request that
  163. the field never be indexed.
  164. .. warning:: HTTP/2 requires that all special headers
  165. (headers whose names begin with ``:`` characters)
  166. appear at the *start* of the header block. While
  167. this method will ensure that happens for ``dict``
  168. subclasses, callers using any other iterable of
  169. tuples **must** ensure they place their special
  170. headers at the start of the iterable.
  171. For efficiency reasons users should prefer to use
  172. iterables of two-tuples: fixing the ordering of
  173. dictionary headers is an expensive operation that
  174. should be avoided if possible.
  175. :param huffman: (optional) Whether to Huffman-encode any header sent as
  176. a literal value. Except for use when debugging, it is
  177. recommended that this be left enabled.
  178. :returns: A bytestring containing the HPACK-encoded header block.
  179. """
  180. # Transforming the headers into a header block is a procedure that can
  181. # be modeled as a chain or pipe. First, the headers are encoded. This
  182. # encoding can be done a number of ways. If the header name-value pair
  183. # are already in the header table we can represent them using the
  184. # indexed representation: the same is true if they are in the static
  185. # table. Otherwise, a literal representation will be used.
  186. header_block = []
  187. # Before we begin, if the header table size has been changed we need
  188. # to signal all changes since last emission appropriately.
  189. if self.header_table.resized:
  190. header_block.append(self._encode_table_size_change())
  191. self.header_table.resized = False
  192. if isinstance(headers, dict):
  193. # Turn the headers into a list of tuples if possible. This is the
  194. # natural way to interact with them in HPACK. Because dictionaries are
  195. # un-ordered, we need to make sure we grab the "special" headers first.
  196. hpack_headers = _dict_to_iterable(headers)
  197. else:
  198. """
  199. Assume headers is an iterable of HeaderTuples, or plain 2-tuples, or plain 3-tuples:
  200. examples:
  201. [
  202. HeaderTuple(':method', 'GET'),
  203. NeverIndexedHeaderTuple('customkey', 'sensitiveinfo'),
  204. ]
  205. or
  206. [
  207. (':method', 'GET'),
  208. ('customkey', 'some-data'),
  209. ]
  210. or
  211. [
  212. (':method', 'GET', True),
  213. ('customkey', 'sensitiveinfo', True),
  214. ]
  215. """
  216. hpack_headers = iter(headers) # type: ignore
  217. # Add each header to the header block
  218. for header in hpack_headers:
  219. sensitive = False
  220. if isinstance(header, HeaderTuple):
  221. # HeaderTuple implies it's a 2-tuple with the sensitive information stored as instance attribute
  222. sensitive = not header.indexable
  223. elif len(header) > 2:
  224. sensitive = header[2]
  225. new_header = (_to_bytes(header[0]), _to_bytes(header[1]))
  226. header_block.append(self.add(new_header, sensitive, huffman))
  227. encoded = b"".join(header_block)
  228. log.debug("Encoded header block to %s", encoded)
  229. return encoded
  230. def add(self, to_add: tuple[bytes, bytes], sensitive: bool, huffman: bool = False) -> bytes:
  231. """
  232. Serializes a header key-value tuple.
  233. """
  234. log.debug(
  235. "Adding %s to the header table, sensitive:%s, huffman:%s",
  236. to_add,
  237. sensitive,
  238. huffman,
  239. )
  240. name, value = to_add
  241. # Set our indexing mode
  242. indexbit = INDEX_INCREMENTAL if not sensitive else INDEX_NEVER
  243. # Search for a matching header in the header table.
  244. match = self.header_table.search(name, value)
  245. if match is None:
  246. # Not in the header table. Encode using the literal syntax,
  247. # and add it to the header table.
  248. encoded = self._encode_literal(name, value, indexbit, huffman)
  249. if not sensitive:
  250. self.header_table.add(name, value)
  251. return encoded
  252. # The header is in the table, break out the values. If we matched
  253. # perfectly, we can use the indexed representation: otherwise we
  254. # can use the indexed literal.
  255. index, name, perfect = match
  256. if perfect:
  257. # Indexed representation.
  258. encoded = self._encode_indexed(index)
  259. else:
  260. # Indexed literal. We are going to add header to the
  261. # header table unconditionally. It is a future todo to
  262. # filter out headers which are known to be ineffective for
  263. # indexing since they just take space in the table and
  264. # pushed out other valuable headers.
  265. encoded = self._encode_indexed_literal(
  266. index, value, indexbit, huffman,
  267. )
  268. if not sensitive:
  269. self.header_table.add(name, value)
  270. return encoded
  271. def _encode_indexed(self, index: int) -> bytes:
  272. """
  273. Encodes a header using the indexed representation.
  274. """
  275. field = encode_integer(index, 7)
  276. field[0] |= 0x80 # we set the top bit
  277. return bytes(field)
  278. def _encode_literal(self, name: bytes, value: bytes, indexbit: bytes, huffman: bool = False) -> bytes:
  279. """
  280. Encodes a header with a literal name and literal value. If ``indexing``
  281. is True, the header will be added to the header table: otherwise it
  282. will not.
  283. """
  284. if huffman:
  285. name = self.huffman_coder.encode(name)
  286. value = self.huffman_coder.encode(value)
  287. name_len = encode_integer(len(name), 7)
  288. value_len = encode_integer(len(value), 7)
  289. if huffman:
  290. name_len[0] |= 0x80
  291. value_len[0] |= 0x80
  292. return b"".join(
  293. [indexbit, bytes(name_len), name, bytes(value_len), value],
  294. )
  295. def _encode_indexed_literal(self, index: int, value: bytes, indexbit: bytes, huffman: bool = False) -> bytes:
  296. """
  297. Encodes a header with an indexed name and a literal value and performs
  298. incremental indexing.
  299. """
  300. if indexbit != INDEX_INCREMENTAL:
  301. prefix = encode_integer(index, 4)
  302. else:
  303. prefix = encode_integer(index, 6)
  304. prefix[0] |= ord(indexbit)
  305. if huffman:
  306. value = self.huffman_coder.encode(value)
  307. value_len = encode_integer(len(value), 7)
  308. if huffman:
  309. value_len[0] |= 0x80
  310. return b"".join([bytes(prefix), bytes(value_len), value])
  311. def _encode_table_size_change(self) -> bytes:
  312. """
  313. Produces the encoded form of all header table size change context
  314. updates.
  315. """
  316. block = b""
  317. for size_bytes in self.table_size_changes:
  318. b = encode_integer(size_bytes, 5)
  319. b[0] |= 0x20
  320. block += bytes(b)
  321. self.table_size_changes = []
  322. return block
  323. class Decoder:
  324. """
  325. An HPACK decoder object.
  326. .. versionchanged:: 2.3.0
  327. Added ``max_header_list_size`` argument.
  328. :param max_header_list_size: The maximum decompressed size we will allow
  329. for any single header block. This is a protection against DoS attacks
  330. that attempt to force the application to expand a relatively small
  331. amount of data into a really large header list, allowing enormous
  332. amounts of memory to be allocated.
  333. If this amount of data is exceeded, a `OversizedHeaderListError
  334. <hpack.OversizedHeaderListError>` exception will be raised. At this
  335. point the connection should be shut down, as the HPACK state will no
  336. longer be usable.
  337. Defaults to 64kB.
  338. :type max_header_list_size: ``int``
  339. """
  340. def __init__(self, max_header_list_size: int = DEFAULT_MAX_HEADER_LIST_SIZE) -> None:
  341. self.header_table = HeaderTable()
  342. #: The maximum decompressed size we will allow for any single header
  343. #: block. This is a protection against DoS attacks that attempt to
  344. #: force the application to expand a relatively small amount of data
  345. #: into a really large header list, allowing enormous amounts of memory
  346. #: to be allocated.
  347. #:
  348. #: If this amount of data is exceeded, a `OversizedHeaderListError
  349. #: <hpack.OversizedHeaderListError>` exception will be raised. At this
  350. #: point the connection should be shut down, as the HPACK state will no
  351. #: longer be usable.
  352. #:
  353. #: Defaults to 64kB.
  354. #:
  355. #: .. versionadded:: 2.3.0
  356. self.max_header_list_size = max_header_list_size
  357. #: Maximum allowed header table size.
  358. #:
  359. #: A HTTP/2 implementation should set this to the most recent value of
  360. #: SETTINGS_HEADER_TABLE_SIZE that it sent *and has received an ACK
  361. #: for*. Once this setting is set, the actual header table size will be
  362. #: checked at the end of each decoding run and whenever it is changed,
  363. #: to confirm that it fits in this size.
  364. self.max_allowed_table_size = self.header_table.maxsize
  365. @property
  366. def header_table_size(self) -> int:
  367. """
  368. Controls the size of the HPACK header table.
  369. """
  370. return self.header_table.maxsize
  371. @header_table_size.setter
  372. def header_table_size(self, value: int) -> None:
  373. self.header_table.maxsize = value
  374. def decode(self, data: bytes, raw: bool = False) -> Iterable[HeaderTuple]:
  375. """
  376. Takes an HPACK-encoded header block and decodes it into a header set.
  377. :param data: A bytestring representing a complete HPACK-encoded header
  378. block.
  379. :param raw: (optional) Whether to return the headers as tuples of raw
  380. byte strings or to decode them as UTF-8 before returning
  381. them. The default value is False, which returns tuples of
  382. Unicode strings
  383. :returns: A list of two-tuples of ``(name, value)`` representing the
  384. HPACK-encoded headers, in the order they were decoded.
  385. :raises HPACKDecodingError: If an error is encountered while decoding
  386. the header block.
  387. """
  388. log.debug("Decoding %s", data)
  389. data_mem = memoryview(data)
  390. headers: list[HeaderTuple] = []
  391. data_len = len(data)
  392. inflated_size = 0
  393. current_index = 0
  394. while current_index < data_len:
  395. # Work out what kind of header we're decoding.
  396. # If the high bit is 1, it's an indexed field.
  397. current = data[current_index]
  398. indexed = bool(current & 0x80)
  399. # Otherwise, if the second-highest bit is 1 it's a field that does
  400. # alter the header table.
  401. literal_index = bool(current & 0x40)
  402. # Otherwise, if the third-highest bit is 1 it's an encoding context
  403. # update.
  404. encoding_update = bool(current & 0x20)
  405. if indexed:
  406. header, consumed = self._decode_indexed(
  407. data_mem[current_index:],
  408. )
  409. elif literal_index:
  410. # It's a literal header that does affect the header table.
  411. header, consumed = self._decode_literal_index(
  412. data_mem[current_index:],
  413. )
  414. elif encoding_update:
  415. # It's an update to the encoding context. These are forbidden
  416. # in a header block after any actual header.
  417. if headers:
  418. msg = "Table size update not at the start of the block"
  419. raise HPACKDecodingError(msg)
  420. consumed = self._update_encoding_context(
  421. data_mem[current_index:],
  422. )
  423. header = None
  424. else:
  425. # It's a literal header that does not affect the header table.
  426. header, consumed = self._decode_literal_no_index(
  427. data_mem[current_index:],
  428. )
  429. if header:
  430. headers.append(header)
  431. inflated_size += table_entry_size(header[0], header[1])
  432. if inflated_size > self.max_header_list_size:
  433. msg = f"A header list larger than {self.max_header_list_size} has been received"
  434. raise OversizedHeaderListError(msg)
  435. current_index += consumed
  436. # Confirm that the table size is lower than the maximum. We do this
  437. # here to ensure that we catch when the max has been *shrunk* and the
  438. # remote peer hasn't actually done that.
  439. self._assert_valid_table_size()
  440. try:
  441. return [_unicode_if_needed(h, raw) for h in headers]
  442. except UnicodeDecodeError as err:
  443. msg = "Unable to decode headers as UTF-8"
  444. raise HPACKDecodingError(msg) from err
  445. def _assert_valid_table_size(self) -> None:
  446. """
  447. Check that the table size set by the encoder is lower than the maximum
  448. we expect to have.
  449. """
  450. if self.header_table_size > self.max_allowed_table_size:
  451. msg = "Encoder did not shrink table size to within the max"
  452. raise InvalidTableSizeError(msg)
  453. def _update_encoding_context(self, data: bytes) -> int:
  454. """
  455. Handles a byte that updates the encoding context.
  456. """
  457. # We've been asked to resize the header table.
  458. new_size, consumed = decode_integer(data, 5)
  459. if new_size > self.max_allowed_table_size:
  460. msg = "Encoder exceeded max allowable table size"
  461. raise InvalidTableSizeError(msg)
  462. self.header_table_size = new_size
  463. return consumed
  464. def _decode_indexed(self, data: bytes) -> tuple[HeaderTuple, int]:
  465. """
  466. Decodes a header represented using the indexed representation.
  467. """
  468. index, consumed = decode_integer(data, 7)
  469. header = HeaderTuple(*self.header_table.get_by_index(index))
  470. log.debug("Decoded %s, consumed %d", header, consumed)
  471. return header, consumed
  472. def _decode_literal_no_index(self, data: bytes) -> tuple[HeaderTuple, int]:
  473. return self._decode_literal(data, should_index=False)
  474. def _decode_literal_index(self, data: bytes) -> tuple[HeaderTuple, int]:
  475. return self._decode_literal(data, should_index=True)
  476. def _decode_literal(self, data: bytes, should_index: bool) -> tuple[HeaderTuple, int]:
  477. """
  478. Decodes a header represented with a literal.
  479. """
  480. total_consumed = 0
  481. # When should_index is true, if the low six bits of the first byte are
  482. # nonzero, the header name is indexed.
  483. # When should_index is false, if the low four bits of the first byte
  484. # are nonzero the header name is indexed.
  485. if should_index:
  486. indexed_name = data[0] & 0x3F
  487. name_len = 6
  488. not_indexable = False
  489. else:
  490. high_byte = data[0]
  491. indexed_name = high_byte & 0x0F
  492. name_len = 4
  493. not_indexable = bool(high_byte & 0x10)
  494. if indexed_name:
  495. # Indexed header name.
  496. index, consumed = decode_integer(data, name_len)
  497. name = self.header_table.get_by_index(index)[0]
  498. total_consumed = consumed
  499. length = 0
  500. else:
  501. # Literal header name. The first byte was consumed, so we need to
  502. # move forward.
  503. data = data[1:]
  504. length, consumed = decode_integer(data, 7)
  505. name = data[consumed:consumed + length]
  506. if len(name) != length:
  507. msg = "Truncated header block"
  508. raise HPACKDecodingError(msg)
  509. if data[0] & 0x80:
  510. name = decode_huffman(name)
  511. total_consumed = consumed + length + 1 # Since we moved forward 1.
  512. data = data[consumed + length:]
  513. # The header value is definitely length-based.
  514. length, consumed = decode_integer(data, 7)
  515. value = data[consumed:consumed + length]
  516. if len(value) != length:
  517. msg = "Truncated header block"
  518. raise HPACKDecodingError(msg)
  519. if data[0] & 0x80:
  520. value = decode_huffman(value)
  521. # Updated the total consumed length.
  522. total_consumed += length + consumed
  523. # If we have been told never to index the header field, encode that in
  524. # the tuple we use.
  525. header: HeaderTuple
  526. if not_indexable:
  527. header = NeverIndexedHeaderTuple(name, value)
  528. else:
  529. header = HeaderTuple(name, value)
  530. # If we've been asked to index this, add it to the header table.
  531. if should_index:
  532. self.header_table.add(name, value)
  533. log.debug(
  534. "Decoded %s, total consumed %d bytes, indexed %s",
  535. header,
  536. total_consumed,
  537. should_index,
  538. )
  539. return header, total_consumed