_saslprep.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. # Copyright 2016-present MongoDB, Inc.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. # Some changes copyright 2021-present Matthias Valvekens,
  16. # licensed under the license of the pyHanko project (see LICENSE file).
  17. """An implementation of RFC4013 SASLprep."""
  18. __all__ = ["saslprep"]
  19. import stringprep
  20. import unicodedata
  21. from typing import Callable, Tuple
  22. from pdfminer.pdfexceptions import PDFValueError
  23. # RFC4013 section 2.3 prohibited output.
  24. _PROHIBITED: Tuple[Callable[[str], bool], ...] = (
  25. # A strict reading of RFC 4013 requires table c12 here, but
  26. # characters from it are mapped to SPACE in the Map step. Can
  27. # normalization reintroduce them somehow?
  28. stringprep.in_table_c12,
  29. stringprep.in_table_c21_c22,
  30. stringprep.in_table_c3,
  31. stringprep.in_table_c4,
  32. stringprep.in_table_c5,
  33. stringprep.in_table_c6,
  34. stringprep.in_table_c7,
  35. stringprep.in_table_c8,
  36. stringprep.in_table_c9,
  37. )
  38. def saslprep(data: str, prohibit_unassigned_code_points: bool = True) -> str:
  39. """An implementation of RFC4013 SASLprep.
  40. :param data:
  41. The string to SASLprep.
  42. :param prohibit_unassigned_code_points:
  43. RFC 3454 and RFCs for various SASL mechanisms distinguish between
  44. `queries` (unassigned code points allowed) and
  45. `stored strings` (unassigned code points prohibited). Defaults
  46. to ``True`` (unassigned code points are prohibited).
  47. :return: The SASLprep'ed version of `data`.
  48. """
  49. if prohibit_unassigned_code_points:
  50. prohibited = _PROHIBITED + (stringprep.in_table_a1,)
  51. else:
  52. prohibited = _PROHIBITED
  53. # RFC3454 section 2, step 1 - Map
  54. # RFC4013 section 2.1 mappings
  55. # Map Non-ASCII space characters to SPACE (U+0020). Map
  56. # commonly mapped to nothing characters to, well, nothing.
  57. in_table_c12 = stringprep.in_table_c12
  58. in_table_b1 = stringprep.in_table_b1
  59. data = "".join(
  60. [
  61. "\u0020" if in_table_c12(elt) else elt
  62. for elt in data
  63. if not in_table_b1(elt)
  64. ],
  65. )
  66. # RFC3454 section 2, step 2 - Normalize
  67. # RFC4013 section 2.2 normalization
  68. data = unicodedata.ucd_3_2_0.normalize("NFKC", data)
  69. in_table_d1 = stringprep.in_table_d1
  70. if in_table_d1(data[0]):
  71. if not in_table_d1(data[-1]):
  72. # RFC3454, Section 6, #3. If a string contains any
  73. # RandALCat character, the first and last characters
  74. # MUST be RandALCat characters.
  75. raise PDFValueError("SASLprep: failed bidirectional check")
  76. # RFC3454, Section 6, #2. If a string contains any RandALCat
  77. # character, it MUST NOT contain any LCat character.
  78. prohibited = prohibited + (stringprep.in_table_d2,)
  79. else:
  80. # RFC3454, Section 6, #3. Following the logic of #3, if
  81. # the first character is not a RandALCat, no other character
  82. # can be either.
  83. prohibited = prohibited + (in_table_d1,)
  84. # RFC3454 section 2, step 3 and 4 - Prohibit and check bidi
  85. for char in data:
  86. if any(in_table(char) for in_table in prohibited):
  87. raise PDFValueError("SASLprep: failed prohibited character check")
  88. return data