__init__.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. #######################################################################################
  2. #
  3. # Adapted from:
  4. # https://github.com/pypa/hatch/blob/5352e44/backend/src/hatchling/licenses/parse.py
  5. #
  6. # MIT License
  7. #
  8. # Copyright (c) 2017-present Ofek Lev <oss@ofek.dev>
  9. #
  10. # Permission is hereby granted, free of charge, to any person obtaining a copy of this
  11. # software and associated documentation files (the "Software"), to deal in the Software
  12. # without restriction, including without limitation the rights to use, copy, modify,
  13. # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
  14. # permit persons to whom the Software is furnished to do so, subject to the following
  15. # conditions:
  16. #
  17. # The above copyright notice and this permission notice shall be included in all copies
  18. # or substantial portions of the Software.
  19. #
  20. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
  21. # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  22. # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  23. # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
  24. # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
  25. # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26. #
  27. #
  28. # With additional allowance of arbitrary `LicenseRef-` identifiers, not just
  29. # `LicenseRef-Public-Domain` and `LicenseRef-Proprietary`.
  30. #
  31. #######################################################################################
  32. from __future__ import annotations
  33. import re
  34. from typing import NewType, cast
  35. from packaging.licenses._spdx import EXCEPTIONS, LICENSES
# Public names of this module; this list is what ``from <module> import *``
# exports.
__all__ = [
    "InvalidLicenseExpression",
    "NormalizedLicenseExpression",
    "canonicalize_license_expression",
]
  41. license_ref_allowed = re.compile("^[A-Za-z0-9.-]*$")
  42. NormalizedLicenseExpression = NewType("NormalizedLicenseExpression", str)
  43. class InvalidLicenseExpression(ValueError):
  44. """Raised when a license-expression string is invalid
  45. >>> canonicalize_license_expression("invalid")
  46. Traceback (most recent call last):
  47. ...
  48. packaging.licenses.InvalidLicenseExpression: Invalid license expression: 'invalid'
  49. """
  50. def canonicalize_license_expression(
  51. raw_license_expression: str,
  52. ) -> NormalizedLicenseExpression:
  53. if not raw_license_expression:
  54. message = f"Invalid license expression: {raw_license_expression!r}"
  55. raise InvalidLicenseExpression(message)
  56. # Pad any parentheses so tokenization can be achieved by merely splitting on
  57. # whitespace.
  58. license_expression = raw_license_expression.replace("(", " ( ").replace(")", " ) ")
  59. licenseref_prefix = "LicenseRef-"
  60. license_refs = {
  61. ref.lower(): "LicenseRef-" + ref[len(licenseref_prefix) :]
  62. for ref in license_expression.split()
  63. if ref.lower().startswith(licenseref_prefix.lower())
  64. }
  65. # Normalize to lower case so we can look up licenses/exceptions
  66. # and so boolean operators are Python-compatible.
  67. license_expression = license_expression.lower()
  68. tokens = license_expression.split()
  69. # Rather than implementing boolean logic, we create an expression that Python can
  70. # parse. Everything that is not involved with the grammar itself is treated as
  71. # `False` and the expression should evaluate as such.
  72. python_tokens = []
  73. for token in tokens:
  74. if token not in {"or", "and", "with", "(", ")"}:
  75. python_tokens.append("False")
  76. elif token == "with":
  77. python_tokens.append("or")
  78. elif token == "(" and python_tokens and python_tokens[-1] not in {"or", "and"}:
  79. message = f"Invalid license expression: {raw_license_expression!r}"
  80. raise InvalidLicenseExpression(message)
  81. else:
  82. python_tokens.append(token)
  83. python_expression = " ".join(python_tokens)
  84. try:
  85. invalid = eval(python_expression, globals(), locals())
  86. except Exception:
  87. invalid = True
  88. if invalid is not False:
  89. message = f"Invalid license expression: {raw_license_expression!r}"
  90. raise InvalidLicenseExpression(message) from None
  91. # Take a final pass to check for unknown licenses/exceptions.
  92. normalized_tokens = []
  93. for token in tokens:
  94. if token in {"or", "and", "with", "(", ")"}:
  95. normalized_tokens.append(token.upper())
  96. continue
  97. if normalized_tokens and normalized_tokens[-1] == "WITH":
  98. if token not in EXCEPTIONS:
  99. message = f"Unknown license exception: {token!r}"
  100. raise InvalidLicenseExpression(message)
  101. normalized_tokens.append(EXCEPTIONS[token]["id"])
  102. else:
  103. if token.endswith("+"):
  104. final_token = token[:-1]
  105. suffix = "+"
  106. else:
  107. final_token = token
  108. suffix = ""
  109. if final_token.startswith("licenseref-"):
  110. if not license_ref_allowed.match(final_token):
  111. message = f"Invalid licenseref: {final_token!r}"
  112. raise InvalidLicenseExpression(message)
  113. normalized_tokens.append(license_refs[final_token] + suffix)
  114. else:
  115. if final_token not in LICENSES:
  116. message = f"Unknown license: {final_token!r}"
  117. raise InvalidLicenseExpression(message)
  118. normalized_tokens.append(LICENSES[final_token]["id"] + suffix)
  119. normalized_expression = " ".join(normalized_tokens)
  120. return cast(
  121. NormalizedLicenseExpression,
  122. normalized_expression.replace("( ", "(").replace(" )", ")"),
  123. )