_normalization.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. """
  2. Helpers for normalization as expected in wheel/sdist/module file names
  3. and core metadata
  4. """
  5. import re
  6. from typing import TYPE_CHECKING
  7. import packaging
# https://packaging.python.org/en/latest/specifications/core-metadata/#name
# Whole-string match: either a single letter/digit, or a string that starts
# and ends with a letter/digit and has only letters, digits, ".", "_" or "-"
# in between. All patterns below are compiled case-insensitively (re.I).
_VALID_NAME = re.compile(r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.I)
# One or more consecutive characters that are not a letter, digit, ".", "_" or "-".
_UNSAFE_NAME_CHARS = re.compile(r"[^A-Z0-9._-]+", re.I)
# One or more consecutive characters that are not a letter or digit.
_NON_ALPHANUMERIC = re.compile(r"[^A-Z0-9]+", re.I)
# Anchored at the start: an optional "v", then (captured as group "safe") an
# optional "<digits>!" prefix followed by dot-separated groups of digits.
_PEP440_FALLBACK = re.compile(r"^v?(?P<safe>(?:[0-9]+!)?[0-9]+(?:\.[0-9]+)*)", re.I)
  13. def safe_identifier(name: str) -> str:
  14. """Make a string safe to be used as Python identifier.
  15. >>> safe_identifier("12abc")
  16. '_12abc'
  17. >>> safe_identifier("__editable__.myns.pkg-78.9.3_local")
  18. '__editable___myns_pkg_78_9_3_local'
  19. """
  20. safe = re.sub(r'\W|^(?=\d)', '_', name)
  21. assert safe.isidentifier()
  22. return safe
  23. def safe_name(component: str) -> str:
  24. """Escape a component used as a project name according to Core Metadata.
  25. >>> safe_name("hello world")
  26. 'hello-world'
  27. >>> safe_name("hello?world")
  28. 'hello-world'
  29. >>> safe_name("hello_world")
  30. 'hello_world'
  31. """
  32. return _UNSAFE_NAME_CHARS.sub("-", component)
  33. def safe_version(version: str) -> str:
  34. """Convert an arbitrary string into a valid version string.
  35. Can still raise an ``InvalidVersion`` exception.
  36. To avoid exceptions use ``best_effort_version``.
  37. >>> safe_version("1988 12 25")
  38. '1988.12.25'
  39. >>> safe_version("v0.2.1")
  40. '0.2.1'
  41. >>> safe_version("v0.2?beta")
  42. '0.2b0'
  43. >>> safe_version("v0.2 beta")
  44. '0.2b0'
  45. >>> safe_version("ubuntu lts")
  46. Traceback (most recent call last):
  47. ...
  48. packaging.version.InvalidVersion: Invalid version: 'ubuntu.lts'
  49. """
  50. v = version.replace(' ', '.')
  51. try:
  52. return str(packaging.version.Version(v))
  53. except packaging.version.InvalidVersion:
  54. attempt = _UNSAFE_NAME_CHARS.sub("-", v)
  55. return str(packaging.version.Version(attempt))
  56. def best_effort_version(version: str) -> str:
  57. """Convert an arbitrary string into a version-like string.
  58. Fallback when ``safe_version`` is not safe enough.
  59. >>> best_effort_version("v0.2 beta")
  60. '0.2b0'
  61. >>> best_effort_version("ubuntu lts")
  62. '0.dev0+sanitized.ubuntu.lts'
  63. >>> best_effort_version("0.23ubuntu1")
  64. '0.23.dev0+sanitized.ubuntu1'
  65. >>> best_effort_version("0.23-")
  66. '0.23.dev0+sanitized'
  67. >>> best_effort_version("0.-_")
  68. '0.dev0+sanitized'
  69. >>> best_effort_version("42.+?1")
  70. '42.dev0+sanitized.1'
  71. """
  72. try:
  73. return safe_version(version)
  74. except packaging.version.InvalidVersion:
  75. v = version.replace(' ', '.')
  76. match = _PEP440_FALLBACK.search(v)
  77. if match:
  78. safe = match["safe"]
  79. rest = v[len(safe) :]
  80. else:
  81. safe = "0"
  82. rest = version
  83. safe_rest = _NON_ALPHANUMERIC.sub(".", rest).strip(".")
  84. local = f"sanitized.{safe_rest}".strip(".")
  85. return safe_version(f"{safe}.dev0+{local}")
  86. def safe_extra(extra: str) -> str:
  87. """Normalize extra name according to PEP 685
  88. >>> safe_extra("_FrIeNdLy-._.-bArD")
  89. 'friendly-bard'
  90. >>> safe_extra("FrIeNdLy-._.-bArD__._-")
  91. 'friendly-bard'
  92. """
  93. return _NON_ALPHANUMERIC.sub("-", extra).strip("-").lower()
  94. def filename_component(value: str) -> str:
  95. """Normalize each component of a filename (e.g. distribution/version part of wheel)
  96. Note: ``value`` needs to be already normalized.
  97. >>> filename_component("my-pkg")
  98. 'my_pkg'
  99. """
  100. return value.replace("-", "_").strip("_")
  101. def filename_component_broken(value: str) -> str:
  102. """
  103. Produce the incorrect filename component for compatibility.
  104. See pypa/setuptools#4167 for detailed analysis.
  105. TODO: replace this with filename_component after pip 24 is
  106. nearly-ubiquitous.
  107. >>> filename_component_broken('foo_bar-baz')
  108. 'foo-bar-baz'
  109. """
  110. return value.replace('_', '-')
  111. def safer_name(value: str) -> str:
  112. """Like ``safe_name`` but can be used as filename component for wheel"""
  113. # See bdist_wheel.safer_name
  114. return (
  115. # Per https://packaging.python.org/en/latest/specifications/name-normalization/#name-normalization
  116. re.sub(r"[-_.]+", "-", safe_name(value))
  117. .lower()
  118. # Per https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode
  119. .replace("-", "_")
  120. )
  121. def safer_best_effort_version(value: str) -> str:
  122. """Like ``best_effort_version`` but can be used as filename component for wheel"""
  123. # See bdist_wheel.safer_verion
  124. # TODO: Replace with only safe_version in the future (no need for best effort)
  125. return filename_component(best_effort_version(value))
  126. def _missing_canonicalize_license_expression(expression: str) -> str:
  127. """
  128. Defer import error to affect only users that actually use it
  129. https://github.com/pypa/setuptools/issues/4894
  130. >>> _missing_canonicalize_license_expression("a OR b")
  131. Traceback (most recent call last):
  132. ...
  133. ImportError: ...Cannot import `packaging.licenses`...
  134. """
  135. raise ImportError(
  136. "Cannot import `packaging.licenses`."
  137. """
  138. Setuptools>=77.0.0 requires "packaging>=24.2" to work properly.
  139. Please make sure you have a suitable version installed.
  140. """
  141. )
# Bind ``_canonicalize_license_expression`` to the implementation from
# ``packaging.licenses`` when that import succeeds; otherwise bind it to the
# stub above, which raises ``ImportError`` only when it is actually called.
try:
    from packaging.licenses import (
        canonicalize_license_expression as _canonicalize_license_expression,
    )
except ImportError:  # pragma: nocover
    # The fallback assignment is skipped when TYPE_CHECKING is true.
    if not TYPE_CHECKING:
        # XXX: pyright is still upset even with # pyright: ignore[reportAssignmentType]
        _canonicalize_license_expression = _missing_canonicalize_license_expression