build_py.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. from __future__ import annotations
  2. import fnmatch
  3. import itertools
  4. import os
  5. import stat
  6. import textwrap
  7. from collections.abc import Iterable, Iterator
  8. from functools import partial
  9. from glob import glob
  10. from pathlib import Path
  11. from more_itertools import unique_everseen
  12. from .._path import StrPath, StrPathT
  13. from ..dist import Distribution
  14. from ..warnings import SetuptoolsDeprecationWarning
  15. import distutils.command.build_py as orig
  16. import distutils.errors
  17. from distutils.util import convert_path
# Glob patterns that `build_py.find_data_files` passes as `extra_patterns`,
# i.e. they are searched in every package in addition to whatever is
# configured through `package_data`.
_IMPLICIT_DATA_FILES = ('*.pyi', 'py.typed')
  19. def make_writable(target) -> None:
  20. os.chmod(target, os.stat(target).st_mode | stat.S_IWRITE)
  21. class build_py(orig.build_py):
  22. """Enhanced 'build_py' command that includes data files with packages
  23. The data files are specified via a 'package_data' argument to 'setup()'.
  24. See 'setuptools.dist.Distribution' for more details.
  25. Also, this version of the 'build_py' command allows you to specify both
  26. 'py_modules' and 'packages' in the same setup operation.
  27. """
  28. distribution: Distribution # override distutils.dist.Distribution with setuptools.dist.Distribution
  29. editable_mode: bool = False
  30. existing_egg_info_dir: StrPath | None = None #: Private API, internal use only.
  31. def finalize_options(self):
  32. orig.build_py.finalize_options(self)
  33. self.package_data = self.distribution.package_data
  34. self.exclude_package_data = self.distribution.exclude_package_data or {}
  35. if 'data_files' in self.__dict__:
  36. del self.__dict__['data_files']
  37. def copy_file( # type: ignore[override] # No overload, no bytes support
  38. self,
  39. infile: StrPath,
  40. outfile: StrPathT,
  41. preserve_mode: bool = True,
  42. preserve_times: bool = True,
  43. link: str | None = None,
  44. level: object = 1,
  45. ) -> tuple[StrPathT | str, bool]:
  46. # Overwrite base class to allow using links
  47. if link:
  48. infile = str(Path(infile).resolve())
  49. outfile = str(Path(outfile).resolve()) # type: ignore[assignment] # Re-assigning a str when outfile is StrPath is ok
  50. return super().copy_file( # pyright: ignore[reportReturnType] # pypa/distutils#309
  51. infile, outfile, preserve_mode, preserve_times, link, level
  52. )
  53. def run(self) -> None:
  54. """Build modules, packages, and copy data files to build directory"""
  55. if not (self.py_modules or self.packages) or self.editable_mode:
  56. return
  57. if self.py_modules:
  58. self.build_modules()
  59. if self.packages:
  60. self.build_packages()
  61. self.build_package_data()
  62. # Only compile actual .py files, using our base class' idea of what our
  63. # output files are.
  64. self.byte_compile(orig.build_py.get_outputs(self, include_bytecode=False))
  65. def __getattr__(self, attr: str):
  66. "lazily compute data files"
  67. if attr == 'data_files':
  68. self.data_files = self._get_data_files()
  69. return self.data_files
  70. return orig.build_py.__getattr__(self, attr)
  71. def _get_data_files(self):
  72. """Generate list of '(package,src_dir,build_dir,filenames)' tuples"""
  73. self.analyze_manifest()
  74. return list(map(self._get_pkg_data_files, self.packages or ()))
  75. def get_data_files_without_manifest(self):
  76. """
  77. Generate list of ``(package,src_dir,build_dir,filenames)`` tuples,
  78. but without triggering any attempt to analyze or build the manifest.
  79. """
  80. # Prevent eventual errors from unset `manifest_files`
  81. # (that would otherwise be set by `analyze_manifest`)
  82. self.__dict__.setdefault('manifest_files', {})
  83. return list(map(self._get_pkg_data_files, self.packages or ()))
  84. def _get_pkg_data_files(self, package):
  85. # Locate package source directory
  86. src_dir = self.get_package_dir(package)
  87. # Compute package build directory
  88. build_dir = os.path.join(*([self.build_lib] + package.split('.')))
  89. # Strip directory from globbed filenames
  90. filenames = [
  91. os.path.relpath(file, src_dir)
  92. for file in self.find_data_files(package, src_dir)
  93. ]
  94. return package, src_dir, build_dir, filenames
  95. def find_data_files(self, package, src_dir):
  96. """Return filenames for package's data files in 'src_dir'"""
  97. patterns = self._get_platform_patterns(
  98. self.package_data,
  99. package,
  100. src_dir,
  101. extra_patterns=_IMPLICIT_DATA_FILES,
  102. )
  103. globs_expanded = map(partial(glob, recursive=True), patterns)
  104. # flatten the expanded globs into an iterable of matches
  105. globs_matches = itertools.chain.from_iterable(globs_expanded)
  106. glob_files = filter(os.path.isfile, globs_matches)
  107. files = itertools.chain(
  108. self.manifest_files.get(package, []),
  109. glob_files,
  110. )
  111. return self.exclude_data_files(package, src_dir, files)
  112. def get_outputs(self, include_bytecode: bool = True) -> list[str]: # type: ignore[override] # Using a real boolean instead of 0|1
  113. """See :class:`setuptools.commands.build.SubCommand`"""
  114. if self.editable_mode:
  115. return list(self.get_output_mapping().keys())
  116. return super().get_outputs(include_bytecode)
  117. def get_output_mapping(self) -> dict[str, str]:
  118. """See :class:`setuptools.commands.build.SubCommand`"""
  119. mapping = itertools.chain(
  120. self._get_package_data_output_mapping(),
  121. self._get_module_mapping(),
  122. )
  123. return dict(sorted(mapping, key=lambda x: x[0]))
  124. def _get_module_mapping(self) -> Iterator[tuple[str, str]]:
  125. """Iterate over all modules producing (dest, src) pairs."""
  126. for package, module, module_file in self.find_all_modules():
  127. package = package.split('.')
  128. filename = self.get_module_outfile(self.build_lib, package, module)
  129. yield (filename, module_file)
  130. def _get_package_data_output_mapping(self) -> Iterator[tuple[str, str]]:
  131. """Iterate over package data producing (dest, src) pairs."""
  132. for package, src_dir, build_dir, filenames in self.data_files:
  133. for filename in filenames:
  134. target = os.path.join(build_dir, filename)
  135. srcfile = os.path.join(src_dir, filename)
  136. yield (target, srcfile)
  137. def build_package_data(self) -> None:
  138. """Copy data files into build directory"""
  139. for target, srcfile in self._get_package_data_output_mapping():
  140. self.mkpath(os.path.dirname(target))
  141. _outf, _copied = self.copy_file(srcfile, target)
  142. make_writable(target)
  143. def analyze_manifest(self) -> None:
  144. self.manifest_files: dict[str, list[str]] = {}
  145. if not self.distribution.include_package_data:
  146. return
  147. src_dirs: dict[str, str] = {}
  148. for package in self.packages or ():
  149. # Locate package source directory
  150. src_dirs[assert_relative(self.get_package_dir(package))] = package
  151. if (
  152. self.existing_egg_info_dir
  153. and Path(self.existing_egg_info_dir, "SOURCES.txt").exists()
  154. ):
  155. egg_info_dir = self.existing_egg_info_dir
  156. manifest = Path(egg_info_dir, "SOURCES.txt")
  157. files = manifest.read_text(encoding="utf-8").splitlines()
  158. else:
  159. self.run_command('egg_info')
  160. ei_cmd = self.get_finalized_command('egg_info')
  161. egg_info_dir = ei_cmd.egg_info
  162. files = ei_cmd.filelist.files
  163. check = _IncludePackageDataAbuse()
  164. for path in self._filter_build_files(files, egg_info_dir):
  165. d, f = os.path.split(assert_relative(path))
  166. prev = None
  167. oldf = f
  168. while d and d != prev and d not in src_dirs:
  169. prev = d
  170. d, df = os.path.split(d)
  171. f = os.path.join(df, f)
  172. if d in src_dirs:
  173. if f == oldf:
  174. if check.is_module(f):
  175. continue # it's a module, not data
  176. else:
  177. importable = check.importable_subpackage(src_dirs[d], f)
  178. if importable:
  179. check.warn(importable)
  180. self.manifest_files.setdefault(src_dirs[d], []).append(path)
  181. def _filter_build_files(
  182. self, files: Iterable[str], egg_info: StrPath
  183. ) -> Iterator[str]:
  184. """
  185. ``build_meta`` may try to create egg_info outside of the project directory,
  186. and this can be problematic for certain plugins (reported in issue #3500).
  187. Extensions might also include between their sources files created on the
  188. ``build_lib`` and ``build_temp`` directories.
  189. This function should filter this case of invalid files out.
  190. """
  191. build = self.get_finalized_command("build")
  192. build_dirs = (egg_info, self.build_lib, build.build_temp, build.build_base)
  193. norm_dirs = [os.path.normpath(p) for p in build_dirs if p]
  194. for file in files:
  195. norm_path = os.path.normpath(file)
  196. if not os.path.isabs(file) or all(d not in norm_path for d in norm_dirs):
  197. yield file
  198. def get_data_files(self) -> None:
  199. pass # Lazily compute data files in _get_data_files() function.
  200. def check_package(self, package, package_dir):
  201. """Check namespace packages' __init__ for declare_namespace"""
  202. try:
  203. return self.packages_checked[package]
  204. except KeyError:
  205. pass
  206. init_py = orig.build_py.check_package(self, package, package_dir)
  207. self.packages_checked[package] = init_py
  208. if not init_py or not self.distribution.namespace_packages:
  209. return init_py
  210. for pkg in self.distribution.namespace_packages:
  211. if pkg == package or pkg.startswith(package + '.'):
  212. break
  213. else:
  214. return init_py
  215. with open(init_py, 'rb') as f:
  216. contents = f.read()
  217. if b'declare_namespace' not in contents:
  218. raise distutils.errors.DistutilsError(
  219. f"Namespace package problem: {package} is a namespace package, but "
  220. "its\n__init__.py does not call declare_namespace()! Please "
  221. 'fix it.\n(See the setuptools manual under '
  222. '"Namespace Packages" for details.)\n"'
  223. )
  224. return init_py
  225. def initialize_options(self):
  226. self.packages_checked = {}
  227. orig.build_py.initialize_options(self)
  228. self.editable_mode = False
  229. self.existing_egg_info_dir = None
  230. def get_package_dir(self, package):
  231. res = orig.build_py.get_package_dir(self, package)
  232. if self.distribution.src_root is not None:
  233. return os.path.join(self.distribution.src_root, res)
  234. return res
  235. def exclude_data_files(self, package, src_dir, files):
  236. """Filter filenames for package's data files in 'src_dir'"""
  237. files = list(files)
  238. patterns = self._get_platform_patterns(
  239. self.exclude_package_data,
  240. package,
  241. src_dir,
  242. )
  243. match_groups = (fnmatch.filter(files, pattern) for pattern in patterns)
  244. # flatten the groups of matches into an iterable of matches
  245. matches = itertools.chain.from_iterable(match_groups)
  246. bad = set(matches)
  247. keepers = (fn for fn in files if fn not in bad)
  248. # ditch dupes
  249. return list(unique_everseen(keepers))
  250. @staticmethod
  251. def _get_platform_patterns(spec, package, src_dir, extra_patterns=()):
  252. """
  253. yield platform-specific path patterns (suitable for glob
  254. or fn_match) from a glob-based spec (such as
  255. self.package_data or self.exclude_package_data)
  256. matching package in src_dir.
  257. """
  258. raw_patterns = itertools.chain(
  259. extra_patterns,
  260. spec.get('', []),
  261. spec.get(package, []),
  262. )
  263. return (
  264. # Each pattern has to be converted to a platform-specific path
  265. os.path.join(src_dir, convert_path(pattern))
  266. for pattern in raw_patterns
  267. )
  268. def assert_relative(path):
  269. if not os.path.isabs(path):
  270. return path
  271. from distutils.errors import DistutilsSetupError
  272. msg = (
  273. textwrap.dedent(
  274. """
  275. Error: setup script specifies an absolute path:
  276. %s
  277. setup() arguments must *always* be /-separated paths relative to the
  278. setup.py directory, *never* absolute paths.
  279. """
  280. ).lstrip()
  281. % path
  282. )
  283. raise DistutilsSetupError(msg)
  284. class _IncludePackageDataAbuse:
  285. """Inform users that package or module is included as 'data file'"""
  286. class _Warning(SetuptoolsDeprecationWarning):
  287. _SUMMARY = """
  288. Package {importable!r} is absent from the `packages` configuration.
  289. """
  290. _DETAILS = """
  291. ############################
  292. # Package would be ignored #
  293. ############################
  294. Python recognizes {importable!r} as an importable package[^1],
  295. but it is absent from setuptools' `packages` configuration.
  296. This leads to an ambiguous overall configuration. If you want to distribute this
  297. package, please make sure that {importable!r} is explicitly added
  298. to the `packages` configuration field.
  299. Alternatively, you can also rely on setuptools' discovery methods
  300. (for example by using `find_namespace_packages(...)`/`find_namespace:`
  301. instead of `find_packages(...)`/`find:`).
  302. You can read more about "package discovery" on setuptools documentation page:
  303. - https://setuptools.pypa.io/en/latest/userguide/package_discovery.html
  304. If you don't want {importable!r} to be distributed and are
  305. already explicitly excluding {importable!r} via
  306. `find_namespace_packages(...)/find_namespace` or `find_packages(...)/find`,
  307. you can try to use `exclude_package_data`, or `include-package-data=False` in
  308. combination with a more fine grained `package-data` configuration.
  309. You can read more about "package data files" on setuptools documentation page:
  310. - https://setuptools.pypa.io/en/latest/userguide/datafiles.html
  311. [^1]: For Python, any directory (with suitable naming) can be imported,
  312. even if it does not contain any `.py` files.
  313. On the other hand, currently there is no concept of package data
  314. directory, all directories are treated like packages.
  315. """
  316. # _DUE_DATE: still not defined as this is particularly controversial.
  317. # Warning initially introduced in May 2022. See issue #3340 for discussion.
  318. def __init__(self):
  319. self._already_warned = set()
  320. def is_module(self, file):
  321. return file.endswith(".py") and file[: -len(".py")].isidentifier()
  322. def importable_subpackage(self, parent, file):
  323. pkg = Path(file).parent
  324. parts = list(itertools.takewhile(str.isidentifier, pkg.parts))
  325. if parts:
  326. return ".".join([parent, *parts])
  327. return None
  328. def warn(self, importable):
  329. if importable not in self._already_warned:
  330. self._Warning.emit(importable=importable)
  331. self._already_warned.add(importable)