req_file.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623
  1. """
  2. Requirements file parsing
  3. """
  4. import codecs
  5. import locale
  6. import logging
  7. import optparse
  8. import os
  9. import re
  10. import shlex
  11. import sys
  12. import urllib.parse
  13. from dataclasses import dataclass
  14. from optparse import Values
  15. from typing import (
  16. TYPE_CHECKING,
  17. Any,
  18. Callable,
  19. Dict,
  20. Generator,
  21. Iterable,
  22. List,
  23. NoReturn,
  24. Optional,
  25. Tuple,
  26. )
  27. from pip._internal.cli import cmdoptions
  28. from pip._internal.exceptions import InstallationError, RequirementsFileParseError
  29. from pip._internal.models.search_scope import SearchScope
  30. if TYPE_CHECKING:
  31. from pip._internal.index.package_finder import PackageFinder
  32. from pip._internal.network.session import PipSession
# Names exported by a star-import of this module.
__all__ = ["parse_requirements"]
  34. ReqFileLines = Iterable[Tuple[int, str]]
  35. LineParser = Callable[[str], Tuple[str, Values]]
  36. SCHEME_RE = re.compile(r"^(http|https|file):", re.I)
  37. COMMENT_RE = re.compile(r"(^|\s+)#.*$")
  38. # Matches environment variable-style values in '${MY_VARIABLE_1}' with the
  39. # variable name consisting of only uppercase letters, digits or the '_'
  40. # (underscore). This follows the POSIX standard defined in IEEE Std 1003.1,
  41. # 2013 Edition.
  42. ENV_VAR_RE = re.compile(r"(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})")
  43. SUPPORTED_OPTIONS: List[Callable[..., optparse.Option]] = [
  44. cmdoptions.index_url,
  45. cmdoptions.extra_index_url,
  46. cmdoptions.no_index,
  47. cmdoptions.constraints,
  48. cmdoptions.requirements,
  49. cmdoptions.editable,
  50. cmdoptions.find_links,
  51. cmdoptions.no_binary,
  52. cmdoptions.only_binary,
  53. cmdoptions.prefer_binary,
  54. cmdoptions.require_hashes,
  55. cmdoptions.pre,
  56. cmdoptions.trusted_host,
  57. cmdoptions.use_new_feature,
  58. ]
  59. # options to be passed to requirements
  60. SUPPORTED_OPTIONS_REQ: List[Callable[..., optparse.Option]] = [
  61. cmdoptions.global_options,
  62. cmdoptions.hash,
  63. cmdoptions.config_settings,
  64. ]
  65. SUPPORTED_OPTIONS_EDITABLE_REQ: List[Callable[..., optparse.Option]] = [
  66. cmdoptions.config_settings,
  67. ]
  68. # the 'dest' string values
  69. SUPPORTED_OPTIONS_REQ_DEST = [str(o().dest) for o in SUPPORTED_OPTIONS_REQ]
  70. SUPPORTED_OPTIONS_EDITABLE_REQ_DEST = [
  71. str(o().dest) for o in SUPPORTED_OPTIONS_EDITABLE_REQ
  72. ]
  73. # order of BOMS is important: codecs.BOM_UTF16_LE is a prefix of codecs.BOM_UTF32_LE
  74. # so data.startswith(BOM_UTF16_LE) would be true for UTF32_LE data
  75. BOMS: List[Tuple[bytes, str]] = [
  76. (codecs.BOM_UTF8, "utf-8"),
  77. (codecs.BOM_UTF32, "utf-32"),
  78. (codecs.BOM_UTF32_BE, "utf-32-be"),
  79. (codecs.BOM_UTF32_LE, "utf-32-le"),
  80. (codecs.BOM_UTF16, "utf-16"),
  81. (codecs.BOM_UTF16_BE, "utf-16-be"),
  82. (codecs.BOM_UTF16_LE, "utf-16-le"),
  83. ]
  84. PEP263_ENCODING_RE = re.compile(rb"coding[:=]\s*([-\w.]+)")
  85. DEFAULT_ENCODING = "utf-8"
  86. logger = logging.getLogger(__name__)
  87. @dataclass(frozen=True)
  88. class ParsedRequirement:
  89. # TODO: replace this with slots=True when dropping Python 3.9 support.
  90. __slots__ = (
  91. "requirement",
  92. "is_editable",
  93. "comes_from",
  94. "constraint",
  95. "options",
  96. "line_source",
  97. )
  98. requirement: str
  99. is_editable: bool
  100. comes_from: str
  101. constraint: bool
  102. options: Optional[Dict[str, Any]]
  103. line_source: Optional[str]
  104. @dataclass(frozen=True)
  105. class ParsedLine:
  106. __slots__ = ("filename", "lineno", "args", "opts", "constraint")
  107. filename: str
  108. lineno: int
  109. args: str
  110. opts: Values
  111. constraint: bool
  112. @property
  113. def is_editable(self) -> bool:
  114. return bool(self.opts.editables)
  115. @property
  116. def requirement(self) -> Optional[str]:
  117. if self.args:
  118. return self.args
  119. elif self.is_editable:
  120. # We don't support multiple -e on one line
  121. return self.opts.editables[0]
  122. return None
  123. def parse_requirements(
  124. filename: str,
  125. session: "PipSession",
  126. finder: Optional["PackageFinder"] = None,
  127. options: Optional[optparse.Values] = None,
  128. constraint: bool = False,
  129. ) -> Generator[ParsedRequirement, None, None]:
  130. """Parse a requirements file and yield ParsedRequirement instances.
  131. :param filename: Path or url of requirements file.
  132. :param session: PipSession instance.
  133. :param finder: Instance of pip.index.PackageFinder.
  134. :param options: cli options.
  135. :param constraint: If true, parsing a constraint file rather than
  136. requirements file.
  137. """
  138. line_parser = get_line_parser(finder)
  139. parser = RequirementsFileParser(session, line_parser)
  140. for parsed_line in parser.parse(filename, constraint):
  141. parsed_req = handle_line(
  142. parsed_line, options=options, finder=finder, session=session
  143. )
  144. if parsed_req is not None:
  145. yield parsed_req
  146. def preprocess(content: str) -> ReqFileLines:
  147. """Split, filter, and join lines, and return a line iterator
  148. :param content: the content of the requirements file
  149. """
  150. lines_enum: ReqFileLines = enumerate(content.splitlines(), start=1)
  151. lines_enum = join_lines(lines_enum)
  152. lines_enum = ignore_comments(lines_enum)
  153. lines_enum = expand_env_variables(lines_enum)
  154. return lines_enum
  155. def handle_requirement_line(
  156. line: ParsedLine,
  157. options: Optional[optparse.Values] = None,
  158. ) -> ParsedRequirement:
  159. # preserve for the nested code path
  160. line_comes_from = "{} {} (line {})".format(
  161. "-c" if line.constraint else "-r",
  162. line.filename,
  163. line.lineno,
  164. )
  165. assert line.requirement is not None
  166. # get the options that apply to requirements
  167. if line.is_editable:
  168. supported_dest = SUPPORTED_OPTIONS_EDITABLE_REQ_DEST
  169. else:
  170. supported_dest = SUPPORTED_OPTIONS_REQ_DEST
  171. req_options = {}
  172. for dest in supported_dest:
  173. if dest in line.opts.__dict__ and line.opts.__dict__[dest]:
  174. req_options[dest] = line.opts.__dict__[dest]
  175. line_source = f"line {line.lineno} of {line.filename}"
  176. return ParsedRequirement(
  177. requirement=line.requirement,
  178. is_editable=line.is_editable,
  179. comes_from=line_comes_from,
  180. constraint=line.constraint,
  181. options=req_options,
  182. line_source=line_source,
  183. )
  184. def handle_option_line(
  185. opts: Values,
  186. filename: str,
  187. lineno: int,
  188. finder: Optional["PackageFinder"] = None,
  189. options: Optional[optparse.Values] = None,
  190. session: Optional["PipSession"] = None,
  191. ) -> None:
  192. if opts.hashes:
  193. logger.warning(
  194. "%s line %s has --hash but no requirement, and will be ignored.",
  195. filename,
  196. lineno,
  197. )
  198. if options:
  199. # percolate options upward
  200. if opts.require_hashes:
  201. options.require_hashes = opts.require_hashes
  202. if opts.features_enabled:
  203. options.features_enabled.extend(
  204. f for f in opts.features_enabled if f not in options.features_enabled
  205. )
  206. # set finder options
  207. if finder:
  208. find_links = finder.find_links
  209. index_urls = finder.index_urls
  210. no_index = finder.search_scope.no_index
  211. if opts.no_index is True:
  212. no_index = True
  213. index_urls = []
  214. if opts.index_url and not no_index:
  215. index_urls = [opts.index_url]
  216. if opts.extra_index_urls and not no_index:
  217. index_urls.extend(opts.extra_index_urls)
  218. if opts.find_links:
  219. # FIXME: it would be nice to keep track of the source
  220. # of the find_links: support a find-links local path
  221. # relative to a requirements file.
  222. value = opts.find_links[0]
  223. req_dir = os.path.dirname(os.path.abspath(filename))
  224. relative_to_reqs_file = os.path.join(req_dir, value)
  225. if os.path.exists(relative_to_reqs_file):
  226. value = relative_to_reqs_file
  227. find_links.append(value)
  228. if session:
  229. # We need to update the auth urls in session
  230. session.update_index_urls(index_urls)
  231. search_scope = SearchScope(
  232. find_links=find_links,
  233. index_urls=index_urls,
  234. no_index=no_index,
  235. )
  236. finder.search_scope = search_scope
  237. if opts.pre:
  238. finder.set_allow_all_prereleases()
  239. if opts.prefer_binary:
  240. finder.set_prefer_binary()
  241. if session:
  242. for host in opts.trusted_hosts or []:
  243. source = f"line {lineno} of {filename}"
  244. session.add_trusted_host(host, source=source)
  245. def handle_line(
  246. line: ParsedLine,
  247. options: Optional[optparse.Values] = None,
  248. finder: Optional["PackageFinder"] = None,
  249. session: Optional["PipSession"] = None,
  250. ) -> Optional[ParsedRequirement]:
  251. """Handle a single parsed requirements line; This can result in
  252. creating/yielding requirements, or updating the finder.
  253. :param line: The parsed line to be processed.
  254. :param options: CLI options.
  255. :param finder: The finder - updated by non-requirement lines.
  256. :param session: The session - updated by non-requirement lines.
  257. Returns a ParsedRequirement object if the line is a requirement line,
  258. otherwise returns None.
  259. For lines that contain requirements, the only options that have an effect
  260. are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
  261. requirement. Other options from SUPPORTED_OPTIONS may be present, but are
  262. ignored.
  263. For lines that do not contain requirements, the only options that have an
  264. effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
  265. be present, but are ignored. These lines may contain multiple options
  266. (although our docs imply only one is supported), and all our parsed and
  267. affect the finder.
  268. """
  269. if line.requirement is not None:
  270. parsed_req = handle_requirement_line(line, options)
  271. return parsed_req
  272. else:
  273. handle_option_line(
  274. line.opts,
  275. line.filename,
  276. line.lineno,
  277. finder,
  278. options,
  279. session,
  280. )
  281. return None
  282. class RequirementsFileParser:
  283. def __init__(
  284. self,
  285. session: "PipSession",
  286. line_parser: LineParser,
  287. ) -> None:
  288. self._session = session
  289. self._line_parser = line_parser
  290. def parse(
  291. self, filename: str, constraint: bool
  292. ) -> Generator[ParsedLine, None, None]:
  293. """Parse a given file, yielding parsed lines."""
  294. yield from self._parse_and_recurse(
  295. filename, constraint, [{os.path.abspath(filename): None}]
  296. )
  297. def _parse_and_recurse(
  298. self,
  299. filename: str,
  300. constraint: bool,
  301. parsed_files_stack: List[Dict[str, Optional[str]]],
  302. ) -> Generator[ParsedLine, None, None]:
  303. for line in self._parse_file(filename, constraint):
  304. if line.requirement is None and (
  305. line.opts.requirements or line.opts.constraints
  306. ):
  307. # parse a nested requirements file
  308. if line.opts.requirements:
  309. req_path = line.opts.requirements[0]
  310. nested_constraint = False
  311. else:
  312. req_path = line.opts.constraints[0]
  313. nested_constraint = True
  314. # original file is over http
  315. if SCHEME_RE.search(filename):
  316. # do a url join so relative paths work
  317. req_path = urllib.parse.urljoin(filename, req_path)
  318. # original file and nested file are paths
  319. elif not SCHEME_RE.search(req_path):
  320. # do a join so relative paths work
  321. # and then abspath so that we can identify recursive references
  322. req_path = os.path.abspath(
  323. os.path.join(
  324. os.path.dirname(filename),
  325. req_path,
  326. )
  327. )
  328. parsed_files = parsed_files_stack[0]
  329. if req_path in parsed_files:
  330. initial_file = parsed_files[req_path]
  331. tail = (
  332. f" and again in {initial_file}"
  333. if initial_file is not None
  334. else ""
  335. )
  336. raise RequirementsFileParseError(
  337. f"{req_path} recursively references itself in {filename}{tail}"
  338. )
  339. # Keeping a track where was each file first included in
  340. new_parsed_files = parsed_files.copy()
  341. new_parsed_files[req_path] = filename
  342. yield from self._parse_and_recurse(
  343. req_path, nested_constraint, [new_parsed_files, *parsed_files_stack]
  344. )
  345. else:
  346. yield line
  347. def _parse_file(
  348. self, filename: str, constraint: bool
  349. ) -> Generator[ParsedLine, None, None]:
  350. _, content = get_file_content(filename, self._session)
  351. lines_enum = preprocess(content)
  352. for line_number, line in lines_enum:
  353. try:
  354. args_str, opts = self._line_parser(line)
  355. except OptionParsingError as e:
  356. # add offending line
  357. msg = f"Invalid requirement: {line}\n{e.msg}"
  358. raise RequirementsFileParseError(msg)
  359. yield ParsedLine(
  360. filename,
  361. line_number,
  362. args_str,
  363. opts,
  364. constraint,
  365. )
  366. def get_line_parser(finder: Optional["PackageFinder"]) -> LineParser:
  367. def parse_line(line: str) -> Tuple[str, Values]:
  368. # Build new parser for each line since it accumulates appendable
  369. # options.
  370. parser = build_parser()
  371. defaults = parser.get_default_values()
  372. defaults.index_url = None
  373. if finder:
  374. defaults.format_control = finder.format_control
  375. args_str, options_str = break_args_options(line)
  376. try:
  377. options = shlex.split(options_str)
  378. except ValueError as e:
  379. raise OptionParsingError(f"Could not split options: {options_str}") from e
  380. opts, _ = parser.parse_args(options, defaults)
  381. return args_str, opts
  382. return parse_line
  383. def break_args_options(line: str) -> Tuple[str, str]:
  384. """Break up the line into an args and options string. We only want to shlex
  385. (and then optparse) the options, not the args. args can contain markers
  386. which are corrupted by shlex.
  387. """
  388. tokens = line.split(" ")
  389. args = []
  390. options = tokens[:]
  391. for token in tokens:
  392. if token.startswith("-") or token.startswith("--"):
  393. break
  394. else:
  395. args.append(token)
  396. options.pop(0)
  397. return " ".join(args), " ".join(options)
  398. class OptionParsingError(Exception):
  399. def __init__(self, msg: str) -> None:
  400. self.msg = msg
  401. def build_parser() -> optparse.OptionParser:
  402. """
  403. Return a parser for parsing requirement lines
  404. """
  405. parser = optparse.OptionParser(add_help_option=False)
  406. option_factories = SUPPORTED_OPTIONS + SUPPORTED_OPTIONS_REQ
  407. for option_factory in option_factories:
  408. option = option_factory()
  409. parser.add_option(option)
  410. # By default optparse sys.exits on parsing errors. We want to wrap
  411. # that in our own exception.
  412. def parser_exit(self: Any, msg: str) -> "NoReturn":
  413. raise OptionParsingError(msg)
  414. # NOTE: mypy disallows assigning to a method
  415. # https://github.com/python/mypy/issues/2427
  416. parser.exit = parser_exit # type: ignore
  417. return parser
  418. def join_lines(lines_enum: ReqFileLines) -> ReqFileLines:
  419. """Joins a line ending in '\' with the previous line (except when following
  420. comments). The joined line takes on the index of the first line.
  421. """
  422. primary_line_number = None
  423. new_line: List[str] = []
  424. for line_number, line in lines_enum:
  425. if not line.endswith("\\") or COMMENT_RE.match(line):
  426. if COMMENT_RE.match(line):
  427. # this ensures comments are always matched later
  428. line = " " + line
  429. if new_line:
  430. new_line.append(line)
  431. assert primary_line_number is not None
  432. yield primary_line_number, "".join(new_line)
  433. new_line = []
  434. else:
  435. yield line_number, line
  436. else:
  437. if not new_line:
  438. primary_line_number = line_number
  439. new_line.append(line.strip("\\"))
  440. # last line contains \
  441. if new_line:
  442. assert primary_line_number is not None
  443. yield primary_line_number, "".join(new_line)
  444. # TODO: handle space after '\'.
  445. def ignore_comments(lines_enum: ReqFileLines) -> ReqFileLines:
  446. """
  447. Strips comments and filter empty lines.
  448. """
  449. for line_number, line in lines_enum:
  450. line = COMMENT_RE.sub("", line)
  451. line = line.strip()
  452. if line:
  453. yield line_number, line
  454. def expand_env_variables(lines_enum: ReqFileLines) -> ReqFileLines:
  455. """Replace all environment variables that can be retrieved via `os.getenv`.
  456. The only allowed format for environment variables defined in the
  457. requirement file is `${MY_VARIABLE_1}` to ensure two things:
  458. 1. Strings that contain a `$` aren't accidentally (partially) expanded.
  459. 2. Ensure consistency across platforms for requirement files.
  460. These points are the result of a discussion on the `github pull
  461. request #3514 <https://github.com/pypa/pip/pull/3514>`_.
  462. Valid characters in variable names follow the `POSIX standard
  463. <http://pubs.opengroup.org/onlinepubs/9699919799/>`_ and are limited
  464. to uppercase letter, digits and the `_` (underscore).
  465. """
  466. for line_number, line in lines_enum:
  467. for env_var, var_name in ENV_VAR_RE.findall(line):
  468. value = os.getenv(var_name)
  469. if not value:
  470. continue
  471. line = line.replace(env_var, value)
  472. yield line_number, line
  473. def get_file_content(url: str, session: "PipSession") -> Tuple[str, str]:
  474. """Gets the content of a file; it may be a filename, file: URL, or
  475. http: URL. Returns (location, content). Content is unicode.
  476. Respects # -*- coding: declarations on the retrieved files.
  477. :param url: File path or url.
  478. :param session: PipSession instance.
  479. """
  480. scheme = urllib.parse.urlsplit(url).scheme
  481. # Pip has special support for file:// URLs (LocalFSAdapter).
  482. if scheme in ["http", "https", "file"]:
  483. # Delay importing heavy network modules until absolutely necessary.
  484. from pip._internal.network.utils import raise_for_status
  485. resp = session.get(url)
  486. raise_for_status(resp)
  487. return resp.url, resp.text
  488. # Assume this is a bare path.
  489. try:
  490. with open(url, "rb") as f:
  491. raw_content = f.read()
  492. except OSError as exc:
  493. raise InstallationError(f"Could not open requirements file: {exc}")
  494. content = _decode_req_file(raw_content, url)
  495. return url, content
  496. def _decode_req_file(data: bytes, url: str) -> str:
  497. for bom, encoding in BOMS:
  498. if data.startswith(bom):
  499. return data[len(bom) :].decode(encoding)
  500. for line in data.split(b"\n")[:2]:
  501. if line[0:1] == b"#":
  502. result = PEP263_ENCODING_RE.search(line)
  503. if result is not None:
  504. encoding = result.groups()[0].decode("ascii")
  505. return data.decode(encoding)
  506. try:
  507. return data.decode(DEFAULT_ENCODING)
  508. except UnicodeDecodeError:
  509. locale_encoding = locale.getpreferredencoding(False) or sys.getdefaultencoding()
  510. logging.warning(
  511. "unable to decode data from %s with default encoding %s, "
  512. "falling back to encoding from locale: %s. "
  513. "If this is intentional you should specify the encoding with a "
  514. "PEP-263 style comment, e.g. '# -*- coding: %s -*-'",
  515. url,
  516. DEFAULT_ENCODING,
  517. locale_encoding,
  518. locale_encoding,
  519. )
  520. return data.decode(locale_encoding)