session.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. """PipSession and supporting code, containing all pip-specific
  2. network request configuration and behavior.
  3. """
  4. import email.utils
  5. import functools
  6. import io
  7. import ipaddress
  8. import json
  9. import logging
  10. import mimetypes
  11. import os
  12. import platform
  13. import shutil
  14. import subprocess
  15. import sys
  16. import urllib.parse
  17. import warnings
  18. from typing import (
  19. TYPE_CHECKING,
  20. Any,
  21. Dict,
  22. Generator,
  23. List,
  24. Mapping,
  25. Optional,
  26. Sequence,
  27. Tuple,
  28. Union,
  29. )
  30. from pip._vendor import requests, urllib3
  31. from pip._vendor.cachecontrol import CacheControlAdapter as _BaseCacheControlAdapter
  32. from pip._vendor.requests.adapters import DEFAULT_POOLBLOCK, BaseAdapter
  33. from pip._vendor.requests.adapters import HTTPAdapter as _BaseHTTPAdapter
  34. from pip._vendor.requests.models import PreparedRequest, Response
  35. from pip._vendor.requests.structures import CaseInsensitiveDict
  36. from pip._vendor.urllib3.connectionpool import ConnectionPool
  37. from pip._vendor.urllib3.exceptions import InsecureRequestWarning
  38. from pip import __version__
  39. from pip._internal.metadata import get_default_environment
  40. from pip._internal.models.link import Link
  41. from pip._internal.network.auth import MultiDomainBasicAuth
  42. from pip._internal.network.cache import SafeFileCache
  43. # Import ssl from compat so the initial import occurs in only one place.
  44. from pip._internal.utils.compat import has_tls
  45. from pip._internal.utils.glibc import libc_ver
  46. from pip._internal.utils.misc import build_url_from_netloc, parse_netloc
  47. from pip._internal.utils.urls import url_to_path
  48. if TYPE_CHECKING:
  49. from ssl import SSLContext
  50. from pip._vendor.urllib3.poolmanager import PoolManager
  51. logger = logging.getLogger(__name__)
  52. SecureOrigin = Tuple[str, str, Optional[Union[int, str]]]
  53. # Ignore warning raised when using --trusted-host.
  54. warnings.filterwarnings("ignore", category=InsecureRequestWarning)
  55. SECURE_ORIGINS: List[SecureOrigin] = [
  56. # protocol, hostname, port
  57. # Taken from Chrome's list of secure origins (See: http://bit.ly/1qrySKC)
  58. ("https", "*", "*"),
  59. ("*", "localhost", "*"),
  60. ("*", "127.0.0.0/8", "*"),
  61. ("*", "::1/128", "*"),
  62. ("file", "*", None),
  63. # ssh is always secure.
  64. ("ssh", "*", "*"),
  65. ]
  66. # These are environment variables present when running under various
  67. # CI systems. For each variable, some CI systems that use the variable
  68. # are indicated. The collection was chosen so that for each of a number
  69. # of popular systems, at least one of the environment variables is used.
  70. # This list is used to provide some indication of and lower bound for
  71. # CI traffic to PyPI. Thus, it is okay if the list is not comprehensive.
  72. # For more background, see: https://github.com/pypa/pip/issues/5499
  73. CI_ENVIRONMENT_VARIABLES = (
  74. # Azure Pipelines
  75. "BUILD_BUILDID",
  76. # Jenkins
  77. "BUILD_ID",
  78. # AppVeyor, CircleCI, Codeship, Gitlab CI, Shippable, Travis CI
  79. "CI",
  80. # Explicit environment variable.
  81. "PIP_IS_CI",
  82. )
  83. def looks_like_ci() -> bool:
  84. """
  85. Return whether it looks like pip is running under CI.
  86. """
  87. # We don't use the method of checking for a tty (e.g. using isatty())
  88. # because some CI systems mimic a tty (e.g. Travis CI). Thus that
  89. # method doesn't provide definitive information in either direction.
  90. return any(name in os.environ for name in CI_ENVIRONMENT_VARIABLES)
  91. @functools.lru_cache(maxsize=1)
  92. def user_agent() -> str:
  93. """
  94. Return a string representing the user agent.
  95. """
  96. data: Dict[str, Any] = {
  97. "installer": {"name": "pip", "version": __version__},
  98. "python": platform.python_version(),
  99. "implementation": {
  100. "name": platform.python_implementation(),
  101. },
  102. }
  103. if data["implementation"]["name"] == "CPython":
  104. data["implementation"]["version"] = platform.python_version()
  105. elif data["implementation"]["name"] == "PyPy":
  106. pypy_version_info = sys.pypy_version_info # type: ignore
  107. if pypy_version_info.releaselevel == "final":
  108. pypy_version_info = pypy_version_info[:3]
  109. data["implementation"]["version"] = ".".join(
  110. [str(x) for x in pypy_version_info]
  111. )
  112. elif data["implementation"]["name"] == "Jython":
  113. # Complete Guess
  114. data["implementation"]["version"] = platform.python_version()
  115. elif data["implementation"]["name"] == "IronPython":
  116. # Complete Guess
  117. data["implementation"]["version"] = platform.python_version()
  118. if sys.platform.startswith("linux"):
  119. from pip._vendor import distro
  120. linux_distribution = distro.name(), distro.version(), distro.codename()
  121. distro_infos: Dict[str, Any] = dict(
  122. filter(
  123. lambda x: x[1],
  124. zip(["name", "version", "id"], linux_distribution),
  125. )
  126. )
  127. libc = dict(
  128. filter(
  129. lambda x: x[1],
  130. zip(["lib", "version"], libc_ver()),
  131. )
  132. )
  133. if libc:
  134. distro_infos["libc"] = libc
  135. if distro_infos:
  136. data["distro"] = distro_infos
  137. if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
  138. data["distro"] = {"name": "macOS", "version": platform.mac_ver()[0]}
  139. if platform.system():
  140. data.setdefault("system", {})["name"] = platform.system()
  141. if platform.release():
  142. data.setdefault("system", {})["release"] = platform.release()
  143. if platform.machine():
  144. data["cpu"] = platform.machine()
  145. if has_tls():
  146. import _ssl as ssl
  147. data["openssl_version"] = ssl.OPENSSL_VERSION
  148. setuptools_dist = get_default_environment().get_distribution("setuptools")
  149. if setuptools_dist is not None:
  150. data["setuptools_version"] = str(setuptools_dist.version)
  151. if shutil.which("rustc") is not None:
  152. # If for any reason `rustc --version` fails, silently ignore it
  153. try:
  154. rustc_output = subprocess.check_output(
  155. ["rustc", "--version"], stderr=subprocess.STDOUT, timeout=0.5
  156. )
  157. except Exception:
  158. pass
  159. else:
  160. if rustc_output.startswith(b"rustc "):
  161. # The format of `rustc --version` is:
  162. # `b'rustc 1.52.1 (9bc8c42bb 2021-05-09)\n'`
  163. # We extract just the middle (1.52.1) part
  164. data["rustc_version"] = rustc_output.split(b" ")[1].decode()
  165. # Use None rather than False so as not to give the impression that
  166. # pip knows it is not being run under CI. Rather, it is a null or
  167. # inconclusive result. Also, we include some value rather than no
  168. # value to make it easier to know that the check has been run.
  169. data["ci"] = True if looks_like_ci() else None
  170. user_data = os.environ.get("PIP_USER_AGENT_USER_DATA")
  171. if user_data is not None:
  172. data["user_data"] = user_data
  173. return "{data[installer][name]}/{data[installer][version]} {json}".format(
  174. data=data,
  175. json=json.dumps(data, separators=(",", ":"), sort_keys=True),
  176. )
  177. class LocalFSAdapter(BaseAdapter):
  178. def send(
  179. self,
  180. request: PreparedRequest,
  181. stream: bool = False,
  182. timeout: Optional[Union[float, Tuple[float, float]]] = None,
  183. verify: Union[bool, str] = True,
  184. cert: Optional[Union[str, Tuple[str, str]]] = None,
  185. proxies: Optional[Mapping[str, str]] = None,
  186. ) -> Response:
  187. pathname = url_to_path(request.url)
  188. resp = Response()
  189. resp.status_code = 200
  190. resp.url = request.url
  191. try:
  192. stats = os.stat(pathname)
  193. except OSError as exc:
  194. # format the exception raised as a io.BytesIO object,
  195. # to return a better error message:
  196. resp.status_code = 404
  197. resp.reason = type(exc).__name__
  198. resp.raw = io.BytesIO(f"{resp.reason}: {exc}".encode())
  199. else:
  200. modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
  201. content_type = mimetypes.guess_type(pathname)[0] or "text/plain"
  202. resp.headers = CaseInsensitiveDict(
  203. {
  204. "Content-Type": content_type,
  205. "Content-Length": stats.st_size,
  206. "Last-Modified": modified,
  207. }
  208. )
  209. resp.raw = open(pathname, "rb")
  210. resp.close = resp.raw.close
  211. return resp
  212. def close(self) -> None:
  213. pass
  214. class _SSLContextAdapterMixin:
  215. """Mixin to add the ``ssl_context`` constructor argument to HTTP adapters.
  216. The additional argument is forwarded directly to the pool manager. This allows us
  217. to dynamically decide what SSL store to use at runtime, which is used to implement
  218. the optional ``truststore`` backend.
  219. """
  220. def __init__(
  221. self,
  222. *,
  223. ssl_context: Optional["SSLContext"] = None,
  224. **kwargs: Any,
  225. ) -> None:
  226. self._ssl_context = ssl_context
  227. super().__init__(**kwargs)
  228. def init_poolmanager(
  229. self,
  230. connections: int,
  231. maxsize: int,
  232. block: bool = DEFAULT_POOLBLOCK,
  233. **pool_kwargs: Any,
  234. ) -> "PoolManager":
  235. if self._ssl_context is not None:
  236. pool_kwargs.setdefault("ssl_context", self._ssl_context)
  237. return super().init_poolmanager( # type: ignore[misc]
  238. connections=connections,
  239. maxsize=maxsize,
  240. block=block,
  241. **pool_kwargs,
  242. )
  243. class HTTPAdapter(_SSLContextAdapterMixin, _BaseHTTPAdapter):
  244. pass
  245. class CacheControlAdapter(_SSLContextAdapterMixin, _BaseCacheControlAdapter):
  246. pass
  247. class InsecureHTTPAdapter(HTTPAdapter):
  248. def cert_verify(
  249. self,
  250. conn: ConnectionPool,
  251. url: str,
  252. verify: Union[bool, str],
  253. cert: Optional[Union[str, Tuple[str, str]]],
  254. ) -> None:
  255. super().cert_verify(conn=conn, url=url, verify=False, cert=cert)
  256. class InsecureCacheControlAdapter(CacheControlAdapter):
  257. def cert_verify(
  258. self,
  259. conn: ConnectionPool,
  260. url: str,
  261. verify: Union[bool, str],
  262. cert: Optional[Union[str, Tuple[str, str]]],
  263. ) -> None:
  264. super().cert_verify(conn=conn, url=url, verify=False, cert=cert)
  265. class PipSession(requests.Session):
  266. timeout: Optional[int] = None
  267. def __init__(
  268. self,
  269. *args: Any,
  270. retries: int = 0,
  271. cache: Optional[str] = None,
  272. trusted_hosts: Sequence[str] = (),
  273. index_urls: Optional[List[str]] = None,
  274. ssl_context: Optional["SSLContext"] = None,
  275. **kwargs: Any,
  276. ) -> None:
  277. """
  278. :param trusted_hosts: Domains not to emit warnings for when not using
  279. HTTPS.
  280. """
  281. super().__init__(*args, **kwargs)
  282. # Namespace the attribute with "pip_" just in case to prevent
  283. # possible conflicts with the base class.
  284. self.pip_trusted_origins: List[Tuple[str, Optional[int]]] = []
  285. self.pip_proxy = None
  286. # Attach our User Agent to the request
  287. self.headers["User-Agent"] = user_agent()
  288. # Attach our Authentication handler to the session
  289. self.auth = MultiDomainBasicAuth(index_urls=index_urls)
  290. # Create our urllib3.Retry instance which will allow us to customize
  291. # how we handle retries.
  292. retries = urllib3.Retry(
  293. # Set the total number of retries that a particular request can
  294. # have.
  295. total=retries,
  296. # A 503 error from PyPI typically means that the Fastly -> Origin
  297. # connection got interrupted in some way. A 503 error in general
  298. # is typically considered a transient error so we'll go ahead and
  299. # retry it.
  300. # A 500 may indicate transient error in Amazon S3
  301. # A 502 may be a transient error from a CDN like CloudFlare or CloudFront
  302. # A 520 or 527 - may indicate transient error in CloudFlare
  303. status_forcelist=[500, 502, 503, 520, 527],
  304. # Add a small amount of back off between failed requests in
  305. # order to prevent hammering the service.
  306. backoff_factor=0.25,
  307. ) # type: ignore
  308. # Our Insecure HTTPAdapter disables HTTPS validation. It does not
  309. # support caching so we'll use it for all http:// URLs.
  310. # If caching is disabled, we will also use it for
  311. # https:// hosts that we've marked as ignoring
  312. # TLS errors for (trusted-hosts).
  313. insecure_adapter = InsecureHTTPAdapter(max_retries=retries)
  314. # We want to _only_ cache responses on securely fetched origins or when
  315. # the host is specified as trusted. We do this because
  316. # we can't validate the response of an insecurely/untrusted fetched
  317. # origin, and we don't want someone to be able to poison the cache and
  318. # require manual eviction from the cache to fix it.
  319. if cache:
  320. secure_adapter = CacheControlAdapter(
  321. cache=SafeFileCache(cache),
  322. max_retries=retries,
  323. ssl_context=ssl_context,
  324. )
  325. self._trusted_host_adapter = InsecureCacheControlAdapter(
  326. cache=SafeFileCache(cache),
  327. max_retries=retries,
  328. )
  329. else:
  330. secure_adapter = HTTPAdapter(max_retries=retries, ssl_context=ssl_context)
  331. self._trusted_host_adapter = insecure_adapter
  332. self.mount("https://", secure_adapter)
  333. self.mount("http://", insecure_adapter)
  334. # Enable file:// urls
  335. self.mount("file://", LocalFSAdapter())
  336. for host in trusted_hosts:
  337. self.add_trusted_host(host, suppress_logging=True)
  338. def update_index_urls(self, new_index_urls: List[str]) -> None:
  339. """
  340. :param new_index_urls: New index urls to update the authentication
  341. handler with.
  342. """
  343. self.auth.index_urls = new_index_urls
  344. def add_trusted_host(
  345. self, host: str, source: Optional[str] = None, suppress_logging: bool = False
  346. ) -> None:
  347. """
  348. :param host: It is okay to provide a host that has previously been
  349. added.
  350. :param source: An optional source string, for logging where the host
  351. string came from.
  352. """
  353. if not suppress_logging:
  354. msg = f"adding trusted host: {host!r}"
  355. if source is not None:
  356. msg += f" (from {source})"
  357. logger.info(msg)
  358. parsed_host, parsed_port = parse_netloc(host)
  359. if parsed_host is None:
  360. raise ValueError(f"Trusted host URL must include a host part: {host!r}")
  361. if (parsed_host, parsed_port) not in self.pip_trusted_origins:
  362. self.pip_trusted_origins.append((parsed_host, parsed_port))
  363. self.mount(
  364. build_url_from_netloc(host, scheme="http") + "/", self._trusted_host_adapter
  365. )
  366. self.mount(build_url_from_netloc(host) + "/", self._trusted_host_adapter)
  367. if not parsed_port:
  368. self.mount(
  369. build_url_from_netloc(host, scheme="http") + ":",
  370. self._trusted_host_adapter,
  371. )
  372. # Mount wildcard ports for the same host.
  373. self.mount(build_url_from_netloc(host) + ":", self._trusted_host_adapter)
  374. def iter_secure_origins(self) -> Generator[SecureOrigin, None, None]:
  375. yield from SECURE_ORIGINS
  376. for host, port in self.pip_trusted_origins:
  377. yield ("*", host, "*" if port is None else port)
  378. def is_secure_origin(self, location: Link) -> bool:
  379. # Determine if this url used a secure transport mechanism
  380. parsed = urllib.parse.urlparse(str(location))
  381. origin_protocol, origin_host, origin_port = (
  382. parsed.scheme,
  383. parsed.hostname,
  384. parsed.port,
  385. )
  386. # The protocol to use to see if the protocol matches.
  387. # Don't count the repository type as part of the protocol: in
  388. # cases such as "git+ssh", only use "ssh". (I.e., Only verify against
  389. # the last scheme.)
  390. origin_protocol = origin_protocol.rsplit("+", 1)[-1]
  391. # Determine if our origin is a secure origin by looking through our
  392. # hardcoded list of secure origins, as well as any additional ones
  393. # configured on this PackageFinder instance.
  394. for secure_origin in self.iter_secure_origins():
  395. secure_protocol, secure_host, secure_port = secure_origin
  396. if origin_protocol != secure_protocol and secure_protocol != "*":
  397. continue
  398. try:
  399. addr = ipaddress.ip_address(origin_host or "")
  400. network = ipaddress.ip_network(secure_host)
  401. except ValueError:
  402. # We don't have both a valid address or a valid network, so
  403. # we'll check this origin against hostnames.
  404. if (
  405. origin_host
  406. and origin_host.lower() != secure_host.lower()
  407. and secure_host != "*"
  408. ):
  409. continue
  410. else:
  411. # We have a valid address and network, so see if the address
  412. # is contained within the network.
  413. if addr not in network:
  414. continue
  415. # Check to see if the port matches.
  416. if (
  417. origin_port != secure_port
  418. and secure_port != "*"
  419. and secure_port is not None
  420. ):
  421. continue
  422. # If we've gotten here, then this origin matches the current
  423. # secure origin and we should return True
  424. return True
  425. # If we've gotten to this point, then the origin isn't secure and we
  426. # will not accept it as a valid location to search. We will however
  427. # log a warning that we are ignoring it.
  428. logger.warning(
  429. "The repository located at %s is not a trusted or secure host and "
  430. "is being ignored. If this repository is available via HTTPS we "
  431. "recommend you use HTTPS instead, otherwise you may silence "
  432. "this warning and allow it anyway with '--trusted-host %s'.",
  433. origin_host,
  434. origin_host,
  435. )
  436. return False
  437. def request(self, method: str, url: str, *args: Any, **kwargs: Any) -> Response:
  438. # Allow setting a default timeout on a session
  439. kwargs.setdefault("timeout", self.timeout)
  440. # Allow setting a default proxies on a session
  441. kwargs.setdefault("proxies", self.proxies)
  442. # Dispatch the actual request
  443. return super().request(method, url, *args, **kwargs)