wsgi.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. import sys
  2. from functools import partial
  3. from typing import TYPE_CHECKING
  4. import sentry_sdk
  5. from sentry_sdk._werkzeug import _get_headers, get_host
  6. from sentry_sdk.api import continue_trace
  7. from sentry_sdk.consts import OP
  8. from sentry_sdk.integrations._wsgi_common import (
  9. DEFAULT_HTTP_METHODS_TO_CAPTURE,
  10. _filter_headers,
  11. nullcontext,
  12. )
  13. from sentry_sdk.scope import should_send_default_pii, use_isolation_scope
  14. from sentry_sdk.sessions import track_session
  15. from sentry_sdk.tracing import Transaction, TransactionSource
  16. from sentry_sdk.utils import (
  17. ContextVar,
  18. capture_internal_exceptions,
  19. event_from_exception,
  20. reraise,
  21. )
if TYPE_CHECKING:
    # Everything in this branch exists only for static type checkers. The
    # annotations in this module that use these names are written as strings,
    # so none of them has to be importable at runtime.
    from typing import Any, Callable, Dict, Iterator, Optional, Protocol, Tuple, TypeVar

    from sentry_sdk._types import Event, EventProcessor
    from sentry_sdk.utils import ExcInfo

    # Type variables standing in for the values that flow through a
    # ``start_response`` call: its return value, the header collection and
    # the optional exc_info argument.
    WsgiResponseIter = TypeVar("WsgiResponseIter")
    WsgiResponseHeaders = TypeVar("WsgiResponseHeaders")
    WsgiExcInfo = TypeVar("WsgiExcInfo")

    class StartResponse(Protocol):
        """Structural type for a ``start_response``-style callable.

        Accepts a status string, the response headers and an optional
        ``exc_info`` value.
        """

        def __call__(
            self,
            status: str,
            response_headers: "WsgiResponseHeaders",
            exc_info: "Optional[WsgiExcInfo]" = None,
        ) -> "WsgiResponseIter":  # type: ignore
            pass
# Re-entrancy flag: SentryWsgiMiddleware.__call__ sets it to True while it is
# handling a request and resets it to False afterwards. A middleware call that
# finds it already set hands the request straight to the wrapped app.
_wsgi_middleware_applied = ContextVar("sentry_wsgi_middleware_applied")
  38. def wsgi_decoding_dance(s: str, charset: str = "utf-8", errors: str = "replace") -> str:
  39. return s.encode("latin1").decode(charset, errors)
  40. def get_request_url(
  41. environ: "Dict[str, str]", use_x_forwarded_for: bool = False
  42. ) -> str:
  43. """Return the absolute URL without query string for the given WSGI
  44. environment."""
  45. script_name = environ.get("SCRIPT_NAME", "").rstrip("/")
  46. path_info = environ.get("PATH_INFO", "").lstrip("/")
  47. path = f"{script_name}/{path_info}"
  48. return "%s://%s/%s" % (
  49. environ.get("wsgi.url_scheme"),
  50. get_host(environ, use_x_forwarded_for),
  51. wsgi_decoding_dance(path).lstrip("/"),
  52. )
class SentryWsgiMiddleware:
    """WSGI middleware that runs the wrapped application inside a Sentry
    isolation scope, a request session and, for selected HTTP methods, a
    transaction.

    Exceptions raised by the wrapped application call are passed to
    ``_capture_exception`` and then re-raised.
    """

    __slots__ = (
        "app",
        "use_x_forwarded_for",
        "span_origin",
        "http_methods_to_capture",
    )

    def __init__(
        self,
        app: "Callable[[Dict[str, str], Callable[..., Any]], Any]",
        use_x_forwarded_for: bool = False,
        span_origin: str = "manual",
        http_methods_to_capture: "Tuple[str, ...]" = DEFAULT_HTTP_METHODS_TO_CAPTURE,
    ) -> None:
        """Store the wrapped application and the middleware options.

        :param app: The WSGI application to wrap.
        :param use_x_forwarded_for: Forwarded to ``get_host`` (via the event
            processor) when the request URL is built.
        :param span_origin: Passed as ``origin`` when the transaction is
            created.
        :param http_methods_to_capture: Upper-case HTTP methods for which a
            transaction is started; other methods run without one.
        """
        self.app = app
        self.use_x_forwarded_for = use_x_forwarded_for
        self.span_origin = span_origin
        self.http_methods_to_capture = http_methods_to_capture

    def __call__(
        self, environ: "Dict[str, str]", start_response: "Callable[..., Any]"
    ) -> "_ScopedResponse":
        """Handle one WSGI request.

        Returns the application's response wrapped in ``_ScopedResponse``.
        When another middleware call is already active in this context, the
        wrapped application's response is returned unwrapped instead.
        """
        # Already inside an instrumented call: do not instrument twice.
        if _wsgi_middleware_applied.get(False):
            return self.app(environ, start_response)

        _wsgi_middleware_applied.set(True)
        try:
            with sentry_sdk.isolation_scope() as scope:
                with track_session(scope, session_mode="request"):
                    # Scope preparation is wrapped separately so that a failure
                    # here does not prevent the application from being called.
                    # NOTE(review): relies on capture_internal_exceptions()
                    # suppressing errors - confirm.
                    with capture_internal_exceptions():
                        scope.clear_breadcrumbs()
                        scope._name = "wsgi"
                        scope.add_event_processor(
                            _make_wsgi_event_processor(
                                environ, self.use_x_forwarded_for
                            )
                        )

                    method = environ.get("REQUEST_METHOD", "").upper()

                    # A transaction is only created for configured methods.
                    transaction = None
                    if method in self.http_methods_to_capture:
                        transaction = continue_trace(
                            environ,
                            op=OP.HTTP_SERVER,
                            name="generic WSGI request",
                            source=TransactionSource.ROUTE,
                            origin=self.span_origin,
                        )

                    # Without a transaction a no-op context manager is used
                    # so the code below has a single shape.
                    transaction_context = (
                        sentry_sdk.start_transaction(
                            transaction,
                            custom_sampling_context={"wsgi_environ": environ},
                        )
                        if transaction is not None
                        else nullcontext()
                    )
                    with transaction_context:
                        try:
                            # start_response is wrapped so the HTTP status can
                            # be recorded on the transaction.
                            response = self.app(
                                environ,
                                partial(
                                    _sentry_start_response, start_response, transaction
                                ),
                            )
                        except BaseException:
                            reraise(*_capture_exception())
        finally:
            # Reset the flag on every exit path, including exceptions.
            _wsgi_middleware_applied.set(False)

        # The scope is handed to the response wrapper, which re-activates it
        # while the body is iterated and closed.
        return _ScopedResponse(scope, response)
  119. def _sentry_start_response(
  120. old_start_response: "StartResponse",
  121. transaction: "Optional[Transaction]",
  122. status: str,
  123. response_headers: "WsgiResponseHeaders",
  124. exc_info: "Optional[WsgiExcInfo]" = None,
  125. ) -> "WsgiResponseIter": # type: ignore[type-var]
  126. with capture_internal_exceptions():
  127. status_int = int(status.split(" ", 1)[0])
  128. if transaction is not None:
  129. transaction.set_http_status(status_int)
  130. if exc_info is None:
  131. # The Django Rest Framework WSGI test client, and likely other
  132. # (incorrect) implementations, cannot deal with the exc_info argument
  133. # if one is present. Avoid providing a third argument if not necessary.
  134. return old_start_response(status, response_headers)
  135. else:
  136. return old_start_response(status, response_headers, exc_info)
  137. def _get_environ(environ: "Dict[str, str]") -> "Iterator[Tuple[str, str]]":
  138. """
  139. Returns our explicitly included environment variables we want to
  140. capture (server name, port and remote addr if pii is enabled).
  141. """
  142. keys = ["SERVER_NAME", "SERVER_PORT"]
  143. if should_send_default_pii():
  144. # make debugging of proxy setup easier. Proxy headers are
  145. # in headers.
  146. keys += ["REMOTE_ADDR"]
  147. for key in keys:
  148. if key in environ:
  149. yield key, environ[key]
  150. def get_client_ip(environ: "Dict[str, str]") -> "Optional[Any]":
  151. """
  152. Infer the user IP address from various headers. This cannot be used in
  153. security sensitive situations since the value may be forged from a client,
  154. but it's good enough for the event payload.
  155. """
  156. try:
  157. return environ["HTTP_X_FORWARDED_FOR"].split(",")[0].strip()
  158. except (KeyError, IndexError):
  159. pass
  160. try:
  161. return environ["HTTP_X_REAL_IP"]
  162. except KeyError:
  163. pass
  164. return environ.get("REMOTE_ADDR")
  165. def _capture_exception() -> "ExcInfo":
  166. """
  167. Captures the current exception and sends it to Sentry.
  168. Returns the ExcInfo tuple to it can be reraised afterwards.
  169. """
  170. exc_info = sys.exc_info()
  171. e = exc_info[1]
  172. # SystemExit(0) is the only uncaught exception that is expected behavior
  173. should_skip_capture = isinstance(e, SystemExit) and e.code in (0, None)
  174. if not should_skip_capture:
  175. event, hint = event_from_exception(
  176. exc_info,
  177. client_options=sentry_sdk.get_client().options,
  178. mechanism={"type": "wsgi", "handled": False},
  179. )
  180. sentry_sdk.capture_event(event, hint=hint)
  181. return exc_info
  182. class _ScopedResponse:
  183. """
  184. Users a separate scope for each response chunk.
  185. This will make WSGI apps more tolerant against:
  186. - WSGI servers streaming responses from a different thread/from
  187. different threads than the one that called start_response
  188. - close() not being called
  189. - WSGI servers streaming responses interleaved from the same thread
  190. """
  191. __slots__ = ("_response", "_scope")
  192. def __init__(
  193. self, scope: "sentry_sdk.scope.Scope", response: "Iterator[bytes]"
  194. ) -> None:
  195. self._scope = scope
  196. self._response = response
  197. def __iter__(self) -> "Iterator[bytes]":
  198. iterator = iter(self._response)
  199. while True:
  200. with use_isolation_scope(self._scope):
  201. try:
  202. chunk = next(iterator)
  203. except StopIteration:
  204. break
  205. except BaseException:
  206. reraise(*_capture_exception())
  207. yield chunk
  208. def close(self) -> None:
  209. with use_isolation_scope(self._scope):
  210. try:
  211. self._response.close() # type: ignore
  212. except AttributeError:
  213. pass
  214. except BaseException:
  215. reraise(*_capture_exception())
  216. def _make_wsgi_event_processor(
  217. environ: "Dict[str, str]", use_x_forwarded_for: bool
  218. ) -> "EventProcessor":
  219. # It's a bit unfortunate that we have to extract and parse the request data
  220. # from the environ so eagerly, but there are a few good reasons for this.
  221. #
  222. # We might be in a situation where the scope never gets torn down
  223. # properly. In that case we will have an unnecessary strong reference to
  224. # all objects in the environ (some of which may take a lot of memory) when
  225. # we're really just interested in a few of them.
  226. #
  227. # Keeping the environment around for longer than the request lifecycle is
  228. # also not necessarily something uWSGI can deal with:
  229. # https://github.com/unbit/uwsgi/issues/1950
  230. client_ip = get_client_ip(environ)
  231. request_url = get_request_url(environ, use_x_forwarded_for)
  232. query_string = environ.get("QUERY_STRING")
  233. method = environ.get("REQUEST_METHOD")
  234. env = dict(_get_environ(environ))
  235. headers = _filter_headers(dict(_get_headers(environ)))
  236. def event_processor(event: "Event", hint: "Dict[str, Any]") -> "Event":
  237. with capture_internal_exceptions():
  238. # if the code below fails halfway through we at least have some data
  239. request_info = event.setdefault("request", {})
  240. if should_send_default_pii():
  241. user_info = event.setdefault("user", {})
  242. if client_ip:
  243. user_info.setdefault("ip_address", client_ip)
  244. request_info["url"] = request_url
  245. request_info["query_string"] = query_string
  246. request_info["method"] = method
  247. request_info["env"] = env
  248. request_info["headers"] = headers
  249. return event
  250. return event_processor