litellm.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. import copy
  2. from typing import TYPE_CHECKING
  3. import sentry_sdk
  4. from sentry_sdk import consts
  5. from sentry_sdk.ai.monitoring import record_token_usage
  6. from sentry_sdk.ai.utils import (
  7. get_start_span_function,
  8. set_data_normalized,
  9. truncate_and_annotate_messages,
  10. transform_openai_content_part,
  11. )
  12. from sentry_sdk.consts import SPANDATA
  13. from sentry_sdk.integrations import DidNotEnable, Integration
  14. from sentry_sdk.scope import should_send_default_pii
  15. from sentry_sdk.utils import event_from_exception
  16. if TYPE_CHECKING:
  17. from typing import Any, Dict, List
  18. from datetime import datetime
  19. try:
  20. import litellm # type: ignore[import-not-found]
  21. from litellm import input_callback, success_callback, failure_callback
  22. except ImportError:
  23. raise DidNotEnable("LiteLLM not installed")
  24. def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]":
  25. """Get the metadata dictionary from the kwargs."""
  26. litellm_params = kwargs.setdefault("litellm_params", {})
  27. # we need this weird little dance, as metadata might be set but may be None initially
  28. metadata = litellm_params.get("metadata")
  29. if metadata is None:
  30. metadata = {}
  31. litellm_params["metadata"] = metadata
  32. return metadata
  33. def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
  34. """
  35. Convert the message parts from OpenAI format to the `gen_ai.request.messages` format
  36. using the OpenAI-specific transformer (LiteLLM uses OpenAI's message format).
  37. Deep copies messages to avoid mutating original kwargs.
  38. """
  39. # Deep copy to avoid mutating original messages from kwargs
  40. messages = copy.deepcopy(messages)
  41. for message in messages:
  42. if not isinstance(message, dict):
  43. continue
  44. content = message.get("content")
  45. if isinstance(content, (list, tuple)):
  46. transformed = []
  47. for item in content:
  48. if isinstance(item, dict):
  49. result = transform_openai_content_part(item)
  50. # If transformation succeeded, use the result; otherwise keep original
  51. transformed.append(result if result is not None else item)
  52. else:
  53. transformed.append(item)
  54. message["content"] = transformed
  55. return messages
  56. def _input_callback(kwargs: "Dict[str, Any]") -> None:
  57. """Handle the start of a request."""
  58. integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
  59. if integration is None:
  60. return
  61. # Get key parameters
  62. full_model = kwargs.get("model", "")
  63. try:
  64. model, provider, _, _ = litellm.get_llm_provider(full_model)
  65. except Exception:
  66. model = full_model
  67. provider = "unknown"
  68. call_type = kwargs.get("call_type", None)
  69. if call_type == "embedding":
  70. operation = "embeddings"
  71. else:
  72. operation = "chat"
  73. # Start a new span/transaction
  74. span = get_start_span_function()(
  75. op=(
  76. consts.OP.GEN_AI_CHAT
  77. if operation == "chat"
  78. else consts.OP.GEN_AI_EMBEDDINGS
  79. ),
  80. name=f"{operation} {model}",
  81. origin=LiteLLMIntegration.origin,
  82. )
  83. span.__enter__()
  84. # Store span for later
  85. _get_metadata_dict(kwargs)["_sentry_span"] = span
  86. # Set basic data
  87. set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
  88. set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)
  89. # Record input/messages if allowed
  90. if should_send_default_pii() and integration.include_prompts:
  91. if operation == "embeddings":
  92. # For embeddings, look for the 'input' parameter
  93. embedding_input = kwargs.get("input")
  94. if embedding_input:
  95. scope = sentry_sdk.get_current_scope()
  96. # Normalize to list format
  97. input_list = (
  98. embedding_input
  99. if isinstance(embedding_input, list)
  100. else [embedding_input]
  101. )
  102. messages_data = truncate_and_annotate_messages(input_list, span, scope)
  103. if messages_data is not None:
  104. set_data_normalized(
  105. span,
  106. SPANDATA.GEN_AI_EMBEDDINGS_INPUT,
  107. messages_data,
  108. unpack=False,
  109. )
  110. else:
  111. # For chat, look for the 'messages' parameter
  112. messages = kwargs.get("messages", [])
  113. if messages:
  114. scope = sentry_sdk.get_current_scope()
  115. messages = _convert_message_parts(messages)
  116. messages_data = truncate_and_annotate_messages(messages, span, scope)
  117. if messages_data is not None:
  118. set_data_normalized(
  119. span,
  120. SPANDATA.GEN_AI_REQUEST_MESSAGES,
  121. messages_data,
  122. unpack=False,
  123. )
  124. # Record other parameters
  125. params = {
  126. "model": SPANDATA.GEN_AI_REQUEST_MODEL,
  127. "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
  128. "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
  129. "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY,
  130. "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
  131. "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE,
  132. "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
  133. }
  134. for key, attribute in params.items():
  135. value = kwargs.get(key)
  136. if value is not None:
  137. set_data_normalized(span, attribute, value)
  138. # Record LiteLLM-specific parameters
  139. litellm_params = {
  140. "api_base": kwargs.get("api_base"),
  141. "api_version": kwargs.get("api_version"),
  142. "custom_llm_provider": kwargs.get("custom_llm_provider"),
  143. }
  144. for key, value in litellm_params.items():
  145. if value is not None:
  146. set_data_normalized(span, f"gen_ai.litellm.{key}", value)
  147. def _success_callback(
  148. kwargs: "Dict[str, Any]",
  149. completion_response: "Any",
  150. start_time: "datetime",
  151. end_time: "datetime",
  152. ) -> None:
  153. """Handle successful completion."""
  154. span = _get_metadata_dict(kwargs).get("_sentry_span")
  155. if span is None:
  156. return
  157. integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
  158. if integration is None:
  159. return
  160. try:
  161. # Record model information
  162. if hasattr(completion_response, "model"):
  163. set_data_normalized(
  164. span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model
  165. )
  166. # Record response content if allowed
  167. if should_send_default_pii() and integration.include_prompts:
  168. if hasattr(completion_response, "choices"):
  169. response_messages = []
  170. for choice in completion_response.choices:
  171. if hasattr(choice, "message"):
  172. if hasattr(choice.message, "model_dump"):
  173. response_messages.append(choice.message.model_dump())
  174. elif hasattr(choice.message, "dict"):
  175. response_messages.append(choice.message.dict())
  176. else:
  177. # Fallback for basic message objects
  178. msg = {}
  179. if hasattr(choice.message, "role"):
  180. msg["role"] = choice.message.role
  181. if hasattr(choice.message, "content"):
  182. msg["content"] = choice.message.content
  183. if hasattr(choice.message, "tool_calls"):
  184. msg["tool_calls"] = choice.message.tool_calls
  185. response_messages.append(msg)
  186. if response_messages:
  187. set_data_normalized(
  188. span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages
  189. )
  190. # Record token usage
  191. if hasattr(completion_response, "usage"):
  192. usage = completion_response.usage
  193. record_token_usage(
  194. span,
  195. input_tokens=getattr(usage, "prompt_tokens", None),
  196. output_tokens=getattr(usage, "completion_tokens", None),
  197. total_tokens=getattr(usage, "total_tokens", None),
  198. )
  199. finally:
  200. # Always finish the span and clean up
  201. span.__exit__(None, None, None)
  202. def _failure_callback(
  203. kwargs: "Dict[str, Any]",
  204. exception: Exception,
  205. start_time: "datetime",
  206. end_time: "datetime",
  207. ) -> None:
  208. """Handle request failure."""
  209. span = _get_metadata_dict(kwargs).get("_sentry_span")
  210. if span is None:
  211. return
  212. try:
  213. # Capture the exception
  214. event, hint = event_from_exception(
  215. exception,
  216. client_options=sentry_sdk.get_client().options,
  217. mechanism={"type": "litellm", "handled": False},
  218. )
  219. sentry_sdk.capture_event(event, hint=hint)
  220. finally:
  221. # Always finish the span and clean up
  222. span.__exit__(type(exception), exception, None)
  223. class LiteLLMIntegration(Integration):
  224. """
  225. LiteLLM integration for Sentry.
  226. This integration automatically captures LiteLLM API calls and sends them to Sentry
  227. for monitoring and error tracking. It supports all 100+ LLM providers that LiteLLM
  228. supports, including OpenAI, Anthropic, Google, Cohere, and many others.
  229. Features:
  230. - Automatic exception capture for all LiteLLM calls
  231. - Token usage tracking across all providers
  232. - Provider detection and attribution
  233. - Input/output message capture (configurable)
  234. - Streaming response support
  235. - Cost tracking integration
  236. Usage:
  237. ```python
  238. import litellm
  239. import sentry_sdk
  240. # Initialize Sentry with the LiteLLM integration
  241. sentry_sdk.init(
  242. dsn="your-dsn",
  243. send_default_pii=True
  244. integrations=[
  245. sentry_sdk.integrations.LiteLLMIntegration(
  246. include_prompts=True # Set to False to exclude message content
  247. )
  248. ]
  249. )
  250. # All LiteLLM calls will now be monitored
  251. response = litellm.completion(
  252. model="gpt-3.5-turbo",
  253. messages=[{"role": "user", "content": "Hello!"}]
  254. )
  255. ```
  256. Configuration:
  257. - include_prompts (bool): Whether to include prompts and responses in spans.
  258. Defaults to True. Set to False to exclude potentially sensitive data.
  259. """
  260. identifier = "litellm"
  261. origin = f"auto.ai.{identifier}"
  262. def __init__(self: "LiteLLMIntegration", include_prompts: bool = True) -> None:
  263. self.include_prompts = include_prompts
  264. @staticmethod
  265. def setup_once() -> None:
  266. """Set up LiteLLM callbacks for monitoring."""
  267. litellm.input_callback = input_callback or []
  268. if _input_callback not in litellm.input_callback:
  269. litellm.input_callback.append(_input_callback)
  270. litellm.success_callback = success_callback or []
  271. if _success_callback not in litellm.success_callback:
  272. litellm.success_callback.append(_success_callback)
  273. litellm.failure_callback = failure_callback or []
  274. if _failure_callback not in litellm.failure_callback:
  275. litellm.failure_callback.append(_failure_callback)