_telemetry.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. from queue import Queue
  2. from threading import Lock, Thread
  3. from typing import Dict, Optional, Union
  4. from urllib.parse import quote
  5. from .. import constants, logging
  6. from . import build_hf_headers, get_session, hf_raise_for_status
# Module-level logger; failures while sending telemetry are reported through it at debug level.
logger = logging.get_logger(__name__)

# Telemetry is sent by a separate thread to avoid blocking the main thread.
# A daemon thread is started on first use and consumes tasks from `_TELEMETRY_QUEUE`.
# If the thread stops for some reason (this shouldn't happen), a new one is started
# the next time telemetry is sent.

# The current worker thread, or `None` until the first one has been started.
_TELEMETRY_THREAD: Optional[Thread] = None
_TELEMETRY_THREAD_LOCK = Lock()  # Lock to avoid starting multiple threads in parallel
# Pending telemetry tasks; each item is a dict of keyword arguments for `_send_telemetry_in_thread`.
_TELEMETRY_QUEUE: Queue = Queue()
  14. def send_telemetry(
  15. topic: str,
  16. *,
  17. library_name: Optional[str] = None,
  18. library_version: Optional[str] = None,
  19. user_agent: Union[Dict, str, None] = None,
  20. ) -> None:
  21. """
  22. Sends telemetry that helps tracking usage of different HF libraries.
  23. This usage data helps us debug issues and prioritize new features. However, we understand that not everyone wants
  24. to share additional information, and we respect your privacy. You can disable telemetry collection by setting the
  25. `HF_HUB_DISABLE_TELEMETRY=1` as environment variable. Telemetry is also disabled in offline mode (i.e. when setting
  26. `HF_HUB_OFFLINE=1`).
  27. Telemetry collection is run in a separate thread to minimize impact for the user.
  28. Args:
  29. topic (`str`):
  30. Name of the topic that is monitored. The topic is directly used to build the URL. If you want to monitor
  31. subtopics, just use "/" separation. Examples: "gradio", "transformers/examples",...
  32. library_name (`str`, *optional*):
  33. The name of the library that is making the HTTP request. Will be added to the user-agent header.
  34. library_version (`str`, *optional*):
  35. The version of the library that is making the HTTP request. Will be added to the user-agent header.
  36. user_agent (`str`, `dict`, *optional*):
  37. The user agent info in the form of a dictionary or a single string. It will be completed with information about the installed packages.
  38. Example:
  39. ```py
  40. >>> from huggingface_hub.utils import send_telemetry
  41. # Send telemetry without library information
  42. >>> send_telemetry("ping")
  43. # Send telemetry to subtopic with library information
  44. >>> send_telemetry("gradio/local_link", library_name="gradio", library_version="3.22.1")
  45. # Send telemetry with additional data
  46. >>> send_telemetry(
  47. ... topic="examples",
  48. ... library_name="transformers",
  49. ... library_version="4.26.0",
  50. ... user_agent={"pipeline": "text_classification", "framework": "flax"},
  51. ... )
  52. ```
  53. """
  54. if constants.HF_HUB_OFFLINE or constants.HF_HUB_DISABLE_TELEMETRY:
  55. return
  56. _start_telemetry_thread() # starts thread only if doesn't exist yet
  57. _TELEMETRY_QUEUE.put(
  58. {"topic": topic, "library_name": library_name, "library_version": library_version, "user_agent": user_agent}
  59. )
  60. def _start_telemetry_thread():
  61. """Start a daemon thread to consume tasks from the telemetry queue.
  62. If the thread is interrupted, start a new one.
  63. """
  64. with _TELEMETRY_THREAD_LOCK: # avoid to start multiple threads if called concurrently
  65. global _TELEMETRY_THREAD
  66. if _TELEMETRY_THREAD is None or not _TELEMETRY_THREAD.is_alive():
  67. _TELEMETRY_THREAD = Thread(target=_telemetry_worker, daemon=True)
  68. _TELEMETRY_THREAD.start()
  69. def _telemetry_worker():
  70. """Wait for a task and consume it."""
  71. while True:
  72. kwargs = _TELEMETRY_QUEUE.get()
  73. _send_telemetry_in_thread(**kwargs)
  74. _TELEMETRY_QUEUE.task_done()
  75. def _send_telemetry_in_thread(
  76. topic: str,
  77. *,
  78. library_name: Optional[str] = None,
  79. library_version: Optional[str] = None,
  80. user_agent: Union[Dict, str, None] = None,
  81. ) -> None:
  82. """Contains the actual data sending data to the Hub.
  83. This function is called directly in gradio's analytics because
  84. it is not possible to send telemetry from a daemon thread.
  85. See here: https://github.com/gradio-app/gradio/pull/8180
  86. Please do not rename or remove this function.
  87. """
  88. path = "/".join(quote(part) for part in topic.split("/") if len(part) > 0)
  89. try:
  90. r = get_session().head(
  91. f"{constants.ENDPOINT}/api/telemetry/{path}",
  92. headers=build_hf_headers(
  93. token=False, # no need to send a token for telemetry
  94. library_name=library_name,
  95. library_version=library_version,
  96. user_agent=user_agent,
  97. ),
  98. )
  99. hf_raise_for_status(r)
  100. except Exception as e:
  101. # We don't want to error in case of connection errors of any kind.
  102. logger.debug(f"Error while sending telemetry: {e}")