_headers.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. # coding=utf-8
  2. # Copyright 2022-present, the HuggingFace Inc. team.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. """Contains utilities to handle headers to send in calls to Huggingface Hub."""
  16. from typing import Dict, Optional, Union
  17. from huggingface_hub.errors import LocalTokenNotFoundError
  18. from .. import constants
  19. from ._auth import get_token
  20. from ._deprecation import _deprecate_arguments
  21. from ._runtime import (
  22. get_fastai_version,
  23. get_fastcore_version,
  24. get_hf_hub_version,
  25. get_python_version,
  26. get_tf_version,
  27. get_torch_version,
  28. is_fastai_available,
  29. is_fastcore_available,
  30. is_tf_available,
  31. is_torch_available,
  32. )
  33. from ._validators import validate_hf_hub_args
  34. @_deprecate_arguments(
  35. version="1.0",
  36. deprecated_args="is_write_action",
  37. custom_message="This argument is ignored and we let the server handle the permission error instead (if any).",
  38. )
  39. @validate_hf_hub_args
  40. def build_hf_headers(
  41. *,
  42. token: Optional[Union[bool, str]] = None,
  43. library_name: Optional[str] = None,
  44. library_version: Optional[str] = None,
  45. user_agent: Union[Dict, str, None] = None,
  46. headers: Optional[Dict[str, str]] = None,
  47. is_write_action: bool = False,
  48. ) -> Dict[str, str]:
  49. """
  50. Build headers dictionary to send in a HF Hub call.
  51. By default, authorization token is always provided either from argument (explicit
  52. use) or retrieved from the cache (implicit use). To explicitly avoid sending the
  53. token to the Hub, set `token=False` or set the `HF_HUB_DISABLE_IMPLICIT_TOKEN`
  54. environment variable.
  55. In case of an API call that requires write access, an error is thrown if token is
  56. `None` or token is an organization token (starting with `"api_org***"`).
  57. In addition to the auth header, a user-agent is added to provide information about
  58. the installed packages (versions of python, huggingface_hub, torch, tensorflow,
  59. fastai and fastcore).
  60. Args:
  61. token (`str`, `bool`, *optional*):
  62. The token to be sent in authorization header for the Hub call:
  63. - if a string, it is used as the Hugging Face token
  64. - if `True`, the token is read from the machine (cache or env variable)
  65. - if `False`, authorization header is not set
  66. - if `None`, the token is read from the machine only except if
  67. `HF_HUB_DISABLE_IMPLICIT_TOKEN` env variable is set.
  68. library_name (`str`, *optional*):
  69. The name of the library that is making the HTTP request. Will be added to
  70. the user-agent header.
  71. library_version (`str`, *optional*):
  72. The version of the library that is making the HTTP request. Will be added
  73. to the user-agent header.
  74. user_agent (`str`, `dict`, *optional*):
  75. The user agent info in the form of a dictionary or a single string. It will
  76. be completed with information about the installed packages.
  77. headers (`dict`, *optional*):
  78. Additional headers to include in the request. Those headers take precedence
  79. over the ones generated by this function.
  80. is_write_action (`bool`):
  81. Ignored and deprecated argument.
  82. Returns:
  83. A `Dict` of headers to pass in your API call.
  84. Example:
  85. ```py
  86. >>> build_hf_headers(token="hf_***") # explicit token
  87. {"authorization": "Bearer hf_***", "user-agent": ""}
  88. >>> build_hf_headers(token=True) # explicitly use cached token
  89. {"authorization": "Bearer hf_***",...}
  90. >>> build_hf_headers(token=False) # explicitly don't use cached token
  91. {"user-agent": ...}
  92. >>> build_hf_headers() # implicit use of the cached token
  93. {"authorization": "Bearer hf_***",...}
  94. # HF_HUB_DISABLE_IMPLICIT_TOKEN=True # to set as env variable
  95. >>> build_hf_headers() # token is not sent
  96. {"user-agent": ...}
  97. >>> build_hf_headers(library_name="transformers", library_version="1.2.3")
  98. {"authorization": ..., "user-agent": "transformers/1.2.3; hf_hub/0.10.2; python/3.10.4; tensorflow/1.55"}
  99. ```
  100. Raises:
  101. [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
  102. If organization token is passed and "write" access is required.
  103. [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
  104. If "write" access is required but token is not passed and not saved locally.
  105. [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
  106. If `token=True` but token is not saved locally.
  107. """
  108. # Get auth token to send
  109. token_to_send = get_token_to_send(token)
  110. # Combine headers
  111. hf_headers = {
  112. "user-agent": _http_user_agent(
  113. library_name=library_name,
  114. library_version=library_version,
  115. user_agent=user_agent,
  116. )
  117. }
  118. if token_to_send is not None:
  119. hf_headers["authorization"] = f"Bearer {token_to_send}"
  120. if headers is not None:
  121. hf_headers.update(headers)
  122. return hf_headers
  123. def get_token_to_send(token: Optional[Union[bool, str]]) -> Optional[str]:
  124. """Select the token to send from either `token` or the cache."""
  125. # Case token is explicitly provided
  126. if isinstance(token, str):
  127. return token
  128. # Case token is explicitly forbidden
  129. if token is False:
  130. return None
  131. # Token is not provided: we get it from local cache
  132. cached_token = get_token()
  133. # Case token is explicitly required
  134. if token is True:
  135. if cached_token is None:
  136. raise LocalTokenNotFoundError(
  137. "Token is required (`token=True`), but no token found. You"
  138. " need to provide a token or be logged in to Hugging Face with"
  139. " `hf auth login` or `huggingface_hub.login`. See"
  140. " https://huggingface.co/settings/tokens."
  141. )
  142. return cached_token
  143. # Case implicit use of the token is forbidden by env variable
  144. if constants.HF_HUB_DISABLE_IMPLICIT_TOKEN:
  145. return None
  146. # Otherwise: we use the cached token as the user has not explicitly forbidden it
  147. return cached_token
  148. def _http_user_agent(
  149. *,
  150. library_name: Optional[str] = None,
  151. library_version: Optional[str] = None,
  152. user_agent: Union[Dict, str, None] = None,
  153. ) -> str:
  154. """Format a user-agent string containing information about the installed packages.
  155. Args:
  156. library_name (`str`, *optional*):
  157. The name of the library that is making the HTTP request.
  158. library_version (`str`, *optional*):
  159. The version of the library that is making the HTTP request.
  160. user_agent (`str`, `dict`, *optional*):
  161. The user agent info in the form of a dictionary or a single string.
  162. Returns:
  163. The formatted user-agent string.
  164. """
  165. if library_name is not None:
  166. ua = f"{library_name}/{library_version}"
  167. else:
  168. ua = "unknown/None"
  169. ua += f"; hf_hub/{get_hf_hub_version()}"
  170. ua += f"; python/{get_python_version()}"
  171. if not constants.HF_HUB_DISABLE_TELEMETRY:
  172. if is_torch_available():
  173. ua += f"; torch/{get_torch_version()}"
  174. if is_tf_available():
  175. ua += f"; tensorflow/{get_tf_version()}"
  176. if is_fastai_available():
  177. ua += f"; fastai/{get_fastai_version()}"
  178. if is_fastcore_available():
  179. ua += f"; fastcore/{get_fastcore_version()}"
  180. if isinstance(user_agent, dict):
  181. ua += "; " + "; ".join(f"{k}/{v}" for k, v in user_agent.items())
  182. elif isinstance(user_agent, str):
  183. ua += "; " + user_agent
  184. # Retrieve user-agent origin headers from environment variable
  185. origin = constants.HF_HUB_USER_AGENT_ORIGIN
  186. if origin is not None:
  187. ua += "; origin/" + origin
  188. return _deduplicate_user_agent(ua)
  189. def _deduplicate_user_agent(user_agent: str) -> str:
  190. """Deduplicate redundant information in the generated user-agent."""
  191. # Split around ";" > Strip whitespaces > Store as dict keys (ensure unicity) > format back as string
  192. # Order is implicitly preserved by dictionary structure (see https://stackoverflow.com/a/53657523).
  193. return "; ".join({key.strip(): None for key in user_agent.split(";")}.keys())