upload.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. # coding=utf-8
  2. # Copyright 2023-present, the HuggingFace Inc. team.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. """Contains command to upload a repo or file with the CLI.
  16. Usage:
  17. # Upload file (implicit)
  18. hf upload my-cool-model ./my-cool-model.safetensors
  19. # Upload file (explicit)
  20. hf upload my-cool-model ./my-cool-model.safetensors model.safetensors
  21. # Upload directory (implicit). If `my-cool-model/` is a directory it will be uploaded, otherwise an exception is raised.
  22. hf upload my-cool-model
  23. # Upload directory (explicit)
  24. hf upload my-cool-model ./models/my-cool-model .
  25. # Upload filtered directory (example: tensorboard logs except for the last run)
  26. hf upload my-cool-model ./model/training /logs --include "*.tfevents.*" --exclude "*20230905*"
  27. # Upload with wildcard
  28. hf upload my-cool-model "./model/training/*.safetensors"
  29. # Upload private dataset
  30. hf upload Wauplin/my-cool-dataset ./data . --repo-type=dataset --private
  31. # Upload with token
  32. hf upload Wauplin/my-cool-model --token=hf_****
  33. # Sync local Space with Hub (upload new files, delete removed files)
  34. hf upload Wauplin/space-example --repo-type=space --exclude="/logs/*" --delete="*" --commit-message="Sync local Space with Hub"
  35. # Schedule commits every 30 minutes
  36. hf upload Wauplin/my-cool-model --every=30
  37. """
  38. import os
  39. import time
  40. import warnings
  41. from typing import Annotated, Optional
  42. import typer
  43. from huggingface_hub import logging
  44. from huggingface_hub._commit_scheduler import CommitScheduler
  45. from huggingface_hub.errors import RevisionNotFoundError
  46. from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
  47. from ._cli_utils import PrivateOpt, RepoIdArg, RepoType, RepoTypeOpt, RevisionOpt, TokenOpt, get_hf_api
  48. logger = logging.get_logger(__name__)
  49. def upload(
  50. repo_id: RepoIdArg,
  51. local_path: Annotated[
  52. Optional[str],
  53. typer.Argument(
  54. help="Local path to the file or folder to upload. Wildcard patterns are supported. Defaults to current directory.",
  55. ),
  56. ] = None,
  57. path_in_repo: Annotated[
  58. Optional[str],
  59. typer.Argument(
  60. help="Path of the file or folder in the repo. Defaults to the relative path of the file or folder.",
  61. ),
  62. ] = None,
  63. repo_type: RepoTypeOpt = RepoType.model,
  64. revision: RevisionOpt = None,
  65. private: PrivateOpt = None,
  66. include: Annotated[
  67. Optional[list[str]],
  68. typer.Option(
  69. help="Glob patterns to match files to upload.",
  70. ),
  71. ] = None,
  72. exclude: Annotated[
  73. Optional[list[str]],
  74. typer.Option(
  75. help="Glob patterns to exclude from files to upload.",
  76. ),
  77. ] = None,
  78. delete: Annotated[
  79. Optional[list[str]],
  80. typer.Option(
  81. help="Glob patterns for file to be deleted from the repo while committing.",
  82. ),
  83. ] = None,
  84. commit_message: Annotated[
  85. Optional[str],
  86. typer.Option(
  87. help="The summary / title / first line of the generated commit.",
  88. ),
  89. ] = None,
  90. commit_description: Annotated[
  91. Optional[str],
  92. typer.Option(
  93. help="The description of the generated commit.",
  94. ),
  95. ] = None,
  96. create_pr: Annotated[
  97. bool,
  98. typer.Option(
  99. help="Whether to upload content as a new Pull Request.",
  100. ),
  101. ] = False,
  102. every: Annotated[
  103. Optional[float],
  104. typer.Option(
  105. help="f set, a background job is scheduled to create commits every `every` minutes.",
  106. ),
  107. ] = None,
  108. token: TokenOpt = None,
  109. quiet: Annotated[
  110. bool,
  111. typer.Option(
  112. help="Disable progress bars and warnings; print only the returned path.",
  113. ),
  114. ] = False,
  115. ) -> None:
  116. """Upload a file or a folder to the Hub. Recommended for single-commit uploads."""
  117. if every is not None and every <= 0:
  118. raise typer.BadParameter("--every must be a positive value", param_hint="every")
  119. repo_type_str = repo_type.value
  120. api = get_hf_api(token=token)
  121. # Resolve local_path and path_in_repo based on implicit/explicit rules
  122. resolved_local_path, resolved_path_in_repo, resolved_include = _resolve_upload_paths(
  123. repo_id=repo_id, local_path=local_path, path_in_repo=path_in_repo, include=include
  124. )
  125. def run_upload() -> str:
  126. if os.path.isfile(resolved_local_path):
  127. if resolved_include is not None and len(resolved_include) > 0 and isinstance(resolved_include, list):
  128. warnings.warn("Ignoring --include since a single file is uploaded.")
  129. if exclude is not None and len(exclude) > 0:
  130. warnings.warn("Ignoring --exclude since a single file is uploaded.")
  131. if delete is not None and len(delete) > 0:
  132. warnings.warn("Ignoring --delete since a single file is uploaded.")
  133. # Schedule commits if `every` is set
  134. if every is not None:
  135. if os.path.isfile(resolved_local_path):
  136. # If file => watch entire folder + use allow_patterns
  137. folder_path = os.path.dirname(resolved_local_path)
  138. pi = (
  139. resolved_path_in_repo[: -len(resolved_local_path)]
  140. if resolved_path_in_repo.endswith(resolved_local_path)
  141. else resolved_path_in_repo
  142. )
  143. allow_patterns = [resolved_local_path]
  144. ignore_patterns: Optional[list[str]] = []
  145. else:
  146. folder_path = resolved_local_path
  147. pi = resolved_path_in_repo
  148. allow_patterns = (
  149. resolved_include or []
  150. if isinstance(resolved_include, list)
  151. else [resolved_include]
  152. if isinstance(resolved_include, str)
  153. else []
  154. )
  155. ignore_patterns = exclude or []
  156. if delete is not None and len(delete) > 0:
  157. warnings.warn("Ignoring --delete when uploading with scheduled commits.")
  158. scheduler = CommitScheduler(
  159. folder_path=folder_path,
  160. repo_id=repo_id,
  161. repo_type=repo_type_str,
  162. revision=revision,
  163. allow_patterns=allow_patterns,
  164. ignore_patterns=ignore_patterns,
  165. path_in_repo=pi,
  166. private=private,
  167. every=every,
  168. hf_api=api,
  169. )
  170. print(f"Scheduling commits every {every} minutes to {scheduler.repo_id}.")
  171. try:
  172. while True:
  173. time.sleep(100)
  174. except KeyboardInterrupt:
  175. scheduler.stop()
  176. return "Stopped scheduled commits."
  177. # Otherwise, create repo and proceed with the upload
  178. if not os.path.isfile(resolved_local_path) and not os.path.isdir(resolved_local_path):
  179. raise FileNotFoundError(f"No such file or directory: '{resolved_local_path}'.")
  180. created = api.create_repo(
  181. repo_id=repo_id,
  182. repo_type=repo_type_str,
  183. exist_ok=True,
  184. private=private,
  185. space_sdk="gradio" if repo_type_str == "space" else None,
  186. # ^ We don't want it to fail when uploading to a Space => let's set Gradio by default.
  187. # ^ I'd rather not add CLI args to set it explicitly as we already have `hf repo create` for that.
  188. ).repo_id
  189. # Check if branch already exists and if not, create it
  190. if revision is not None and not create_pr:
  191. try:
  192. api.repo_info(repo_id=created, repo_type=repo_type_str, revision=revision)
  193. except RevisionNotFoundError:
  194. logger.info(f"Branch '{revision}' not found. Creating it...")
  195. api.create_branch(repo_id=created, repo_type=repo_type_str, branch=revision, exist_ok=True)
  196. # ^ `exist_ok=True` to avoid race concurrency issues
  197. # File-based upload
  198. if os.path.isfile(resolved_local_path):
  199. return api.upload_file(
  200. path_or_fileobj=resolved_local_path,
  201. path_in_repo=resolved_path_in_repo,
  202. repo_id=created,
  203. repo_type=repo_type_str,
  204. revision=revision,
  205. commit_message=commit_message,
  206. commit_description=commit_description,
  207. create_pr=create_pr,
  208. )
  209. # Folder-based upload
  210. return api.upload_folder(
  211. folder_path=resolved_local_path,
  212. path_in_repo=resolved_path_in_repo,
  213. repo_id=created,
  214. repo_type=repo_type_str,
  215. revision=revision,
  216. commit_message=commit_message,
  217. commit_description=commit_description,
  218. create_pr=create_pr,
  219. allow_patterns=(
  220. resolved_include
  221. if isinstance(resolved_include, list)
  222. else [resolved_include]
  223. if isinstance(resolved_include, str)
  224. else None
  225. ),
  226. ignore_patterns=exclude,
  227. delete_patterns=delete,
  228. )
  229. if quiet:
  230. disable_progress_bars()
  231. with warnings.catch_warnings():
  232. warnings.simplefilter("ignore")
  233. print(run_upload())
  234. enable_progress_bars()
  235. else:
  236. print(run_upload())
  237. logging.set_verbosity_warning()
  238. def _resolve_upload_paths(
  239. *, repo_id: str, local_path: Optional[str], path_in_repo: Optional[str], include: Optional[list[str]]
  240. ) -> tuple[str, str, Optional[list[str]]]:
  241. repo_name = repo_id.split("/")[-1]
  242. resolved_include = include
  243. if local_path is not None and any(c in local_path for c in ["*", "?", "["]):
  244. if include is not None:
  245. raise ValueError("Cannot set --include when local_path contains a wildcard.")
  246. if path_in_repo is not None and path_in_repo != ".":
  247. raise ValueError("Cannot set path_in_repo when local_path contains a wildcard.")
  248. return ".", local_path, ["."] # will be adjusted below; placeholder for type
  249. if local_path is None and os.path.isfile(repo_name):
  250. return repo_name, repo_name, resolved_include
  251. if local_path is None and os.path.isdir(repo_name):
  252. return repo_name, ".", resolved_include
  253. if local_path is None:
  254. raise ValueError(f"'{repo_name}' is not a local file or folder. Please set local_path explicitly.")
  255. if path_in_repo is None and os.path.isfile(local_path):
  256. return local_path, os.path.basename(local_path), resolved_include
  257. if path_in_repo is None:
  258. return local_path, ".", resolved_include
  259. return local_path, path_in_repo, resolved_include