upload.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. # coding=utf-8
  2. # Copyright 2023-present, the HuggingFace Inc. team.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. """Contains command to upload a repo or file with the CLI.
  16. Usage:
  17. # Upload file (implicit)
  18. hf upload my-cool-model ./my-cool-model.safetensors
  19. # Upload file (explicit)
  20. hf upload my-cool-model ./my-cool-model.safetensors model.safetensors
  21. # Upload directory (implicit). If `my-cool-model/` is a directory it will be uploaded, otherwise an exception is raised.
  22. hf upload my-cool-model
  23. # Upload directory (explicit)
  24. hf upload my-cool-model ./models/my-cool-model .
  25. # Upload filtered directory (example: tensorboard logs except for the last run)
  26. hf upload my-cool-model ./model/training /logs --include "*.tfevents.*" --exclude "*20230905*"
  27. # Upload with wildcard
  28. hf upload my-cool-model "./model/training/*.safetensors"
  29. # Upload private dataset
  30. hf upload Wauplin/my-cool-dataset ./data . --repo-type=dataset --private
  31. # Upload with token
  32. hf upload Wauplin/my-cool-model --token=hf_****
  33. # Sync local Space with Hub (upload new files, delete removed files)
  34. hf upload Wauplin/space-example --repo-type=space --exclude="/logs/*" --delete="*" --commit-message="Sync local Space with Hub"
  35. # Schedule commits every 30 minutes
  36. hf upload Wauplin/my-cool-model --every=30
  37. """
  38. import os
  39. import time
  40. import warnings
  41. from argparse import Namespace, _SubParsersAction
  42. from typing import List, Optional
  43. from huggingface_hub import logging
  44. from huggingface_hub._commit_scheduler import CommitScheduler
  45. from huggingface_hub.commands import BaseHuggingfaceCLICommand
  46. from huggingface_hub.constants import HF_HUB_ENABLE_HF_TRANSFER
  47. from huggingface_hub.errors import RevisionNotFoundError
  48. from huggingface_hub.hf_api import HfApi
  49. from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
  50. from huggingface_hub.utils._runtime import is_xet_available
  51. logger = logging.get_logger(__name__)
  52. class UploadCommand(BaseHuggingfaceCLICommand):
  53. @staticmethod
  54. def register_subcommand(parser: _SubParsersAction):
  55. upload_parser = parser.add_parser(
  56. "upload", help="Upload a file or a folder to the Hub. Recommended for single-commit uploads."
  57. )
  58. upload_parser.add_argument(
  59. "repo_id", type=str, help="The ID of the repo to upload to (e.g. `username/repo-name`)."
  60. )
  61. upload_parser.add_argument(
  62. "local_path",
  63. nargs="?",
  64. help="Local path to the file or folder to upload. Wildcard patterns are supported. Defaults to current directory.",
  65. )
  66. upload_parser.add_argument(
  67. "path_in_repo",
  68. nargs="?",
  69. help="Path of the file or folder in the repo. Defaults to the relative path of the file or folder.",
  70. )
  71. upload_parser.add_argument(
  72. "--repo-type",
  73. choices=["model", "dataset", "space"],
  74. default="model",
  75. help="Type of the repo to upload to (e.g. `dataset`).",
  76. )
  77. upload_parser.add_argument(
  78. "--revision",
  79. type=str,
  80. help=(
  81. "An optional Git revision to push to. It can be a branch name or a PR reference. If revision does not"
  82. " exist and `--create-pr` is not set, a branch will be automatically created."
  83. ),
  84. )
  85. upload_parser.add_argument(
  86. "--private",
  87. action="store_true",
  88. help=(
  89. "Whether to create a private repo if repo doesn't exist on the Hub. Ignored if the repo already"
  90. " exists."
  91. ),
  92. )
  93. upload_parser.add_argument("--include", nargs="*", type=str, help="Glob patterns to match files to upload.")
  94. upload_parser.add_argument(
  95. "--exclude", nargs="*", type=str, help="Glob patterns to exclude from files to upload."
  96. )
  97. upload_parser.add_argument(
  98. "--delete",
  99. nargs="*",
  100. type=str,
  101. help="Glob patterns for file to be deleted from the repo while committing.",
  102. )
  103. upload_parser.add_argument(
  104. "--commit-message", type=str, help="The summary / title / first line of the generated commit."
  105. )
  106. upload_parser.add_argument("--commit-description", type=str, help="The description of the generated commit.")
  107. upload_parser.add_argument(
  108. "--create-pr", action="store_true", help="Whether to upload content as a new Pull Request."
  109. )
  110. upload_parser.add_argument(
  111. "--every",
  112. type=float,
  113. help="If set, a background job is scheduled to create commits every `every` minutes.",
  114. )
  115. upload_parser.add_argument(
  116. "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens"
  117. )
  118. upload_parser.add_argument(
  119. "--quiet",
  120. action="store_true",
  121. help="If True, progress bars are disabled and only the path to the uploaded files is printed.",
  122. )
  123. upload_parser.set_defaults(func=UploadCommand)
  124. def __init__(self, args: Namespace) -> None:
  125. self.repo_id: str = args.repo_id
  126. self.repo_type: Optional[str] = args.repo_type
  127. self.revision: Optional[str] = args.revision
  128. self.private: bool = args.private
  129. self.include: Optional[List[str]] = args.include
  130. self.exclude: Optional[List[str]] = args.exclude
  131. self.delete: Optional[List[str]] = args.delete
  132. self.commit_message: Optional[str] = args.commit_message
  133. self.commit_description: Optional[str] = args.commit_description
  134. self.create_pr: bool = args.create_pr
  135. self.api: HfApi = HfApi(token=args.token, library_name="huggingface-cli")
  136. self.quiet: bool = args.quiet # disable warnings and progress bars
  137. # Check `--every` is valid
  138. if args.every is not None and args.every <= 0:
  139. raise ValueError(f"`every` must be a positive value (got '{args.every}')")
  140. self.every: Optional[float] = args.every
  141. # Resolve `local_path` and `path_in_repo`
  142. repo_name: str = args.repo_id.split("/")[-1] # e.g. "Wauplin/my-cool-model" => "my-cool-model"
  143. self.local_path: str
  144. self.path_in_repo: str
  145. if args.local_path is not None and any(c in args.local_path for c in ["*", "?", "["]):
  146. if args.include is not None:
  147. raise ValueError("Cannot set `--include` when passing a `local_path` containing a wildcard.")
  148. if args.path_in_repo is not None and args.path_in_repo != ".":
  149. raise ValueError("Cannot set `path_in_repo` when passing a `local_path` containing a wildcard.")
  150. self.local_path = "."
  151. self.include = args.local_path
  152. self.path_in_repo = "."
  153. elif args.local_path is None and os.path.isfile(repo_name):
  154. # Implicit case 1: user provided only a repo_id which happen to be a local file as well => upload it with same name
  155. self.local_path = repo_name
  156. self.path_in_repo = repo_name
  157. elif args.local_path is None and os.path.isdir(repo_name):
  158. # Implicit case 2: user provided only a repo_id which happen to be a local folder as well => upload it at root
  159. self.local_path = repo_name
  160. self.path_in_repo = "."
  161. elif args.local_path is None:
  162. # Implicit case 3: user provided only a repo_id that does not match a local file or folder
  163. # => the user must explicitly provide a local_path => raise exception
  164. raise ValueError(f"'{repo_name}' is not a local file or folder. Please set `local_path` explicitly.")
  165. elif args.path_in_repo is None and os.path.isfile(args.local_path):
  166. # Explicit local path to file, no path in repo => upload it at root with same name
  167. self.local_path = args.local_path
  168. self.path_in_repo = os.path.basename(args.local_path)
  169. elif args.path_in_repo is None:
  170. # Explicit local path to folder, no path in repo => upload at root
  171. self.local_path = args.local_path
  172. self.path_in_repo = "."
  173. else:
  174. # Finally, if both paths are explicit
  175. self.local_path = args.local_path
  176. self.path_in_repo = args.path_in_repo
  177. def run(self) -> None:
  178. if self.quiet:
  179. disable_progress_bars()
  180. with warnings.catch_warnings():
  181. warnings.simplefilter("ignore")
  182. print(self._upload())
  183. enable_progress_bars()
  184. else:
  185. logging.set_verbosity_info()
  186. print(self._upload())
  187. logging.set_verbosity_warning()
  188. def _upload(self) -> str:
  189. if os.path.isfile(self.local_path):
  190. if self.include is not None and len(self.include) > 0:
  191. warnings.warn("Ignoring `--include` since a single file is uploaded.")
  192. if self.exclude is not None and len(self.exclude) > 0:
  193. warnings.warn("Ignoring `--exclude` since a single file is uploaded.")
  194. if self.delete is not None and len(self.delete) > 0:
  195. warnings.warn("Ignoring `--delete` since a single file is uploaded.")
  196. if not is_xet_available() and not HF_HUB_ENABLE_HF_TRANSFER:
  197. logger.info(
  198. "Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See"
  199. " https://huggingface.co/docs/huggingface_hub/hf_transfer for more details."
  200. )
  201. # Schedule commits if `every` is set
  202. if self.every is not None:
  203. if os.path.isfile(self.local_path):
  204. # If file => watch entire folder + use allow_patterns
  205. folder_path = os.path.dirname(self.local_path)
  206. path_in_repo = (
  207. self.path_in_repo[: -len(self.local_path)] # remove filename from path_in_repo
  208. if self.path_in_repo.endswith(self.local_path)
  209. else self.path_in_repo
  210. )
  211. allow_patterns = [self.local_path]
  212. ignore_patterns = []
  213. else:
  214. folder_path = self.local_path
  215. path_in_repo = self.path_in_repo
  216. allow_patterns = self.include or []
  217. ignore_patterns = self.exclude or []
  218. if self.delete is not None and len(self.delete) > 0:
  219. warnings.warn("Ignoring `--delete` when uploading with scheduled commits.")
  220. scheduler = CommitScheduler(
  221. folder_path=folder_path,
  222. repo_id=self.repo_id,
  223. repo_type=self.repo_type,
  224. revision=self.revision,
  225. allow_patterns=allow_patterns,
  226. ignore_patterns=ignore_patterns,
  227. path_in_repo=path_in_repo,
  228. private=self.private,
  229. every=self.every,
  230. hf_api=self.api,
  231. )
  232. print(f"Scheduling commits every {self.every} minutes to {scheduler.repo_id}.")
  233. try: # Block main thread until KeyboardInterrupt
  234. while True:
  235. time.sleep(100)
  236. except KeyboardInterrupt:
  237. scheduler.stop()
  238. return "Stopped scheduled commits."
  239. # Otherwise, create repo and proceed with the upload
  240. if not os.path.isfile(self.local_path) and not os.path.isdir(self.local_path):
  241. raise FileNotFoundError(f"No such file or directory: '{self.local_path}'.")
  242. repo_id = self.api.create_repo(
  243. repo_id=self.repo_id,
  244. repo_type=self.repo_type,
  245. exist_ok=True,
  246. private=self.private,
  247. space_sdk="gradio" if self.repo_type == "space" else None,
  248. # ^ We don't want it to fail when uploading to a Space => let's set Gradio by default.
  249. # ^ I'd rather not add CLI args to set it explicitly as we already have `hf repo create` for that.
  250. ).repo_id
  251. # Check if branch already exists and if not, create it
  252. if self.revision is not None and not self.create_pr:
  253. try:
  254. self.api.repo_info(repo_id=repo_id, repo_type=self.repo_type, revision=self.revision)
  255. except RevisionNotFoundError:
  256. logger.info(f"Branch '{self.revision}' not found. Creating it...")
  257. self.api.create_branch(repo_id=repo_id, repo_type=self.repo_type, branch=self.revision, exist_ok=True)
  258. # ^ `exist_ok=True` to avoid race concurrency issues
  259. # File-based upload
  260. if os.path.isfile(self.local_path):
  261. return self.api.upload_file(
  262. path_or_fileobj=self.local_path,
  263. path_in_repo=self.path_in_repo,
  264. repo_id=repo_id,
  265. repo_type=self.repo_type,
  266. revision=self.revision,
  267. commit_message=self.commit_message,
  268. commit_description=self.commit_description,
  269. create_pr=self.create_pr,
  270. )
  271. # Folder-based upload
  272. else:
  273. return self.api.upload_folder(
  274. folder_path=self.local_path,
  275. path_in_repo=self.path_in_repo,
  276. repo_id=repo_id,
  277. repo_type=self.repo_type,
  278. revision=self.revision,
  279. commit_message=self.commit_message,
  280. commit_description=self.commit_description,
  281. create_pr=self.create_pr,
  282. allow_patterns=self.include,
  283. ignore_patterns=self.exclude,
  284. delete_patterns=self.delete,
  285. )