upload_large_folder.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. # coding=utf-8
  2. # Copyright 2023-present, the HuggingFace Inc. team.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. """Contains command to upload a large folder with the CLI."""
  16. import os
  17. from argparse import Namespace, _SubParsersAction
  18. from typing import List, Optional
  19. from huggingface_hub import logging
  20. from huggingface_hub.commands import BaseHuggingfaceCLICommand
  21. from huggingface_hub.hf_api import HfApi
  22. from huggingface_hub.utils import disable_progress_bars
  23. from ._cli_utils import ANSI
  24. logger = logging.get_logger(__name__)
  25. class UploadLargeFolderCommand(BaseHuggingfaceCLICommand):
  26. @staticmethod
  27. def register_subcommand(parser: _SubParsersAction):
  28. subparser = parser.add_parser(
  29. "upload-large-folder",
  30. help="Upload a large folder to the Hub. Recommended for resumable uploads.",
  31. )
  32. subparser.add_argument(
  33. "repo_id", type=str, help="The ID of the repo to upload to (e.g. `username/repo-name`)."
  34. )
  35. subparser.add_argument("local_path", type=str, help="Local path to the file or folder to upload.")
  36. subparser.add_argument(
  37. "--repo-type",
  38. choices=["model", "dataset", "space"],
  39. help="Type of the repo to upload to (e.g. `dataset`).",
  40. )
  41. subparser.add_argument(
  42. "--revision",
  43. type=str,
  44. help=("An optional Git revision to push to. It can be a branch name or a PR reference."),
  45. )
  46. subparser.add_argument(
  47. "--private",
  48. action="store_true",
  49. help=(
  50. "Whether to create a private repo if repo doesn't exist on the Hub. Ignored if the repo already exists."
  51. ),
  52. )
  53. subparser.add_argument("--include", nargs="*", type=str, help="Glob patterns to match files to upload.")
  54. subparser.add_argument("--exclude", nargs="*", type=str, help="Glob patterns to exclude from files to upload.")
  55. subparser.add_argument(
  56. "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens"
  57. )
  58. subparser.add_argument(
  59. "--num-workers", type=int, help="Number of workers to use to hash, upload and commit files."
  60. )
  61. subparser.add_argument("--no-report", action="store_true", help="Whether to disable regular status report.")
  62. subparser.add_argument("--no-bars", action="store_true", help="Whether to disable progress bars.")
  63. subparser.set_defaults(func=UploadLargeFolderCommand)
  64. def __init__(self, args: Namespace) -> None:
  65. self.repo_id: str = args.repo_id
  66. self.local_path: str = args.local_path
  67. self.repo_type: str = args.repo_type
  68. self.revision: Optional[str] = args.revision
  69. self.private: bool = args.private
  70. self.include: Optional[List[str]] = args.include
  71. self.exclude: Optional[List[str]] = args.exclude
  72. self.api: HfApi = HfApi(token=args.token, library_name="huggingface-cli")
  73. self.num_workers: Optional[int] = args.num_workers
  74. self.no_report: bool = args.no_report
  75. self.no_bars: bool = args.no_bars
  76. if not os.path.isdir(self.local_path):
  77. raise ValueError("Large upload is only supported for folders.")
  78. def run(self) -> None:
  79. logging.set_verbosity_info()
  80. print(
  81. ANSI.yellow(
  82. "You are about to upload a large folder to the Hub using `hf upload-large-folder`. "
  83. "This is a new feature so feedback is very welcome!\n"
  84. "\n"
  85. "A few things to keep in mind:\n"
  86. " - Repository limits still apply: https://huggingface.co/docs/hub/repositories-recommendations\n"
  87. " - Do not start several processes in parallel.\n"
  88. " - You can interrupt and resume the process at any time. "
  89. "The script will pick up where it left off except for partially uploaded files that would have to be entirely reuploaded.\n"
  90. " - Do not upload the same folder to several repositories. If you need to do so, you must delete the `./.cache/huggingface/` folder first.\n"
  91. "\n"
  92. f"Some temporary metadata will be stored under `{self.local_path}/.cache/huggingface`.\n"
  93. " - You must not modify those files manually.\n"
  94. " - You must not delete the `./.cache/huggingface/` folder while a process is running.\n"
  95. " - You can delete the `./.cache/huggingface/` folder to reinitialize the upload state when process is not running. Files will have to be hashed and preuploaded again, except for already committed files.\n"
  96. "\n"
  97. "If the process output is too verbose, you can disable the progress bars with `--no-bars`. "
  98. "You can also entirely disable the status report with `--no-report`.\n"
  99. "\n"
  100. "For more details, run `hf upload-large-folder --help` or check the documentation at "
  101. "https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-large-folder."
  102. )
  103. )
  104. if self.no_bars:
  105. disable_progress_bars()
  106. self.api.upload_large_folder(
  107. repo_id=self.repo_id,
  108. folder_path=self.local_path,
  109. repo_type=self.repo_type,
  110. revision=self.revision,
  111. private=self.private,
  112. allow_patterns=self.include,
  113. ignore_patterns=self.exclude,
  114. num_workers=self.num_workers,
  115. print_report=not self.no_report,
  116. )