| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132 |
- # coding=utf-8
- # Copyright 2023-present, the HuggingFace Inc. team.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """Contains command to upload a large folder with the CLI."""
- import os
- from argparse import Namespace, _SubParsersAction
- from typing import List, Optional
- from huggingface_hub import logging
- from huggingface_hub.commands import BaseHuggingfaceCLICommand
- from huggingface_hub.hf_api import HfApi
- from huggingface_hub.utils import disable_progress_bars
- from ._cli_utils import ANSI
- logger = logging.get_logger(__name__)
- class UploadLargeFolderCommand(BaseHuggingfaceCLICommand):
- @staticmethod
- def register_subcommand(parser: _SubParsersAction):
- subparser = parser.add_parser(
- "upload-large-folder",
- help="Upload a large folder to the Hub. Recommended for resumable uploads.",
- )
- subparser.add_argument(
- "repo_id", type=str, help="The ID of the repo to upload to (e.g. `username/repo-name`)."
- )
- subparser.add_argument("local_path", type=str, help="Local path to the file or folder to upload.")
- subparser.add_argument(
- "--repo-type",
- choices=["model", "dataset", "space"],
- help="Type of the repo to upload to (e.g. `dataset`).",
- )
- subparser.add_argument(
- "--revision",
- type=str,
- help=("An optional Git revision to push to. It can be a branch name or a PR reference."),
- )
- subparser.add_argument(
- "--private",
- action="store_true",
- help=(
- "Whether to create a private repo if repo doesn't exist on the Hub. Ignored if the repo already exists."
- ),
- )
- subparser.add_argument("--include", nargs="*", type=str, help="Glob patterns to match files to upload.")
- subparser.add_argument("--exclude", nargs="*", type=str, help="Glob patterns to exclude from files to upload.")
- subparser.add_argument(
- "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens"
- )
- subparser.add_argument(
- "--num-workers", type=int, help="Number of workers to use to hash, upload and commit files."
- )
- subparser.add_argument("--no-report", action="store_true", help="Whether to disable regular status report.")
- subparser.add_argument("--no-bars", action="store_true", help="Whether to disable progress bars.")
- subparser.set_defaults(func=UploadLargeFolderCommand)
- def __init__(self, args: Namespace) -> None:
- self.repo_id: str = args.repo_id
- self.local_path: str = args.local_path
- self.repo_type: str = args.repo_type
- self.revision: Optional[str] = args.revision
- self.private: bool = args.private
- self.include: Optional[List[str]] = args.include
- self.exclude: Optional[List[str]] = args.exclude
- self.api: HfApi = HfApi(token=args.token, library_name="huggingface-cli")
- self.num_workers: Optional[int] = args.num_workers
- self.no_report: bool = args.no_report
- self.no_bars: bool = args.no_bars
- if not os.path.isdir(self.local_path):
- raise ValueError("Large upload is only supported for folders.")
- def run(self) -> None:
- logging.set_verbosity_info()
- print(
- ANSI.yellow(
- "You are about to upload a large folder to the Hub using `hf upload-large-folder`. "
- "This is a new feature so feedback is very welcome!\n"
- "\n"
- "A few things to keep in mind:\n"
- " - Repository limits still apply: https://huggingface.co/docs/hub/repositories-recommendations\n"
- " - Do not start several processes in parallel.\n"
- " - You can interrupt and resume the process at any time. "
- "The script will pick up where it left off except for partially uploaded files that would have to be entirely reuploaded.\n"
- " - Do not upload the same folder to several repositories. If you need to do so, you must delete the `./.cache/huggingface/` folder first.\n"
- "\n"
- f"Some temporary metadata will be stored under `{self.local_path}/.cache/huggingface`.\n"
- " - You must not modify those files manually.\n"
- " - You must not delete the `./.cache/huggingface/` folder while a process is running.\n"
- " - You can delete the `./.cache/huggingface/` folder to reinitialize the upload state when process is not running. Files will have to be hashed and preuploaded again, except for already committed files.\n"
- "\n"
- "If the process output is too verbose, you can disable the progress bars with `--no-bars`. "
- "You can also entirely disable the status report with `--no-report`.\n"
- "\n"
- "For more details, run `hf upload-large-folder --help` or check the documentation at "
- "https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-large-folder."
- )
- )
- if self.no_bars:
- disable_progress_bars()
- self.api.upload_large_folder(
- repo_id=self.repo_id,
- folder_path=self.local_path,
- repo_type=self.repo_type,
- revision=self.revision,
- private=self.private,
- allow_patterns=self.include,
- ignore_patterns=self.exclude,
- num_workers=self.num_workers,
- print_report=not self.no_report,
- )
|