| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163 |
- #!/usr/bin/env python
- # Copyright 2021 The HuggingFace Team. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from pathlib import Path
- import torch
- from ...utils import (
- is_hpu_available,
- is_mlu_available,
- is_musa_available,
- is_npu_available,
- is_sdaa_available,
- is_xpu_available,
- )
- from .config_args import ClusterConfig, default_json_config_file
- from .config_utils import SubcommandHelpFormatter
# Help text for the `default` subcommand; passed as `help=` when the subparser is registered below.
description = "Create a default config file for Accelerate with only a few flags set."
def write_basic_config(mixed_precision="no", save_location: str = default_json_config_file):
    """
    Creates and saves a basic cluster config to be used on a local machine with potentially multiple GPUs. Will also
    set CPU if it is a CPU-only machine.

    Args:
        mixed_precision (`str`, *optional*, defaults to "no"):
            Mixed Precision to use. Should be one of "no", "fp16", "bf16", or "fp8" (case-insensitive).
        save_location (`str`, *optional*, defaults to `default_json_config_file`):
            Optional custom save location. Should be passed to `--config_file` when using `accelerate launch`. Default
            location is inside the huggingface cache folder (`~/.cache/huggingface`) but can be overridden by setting
            the `HF_HOME` environmental variable, followed by `accelerate/default_config.yaml`.

    Returns:
        `False` if a file already exists at `save_location` (nothing is written in that case), otherwise the
        `pathlib.Path` the configuration was written to.

    Raises:
        `ValueError`: If `mixed_precision` is not one of the supported values.
    """
    path = Path(save_location)
    # The parent directory is created before the existence check, so it exists even when we bail out below.
    path.parent.mkdir(parents=True, exist_ok=True)
    if path.exists():
        print(
            f"Configuration already exists at {save_location}, will not override. Run `accelerate config` manually or pass a different `save_location`."
        )
        return False

    mixed_precision = mixed_precision.lower()
    if mixed_precision not in ["no", "fp16", "bf16", "fp8"]:
        raise ValueError(
            f"`mixed_precision` should be one of 'no', 'fp16', 'bf16', or 'fp8'. Received {mixed_precision}"
        )

    config = {
        "compute_environment": "LOCAL_MACHINE",
        "mixed_precision": mixed_precision,
    }

    # (availability check, name of the device module on `torch`, distributed type when more than one device).
    # Order matters: the first available backend wins. The device module is looked up lazily by name because
    # e.g. `torch.mlu` only exists when the corresponding backend is installed.
    backends = (
        (is_mlu_available, "mlu", "MULTI_MLU"),
        (is_sdaa_available, "sdaa", "MULTI_SDAA"),
        (is_musa_available, "musa", "MULTI_MUSA"),
        (is_hpu_available, "hpu", "MULTI_HPU"),
        (torch.cuda.is_available, "cuda", "MULTI_GPU"),
        (is_xpu_available, "xpu", "MULTI_XPU"),
        (is_npu_available, "npu", "MULTI_NPU"),
    )
    for is_available, device_module, multi_device_type in backends:
        if not is_available():
            continue
        num_devices = getattr(torch, device_module).device_count()
        config["num_processes"] = num_devices
        config["use_cpu"] = False
        config["distributed_type"] = multi_device_type if num_devices > 1 else "NO"
        # Stop at the first match so a later probe (or the CPU fallback) cannot overwrite these settings.
        break
    else:
        # No accelerator backend was detected: fall back to a single CPU process.
        config["use_cpu"] = True
        config["num_processes"] = 1
        config["distributed_type"] = "NO"

    config["debug"] = False
    config["enable_cpu_affinity"] = False
    config = ClusterConfig(**config)
    config.to_json_file(path)
    return path
def default_command_parser(parser, parents):
    """
    Register the `default` subcommand and its command-line options.

    Args:
        parser:
            Object exposing `add_parser` (presumably the sub-parsers action of the main CLI -- confirm against the
            caller). The `default` subcommand is added to it.
        parents:
            Forwarded unchanged as the `parents` argument of `add_parser`.

    Returns:
        The parser created for the `default` subcommand, with `--config_file` and `--mixed_precision` registered and
        `func` defaulting to `default_config_command`.
    """
    default_parser = parser.add_parser(
        "default",
        parents=parents,
        help=description,
        formatter_class=SubcommandHelpFormatter,
    )

    config_file_help = (
        "The path to use to store the config file. Will default to a file named default_config.yaml in the cache "
        "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have "
        "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed "
        "with 'huggingface'."
    )
    # Stored under `save_location` so it lines up with the keyword used by `write_basic_config`.
    default_parser.add_argument(
        "--config_file",
        default=default_json_config_file,
        help=config_file_help,
        dest="save_location",
    )

    mixed_precision_help = (
        "Whether or not to use mixed precision training. "
        "Choose between FP16 and BF16 (bfloat16) training. "
        "BF16 training is only supported on Nvidia Ampere GPUs and PyTorch 1.10 or later."
    )
    default_parser.add_argument(
        "--mixed_precision",
        choices=["no", "fp16", "bf16"],
        type=str,
        help=mixed_precision_help,
        default="no",
    )

    default_parser.set_defaults(func=default_config_command)
    return default_parser
def default_config_command(args):
    """
    Entry point for the `default` subcommand.

    Writes a basic config using `args.mixed_precision` and `args.save_location`, and prints the destination when
    `write_basic_config` returns a truthy value (it returns `False` when it did not write anything).
    """
    saved_path = write_basic_config(args.mixed_precision, args.save_location)
    if not saved_path:
        return
    print(f"accelerate configuration saved at {saved_path}")
|