#!/usr/bin/env python
# Repository (from page header): yichael / AutoAndroidController

# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from ...utils import (
    ComputeEnvironment,
    DistributedType,
    is_deepspeed_available,
    is_fp8_available,
    is_hpu_available,
    is_mlu_available,
    is_mps_available,
    is_msamp_available,
    is_musa_available,
    is_npu_available,
    is_sdaa_available,
    is_transformer_engine_available,
    is_transformers_available,
    is_xpu_available,
)
from ...utils.constants import (
    DEEPSPEED_MULTINODE_LAUNCHERS,
    FSDP2_STATE_DICT_TYPE,
    FSDP_AUTO_WRAP_POLICY,
    FSDP_BACKWARD_PREFETCH,
    FSDP_SHARDING_STRATEGY,
    FSDP_STATE_DICT_TYPE,
    TORCH_DYNAMO_MODES,
)
from .config_args import ClusterConfig
from .config_utils import (
    DYNAMO_BACKENDS,
    _ask_field,
    _ask_options,
    _convert_distributed_mode,
    _convert_dynamo_backend,
    _convert_fp8_backend,
    _convert_mixed_precision,
    _convert_yes_no_to_bool,
)


def get_cluster_input():
    distributed_type = _ask_options(
        "Which type of machine are you using?",
        [
            "No distributed training",
            "multi-CPU",
            "multi-XPU",
            "multi-HPU",
            "multi-GPU",
            "multi-NPU",
            "multi-MLU",
            "multi-SDAA",
            "multi-MUSA",
            "TPU",
        ],
        _convert_distributed_mode,
    )

    machine_rank = 0
    num_machines = 1
    num_processes = 1
    gpu_ids = None
    main_process_ip = None
    main_process_port = None
    rdzv_backend = "static"
    same_network = True
    debug = False

    if distributed_type in [
        DistributedType.MULTI_GPU,
        DistributedType.MULTI_MLU,
        DistributedType.MULTI_SDAA,
        DistributedType.MULTI_MUSA,
        DistributedType.MULTI_NPU,
        DistributedType.MULTI_XPU,
        DistributedType.MULTI_CPU,
        DistributedType.MULTI_HPU,
    ]:
        num_machines = _ask_field(
            "How many different machines will you use (use more than 1 for multi-node training)? [1]: ",
            int,
            default=1,
        )
        if num_machines > 1:
            machine_rank = _ask_options(
                "What is the rank of this machine?",
                list(range(num_machines)),
                int,
            )
            main_process_ip = _ask_field(
                "What is the IP address of the machine that will host the main process? ",
            )
            main_process_port = _ask_field(
                "What is the port you will use to communicate with the main process? ",
                int,
            )
            same_network = _ask_field(
                "Are all the machines on the same local network? Answer `no` if nodes are on the cloud and/or on different network hosts [YES/no]: ",
                _convert_yes_no_to_bool,
                default=True,
                error_message="Please enter yes or no.",
            )
            if not same_network:
                rdzv_backend = _ask_field(
                    "What rendezvous backend will you use? ('static', 'c10d', ...): ", default="static"
                )
        debug = _ask_field(
            "Should distributed operations be checked while running for errors? This can avoid timeout issues but will be slower. [yes/NO]: ",
            _convert_yes_no_to_bool,
            default=False,
            error_message="Please enter yes or no.",
        )

    if distributed_type == DistributedType.NO:
        use_cpu = _ask_field(
            "Do you want to run your training on CPU only (even if a GPU / Apple Silicon / Ascend NPU device is available)? [yes/NO]:",
            _convert_yes_no_to_bool,
            default=False,
            error_message="Please enter yes or no.",
        )
    elif distributed_type == DistributedType.MULTI_CPU:
        use_cpu = True
    else:
        use_cpu = False

    ipex_config = {}
    mpirun_config = {}
    if use_cpu or is_xpu_available():
        ipex_config["ipex"] = _ask_field(
            "Do you want to use Intel PyTorch Extension (IPEX) to speed up training on CPU/XPU? [yes/NO]:",
            _convert_yes_no_to_bool,
            default=False,
            error_message="Please enter yes or no.",
        )

    if use_cpu:
        if distributed_type == DistributedType.MULTI_CPU:
            use_mpirun = _ask_field(
                "Do you want accelerate to launch mpirun? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )
            if use_mpirun:
                mpirun_hostfile = _ask_field(
                    "Please enter the path to the hostfile to use with mpirun [~/hostfile]: ",
                    str,
                    default="~/hostfile",
                )
                mpirun_config["mpirun_hostfile"] = os.path.expanduser(mpirun_hostfile.strip())
                mpirun_config["mpirun_ccl"] = _ask_field("Enter the number of oneCCL worker threads [1]: ", default=1)

    dynamo_config = {}
    use_dynamo = _ask_field(
        "Do you wish to optimize your script with torch dynamo?[yes/NO]:",
        _convert_yes_no_to_bool,
        default=False,
        error_message="Please enter yes or no.",
    )
    if use_dynamo:
        prefix = "dynamo_"
        dynamo_config[prefix + "backend"] = _ask_options(
            "Which dynamo backend would you like to use?",
            [x.lower() for x in DYNAMO_BACKENDS],
            _convert_dynamo_backend,
            default=2,
        )
        use_custom_options = _ask_field(
            "Do you want to customize the defaults sent to torch.compile? [yes/NO]: ",
            _convert_yes_no_to_bool,
            default=False,
            error_message="Please enter yes or no.",
        )

        if use_custom_options:
            dynamo_config[prefix + "mode"] = _ask_options(
                "Which mode do you want to use?",
                TORCH_DYNAMO_MODES,
                lambda x: TORCH_DYNAMO_MODES[int(x)],
                default=0,
            )
            dynamo_config[prefix + "use_fullgraph"] = _ask_field(
                "Do you want the fullgraph mode or it is ok to break model into several subgraphs? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )
            dynamo_config[prefix + "use_dynamic"] = _ask_field(
                "Do you want to enable dynamic shape tracing? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )
            dynamo_config[prefix + "use_regional_compilation"] = _ask_field(
                "Do you want to enable regional compilation? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )

    use_mps = not use_cpu and is_mps_available()
    deepspeed_config = {}
    if (
        distributed_type
        in [
            DistributedType.MULTI_GPU,
            DistributedType.MULTI_XPU,
            DistributedType.MULTI_HPU,
            DistributedType.MULTI_NPU,
            DistributedType.MULTI_MLU,
            DistributedType.MULTI_SDAA,
            DistributedType.MULTI_MUSA,
            DistributedType.NO,
        ]
        and not use_mps
    ):
        use_deepspeed = _ask_field(
            "Do you want to use DeepSpeed? [yes/NO]: ",
            _convert_yes_no_to_bool,
            default=False,
            error_message="Please enter yes or no.",
        )
        if use_deepspeed:
            distributed_type = DistributedType.DEEPSPEED
            assert is_deepspeed_available(), (
                "DeepSpeed is not installed => run `pip3 install deepspeed` or build it from source"
            )

        if distributed_type == DistributedType.DEEPSPEED:
            use_deepspeed_config = _ask_field(
                "Do you want to specify a json file to a DeepSpeed config? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )
            if use_deepspeed_config:
                deepspeed_config["deepspeed_config_file"] = _ask_field(
                    "Please enter the path to the json DeepSpeed config file: ",
                    str,
                    default="none",
                )
            else:
                deepspeed_config["zero_stage"] = _ask_options(
                    "What should be your DeepSpeed's ZeRO optimization stage?",
                    [0, 1, 2, 3],
                    int,
                    default=2,
                )

                deepspeed_devices = ["none", "cpu", "nvme"]
                if deepspeed_config["zero_stage"] >= 2:
                    deepspeed_config["offload_optimizer_device"] = _ask_options(
                        "Where to offload optimizer states?", deepspeed_devices, lambda x: deepspeed_devices[int(x)]
                    )
                    deepspeed_config["offload_param_device"] = _ask_options(
                        "Where to offload parameters?", deepspeed_devices, lambda x: deepspeed_devices[int(x)]
                    )
                    if deepspeed_config["offload_param_device"] == "nvme":
                        deepspeed_config["offload_param_nvme_path"] = _ask_field(
                            "Nvme Path to offload parameters?",
                            str,
                            default="/nvme",
                        )
                    if deepspeed_config["offload_optimizer_device"] == "nvme":
                        deepspeed_config["offload_optimizer_nvme_path"] = _ask_field(
                            "Nvme Path to offload optimizer states?",
                            str,
                            default="/nvme",
                        )
                deepspeed_config["gradient_accumulation_steps"] = _ask_field(
                    "How many gradient accumulation steps you're passing in your script? [1]: ",
                    int,
                    default=1,
                )
                use_gradient_clipping = _ask_field(
                    "Do you want to use gradient clipping? [yes/NO]: ",
                    _convert_yes_no_to_bool,
                    default=False,
                    error_message="Please enter yes or no.",
                )
                if use_gradient_clipping:
                    deepspeed_config["gradient_clipping"] = _ask_field(
                        "What is the gradient clipping value? [1.0]: ",
                        float,
                        default=1.0,
                    )
                if deepspeed_config["zero_stage"] == 3:
                    deepspeed_config["zero3_save_16bit_model"] = _ask_field(
                        "Do you want to save 16-bit model weights when using ZeRO Stage-3? [yes/NO]: ",
                        _convert_yes_no_to_bool,
                        default=False,
                        error_message="Please enter yes or no.",
                    )
            deepspeed_config["zero3_init_flag"] = _ask_field(
                "Do you want to enable `deepspeed.zero.Init` when using ZeRO Stage-3 for constructing massive models? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )
            if deepspeed_config["zero3_init_flag"]:
                if not is_transformers_available():
                    raise Exception(
                        "When `zero3_init_flag` is set, it requires Transformers to be installed. "
                        "Please run `pip3 install transformers`."
                    )
            use_moe = _ask_field(
                "Do you want to enable Mixture-of-Experts training (MoE)? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )
            if use_moe:
                deepspeed_config["deepspeed_moe_layer_cls_names"] = _ask_field(
                    "Specify the comma-separated list of transformers MoE layer class names (case-sensitive), e.g : "
                    " `MixtralSparseMoeBlock`, `Qwen2MoeSparseMoeBlock`, `JetMoEAttention,JetMoEBlock` ... : ",
                    str,
                )

            if num_machines > 1:
                launcher_query = "Which Type of launcher do you want to use?"
                deepspeed_config["deepspeed_multinode_launcher"] = _ask_options(
                    launcher_query,
                    DEEPSPEED_MULTINODE_LAUNCHERS,
                    lambda x: DEEPSPEED_MULTINODE_LAUNCHERS[int(x)],
                )

                if deepspeed_config["deepspeed_multinode_launcher"] != DEEPSPEED_MULTINODE_LAUNCHERS[1]:
                    deepspeed_config["deepspeed_hostfile"] = _ask_field(
                        "DeepSpeed configures multi-node compute resources with hostfile. "
                        "Each row is of the format `hostname slots=[num_gpus]`, e.g., `localhost slots=2`; "
                        "for more information please refer official [documentation]"
                        "(https://www.deepspeed.ai/getting-started/#resource-configuration-multi-node). "
                        "Please specify the location of hostfile: ",
                        str,
                    )

                    is_exclusion_filter = _ask_field(
                        "Do you want to specify exclusion filter string? [yes/NO]: ",
                        _convert_yes_no_to_bool,
                        default=False,
                        error_message="Please enter yes or no.",
                    )
                    if is_exclusion_filter:
                        deepspeed_config["deepspeed_exclusion_filter"] = _ask_field(
                            "DeepSpeed exclusion filter string: ",
                            str,
                        )

                    is_inclusion_filter = _ask_field(
                        "Do you want to specify inclusion filter string? [yes/NO]: ",
                        _convert_yes_no_to_bool,
                        default=False,
                        error_message="Please enter yes or no.",
                    )
                    if is_inclusion_filter:
                        deepspeed_config["deepspeed_inclusion_filter"] = _ask_field(
                            "DeepSpeed inclusion filter string: ",
                            str,
                        )

    fsdp_config = {}

    if distributed_type in [
        DistributedType.MULTI_GPU,
        DistributedType.MULTI_NPU,
        DistributedType.MULTI_MLU,
        DistributedType.MULTI_SDAA,
        DistributedType.MULTI_MUSA,
        DistributedType.MULTI_XPU,
        DistributedType.MULTI_HPU,
    ]:
        use_fsdp = _ask_field(
            "Do you want to use FullyShardedDataParallel? [yes/NO]: ",
            _convert_yes_no_to_bool,
            default=False,
            error_message="Please enter yes or no.",
        )
        if use_fsdp:
            distributed_type = DistributedType.FSDP
        if distributed_type == DistributedType.FSDP:
            fsdp_config["fsdp_version"] = _ask_options(
                "What should be your FSDP version? [2]: ",
                [1, 2],
                lambda x: int(x) + 1,
                default=1,
            )
            fsdp_version = fsdp_config["fsdp_version"]  # extract to a variable to simplify usage later

            if fsdp_version == 1:
                sharding_strategy_query = "What should be your sharding strategy?"
                fsdp_config["fsdp_reshard_after_forward"] = _ask_options(
                    sharding_strategy_query,
                    FSDP_SHARDING_STRATEGY,
                    lambda x: FSDP_SHARDING_STRATEGY[int(x)],
                )
            else:
                fsdp_config["fsdp_reshard_after_forward"] = _ask_field(
                    "Do you want to enable resharding after forward? [YES/no]: ",
                    _convert_yes_no_to_bool,
                    default=True,
                    error_message="Please enter yes or no.",
                )

            fsdp_config["fsdp_offload_params"] = _ask_field(
                "Do you want to offload parameters and gradients to CPU? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )

            fsdp_wrap_query = "What should be your auto wrap policy?"
            fsdp_config["fsdp_auto_wrap_policy"] = _ask_options(
                fsdp_wrap_query,
                FSDP_AUTO_WRAP_POLICY,
                lambda x: FSDP_AUTO_WRAP_POLICY[int(x)],
            )
            if fsdp_config["fsdp_auto_wrap_policy"] == FSDP_AUTO_WRAP_POLICY[0]:
                use_no_split_modules = _ask_field(
                    "Do you want to use the model's `_no_split_modules` to wrap. Only applicable for 🤗 Transformers [yes/NO]: ",
                    _convert_yes_no_to_bool,
                    default=False,
                    error_message="Please enter yes or no.",
                )
                if not use_no_split_modules:
                    fsdp_config["fsdp_transformer_layer_cls_to_wrap"] = _ask_field(
                        "Specify the comma-separated list of transformer layer class names (case-sensitive) to wrap ,e.g, :"
                        "`BertLayer`, `GPTJBlock`, `T5Block`, `BertLayer,BertEmbeddings,BertSelfOutput` ...? : ",
                        str,
                    )
            elif fsdp_config["fsdp_auto_wrap_policy"] == FSDP_AUTO_WRAP_POLICY[1]:
                fsdp_config["fsdp_min_num_params"] = _ask_field(
                    "What should be your FSDP's minimum number of parameters for Default Auto Wrapping Policy? [1e8]: ",
                    int,
                    default=100000000,
                )
            # Removed in FSDP2, ask for user input for FSDP1
            if fsdp_version == 1:
                fsdp_backward_prefetch_query = "What should be your FSDP's backward prefetch policy?"
                fsdp_config["fsdp_backward_prefetch"] = _ask_options(
                    fsdp_backward_prefetch_query,
                    FSDP_BACKWARD_PREFETCH,
                    lambda x: FSDP_BACKWARD_PREFETCH[int(x)],
                )

            fsdp_state_dict_type_query = "What should be your FSDP's state dict type?"
            fsdp_config["fsdp_state_dict_type"] = _ask_options(
                fsdp_state_dict_type_query,
                FSDP_STATE_DICT_TYPE if fsdp_version == 1 else FSDP2_STATE_DICT_TYPE,
                lambda x: FSDP_STATE_DICT_TYPE[int(x)] if fsdp_version == 1 else FSDP2_STATE_DICT_TYPE[int(x)],
                default=0,
            )
            # Not implemented in FSDP2, ask for user input for FSDP1
            if fsdp_version == 1:
                fsdp_config["fsdp_forward_prefetch"] = _ask_field(
                    "Do you want to enable FSDP's forward prefetch policy? [yes/NO]: ",
                    _convert_yes_no_to_bool,
                    default=False,
                    error_message="Please enter yes or no.",
                )
            # Obsolete in FSDP2, ask for user input for FSDP1
            if fsdp_version == 1:
                fsdp_config["fsdp_use_orig_params"] = _ask_field(
                    "Do you want to enable FSDP's `use_orig_params` feature? [YES/no]: ",
                    _convert_yes_no_to_bool,
                    default=True,
                    error_message="Please enter yes or no.",
                )
            fsdp_config["fsdp_cpu_ram_efficient_loading"] = _ask_field(
                "Do you want to enable CPU RAM efficient model loading? Only applicable for 🤗 Transformers models. [YES/no]: ",
                _convert_yes_no_to_bool,
                default=True,
                error_message="Please enter yes or no.",
            )
            # Obsolete in FSDP2, ask for user input for FSDP1
            if fsdp_version == 1:
                if fsdp_config["fsdp_cpu_ram_efficient_loading"]:
                    fsdp_config["fsdp_sync_module_states"] = True
                else:
                    fsdp_config["fsdp_sync_module_states"] = _ask_field(
                        "Do you want each individually wrapped FSDP unit to broadcast module parameters from rank 0 at the start? [YES/no]: ",
                        _convert_yes_no_to_bool,
                        default=True,
                        error_message="Please enter yes or no.",
                    )
            fsdp_config["fsdp_activation_checkpointing"] = _ask_field(
                "Do you want to enable FSDP activation checkpointing? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )

    parallelism_config = {}

    if fsdp_config.get("fsdp_version", 1) == 2:
        use_parallelism_config = _ask_field(
            "Do you want to use the parallelism config? [yes/NO]: ",
            _convert_yes_no_to_bool,
            default=False,
            error_message="Please enter yes or no.",
        )

        if use_parallelism_config:
            prefix = "parallelism_config_"
            parallelism_config[prefix + "dp_replicate_size"] = _ask_field(
                "What is the data parallelism replicate size? [1]: ",
                int,
                default=1,
                error_message="Please enter an integer.",
            )

            parallelism_config[prefix + "dp_shard_size"] = _ask_field(
                "What is the FSDP shard size? [1]: ",
                int,
                default=1,
                error_message="Please enter an integer.",
            )

            parallelism_config[prefix + "tp_size"] = _ask_field(
                "What is the tensor parallelism size? [1]: ",
                int,
                default=1,
                error_message="Please enter an integer.",
            )

            parallelism_config[prefix + "cp_size"] = _ask_field(
                "What is the context parallelism size? [1]: ",
                int,
                default=1,
                error_message="Please enter an integer.",
            )
            if parallelism_config[prefix + "cp_size"] > 1:
                parallelism_config[prefix + "cp_comm_strategy"] = _ask_options(
                    "What is the compute parallelism communication strategy?",
                    ["allgather", "alltoall"],
                    lambda x: ["allgather", "alltoall"][int(x)],
                    default=0,
                )

    megatron_lm_config = {}
    if distributed_type in [DistributedType.MULTI_GPU]:
        use_megatron_lm = _ask_field(
            "Do you want to use Megatron-LM ? [yes/NO]: ",
            _convert_yes_no_to_bool,
            default=False,
            error_message="Please enter yes or no.",
        )
        if use_megatron_lm:
            distributed_type = DistributedType.MEGATRON_LM
        if distributed_type == DistributedType.MEGATRON_LM:
            prefix = "megatron_lm_"
            megatron_lm_config[prefix + "tp_degree"] = _ask_field(
                "What is the Tensor Parallelism degree/size? [1]:",
                int,
                default=1,
                error_message="Please enter an integer.",
            )
            if megatron_lm_config[prefix + "tp_degree"] > 1:
                megatron_lm_config[prefix + "sequence_parallelism"] = _ask_field(
                    "Do you want to enable Sequence Parallelism? [YES/no]: ",
                    _convert_yes_no_to_bool,
                    default=True,
                    error_message="Please enter yes or no.",
                )

            megatron_lm_config[prefix + "pp_degree"] = _ask_field(
                "What is the Pipeline Parallelism degree/size? [1]:",
                int,
                default=1,
                error_message="Please enter an integer.",
            )
            if megatron_lm_config[prefix + "pp_degree"] > 1:
                megatron_lm_config[prefix + "num_micro_batches"] = _ask_field(
                    "What is the number of micro-batches? [1]:",
                    int,
                    default=1,
                    error_message="Please enter an integer.",
                )

            megatron_lm_config[prefix + "recompute_activations"] = _ask_field(
                "Do you want to enable selective activation recomputation? [YES/no]: ",
                _convert_yes_no_to_bool,
                default=True,
                error_message="Please enter yes or no.",
            )

            megatron_lm_config[prefix + "use_distributed_optimizer"] = _ask_field(
                "Do you want to use distributed optimizer "
                "which shards optimizer state and gradients across data parallel ranks? [YES/no]: ",
                _convert_yes_no_to_bool,
                default=True,
                error_message="Please enter yes or no.",
            )

            megatron_lm_config[prefix + "gradient_clipping"] = _ask_field(
                "What is the gradient clipping value based on global L2 Norm (0 to disable)? [1.0]: ",
                float,
                default=1.0,
            )
    # TPU specific defaults
    tpu_commands = None
    tpu_command_file = None
    tpu_downcast_bf16 = "no"
    tpu_env = []
    tpu_name = None
    tpu_vm = None
    tpu_zone = None
    tpu_use_sudo = False
    tpu_use_cluster = False

    if distributed_type in [
        DistributedType.MULTI_CPU,
        DistributedType.MULTI_XPU,
        DistributedType.MULTI_HPU,
        DistributedType.MULTI_GPU,
        DistributedType.MULTI_MLU,
        DistributedType.MULTI_SDAA,
        DistributedType.MULTI_MUSA,
        DistributedType.MULTI_NPU,
        DistributedType.XLA,
    ]:
        machine_type = str(distributed_type).split(".")[1].replace("MULTI_", "")
        if machine_type == "TPU":
            machine_type += " cores"
        elif machine_type == "CPU":
            machine_type = "processes"
        else:
            machine_type += "(s)"
        num_processes = _ask_field(
            f"How many {machine_type} should be used for distributed training? [1]:",
            int,
            default=1,
            error_message="Please enter an integer.",
        )
    elif distributed_type in [DistributedType.FSDP, DistributedType.DEEPSPEED, DistributedType.MEGATRON_LM]:
        num_processes = _ask_field(
            "How many GPU(s) should be used for distributed training? [1]:",
            int,
            default=1,
            error_message="Please enter an integer.",
        )
    else:
        num_processes = 1

    if (distributed_type == DistributedType.MULTI_GPU) and (num_machines == 1) and (num_processes == 1):
        raise ValueError(
            f"Specified distributed type {distributed_type} but only using 1 GPU on a single machine. Please select `No distributed training` for the type of machine you are using."
        )

    if (
        distributed_type
        in [
            DistributedType.MULTI_GPU,
            DistributedType.MULTI_MLU,
            DistributedType.MULTI_SDAA,
            DistributedType.MULTI_MUSA,
            DistributedType.MULTI_NPU,
            DistributedType.MULTI_XPU,
            DistributedType.MULTI_HPU,
            DistributedType.NO,
        ]
        and not use_cpu
        and not use_mps
    ):
        if is_npu_available():
            machine_type = "NPU(s)"
        elif is_mlu_available():
            machine_type = "MLU(s)"
        elif is_sdaa_available():
            machine_type = "SDAA(s)"
        elif is_musa_available():
            machine_type = "MUSA(s)"
        elif is_xpu_available():
            machine_type = "XPU(s)"
        elif is_hpu_available():
            machine_type = "HPU(s)"
        else:
            machine_type = "GPU(s)"
        gpu_ids = _ask_field(
            f"What {machine_type} (by id) should be used for training on this machine as a comma-separated list? [all]:",
            default="all",
        )

    # CPU affinity is only supported on NVIDIA hardware for now
    enable_cpu_affinity = False
    if distributed_type in (DistributedType.NO, DistributedType.MULTI_GPU) and not use_cpu and not use_mps:
        enable_cpu_affinity = _ask_field(
            "Would you like to enable numa efficiency? (Currently only supported on NVIDIA hardware). [yes/NO]: ",
            _convert_yes_no_to_bool,
            default=False,
            error_message="Please enter yes or no.",
        )

    fp8_config = None
    if distributed_type == DistributedType.XLA:
        mixed_precision = "no"
        main_training_function = _ask_field(
            "What is the name of the function in your script that should be launched in all parallel scripts? [main]: ",
            default="main",
        )
        tpu_use_cluster = _ask_field(
            "Are you using a TPU cluster? [yes/NO]: ",
            _convert_yes_no_to_bool,
            default=False,
            error_message="Please enter yes or no.",
        )
        if tpu_use_cluster:
            tpu_name = _ask_field(
                "What is the name of your TPU cluster? ",
                default=None,
                error_message="Please enter the name of your TPU cluster.",
            )
            tpu_zone = _ask_field(
                "What is the zone of your TPU cluster? ",
                default=None,
                error_message="Please enter the zone of your TPU cluster.",
            )
            tpu_use_sudo = _ask_field(
                "To run a python script in a TPU pod, should `sudo` be used? [yes/NO]: ",
                default=False,
                error_message="Please enter yes or no.",
            )
            run_commands = _ask_field(
                "Do you have code you wish to run on startup in each pod? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )
            if run_commands:
                use_command_file = _ask_field(
                    "Is this code located in a bash script? [yes/NO]: ",
                    _convert_yes_no_to_bool,
                    default=False,
                    error_message="Please enter yes or no.",
                )
                if use_command_file:
                    tpu_command_file = _ask_field(
                        "What is the path to your bash script? ",
                        default=None,
                        error_message="Please enter the path to your bash script.",
                    )
                    tpu_command_file = os.path.abspath(tpu_command_file)
                else:
                    print("Please enter each command separately you wish to run on startup in each pod.")
                    tpu_commands = []
                    another_command = True
                    while another_command:
                        tpu_commands.append(
                            _ask_field(
                                "Please enter a single command to be ran ",
                                default=None,
                                error_message="Please enter the commands you wish to run on startup in each pod as a single string.",
                            )
                        )
                        another_command = _ask_field(
                            "Do you wish to add another command? [yes/NO]: ",
                            _convert_yes_no_to_bool,
                            default=False,
                            error_message="Please enter yes or no.",
                        )
            tpu_vm = _ask_field(
                "If not using an instance group, what are the names of the Compute VM instances to be used, separated by a comma: ",
                default="",
            ).split(",")
            tpu_env = _ask_field(
                "What environment variables do you wish to set in each pod, separated by a comma: ",
                default="",
            ).split(",")

    else:
        main_training_function = "main"
        if distributed_type == DistributedType.DEEPSPEED and use_deepspeed_config:
            mixed_precision = None
        else:
            mixed_precision = _ask_options(
                "Do you wish to use mixed precision?",
                ["no", "fp16", "bf16", "fp8"],
                _convert_mixed_precision,
            )
            if mixed_precision == "fp8":
                if not is_fp8_available():
                    raise ValueError("FP8 (either Transformer Engine or MSAMP) is not installed on this machine.")
                fp8_config = {}
                fp8_config["backend"] = _ask_options(
                    "Which FP8 backend do you want to use?",
                    ["te", "msamp"],
                    _convert_fp8_backend,
                )
                if fp8_config["backend"] == "TE":
                    if not is_transformer_engine_available():
                        raise ValueError("TransformersEngine was selected, but it is not installed on this machine.")
                    fp8_config["use_autocast_during_eval"] = _ask_field(
                        "Do you want to use FP8 autocast during eval mode? Generally better metrics are found when this is disabled [yes/NO]: ",
                        _convert_yes_no_to_bool,
                        default=False,
                    )
                    fp8_config["margin"] = _ask_field(
                        "What margin should be used for gradient scaling? [0]: ",
                        int,
                        default=0,
                    )
                    fp8_config["interval"] = _ask_field(
                        "What interval should be used for for how often the scaling factor is recomputed? [1]: ",
                        int,
                        default=1,
                    )
                    fp8_config["fp8_format"] = _ask_options(
                        "Which weight format should be used?",
                        ["HYBRID", "E4M3", "E5M2"],
                        lambda i: ["HYBRID", "E4M3", "E5M2"][i],
                        default=0,
                    )
                    fp8_config["amax_history_length"] = _ask_field(
                        "What length of history should be used for the amax scaling factor computation? [1024]: ",
                        int,
                        default=1024,
                    )
                    fp8_config["amax_compute_algorithm"] = _ask_options(
                        "Which algorithm should be used for the amax scaling factor computation?",
                        ["max", "most_recent"],
                        lambda x: "max" if x == 0 else "most_recent",
                        default=0,
                    )
                    fp8_config["override_linear_precision"] = _ask_field(
                        "Do you want to to execute `fprop`, `dgrad`, and `wgrad` GEMMS in higher precision? [yes/NO]: ",
                        _convert_yes_no_to_bool,
                        default=False,
                    )
                    if fp8_config["override_linear_precision"]:
                        fprop = _ask_field(
                            "Should `fprop` be executed in higher precision? [yes/NO]: ",
                            _convert_yes_no_to_bool,
                            default=False,
                        )
                        dgrad = _ask_field(
                            "Should `dgrad` be executed in higher precision? [yes/NO]: ",
                            _convert_yes_no_to_bool,
                            default=False,
                        )
                        wgrad = _ask_field(
                            "Should `wgrad` be executed in higher precision? [yes/NO]: ",
                            _convert_yes_no_to_bool,
                            default=False,
                        )
                        fp8_config["override_linear_precision"] = (fprop, dgrad, wgrad)
                    else:
                        fp8_config["override_linear_precision"] = (False, False, False)

                elif fp8_config["backend"] == "MSAMP":
                    if not is_msamp_available():
                        raise ValueError("MSAMP was selected, but it is not installed on this machine.")
                    fp8_config["optimization_level"] = _ask_options(
                        "Which optimization level should be used?",
                        ["O1", "O2"],
                        lambda x: "O1" if x == 0 else "O2",
                        default=1,
                    )

    if use_dynamo and mixed_precision == "no" and not use_cpu:
        print(
            "Torch dynamo used without mixed precision requires TF32 to be efficient. Accelerate will enable it by default when launching your scripts."
        )

    if distributed_type == DistributedType.XLA and mixed_precision == "bf16":
        tpu_downcast_bf16 = _ask_field(
            "Should `torch.float` be cast as `bfloat16` and `torch.double` remain `float32` on TPUs?", default="no"
        )

    return ClusterConfig(
        compute_environment=ComputeEnvironment.LOCAL_MACHINE,
        distributed_type=distributed_type,
        num_processes=num_processes,
        gpu_ids=gpu_ids,
        mixed_precision=mixed_precision,
        downcast_bf16=tpu_downcast_bf16,
        machine_rank=machine_rank,
        num_machines=num_machines,
        main_process_ip=main_process_ip,
        main_process_port=main_process_port,
        main_training_function=main_training_function,
        fp8_config=fp8_config,
        deepspeed_config=deepspeed_config,
        fsdp_config=fsdp_config,
        parallelism_config=parallelism_config,
        megatron_lm_config=megatron_lm_config,
        ipex_config=ipex_config,
        mpirun_config=mpirun_config,
        use_cpu=use_cpu,
        rdzv_backend=rdzv_backend,
        same_network=same_network,
        commands=tpu_commands,
        command_file=tpu_command_file,
        tpu_env=tpu_env,
        tpu_name=tpu_name,
        tpu_vm=tpu_vm,
        tpu_zone=tpu_zone,
        tpu_use_sudo=tpu_use_sudo,
        tpu_use_cluster=tpu_use_cluster,
        dynamo_config=dynamo_config,
        debug=debug,
        enable_cpu_affinity=enable_cpu_affinity,
    )