registry.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. # Copyright (c) Facebook, Inc. and its affiliates.
  2. # All rights reserved.
  3. #
  4. # This source code is licensed under the BSD-style license found in the
  5. # LICENSE file in the root directory of this source tree.
  6. import logging
  7. import sys
  8. from .api import (
  9. rendezvous_handler_registry as handler_registry,
  10. RendezvousHandler,
  11. RendezvousParameters,
  12. )
  13. from .dynamic_rendezvous import create_handler
  14. if sys.version_info < (3, 10):
  15. from importlib_metadata import entry_points
  16. else:
  17. from importlib.metadata import entry_points
  18. log = logging.getLogger(__name__)
  19. __all__ = ["get_rendezvous_handler"]
  def _create_static_handler(params: RendezvousParameters) -> RendezvousHandler:
      """Create a rendezvous handler through the ``static_tcp_rendezvous`` module.

      Args:
          params: Rendezvous parameters, passed unchanged to
              ``static_tcp_rendezvous.create_rdzv_handler``.

      Returns:
          Whatever ``static_tcp_rendezvous.create_rdzv_handler`` returns.
      """
      # The backend module is imported at call time, not at module import time
      # (presumably so it is only loaded when this backend is requested).
      from . import static_tcp_rendezvous as static_module

      make_handler = static_module.create_rdzv_handler
      return make_handler(params)
  def _create_etcd_handler(params: RendezvousParameters) -> RendezvousHandler:
      """Create a rendezvous handler through the ``etcd_rendezvous`` module.

      Args:
          params: Rendezvous parameters, passed unchanged to
              ``etcd_rendezvous.create_rdzv_handler``.

      Returns:
          Whatever ``etcd_rendezvous.create_rdzv_handler`` returns.
      """
      # The backend module is imported at call time, not at module import time
      # (presumably so it is only loaded when this backend is requested).
      from . import etcd_rendezvous as etcd_module

      make_handler = etcd_module.create_rdzv_handler
      return make_handler(params)
  def _create_etcd_v2_handler(params: RendezvousParameters) -> RendezvousHandler:
      """Create a rendezvous handler on top of the etcd rendezvous backend.

      ``etcd_rendezvous_backend.create_backend`` yields a ``(backend, store)``
      pair, which is handed to ``create_handler`` together with ``params``.

      Args:
          params: Rendezvous parameters, used both to create the backend and
              to create the handler.

      Returns:
          The handler returned by ``create_handler``.
      """
      # Imported at call time, not at module import time.
      from .etcd_rendezvous_backend import create_backend as make_backend

      rdzv_backend, rdzv_store = make_backend(params)
      # Note the argument order: ``create_handler`` takes the store first.
      return create_handler(rdzv_store, rdzv_backend, params)
  def _create_c10d_handler(params: RendezvousParameters) -> RendezvousHandler:
      """Create a rendezvous handler on top of the c10d rendezvous backend.

      ``c10d_rendezvous_backend.create_backend`` yields a ``(backend, store)``
      pair, which is handed to ``create_handler`` together with ``params``.

      Args:
          params: Rendezvous parameters, used both to create the backend and
              to create the handler.

      Returns:
          The handler returned by ``create_handler``.
      """
      # Imported at call time, not at module import time.
      from .c10d_rendezvous_backend import create_backend as make_backend

      rdzv_backend, rdzv_store = make_backend(params)
      # Note the argument order: ``create_handler`` takes the store first.
      return create_handler(rdzv_store, rdzv_backend, params)
  def _register_default_handlers() -> None:
      """Register the in-tree handler creators with the handler registry.

      The backend names ``etcd``, ``etcd-v2``, ``c10d`` and ``static`` are
      registered, in that order, each mapped to its ``_create_*_handler``
      function from this module.
      """
      builtin_creators = (
          ("etcd", _create_etcd_handler),
          ("etcd-v2", _create_etcd_v2_handler),
          ("c10d", _create_c10d_handler),
          ("static", _create_static_handler),
      )
      for backend_name, creator in builtin_creators:
          handler_registry.register(backend_name, creator)
  def _register_out_of_tree_handlers() -> None:
      """Discover and register handlers published by installed packages.

      Every entry point in the ``torchrun.handlers`` group is loaded. The loaded
      object is called with no arguments, and the value it returns is registered
      with the handler registry under the entry point's name.

      Registration is best-effort: any exception raised while loading, calling
      or registering a plugin is logged as a warning (with traceback) and the
      loop moves on to the next entry point.
      """
      for handler_generator in entry_points(group="torchrun.handlers"):
          try:
              # Load the entry point being iterated. Looking it up again by
              # name in the discovered collection is a redundant scan, and on a
              # name collision it would always resolve to the first match.
              get_handler = handler_generator.load()
              handler_registry.register(handler_generator.name, get_handler())
          except Exception:
              log.warning(
                  "Exception while registering out of tree plugin %s: ",
                  handler_generator.name,
                  exc_info=True,
              )
  def get_rendezvous_handler(params: RendezvousParameters) -> RendezvousHandler:
      """
      Obtain a reference to a :py:class:`RendezvousHandler`.

      The handler is produced by calling ``create_handler(params)`` on the
      rendezvous handler registry.

      Custom rendezvous handlers can be registered by

      ::

        from torch.distributed.elastic.rendezvous import rendezvous_handler_registry
        from torch.distributed.elastic.rendezvous.registry import get_rendezvous_handler

        def create_my_rdzv(params: RendezvousParameters):
          return MyCustomRdzv(params)

        rendezvous_handler_registry.register("my_rdzv_backend_name", create_my_rdzv)

        # ``params`` is a RendezvousParameters instance. Presumably it must
        # name "my_rdzv_backend_name" as its backend for the registry to pick
        # ``create_my_rdzv`` -- confirm against the registry's create_handler.
        my_rdzv_handler = get_rendezvous_handler(params)
      """
      handler = handler_registry.create_handler(params)
      return handler