DeviceAccelerator.h 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. #pragma once
  2. #include <c10/core/CachingDeviceAllocator.h>
  3. #include <c10/core/DeviceType.h>
  4. #include <c10/macros/Macros.h>
  5. #include <ATen/detail/MTIAHooksInterface.h>
  6. #include <optional>
  7. namespace at::accelerator {
  8. // Note [Accelerator Concept]
  9. // This file defines the top-level Accelerator concept for PyTorch.
  10. // A device is an accelerator per the definition here if:
  11. // - It is mutually exclusive with all other accelerators
  12. // - It performs asynchronous compute via a Stream/Event system
  13. // - It provides a set of common APIs as defined by AcceleratorHooksInterface
  14. //
  15. // As of today, accelerator devices are (in no particular order):
  16. // CUDA, MTIA, XPU, HIP, MPS, PrivateUse1
  17. // Ensure that only one accelerator is available (at
  18. // compile time if possible) and return its device type.
  19. // When checked is true (the default is false), the returned optional always has a value.
  20. TORCH_API std::optional<c10::DeviceType> getAccelerator(bool checked = false);
  21. // Check if the given device type is an accelerator.
  22. TORCH_API bool isAccelerator(c10::DeviceType device_type);
  23. // Check if the given device type is an accelerator, not the excluded ones.
  24. template <
  25. typename... T,
  26. typename = std::enable_if_t<(std::is_same_v<T, c10::DeviceType> && ...)>>
  27. inline bool isAcceleratorExcluded(
  28. c10::DeviceType device_type,
  29. c10::DeviceType first_excluded,
  30. T... rest_excluded) {
  31. if constexpr (sizeof...(rest_excluded) > 0) {
  32. return device_type != first_excluded &&
  33. isAcceleratorExcluded(device_type, rest_excluded...);
  34. } else {
  35. return device_type != first_excluded && isAccelerator(device_type);
  36. }
  37. }
  38. // Return the number of devices available. Note that this is *REQUIRED* to
  39. // not raise any exception.
  40. TORCH_API c10::DeviceIndex deviceCount();
  41. // Set the current device index to the given device index.
  42. TORCH_API void setDeviceIndex(c10::DeviceIndex device_index);
  43. // Get the current device index.
  44. TORCH_API c10::DeviceIndex getDeviceIndex();
  45. // Set the current stream to the given stream. Note that this API doesn't change
  46. // the current device index.
  47. TORCH_API void setCurrentStream(c10::Stream stream);
  48. // Get the current stream of the given device index.
  49. TORCH_API c10::Stream getCurrentStream(c10::DeviceIndex device_index);
  50. // Wait (by blocking the calling thread) until all the work previously enqueued
  51. // on the given device index has been completed.
  52. TORCH_API void synchronizeDevice(c10::DeviceIndex device_index);
  53. // Set the current device index to the given device_index and return the
  54. // original device index that was active before the change.
  55. TORCH_API c10::DeviceIndex exchangeDevice(c10::DeviceIndex device_index);
  56. // Set the current device index to the given device_index, but avoid creating a new
  57. // context if the context for device_index is not initialized. Return the
  58. // original device index that was active before the change.
  59. TORCH_API c10::DeviceIndex maybeExchangeDevice(c10::DeviceIndex device_index);
  60. TORCH_API inline void emptyCache() {
  61. const auto device_type = getAccelerator(true).value();
  62. at::getDeviceAllocator(device_type)->emptyCache();
  63. }
  64. TORCH_API inline at::CachingDeviceAllocator::DeviceStats getDeviceStats(
  65. c10::DeviceIndex device_index) {
  66. const auto device_type = getAccelerator(true).value();
  67. return at::getDeviceAllocator(device_type)->getDeviceStats(device_index);
  68. }
  69. TORCH_API inline void resetAccumulatedStats(c10::DeviceIndex device_index) {
  70. const auto device_type = getAccelerator(true).value();
  71. at::getDeviceAllocator(device_type)->resetAccumulatedStats(device_index);
  72. }
  73. TORCH_API inline void resetPeakStats(c10::DeviceIndex device_index) {
  74. const auto device_type = getAccelerator(true).value();
  75. at::getDeviceAllocator(device_type)->resetPeakStats(device_index);
  76. }
  77. } // namespace at::accelerator
  78. namespace at {
  79. // Kept for backward compatibility (BC) only: expose the at::accelerator functions as at::getAccelerator and at::isAccelerator.
  80. using at::accelerator::getAccelerator;
  81. using at::accelerator::isAccelerator;
  82. } // namespace at