CUDAHooksInterface.h 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. #pragma once
  2. #include <c10/core/Allocator.h>
  3. #include <c10/util/Exception.h>
  4. #include <c10/util/Registry.h>
  5. #include <ATen/detail/AcceleratorHooksInterface.h>
  6. // NB: Class must live in `at` due to limitations of Registry.h.
  7. namespace at {
  8. // Forward-declares at::cuda::NVRTC
  9. namespace cuda {
  10. struct NVRTC;
  11. } // namespace cuda
  12. #ifdef _MSC_VER
  13. constexpr const char* CUDA_HELP =
  14. "PyTorch splits its backend into two shared libraries: a CPU library "
  15. "and a CUDA library; this error has occurred because you are trying "
  16. "to use some CUDA functionality, but the CUDA library has not been "
  17. "loaded by the dynamic linker for some reason. The CUDA library MUST "
  18. "be loaded, EVEN IF you don't directly use any symbols from the CUDA library! "
  19. "One common culprit is a lack of -INCLUDE:?warp_size@cuda@at@@YAHXZ "
  20. "in your link arguments; many dynamic linkers will delete dynamic library "
  21. "dependencies if you don't depend on any of their symbols. You can check "
  22. "if this has occurred by using link on your binary to see if there is a "
  23. "dependency on *_cuda.dll library.";
  24. #else
  25. constexpr const char* CUDA_HELP =
  26. "PyTorch splits its backend into two shared libraries: a CPU library "
  27. "and a CUDA library; this error has occurred because you are trying "
  28. "to use some CUDA functionality, but the CUDA library has not been "
  29. "loaded by the dynamic linker for some reason. The CUDA library MUST "
  30. "be loaded, EVEN IF you don't directly use any symbols from the CUDA library! "
  31. "One common culprit is a lack of -Wl,--no-as-needed in your link arguments; many "
  32. "dynamic linkers will delete dynamic library dependencies if you don't "
  33. "depend on any of their symbols. You can check if this has occurred by "
  34. "using ldd on your binary to see if there is a dependency on *_cuda.so "
  35. "library.";
  36. #endif
  37. // The CUDAHooksInterface is an omnibus interface for any CUDA functionality
  38. // which we may want to call into from CPU code (and thus must be dynamically
  39. // dispatched, to allow for separate compilation of CUDA code). How do I
  40. // decide if a function should live in this class? There are two tests:
  41. //
  42. // 1. Does the *implementation* of this function require linking against
  43. // CUDA libraries?
  44. //
  45. // 2. Is this function *called* from non-CUDA ATen code?
  46. //
  47. // (2) should filter out many ostensible use-cases, since many times a CUDA
  48. // function provided by ATen is only really ever used by actual CUDA code.
  49. //
  50. // TODO: Consider putting the stub definitions in another class, so that one
  51. // never forgets to implement each virtual function in the real implementation
  52. // in CUDAHooks. This probably doesn't buy us much though.
  53. struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface {
  54. // This should never actually be implemented, but it is used to
  55. // squelch -Werror=non-virtual-dtor
  56. ~CUDAHooksInterface() override = default;
  57. // Initialize THCState and, transitively, the CUDA state
  58. void init() const override {
  59. TORCH_CHECK(false, "Cannot initialize CUDA without ATen_cuda library. ", CUDA_HELP);
  60. }
  61. const Generator& getDefaultGenerator(
  62. [[maybe_unused]] DeviceIndex device_index = -1) const override {
  63. TORCH_CHECK(
  64. false,
  65. "Cannot get default CUDA generator without ATen_cuda library. ",
  66. CUDA_HELP);
  67. }
  68. Generator getNewGenerator(
  69. [[maybe_unused]] DeviceIndex device_index = -1) const override {
  70. TORCH_CHECK(
  71. false,
  72. "Cannot get CUDA generator without ATen_cuda library. ",
  73. CUDA_HELP);
  74. }
  75. Device getDeviceFromPtr(void* /*data*/) const override {
  76. TORCH_CHECK(false, "Cannot get device of pointer on CUDA without ATen_cuda library. ", CUDA_HELP);
  77. }
  78. bool isPinnedPtr(const void* /*data*/) const override {
  79. return false;
  80. }
  81. virtual bool hasCUDA() const {
  82. return false;
  83. }
  84. virtual bool hasCUDART() const {
  85. return false;
  86. }
  87. virtual bool hasMAGMA() const {
  88. return false;
  89. }
  90. virtual bool hasCuDNN() const {
  91. return false;
  92. }
  93. virtual bool hasCuSOLVER() const {
  94. return false;
  95. }
  96. virtual bool hasCuBLASLt() const {
  97. return false;
  98. }
  99. virtual bool hasROCM() const {
  100. return false;
  101. }
  102. virtual bool hasCKSDPA() const {
  103. return false;
  104. }
  105. virtual bool hasCKGEMM() const {
  106. return false;
  107. }
  108. virtual const at::cuda::NVRTC& nvrtc() const {
  109. TORCH_CHECK(false, "NVRTC requires CUDA. ", CUDA_HELP);
  110. }
  111. bool hasPrimaryContext(DeviceIndex device_index) const override {
  112. TORCH_CHECK(false, "Cannot call hasPrimaryContext(", device_index, ") without ATen_cuda library. ", CUDA_HELP);
  113. }
  114. virtual DeviceIndex current_device() const {
  115. return -1;
  116. }
  117. Allocator* getPinnedMemoryAllocator() const override {
  118. TORCH_CHECK(false, "Pinned memory requires CUDA. ", CUDA_HELP);
  119. }
  120. virtual Allocator* getCUDADeviceAllocator() const {
  121. TORCH_CHECK(false, "CUDADeviceAllocator requires CUDA. ", CUDA_HELP);
  122. }
  123. virtual bool compiledWithCuDNN() const {
  124. return false;
  125. }
  126. virtual bool compiledWithMIOpen() const {
  127. return false;
  128. }
  129. virtual bool supportsDilatedConvolutionWithCuDNN() const {
  130. return false;
  131. }
  132. virtual bool supportsDepthwiseConvolutionWithCuDNN() const {
  133. return false;
  134. }
  135. virtual bool supportsBFloat16ConvolutionWithCuDNNv8() const {
  136. return false;
  137. }
  138. virtual long versionCuDNN() const {
  139. TORCH_CHECK(false, "Cannot query cuDNN version without ATen_cuda library. ", CUDA_HELP);
  140. }
  141. virtual long versionRuntimeCuDNN() const {
  142. TORCH_CHECK(false, "Cannot query cuDNN version without ATen_cuda library. ", CUDA_HELP);
  143. }
  144. virtual long versionCuDNNFrontend() const {
  145. TORCH_CHECK(false, "Cannot query cuDNN Frontend version without ATen_cuda library. ", CUDA_HELP);
  146. }
  147. virtual long versionMIOpen() const {
  148. TORCH_CHECK(false, "Cannot query MIOpen version without ATen_cuda library. ", CUDA_HELP);
  149. }
  150. virtual long versionCUDART() const {
  151. TORCH_CHECK(false, "Cannot query CUDART version without ATen_cuda library. ", CUDA_HELP);
  152. }
  153. virtual std::string showConfig() const {
  154. TORCH_CHECK(false, "Cannot query detailed CUDA version without ATen_cuda library. ", CUDA_HELP);
  155. }
  156. virtual double batchnormMinEpsilonCuDNN() const {
  157. TORCH_CHECK(false,
  158. "Cannot query batchnormMinEpsilonCuDNN() without ATen_cuda library. ", CUDA_HELP);
  159. }
  160. virtual int64_t cuFFTGetPlanCacheMaxSize(DeviceIndex /*device_index*/) const {
  161. TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP);
  162. }
  163. virtual void cuFFTSetPlanCacheMaxSize(DeviceIndex /*device_index*/, int64_t /*max_size*/) const {
  164. TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP);
  165. }
  166. virtual int64_t cuFFTGetPlanCacheSize(DeviceIndex /*device_index*/) const {
  167. TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP);
  168. }
  169. virtual void cuFFTClearPlanCache(DeviceIndex /*device_index*/) const {
  170. TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP);
  171. }
  172. virtual int getNumGPUs() const {
  173. return 0;
  174. }
  175. #ifdef USE_ROCM
  176. virtual bool isGPUArch(const std::vector<std::string>& /*archs*/, DeviceIndex = -1 /*device_index*/) const {
  177. TORCH_CHECK(false, "Cannot check GPU arch without ATen_cuda library. ", CUDA_HELP);
  178. }
  179. #endif
  180. virtual void deviceSynchronize(DeviceIndex /*device_index*/) const {
  181. TORCH_CHECK(false, "Cannot synchronize CUDA device without ATen_cuda library. ", CUDA_HELP);
  182. }
  183. };
  184. // NB: dummy argument to suppress "ISO C++11 requires at least one argument
  185. // for the "..." in a variadic macro"
  186. struct TORCH_API CUDAHooksArgs {};
  187. TORCH_DECLARE_REGISTRY(CUDAHooksRegistry, CUDAHooksInterface, CUDAHooksArgs);
  188. #define REGISTER_CUDA_HOOKS(clsname) \
  189. C10_REGISTER_CLASS(CUDAHooksRegistry, clsname, clsname)
  190. namespace detail {
  191. TORCH_API const CUDAHooksInterface& getCUDAHooks();
  192. } // namespace detail
  193. } // namespace at