/*!
 *  Copyright (c) 2017 by Contributors
 * \file dlpack.h
 * \brief The common header of DLPack.
 */
#ifndef DLPACK_DLPACK_H_
#define DLPACK_DLPACK_H_
  /*!
   * \brief Compatibility with C++
   *
   * Expands to `extern "C"` when compiled as C++ and to nothing when
   * compiled as C.
   */
  #ifdef __cplusplus
  #define DLPACK_EXTERN_C extern "C"
  #else
  #define DLPACK_EXTERN_C
  #endif

  /*! \brief The current major version of dlpack */
  #define DLPACK_MAJOR_VERSION 1

  /*! \brief The current minor version of dlpack */
  #define DLPACK_MINOR_VERSION 0

  /*!
   * \brief DLPACK_DLL prefix for windows
   *
   * On _WIN32 this selects dllexport when DLPACK_EXPORTS is defined and
   * dllimport otherwise; on every other platform it expands to nothing.
   */
  #ifdef _WIN32
  #ifdef DLPACK_EXPORTS
  #define DLPACK_DLL __declspec(dllexport)
  #else
  #define DLPACK_DLL __declspec(dllimport)
  #endif
  #else
  #define DLPACK_DLL
  #endif
  // NOLINTNEXTLINE(modernize-deprecated-headers)
  #include <stdint.h>

  // NOLINTNEXTLINE(modernize-deprecated-headers)
  #include <stddef.h>
  #ifdef __cplusplus
  extern "C" {
  #endif
  /*!
   * \brief The DLPack version.
   *
   * A change in major version indicates that we have changed the
   * data layout of the ABI - DLManagedTensorVersioned.
   *
   * A change in minor version indicates that we have added new
   * code, such as a new device type, but the ABI is kept the same.
   *
   * If an obtained DLPack tensor has a major version that disagrees
   * with the version number specified in this header file
   * (i.e. major != DLPACK_MAJOR_VERSION), the consumer must call the deleter
   * (and it is safe to do so). It is not safe to access any other fields
   * as the memory layout will have changed.
   *
   * In the case of a minor version mismatch, the tensor can be safely used as
   * long as the consumer knows how to interpret all fields. Minor version
   * updates indicate the addition of enumeration values.
   */
  typedef struct {
    /*! \brief DLPack major version. */
    uint32_t major;
    /*! \brief DLPack minor version. */
    uint32_t minor;
  } DLPackVersion;
  /*!
   * \brief The device type in DLDevice.
   *
   * When compiled as C++ the enum has a fixed underlying type of int32_t;
   * when compiled as C it is a plain enum.
   */
  #ifdef __cplusplus
  typedef enum : int32_t {
  #else
  typedef enum {
  #endif
    /*! \brief CPU device */
    kDLCPU = 1,
    /*! \brief CUDA GPU device */
    kDLCUDA = 2,
    /*!
     * \brief Pinned CUDA CPU memory by cudaMallocHost
     */
    kDLCUDAHost = 3,
    /*! \brief OpenCL devices. */
    kDLOpenCL = 4,
    /*! \brief Vulkan buffer for next generation graphics. */
    kDLVulkan = 7,
    /*! \brief Metal for Apple GPU. */
    kDLMetal = 8,
    /*! \brief Verilog simulator buffer */
    kDLVPI = 9,
    /*! \brief ROCm GPUs for AMD GPUs */
    kDLROCM = 10,
    /*!
     * \brief Pinned ROCm CPU memory allocated by hipMallocHost
     */
    kDLROCMHost = 11,
    /*!
     * \brief Reserved extension device type,
     * used to quickly test an extension device.
     * The semantics can differ depending on the implementation.
     */
    kDLExtDev = 12,
    /*!
     * \brief CUDA managed/unified memory allocated by cudaMallocManaged
     */
    kDLCUDAManaged = 13,
    /*!
     * \brief Unified shared memory allocated on a oneAPI non-partitioned
     * device. Call to oneAPI runtime is required to determine the device
     * type, the USM allocation type and the sycl context it is bound to.
     */
    kDLOneAPI = 14,
    /*! \brief GPU support for next generation WebGPU standard. */
    kDLWebGPU = 15,
    /*! \brief Qualcomm Hexagon DSP */
    kDLHexagon = 16,
    /*! \brief Microsoft MAIA devices */
    kDLMAIA = 17,
  } DLDeviceType;
  116. /*!
  117. * \brief A Device for Tensor and operator.
  118. */
  119. typedef struct {
  120. /*! \brief The device type used in the device. */
  121. DLDeviceType device_type;
  122. /*!
  123. * \brief The device index.
  124. * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
  125. */
  126. int32_t device_id;
  127. } DLDevice;
  /*!
   * \brief The type code options DLDataType.
   */
  typedef enum {
    /*! \brief signed integer */
    kDLInt = 0U,
    /*! \brief unsigned integer */
    kDLUInt = 1U,
    /*! \brief IEEE floating point */
    kDLFloat = 2U,
    /*!
     * \brief Opaque handle type, reserved for testing purposes.
     * Frameworks need to agree on the handle data type for the exchange to be well-defined.
     */
    kDLOpaqueHandle = 3U,
    /*! \brief bfloat16 */
    kDLBfloat = 4U,
    /*!
     * \brief complex number
     * (C/C++/Python layout: compact struct per complex number)
     */
    kDLComplex = 5U,
    /*! \brief boolean */
    kDLBool = 6U,
  } DLDataTypeCode;
  /*!
   * \brief The data type the tensor can hold. The data type is assumed to follow the
   * native endian-ness. An explicit error message should be raised when attempting to
   * export an array with non-native endianness.
   *
   * Examples
   *  - float: type_code = 2, bits = 32, lanes = 1
   *  - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4
   *  - int8: type_code = 0, bits = 8, lanes = 1
   *  - std::complex<float>: type_code = 5, bits = 64, lanes = 1
   *  - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library
   *    convention, the underlying storage size of bool is 8 bits)
   */
  typedef struct {
    /*!
     * \brief Type code of base types.
     * We keep it uint8_t instead of DLDataTypeCode for minimal memory
     * footprint, but the value should be one of DLDataTypeCode enum values.
     */
    uint8_t code;
    /*!
     * \brief Number of bits, common choices are 8, 16, 32.
     */
    uint8_t bits;
    /*! \brief Number of lanes in the type, used for vector types. */
    uint16_t lanes;
  } DLDataType;
  179. /*!
  180. * \brief Plain C Tensor object, does not manage memory.
  181. */
  182. typedef struct {
  183. /*!
  184. * \brief The data pointer points to the allocated data. This will be CUDA
  185. * device pointer or cl_mem handle in OpenCL. It may be opaque on some device
  186. * types. This pointer is always aligned to 256 bytes as in CUDA. The
  187. * `byte_offset` field should be used to point to the beginning of the data.
  188. *
  189. * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,
  190. * TVM, perhaps others) do not adhere to this 256 byte alignment requirement
  191. * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
  192. * (after which this note will be updated); at the moment it is recommended
  193. * to not rely on the data pointer being correctly aligned.
  194. *
  195. * For given DLTensor, the size of memory required to store the contents of
  196. * data is calculated as follows:
  197. *
  198. * \code{.c}
  199. * static inline size_t GetDataSize(const DLTensor* t) {
  200. * size_t size = 1;
  201. * for (tvm_index_t i = 0; i < t->ndim; ++i) {
  202. * size *= t->shape[i];
  203. * }
  204. * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
  205. * return size;
  206. * }
  207. * \endcode
  208. *
  209. * Note that if the tensor is of size zero, then the data pointer should be
  210. * set to `NULL`.
  211. */
  212. void* data;
  213. /*! \brief The device of the tensor */
  214. DLDevice device;
  215. /*! \brief Number of dimensions */
  216. int32_t ndim;
  217. /*! \brief The data type of the pointer*/
  218. DLDataType dtype;
  219. /*! \brief The shape of the tensor */
  220. const int64_t* shape;
  221. /*!
  222. * \brief strides of the tensor (in number of elements, not bytes)
  223. * can be NULL, indicating tensor is compact and row-majored.
  224. */
  225. const int64_t* strides;
  226. /*! \brief The offset in bytes to the beginning pointer to data */
  227. uint64_t byte_offset;
  228. } DLTensor;
  229. /*!
  230. * \brief C Tensor object, manage memory of DLTensor. This data structure is
  231. * intended to facilitate the borrowing of DLTensor by another framework. It is
  232. * not meant to transfer the tensor. When the borrowing framework doesn't need
  233. * the tensor, it should call the deleter to notify the host that the resource
  234. * is no longer needed.
  235. *
  236. * \note This data structure is used as Legacy DLManagedTensor
  237. * in DLPack exchange and is deprecated after DLPack v0.8
  238. * Use DLManagedTensorVersioned instead.
  239. * This data structure may get renamed or deleted in future versions.
  240. *
  241. * \sa DLManagedTensorVersioned
  242. */
  243. typedef struct DLManagedTensor {
  244. /*! \brief DLTensor which is being memory managed */
  245. DLTensor dl_tensor;
  246. /*! \brief the context of the original host framework of DLManagedTensor in
  247. * which DLManagedTensor is used in the framework. It can also be NULL.
  248. */
  249. void * manager_ctx;
  250. /*!
  251. * \brief Destructor - this should be called
  252. * to destruct the manager_ctx which backs the DLManagedTensor. It can be
  253. * NULL if there is no way for the caller to provide a reasonable destructor.
  254. * The destructor deletes the argument self as well.
  255. */
  256. void (*deleter)(struct DLManagedTensor * self);
  257. } DLManagedTensor;
  // bit masks used in the DLManagedTensorVersioned

  /*! \brief bit mask to indicate that the tensor is read only. */
  #define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL)

  /*!
   * \brief bit mask to indicate that the tensor is a copy made by the producer.
   *
   * If set, the tensor is considered solely owned throughout its lifetime by the
   * consumer, until the producer-provided deleter is invoked.
   */
  #define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL)
  268. /*!
  269. * \brief A versioned and managed C Tensor object, manage memory of DLTensor.
  270. *
  271. * This data structure is intended to facilitate the borrowing of DLTensor by
  272. * another framework. It is not meant to transfer the tensor. When the borrowing
  273. * framework doesn't need the tensor, it should call the deleter to notify the
  274. * host that the resource is no longer needed.
  275. *
  276. * \note This is the current standard DLPack exchange data structure.
  277. */
  278. struct DLManagedTensorVersioned {
  279. /*!
  280. * \brief The API and ABI version of the current managed Tensor
  281. */
  282. DLPackVersion version;
  283. /*!
  284. * \brief the context of the original host framework.
  285. *
  286. * Stores DLManagedTensorVersioned is used in the
  287. * framework. It can also be NULL.
  288. */
  289. void *manager_ctx;
  290. /*!
  291. * \brief Destructor.
  292. *
  293. * This should be called to destruct manager_ctx which holds the DLManagedTensorVersioned.
  294. * It can be NULL if there is no way for the caller to provide a reasonable
  295. * destructor. The destructor deletes the argument self as well.
  296. */
  297. void (*deleter)(struct DLManagedTensorVersioned *self);
  298. /*!
  299. * \brief Additional bitmask flags information about the tensor.
  300. *
  301. * By default the flags should be set to 0.
  302. *
  303. * \note Future ABI changes should keep everything until this field
  304. * stable, to ensure that deleter can be correctly called.
  305. *
  306. * \sa DLPACK_FLAG_BITMASK_READ_ONLY
  307. * \sa DLPACK_FLAG_BITMASK_IS_COPIED
  308. */
  309. uint64_t flags;
  310. /*! \brief DLTensor which is being memory managed */
  311. DLTensor dl_tensor;
  312. };
  #ifdef __cplusplus
  }  // DLPACK_EXTERN_C
  #endif
  #endif  // DLPACK_DLPACK_H_