memory.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. import collections
  2. import ctypes
  3. from typing import Any, Union
  4. import torch
  5. from torch._utils import _dummy_type
  6. from torch.types import Device
  7. from . import _get_device_index, _is_compiled, _lazy_init, is_initialized
if not _is_compiled():
    # Define dummy base classes so that ``torch._C._xpu_XPUAllocator`` can
    # still be referenced (e.g. in the annotations further down this module)
    # when ``_is_compiled()`` reports False.
    torch._C.__dict__["_xpu_XPUAllocator"] = _dummy_type("_xpu_XPUAllocator")

# Type alias for the ``device`` argument accepted by the functions below:
# a ``Device``, a string, an integer, or ``None``.
_device_t = Union[Device, str, int, None]
  12. def empty_cache() -> None:
  13. r"""Release all unoccupied cached memory currently held by the caching
  14. allocator so that those can be used in other XPU application.
  15. .. note::
  16. :func:`~torch.xpu.empty_cache` doesn't increase the amount of XPU
  17. memory available for PyTorch. However, it may help reduce fragmentation
  18. of XPU memory in certain cases.
  19. """
  20. if is_initialized():
  21. torch._C._xpu_emptyCache()
  22. def reset_peak_memory_stats(device: _device_t = None) -> None:
  23. r"""Reset the "peak" stats tracked by the XPU memory allocator.
  24. See :func:`~torch.xpu.memory_stats` for details. Peak stats correspond to the
  25. `"peak"` key in each individual stat dict.
  26. Args:
  27. device (torch.device or int or str, optional): selected device. Returns
  28. statistic for the current device, given by :func:`~torch.xpu.current_device`,
  29. if :attr:`device` is ``None`` (default).
  30. """
  31. device = _get_device_index(device, optional=True)
  32. return torch._C._xpu_resetPeakMemoryStats(device)
  33. def reset_accumulated_memory_stats(device: _device_t = None) -> None:
  34. r"""Reset the "accumulated" (historical) stats tracked by the XPU memory allocator.
  35. See :func:`~torch.xpu.memory_stats` for details. Accumulated stats correspond to
  36. the `"allocated"` and `"freed"` keys in each individual stat dict.
  37. Args:
  38. device (torch.device or int or str, optional): selected device. Returns
  39. statistic for the current device, given by :func:`~torch.xpu.current_device`,
  40. if :attr:`device` is ``None`` (default).
  41. """
  42. device = _get_device_index(device, optional=True)
  43. return torch._C._xpu_resetAccumulatedMemoryStats(device)
  44. def memory_stats_as_nested_dict(device: _device_t = None) -> dict[str, Any]:
  45. r"""Return the result of :func:`~torch.xpu.memory_stats` as a nested dictionary."""
  46. if not is_initialized():
  47. return {}
  48. device = _get_device_index(device, optional=True)
  49. return torch._C._xpu_memoryStats(device)
  50. def memory_stats(device: _device_t = None) -> dict[str, Any]:
  51. r"""Return a dictionary of XPU memory allocator statistics for a given device.
  52. The return value of this function is a dictionary of statistics, each of
  53. which is a non-negative integer.
  54. Core statistics:
  55. - ``"allocated_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
  56. amount of allocated memory.
  57. - ``"reserved_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
  58. amount of reserved memory.
  59. - ``"active_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
  60. amount of active memory.
  61. - ``"requested_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
  62. memory requested by client code, compare this with allocated_bytes to check if
  63. allocation rounding adds too much overhead.
  64. For these core statistics, values are broken down as follows.
  65. Pool type:
  66. - ``all``: combined statistics across all memory pools.
  67. - ``large_pool``: statistics for the large allocation pool (for size >= 1MB allocations).
  68. - ``small_pool``: statistics for the small allocation pool (for size < 1MB allocations).
  69. Metric type:
  70. - ``current``: current value of this metric.
  71. - ``peak``: maximum value of this metric.
  72. - ``allocated``: historical total increase in this metric.
  73. - ``freed``: historical total decrease in this metric.
  74. Args:
  75. device (torch.device or int or str, optional): selected device. Returns
  76. statistics for the current device, given by :func:`~torch.xpu.current_device`,
  77. if :attr:`device` is ``None`` (default).
  78. """
  79. result = []
  80. def _recurse_add_to_result(prefix: str, obj: Any) -> None:
  81. if isinstance(obj, dict):
  82. if len(prefix) > 0:
  83. prefix += "."
  84. for k, v in obj.items():
  85. _recurse_add_to_result(prefix + k, v)
  86. else:
  87. result.append((prefix, obj))
  88. stats = memory_stats_as_nested_dict(device=device)
  89. _recurse_add_to_result("", stats)
  90. result.sort()
  91. return collections.OrderedDict(result)
  92. def memory_allocated(device: _device_t = None) -> int:
  93. r"""Return the current GPU memory occupied by tensors in bytes for a given device.
  94. Args:
  95. device (torch.device or int or str, optional): selected device. Returns
  96. statistic for the current device, given by :func:`~torch.xpu.current_device`,
  97. if :attr:`device` is ``None`` (default).
  98. .. note::
  99. This is likely less than the amount shown in `xpu-smi` since some
  100. unused memory can be held by the caching allocator and some context
  101. needs to be created on GPU.
  102. """
  103. return memory_stats(device=device).get("allocated_bytes.all.current", 0)
  104. def max_memory_allocated(device: _device_t = None) -> int:
  105. r"""Return the maximum GPU memory occupied by tensors in bytes for a given device.
  106. By default, this returns the peak allocated memory since the beginning of
  107. this program. :func:`~torch.xpu.reset_peak_memory_stats` can be used to
  108. reset the starting point in tracking this metric. For example, these two
  109. functions can measure the peak allocated memory usage of each iteration in a
  110. training loop.
  111. Args:
  112. device (torch.device or int or str, optional): selected device. Returns
  113. statistic for the current device, given by :func:`~torch.xpu.current_device`,
  114. if :attr:`device` is ``None`` (default).
  115. """
  116. return memory_stats(device=device).get("allocated_bytes.all.peak", 0)
  117. def memory_reserved(device: _device_t = None) -> int:
  118. r"""Return the current GPU memory managed by the caching allocator in bytes for a given device.
  119. Args:
  120. device (torch.device or int or str, optional): selected device. Returns
  121. statistic for the current device, given by :func:`~torch.xpu.current_device`,
  122. if :attr:`device` is ``None`` (default).
  123. """
  124. return memory_stats(device=device).get("reserved_bytes.all.current", 0)
  125. def max_memory_reserved(device: _device_t = None) -> int:
  126. r"""Return the maximum GPU memory managed by the caching allocator in bytes for a given device.
  127. By default, this returns the peak cached memory since the beginning of this
  128. program. :func:`~torch.xpu.reset_peak_memory_stats` can be used to reset
  129. the starting point in tracking this metric. For example, these two functions
  130. can measure the peak cached memory amount of each iteration in a training
  131. loop.
  132. Args:
  133. device (torch.device or int or str, optional): selected device. Returns
  134. statistic for the current device, given by :func:`~torch.xpu.current_device`,
  135. if :attr:`device` is ``None`` (default).
  136. """
  137. return memory_stats(device=device).get("reserved_bytes.all.peak", 0)
  138. def mem_get_info(device: _device_t = None) -> tuple[int, int]:
  139. r"""Return the global free and total GPU memory for a given device.
  140. Args:
  141. device (torch.device or int or str, optional): selected device. Returns
  142. statistic for the current device, given by :func:`~torch.xpu.current_device`,
  143. if :attr:`device` is ``None`` (default).
  144. Returns:
  145. int: the memory available on the device in units of bytes.
  146. int: the total memory on the device in units of bytes
  147. """
  148. _lazy_init()
  149. device = _get_device_index(device, optional=True)
  150. return torch._C._xpu_getMemoryInfo(device)
  151. def get_per_process_memory_fraction(device: _device_t = None) -> float:
  152. r"""
  153. Retrieve the memory fraction currently set for a process on a given XPU device.
  154. This fraction represents the portion of the total device memory that
  155. the caching allocator is allowed to use. The allowed memory is calculated as:
  156. .. math:: \text{allowed\_memory} = \text{total\_memory} \times \text{fraction}
  157. Args:
  158. device (torch.device or int or str, optional): selected device. It uses the current device,
  159. given by :func:`~torch.xpu.current_device`, if :attr:`device` is ``None`` (default).
  160. Returns:
  161. float: The memory fraction in the range 0.0 to 1.0.
  162. """
  163. _lazy_init()
  164. device = _get_device_index(device, optional=True)
  165. return torch._C._xpu_getMemoryFraction(device)
  166. def set_per_process_memory_fraction(fraction: float, device: _device_t = None) -> None:
  167. r"""
  168. Set the memory fraction for a single process on XPU device.
  169. This function limits the amount of memory that the caching allocator can allocate
  170. on the specified XPU device. The allowed memory is computed as:
  171. .. math:: \text{allowed\_memory} = \text{total\_memory} \times \text{fraction}
  172. If the process attempts to allocate more than this allowed memory,
  173. an out-of-memory error will be raised by the allocator.
  174. Arguments:
  175. fraction (float): Range: 0~1. Allowed memory equals total_memory * fraction.
  176. device (torch.device or int or str, optional): selected device. It uses the current device,
  177. given by :func:`~torch.xpu.current_device`, if :attr:`device` is ``None`` (default).
  178. .. note:: In general, the total available free memory is less than the total capacity.
  179. """
  180. _lazy_init()
  181. device = _get_device_index(device, optional=True)
  182. if not isinstance(fraction, float):
  183. raise TypeError("Invalid type for fraction argument, must be `float`")
  184. # pyrefly: ignore [missing-attribute]
  185. torch._C._xpu_setMemoryFraction(fraction, device)
class _XPUAllocator:
    r"""Wrapper over internal XPU memory allocators.

    Holds a single allocator object and exposes it through
    :meth:`allocator`.
    """

    def __init__(self, allocator: torch._C._xpu_XPUAllocator):
        # Store the wrapped allocator object unchanged.
        self._allocator = allocator

    def allocator(self):
        r"""Return the allocator object wrapped by this instance."""
        return self._allocator
  192. class XPUPluggableAllocator(_XPUAllocator):
  193. r"""XPU memory allocator loaded from a shared library."""
  194. def __init__(self, path_to_lib_file: str, alloc_fn_name: str, free_fn_name: str):
  195. r"""XPU memory allocator loaded dynamically from a shared library.
  196. This lets users provide custom allocation and free functions implemented
  197. in a separate shared library. The allocator is registered through
  198. ``torch._C._xpu_customAllocator`` and becomes available for use via
  199. ``torch.memory.xpu.change_current_allocator``.
  200. Arguments:
  201. path_to_lib_file (str):
  202. Filesystem path to the shared library file containing the allocation
  203. and free functions.
  204. alloc_fn_name (str):
  205. Name of the allocation function exported from the shared library.
  206. The function must have the signature:
  207. ``void* alloc_fn(size_t size, int device, sycl::queue* queue);``
  208. free_fn_name (str):
  209. Name of the free function exported from the shared library.
  210. The function must have the signature:
  211. ``void free_fn(void* ptr, size_t size, sycl::queue* queue);``
  212. """
  213. allocator_lib = ctypes.CDLL(path_to_lib_file)
  214. alloc_fn_ptr = getattr(allocator_lib, alloc_fn_name)
  215. free_fn_ptr = getattr(allocator_lib, free_fn_name)
  216. alloc_fn_addr = ctypes.cast(alloc_fn_ptr, ctypes.c_void_p).value
  217. free_fn_addr = ctypes.cast(free_fn_ptr, ctypes.c_void_p).value
  218. if alloc_fn_addr is None or free_fn_addr is None:
  219. raise RuntimeError(
  220. "Failed to load allocator symbols from the shared library."
  221. )
  222. self._allocator = torch._C._xpu_customAllocator(alloc_fn_addr, free_fn_addr)
  223. def change_current_allocator(allocator: _XPUAllocator) -> None:
  224. r"""Change the currently used memory allocator to be the one provided.
  225. .. note::
  226. If the current allocator has already been used/initialized, this function will error.
  227. Arguments:
  228. allocator (torch.xpu.memory._XPUAllocator): allocator to be set as the active one.
  229. """
  230. torch._C._xpu_changeCurrentAllocator(allocator.allocator())
  231. def _get_current_allocator() -> _XPUAllocator:
  232. r"""Return the allocator being currently used.
  233. Returns:
  234. _XPUAllocator: the allocator being currently used.
  235. """
  236. return _XPUAllocator(torch._C._xpu_getAllocator())
# Names exported by ``from <this module> import *``. The underscore-prefixed
# helpers defined in this module (``_XPUAllocator``, ``_get_current_allocator``)
# are intentionally not listed.
__all__ = [
    "XPUPluggableAllocator",
    "change_current_allocator",
    "empty_cache",
    "get_per_process_memory_fraction",
    "max_memory_allocated",
    "max_memory_reserved",
    "mem_get_info",
    "memory_allocated",
    "memory_reserved",
    "memory_stats",
    "memory_stats_as_nested_dict",
    "reset_accumulated_memory_stats",
    "reset_peak_memory_stats",
    "set_per_process_memory_fraction",
]
  252. ]