# _profiler.pyi
from enum import Enum
from typing import Literal, TypeAlias

from torch._C import device, dtype, layout
# defined in torch/csrc/profiler/python/init.cpp
  5. class RecordScope(Enum):
  6. FUNCTION = ...
  7. BACKWARD_FUNCTION = ...
  8. TORCHSCRIPT_FUNCTION = ...
  9. KERNEL_FUNCTION_DTYPE = ...
  10. CUSTOM_CLASS = ...
  11. BUILD_FEATURE = ...
  12. LITE_INTERPRETER = ...
  13. USER_SCOPE = ...
  14. STATIC_RUNTIME_OP = ...
  15. STATIC_RUNTIME_MODEL = ...
  16. class ProfilerState(Enum):
  17. Disabled = ...
  18. CPU = ...
  19. CUDA = ...
  20. NVTX = ...
  21. ITT = ...
  22. PRIVATEUSE1 = ...
  23. KINETO = ...
  24. KINETO_GPU_FALLBACK = ...
  25. KINETO_PRIVATEUSE1_FALLBACK = ...
  26. class ActiveProfilerType(Enum):
  27. NONE = ...
  28. LEGACY = ...
  29. KINETO = ...
  30. NVTX = ...
  31. ITT = ...
  32. PRIVATEUSE1 = ...
  33. class ProfilerActivity(Enum):
  34. CPU = ...
  35. CUDA = ...
  36. XPU = ...
  37. MTIA = ...
  38. HPU = ...
  39. PrivateUse1 = ...
  40. class _EventType(Enum):
  41. TorchOp = ...
  42. Backend = ...
  43. Allocation = ...
  44. OutOfMemory = ...
  45. PyCall = ...
  46. PyCCall = ...
  47. Kineto = ...
  48. class _ExperimentalConfig:
  49. def __init__(
  50. self,
  51. profiler_metrics: list[str] = ...,
  52. profiler_measure_per_kernel: bool = ...,
  53. verbose: bool = ...,
  54. performance_events: list[str] = ...,
  55. enable_cuda_sync_events: bool = ...,
  56. profile_all_threads: bool = ...,
  57. ) -> None: ...
  58. class ProfilerConfig:
  59. def __init__(
  60. self,
  61. state: ProfilerState,
  62. report_input_shapes: bool,
  63. profile_memory: bool,
  64. with_stack: bool,
  65. with_flops: bool,
  66. with_modules: bool,
  67. experimental_config: _ExperimentalConfig,
  68. trace_id: str | None = None,
  69. ) -> None: ...
  70. class _ProfilerEvent:
  71. start_tid: int
  72. start_time_ns: int
  73. children: list[_ProfilerEvent]
  74. # TODO(robieta): remove in favor of `self.typed`
  75. extra_fields: (
  76. _ExtraFields_TorchOp
  77. | _ExtraFields_Backend
  78. | _ExtraFields_Allocation
  79. | _ExtraFields_OutOfMemory
  80. | _ExtraFields_PyCall
  81. | _ExtraFields_PyCCall
  82. | _ExtraFields_Kineto
  83. )
  84. @property
  85. def typed(
  86. self,
  87. ) -> (
  88. tuple[Literal[_EventType.TorchOp], _ExtraFields_TorchOp]
  89. | tuple[Literal[_EventType.Backend], _ExtraFields_Backend]
  90. | tuple[Literal[_EventType.Allocation], _ExtraFields_Allocation]
  91. | tuple[Literal[_EventType.OutOfMemory], _ExtraFields_OutOfMemory]
  92. | tuple[Literal[_EventType.PyCall], _ExtraFields_PyCall]
  93. | tuple[Literal[_EventType.PyCCall], _ExtraFields_PyCCall]
  94. | tuple[Literal[_EventType.Kineto], _ExtraFields_Kineto]
  95. ): ...
  96. @property
  97. def name(self) -> str: ...
  98. @property
  99. def tag(self) -> _EventType: ...
  100. @property
  101. def id(self) -> int: ...
  102. @property
  103. def parent(self) -> _ProfilerEvent | None: ...
  104. @property
  105. def correlation_id(self) -> int: ...
  106. @property
  107. def end_time_ns(self) -> int: ...
  108. @property
  109. def duration_time_ns(self) -> int: ...
  110. class _TensorMetadata:
  111. impl_ptr: int | None
  112. storage_data_ptr: int | None
  113. id: int | None
  114. @property
  115. def allocation_id(self) -> int | None: ...
  116. @property
  117. def layout(self) -> layout: ...
  118. @property
  119. def device(self) -> device: ...
  120. @property
  121. def dtype(self) -> dtype: ...
  122. @property
  123. def sizes(self) -> list[int]: ...
  124. @property
  125. def strides(self) -> list[int]: ...
  126. Scalar: TypeAlias = int | float | bool | complex
  127. Input: TypeAlias = _TensorMetadata | list[_TensorMetadata] | Scalar | None
  128. class _ExtraFields_TorchOp:
  129. name: str
  130. sequence_number: int
  131. allow_tf32_cublas: bool
  132. @property
  133. def inputs(self) -> list[Input]: ...
  134. @property
  135. def scope(self) -> RecordScope: ...
  136. class _ExtraFields_Backend: ...
  137. class _ExtraFields_Allocation:
  138. ptr: int
  139. id: int | None
  140. alloc_size: int
  141. total_allocated: int
  142. total_reserved: int
  143. @property
  144. def allocation_id(self) -> int | None: ...
  145. @property
  146. def device(self) -> device: ...
  147. class _ExtraFields_OutOfMemory: ...
  148. class _PyFrameState:
  149. line_number: int
  150. function_name: str
  151. @property
  152. def file_name(self) -> str: ...
  153. class _NNModuleInfo:
  154. @property
  155. def self_ptr(self) -> int: ...
  156. @property
  157. def cls_ptr(self) -> int: ...
  158. @property
  159. def cls_name(self) -> str: ...
  160. @property
  161. def parameters(
  162. self,
  163. ) -> list[tuple[str, _TensorMetadata, _TensorMetadata | None]]: ...
  164. class _OptimizerInfo:
  165. @property
  166. def parameters(
  167. self,
  168. ) -> list[
  169. tuple[
  170. # Parameter
  171. _TensorMetadata,
  172. #
  173. # Gradient (if present during optimizer.step())
  174. _TensorMetadata | None,
  175. #
  176. # Optimizer state for Parameter as (name, tensor) pairs
  177. list[tuple[str, _TensorMetadata]],
  178. ]
  179. ]: ...
  180. class _ExtraFields_PyCCall:
  181. @property
  182. def caller(self) -> _PyFrameState: ...
  183. class _ExtraFields_PyCall:
  184. @property
  185. def callsite(self) -> _PyFrameState: ...
  186. @property
  187. def caller(self) -> _PyFrameState: ...
  188. @property
  189. def module(self) -> _NNModuleInfo | None: ...
  190. @property
  191. def optimizer(self) -> _OptimizerInfo | None: ...
  192. class _ExtraFields_Kineto: ...
  193. def _add_execution_trace_observer(output_file_path: str) -> bool: ...
  194. def _remove_execution_trace_observer() -> None: ...
  195. def _enable_execution_trace_observer() -> None: ...
  196. def _disable_execution_trace_observer() -> None: ...
  197. def _set_record_concrete_inputs_enabled_val(val: bool) -> None: ...
  198. def _set_fwd_bwd_enabled_val(val: bool) -> None: ...
  199. def _set_cuda_sync_enabled_val(val: bool) -> None: ...
  200. class CapturedTraceback: ...
  201. def gather_traceback(python: bool, script: bool, cpp: bool) -> CapturedTraceback: ...
  202. # The Dict has name, filename, line
  203. def symbolize_tracebacks(
  204. to_symbolize: list[CapturedTraceback],
  205. ) -> list[list[dict[str, str]]]: ...
  206. class _RecordFunctionFast:
  207. def __init__(
  208. self,
  209. name: str,
  210. input_values: list | tuple | None = None,
  211. keyword_values: dict | None = None,
  212. ) -> None: ...
  213. def __enter__(self) -> None: ...
  214. def __exit__(self, *exc_info: object) -> None: ...