conv.py 75 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863
  1. # mypy: allow-untyped-defs
  2. import math
  3. from typing import Literal, Optional, Union
  4. from typing_extensions import deprecated
  5. import torch
  6. from torch import Tensor
  7. from torch._torch_docs import reproducibility_notes
  8. from torch.nn import functional as F, init
  9. from torch.nn.common_types import _size_1_t, _size_2_t, _size_3_t
  10. from torch.nn.parameter import Parameter, UninitializedParameter
  11. from .lazy import LazyModuleMixin
  12. from .module import Module
  13. from .utils import _pair, _reverse_repeat_tuple, _single, _triple
# Names exported by ``from <module> import *``. Kept as a plain list literal.
# NOTE(review): the ConvTranspose* and Lazy* classes named here are defined
# beyond the part of the file visible in this chunk — confirm they exist.
__all__ = [
    "Conv1d",
    "Conv2d",
    "Conv3d",
    "ConvTranspose1d",
    "ConvTranspose2d",
    "ConvTranspose3d",
    "LazyConv1d",
    "LazyConv2d",
    "LazyConv3d",
    "LazyConvTranspose1d",
    "LazyConvTranspose2d",
    "LazyConvTranspose3d",
]
# Shared reST snippets for the convolution docstrings. The class ``__doc__``
# templates below contain ``{groups_note}`` / ``{depthwise_separable_note}``
# placeholders and are filled in with ``.format(..., **convolution_notes)``,
# so the keys here must match those placeholder names exactly.
convolution_notes = {
    "groups_note": r"""* :attr:`groups` controls the connections between inputs and outputs.
      :attr:`in_channels` and :attr:`out_channels` must both be divisible by
      :attr:`groups`. For example,

        * At groups=1, all inputs are convolved to all outputs.
        * At groups=2, the operation becomes equivalent to having two conv
          layers side by side, each seeing half the input channels
          and producing half the output channels, and both subsequently
          concatenated.
        * At groups= :attr:`in_channels`, each input channel is convolved with
          its own set of filters (of size
          :math:`\frac{\text{out\_channels}}{\text{in\_channels}}`).""",
    "depthwise_separable_note": r"""When `groups == in_channels` and `out_channels == K * in_channels`,
        where `K` is a positive integer, this operation is also known as a "depthwise convolution".

        In other words, for an input of size :math:`(N, C_{in}, L_{in})`,
        a depthwise convolution with a depthwise multiplier `K` can be performed with the arguments
        :math:`(C_\text{in}=C_\text{in}, C_\text{out}=C_\text{in} \times \text{K}, ..., \text{groups}=C_\text{in})`.""",
}  # noqa: B950
  46. class _ConvNd(Module):
  47. __constants__ = [
  48. "stride",
  49. "padding",
  50. "dilation",
  51. "groups",
  52. "padding_mode",
  53. "output_padding",
  54. "in_channels",
  55. "out_channels",
  56. "kernel_size",
  57. ]
  58. __annotations__ = {"bias": Optional[torch.Tensor]}
  59. def _conv_forward( # type: ignore[empty-body]
  60. self, input: Tensor, weight: Tensor, bias: Optional[Tensor]
  61. ) -> Tensor: ...
  62. in_channels: int
  63. _reversed_padding_repeated_twice: list[int]
  64. out_channels: int
  65. kernel_size: tuple[int, ...]
  66. stride: tuple[int, ...]
  67. padding: Union[str, tuple[int, ...]]
  68. dilation: tuple[int, ...]
  69. transposed: bool
  70. output_padding: tuple[int, ...]
  71. groups: int
  72. padding_mode: Literal["zeros", "reflect", "replicate", "circular"]
  73. weight: Tensor
  74. bias: Optional[Tensor]
  75. def __init__(
  76. self,
  77. in_channels: int,
  78. out_channels: int,
  79. kernel_size: tuple[int, ...],
  80. stride: tuple[int, ...],
  81. padding: Union[str, tuple[int, ...]],
  82. dilation: tuple[int, ...],
  83. transposed: bool,
  84. output_padding: tuple[int, ...],
  85. groups: int,
  86. bias: bool,
  87. padding_mode: Literal["zeros", "reflect", "replicate", "circular"],
  88. device=None,
  89. dtype=None,
  90. ) -> None:
  91. factory_kwargs = {"device": device, "dtype": dtype}
  92. super().__init__()
  93. if groups <= 0:
  94. raise ValueError("groups must be a positive integer")
  95. if in_channels % groups != 0:
  96. raise ValueError("in_channels must be divisible by groups")
  97. if out_channels % groups != 0:
  98. raise ValueError("out_channels must be divisible by groups")
  99. valid_padding_strings = {"same", "valid"}
  100. if isinstance(padding, str):
  101. if padding not in valid_padding_strings:
  102. raise ValueError(
  103. f"Invalid padding string {padding!r}, should be one of {valid_padding_strings}"
  104. )
  105. if padding == "same" and any(s != 1 for s in stride):
  106. raise ValueError(
  107. "padding='same' is not supported for strided convolutions"
  108. )
  109. valid_padding_modes = {"zeros", "reflect", "replicate", "circular"}
  110. if padding_mode not in valid_padding_modes:
  111. raise ValueError(
  112. f"padding_mode must be one of {valid_padding_modes}, but got padding_mode='{padding_mode}'"
  113. )
  114. self.in_channels = in_channels
  115. self.out_channels = out_channels
  116. self.kernel_size = kernel_size
  117. self.stride = stride
  118. self.padding = padding
  119. self.dilation = dilation
  120. self.transposed = transposed
  121. self.output_padding = output_padding
  122. self.groups = groups
  123. self.padding_mode = padding_mode
  124. # `_reversed_padding_repeated_twice` is the padding to be passed to
  125. # `F.pad` if needed (e.g., for non-zero padding types that are
  126. # implemented as two ops: padding + conv). `F.pad` accepts paddings in
  127. # reverse order than the dimension.
  128. if isinstance(self.padding, str):
  129. self._reversed_padding_repeated_twice = [0, 0] * len(kernel_size)
  130. if padding == "same":
  131. for d, k, i in zip(
  132. dilation, kernel_size, range(len(kernel_size) - 1, -1, -1)
  133. ):
  134. total_padding = d * (k - 1)
  135. left_pad = total_padding // 2
  136. self._reversed_padding_repeated_twice[2 * i] = left_pad
  137. self._reversed_padding_repeated_twice[2 * i + 1] = (
  138. total_padding - left_pad
  139. )
  140. else:
  141. self._reversed_padding_repeated_twice = _reverse_repeat_tuple(
  142. self.padding, 2
  143. )
  144. if transposed:
  145. self.weight = Parameter(
  146. torch.empty(
  147. (in_channels, out_channels // groups, *kernel_size),
  148. **factory_kwargs,
  149. )
  150. )
  151. else:
  152. self.weight = Parameter(
  153. torch.empty(
  154. (out_channels, in_channels // groups, *kernel_size),
  155. **factory_kwargs,
  156. )
  157. )
  158. if bias:
  159. self.bias = Parameter(torch.empty(out_channels, **factory_kwargs))
  160. else:
  161. self.register_parameter("bias", None)
  162. self.reset_parameters()
  163. def reset_parameters(self) -> None:
  164. # Setting a=sqrt(5) in kaiming_uniform is the same as initializing with
  165. # uniform(-1/sqrt(k), 1/sqrt(k)), where k = weight.size(1) * prod(*kernel_size)
  166. # For more details see: https://github.com/pytorch/pytorch/issues/15314#issuecomment-477448573
  167. init.kaiming_uniform_(self.weight, a=math.sqrt(5))
  168. if self.bias is not None:
  169. fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
  170. if fan_in != 0:
  171. bound = 1 / math.sqrt(fan_in)
  172. init.uniform_(self.bias, -bound, bound)
  173. def extra_repr(self):
  174. s = "{in_channels}, {out_channels}, kernel_size={kernel_size}, stride={stride}"
  175. if self.padding != (0,) * len(self.padding):
  176. s += ", padding={padding}"
  177. if self.dilation != (1,) * len(self.dilation):
  178. s += ", dilation={dilation}"
  179. if self.output_padding != (0,) * len(self.output_padding):
  180. s += ", output_padding={output_padding}"
  181. if self.groups != 1:
  182. s += ", groups={groups}"
  183. if self.bias is None:
  184. s += ", bias=False"
  185. if self.padding_mode != "zeros":
  186. s += ", padding_mode={padding_mode}"
  187. return s.format(**self.__dict__)
  188. def __setstate__(self, state):
  189. super().__setstate__(state)
  190. if not hasattr(self, "padding_mode"):
  191. self.padding_mode = "zeros"
  192. class Conv1d(_ConvNd):
  193. __doc__ = (
  194. r"""Applies a 1D convolution over an input signal composed of several input
  195. planes.
  196. In the simplest case, the output value of the layer with input size
  197. :math:`(N, C_{\text{in}}, L)` and output :math:`(N, C_{\text{out}}, L_{\text{out}})` can be
  198. precisely described as:
  199. .. math::
  200. \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
  201. \sum_{k = 0}^{C_{in} - 1} \text{weight}(C_{\text{out}_j}, k)
  202. \star \text{input}(N_i, k)
  203. where :math:`\star` is the valid `cross-correlation`_ operator,
  204. :math:`N` is a batch size, :math:`C` denotes a number of channels,
  205. :math:`L` is a length of signal sequence.
  206. """
  207. + r"""
  208. This module supports :ref:`TensorFloat32<tf32_on_ampere>`.
  209. On certain ROCm devices, when using float16 inputs this module will use :ref:`different precision<fp16_on_mi200>` for backward.
  210. * :attr:`stride` controls the stride for the cross-correlation, a single
  211. number or a one-element tuple.
  212. * :attr:`padding` controls the amount of padding applied to the input. It
  213. can be either a string {{'valid', 'same'}} or a tuple of ints giving the
  214. amount of implicit padding applied on both sides.
  215. """
  216. """
  217. * :attr:`dilation` controls the spacing between the kernel points; also
  218. known as the \u00e0 trous algorithm. It is harder to describe, but this `link`_
  219. has a nice visualization of what :attr:`dilation` does.
  220. """
  221. r"""
  222. {groups_note}
  223. Note:
  224. {depthwise_separable_note}
  225. Note:
  226. {cudnn_reproducibility_note}
  227. Note:
  228. ``padding='valid'`` is the same as no padding. ``padding='same'`` pads
  229. the input so the output has the shape as the input. However, this mode
  230. doesn't support any stride values other than 1.
  231. Note:
  232. This module supports complex data types i.e. ``complex32, complex64, complex128``.
  233. Args:
  234. in_channels (int): Number of channels in the input image
  235. out_channels (int): Number of channels produced by the convolution
  236. kernel_size (int or tuple): Size of the convolving kernel
  237. stride (int or tuple, optional): Stride of the convolution. Default: 1
  238. padding (int, tuple or str, optional): Padding added to both sides of
  239. the input. Default: 0
  240. dilation (int or tuple, optional): Spacing between kernel
  241. elements. Default: 1
  242. groups (int, optional): Number of blocked connections from input
  243. channels to output channels. Default: 1
  244. bias (bool, optional): If ``True``, adds a learnable bias to the
  245. output. Default: ``True``
  246. padding_mode (str, optional): ``'zeros'``, ``'reflect'``,
  247. ``'replicate'`` or ``'circular'``. Default: ``'zeros'``
  248. """.format(**reproducibility_notes, **convolution_notes)
  249. + r"""
  250. Shape:
  251. - Input: :math:`(N, C_{in}, L_{in})` or :math:`(C_{in}, L_{in})`
  252. - Output: :math:`(N, C_{out}, L_{out})` or :math:`(C_{out}, L_{out})`, where
  253. .. math::
  254. L_{out} = \left\lfloor\frac{L_{in} + 2 \times \text{padding} - \text{dilation}
  255. \times (\text{kernel\_size} - 1) - 1}{\text{stride}} + 1\right\rfloor
  256. Attributes:
  257. weight (Tensor): the learnable weights of the module of shape
  258. :math:`(\text{out\_channels},
  259. \frac{\text{in\_channels}}{\text{groups}}, \text{kernel\_size})`.
  260. The values of these weights are sampled from
  261. :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
  262. :math:`k = \frac{groups}{C_\text{in} * \text{kernel\_size}}`
  263. bias (Tensor): the learnable bias of the module of shape
  264. (out_channels). If :attr:`bias` is ``True``, then the values of these weights are
  265. sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
  266. :math:`k = \frac{groups}{C_\text{in} * \text{kernel\_size}}`
  267. Examples::
  268. >>> m = nn.Conv1d(16, 33, 3, stride=2)
  269. >>> input = torch.randn(20, 16, 50)
  270. >>> output = m(input)
  271. .. _cross-correlation:
  272. https://en.wikipedia.org/wiki/Cross-correlation
  273. .. _link:
  274. https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
  275. """
  276. )
  277. def __init__(
  278. self,
  279. in_channels: int,
  280. out_channels: int,
  281. kernel_size: _size_1_t,
  282. stride: _size_1_t = 1,
  283. padding: Union[str, _size_1_t] = 0,
  284. dilation: _size_1_t = 1,
  285. groups: int = 1,
  286. bias: bool = True,
  287. padding_mode: Literal["zeros", "reflect", "replicate", "circular"] = "zeros",
  288. device=None,
  289. dtype=None,
  290. ) -> None:
  291. factory_kwargs = {"device": device, "dtype": dtype}
  292. # we create new variables below to make mypy happy since kernel_size has
  293. # type Union[int, Tuple[int]] and kernel_size_ has type Tuple[int]
  294. kernel_size_ = _single(kernel_size)
  295. stride_ = _single(stride)
  296. padding_ = padding if isinstance(padding, str) else _single(padding)
  297. dilation_ = _single(dilation)
  298. super().__init__(
  299. in_channels,
  300. out_channels,
  301. kernel_size_,
  302. stride_,
  303. padding_,
  304. dilation_,
  305. False,
  306. _single(0),
  307. groups,
  308. bias,
  309. padding_mode,
  310. **factory_kwargs,
  311. )
  312. def _conv_forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor]):
  313. if self.padding_mode != "zeros":
  314. return F.conv1d(
  315. F.pad(
  316. input, self._reversed_padding_repeated_twice, mode=self.padding_mode
  317. ),
  318. weight,
  319. bias,
  320. self.stride,
  321. _single(0),
  322. self.dilation,
  323. self.groups,
  324. )
  325. return F.conv1d(
  326. input, weight, bias, self.stride, self.padding, self.dilation, self.groups
  327. )
  328. def forward(self, input: Tensor) -> Tensor:
  329. return self._conv_forward(input, self.weight, self.bias)
  330. class Conv2d(_ConvNd):
  331. __doc__ = (
  332. r"""Applies a 2D convolution over an input signal composed of several input
  333. planes.
  334. In the simplest case, the output value of the layer with input size
  335. :math:`(N, C_{\text{in}}, H, W)` and output :math:`(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})`
  336. can be precisely described as:
  337. .. math::
  338. \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
  339. \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k)
  340. where :math:`\star` is the valid 2D `cross-correlation`_ operator,
  341. :math:`N` is a batch size, :math:`C` denotes a number of channels,
  342. :math:`H` is a height of input planes in pixels, and :math:`W` is
  343. width in pixels.
  344. """
  345. + r"""
  346. This module supports :ref:`TensorFloat32<tf32_on_ampere>`.
  347. On certain ROCm devices, when using float16 inputs this module will use :ref:`different precision<fp16_on_mi200>` for backward.
  348. * :attr:`stride` controls the stride for the cross-correlation, a single
  349. number or a tuple.
  350. * :attr:`padding` controls the amount of padding applied to the input. It
  351. can be either a string {{'valid', 'same'}} or an int / a tuple of ints giving the
  352. amount of implicit padding applied on both sides.
  353. """
  354. """
  355. * :attr:`dilation` controls the spacing between the kernel points; also
  356. known as the \u00e0 trous algorithm. It is harder to describe, but this `link`_
  357. has a nice visualization of what :attr:`dilation` does.
  358. """
  359. r"""
  360. {groups_note}
  361. The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:
  362. - a single ``int`` -- in which case the same value is used for the height and width dimension
  363. - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
  364. and the second `int` for the width dimension
  365. Note:
  366. {depthwise_separable_note}
  367. Note:
  368. {cudnn_reproducibility_note}
  369. Note:
  370. ``padding='valid'`` is the same as no padding. ``padding='same'`` pads
  371. the input so the output has the shape as the input. However, this mode
  372. doesn't support any stride values other than 1.
  373. Note:
  374. This module supports complex data types i.e. ``complex32, complex64, complex128``.
  375. Args:
  376. in_channels (int): Number of channels in the input image
  377. out_channels (int): Number of channels produced by the convolution
  378. kernel_size (int or tuple): Size of the convolving kernel
  379. stride (int or tuple, optional): Stride of the convolution. Default: 1
  380. padding (int, tuple or str, optional): Padding added to all four sides of
  381. the input. Default: 0
  382. dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
  383. groups (int, optional): Number of blocked connections from input
  384. channels to output channels. Default: 1
  385. bias (bool, optional): If ``True``, adds a learnable bias to the
  386. output. Default: ``True``
  387. padding_mode (str, optional): ``'zeros'``, ``'reflect'``,
  388. ``'replicate'`` or ``'circular'``. Default: ``'zeros'``
  389. """.format(**reproducibility_notes, **convolution_notes)
  390. + r"""
  391. Shape:
  392. - Input: :math:`(N, C_{in}, H_{in}, W_{in})` or :math:`(C_{in}, H_{in}, W_{in})`
  393. - Output: :math:`(N, C_{out}, H_{out}, W_{out})` or :math:`(C_{out}, H_{out}, W_{out})`, where
  394. .. math::
  395. H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[0] - \text{dilation}[0]
  396. \times (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor
  397. .. math::
  398. W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[1] - \text{dilation}[1]
  399. \times (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor
  400. Attributes:
  401. weight (Tensor): the learnable weights of the module of shape
  402. :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}},`
  403. :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]})`.
  404. The values of these weights are sampled from
  405. :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
  406. :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
  407. bias (Tensor): the learnable bias of the module of shape
  408. (out_channels). If :attr:`bias` is ``True``,
  409. then the values of these weights are
  410. sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
  411. :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
  412. Examples:
  413. >>> # With square kernels and equal stride
  414. >>> m = nn.Conv2d(16, 33, 3, stride=2)
  415. >>> # non-square kernels and unequal stride and with padding
  416. >>> m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
  417. >>> # non-square kernels and unequal stride and with padding and dilation
  418. >>> m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
  419. >>> input = torch.randn(20, 16, 50, 100)
  420. >>> output = m(input)
  421. .. _cross-correlation:
  422. https://en.wikipedia.org/wiki/Cross-correlation
  423. .. _link:
  424. https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
  425. """
  426. )
  427. def __init__(
  428. self,
  429. in_channels: int,
  430. out_channels: int,
  431. kernel_size: _size_2_t,
  432. stride: _size_2_t = 1,
  433. padding: Union[str, _size_2_t] = 0,
  434. dilation: _size_2_t = 1,
  435. groups: int = 1,
  436. bias: bool = True,
  437. padding_mode: Literal["zeros", "reflect", "replicate", "circular"] = "zeros",
  438. device=None,
  439. dtype=None,
  440. ) -> None:
  441. factory_kwargs = {"device": device, "dtype": dtype}
  442. kernel_size_ = _pair(kernel_size)
  443. stride_ = _pair(stride)
  444. padding_ = padding if isinstance(padding, str) else _pair(padding)
  445. dilation_ = _pair(dilation)
  446. super().__init__(
  447. in_channels,
  448. out_channels,
  449. kernel_size_,
  450. stride_,
  451. padding_,
  452. dilation_,
  453. False,
  454. _pair(0),
  455. groups,
  456. bias,
  457. padding_mode,
  458. **factory_kwargs,
  459. )
  460. def _conv_forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor]):
  461. if self.padding_mode != "zeros":
  462. return F.conv2d(
  463. F.pad(
  464. input, self._reversed_padding_repeated_twice, mode=self.padding_mode
  465. ),
  466. weight,
  467. bias,
  468. self.stride,
  469. _pair(0),
  470. self.dilation,
  471. self.groups,
  472. )
  473. return F.conv2d(
  474. input, weight, bias, self.stride, self.padding, self.dilation, self.groups
  475. )
  476. def forward(self, input: Tensor) -> Tensor:
  477. return self._conv_forward(input, self.weight, self.bias)
  478. class Conv3d(_ConvNd):
  479. __doc__ = (
  480. r"""Applies a 3D convolution over an input signal composed of several input
  481. planes.
  482. In the simplest case, the output value of the layer with input size :math:`(N, C_{in}, D, H, W)`
  483. and output :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` can be precisely described as:
  484. .. math::
  485. out(N_i, C_{out_j}) = bias(C_{out_j}) +
  486. \sum_{k = 0}^{C_{in} - 1} weight(C_{out_j}, k) \star input(N_i, k)
  487. where :math:`\star` is the valid 3D `cross-correlation`_ operator
  488. """
  489. + r"""
  490. This module supports :ref:`TensorFloat32<tf32_on_ampere>`.
  491. On certain ROCm devices, when using float16 inputs this module will use :ref:`different precision<fp16_on_mi200>` for backward.
  492. * :attr:`stride` controls the stride for the cross-correlation.
  493. * :attr:`padding` controls the amount of padding applied to the input. It
  494. can be either a string {{'valid', 'same'}} or a tuple of ints giving the
  495. amount of implicit padding applied on both sides.
  496. """
  497. """
  498. * :attr:`dilation` controls the spacing between the kernel points; also known as the \u00e0 trous algorithm.
  499. It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.
  500. """
  501. r"""
  502. {groups_note}
  503. The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:
  504. - a single ``int`` -- in which case the same value is used for the depth, height and width dimension
  505. - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension,
  506. the second `int` for the height dimension and the third `int` for the width dimension
  507. Note:
  508. {depthwise_separable_note}
  509. Note:
  510. {cudnn_reproducibility_note}
  511. Note:
  512. ``padding='valid'`` is the same as no padding. ``padding='same'`` pads
  513. the input so the output has the shape as the input. However, this mode
  514. doesn't support any stride values other than 1.
  515. Note:
  516. This module supports complex data types i.e. ``complex32, complex64, complex128``.
  517. Args:
  518. in_channels (int): Number of channels in the input image
  519. out_channels (int): Number of channels produced by the convolution
  520. kernel_size (int or tuple): Size of the convolving kernel
  521. stride (int or tuple, optional): Stride of the convolution. Default: 1
  522. padding (int, tuple or str, optional): Padding added to all six sides of
  523. the input. Default: 0
  524. dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
  525. groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
  526. bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
  527. padding_mode (str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``
  528. """.format(**reproducibility_notes, **convolution_notes)
  529. + r"""
  530. Shape:
  531. - Input: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` or :math:`(C_{in}, D_{in}, H_{in}, W_{in})`
  532. - Output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` or :math:`(C_{out}, D_{out}, H_{out}, W_{out})`,
  533. where
  534. .. math::
  535. D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - \text{dilation}[0]
  536. \times (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor
  537. .. math::
  538. H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - \text{dilation}[1]
  539. \times (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor
  540. .. math::
  541. W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{dilation}[2]
  542. \times (\text{kernel\_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor
  543. Attributes:
  544. weight (Tensor): the learnable weights of the module of shape
  545. :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}},`
  546. :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]}, \text{kernel\_size[2]})`.
  547. The values of these weights are sampled from
  548. :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
  549. :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{2}\text{kernel\_size}[i]}`
  550. bias (Tensor): the learnable bias of the module of shape (out_channels). If :attr:`bias` is ``True``,
  551. then the values of these weights are
  552. sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
  553. :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{2}\text{kernel\_size}[i]}`
  554. Examples::
  555. >>> # With square kernels and equal stride
  556. >>> m = nn.Conv3d(16, 33, 3, stride=2)
  557. >>> # non-square kernels and unequal stride and with padding
  558. >>> m = nn.Conv3d(16, 33, (3, 5, 2), stride=(2, 1, 1), padding=(4, 2, 0))
  559. >>> input = torch.randn(20, 16, 10, 50, 100)
  560. >>> output = m(input)
  561. .. _cross-correlation:
  562. https://en.wikipedia.org/wiki/Cross-correlation
  563. .. _link:
  564. https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
  565. """
  566. )
  567. def __init__(
  568. self,
  569. in_channels: int,
  570. out_channels: int,
  571. kernel_size: _size_3_t,
  572. stride: _size_3_t = 1,
  573. padding: Union[str, _size_3_t] = 0,
  574. dilation: _size_3_t = 1,
  575. groups: int = 1,
  576. bias: bool = True,
  577. padding_mode: Literal["zeros", "reflect", "replicate", "circular"] = "zeros",
  578. device=None,
  579. dtype=None,
  580. ) -> None:
  581. factory_kwargs = {"device": device, "dtype": dtype}
  582. kernel_size_ = _triple(kernel_size)
  583. stride_ = _triple(stride)
  584. padding_ = padding if isinstance(padding, str) else _triple(padding)
  585. dilation_ = _triple(dilation)
  586. super().__init__(
  587. in_channels,
  588. out_channels,
  589. kernel_size_,
  590. stride_,
  591. padding_,
  592. dilation_,
  593. False,
  594. _triple(0),
  595. groups,
  596. bias,
  597. padding_mode,
  598. **factory_kwargs,
  599. )
  600. def _conv_forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor]):
  601. if self.padding_mode != "zeros":
  602. return F.conv3d(
  603. F.pad(
  604. input, self._reversed_padding_repeated_twice, mode=self.padding_mode
  605. ),
  606. weight,
  607. bias,
  608. self.stride,
  609. _triple(0),
  610. self.dilation,
  611. self.groups,
  612. )
  613. return F.conv3d(
  614. input, weight, bias, self.stride, self.padding, self.dilation, self.groups
  615. )
  616. def forward(self, input: Tensor) -> Tensor:
  617. return self._conv_forward(input, self.weight, self.bias)
  618. class _ConvTransposeNd(_ConvNd):
  619. def __init__(
  620. self,
  621. in_channels,
  622. out_channels,
  623. kernel_size,
  624. stride,
  625. padding,
  626. dilation,
  627. transposed,
  628. output_padding,
  629. groups,
  630. bias,
  631. padding_mode,
  632. device=None,
  633. dtype=None,
  634. ) -> None:
  635. if padding_mode != "zeros":
  636. raise ValueError(
  637. f'Only "zeros" padding mode is supported for {self.__class__.__name__}'
  638. )
  639. factory_kwargs = {"device": device, "dtype": dtype}
  640. super().__init__(
  641. in_channels,
  642. out_channels,
  643. kernel_size,
  644. stride,
  645. padding,
  646. dilation,
  647. transposed,
  648. output_padding,
  649. groups,
  650. bias,
  651. padding_mode,
  652. **factory_kwargs,
  653. )
  654. # dilation being an optional parameter is for backwards
  655. # compatibility
  656. def _output_padding(
  657. self,
  658. input: Tensor,
  659. output_size: Optional[list[int]],
  660. stride: list[int],
  661. padding: list[int],
  662. kernel_size: list[int],
  663. num_spatial_dims: int,
  664. dilation: Optional[list[int]] = None,
  665. ) -> list[int]:
  666. if output_size is None:
  667. ret = _single(self.output_padding) # converting to list if was not already
  668. else:
  669. has_batch_dim = input.dim() == num_spatial_dims + 2
  670. num_non_spatial_dims = 2 if has_batch_dim else 1
  671. if len(output_size) == num_non_spatial_dims + num_spatial_dims:
  672. output_size = output_size[num_non_spatial_dims:]
  673. if len(output_size) != num_spatial_dims:
  674. raise ValueError(
  675. f"ConvTranspose{num_spatial_dims}D: for {input.dim()}D input, output_size must have {num_spatial_dims} "
  676. f"or {num_non_spatial_dims + num_spatial_dims} elements (got {len(output_size)})"
  677. )
  678. min_sizes = torch.jit.annotate(list[int], [])
  679. max_sizes = torch.jit.annotate(list[int], [])
  680. for d in range(num_spatial_dims):
  681. dim_size = (
  682. (input.size(d + num_non_spatial_dims) - 1) * stride[d]
  683. - 2 * padding[d]
  684. + (dilation[d] if dilation is not None else 1)
  685. * (kernel_size[d] - 1)
  686. + 1
  687. )
  688. min_sizes.append(dim_size)
  689. max_sizes.append(min_sizes[d] + stride[d] - 1)
  690. for i in range(len(output_size)):
  691. size = output_size[i]
  692. min_size = min_sizes[i]
  693. max_size = max_sizes[i]
  694. if size < min_size or size > max_size:
  695. raise ValueError(
  696. f"requested an output size of {output_size}, but valid sizes range "
  697. f"from {min_sizes} to {max_sizes} (for an input of {input.size()[2:]})"
  698. )
  699. res = torch.jit.annotate(list[int], [])
  700. for d in range(num_spatial_dims):
  701. res.append(output_size[d] - min_sizes[d])
  702. ret = res
  703. return ret
  704. class ConvTranspose1d(_ConvTransposeNd):
  705. __doc__ = (
  706. r"""Applies a 1D transposed convolution operator over an input image
  707. composed of several input planes.
  708. This module can be seen as the gradient of Conv1d with respect to its input.
  709. It is also known as a fractionally-strided convolution or
  710. a deconvolution (although it is not an actual deconvolution operation as it does
  711. not compute a true inverse of convolution). For more information, see the visualizations
  712. `here`_ and the `Deconvolutional Networks`_ paper.
  713. This module supports :ref:`TensorFloat32<tf32_on_ampere>`.
  714. On certain ROCm devices, when using float16 inputs this module will use :ref:`different precision<fp16_on_mi200>` for backward.
  715. * :attr:`stride` controls the stride for the cross-correlation.
  716. * :attr:`padding` controls the amount of implicit zero padding on both
  717. sides for ``dilation * (kernel_size - 1) - padding`` number of points. See note
  718. below for details.
  719. * :attr:`output_padding` controls the additional size added to one side
  720. of the output shape. See note below for details.
  721. """
  722. """
  723. * :attr:`dilation` controls the spacing between the kernel points; also known as the \u00e0 trous algorithm.
  724. It is harder to describe, but the link `here`_ has a nice visualization of what :attr:`dilation` does.
  725. """
  726. r"""
  727. {groups_note}
  728. Note:
  729. The :attr:`padding` argument effectively adds ``dilation * (kernel_size - 1) - padding``
  730. amount of zero padding to both sizes of the input. This is set so that
  731. when a :class:`~torch.nn.Conv1d` and a :class:`~torch.nn.ConvTranspose1d`
  732. are initialized with same parameters, they are inverses of each other in
  733. regard to the input and output shapes. However, when ``stride > 1``,
  734. :class:`~torch.nn.Conv1d` maps multiple input shapes to the same output
  735. shape. :attr:`output_padding` is provided to resolve this ambiguity by
  736. effectively increasing the calculated output shape on one side. Note
  737. that :attr:`output_padding` is only used to find output shape, but does
  738. not actually add zero-padding to output.
  739. Note:
  740. In some circumstances when using the CUDA backend with CuDNN, this operator
  741. may select a nondeterministic algorithm to increase performance. If this is
  742. undesirable, you can try to make the operation deterministic (potentially at
  743. a performance cost) by setting ``torch.backends.cudnn.deterministic =
  744. True``.
  745. Please see the notes on :doc:`/notes/randomness` for background.
  746. Args:
  747. in_channels (int): Number of channels in the input image
  748. out_channels (int): Number of channels produced by the convolution
  749. kernel_size (int or tuple): Size of the convolving kernel
  750. stride (int or tuple, optional): Stride of the convolution. Default: 1
  751. padding (int or tuple, optional): ``dilation * (kernel_size - 1) - padding`` zero-padding
  752. will be added to both sides of the input. Default: 0
  753. output_padding (int or tuple, optional): Additional size added to one side
  754. of the output shape. Default: 0
  755. groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
  756. bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
  757. dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
  758. """.format(**reproducibility_notes, **convolution_notes)
  759. + r"""
  760. Shape:
  761. - Input: :math:`(N, C_{in}, L_{in})` or :math:`(C_{in}, L_{in})`
  762. - Output: :math:`(N, C_{out}, L_{out})` or :math:`(C_{out}, L_{out})`, where
  763. .. math::
  764. L_{out} = (L_{in} - 1) \times \text{stride} - 2 \times \text{padding} + \text{dilation}
  765. \times (\text{kernel\_size} - 1) + \text{output\_padding} + 1
  766. Attributes:
  767. weight (Tensor): the learnable weights of the module of shape
  768. :math:`(\text{in\_channels}, \frac{\text{out\_channels}}{\text{groups}},`
  769. :math:`\text{kernel\_size})`.
  770. The values of these weights are sampled from
  771. :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
  772. :math:`k = \frac{groups}{C_\text{out} * \text{kernel\_size}}`
  773. bias (Tensor): the learnable bias of the module of shape (out_channels).
  774. If :attr:`bias` is ``True``, then the values of these weights are
  775. sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
  776. :math:`k = \frac{groups}{C_\text{out} * \text{kernel\_size}}`
  777. .. _`here`:
  778. https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
  779. .. _`Deconvolutional Networks`:
  780. https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf
  781. """
  782. )
  783. def __init__(
  784. self,
  785. in_channels: int,
  786. out_channels: int,
  787. kernel_size: _size_1_t,
  788. stride: _size_1_t = 1,
  789. padding: _size_1_t = 0,
  790. output_padding: _size_1_t = 0,
  791. groups: int = 1,
  792. bias: bool = True,
  793. dilation: _size_1_t = 1,
  794. padding_mode: Literal["zeros", "reflect", "replicate", "circular"] = "zeros",
  795. device=None,
  796. dtype=None,
  797. ) -> None:
  798. factory_kwargs = {"device": device, "dtype": dtype}
  799. kernel_size = _single(kernel_size)
  800. stride = _single(stride)
  801. padding = _single(padding)
  802. dilation = _single(dilation)
  803. output_padding = _single(output_padding)
  804. super().__init__(
  805. in_channels,
  806. out_channels,
  807. kernel_size,
  808. stride,
  809. padding,
  810. dilation,
  811. True,
  812. output_padding,
  813. groups,
  814. bias,
  815. padding_mode,
  816. **factory_kwargs,
  817. )
  818. def forward(self, input: Tensor, output_size: Optional[list[int]] = None) -> Tensor:
  819. if self.padding_mode != "zeros":
  820. raise ValueError(
  821. "Only `zeros` padding mode is supported for ConvTranspose1d"
  822. )
  823. assert isinstance(self.padding, tuple)
  824. # One cannot replace List by Tuple or Sequence in "_output_padding" because
  825. # TorchScript does not support `Sequence[T]` or `Tuple[T, ...]`.
  826. num_spatial_dims = 1
  827. output_padding = self._output_padding(
  828. input,
  829. output_size,
  830. self.stride, # type: ignore[arg-type]
  831. self.padding, # type: ignore[arg-type]
  832. self.kernel_size, # type: ignore[arg-type]
  833. num_spatial_dims,
  834. self.dilation, # type: ignore[arg-type]
  835. )
  836. return F.conv_transpose1d(
  837. input,
  838. self.weight,
  839. self.bias,
  840. self.stride,
  841. self.padding,
  842. output_padding,
  843. self.groups,
  844. self.dilation,
  845. )
  846. class ConvTranspose2d(_ConvTransposeNd):
  847. __doc__ = (
  848. r"""Applies a 2D transposed convolution operator over an input image
  849. composed of several input planes.
  850. This module can be seen as the gradient of Conv2d with respect to its input.
  851. It is also known as a fractionally-strided convolution or
  852. a deconvolution (although it is not an actual deconvolution operation as it does
  853. not compute a true inverse of convolution). For more information, see the visualizations
  854. `here`_ and the `Deconvolutional Networks`_ paper.
  855. This module supports :ref:`TensorFloat32<tf32_on_ampere>`.
  856. On certain ROCm devices, when using float16 inputs this module will use :ref:`different precision<fp16_on_mi200>` for backward.
  857. * :attr:`stride` controls the stride for the cross-correlation. When stride > 1, ConvTranspose2d inserts zeros between input
  858. elements along the spatial dimensions before applying the convolution kernel. This zero-insertion operation is the standard
  859. behavior of transposed convolutions, which can increase the spatial resolution and is equivalent to a learnable
  860. upsampling operation.
  861. * :attr:`padding` controls the amount of implicit zero padding on both
  862. sides for ``dilation * (kernel_size - 1) - padding`` number of points. See note
  863. below for details.
  864. * :attr:`output_padding` controls the additional size added to one side
  865. of the output shape. See note below for details.
  866. """
  867. """
  868. * :attr:`dilation` controls the spacing between the kernel points; also known as the \u00e0 trous algorithm.
  869. It is harder to describe, but the link `here`_ has a nice visualization of what :attr:`dilation` does.
  870. """
  871. r"""
  872. {groups_note}
  873. The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`output_padding`
  874. can either be:
  875. - a single ``int`` -- in which case the same value is used for the height and width dimensions
  876. - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
  877. and the second `int` for the width dimension
  878. Note:
  879. The :attr:`padding` argument effectively adds ``dilation * (kernel_size - 1) - padding``
  880. amount of zero padding to both sizes of the input. This is set so that
  881. when a :class:`~torch.nn.Conv2d` and a :class:`~torch.nn.ConvTranspose2d`
  882. are initialized with same parameters, they are inverses of each other in
  883. regard to the input and output shapes. However, when ``stride > 1``,
  884. :class:`~torch.nn.Conv2d` maps multiple input shapes to the same output
  885. shape. :attr:`output_padding` is provided to resolve this ambiguity by
  886. effectively increasing the calculated output shape on one side. Note
  887. that :attr:`output_padding` is only used to find output shape, but does
  888. not actually add zero-padding to output.
  889. Note:
  890. {cudnn_reproducibility_note}
  891. Args:
  892. in_channels (int): Number of channels in the input image
  893. out_channels (int): Number of channels produced by the convolution
  894. kernel_size (int or tuple): Size of the convolving kernel
  895. stride (int or tuple, optional): Stride of the convolution. Default: 1
  896. padding (int or tuple, optional): ``dilation * (kernel_size - 1) - padding`` zero-padding
  897. will be added to both sides of each dimension in the input. Default: 0
  898. output_padding (int or tuple, optional): Additional size added to one side
  899. of each dimension in the output shape. Default: 0
  900. groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
  901. bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
  902. dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
  903. """.format(**reproducibility_notes, **convolution_notes)
  904. + r"""
  905. Shape:
  906. - Input: :math:`(N, C_{in}, H_{in}, W_{in})` or :math:`(C_{in}, H_{in}, W_{in})`
  907. - Output: :math:`(N, C_{out}, H_{out}, W_{out})` or :math:`(C_{out}, H_{out}, W_{out})`, where
  908. .. math::
  909. H_{out} = (H_{in} - 1) \times \text{stride}[0] - 2 \times \text{padding}[0] + \text{dilation}[0]
  910. \times (\text{kernel\_size}[0] - 1) + \text{output\_padding}[0] + 1
  911. .. math::
  912. W_{out} = (W_{in} - 1) \times \text{stride}[1] - 2 \times \text{padding}[1] + \text{dilation}[1]
  913. \times (\text{kernel\_size}[1] - 1) + \text{output\_padding}[1] + 1
  914. Attributes:
  915. weight (Tensor): the learnable weights of the module of shape
  916. :math:`(\text{in\_channels}, \frac{\text{out\_channels}}{\text{groups}},`
  917. :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]})`.
  918. The values of these weights are sampled from
  919. :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
  920. :math:`k = \frac{groups}{C_\text{out} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
  921. bias (Tensor): the learnable bias of the module of shape (out_channels)
  922. If :attr:`bias` is ``True``, then the values of these weights are
  923. sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
  924. :math:`k = \frac{groups}{C_\text{out} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
  925. Examples::
  926. >>> # With square kernels and equal stride
  927. >>> m = nn.ConvTranspose2d(16, 33, 3, stride=2)
  928. >>> # non-square kernels and unequal stride and with padding
  929. >>> m = nn.ConvTranspose2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
  930. >>> input = torch.randn(20, 16, 50, 100)
  931. >>> output = m(input)
  932. >>> # exact output size can be also specified as an argument
  933. >>> input = torch.randn(1, 16, 12, 12)
  934. >>> downsample = nn.Conv2d(16, 16, 3, stride=2, padding=1)
  935. >>> upsample = nn.ConvTranspose2d(16, 16, 3, stride=2, padding=1)
  936. >>> h = downsample(input)
  937. >>> h.size()
  938. torch.Size([1, 16, 6, 6])
  939. >>> output = upsample(h, output_size=input.size())
  940. >>> output.size()
  941. torch.Size([1, 16, 12, 12])
  942. .. _`here`:
  943. https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
  944. .. _`Deconvolutional Networks`:
  945. https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf
  946. """
  947. )
  948. def __init__(
  949. self,
  950. in_channels: int,
  951. out_channels: int,
  952. kernel_size: _size_2_t,
  953. stride: _size_2_t = 1,
  954. padding: _size_2_t = 0,
  955. output_padding: _size_2_t = 0,
  956. groups: int = 1,
  957. bias: bool = True,
  958. dilation: _size_2_t = 1,
  959. padding_mode: Literal["zeros", "reflect", "replicate", "circular"] = "zeros",
  960. device=None,
  961. dtype=None,
  962. ) -> None:
  963. factory_kwargs = {"device": device, "dtype": dtype}
  964. kernel_size = _pair(kernel_size)
  965. stride = _pair(stride)
  966. padding = _pair(padding)
  967. dilation = _pair(dilation)
  968. output_padding = _pair(output_padding)
  969. super().__init__(
  970. in_channels,
  971. out_channels,
  972. kernel_size,
  973. stride,
  974. padding,
  975. dilation,
  976. True,
  977. output_padding,
  978. groups,
  979. bias,
  980. padding_mode,
  981. **factory_kwargs,
  982. )
  983. def forward(self, input: Tensor, output_size: Optional[list[int]] = None) -> Tensor:
  984. """
  985. Performs the forward pass.
  986. Attributes:
  987. input (Tensor): The input tensor.
  988. output_size (list[int], optional): A list of integers representing
  989. the size of the output tensor. Default is None.
  990. """
  991. if self.padding_mode != "zeros":
  992. raise ValueError(
  993. "Only `zeros` padding mode is supported for ConvTranspose2d"
  994. )
  995. assert isinstance(self.padding, tuple)
  996. # One cannot replace List by Tuple or Sequence in "_output_padding" because
  997. # TorchScript does not support `Sequence[T]` or `Tuple[T, ...]`.
  998. num_spatial_dims = 2
  999. output_padding = self._output_padding(
  1000. input,
  1001. output_size,
  1002. self.stride, # type: ignore[arg-type]
  1003. self.padding, # type: ignore[arg-type]
  1004. self.kernel_size, # type: ignore[arg-type]
  1005. num_spatial_dims,
  1006. self.dilation, # type: ignore[arg-type]
  1007. )
  1008. return F.conv_transpose2d(
  1009. input,
  1010. self.weight,
  1011. self.bias,
  1012. self.stride,
  1013. self.padding,
  1014. output_padding,
  1015. self.groups,
  1016. self.dilation,
  1017. )
  1018. class ConvTranspose3d(_ConvTransposeNd):
  1019. __doc__ = (
  1020. r"""Applies a 3D transposed convolution operator over an input image composed of several input
  1021. planes.
  1022. The transposed convolution operator multiplies each input value element-wise by a learnable kernel,
  1023. and sums over the outputs from all input feature planes.
  1024. This module can be seen as the gradient of Conv3d with respect to its input.
  1025. It is also known as a fractionally-strided convolution or
  1026. a deconvolution (although it is not an actual deconvolution operation as it does
  1027. not compute a true inverse of convolution). For more information, see the visualizations
  1028. `here`_ and the `Deconvolutional Networks`_ paper.
  1029. This module supports :ref:`TensorFloat32<tf32_on_ampere>`.
  1030. On certain ROCm devices, when using float16 inputs this module will use :ref:`different precision<fp16_on_mi200>` for backward.
  1031. * :attr:`stride` controls the stride for the cross-correlation.
  1032. * :attr:`padding` controls the amount of implicit zero padding on both
  1033. sides for ``dilation * (kernel_size - 1) - padding`` number of points. See note
  1034. below for details.
  1035. * :attr:`output_padding` controls the additional size added to one side
  1036. of the output shape. See note below for details.
  1037. """
  1038. """
  1039. * :attr:`dilation` controls the spacing between the kernel points; also known as the \u00e0 trous algorithm.
  1040. It is harder to describe, but the link `here`_ has a nice visualization of what :attr:`dilation` does.
  1041. """
  1042. r"""
  1043. {groups_note}
  1044. The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`output_padding`
  1045. can either be:
  1046. - a single ``int`` -- in which case the same value is used for the depth, height and width dimensions
  1047. - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension,
  1048. the second `int` for the height dimension and the third `int` for the width dimension
  1049. Note:
  1050. The :attr:`padding` argument effectively adds ``dilation * (kernel_size - 1) - padding``
  1051. amount of zero padding to both sizes of the input. This is set so that
  1052. when a :class:`~torch.nn.Conv3d` and a :class:`~torch.nn.ConvTranspose3d`
  1053. are initialized with same parameters, they are inverses of each other in
  1054. regard to the input and output shapes. However, when ``stride > 1``,
  1055. :class:`~torch.nn.Conv3d` maps multiple input shapes to the same output
  1056. shape. :attr:`output_padding` is provided to resolve this ambiguity by
  1057. effectively increasing the calculated output shape on one side. Note
  1058. that :attr:`output_padding` is only used to find output shape, but does
  1059. not actually add zero-padding to output.
  1060. Note:
  1061. {cudnn_reproducibility_note}
  1062. Args:
  1063. in_channels (int): Number of channels in the input image
  1064. out_channels (int): Number of channels produced by the convolution
  1065. kernel_size (int or tuple): Size of the convolving kernel
  1066. stride (int or tuple, optional): Stride of the convolution. Default: 1
  1067. padding (int or tuple, optional): ``dilation * (kernel_size - 1) - padding`` zero-padding
  1068. will be added to both sides of each dimension in the input. Default: 0
  1069. output_padding (int or tuple, optional): Additional size added to one side
  1070. of each dimension in the output shape. Default: 0
  1071. groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
  1072. bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
  1073. dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
  1074. """.format(**reproducibility_notes, **convolution_notes)
  1075. + r"""
  1076. Shape:
  1077. - Input: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` or :math:`(C_{in}, D_{in}, H_{in}, W_{in})`
  1078. - Output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` or
  1079. :math:`(C_{out}, D_{out}, H_{out}, W_{out})`, where
  1080. .. math::
  1081. D_{out} = (D_{in} - 1) \times \text{stride}[0] - 2 \times \text{padding}[0] + \text{dilation}[0]
  1082. \times (\text{kernel\_size}[0] - 1) + \text{output\_padding}[0] + 1
  1083. .. math::
  1084. H_{out} = (H_{in} - 1) \times \text{stride}[1] - 2 \times \text{padding}[1] + \text{dilation}[1]
  1085. \times (\text{kernel\_size}[1] - 1) + \text{output\_padding}[1] + 1
  1086. .. math::
  1087. W_{out} = (W_{in} - 1) \times \text{stride}[2] - 2 \times \text{padding}[2] + \text{dilation}[2]
  1088. \times (\text{kernel\_size}[2] - 1) + \text{output\_padding}[2] + 1
  1089. Attributes:
  1090. weight (Tensor): the learnable weights of the module of shape
  1091. :math:`(\text{in\_channels}, \frac{\text{out\_channels}}{\text{groups}},`
  1092. :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]}, \text{kernel\_size[2]})`.
  1093. The values of these weights are sampled from
  1094. :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
  1095. :math:`k = \frac{groups}{C_\text{out} * \prod_{i=0}^{2}\text{kernel\_size}[i]}`
  1096. bias (Tensor): the learnable bias of the module of shape (out_channels)
  1097. If :attr:`bias` is ``True``, then the values of these weights are
  1098. sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
  1099. :math:`k = \frac{groups}{C_\text{out} * \prod_{i=0}^{2}\text{kernel\_size}[i]}`
  1100. Examples::
  1101. >>> # With square kernels and equal stride
  1102. >>> m = nn.ConvTranspose3d(16, 33, 3, stride=2)
  1103. >>> # non-square kernels and unequal stride and with padding
  1104. >>> m = nn.ConvTranspose3d(16, 33, (3, 5, 2), stride=(2, 1, 1), padding=(0, 4, 2))
  1105. >>> input = torch.randn(20, 16, 10, 50, 100)
  1106. >>> output = m(input)
  1107. .. _`here`:
  1108. https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
  1109. .. _`Deconvolutional Networks`:
  1110. https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf
  1111. """
  1112. )
  1113. def __init__(
  1114. self,
  1115. in_channels: int,
  1116. out_channels: int,
  1117. kernel_size: _size_3_t,
  1118. stride: _size_3_t = 1,
  1119. padding: _size_3_t = 0,
  1120. output_padding: _size_3_t = 0,
  1121. groups: int = 1,
  1122. bias: bool = True,
  1123. dilation: _size_3_t = 1,
  1124. padding_mode: Literal["zeros", "reflect", "replicate", "circular"] = "zeros",
  1125. device=None,
  1126. dtype=None,
  1127. ) -> None:
  1128. factory_kwargs = {"device": device, "dtype": dtype}
  1129. kernel_size = _triple(kernel_size)
  1130. stride = _triple(stride)
  1131. padding = _triple(padding)
  1132. dilation = _triple(dilation)
  1133. output_padding = _triple(output_padding)
  1134. super().__init__(
  1135. in_channels,
  1136. out_channels,
  1137. kernel_size,
  1138. stride,
  1139. padding,
  1140. dilation,
  1141. True,
  1142. output_padding,
  1143. groups,
  1144. bias,
  1145. padding_mode,
  1146. **factory_kwargs,
  1147. )
  1148. def forward(self, input: Tensor, output_size: Optional[list[int]] = None) -> Tensor:
  1149. if self.padding_mode != "zeros":
  1150. raise ValueError(
  1151. "Only `zeros` padding mode is supported for ConvTranspose3d"
  1152. )
  1153. assert isinstance(self.padding, tuple)
  1154. # One cannot replace List by Tuple or Sequence in "_output_padding" because
  1155. # TorchScript does not support `Sequence[T]` or `Tuple[T, ...]`.
  1156. num_spatial_dims = 3
  1157. output_padding = self._output_padding(
  1158. input,
  1159. output_size,
  1160. self.stride, # type: ignore[arg-type]
  1161. self.padding, # type: ignore[arg-type]
  1162. self.kernel_size, # type: ignore[arg-type]
  1163. num_spatial_dims,
  1164. self.dilation, # type: ignore[arg-type]
  1165. )
  1166. return F.conv_transpose3d(
  1167. input,
  1168. self.weight,
  1169. self.bias,
  1170. self.stride,
  1171. self.padding,
  1172. output_padding,
  1173. self.groups,
  1174. self.dilation,
  1175. )
  1176. # TODO: Deprecate and remove the following alias `_ConvTransposeMixin`.
  1177. #
# `_ConvTransposeMixin` was a mixin that was removed. It was meant to be used
# with `_ConvNd` to construct actual module classes that implement conv
# transpose ops:
#
#   class MyConvTranspose(_ConvNd, _ConvTransposeMixin):
#       ...
#
# In PyTorch, it has been replaced by `_ConvTransposeNd`, which is a proper
# subclass of `_ConvNd`. However, some user code in the wild still (incorrectly)
# uses the internal class `_ConvTransposeMixin`. Hence, we provide this alias
# for BC, because it is cheap and easy for us to do so, even though
# `_ConvTransposeNd` is really not a mixin anymore (but multiple inheritance as
# above would still work).
  1191. class _ConvTransposeMixin(_ConvTransposeNd):
  1192. @deprecated(
  1193. "`_ConvTransposeMixin` is a deprecated internal class. "
  1194. "Please consider using public APIs.",
  1195. category=FutureWarning,
  1196. )
  1197. def __init__(self, *args, **kwargs) -> None:
  1198. super().__init__(*args, **kwargs)
  1199. # TODO: Conv2dLocal
  1200. # TODO: Conv2dMap
  1201. # TODO: ConvTranspose2dMap
  1202. class _LazyConvXdMixin(LazyModuleMixin):
  1203. groups: int
  1204. transposed: bool
  1205. in_channels: int
  1206. out_channels: int
  1207. kernel_size: tuple[int, ...]
  1208. weight: UninitializedParameter
  1209. bias: UninitializedParameter
  1210. def reset_parameters(self) -> None:
  1211. # has_uninitialized_params is defined in parent class and it is using a protocol on self
  1212. if not self.has_uninitialized_params() and self.in_channels != 0: # type: ignore[misc]
  1213. # "type:ignore[..]" is required because mypy thinks that "reset_parameters" is undefined
  1214. # in super class. Turns out that it is defined in _ConvND which is inherited by any class
  1215. # that also inherits _LazyConvXdMixin
  1216. super().reset_parameters() # type: ignore[misc]
  1217. # Signature of "initialize_parameters" is incompatible with the definition in supertype LazyModuleMixin
  1218. def initialize_parameters(self, input: Tensor, *args, **kwargs) -> None: # type: ignore[override]
  1219. # defined by parent class but using a protocol
  1220. if self.has_uninitialized_params(): # type: ignore[misc]
  1221. self.in_channels = self._get_in_channels(input)
  1222. if self.in_channels % self.groups != 0:
  1223. raise ValueError("in_channels must be divisible by groups")
  1224. assert isinstance(self.weight, UninitializedParameter)
  1225. if self.transposed:
  1226. self.weight.materialize(
  1227. (
  1228. self.in_channels,
  1229. self.out_channels // self.groups,
  1230. *self.kernel_size,
  1231. )
  1232. )
  1233. else:
  1234. self.weight.materialize(
  1235. (
  1236. self.out_channels,
  1237. self.in_channels // self.groups,
  1238. *self.kernel_size,
  1239. )
  1240. )
  1241. if self.bias is not None:
  1242. assert isinstance(self.bias, UninitializedParameter)
  1243. self.bias.materialize((self.out_channels,))
  1244. self.reset_parameters()
  1245. # Function to extract in_channels from first input.
  1246. def _get_in_channels(self, input: Tensor) -> int:
  1247. num_spatial_dims = self._get_num_spatial_dims()
  1248. num_dims_no_batch = num_spatial_dims + 1 # +1 for channels dim
  1249. num_dims_batch = num_dims_no_batch + 1
  1250. if input.dim() not in (num_dims_no_batch, num_dims_batch):
  1251. raise RuntimeError(
  1252. f"Expected {num_dims_no_batch}D (unbatched) or {num_dims_batch}D (batched) input "
  1253. f"to {self.__class__.__name__}, but "
  1254. f"got input of size: {input.shape}"
  1255. )
  1256. return input.shape[1] if input.dim() == num_dims_batch else input.shape[0]
  1257. # Function to return the number of spatial dims expected for inputs to the module.
  1258. # This is expected to be implemented by subclasses.
  1259. def _get_num_spatial_dims(self) -> int:
  1260. raise NotImplementedError
# LazyConv1d defines weight as a Tensor but derived class defines it as UninitializedParameter
  1262. class LazyConv1d(_LazyConvXdMixin, Conv1d): # type: ignore[misc]
  1263. r"""A :class:`torch.nn.Conv1d` module with lazy initialization of the ``in_channels`` argument.
  1264. The ``in_channels`` argument of the :class:`Conv1d` is inferred from the ``input.size(1)``.
  1265. The attributes that will be lazily initialized are `weight` and `bias`.
  1266. Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
  1267. on lazy modules and their limitations.
  1268. Args:
  1269. out_channels (int): Number of channels produced by the convolution
  1270. kernel_size (int or tuple): Size of the convolving kernel
  1271. stride (int or tuple, optional): Stride of the convolution. Default: 1
  1272. padding (int or tuple, optional): Zero-padding added to both sides of
  1273. the input. Default: 0
  1274. dilation (int or tuple, optional): Spacing between kernel
  1275. elements. Default: 1
  1276. groups (int, optional): Number of blocked connections from input
  1277. channels to output channels. Default: 1
  1278. bias (bool, optional): If ``True``, adds a learnable bias to the
  1279. output. Default: ``True``
  1280. padding_mode (str, optional): ``'zeros'``, ``'reflect'``,
  1281. ``'replicate'`` or ``'circular'``. Default: ``'zeros'``
  1282. .. seealso:: :class:`torch.nn.Conv1d` and :class:`torch.nn.modules.lazy.LazyModuleMixin`
  1283. """
  1284. # super class define this variable as None. "type: ignore[..] is required
  1285. # since we are redefining the variable.
  1286. cls_to_become = Conv1d # type: ignore[assignment]
  1287. def __init__(
  1288. self,
  1289. out_channels: int,
  1290. kernel_size: _size_1_t,
  1291. stride: _size_1_t = 1,
  1292. padding: _size_1_t = 0,
  1293. dilation: _size_1_t = 1,
  1294. groups: int = 1,
  1295. bias: bool = True,
  1296. padding_mode: Literal["zeros", "reflect", "replicate", "circular"] = "zeros",
  1297. device=None,
  1298. dtype=None,
  1299. ) -> None:
  1300. factory_kwargs = {"device": device, "dtype": dtype}
  1301. super().__init__(
  1302. 0,
  1303. 0,
  1304. kernel_size,
  1305. stride,
  1306. padding,
  1307. dilation,
  1308. groups,
  1309. # bias is hardcoded to False to avoid creating tensor
  1310. # that will soon be overwritten.
  1311. False,
  1312. padding_mode,
  1313. **factory_kwargs,
  1314. )
  1315. self.weight = UninitializedParameter(**factory_kwargs)
  1316. self.out_channels = out_channels
  1317. if bias:
  1318. self.bias = UninitializedParameter(**factory_kwargs)
  1319. def _get_num_spatial_dims(self) -> int:
  1320. return 1
# Conv2d defines weight as a Tensor, but the derived class LazyConv2d defines it as an UninitializedParameter
  1322. class LazyConv2d(_LazyConvXdMixin, Conv2d): # type: ignore[misc]
  1323. r"""A :class:`torch.nn.Conv2d` module with lazy initialization of the ``in_channels`` argument.
  1324. The ``in_channels`` argument of the :class:`Conv2d` that is inferred from the ``input.size(1)``.
  1325. The attributes that will be lazily initialized are `weight` and `bias`.
  1326. Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
  1327. on lazy modules and their limitations.
  1328. Args:
  1329. out_channels (int): Number of channels produced by the convolution
  1330. kernel_size (int or tuple): Size of the convolving kernel
  1331. stride (int or tuple, optional): Stride of the convolution. Default: 1
  1332. padding (int or tuple, optional): Zero-padding added to both sides of
  1333. the input. Default: 0
  1334. dilation (int or tuple, optional): Spacing between kernel
  1335. elements. Default: 1
  1336. groups (int, optional): Number of blocked connections from input
  1337. channels to output channels. Default: 1
  1338. bias (bool, optional): If ``True``, adds a learnable bias to the
  1339. output. Default: ``True``
  1340. padding_mode (str, optional): ``'zeros'``, ``'reflect'``,
  1341. ``'replicate'`` or ``'circular'``. Default: ``'zeros'``
  1342. .. seealso:: :class:`torch.nn.Conv2d` and :class:`torch.nn.modules.lazy.LazyModuleMixin`
  1343. """
  1344. # super class define this variable as None. "type: ignore[..] is required
  1345. # since we are redefining the variable.
  1346. cls_to_become = Conv2d # type: ignore[assignment]
  1347. def __init__(
  1348. self,
  1349. out_channels: int,
  1350. kernel_size: _size_2_t,
  1351. stride: _size_2_t = 1,
  1352. padding: _size_2_t = 0,
  1353. dilation: _size_2_t = 1,
  1354. groups: int = 1,
  1355. bias: bool = True,
  1356. padding_mode: Literal["zeros", "reflect", "replicate", "circular"] = "zeros",
  1357. device=None,
  1358. dtype=None,
  1359. ) -> None:
  1360. factory_kwargs = {"device": device, "dtype": dtype}
  1361. super().__init__(
  1362. 0,
  1363. 0,
  1364. kernel_size,
  1365. stride,
  1366. padding,
  1367. dilation,
  1368. groups,
  1369. # bias is hardcoded to False to avoid creating tensor
  1370. # that will soon be overwritten.
  1371. False,
  1372. padding_mode,
  1373. **factory_kwargs,
  1374. )
  1375. self.weight = UninitializedParameter(**factory_kwargs)
  1376. self.out_channels = out_channels
  1377. if bias:
  1378. self.bias = UninitializedParameter(**factory_kwargs)
  1379. def _get_num_spatial_dims(self) -> int:
  1380. return 2
# Conv3d defines weight as a Tensor, but the derived class LazyConv3d defines it as an UninitializedParameter
  1382. class LazyConv3d(_LazyConvXdMixin, Conv3d): # type: ignore[misc]
  1383. r"""A :class:`torch.nn.Conv3d` module with lazy initialization of the ``in_channels`` argument.
  1384. The ``in_channels`` argument of the :class:`Conv3d` that is inferred from
  1385. the ``input.size(1)``.
  1386. The attributes that will be lazily initialized are `weight` and `bias`.
  1387. Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
  1388. on lazy modules and their limitations.
  1389. Args:
  1390. out_channels (int): Number of channels produced by the convolution
  1391. kernel_size (int or tuple): Size of the convolving kernel
  1392. stride (int or tuple, optional): Stride of the convolution. Default: 1
  1393. padding (int or tuple, optional): Zero-padding added to both sides of
  1394. the input. Default: 0
  1395. dilation (int or tuple, optional): Spacing between kernel
  1396. elements. Default: 1
  1397. groups (int, optional): Number of blocked connections from input
  1398. channels to output channels. Default: 1
  1399. bias (bool, optional): If ``True``, adds a learnable bias to the
  1400. output. Default: ``True``
  1401. padding_mode (str, optional): ``'zeros'``, ``'reflect'``,
  1402. ``'replicate'`` or ``'circular'``. Default: ``'zeros'``
  1403. .. seealso:: :class:`torch.nn.Conv3d` and :class:`torch.nn.modules.lazy.LazyModuleMixin`
  1404. """
  1405. # super class define this variable as None. "type: ignore[..] is required
  1406. # since we are redefining the variable.
  1407. cls_to_become = Conv3d # type: ignore[assignment]
  1408. def __init__(
  1409. self,
  1410. out_channels: int,
  1411. kernel_size: _size_3_t,
  1412. stride: _size_3_t = 1,
  1413. padding: _size_3_t = 0,
  1414. dilation: _size_3_t = 1,
  1415. groups: int = 1,
  1416. bias: bool = True,
  1417. padding_mode: Literal["zeros", "reflect", "replicate", "circular"] = "zeros",
  1418. device=None,
  1419. dtype=None,
  1420. ) -> None:
  1421. factory_kwargs = {"device": device, "dtype": dtype}
  1422. super().__init__(
  1423. 0,
  1424. 0,
  1425. kernel_size,
  1426. stride,
  1427. padding,
  1428. dilation,
  1429. groups,
  1430. # bias is hardcoded to False to avoid creating tensor
  1431. # that will soon be overwritten.
  1432. False,
  1433. padding_mode,
  1434. **factory_kwargs,
  1435. )
  1436. self.weight = UninitializedParameter(**factory_kwargs)
  1437. self.out_channels = out_channels
  1438. if bias:
  1439. self.bias = UninitializedParameter(**factory_kwargs)
  1440. def _get_num_spatial_dims(self) -> int:
  1441. return 3
# ConvTranspose1d defines weight as a Tensor, but the derived class LazyConvTranspose1d defines it as an UninitializedParameter
  1443. class LazyConvTranspose1d(_LazyConvXdMixin, ConvTranspose1d): # type: ignore[misc]
  1444. r"""A :class:`torch.nn.ConvTranspose1d` module with lazy initialization of the ``in_channels`` argument.
  1445. The ``in_channels`` argument of the :class:`ConvTranspose1d` that is inferred from
  1446. the ``input.size(1)``.
  1447. The attributes that will be lazily initialized are `weight` and `bias`.
  1448. Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
  1449. on lazy modules and their limitations.
  1450. Args:
  1451. out_channels (int): Number of channels produced by the convolution
  1452. kernel_size (int or tuple): Size of the convolving kernel
  1453. stride (int or tuple, optional): Stride of the convolution. Default: 1
  1454. padding (int or tuple, optional): ``dilation * (kernel_size - 1) - padding`` zero-padding
  1455. will be added to both sides of the input. Default: 0
  1456. output_padding (int or tuple, optional): Additional size added to one side
  1457. of the output shape. Default: 0
  1458. groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
  1459. bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
  1460. dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
  1461. .. seealso:: :class:`torch.nn.ConvTranspose1d` and :class:`torch.nn.modules.lazy.LazyModuleMixin`
  1462. """
  1463. # super class define this variable as None. "type: ignore[..] is required
  1464. # since we are redefining the variable.
  1465. cls_to_become = ConvTranspose1d # type: ignore[assignment]
  1466. def __init__(
  1467. self,
  1468. out_channels: int,
  1469. kernel_size: _size_1_t,
  1470. stride: _size_1_t = 1,
  1471. padding: _size_1_t = 0,
  1472. output_padding: _size_1_t = 0,
  1473. groups: int = 1,
  1474. bias: bool = True,
  1475. dilation: _size_1_t = 1,
  1476. padding_mode: Literal["zeros", "reflect", "replicate", "circular"] = "zeros",
  1477. device=None,
  1478. dtype=None,
  1479. ) -> None:
  1480. factory_kwargs = {"device": device, "dtype": dtype}
  1481. super().__init__(
  1482. 0,
  1483. 0,
  1484. kernel_size,
  1485. stride,
  1486. padding,
  1487. output_padding,
  1488. groups,
  1489. # bias is hardcoded to False to avoid creating tensor
  1490. # that will soon be overwritten.
  1491. False,
  1492. dilation,
  1493. padding_mode,
  1494. **factory_kwargs,
  1495. )
  1496. self.weight = UninitializedParameter(**factory_kwargs)
  1497. self.out_channels = out_channels
  1498. if bias:
  1499. self.bias = UninitializedParameter(**factory_kwargs)
  1500. def _get_num_spatial_dims(self) -> int:
  1501. return 1
# ConvTranspose2d defines weight as a Tensor, but the derived class LazyConvTranspose2d defines it as an UninitializedParameter
  1503. class LazyConvTranspose2d(_LazyConvXdMixin, ConvTranspose2d): # type: ignore[misc]
  1504. r"""A :class:`torch.nn.ConvTranspose2d` module with lazy initialization of the ``in_channels`` argument.
  1505. The ``in_channels`` argument of the :class:`ConvTranspose2d` is inferred from
  1506. the ``input.size(1)``.
  1507. The attributes that will be lazily initialized are `weight` and `bias`.
  1508. Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
  1509. on lazy modules and their limitations.
  1510. Args:
  1511. out_channels (int): Number of channels produced by the convolution
  1512. kernel_size (int or tuple): Size of the convolving kernel
  1513. stride (int or tuple, optional): Stride of the convolution. Default: 1
  1514. padding (int or tuple, optional): ``dilation * (kernel_size - 1) - padding`` zero-padding
  1515. will be added to both sides of each dimension in the input. Default: 0
  1516. output_padding (int or tuple, optional): Additional size added to one side
  1517. of each dimension in the output shape. Default: 0
  1518. groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
  1519. bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
  1520. dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
  1521. .. seealso:: :class:`torch.nn.ConvTranspose2d` and :class:`torch.nn.modules.lazy.LazyModuleMixin`
  1522. """
  1523. # super class define this variable as None. "type: ignore[..] is required
  1524. # since we are redefining the variable.
  1525. cls_to_become = ConvTranspose2d # type: ignore[assignment]
  1526. def __init__(
  1527. self,
  1528. out_channels: int,
  1529. kernel_size: _size_2_t,
  1530. stride: _size_2_t = 1,
  1531. padding: _size_2_t = 0,
  1532. output_padding: _size_2_t = 0,
  1533. groups: int = 1,
  1534. bias: bool = True,
  1535. dilation: int = 1,
  1536. padding_mode: Literal["zeros", "reflect", "replicate", "circular"] = "zeros",
  1537. device=None,
  1538. dtype=None,
  1539. ) -> None:
  1540. factory_kwargs = {"device": device, "dtype": dtype}
  1541. super().__init__(
  1542. 0,
  1543. 0,
  1544. kernel_size,
  1545. stride,
  1546. padding,
  1547. output_padding,
  1548. groups,
  1549. # bias is hardcoded to False to avoid creating tensor
  1550. # that will soon be overwritten.
  1551. False,
  1552. dilation,
  1553. padding_mode,
  1554. **factory_kwargs,
  1555. )
  1556. self.weight = UninitializedParameter(**factory_kwargs)
  1557. self.out_channels = out_channels
  1558. if bias:
  1559. self.bias = UninitializedParameter(**factory_kwargs)
  1560. def _get_num_spatial_dims(self) -> int:
  1561. return 2
# ConvTranspose3d defines weight as a Tensor, but the derived class LazyConvTranspose3d defines it as an UninitializedParameter
  1563. class LazyConvTranspose3d(_LazyConvXdMixin, ConvTranspose3d): # type: ignore[misc]
  1564. r"""A :class:`torch.nn.ConvTranspose3d` module with lazy initialization of the ``in_channels`` argument.
  1565. The ``in_channels`` argument of the :class:`ConvTranspose3d` is inferred from
  1566. the ``input.size(1)``.
  1567. The attributes that will be lazily initialized are `weight` and `bias`.
  1568. Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
  1569. on lazy modules and their limitations.
  1570. Args:
  1571. out_channels (int): Number of channels produced by the convolution
  1572. kernel_size (int or tuple): Size of the convolving kernel
  1573. stride (int or tuple, optional): Stride of the convolution. Default: 1
  1574. padding (int or tuple, optional): ``dilation * (kernel_size - 1) - padding`` zero-padding
  1575. will be added to both sides of each dimension in the input. Default: 0
  1576. output_padding (int or tuple, optional): Additional size added to one side
  1577. of each dimension in the output shape. Default: 0
  1578. groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
  1579. bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
  1580. dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
  1581. .. seealso:: :class:`torch.nn.ConvTranspose3d` and :class:`torch.nn.modules.lazy.LazyModuleMixin`
  1582. """
  1583. # super class define this variable as None. "type: ignore[..] is required
  1584. # since we are redefining the variable.
  1585. cls_to_become = ConvTranspose3d # type: ignore[assignment]
  1586. def __init__(
  1587. self,
  1588. out_channels: int,
  1589. kernel_size: _size_3_t,
  1590. stride: _size_3_t = 1,
  1591. padding: _size_3_t = 0,
  1592. output_padding: _size_3_t = 0,
  1593. groups: int = 1,
  1594. bias: bool = True,
  1595. dilation: _size_3_t = 1,
  1596. padding_mode: Literal["zeros", "reflect", "replicate", "circular"] = "zeros",
  1597. device=None,
  1598. dtype=None,
  1599. ) -> None:
  1600. factory_kwargs = {"device": device, "dtype": dtype}
  1601. super().__init__(
  1602. 0,
  1603. 0,
  1604. kernel_size,
  1605. stride,
  1606. padding,
  1607. output_padding,
  1608. groups,
  1609. # bias is hardcoded to False to avoid creating tensor
  1610. # that will soon be overwritten.
  1611. False,
  1612. dilation,
  1613. padding_mode,
  1614. **factory_kwargs,
  1615. )
  1616. self.weight = UninitializedParameter(**factory_kwargs)
  1617. self.out_channels = out_channels
  1618. if bias:
  1619. self.bias = UninitializedParameter(**factory_kwargs)
  1620. def _get_num_spatial_dims(self) -> int:
  1621. return 3