conv.py 69 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from paddle import _C_ops, _legacy_C_ops, get_flags, in_dynamic_mode, pir
  15. from paddle.base.framework import _global_flags, in_dynamic_or_pir_mode
  16. from paddle.device import (
  17. get_all_custom_device_type,
  18. is_compiled_with_cuda,
  19. is_compiled_with_rocm,
  20. )
  21. from paddle.tensor.manipulation import reshape
  22. from paddle.tensor.math import _add_with_axis
  23. from ...base.data_feeder import check_dtype, check_variable_and_dtype
  24. from ...base.layer_helper import LayerHelper
  25. from ...common_ops_import import Variable
  26. from ...device import get_cudnn_version
  27. from ...framework import no_grad
  28. from ...tensor.manipulation import squeeze, unsqueeze
  29. from ...utils import (
  30. _contain_var,
  31. _convert_to_tensor_list,
  32. _is_symmetric_padding,
  33. convert_to_list,
  34. )
  35. __all__ = []
  36. def _is_list_or_tuple(input):
  37. return isinstance(input, (list, tuple))
  38. def _zero_padding_in_batch_and_channel(padding, channel_last):
  39. if channel_last:
  40. return list(padding[0]) == [0, 0] and list(padding[-1]) == [0, 0]
  41. else:
  42. return list(padding[0]) == [0, 0] and list(padding[1]) == [0, 0]
  43. def _exclude_padding_in_batch_and_channel(padding, channel_last):
  44. padding_ = padding[1:-1] if channel_last else padding[2:]
  45. padding_ = [elem for pad_a_dim in padding_ for elem in pad_a_dim]
  46. return padding_
  47. def _update_padding_nd(padding, channel_last, num_dims):
  48. if isinstance(padding, str):
  49. padding = padding.upper()
  50. if padding not in ["SAME", "VALID"]:
  51. raise ValueError(
  52. f"Unknown padding: '{padding}'. It can only be 'SAME' or 'VALID'."
  53. )
  54. if padding == "VALID":
  55. padding_algorithm = "VALID"
  56. padding = [0] * num_dims
  57. else:
  58. padding_algorithm = "SAME"
  59. padding = [0] * num_dims
  60. elif _is_list_or_tuple(padding):
  61. # for padding like
  62. # [(pad_before, pad_after), (pad_before, pad_after), ...]
  63. # padding for batch_dim and channel_dim included
  64. if len(padding) == 2 + num_dims and _is_list_or_tuple(padding[0]):
  65. if not _zero_padding_in_batch_and_channel(padding, channel_last):
  66. raise ValueError(
  67. f"Non-zero padding({padding}) in the batch or channel dimensions "
  68. "is not supported."
  69. )
  70. padding_algorithm = "EXPLICIT"
  71. padding = _exclude_padding_in_batch_and_channel(
  72. padding, channel_last
  73. )
  74. if _is_symmetric_padding(padding, num_dims):
  75. padding = padding[0::2]
  76. # for padding like [pad_before, pad_after, pad_before, pad_after, ...]
  77. elif len(padding) == 2 * num_dims and isinstance(padding[0], int):
  78. padding_algorithm = "EXPLICIT"
  79. padding = convert_to_list(padding, 2 * num_dims, 'padding')
  80. if _is_symmetric_padding(padding, num_dims):
  81. padding = padding[0::2]
  82. # for padding like [pad_d1, pad_d2, ...]
  83. elif len(padding) == num_dims and isinstance(padding[0], int):
  84. padding_algorithm = "EXPLICIT"
  85. padding = convert_to_list(padding, num_dims, 'padding')
  86. else:
  87. raise ValueError(f"In valid padding: {padding}")
  88. # for integer padding
  89. else:
  90. padding_algorithm = "EXPLICIT"
  91. padding = convert_to_list(padding, num_dims, 'padding')
  92. if not all(p >= 0 for p in padding):
  93. raise ValueError(
  94. f"Invalid padding, all value should be larger than or equal to 0, but received: {padding}"
  95. )
  96. return padding, padding_algorithm
  97. def _conv_nd(
  98. x,
  99. weight,
  100. bias=None,
  101. stride=1,
  102. padding=0,
  103. padding_algorithm=None,
  104. dilation=1,
  105. groups=1,
  106. data_format="NCHW",
  107. channel_dim=1,
  108. op_type="conv2d",
  109. use_cudnn=True,
  110. name=None,
  111. ):
  112. # Due to the poor performance of NHWC, we transpose the input to NCHW.
  113. if in_dynamic_or_pir_mode() and op_type == "conv2d":
  114. pre_bias = _C_ops.conv2d(
  115. x,
  116. weight,
  117. stride,
  118. padding,
  119. padding_algorithm,
  120. dilation,
  121. groups,
  122. data_format,
  123. )
  124. if bias is not None:
  125. new_shape = [1] * len(x.shape)
  126. new_shape[channel_dim] = -1
  127. bias = bias.reshape(new_shape)
  128. # TODO(qili93): temporary for ascend npu performance to be removed along with npu_identity op
  129. if (
  130. _global_flags()['FLAGS_npu_storage_format']
  131. and 'npu' in get_all_custom_device_type()
  132. ):
  133. with no_grad():
  134. bias_storage = _C_ops.npu_identity(
  135. bias, 3
  136. ) # ACL_FORMAT_NC1HWC0 = 3
  137. bias_storage._share_underline_tensor_to(bias)
  138. return _C_ops.add(pre_bias, bias)
  139. else:
  140. return pre_bias
  141. if in_dynamic_or_pir_mode() and op_type == "depthwise_conv2d":
  142. pre_bias = _C_ops.depthwise_conv2d(
  143. x,
  144. weight,
  145. stride,
  146. padding,
  147. padding_algorithm,
  148. groups,
  149. dilation,
  150. data_format,
  151. )
  152. if bias is not None:
  153. new_shape = [1] * len(x.shape)
  154. new_shape[channel_dim] = -1
  155. bias = bias.reshape(new_shape)
  156. return _C_ops.add(pre_bias, bias)
  157. else:
  158. return pre_bias
  159. if in_dynamic_or_pir_mode() and op_type == "conv3d":
  160. pre_bias = _C_ops.conv3d(
  161. x,
  162. weight,
  163. stride,
  164. padding,
  165. padding_algorithm,
  166. groups,
  167. dilation,
  168. data_format,
  169. )
  170. if bias is not None:
  171. new_shape = [1] * len(x.shape)
  172. new_shape[channel_dim] = -1
  173. bias = bias.reshape(new_shape)
  174. return _C_ops.add(pre_bias, bias)
  175. else:
  176. return pre_bias
  177. if in_dynamic_mode():
  178. attrs = (
  179. 'strides',
  180. stride,
  181. 'paddings',
  182. padding,
  183. 'dilations',
  184. dilation,
  185. 'groups',
  186. groups,
  187. 'use_cudnn',
  188. use_cudnn,
  189. 'fuse_relu_before_depthwise_conv',
  190. False,
  191. "padding_algorithm",
  192. padding_algorithm,
  193. "data_format",
  194. data_format,
  195. )
  196. pre_bias = getattr(_legacy_C_ops, op_type)(x, weight, *attrs)
  197. if bias is not None:
  198. out = _add_with_axis(pre_bias, bias, axis=channel_dim)
  199. else:
  200. out = pre_bias
  201. else:
  202. inputs = {'Input': [x], 'Filter': [weight]}
  203. attrs = {
  204. 'strides': stride,
  205. 'paddings': padding,
  206. 'dilations': dilation,
  207. 'groups': groups,
  208. 'use_cudnn': use_cudnn,
  209. 'fuse_relu_before_depthwise_conv': False,
  210. "padding_algorithm": padding_algorithm,
  211. "data_format": data_format,
  212. }
  213. check_variable_and_dtype(
  214. x, 'x', ['float16', 'uint16', 'float32', 'float64'], op_type
  215. )
  216. helper = LayerHelper(op_type, **locals())
  217. dtype = helper.input_dtype(input_param_name='x')
  218. pre_bias = helper.create_variable_for_type_inference(dtype)
  219. outputs = {"Output": [pre_bias]}
  220. helper.append_op(
  221. type=op_type, inputs=inputs, outputs=outputs, attrs=attrs
  222. )
  223. if bias is not None:
  224. out = helper.create_variable_for_type_inference(dtype)
  225. x_shape = list(pre_bias.shape)
  226. y_shape = list(bias.shape)
  227. if channel_dim == -1 or len(x_shape) == len(y_shape):
  228. helper.append_op(
  229. type='elementwise_add',
  230. inputs={'X': [pre_bias], 'Y': [bias]},
  231. outputs={'Out': [out]},
  232. attrs={'axis': -1},
  233. )
  234. else:
  235. assert len(x_shape) > len(
  236. y_shape
  237. ), 'The length of pre_bias must greater than the length of bias'
  238. padding = len(x_shape) - len(y_shape) - channel_dim
  239. bias = reshape(
  240. bias, [1] * channel_dim + y_shape + [1] * padding
  241. )
  242. helper.append_op(
  243. type='elementwise_add',
  244. inputs={'X': [pre_bias], 'Y': [bias]},
  245. outputs={'Out': [out]},
  246. attrs={'axis': -1},
  247. )
  248. else:
  249. out = pre_bias
  250. return out
  251. def conv1d(
  252. x,
  253. weight,
  254. bias=None,
  255. stride=1,
  256. padding=0,
  257. dilation=1,
  258. groups=1,
  259. data_format='NCL',
  260. name=None,
  261. ):
  262. r"""
  263. The convolution1D layer calculates the output based on the input, filter
  264. and strides, paddings, dilations, groups parameters. Input and
  265. Output are in NCL format, where N is batch size, C is the number of
  266. channels, L is the length of the feature.
  267. Filter is in MCK format, where M is the number of output image channels,
  268. C is the number of input image channels, K is the size of the kernel.
  269. If the groups is greater than 1, C will equal the number of input image
  270. channels divided by the groups. If bias attribution and activation type
  271. are provided, bias is added to the output of the convolution, and the
  272. corresponding activation function is applied to the final result.
  273. For each input :math:`X`, the equation is:
  274. .. math::
  275. Out = \sigma (W \ast X + b)
  276. Where:
  277. * :math:`X`: Input value, a tensor with NCL format.
  278. * :math:`W`: Kernel value, a tensor with MCK format.
  279. * :math:`\\ast`: Convolution operation.
  280. * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
  281. * :math:`\\sigma`: Activation function.
  282. * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
  283. Example:
  284. - Input:
  285. Input shape: :math:`(N, C_{in}, L_{in})`
  286. Filter shape: :math:`(C_{out}, C_{in}, L_f)`
  287. - Output:
  288. Output shape: :math:`(N, C_{out}, L_{out})`
  289. Where
  290. .. math::
  291. L_{out} = \frac{(L_{in} + 2 * padding - (dilation * (L_f - 1) + 1))}{stride} + 1
  292. Args:
  293. x (Tensor): The input is 3-D Tensor with shape [N, C, L], the data type
  294. of input is float16 or float32 or float64.
  295. weight (Tensor): The convolution kernel with shape [M, C/g, K], where M is
  296. the number of output channels, g is the number of groups, K is the kernel's size.
  297. bias (Tensor, optional): The bias with shape [M,]. Default: None.
  298. stride (int|list|tuple, optional): The stride size. If stride is a list/tuple, it must
  299. contain one integers, (stride_size). Default: 1.
  300. padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
  301. 1. a string in ['valid', 'same'].
  302. 2. an int, which means the feature map is zero paded by size of `padding` on both sides.
  303. 3. a list[int] or tuple[int] whose length is 1, which means the feature map is zero paded by size of `padding[0]` on both sides.
  304. 4. a list[int] or tuple[int] whose length is 2. It has the form [pad_before, pad_after].
  305. 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0).
  306. The default value is 0.
  307. dilation (int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must
  308. contain one integer, (dilation_size). Default: 1.
  309. groups (int, optional): The groups number of the conv1d function. According to grouped
  310. convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
  311. the first half of the filters is only connected to the first half
  312. of the input channels, while the second half of the filters is only
  313. connected to the second half of the input channels. Default: 1.
  314. data_format (str, optional): Specify the data format of the input, and the data format of the output
  315. will be consistent with that of the input. An optional string from: `"NCL"`, `"NLC"`.
  316. The default is `"NCL"`. When it is `"NCL"`, the data is stored in the order of:
  317. `[batch_size, input_channels, feature_length]`.
  318. name(str, optional): For detailed information, please refer
  319. to :ref:`api_guide_Name`. Usually name is no need to set and
  320. None by default.
  321. Returns:
  322. A tensor representing the conv1d, whose data type is the
  323. same with input.
  324. Examples:
  325. .. code-block:: python
  326. >>> import paddle
  327. >>> import paddle.nn.functional as F
  328. >>> x = paddle.to_tensor([[[4, 8, 1, 9],
  329. ... [7, 2, 0, 9],
  330. ... [6, 9, 2, 6]]], dtype="float32")
  331. >>> w = paddle.to_tensor([[[9, 3, 4],
  332. ... [0, 0, 7],
  333. ... [2, 5, 6]],
  334. ... [[0, 3, 4],
  335. ... [2, 9, 7],
  336. ... [5, 6, 8]]], dtype="float32")
  337. >>> y = F.conv1d(x, w)
  338. >>> print(y)
  339. Tensor(shape=[1, 2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
  340. [[[133., 238.],
  341. [160., 211.]]])
  342. """
  343. cudnn_version = get_cudnn_version()
  344. if cudnn_version is not None:
  345. use_cudnn = True
  346. else:
  347. use_cudnn = False
  348. if data_format not in ["NCL", "NLC"]:
  349. raise ValueError(
  350. "Attr(data_format) should be 'NCL' or 'NLC'. "
  351. f"Received Attr(data_format): {data_format}."
  352. )
  353. channel_last = data_format == "NLC"
  354. channel_dim = -1 if channel_last else 1
  355. conv2d_data_format = "NHWC" if channel_last else "NCHW"
  356. if len(x.shape) != 3:
  357. raise ValueError(
  358. f"Input x should be 3D tensor, but received x with the shape of {x.shape}"
  359. )
  360. num_channels = x.shape[channel_dim]
  361. num_filters = weight.shape[0]
  362. if num_channels < 0:
  363. raise ValueError(
  364. f"The channel dimension of the input({x.shape}) "
  365. f"should be defined. Received: {num_channels}."
  366. )
  367. if groups <= 0:
  368. raise ValueError(
  369. f"The groups of conv1d should be greater than 0. Received groups: {groups}"
  370. )
  371. if num_channels % groups != 0:
  372. raise ValueError(
  373. "the channel of input must be divisible by groups,"
  374. f"received: the channel of input is {num_channels}, the shape of input is {x.shape}"
  375. f", the groups is {groups}"
  376. )
  377. if num_filters % groups != 0:
  378. raise ValueError(
  379. "the number of filters must be divisible by groups,"
  380. f"received: the number of filters is {num_filters}, the shape of weight is {weight.shape}"
  381. f", the groups is {groups}"
  382. )
  383. # update attrs
  384. padding, padding_algorithm = _update_padding_nd(padding, channel_last, 1)
  385. if len(padding) == 2:
  386. padding = [0] * 2 + padding
  387. elif len(padding) == 1:
  388. padding = [0] + padding
  389. else:
  390. raise ValueError(
  391. f"The size of padding's dimension should be 1 or 2. But got padding={padding}"
  392. )
  393. stride = [1] + convert_to_list(stride, 1, 'stride')
  394. dilation = [1] + convert_to_list(dilation, 1, 'dilation')
  395. from ...tensor.creation import assign as paddle_assign
  396. weight = paddle_assign(weight)
  397. weight = unsqueeze(weight, axis=[-2])
  398. l_type = "conv2d"
  399. # When "groups==num_channels and num_filters% num_channels == 0" using depthwise_conv2d has better performance
  400. if (
  401. is_compiled_with_cuda()
  402. and num_channels == groups
  403. and num_channels != 1
  404. and num_filters % num_channels == 0
  405. ):
  406. l_type = 'depthwise_conv2d'
  407. use_cudnn = False
  408. squeeze_axis = -3 if channel_last else -2
  409. x = unsqueeze(x, axis=[squeeze_axis])
  410. if in_dynamic_or_pir_mode():
  411. if l_type == 'conv2d':
  412. out = _C_ops.conv2d(
  413. x,
  414. weight,
  415. stride,
  416. padding,
  417. padding_algorithm,
  418. dilation,
  419. groups,
  420. conv2d_data_format,
  421. )
  422. else:
  423. out = _C_ops.depthwise_conv2d(
  424. x,
  425. weight,
  426. stride,
  427. padding,
  428. padding_algorithm,
  429. groups,
  430. dilation,
  431. conv2d_data_format,
  432. False,
  433. -1,
  434. False,
  435. False,
  436. )
  437. if bias is not None:
  438. out = _add_with_axis(out, bias, axis=channel_dim)
  439. else:
  440. inputs = {'Input': [x], 'Filter': [weight]}
  441. attrs = {
  442. 'strides': stride,
  443. 'paddings': padding,
  444. 'dilations': dilation,
  445. 'groups': groups,
  446. 'use_cudnn': use_cudnn,
  447. 'fuse_relu_before_depthwise_conv': False,
  448. "padding_algorithm": padding_algorithm,
  449. "data_format": conv2d_data_format,
  450. }
  451. check_variable_and_dtype(
  452. x, 'input', ['float16', 'float32', 'float64'], 'conv2d'
  453. )
  454. helper = LayerHelper(l_type, **locals())
  455. dtype = helper.input_dtype(input_param_name='x')
  456. out = helper.create_variable_for_type_inference(dtype)
  457. outputs = {"Output": [out]}
  458. helper.append_op(
  459. type=l_type, inputs=inputs, outputs=outputs, attrs=attrs
  460. )
  461. if bias is not None:
  462. out = _add_with_axis(out, bias, axis=channel_dim)
  463. out = squeeze(out, axis=[squeeze_axis])
  464. return out
  465. def conv2d(
  466. x,
  467. weight,
  468. bias=None,
  469. stride=1,
  470. padding=0,
  471. dilation=1,
  472. groups=1,
  473. data_format="NCHW",
  474. name=None,
  475. ):
  476. r"""
  477. The convolution2D layer calculates the output based on the input, filter
  478. and strides, paddings, dilations, groups parameters. Input and
  479. Output are in NCHW or NHWC format, where N is batch size, C is the number of
  480. channels, H is the height of the feature, and W is the width of the feature.
  481. Filter is in MCHW format, where M is the number of output image channels,
  482. C is the number of input image channels, H is the height of the filter,
  483. and W is the width of the filter. If the groups is greater than 1,
  484. C will equal the number of input image channels divided by the groups.
  485. Please refer to UFLDL's `convolution
  486. <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
  487. for more details.
  488. If bias attribution and activation type are provided, bias is added to the
  489. output of the convolution, and the corresponding activation function is
  490. applied to the final result.
  491. For each input :math:`X`, the equation is:
  492. .. math::
  493. Out = \sigma (W \ast X + b)
  494. Where:
  495. * :math:`X`: Input value, a tensor with NCHW or NHWC format.
  496. * :math:`W`: Filter value, a tensor with MCHW format.
  497. * :math:`\\ast`: Convolution operation.
  498. * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
  499. * :math:`\\sigma`: Activation function.
  500. * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
  501. Example:
  502. - Input:
  503. Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
  504. Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
  505. - Output:
  506. Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
  507. Where
  508. .. math::
  509. H_{out}&= \frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
  510. W_{out}&= \frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
  511. Args:
  512. x (Tensor): The input is 4-D Tensor with shape [N, C, H, W], the data type
  513. of input is float16 or float32 or float64.
  514. weight (Tensor): The convolution kernel with shape [M, C/g, kH, kW], where M is
  515. the number of output channels, g is the number of groups, kH is the filter's
  516. height, kW is the filter's width.
  517. bias (Tensor, optional): The bias with shape [M,].
  518. stride (int|list|tuple, optional): The stride size. It means the stride in convolution.
  519. If stride is a list/tuple, it must contain two integers, (stride_height, stride_width).
  520. Otherwise, stride_height = stride_width = stride. Default: stride = 1.
  521. padding (string|int|list|tuple, optional): The padding size. It means the number of zero-paddings
  522. on both sides for each dimension.If `padding` is a string, either 'VALID' or
  523. 'SAME' which is the padding algorithm. If padding size is a tuple or list,
  524. it could be in three forms: `[pad_height, pad_width]` or
  525. `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when
  526. `data_format` is `"NCHW"`, `padding` can be in the form `[[0,0], [0,0],
  527. [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
  528. when `data_format` is `"NHWC"`, `padding` can be in the form
  529. `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
  530. Default: padding = 0.
  531. dilation (int|list|tuple, optional): The dilation size. It means the spacing between the kernel
  532. points. If dilation is a list/tuple, it must contain two integers, (dilation_height,
  533. dilation_width). Otherwise, dilation_height = dilation_width = dilation.
  534. Default: dilation = 1.
  535. groups (int, optional): The groups number of the Conv2D Layer. According to grouped
  536. convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
  537. the first half of the filters is only connected to the first half
  538. of the input channels, while the second half of the filters is only
  539. connected to the second half of the input channels. Default: groups=1.
  540. data_format (str, optional): Specify the data format of the input, and the data format of the output
  541. will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
  542. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
  543. `[batch_size, input_channels, input_height, input_width]`.
  544. name(str, optional): For detailed information, please refer
  545. to :ref:`api_guide_Name`. Usually name is no need to set and
  546. None by default.
  547. Returns:
  548. A Tensor representing the conv2d result, whose data type is the same with input.
  549. Examples:
  550. .. code-block:: python
  551. >>> import paddle
  552. >>> import paddle.nn.functional as F
  553. >>> x_var = paddle.randn((2, 3, 8, 8), dtype='float32')
  554. >>> w_var = paddle.randn((6, 3, 3, 3), dtype='float32')
  555. >>> y_var = F.conv2d(x_var, w_var)
  556. >>> print(y_var.shape)
  557. [2, 6, 6, 6]
  558. """
  559. # entry checks
  560. if data_format not in ["NCHW", "NHWC"]:
  561. raise ValueError(
  562. "Attr(data_format) should be 'NCHW' or 'NHWC'. "
  563. f"Received Attr(data_format): {data_format}."
  564. )
  565. channel_last = data_format == "NHWC"
  566. channel_dim = -1 if channel_last else 1
  567. if len(x.shape) != 4:
  568. raise ValueError(
  569. f"Input x should be 4D tensor, but received x with the shape of {x.shape}"
  570. )
  571. num_channels = x.shape[channel_dim]
  572. num_filters = weight.shape[0]
  573. if num_channels < 0:
  574. raise ValueError(
  575. f"The channel dimension of the input({x.shape}) "
  576. f"should be defined. Received: {num_channels}."
  577. )
  578. if groups <= 0:
  579. raise ValueError(
  580. f"The groups of conv2d should be greater than 0. Received groups: {groups}"
  581. )
  582. if num_channels % groups != 0:
  583. raise ValueError(
  584. "the channel of input must be divisible by groups,"
  585. f"received: the channel of input is {num_channels}, the shape of input is {x.shape}"
  586. f", the groups is {groups}"
  587. )
  588. if num_filters % groups != 0:
  589. raise ValueError(
  590. "the number of filters must be divisible by groups,"
  591. f"received: the number of filters is {num_filters}, the shape of weight is {weight.shape}"
  592. f", the groups is {groups}"
  593. )
  594. cudnn_version = get_cudnn_version()
  595. use_cudnn = (
  596. True
  597. if (is_compiled_with_cuda() and cudnn_version is not None)
  598. else False
  599. )
  600. # update attrs
  601. padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2)
  602. stride = convert_to_list(stride, 2, 'stride')
  603. dilation = convert_to_list(dilation, 2, 'dilation')
  604. l_type = "conv2d"
  605. if (
  606. num_channels == groups
  607. and num_channels != 1
  608. and num_filters % num_channels == 0
  609. ):
  610. l_type = 'depthwise_conv2d'
  611. if is_compiled_with_rocm():
  612. use_cudnn = True
  613. else:
  614. use_cudnn = False
  615. else:
  616. if in_dynamic_mode():
  617. pre_bias = _C_ops.conv2d(
  618. x,
  619. weight,
  620. stride,
  621. padding,
  622. padding_algorithm,
  623. dilation,
  624. groups,
  625. data_format,
  626. )
  627. if bias is not None:
  628. channel_dim = (
  629. channel_dim + len(x.shape)
  630. if channel_dim < 0
  631. else channel_dim
  632. )
  633. if len(bias.shape) < len(x.shape):
  634. bias = _C_ops.reshape(
  635. bias,
  636. [1 for i in range(channel_dim)]
  637. + bias.shape
  638. + [1 for i in range(len(x.shape) - channel_dim - 1)],
  639. )
  640. # TODO(qili93): temporary for ascend npu performance to be removed along with npu_identity op
  641. if (
  642. _global_flags()['FLAGS_npu_storage_format']
  643. and 'npu' in get_all_custom_device_type()
  644. ):
  645. with no_grad():
  646. bias_storage = _C_ops.npu_identity(
  647. bias, 3
  648. ) # ACL_FORMAT_NC1HWC0 = 3
  649. bias_storage._share_underline_tensor_to(bias)
  650. return _C_ops.add(pre_bias, bias)
  651. else:
  652. return pre_bias
  653. if (
  654. is_compiled_with_cuda()
  655. and get_flags("FLAGS_conv2d_disable_cudnn")[
  656. "FLAGS_conv2d_disable_cudnn"
  657. ]
  658. ):
  659. use_cudnn = False
  660. return _conv_nd(
  661. x,
  662. weight,
  663. bias,
  664. stride,
  665. padding,
  666. padding_algorithm,
  667. dilation,
  668. groups,
  669. data_format,
  670. channel_dim,
  671. l_type,
  672. use_cudnn,
  673. name,
  674. )
  675. def conv1d_transpose(
  676. x,
  677. weight,
  678. bias=None,
  679. stride=1,
  680. padding=0,
  681. output_padding=0,
  682. groups=1,
  683. dilation=1,
  684. output_size=None,
  685. data_format="NCL",
  686. name=None,
  687. ):
  688. r"""
  689. The 1-D convolution transpose layer calculates the output based on the input,
  690. filter, and dilation, stride, padding. Input(Input) and output(Output)
  691. are in 'NCL' format or 'NLC' where N is batch size, C is the number of channels,
  692. L is the length of the feature. The details of convolution transpose
  693. layer, please refer to the following explanation and references
  694. `therein <https://arxiv.org/pdf/1603.07285.pdf>`_.
  695. If bias attribution and activation type are provided, bias is added to
  696. the output of the convolution, and the corresponding activation function
  697. is applied to the final result.
  698. For each input :math:`X`, the equation is:
  699. .. math::
  700. Out = \sigma (W \ast X + b)
  701. Where:
  702. * :math:`X`: Input value, a 3-D Tensor with 'NCL' format or 'NLC' format.
  703. * :math:`W`: Filter value, a 3-D Tensor with 'MCK' format.
  704. * :math:`\\ast`: Convolution operation.
  705. * :math:`b`: Bias value, a 2-D Tensor with shape [M, 1].
  706. * :math:`\\sigma`: Activation function.
  707. * :math:`Out`: Output value, a 3-D Tensor with data format 'NCL' or 'NLC', the shape of :math:`Out` and :math:`X` may be different.
  708. Example:
  709. - Input:
  710. Input shape: :math:`(N, C_{in}, L_{in})`
  711. Filter shape: :math:`(C_{in}, C_{out}, L_f)`
  712. - Output:
  713. Output shape: :math:`(N, C_{out}, L_{out})`
  714. Where
  715. .. math::
  716. L^\prime_{out} &= (L_{in} - 1) * stride - 2 * padding + dilation * (L_f - 1) + 1 \\
  717. L_{out} &\in [ L^\prime_{out}, L^\prime_{out} + stride ]
  718. Note:
  719. The conv1d_transpose can be seen as the backward of the conv1d. For conv1d,
  720. when stride > 1, conv1d maps multiple input shape to the same output shape,
  721. so for conv1d_transpose, when stride > 1, input shape maps multiple output shape.
  722. If output_size is None, :math:`L_{out} = L^\prime_{out}`;
  723. else, the :math:`L_{out}` of the output size must between :math:`L^\prime_{out}`
  724. and :math:`L^\prime_{out} + stride`.
  725. Args:
  726. x(Tensor): 3-D tensor with [N, C, L] or [N, L, C] format,
  727. its data type is float32 or float64.
  728. weight(Tensor): The convolution kernel, a Tensor with shape [C, M/g, K],
  729. where M is the number of output channels(filters), g is the number of groups,
  730. K is the size of the kernel.
  731. bias(Tensor, optional): The bias, a Tensor with shape [M, ].
  732. stride(int|tuple|list, optional): The stride size. It means the stride in transposed convolution.
  733. If stride is a list/tuple, it must contain one integer, `(stride_size)`.
  734. Default: stride = 1.
  735. padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds
  736. `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a
  737. string, either 'VALID' or 'SAME' supported, which is the padding algorithm.
  738. If `padding` is a tuple or list, it could be in two forms:
  739. `[pad]` or `[pad_left, pad_right]`. Default: padding = 0.
  740. output_padding(int|list|tuple, optional): The count of zeros to be added to tail of each dimension.
  741. If it is a list/tuple, it must contain one integer. Default: 0.
  742. groups(int, optional): The groups number of the conv1d transpose function. Inspired by
  743. grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
  744. when group=2, the first half of the filters is only connected to the
  745. first half of the input channels, while the second half of the
  746. filters is only connected to the second half of the input channels.
  747. Default: groups = 1.
  748. dilation(int|tuple|list, optional): The dilation size. It means the spacing between the kernel points.
  749. If dilation is a list/tuple, it must contain one integer, `(dilation_size)`.
  750. Default: dilation = 1.
  751. output_size(int|tuple|list, optional): The output image size. If output size is a
  752. tuple/list, it must contain one integer, `(feature_length)`. None if use
  753. filter_size(shape of weight), padding, and stride to calculate output_size.
  754. data_format (str, optional): Specify the data format of the input, and the data format of the output
  755. will be consistent with that of the input. An optional string from: `"NCL"`, `"NLC"`.
  756. The default is `"NCL"`. When it is `"NCL"`, the data is stored in the order of:
  757. `[batch_size, input_channels, input_length]`.
  758. name(str, optional): For detailed information, please refer
  759. to :ref:`api_guide_Name`. Usually name is no need to set and
  760. None by default.
  761. Returns:
  762. A tensor representing the result of 1-D transpose convolution, whose
  763. data type is the same with input. And its shape is (num_batches, channels, length)
  764. when data_format is `"NCL"` and (num_batches, length, channels) when data_format is
  765. `"NLC"`.
  766. Examples:
  767. .. code-block:: python
  768. >>> import paddle
  769. >>> import paddle.nn.functional as F
  770. >>> # shape: (1, 2, 4)
  771. >>> x = paddle.to_tensor([[[4, 0, 9, 7],
  772. >>> [8, 0, 9, 2,]]], dtype="float32")
  773. >>> # shape: (2, 1, 2)
  774. >>> w = paddle.to_tensor([[[7, 0]],
  775. >>> [[4, 2]]], dtype="float32")
  776. >>> y = F.conv1d_transpose(x, w)
  777. >>> print(y)
  778. Tensor(shape=[1, 1, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
  779. [[[60., 16., 99., 75., 4. ]]])
  780. """
  781. cudnn_version = get_cudnn_version()
  782. if cudnn_version is not None:
  783. use_cudnn = True
  784. else:
  785. use_cudnn = False
  786. if data_format not in ['NCL', 'NLC']:
  787. raise ValueError(
  788. "Attr(data_format) of conv2d_transpose got wrong value: "
  789. f"received {data_format}, but only 'NCL' or 'NLC' are supported."
  790. )
  791. channel_last = data_format == "NLC"
  792. channel_dim = -1 if channel_last else 1
  793. if len(x.shape) != 3:
  794. raise ValueError(
  795. f"Input x should be 3D tensor, but received x with the shape of {x.shape}"
  796. )
  797. num_channels = x.shape[channel_dim]
  798. if num_channels < 0:
  799. raise ValueError(
  800. f"The channel dimension of the input({x.shape}) "
  801. f"should be defined. Received: {num_channels}."
  802. )
  803. if groups <= 0:
  804. raise ValueError(
  805. f"The groups of conv1d_transpose should be greater than 0. Received groups: {groups}"
  806. )
  807. if num_channels % groups != 0:
  808. raise ValueError(
  809. "the channel of input must be divisible by groups,"
  810. f"received: the channel of input is {num_channels}, the shape of input is {x.shape}"
  811. f", the groups is {groups}"
  812. )
  813. # update attrs
  814. padding, padding_algorithm = _update_padding_nd(padding, channel_last, 1)
  815. if len(padding) == 2:
  816. padding = padding + [0] * 2
  817. elif len(padding) == 1:
  818. padding = padding + [0]
  819. else:
  820. raise ValueError(
  821. f"The size of padding's dimension should 1 or 2. But got padding={padding}"
  822. )
  823. stride = convert_to_list(stride, 1, 'stride') + [1]
  824. dilation = convert_to_list(dilation, 1, 'dilation') + [1]
  825. if output_size is None:
  826. output_size = []
  827. else:
  828. if output_padding != 0:
  829. raise ValueError(
  830. 'output_padding option is mutually exclusive with '
  831. 'output_size'
  832. )
  833. if isinstance(output_size, (list, tuple, int)):
  834. output_size = convert_to_list(output_size, 1, 'output_size') + [1]
  835. else:
  836. raise ValueError(
  837. "output_size should be int, or list, tuple of ints"
  838. )
  839. if output_padding == 0:
  840. output_padding = []
  841. else:
  842. output_padding = convert_to_list(
  843. output_padding, 1, 'output_padding'
  844. ) + [0]
  845. if len(output_padding) > 0 and output_padding[0] > stride[0]:
  846. raise ValueError(
  847. "The size of output_padding should not be greater than stride."
  848. f"But got output_padding={output_padding[0]} and stride={stride[0]}"
  849. )
  850. if len(weight.shape) != 3:
  851. raise ValueError(
  852. f'Input weight should be 3D tensor, but received weight with the shape of {weight.shape}'
  853. )
  854. op_type = 'conv2d_transpose'
  855. num_filters = weight.shape[1]
  856. if (
  857. num_channels == groups
  858. and num_channels != 1
  859. and num_filters == 1
  860. and not use_cudnn
  861. ):
  862. op_type = 'depthwise_conv2d_transpose'
  863. use_cudnn = False
  864. squeeze_axis = -2 if channel_last else -1
  865. conv2d_data_format = "NHWC" if channel_last else "NCHW"
  866. x = unsqueeze(x, axis=[squeeze_axis])
  867. weight = unsqueeze(weight, axis=[-1])
  868. if in_dynamic_mode():
  869. out = getattr(_C_ops, op_type)(
  870. x,
  871. weight,
  872. stride,
  873. padding,
  874. output_padding,
  875. output_size,
  876. padding_algorithm,
  877. groups,
  878. dilation,
  879. conv2d_data_format,
  880. )
  881. if bias is not None:
  882. out = _add_with_axis(out, bias, axis=channel_dim)
  883. else:
  884. inputs = {'Input': [x], 'Filter': [weight]}
  885. attrs = {
  886. 'output_padding': output_padding,
  887. 'output_size': output_size,
  888. 'strides': stride,
  889. 'paddings': padding,
  890. 'padding_algorithm': padding_algorithm,
  891. 'dilations': dilation,
  892. 'groups': groups,
  893. 'use_cudnn': use_cudnn,
  894. 'data_format': conv2d_data_format,
  895. }
  896. check_variable_and_dtype(
  897. x, 'input', ['float16', 'float32', 'float64'], 'conv2d_transpose'
  898. )
  899. helper = LayerHelper(op_type, **locals())
  900. dtype = helper.input_dtype(input_param_name='x')
  901. out = helper.create_variable_for_type_inference(dtype)
  902. outputs = {"Output": [out]}
  903. helper.append_op(
  904. type=op_type, inputs=inputs, outputs=outputs, attrs=attrs
  905. )
  906. if bias is not None:
  907. out = _add_with_axis(out, bias, axis=channel_dim)
  908. out = squeeze(out, axis=[squeeze_axis])
  909. return out
  910. def conv2d_transpose(
  911. x,
  912. weight,
  913. bias=None,
  914. stride=1,
  915. padding=0,
  916. output_padding=0,
  917. dilation=1,
  918. groups=1,
  919. output_size=None,
  920. data_format='NCHW',
  921. name=None,
  922. ):
  923. r"""
  924. The convolution2D transpose layer calculates the output based on the input,
  925. filter, and dilations, strides, paddings. Input(Input) and output(Output)
  926. are in NCHW or NHWC format. Where N is batch size, C is the number of channels,
  927. H is the height of the feature, and W is the width of the feature.
  928. Parameters(dilations, strides, paddings) are two elements. These two elements
  929. represent height and width, respectively. The details of convolution transpose
  930. layer, please refer to the following explanation and references
  931. `therein <https://arxiv.org/pdf/1603.07285.pdf>`_.
  932. If bias attribution and activation type are provided, bias is added to
  933. the output of the convolution, and the corresponding activation function
  934. is applied to the final result.
  935. See more detail in :ref:`api_paddle_nn_Conv2DTranspose` .
  936. For each input :math:`X`, the equation is:
  937. .. math::
  938. Out = \sigma (W \ast X + b)
  939. Where:
  940. * :math:`X`: Input value, a 4-D Tensor with NCHW or NHWC format.
  941. * :math:`W`: Filter value, a 4-D Tensor with MCHW format.
  942. * :math:`\\ast`: Convolution operation.
  943. * :math:`b`: Bias value, a 2-D Tensor with shape [M, 1].
  944. * :math:`\\sigma`: Activation function.
  945. * :math:`Out`: Output value, a 4-D Tensor with data format 'NCHW' or 'NHWC', the shape of :math:`Out` and :math:`X` may be different.
  946. Example:
  947. - Input:
  948. Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
  949. Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)`
  950. - Output:
  951. Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
  952. Where
  953. .. math::
  954. H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\
  955. W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 \\
  956. H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] ] \\
  957. W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] ]
  958. Note:
  959. The conv2d_transpose can be seen as the backward of the conv2d. For conv2d,
  960. when stride > 1, conv2d maps multiple input shape to the same output shape,
  961. so for conv2d_transpose, when stride > 1, input shape maps multiple output shape.
  962. If output_size is None, :math:`H_{out} = H^\prime_{out}, W_{out} = W^\prime_{out}`;
  963. else, the :math:`H_{out}` of the output size must between :math:`H^\prime_{out}`
  964. and :math:`H^\prime_{out} + strides[0]`, and the :math:`W_{out}` of the output size must
  965. between :math:`W^\prime_{out}` and :math:`W^\prime_{out} + strides[1]`.
  966. Args:
  967. x(Tensor): 4-D Tensor with [N, C, H, W] or [N, H, W, C] format,
  968. whose data type is float32 or float64.
  969. weight(Tensor): The convolution kernel, a Tensor with shape [C, M/g, kH, kW],
  970. where M is the number of output channels(filters), g is the number of groups,
  971. kH is the height of the kernel, and kW is the width of the kernel.
  972. bias(Tensor, optional): The bias, a Tensor with shape [M, ].
  973. stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution.
  974. If stride is a list/tuple, it must contain two integers, (stride_height, stride_width).
  975. Otherwise, stride_height = stride_width = stride. Default: stride = 1.
  976. padding(str|int|list|tuple, optional): The padding size. It means the number of zero-paddings
  977. on both sides for each dimension. If `padding` is a string, either 'VALID' or
  978. 'SAME' which is the padding algorithm. If padding size is a tuple or list,
  979. it could be in three forms: `[pad_height, pad_width]` or
  980. `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
  981. and when `data_format` is `"NCHW"`, `padding` can be in the form
  982. `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
  983. when `data_format` is `"NHWC"`, `padding` can be in the form
  984. `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
  985. Default: padding = 0.
  986. output_padding(int|list|tuple, optional): Additional size added to one side
  987. of each dimension in the output shape. Default: 0.
  988. groups(int, optional): The groups number of the Conv2D transpose layer. Inspired by
  989. grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
  990. when group=2, the first half of the filters is only connected to the
  991. first half of the input channels, while the second half of the
  992. filters is only connected to the second half of the input channels.
  993. Default: groups = 1.
  994. dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
  995. If dilation is a list/tuple, it must contain two integers, (dilation_height, dilation_width).
  996. Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1.
  997. output_size(int|tuple|list, optional): The output image size. If output size is a
  998. tuple/list, it must contain two integers, (image_height, image_width). None if use
  999. filter_size(shape of weight), padding, and stride to calculate output_size.
  1000. data_format (str, optional): Specify the data format of the input, and the data format of the output
  1001. will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
  1002. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
  1003. `[batch_size, input_channels, input_height, input_width]`.
  1004. name(str, optional): For detailed information, please refer
  1005. to :ref:`api_guide_Name`. Usually name is no need to set and
  1006. None by default.
  1007. Returns:
  1008. A Tensor representing the conv2d_transpose, whose
  1009. data type is the same with input and shape is (num_batches, channels, out_h,
  1010. out_w) or (num_batches, out_h, out_w, channels). The tensor variable storing
  1011. transposed convolution result.
  1012. Examples:
  1013. .. code-block:: python
  1014. >>> import paddle
  1015. >>> import paddle.nn.functional as F
  1016. >>> x_var = paddle.randn((2, 3, 8, 8), dtype='float32')
  1017. >>> w_var = paddle.randn((3, 6, 3, 3), dtype='float32')
  1018. >>> y_var = F.conv2d_transpose(x_var, w_var)
  1019. >>> print(y_var.shape)
  1020. [2, 6, 10, 10]
  1021. """
  1022. if data_format not in ['NCHW', 'NHWC']:
  1023. raise ValueError(
  1024. "Attr(data_format) of conv2d_transpose got wrong value: "
  1025. f"received {data_format}, but only 'NCHW' or 'NHWC' are supported."
  1026. )
  1027. channel_last = data_format == "NHWC"
  1028. channel_dim = -1 if channel_last else 1
  1029. if len(x.shape) != 4:
  1030. raise ValueError(
  1031. f"Input x should be 4D tensor, but received x with the shape of {x.shape}"
  1032. )
  1033. if len(weight.shape) != 4:
  1034. raise ValueError(
  1035. f"Input weight should be 4D tensor, but received weight with the shape of {weight.shape}"
  1036. )
  1037. num_channels = x.shape[channel_dim]
  1038. if num_channels < 0:
  1039. raise ValueError(
  1040. f"The channel dimension of the input({x.shape}) "
  1041. f"should be defined. Received: {num_channels}."
  1042. )
  1043. if groups <= 0:
  1044. raise ValueError(
  1045. f"The groups of conv2d_transpose should be greater than 0. Received groups: {groups}"
  1046. )
  1047. if num_channels % groups != 0:
  1048. raise ValueError(
  1049. "the channel of input must be divisible by groups,"
  1050. f"received: the channel of input is {num_channels}, the shape of input is {x.shape}"
  1051. f", the groups is {groups}"
  1052. )
  1053. cudnn_version = get_cudnn_version()
  1054. use_cudnn = (
  1055. True
  1056. if (is_compiled_with_cuda() and cudnn_version is not None)
  1057. else False
  1058. )
  1059. # update attrs
  1060. padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2)
  1061. stride = convert_to_list(stride, 2, 'stride')
  1062. dilation = convert_to_list(dilation, 2, 'dilation')
  1063. if output_size is None:
  1064. output_size = []
  1065. else:
  1066. if output_padding != 0:
  1067. raise ValueError(
  1068. 'output_padding option is mutually exclusive with '
  1069. 'output_size'
  1070. )
  1071. if isinstance(output_size, (list, tuple)):
  1072. if _contain_var(output_size):
  1073. output_size = _convert_to_tensor_list(output_size)
  1074. else:
  1075. output_size = convert_to_list(output_size, 2, 'output_size')
  1076. elif isinstance(output_size, int):
  1077. output_size = convert_to_list(output_size, 2, 'output_size')
  1078. elif isinstance(output_size, (Variable, pir.Value)):
  1079. check_dtype(
  1080. output_size.dtype,
  1081. 'output_size',
  1082. ['int32', 'int64'],
  1083. 'conv2d_transpose',
  1084. )
  1085. if len(output_size.shape) == 1 and (
  1086. output_size.shape[0] == 1 or output_size.shape[0] == 2
  1087. ):
  1088. if output_size.shape[0] == 1:
  1089. output_size = [output_size, output_size]
  1090. else:
  1091. raise ValueError(
  1092. "output_size must contain one or two integers."
  1093. )
  1094. else:
  1095. raise ValueError(
  1096. "output_size should be int or Tensor or list, tuple of ints or Tensor"
  1097. )
  1098. if output_padding == 0:
  1099. output_padding = []
  1100. else:
  1101. output_padding = convert_to_list(output_padding, 2, 'output_padding')
  1102. op_type = 'conv2d_transpose'
  1103. num_filters = weight.shape[1]
  1104. if num_channels == groups and num_channels != 1 and num_filters == 1:
  1105. op_type = 'depthwise_conv2d_transpose'
  1106. use_cudnn = False
  1107. if in_dynamic_or_pir_mode():
  1108. op = (
  1109. _C_ops.conv2d_transpose
  1110. if op_type == 'conv2d_transpose'
  1111. else _C_ops.depthwise_conv2d_transpose
  1112. )
  1113. pre_bias = op(
  1114. x,
  1115. weight,
  1116. stride,
  1117. padding,
  1118. output_padding,
  1119. output_size,
  1120. padding_algorithm,
  1121. groups,
  1122. dilation,
  1123. data_format,
  1124. )
  1125. if bias is not None:
  1126. return _add_with_axis(pre_bias, bias, axis=channel_dim)
  1127. else:
  1128. return pre_bias
  1129. else:
  1130. inputs = {'Input': [x], 'Filter': [weight]}
  1131. attrs = {
  1132. 'output_padding': output_padding,
  1133. 'output_size': output_size,
  1134. 'strides': stride,
  1135. 'paddings': padding,
  1136. 'padding_algorithm': padding_algorithm,
  1137. 'dilations': dilation,
  1138. 'groups': groups,
  1139. 'use_cudnn': use_cudnn,
  1140. 'data_format': data_format,
  1141. }
  1142. check_variable_and_dtype(
  1143. x,
  1144. 'x',
  1145. ['float16', 'uint16', 'float32', 'float64'],
  1146. 'conv2d_transpose',
  1147. )
  1148. helper = LayerHelper(op_type, **locals())
  1149. pre_bias = helper.create_variable_for_type_inference(x.dtype)
  1150. outputs = {"Output": [pre_bias]}
  1151. helper.append_op(
  1152. type=op_type, inputs=inputs, outputs=outputs, attrs=attrs
  1153. )
  1154. if bias is not None:
  1155. out = helper.create_variable_for_type_inference(x.dtype)
  1156. x_shape = list(pre_bias.shape)
  1157. y_shape = list(bias.shape)
  1158. if channel_dim == -1 or len(x_shape) == len(y_shape):
  1159. helper.append_op(
  1160. type='elementwise_add',
  1161. inputs={'X': [pre_bias], 'Y': [bias]},
  1162. outputs={'Out': [out]},
  1163. attrs={'axis': -1},
  1164. )
  1165. else:
  1166. assert len(x_shape) > len(
  1167. y_shape
  1168. ), 'The length of pre_bias must greater than the length of bias'
  1169. padding = len(x_shape) - len(y_shape) - channel_dim
  1170. bias = reshape(
  1171. bias, [1] * channel_dim + y_shape + [1] * padding
  1172. )
  1173. helper.append_op(
  1174. type='elementwise_add',
  1175. inputs={'X': [pre_bias], 'Y': [bias]},
  1176. outputs={'Out': [out]},
  1177. attrs={'axis': -1},
  1178. )
  1179. else:
  1180. out = pre_bias
  1181. return out
  1182. def conv3d(
  1183. x,
  1184. weight,
  1185. bias=None,
  1186. stride=1,
  1187. padding=0,
  1188. dilation=1,
  1189. groups=1,
  1190. data_format="NCDHW",
  1191. name=None,
  1192. ):
  1193. r"""
  1194. The convolution3D layer calculates the output based on the input, filter
  1195. and strides, paddings, dilations, groups parameters. Input(Input) and
  1196. Output(Output) are in NCDHW or NDHWC format. Where N is batch size C is the number of
  1197. channels, D is the depth of the feature, H is the height of the feature,
  1198. and W is the width of the feature. Convolution3D is similar with Convolution2D
  1199. but adds one dimension(depth). If bias attribution and activation type are
  1200. provided, bias is added to the output of the convolution, and the
  1201. corresponding activation function is applied to the final result.
  1202. For each input :math:`X`, the equation is:
  1203. .. math::
  1204. Out = \sigma (W \ast X + b)
  1205. In the above equation:
  1206. * :math:`X`: Input value, a tensor with NCDHW or NDHWC format.
  1207. * :math:`W`: Filter value, a tensor with MCDHW format.
  1208. * :math:`\\ast`: Convolution operation.
  1209. * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
  1210. * :math:`\\sigma`: Activation function.
  1211. * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
  1212. Example:
  1213. - Input:
  1214. Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
  1215. Filter shape: :math:`(C_{out}, C_{in}, D_f, H_f, W_f)`
  1216. - Output:
  1217. Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
  1218. Where
  1219. .. math::
  1220. D_{out}&= \frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\
  1221. H_{out}&= \frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\
  1222. W_{out}&= \frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1
  1223. Args:
  1224. x (Tensor): The input is 5-D Tensor with shape [N, C, D, H, W], the data
  1225. type of input is float16 or float32 or float64.
  1226. weight (Tensor): The convolution kernel, a Tensor with shape [M, C/g, kD, kH, kW],
  1227. where M is the number of filters(output channels), g is the number of groups,
  1228. kD, kH, kW are the filter's depth, height and width respectively.
  1229. bias (Tensor, optional): The bias, a Tensor of shape [M, ].
  1230. stride (int|list|tuple, optional): The stride size. It means the stride in convolution. If stride is a
  1231. list/tuple, it must contain three integers, (stride_depth, stride_height, stride_width).
  1232. Otherwise, stride_depth = stride_height = stride_width = stride. Default: stride = 1.
  1233. padding (string|int|list|tuple, optional): The padding size. It means the number of zero-paddings
  1234. on both sides for each dimension. If `padding` is a string, either 'VALID' or
  1235. 'SAME' which is the padding algorithm. If padding size is a tuple or list,
  1236. it could be in three forms: `[pad_depth, pad_height, pad_width]` or
  1237. `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
  1238. and when `data_format` is `"NCDHW"`, `padding` can be in the form
  1239. `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
  1240. when `data_format` is `"NDHWC"`, `padding` can be in the form
  1241. `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
  1242. Default: padding = 0.
  1243. dilation (int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
  1244. If dilation is a list/tuple, it must contain three integers, (dilation_depth, dilation_height,
  1245. dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation.
  1246. Default: dilation = 1.
  1247. groups (int, optional): The groups number of the Conv3D Layer. According to grouped
  1248. convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
  1249. the first half of the filters is only connected to the first half
  1250. of the input channels, while the second half of the filters is only
  1251. connected to the second half of the input channels. Default: groups=1
  1252. data_format (str, optional): Specify the data format of the input, and the data format of the output
  1253. will be consistent with that of the input. An optional string from: `"NCDHW"`, `"NDHWC"`.
  1254. The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
  1255. `[batch_size, input_channels, input_depth, input_height, input_width]`.
  1256. name(str|None, optional): For detailed information, please refer
  1257. to :ref:`api_guide_Name`. Usually name is no need to set and
  1258. None by default.
  1259. Returns:
  1260. A Tensor representing the conv3d, whose data type is
  1261. the same with input. If act is None, the tensor storing the
  1262. convolution result, and if act is not None, the tensor storing
  1263. convolution and non-linearity activation result.
  1264. Examples:
  1265. .. code-block:: python
  1266. >>> import paddle
  1267. >>> import paddle.nn.functional as F
  1268. >>> x_var = paddle.randn((2, 3, 8, 8, 8), dtype='float32')
  1269. >>> w_var = paddle.randn((6, 3, 3, 3, 3), dtype='float32')
  1270. >>> y_var = F.conv3d(x_var, w_var)
  1271. >>> print(y_var.shape)
  1272. [2, 6, 6, 6, 6]
  1273. """
  1274. # entry check
  1275. if data_format not in ["NCDHW", "NDHWC"]:
  1276. raise ValueError(
  1277. "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
  1278. f"Attr(data_format): {data_format}."
  1279. )
  1280. channel_last = data_format == "NDHWC"
  1281. channel_dim = -1 if channel_last else 1
  1282. if len(x.shape) != 5:
  1283. raise ValueError(
  1284. f"Input x should be 5D tensor, but received x with the shape of {x.shape}"
  1285. )
  1286. num_channels = x.shape[channel_dim]
  1287. num_filters = weight.shape[0]
  1288. if num_channels < 0:
  1289. raise ValueError(
  1290. f"The channel dimension of the input({x.shape}) should be defined. "
  1291. f"Received: {num_channels}."
  1292. )
  1293. if groups <= 0:
  1294. raise ValueError(
  1295. f"The groups of conv3d should be greater than 0. Received groups: {groups}"
  1296. )
  1297. if num_channels % groups != 0:
  1298. raise ValueError(
  1299. "The number of input channels must be divisible by Attr(groups). "
  1300. f"Received: number of channels({num_channels}), groups({groups})."
  1301. )
  1302. if num_filters % groups != 0:
  1303. raise ValueError(
  1304. "The number of filters must be divisible by Attr(groups). "
  1305. f"Received: number of filters({num_filters}), groups({groups})."
  1306. )
  1307. cudnn_version = get_cudnn_version()
  1308. use_cudnn = (
  1309. True
  1310. if (is_compiled_with_cuda() and cudnn_version is not None)
  1311. else False
  1312. )
  1313. padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3)
  1314. stride = convert_to_list(stride, 3, 'stride')
  1315. dilation = convert_to_list(dilation, 3, 'dilation')
  1316. op_type = "conv3d"
  1317. return _conv_nd(
  1318. x,
  1319. weight,
  1320. bias,
  1321. stride,
  1322. padding,
  1323. padding_algorithm,
  1324. dilation,
  1325. groups,
  1326. data_format,
  1327. channel_dim,
  1328. op_type,
  1329. use_cudnn,
  1330. name,
  1331. )
  1332. def conv3d_transpose(
  1333. x,
  1334. weight,
  1335. bias=None,
  1336. stride=1,
  1337. padding=0,
  1338. output_padding=0,
  1339. groups=1,
  1340. dilation=1,
  1341. output_size=None,
  1342. data_format='NCDHW',
  1343. name=None,
  1344. ):
  1345. r"""
  1346. The convolution3d transpose layer calculates the output based on the input,
  1347. filter, and dilations, strides, paddings. Input(Input) and output(Output)
  1348. are in NCDHW or NDHWC format. Where N is batch size, C is the number of channels,
  1349. D is the depth of the feature, H is the height of the feature, and W
  1350. is the width of the feature. Parameters(dilations, strides, paddings) are
  1351. two elements. These two elements represent height and width, respectively.
  1352. The details of convolution transpose layer, please refer to the following
  1353. explanation and references `therein <https://arxiv.org/pdf/1603.07285.pdf>`_.
  1354. If bias attribution and activation type are provided, bias is added to
  1355. the output of the convolution, and the corresponding activation function
  1356. is applied to the final result.
  1357. See more detail in :ref:`api_paddle_nn_Conv3DTranspose` .
  1358. For each input :math:`X`, the equation is:
  1359. .. math::
  1360. Out = \sigma (W \ast X + b)
  1361. In the above equation:
  1362. * :math:`X`: Input value, a Tensor with NCDHW or NDHWC format.
  1363. * :math:`W`: Filter value, a Tensor with NCDHW format.
  1364. * :math:`\ast`: Convolution operation.
  1365. * :math:`b`: Bias value, a 2-D Tensor with shape [M, 1].
  1366. * :math:`\sigma`: Activation function.
  1367. * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
  1368. Example:
  1369. - Input:
  1370. Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
  1371. Filter shape: :math:`(C_{in}, C_{out}, D_f, H_f, W_f)`
  1372. - Output:
  1373. Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
  1374. Where
  1375. .. math::
  1376. D^\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\
  1377. H^\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\
  1378. W^\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1 \\
  1379. D_{out} &\in [ D^\prime_{out}, D^\prime_{out} + strides[0] ] \\
  1380. H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[1] ] \\
  1381. W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[2] ]
  1382. Note:
  1383. The conv3d_transpose can be seen as the backward of the conv3d. For conv3d,
  1384. when stride > 1, conv3d maps multiple input shape to the same output shape,
  1385. so for conv3d_transpose, when stride > 1, input shape maps multiple output shape.
  1386. If output_size is None, :math:`H_{out} = H^\prime_{out}, W_{out} = W^\prime_{out}`;
  1387. else, the :math:`D_{out}` of the output size must between :math:`D^\prime_{out}` and
  1388. :math:`D^\prime_{out} + strides[0]`, the :math:`H_{out}` of the output size must
  1389. between :math:`H^\prime_{out}` and :math:`H^\prime_{out} + strides[1]`, and the
  1390. :math:`W_{out}` of the output size must between :math:`W^\prime_{out}` and
  1391. :math:`W^\prime_{out} + strides[2]`.
  1392. Args:
  1393. x (Tensor): The input is 5-D Tensor with shape [N, C, D, H, W] or [N, D, H, W, C], the data type
  1394. of input is float32 or float64.
  1395. weight (Tensor): The convolution kernel, a Tensor with shape [C, M/g, kD, kH, kW],
  1396. where M is the number of filters (output channels), g is the number of groups,
  1397. kD, kH, kW are the filter's depth, height and width respectively.
  1398. bias (Tensor, optional): The bias, a Tensor of shape [M, ]. Default: None.
  1399. stride (int|list|tuple, optional): The stride size. It means the stride in transposed convolution.
  1400. If stride is a list/tuple, it must contain three integers, (stride_depth, stride_height,
  1401. stride_width). Otherwise, stride_depth = stride_height = stride_width = stride.
  1402. Default: 1.
  1403. padding (str|int|list|tuple, optional): The padding size. It means the number of zero-paddings
  1404. on both sides for each dimension. If `padding` is a string, either 'VALID' or
  1405. 'SAME' which is the padding algorithm. If padding size is a tuple or list,
  1406. it could be in three forms: `[pad_depth, pad_height, pad_width]` or
  1407. `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
  1408. and when `data_format` is `"NCDHW"`, `padding` can be in the form
  1409. `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
  1410. when `data_format` is `"NDHWC"`, `padding` can be in the form
  1411. `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
  1412. Default: 0.
  1413. output_padding (int|list|tuple, optional): Additional size added to one side
  1414. of each dimension in the output shape. Default: 0.
  1415. groups (int, optional): The groups number of the Conv3D transpose layer. Inspired by
  1416. grouped convolution in `Alex Krizhevsky's Deep CNN paper <https://papers.nips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf>`_, in which
  1417. when groups = 2, the first half of the filters is only connected to the
  1418. first half of the input channels, while the second half of the
  1419. filters is only connected to the second half of the input channels.
  1420. Default: 1.
  1421. dilation (int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
  1422. If dilation is a list/tuple, it must contain three integers, (dilation_depth, dilation_height,
  1423. dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation.
  1424. Default: 1.
  1425. output_size (int|list|tuple, optional): The output image size. If output size is a
  1426. list/tuple, it must contain three integers, (image_depth, image_height, image_width).
  1427. None if use filter_size(shape of weight), padding, and stride to calculate output_size.
  1428. data_format (str, optional): Specify the data format of the input, and the data format of the output
  1429. will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
  1430. When it is `"NCHW"`, the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`.
  1431. Default: `"NCHW"`.
  1432. name (str, optional): For detailed information, please refer
  1433. to :ref:`api_guide_Name`. Usually name is no need to set.
  1434. Default: None.
  1435. Returns:
  1436. A Tensor representing the conv3d_transpose, whose data
  1437. type is the same with input and shape is (num_batches, channels, out_d, out_h,
  1438. out_w) or (num_batches, out_d, out_h, out_w, channels). If act is None, the tensor
  1439. variable storing the transposed convolution result, and if act is not None, the tensor
  1440. variable storing transposed convolution and non-linearity activation result.
  1441. Examples:
  1442. .. code-block:: python
  1443. >>> import paddle
  1444. >>> import paddle.nn.functional as F
  1445. >>> x_var = paddle.randn((2, 3, 8, 8, 8), dtype='float32')
  1446. >>> w_var = paddle.randn((3, 6, 3, 3, 3), dtype='float32')
  1447. >>> y_var = F.conv3d_transpose(x_var, w_var)
  1448. >>> print(y_var.shape)
  1449. [2, 6, 10, 10, 10]
  1450. """
  1451. # entry checks
  1452. if data_format not in ["NCDHW", "NDHWC"]:
  1453. raise ValueError(
  1454. "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
  1455. f"Attr(data_format): {data_format}."
  1456. )
  1457. channel_last = data_format == "NDHWC"
  1458. channel_dim = -1 if channel_last else 1
  1459. if len(x.shape) != 5:
  1460. raise ValueError(
  1461. f"Input x should be 5D tensor, but received x with the shape of {x.shape}"
  1462. )
  1463. if len(weight.shape) != 5:
  1464. raise ValueError(
  1465. f"Input weight should be 5D tensor, but received weight with the shape of {weight.shape}"
  1466. )
  1467. num_channels = x.shape[channel_dim]
  1468. num_filters = weight.shape[1]
  1469. if num_channels < 0:
  1470. raise ValueError(
  1471. f"The channel dimension of the input({x.shape}) should be defined. "
  1472. f"Received: {num_channels}."
  1473. )
  1474. if groups <= 0:
  1475. raise ValueError(
  1476. f"The groups of conv3d_transpose should be greater than 0. Received groups: {groups}"
  1477. )
  1478. if num_channels % groups != 0:
  1479. raise ValueError(
  1480. "The number of input channels must be divisible by Attr(groups). "
  1481. f"Received: number of channels({num_channels}), groups({groups})."
  1482. )
  1483. padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3)
  1484. stride = convert_to_list(stride, 3, 'stride')
  1485. dilation = convert_to_list(dilation, 3, 'dilation')
  1486. if output_size is None:
  1487. output_size = []
  1488. else:
  1489. if output_padding != 0:
  1490. raise ValueError(
  1491. 'output_padding option is mutually exclusive with '
  1492. 'output_size'
  1493. )
  1494. if isinstance(output_size, (list, tuple, int)):
  1495. output_size = convert_to_list(output_size, 3, 'output_size')
  1496. else:
  1497. raise ValueError(
  1498. "output_size should be int, or list, tuple of ints"
  1499. )
  1500. if output_padding == 0:
  1501. output_padding = []
  1502. else:
  1503. output_padding = convert_to_list(output_padding, 3, 'output_padding')
  1504. cudnn_version = get_cudnn_version()
  1505. # TODO(LielinJiang): whether to use cudnn according to the version of cudnn
  1506. use_cudnn = (
  1507. True
  1508. if (is_compiled_with_cuda() and cudnn_version is not None)
  1509. else False
  1510. )
  1511. op_type = 'conv3d_transpose'
  1512. data_format_ = "NHWC" if channel_last else "NCHW"
  1513. if in_dynamic_or_pir_mode():
  1514. pre_bias = _C_ops.conv3d_transpose(
  1515. x,
  1516. weight,
  1517. stride,
  1518. padding,
  1519. output_padding,
  1520. output_size,
  1521. padding_algorithm,
  1522. groups,
  1523. dilation,
  1524. data_format_,
  1525. )
  1526. if bias is not None:
  1527. return _add_with_axis(pre_bias, bias, axis=channel_dim)
  1528. else:
  1529. return pre_bias
  1530. else:
  1531. inputs = {'Input': [x], 'Filter': [weight]}
  1532. attrs = {
  1533. 'output_padding': output_padding,
  1534. 'output_size': output_size,
  1535. 'paddings': padding,
  1536. "padding_algorithm": padding_algorithm,
  1537. 'strides': stride,
  1538. 'dilations': dilation,
  1539. 'groups': groups,
  1540. 'use_cudnn': use_cudnn,
  1541. "data_format": data_format_,
  1542. }
  1543. helper = LayerHelper(op_type, **locals())
  1544. check_variable_and_dtype(
  1545. x, 'x', ['float16', 'float32', 'float64'], 'conv3d'
  1546. )
  1547. pre_bias = helper.create_variable_for_type_inference(x.dtype)
  1548. outputs = {"Output": [pre_bias]}
  1549. helper.append_op(
  1550. type=op_type, inputs=inputs, outputs=outputs, attrs=attrs
  1551. )
  1552. if bias is not None:
  1553. out = _add_with_axis(pre_bias, bias, axis=channel_dim)
  1554. else:
  1555. out = pre_bias
  1556. return out