| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768 |
- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from paddle import _C_ops, _legacy_C_ops, get_flags, in_dynamic_mode, pir
- from paddle.base.framework import _global_flags, in_dynamic_or_pir_mode
- from paddle.device import (
- get_all_custom_device_type,
- is_compiled_with_cuda,
- is_compiled_with_rocm,
- )
- from paddle.tensor.manipulation import reshape
- from paddle.tensor.math import _add_with_axis
- from ...base.data_feeder import check_dtype, check_variable_and_dtype
- from ...base.layer_helper import LayerHelper
- from ...common_ops_import import Variable
- from ...device import get_cudnn_version
- from ...framework import no_grad
- from ...tensor.manipulation import squeeze, unsqueeze
- from ...utils import (
- _contain_var,
- _convert_to_tensor_list,
- _is_symmetric_padding,
- convert_to_list,
- )
- __all__ = []
- def _is_list_or_tuple(input):
- return isinstance(input, (list, tuple))
- def _zero_padding_in_batch_and_channel(padding, channel_last):
- if channel_last:
- return list(padding[0]) == [0, 0] and list(padding[-1]) == [0, 0]
- else:
- return list(padding[0]) == [0, 0] and list(padding[1]) == [0, 0]
- def _exclude_padding_in_batch_and_channel(padding, channel_last):
- padding_ = padding[1:-1] if channel_last else padding[2:]
- padding_ = [elem for pad_a_dim in padding_ for elem in pad_a_dim]
- return padding_
- def _update_padding_nd(padding, channel_last, num_dims):
- if isinstance(padding, str):
- padding = padding.upper()
- if padding not in ["SAME", "VALID"]:
- raise ValueError(
- f"Unknown padding: '{padding}'. It can only be 'SAME' or 'VALID'."
- )
- if padding == "VALID":
- padding_algorithm = "VALID"
- padding = [0] * num_dims
- else:
- padding_algorithm = "SAME"
- padding = [0] * num_dims
- elif _is_list_or_tuple(padding):
- # for padding like
- # [(pad_before, pad_after), (pad_before, pad_after), ...]
- # padding for batch_dim and channel_dim included
- if len(padding) == 2 + num_dims and _is_list_or_tuple(padding[0]):
- if not _zero_padding_in_batch_and_channel(padding, channel_last):
- raise ValueError(
- f"Non-zero padding({padding}) in the batch or channel dimensions "
- "is not supported."
- )
- padding_algorithm = "EXPLICIT"
- padding = _exclude_padding_in_batch_and_channel(
- padding, channel_last
- )
- if _is_symmetric_padding(padding, num_dims):
- padding = padding[0::2]
- # for padding like [pad_before, pad_after, pad_before, pad_after, ...]
- elif len(padding) == 2 * num_dims and isinstance(padding[0], int):
- padding_algorithm = "EXPLICIT"
- padding = convert_to_list(padding, 2 * num_dims, 'padding')
- if _is_symmetric_padding(padding, num_dims):
- padding = padding[0::2]
- # for padding like [pad_d1, pad_d2, ...]
- elif len(padding) == num_dims and isinstance(padding[0], int):
- padding_algorithm = "EXPLICIT"
- padding = convert_to_list(padding, num_dims, 'padding')
- else:
- raise ValueError(f"In valid padding: {padding}")
- # for integer padding
- else:
- padding_algorithm = "EXPLICIT"
- padding = convert_to_list(padding, num_dims, 'padding')
- if not all(p >= 0 for p in padding):
- raise ValueError(
- f"Invalid padding, all value should be larger than or equal to 0, but received: {padding}"
- )
- return padding, padding_algorithm
def _conv_nd(
    x,
    weight,
    bias=None,
    stride=1,
    padding=0,
    padding_algorithm=None,
    dilation=1,
    groups=1,
    data_format="NCHW",
    channel_dim=1,
    op_type="conv2d",
    use_cudnn=True,
    name=None,
):
    """Run the convolution op named by ``op_type`` and add the optional bias.

    Three execution paths exist:

    1. dynamic or PIR mode with ``op_type`` in ``"conv2d"``,
       ``"depthwise_conv2d"`` or ``"conv3d"``: the matching ``_C_ops`` function
       is called directly and the result is returned immediately;
    2. dynamic mode with any other ``op_type``: the op is looked up by name on
       ``_legacy_C_ops``;
    3. otherwise (static graph): the op is appended through ``LayerHelper``.

    Args:
        x: Input passed to the op as-is.
        weight: Filter passed to the op as-is.
        bias: Optional bias added along ``channel_dim``; skipped when None.
        stride, padding, padding_algorithm, dilation, groups, data_format:
            Forwarded unchanged to the op. Callers are expected to have
            normalized them already (see the public conv wrappers).
        channel_dim (int): Axis of the output along which ``bias`` is applied.
        op_type (str): Name of the op to run.
        use_cudnn (bool): Only forwarded on paths 2 and 3; the ``_C_ops`` calls
            on path 1 do not receive it.
        name: Not read directly; it reaches ``LayerHelper`` through
            ``**locals()`` on path 3.

    Returns:
        The op output, with ``bias`` added when it is not None.
    """
    # Fast path for dynamic / PIR mode: call the new-style op directly.
    # NOTE(review): an earlier comment here said the input is transposed from
    # NHWC to NCHW for performance, but no transpose happens in this function;
    # data_format is forwarded to the op unchanged.
    if in_dynamic_or_pir_mode() and op_type == "conv2d":
        # NOTE: conv2d takes (dilation, groups) in that order, whereas the
        # depthwise_conv2d / conv3d calls below take (groups, dilation).
        pre_bias = _C_ops.conv2d(
            x,
            weight,
            stride,
            padding,
            padding_algorithm,
            dilation,
            groups,
            data_format,
        )
        if bias is not None:
            # Reshape bias to [1, ..., -1, ..., 1] (same rank as x, -1 at
            # channel_dim) so the add below broadcasts over the other axes.
            new_shape = [1] * len(x.shape)
            new_shape[channel_dim] = -1
            bias = bias.reshape(new_shape)
            # TODO(qili93): temporary for ascend npu performance to be removed along with npu_identity op
            if (
                _global_flags()['FLAGS_npu_storage_format']
                and 'npu' in get_all_custom_device_type()
            ):
                with no_grad():
                    bias_storage = _C_ops.npu_identity(
                        bias, 3
                    )  # ACL_FORMAT_NC1HWC0 = 3
                    bias_storage._share_underline_tensor_to(bias)
            return _C_ops.add(pre_bias, bias)
        else:
            return pre_bias

    if in_dynamic_or_pir_mode() and op_type == "depthwise_conv2d":
        pre_bias = _C_ops.depthwise_conv2d(
            x,
            weight,
            stride,
            padding,
            padding_algorithm,
            groups,
            dilation,
            data_format,
        )
        if bias is not None:
            # Same broadcastable reshape as in the conv2d branch above.
            new_shape = [1] * len(x.shape)
            new_shape[channel_dim] = -1
            bias = bias.reshape(new_shape)
            return _C_ops.add(pre_bias, bias)
        else:
            return pre_bias

    if in_dynamic_or_pir_mode() and op_type == "conv3d":
        pre_bias = _C_ops.conv3d(
            x,
            weight,
            stride,
            padding,
            padding_algorithm,
            groups,
            dilation,
            data_format,
        )
        if bias is not None:
            # Same broadcastable reshape as in the conv2d branch above.
            new_shape = [1] * len(x.shape)
            new_shape[channel_dim] = -1
            bias = bias.reshape(new_shape)
            return _C_ops.add(pre_bias, bias)
        else:
            return pre_bias

    if in_dynamic_mode():
        # Legacy dynamic path for any other op_type: attributes are passed as a
        # flat (name, value, name, value, ...) tuple.
        attrs = (
            'strides',
            stride,
            'paddings',
            padding,
            'dilations',
            dilation,
            'groups',
            groups,
            'use_cudnn',
            use_cudnn,
            'fuse_relu_before_depthwise_conv',
            False,
            "padding_algorithm",
            padding_algorithm,
            "data_format",
            data_format,
        )
        pre_bias = getattr(_legacy_C_ops, op_type)(x, weight, *attrs)
        if bias is not None:
            out = _add_with_axis(pre_bias, bias, axis=channel_dim)
        else:
            out = pre_bias
    else:
        # Static-graph path: describe the op and append it via LayerHelper.
        inputs = {'Input': [x], 'Filter': [weight]}
        attrs = {
            'strides': stride,
            'paddings': padding,
            'dilations': dilation,
            'groups': groups,
            'use_cudnn': use_cudnn,
            'fuse_relu_before_depthwise_conv': False,
            "padding_algorithm": padding_algorithm,
            "data_format": data_format,
        }
        check_variable_and_dtype(
            x, 'x', ['float16', 'uint16', 'float32', 'float64'], op_type
        )
        # NOTE: **locals() forwards every local bound so far (x, name, inputs,
        # attrs, ...) as keyword arguments, so renaming or adding locals above
        # this line changes what LayerHelper receives.
        helper = LayerHelper(op_type, **locals())
        dtype = helper.input_dtype(input_param_name='x')
        pre_bias = helper.create_variable_for_type_inference(dtype)
        outputs = {"Output": [pre_bias]}
        helper.append_op(
            type=op_type, inputs=inputs, outputs=outputs, attrs=attrs
        )
        if bias is not None:
            out = helper.create_variable_for_type_inference(dtype)
            x_shape = list(pre_bias.shape)
            y_shape = list(bias.shape)
            if channel_dim == -1 or len(x_shape) == len(y_shape):
                # Bias already lines up with the trailing axes (or has full
                # rank): add it as-is.
                helper.append_op(
                    type='elementwise_add',
                    inputs={'X': [pre_bias], 'Y': [bias]},
                    outputs={'Out': [out]},
                    attrs={'axis': -1},
                )
            else:
                assert len(x_shape) > len(
                    y_shape
                ), 'The length of pre_bias must greater than the length of bias'
                # `padding` is rebound here: it now holds the number of
                # trailing size-1 axes needed after the bias dims. The conv
                # attrs dict above was built earlier and is unaffected.
                padding = len(x_shape) - len(y_shape) - channel_dim
                bias = reshape(
                    bias, [1] * channel_dim + y_shape + [1] * padding
                )

                helper.append_op(
                    type='elementwise_add',
                    inputs={'X': [pre_bias], 'Y': [bias]},
                    outputs={'Out': [out]},
                    attrs={'axis': -1},
                )
        else:
            out = pre_bias
    return out
def conv1d(
    x,
    weight,
    bias=None,
    stride=1,
    padding=0,
    dilation=1,
    groups=1,
    data_format='NCL',
    name=None,
):
    r"""
    The convolution1D layer calculates the output based on the input, filter
    and strides, paddings, dilations, groups parameters. Input and
    Output are in NCL format, where N is batch size, C is the number of
    channels, L is the length of the feature.
    Filter is in MCK format, where M is the number of output image channels,
    C is the number of input image channels, K is the size of the kernel.
    If the groups is greater than 1, C will equal the number of input image
    channels divided by the groups. If bias attribution and activation type
    are provided, bias is added to the output of the convolution, and the
    corresponding activation function is applied to the final result.

    For each input :math:`X`, the equation is:

    .. math::

        Out = \sigma (W \ast X + b)

    Where:

    * :math:`X`: Input value, a tensor with NCL format.
    * :math:`W`: Kernel value, a tensor with MCK format.
    * :math:`\ast`: Convolution operation.
    * :math:`b`: Bias value, a tensor with shape [M].
    * :math:`\sigma`: Activation function.
    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.

    Example:

        - Input:

          Input shape: :math:`(N, C_{in}, L_{in})`

          Filter shape: :math:`(C_{out}, C_{in}, L_f)`

        - Output:

          Output shape: :math:`(N, C_{out}, L_{out})`

        Where

        .. math::

            L_{out} = \frac{(L_{in} + 2 * padding - (dilation * (L_f - 1) + 1))}{stride} + 1

    Args:
        x (Tensor): The input is 3-D Tensor with shape [N, C, L], the data type
            of input is float16 or float32 or float64.
        weight (Tensor): The convolution kernel with shape [M, C/g, K], where M is
            the number of output channels, g is the number of groups, K is the kernel's size.
        bias (Tensor, optional): The bias with shape [M,]. Default: None.
        stride (int|list|tuple, optional): The stride size. If stride is a list/tuple, it must
            contain one integer, (stride_size). Default: 1.
        padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
            1. a string in ['valid', 'same'].
            2. an int, which means the feature map is zero padded by size of `padding` on both sides.
            3. a list[int] or tuple[int] whose length is 1, which means the feature map is zero padded by size of `padding[0]` on both sides.
            4. a list[int] or tuple[int] whose length is 2. It has the form [pad_before, pad_after].
            5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0).
            The default value is 0.
        dilation (int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must
            contain one integer, (dilation_size). Default: 1.
        groups (int, optional): The groups number of the conv1d function. According to grouped
            convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
            the first half of the filters is only connected to the first half
            of the input channels, while the second half of the filters is only
            connected to the second half of the input channels. Default: 1.
        data_format (str, optional): Specify the data format of the input, and the data format of the output
            will be consistent with that of the input. An optional string from: `"NCL"`, `"NLC"`.
            The default is `"NCL"`. When it is `"NCL"`, the data is stored in the order of:
            `[batch_size, input_channels, feature_length]`.
        name(str, optional): For detailed information, please refer
           to :ref:`api_guide_Name`. Usually name is no need to set and
           None by default.

    Returns:
        A tensor representing the conv1d, whose data type is the
        same with input.

    Raises:
        ValueError: If ``data_format`` is not ``"NCL"``/``"NLC"``, ``x`` is not
            3-D, the channel dimension of ``x`` is negative, ``groups`` is not
            positive, the input channels or the number of filters are not
            divisible by ``groups``, or the normalized padding does not have
            1 or 2 entries.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.nn.functional as F

            >>> x = paddle.to_tensor([[[4, 8, 1, 9],
            ...                        [7, 2, 0, 9],
            ...                        [6, 9, 2, 6]]], dtype="float32")
            >>> w = paddle.to_tensor([[[9, 3, 4],
            ...                        [0, 0, 7],
            ...                        [2, 5, 6]],
            ...                       [[0, 3, 4],
            ...                        [2, 9, 7],
            ...                        [5, 6, 8]]], dtype="float32")

            >>> y = F.conv1d(x, w)
            >>> print(y)
            Tensor(shape=[1, 2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[[133., 238.],
              [160., 211.]]])
    """
    # use_cudnn follows whether get_cudnn_version() reports a version; it is
    # only forwarded on the static-graph path below.
    cudnn_version = get_cudnn_version()
    if cudnn_version is not None:
        use_cudnn = True
    else:
        use_cudnn = False

    if data_format not in ["NCL", "NLC"]:
        raise ValueError(
            "Attr(data_format) should be 'NCL' or 'NLC'. "
            f"Received Attr(data_format): {data_format}."
        )

    channel_last = data_format == "NLC"
    channel_dim = -1 if channel_last else 1
    # The 1-D convolution is executed as a 2-D convolution over an input with
    # an extra size-1 axis, so the layout name is mapped to its 2-D counterpart.
    conv2d_data_format = "NHWC" if channel_last else "NCHW"
    if len(x.shape) != 3:
        raise ValueError(
            f"Input x should be 3D tensor, but received x with the shape of {x.shape}"
        )
    num_channels = x.shape[channel_dim]
    num_filters = weight.shape[0]
    if num_channels < 0:
        raise ValueError(
            f"The channel dimension of the input({x.shape}) "
            f"should be defined. Received: {num_channels}."
        )
    if groups <= 0:
        raise ValueError(
            f"The groups of conv1d should be greater than 0. Received groups: {groups}"
        )
    if num_channels % groups != 0:
        raise ValueError(
            "the channel of input must be divisible by groups,"
            f"received: the channel of input is {num_channels}, the shape of input is {x.shape}"
            f", the groups is {groups}"
        )
    if num_filters % groups != 0:
        raise ValueError(
            "the number of filters must be divisible by groups,"
            f"received: the number of filters is {num_filters}, the shape of weight is {weight.shape}"
            f", the groups is {groups}"
        )

    # update attrs
    padding, padding_algorithm = _update_padding_nd(padding, channel_last, 1)

    # Prepend zero padding for the inserted size-1 axis:
    # [before, after] -> [0, 0, before, after];  [pad] -> [0, pad].
    if len(padding) == 2:
        padding = [0] * 2 + padding
    elif len(padding) == 1:
        padding = [0] + padding
    else:
        raise ValueError(
            f"The size of padding's dimension should be 1 or 2. But got padding={padding}"
        )
    # Stride and dilation likewise get a leading 1 for the inserted axis.
    stride = [1] + convert_to_list(stride, 1, 'stride')
    dilation = [1] + convert_to_list(dilation, 1, 'dilation')
    # NOTE(review): weight is passed through `assign` before the unsqueeze --
    # presumably so the caller's tensor object is not the one reshaped; confirm.
    from ...tensor.creation import assign as paddle_assign
    weight = paddle_assign(weight)
    # Insert a size-1 axis before the kernel axis: [M, C/g, K] -> [M, C/g, 1, K].
    weight = unsqueeze(weight, axis=[-2])

    l_type = "conv2d"

    # When "groups==num_channels and num_filters% num_channels == 0" using depthwise_conv2d has better performance
    if (
        is_compiled_with_cuda()
        and num_channels == groups
        and num_channels != 1
        and num_filters % num_channels == 0
    ):
        l_type = 'depthwise_conv2d'
        use_cudnn = False

    # Axis at which the size-1 dimension is inserted into x, and later removed
    # from the output: -2 for NCL, -3 for NLC.
    squeeze_axis = -3 if channel_last else -2
    x = unsqueeze(x, axis=[squeeze_axis])

    if in_dynamic_or_pir_mode():
        if l_type == 'conv2d':
            out = _C_ops.conv2d(
                x,
                weight,
                stride,
                padding,
                padding_algorithm,
                dilation,
                groups,
                conv2d_data_format,
            )
        else:
            # NOTE: depthwise_conv2d takes (groups, dilation) in that order,
            # the reverse of the conv2d call above.
            # NOTE(review): the four trailing positional values
            # (False, -1, False, False) are not named in this file; check the
            # op signature before changing them.
            out = _C_ops.depthwise_conv2d(
                x,
                weight,
                stride,
                padding,
                padding_algorithm,
                groups,
                dilation,
                conv2d_data_format,
                False,
                -1,
                False,
                False,
            )
        if bias is not None:
            out = _add_with_axis(out, bias, axis=channel_dim)
    else:
        inputs = {'Input': [x], 'Filter': [weight]}
        attrs = {
            'strides': stride,
            'paddings': padding,
            'dilations': dilation,
            'groups': groups,
            'use_cudnn': use_cudnn,
            'fuse_relu_before_depthwise_conv': False,
            "padding_algorithm": padding_algorithm,
            "data_format": conv2d_data_format,
        }
        check_variable_and_dtype(
            x, 'input', ['float16', 'float32', 'float64'], 'conv2d'
        )
        # NOTE: **locals() forwards every local bound so far (including the
        # already-unsqueezed x and name) as keyword arguments, so renaming or
        # adding locals above this line changes what LayerHelper receives.
        helper = LayerHelper(l_type, **locals())
        dtype = helper.input_dtype(input_param_name='x')
        out = helper.create_variable_for_type_inference(dtype)
        outputs = {"Output": [out]}
        helper.append_op(
            type=l_type, inputs=inputs, outputs=outputs, attrs=attrs
        )
        if bias is not None:
            out = _add_with_axis(out, bias, axis=channel_dim)
    # Drop the size-1 axis that was inserted into x so the result is 3-D again.
    out = squeeze(out, axis=[squeeze_axis])
    return out
def conv2d(
    x,
    weight,
    bias=None,
    stride=1,
    padding=0,
    dilation=1,
    groups=1,
    data_format="NCHW",
    name=None,
):
    r"""
    Apply a 2-D convolution to ``x`` using the filter ``weight``.

    Input and output are in NCHW or NHWC format, where N is the batch size, C
    the number of channels, H the feature height and W the feature width. The
    filter is in MCHW format, where M is the number of output channels, C the
    number of input channels, and H / W the filter height / width. When
    ``groups`` is greater than 1, C equals the number of input channels divided
    by ``groups``. See UFLDL's `convolution
    <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
    tutorial for background.

    When ``bias`` is given it is added to the convolution output.

    For each input :math:`X`, the equation is:

    ..  math::

        Out = \sigma (W \ast X + b)

    Where:

    * :math:`X`: Input value, a tensor with NCHW or NHWC format.
    * :math:`W`: Filter value, a tensor with MCHW format.
    * :math:`\ast`: Convolution operation.
    * :math:`b`: Bias value, a tensor with shape [M].
    * :math:`\sigma`: Activation function.
    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.

    Example:

        - Input:

          Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`

          Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`

        - Output:

          Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`

        Where

        ..  math::

            H_{out}&= \frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\
            W_{out}&= \frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1

    Args:
        x (Tensor): 4-D input Tensor with shape [N, C, H, W] (or [N, H, W, C]
            for ``"NHWC"``); data type float16, float32 or float64.
        weight (Tensor): Convolution kernel with shape [M, C/g, kH, kW], where
            M is the number of output channels, g the number of groups, and
            kH / kW the filter height / width.
        bias (Tensor, optional): Bias with shape [M,]. Default: None.
        stride (int|list|tuple, optional): Convolution stride. A list/tuple
            must hold two integers (stride_height, stride_width); a single int
            is used for both. Default: stride = 1.
        padding (string|int|list|tuple, optional): Amount of zero padding.
            A string selects the padding algorithm and must be 'VALID' or
            'SAME'. A list/tuple may be `[pad_height, pad_width]`, or
            `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
            or, including the batch and channel dimensions,
            `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`
            when `data_format` is `"NCHW"` and
            `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`
            when `data_format` is `"NHWC"`. Default: padding = 0.
        dilation (int|list|tuple, optional): Spacing between kernel points.
            A list/tuple must hold two integers (dilation_height,
            dilation_width); a single int is used for both.
            Default: dilation = 1.
        groups (int, optional): Number of groups. Following the grouped
            convolution of Alex Krizhevsky's Deep CNN paper: with group=2 the
            first half of the filters is connected only to the first half of
            the input channels, and the second half of the filters only to the
            second half of the input channels. Default: groups=1.
        data_format (str, optional): Layout of the input; the output uses the
            same layout. One of `"NCHW"`, `"NHWC"`. With `"NCHW"` the data is
            stored as `[batch_size, input_channels, input_height, input_width]`.
            Default: `"NCHW"`.
        name(str, optional): For detailed information, please refer
           to :ref:`api_guide_Name`. Usually name is no need to set and
           None by default.

    Returns:
        A Tensor representing the conv2d result, whose data type is the same with input.

    Raises:
        ValueError: If ``data_format`` is not ``"NCHW"``/``"NHWC"``, ``x`` is
            not 4-D, the channel dimension of ``x`` is negative, ``groups`` is
            not positive, or the input channels or the number of filters are
            not divisible by ``groups``.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.nn.functional as F

            >>> x_var = paddle.randn((2, 3, 8, 8), dtype='float32')
            >>> w_var = paddle.randn((6, 3, 3, 3), dtype='float32')

            >>> y_var = F.conv2d(x_var, w_var)

            >>> print(y_var.shape)
            [2, 6, 6, 6]
    """
    # ---- argument validation (order determines which error is reported) ----
    if data_format not in ("NCHW", "NHWC"):
        raise ValueError(
            "Attr(data_format) should be 'NCHW' or 'NHWC'. "
            f"Received Attr(data_format): {data_format}."
        )
    channel_last = data_format == "NHWC"
    channel_dim = -1 if channel_last else 1

    x_rank = len(x.shape)
    if x_rank != 4:
        raise ValueError(
            f"Input x should be 4D tensor, but received x with the shape of {x.shape}"
        )

    num_channels = x.shape[channel_dim]
    num_filters = weight.shape[0]
    if num_channels < 0:
        raise ValueError(
            f"The channel dimension of the input({x.shape}) "
            f"should be defined. Received: {num_channels}."
        )
    if groups <= 0:
        raise ValueError(
            f"The groups of conv2d should be greater than 0. Received groups: {groups}"
        )
    if num_channels % groups != 0:
        raise ValueError(
            "the channel of input must be divisible by groups,"
            f"received: the channel of input is {num_channels}, the shape of input is {x.shape}"
            f", the groups is {groups}"
        )
    if num_filters % groups != 0:
        raise ValueError(
            "the number of filters must be divisible by groups,"
            f"received: the number of filters is {num_filters}, the shape of weight is {weight.shape}"
            f", the groups is {groups}"
        )

    cudnn_version = get_cudnn_version()
    use_cudnn = bool(is_compiled_with_cuda() and cudnn_version is not None)

    # ---- normalize attributes to per-dimension lists ----
    padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2)
    stride = convert_to_list(stride, 2, 'stride')
    dilation = convert_to_list(dilation, 2, 'dilation')

    # One group per input channel (and more than one channel) selects the
    # depthwise op instead of the generic one.
    is_depthwise = (
        num_channels == groups
        and num_channels != 1
        and num_filters % num_channels == 0
    )
    if is_depthwise:
        l_type = 'depthwise_conv2d'
        use_cudnn = bool(is_compiled_with_rocm())
    else:
        l_type = "conv2d"
        if in_dynamic_mode():
            # Dynamic-mode shortcut: run the op right here and return.
            pre_bias = _C_ops.conv2d(
                x,
                weight,
                stride,
                padding,
                padding_algorithm,
                dilation,
                groups,
                data_format,
            )
            if bias is None:
                return pre_bias

            # Turn a negative channel axis into its non-negative equivalent.
            bias_axis = channel_dim + x_rank if channel_dim < 0 else channel_dim
            if len(bias.shape) < x_rank:
                # Surround the bias dims with size-1 axes so that it
                # broadcasts against the convolution output.
                leading_ones = [1] * bias_axis
                trailing_ones = [1] * (x_rank - bias_axis - 1)
                bias = _C_ops.reshape(
                    bias, leading_ones + bias.shape + trailing_ones
                )
            # TODO(qili93): temporary for ascend npu performance to be removed along with npu_identity op
            if (
                _global_flags()['FLAGS_npu_storage_format']
                and 'npu' in get_all_custom_device_type()
            ):
                with no_grad():
                    bias_storage = _C_ops.npu_identity(
                        bias, 3
                    )  # ACL_FORMAT_NC1HWC0 = 3
                    bias_storage._share_underline_tensor_to(bias)
            return _C_ops.add(pre_bias, bias)

    # A global flag can switch cuDNN off on CUDA builds.
    flag_name = "FLAGS_conv2d_disable_cudnn"
    if is_compiled_with_cuda() and get_flags(flag_name)[flag_name]:
        use_cudnn = False

    return _conv_nd(
        x,
        weight,
        bias=bias,
        stride=stride,
        padding=padding,
        padding_algorithm=padding_algorithm,
        dilation=dilation,
        groups=groups,
        data_format=data_format,
        channel_dim=channel_dim,
        op_type=l_type,
        use_cudnn=use_cudnn,
        name=name,
    )
- def conv1d_transpose(
- x,
- weight,
- bias=None,
- stride=1,
- padding=0,
- output_padding=0,
- groups=1,
- dilation=1,
- output_size=None,
- data_format="NCL",
- name=None,
- ):
- r"""
- The 1-D convolution transpose layer calculates the output based on the input,
- filter, and dilation, stride, padding. Input(Input) and output(Output)
- are in 'NCL' format or 'NLC' where N is batch size, C is the number of channels,
- L is the length of the feature. The details of convolution transpose
- layer, please refer to the following explanation and references
- `therein <https://arxiv.org/pdf/1603.07285.pdf>`_.
- If bias attribution and activation type are provided, bias is added to
- the output of the convolution, and the corresponding activation function
- is applied to the final result.
- For each input :math:`X`, the equation is:
- .. math::
- Out = \sigma (W \ast X + b)
- Where:
- * :math:`X`: Input value, a 3-D Tensor with 'NCL' format or 'NLC' format.
- * :math:`W`: Filter value, a 3-D Tensor with 'MCK' format.
- * :math:`\\ast`: Convolution operation.
- * :math:`b`: Bias value, a 2-D Tensor with shape [M, 1].
- * :math:`\\sigma`: Activation function.
- * :math:`Out`: Output value, a 3-D Tensor with data format 'NCL' or 'NLC', the shape of :math:`Out` and :math:`X` may be different.
- Example:
- - Input:
- Input shape: :math:`(N, C_{in}, L_{in})`
- Filter shape: :math:`(C_{in}, C_{out}, L_f)`
- - Output:
- Output shape: :math:`(N, C_{out}, L_{out})`
- Where
- .. math::
- L^\prime_{out} &= (L_{in} - 1) * stride - 2 * padding + dilation * (L_f - 1) + 1 \\
- L_{out} &\in [ L^\prime_{out}, L^\prime_{out} + stride ]
- Note:
- The conv1d_transpose can be seen as the backward of the conv1d. For conv1d,
- when stride > 1, conv1d maps multiple input shape to the same output shape,
- so for conv1d_transpose, when stride > 1, input shape maps multiple output shape.
- If output_size is None, :math:`L_{out} = L^\prime_{out}`;
- else, the :math:`L_{out}` of the output size must between :math:`L^\prime_{out}`
- and :math:`L^\prime_{out} + stride`.
- Args:
- x(Tensor): 3-D tensor with [N, C, L] or [N, L, C] format,
- its data type is float32 or float64.
- weight(Tensor): The convolution kernel, a Tensor with shape [C, M/g, K],
- where M is the number of output channels(filters), g is the number of groups,
- K is the size of the kernel.
- bias(Tensor, optional): The bias, a Tensor with shape [M, ].
- stride(int|tuple|list, optional): The stride size. It means the stride in transposed convolution.
- If stride is a list/tuple, it must contain one integer, `(stride_size)`.
- Default: stride = 1.
- padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds
- `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a
- string, either 'VALID' or 'SAME' supported, which is the padding algorithm.
- If `padding` is a tuple or list, it could be in two forms:
- `[pad]` or `[pad_left, pad_right]`. Default: padding = 0.
- output_padding(int|list|tuple, optional): The count of zeros to be added to tail of each dimension.
- If it is a list/tuple, it must contain one integer. Default: 0.
- groups(int, optional): The groups number of the conv1d transpose function. Inspired by
- grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
- when group=2, the first half of the filters is only connected to the
- first half of the input channels, while the second half of the
- filters is only connected to the second half of the input channels.
- Default: groups = 1.
- dilation(int|tuple|list, optional): The dilation size. It means the spacing between the kernel points.
- If dilation is a list/tuple, it must contain one integer, `(dilation_size)`.
- Default: dilation = 1.
- output_size(int|tuple|list, optional): The output image size. If output size is a
- tuple/list, it must contain one integer, `(feature_length)`. None if use
- filter_size(shape of weight), padding, and stride to calculate output_size.
- data_format (str, optional): Specify the data format of the input, and the data format of the output
- will be consistent with that of the input. An optional string from: `"NCL"`, `"NLC"`.
- The default is `"NCL"`. When it is `"NCL"`, the data is stored in the order of:
- `[batch_size, input_channels, input_length]`.
- name(str, optional): For detailed information, please refer
- to :ref:`api_guide_Name`. Usually name is no need to set and
- None by default.
- Returns:
- A tensor representing the result of 1-D transpose convolution, whose
- data type is the same with input. And its shape is (num_batches, channels, length)
- when data_format is `"NCL"` and (num_batches, length, channels) when data_format is
- `"NLC"`.
- Examples:
- .. code-block:: python
- >>> import paddle
- >>> import paddle.nn.functional as F
- >>> # shape: (1, 2, 4)
- >>> x = paddle.to_tensor([[[4, 0, 9, 7],
- >>> [8, 0, 9, 2,]]], dtype="float32")
- >>> # shape: (2, 1, 2)
- >>> w = paddle.to_tensor([[[7, 0]],
- >>> [[4, 2]]], dtype="float32")
- >>> y = F.conv1d_transpose(x, w)
- >>> print(y)
- Tensor(shape=[1, 1, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
- [[[60., 16., 99., 75., 4. ]]])
- """
- cudnn_version = get_cudnn_version()
- if cudnn_version is not None:
- use_cudnn = True
- else:
- use_cudnn = False
- if data_format not in ['NCL', 'NLC']:
- raise ValueError(
- "Attr(data_format) of conv2d_transpose got wrong value: "
- f"received {data_format}, but only 'NCL' or 'NLC' are supported."
- )
- channel_last = data_format == "NLC"
- channel_dim = -1 if channel_last else 1
- if len(x.shape) != 3:
- raise ValueError(
- f"Input x should be 3D tensor, but received x with the shape of {x.shape}"
- )
- num_channels = x.shape[channel_dim]
- if num_channels < 0:
- raise ValueError(
- f"The channel dimension of the input({x.shape}) "
- f"should be defined. Received: {num_channels}."
- )
- if groups <= 0:
- raise ValueError(
- f"The groups of conv1d_transpose should be greater than 0. Received groups: {groups}"
- )
- if num_channels % groups != 0:
- raise ValueError(
- "the channel of input must be divisible by groups,"
- f"received: the channel of input is {num_channels}, the shape of input is {x.shape}"
- f", the groups is {groups}"
- )
- # update attrs
- padding, padding_algorithm = _update_padding_nd(padding, channel_last, 1)
- if len(padding) == 2:
- padding = padding + [0] * 2
- elif len(padding) == 1:
- padding = padding + [0]
- else:
- raise ValueError(
- f"The size of padding's dimension should 1 or 2. But got padding={padding}"
- )
- stride = convert_to_list(stride, 1, 'stride') + [1]
- dilation = convert_to_list(dilation, 1, 'dilation') + [1]
- if output_size is None:
- output_size = []
- else:
- if output_padding != 0:
- raise ValueError(
- 'output_padding option is mutually exclusive with '
- 'output_size'
- )
- if isinstance(output_size, (list, tuple, int)):
- output_size = convert_to_list(output_size, 1, 'output_size') + [1]
- else:
- raise ValueError(
- "output_size should be int, or list, tuple of ints"
- )
- if output_padding == 0:
- output_padding = []
- else:
- output_padding = convert_to_list(
- output_padding, 1, 'output_padding'
- ) + [0]
- if len(output_padding) > 0 and output_padding[0] > stride[0]:
- raise ValueError(
- "The size of output_padding should not be greater than stride."
- f"But got output_padding={output_padding[0]} and stride={stride[0]}"
- )
- if len(weight.shape) != 3:
- raise ValueError(
- f'Input weight should be 3D tensor, but received weight with the shape of {weight.shape}'
- )
- op_type = 'conv2d_transpose'
- num_filters = weight.shape[1]
- if (
- num_channels == groups
- and num_channels != 1
- and num_filters == 1
- and not use_cudnn
- ):
- op_type = 'depthwise_conv2d_transpose'
- use_cudnn = False
- squeeze_axis = -2 if channel_last else -1
- conv2d_data_format = "NHWC" if channel_last else "NCHW"
- x = unsqueeze(x, axis=[squeeze_axis])
- weight = unsqueeze(weight, axis=[-1])
- if in_dynamic_mode():
- out = getattr(_C_ops, op_type)(
- x,
- weight,
- stride,
- padding,
- output_padding,
- output_size,
- padding_algorithm,
- groups,
- dilation,
- conv2d_data_format,
- )
- if bias is not None:
- out = _add_with_axis(out, bias, axis=channel_dim)
- else:
- inputs = {'Input': [x], 'Filter': [weight]}
- attrs = {
- 'output_padding': output_padding,
- 'output_size': output_size,
- 'strides': stride,
- 'paddings': padding,
- 'padding_algorithm': padding_algorithm,
- 'dilations': dilation,
- 'groups': groups,
- 'use_cudnn': use_cudnn,
- 'data_format': conv2d_data_format,
- }
- check_variable_and_dtype(
- x, 'input', ['float16', 'float32', 'float64'], 'conv2d_transpose'
- )
- helper = LayerHelper(op_type, **locals())
- dtype = helper.input_dtype(input_param_name='x')
- out = helper.create_variable_for_type_inference(dtype)
- outputs = {"Output": [out]}
- helper.append_op(
- type=op_type, inputs=inputs, outputs=outputs, attrs=attrs
- )
- if bias is not None:
- out = _add_with_axis(out, bias, axis=channel_dim)
- out = squeeze(out, axis=[squeeze_axis])
- return out
def conv2d_transpose(
    x,
    weight,
    bias=None,
    stride=1,
    padding=0,
    output_padding=0,
    dilation=1,
    groups=1,
    output_size=None,
    data_format='NCHW',
    name=None,
):
    r"""
    Compute a 2-D transposed convolution of ``x`` with ``weight``.

    The output is derived from the input, the filter and the ``stride``,
    ``padding`` and ``dilation`` settings. Input and output are laid out as
    NCHW or NHWC, where N is the batch size, C is the number of channels,
    H is the height of the feature and W is the width of the feature.
    ``stride``, ``padding`` and ``dilation`` each describe two spatial
    dimensions (height, width). For background on transposed convolution see
    the explanation and references
    `therein <https://arxiv.org/pdf/1603.07285.pdf>`_.
    When ``bias`` is given it is added to the convolution result; this
    function applies no activation.
    See more detail in :ref:`api_paddle_nn_Conv2DTranspose` .

    For each input :math:`X`, the equation is:

    .. math::

        Out = W \ast X + b

    Where:

    * :math:`X`: Input value, a 4-D Tensor with NCHW or NHWC format.
    * :math:`W`: Filter value, a 4-D Tensor with MCHW format.
    * :math:`\ast`: Convolution operation.
    * :math:`b`: Bias value, a Tensor with shape [M, ].
    * :math:`Out`: Output value, a 4-D Tensor with data format 'NCHW' or 'NHWC', the shape of :math:`Out` and :math:`X` may be different.

    Example:

        - Input:

          Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`

          Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)`

        - Output:

          Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`

        Where

        .. math::

            H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\
            W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 \\
            H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] ] \\
            W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] ]

    Note:
        The conv2d_transpose can be seen as the backward of the conv2d. For conv2d,
        when stride > 1, conv2d maps multiple input shapes to the same output shape,
        so for conv2d_transpose, when stride > 1, one input shape maps to multiple
        output shapes.
        If output_size is None, :math:`H_{out} = H^\prime_{out}, W_{out} = W^\prime_{out}`;
        otherwise :math:`H_{out}` must lie between :math:`H^\prime_{out}`
        and :math:`H^\prime_{out} + strides[0]`, and :math:`W_{out}` must lie
        between :math:`W^\prime_{out}` and :math:`W^\prime_{out} + strides[1]`.

    Args:
        x(Tensor): 4-D Tensor with [N, C, H, W] or [N, H, W, C] format,
            whose data type is float32 or float64 (the static-graph dtype
            check additionally admits float16 and uint16).
        weight(Tensor): The convolution kernel, a 4-D Tensor with shape [C, M/g, kH, kW],
            where M is the number of output channels(filters), g is the number of groups,
            kH is the height of the kernel, and kW is the width of the kernel.
        bias(Tensor, optional): The bias, a Tensor with shape [M, ]. Default: None.
        stride(int|list|tuple, optional): The stride size of the transposed convolution.
            A list/tuple must contain two integers, (stride_height, stride_width).
            Otherwise, stride_height = stride_width = stride. Default: stride = 1.
        padding(str|int|list|tuple, optional): The padding size, i.e. the number of
            zero-paddings on both sides for each dimension. If `padding` is a string,
            it is the padding algorithm, either 'VALID' or 'SAME'. If it is a tuple or
            list, it could be in three forms: `[pad_height, pad_width]` or
            `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
            and when `data_format` is `"NCHW"`, `padding` can be in the form
            `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
            When `data_format` is `"NHWC"`, `padding` can be in the form
            `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
            Default: padding = 0.
        output_padding(int|list|tuple, optional): Additional size added to one side
            of each dimension in the output shape. Mutually exclusive with
            ``output_size``. Default: 0.
        dilation(int|list|tuple, optional): The dilation size, i.e. the spacing between
            the kernel points. A list/tuple must contain two integers,
            (dilation_height, dilation_width). Otherwise,
            dilation_height = dilation_width = dilation. Default: dilation = 1.
        groups(int, optional): The groups number of the Conv2D transpose layer. Inspired by
            grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
            when group=2, the first half of the filters is only connected to the
            first half of the input channels, while the second half of the
            filters is only connected to the second half of the input channels.
            Must be positive and divide the number of input channels.
            Default: groups = 1.
        output_size(int|Tensor|tuple|list, optional): The output image size. A
            tuple/list must contain two integers (or Tensors),
            (image_height, image_width). A Tensor must be 1-D with one or two
            int32/int64 elements. None means the size is calculated from
            filter_size(shape of weight), padding, and stride.
        data_format (str, optional): Specify the data format of the input, and the data format of the output
            will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
            The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
            `[batch_size, input_channels, input_height, input_width]`.
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    Returns:
        A Tensor holding the transposed convolution result, whose
        data type is the same with input and shape is (num_batches, channels, out_h,
        out_w) or (num_batches, out_h, out_w, channels).

    Raises:
        ValueError: If ``data_format`` is unsupported, ``x`` or ``weight`` is
            not 4-D, the channel dimension of ``x`` is undefined, ``groups``
            is not positive or does not divide the input channels, or
            ``output_size`` is malformed or combined with ``output_padding``.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.nn.functional as F

            >>> x_var = paddle.randn((2, 3, 8, 8), dtype='float32')
            >>> w_var = paddle.randn((3, 6, 3, 3), dtype='float32')

            >>> y_var = F.conv2d_transpose(x_var, w_var)

            >>> print(y_var.shape)
            [2, 6, 10, 10]
    """
    # --- argument validation; the order decides which error surfaces first ---
    if data_format not in ('NCHW', 'NHWC'):
        raise ValueError(
            "Attr(data_format) of conv2d_transpose got wrong value: "
            f"received {data_format}, but only 'NCHW' or 'NHWC' are supported."
        )
    channel_last = data_format == "NHWC"
    channel_dim = -1 if channel_last else 1

    if len(x.shape) != 4:
        raise ValueError(
            f"Input x should be 4D tensor, but received x with the shape of {x.shape}"
        )
    if len(weight.shape) != 4:
        raise ValueError(
            f"Input weight should be 4D tensor, but received weight with the shape of {weight.shape}"
        )

    num_channels = x.shape[channel_dim]
    if num_channels < 0:
        raise ValueError(
            f"The channel dimension of the input({x.shape}) "
            f"should be defined. Received: {num_channels}."
        )
    if groups <= 0:
        raise ValueError(
            f"The groups of conv2d_transpose should be greater than 0. Received groups: {groups}"
        )
    if num_channels % groups != 0:
        raise ValueError(
            "the channel of input must be divisible by groups,"
            f"received: the channel of input is {num_channels}, the shape of input is {x.shape}"
            f", the groups is {groups}"
        )

    # NOTE: the static-graph path below hands every local to LayerHelper via
    # **locals(), so the local names defined up to that call are kept stable.
    cudnn_version = get_cudnn_version()
    use_cudnn = bool(is_compiled_with_cuda() and cudnn_version is not None)

    # --- normalise the op attributes ---
    padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2)
    stride = convert_to_list(stride, 2, 'stride')
    dilation = convert_to_list(dilation, 2, 'dilation')

    if output_size is None:
        output_size = []
    elif output_padding != 0:
        raise ValueError(
            'output_padding option is mutually exclusive with '
            'output_size'
        )
    elif isinstance(output_size, (list, tuple)):
        # A sequence that contains tensors is converted element-wise.
        output_size = (
            _convert_to_tensor_list(output_size)
            if _contain_var(output_size)
            else convert_to_list(output_size, 2, 'output_size')
        )
    elif isinstance(output_size, int):
        output_size = convert_to_list(output_size, 2, 'output_size')
    elif isinstance(output_size, (Variable, pir.Value)):
        check_dtype(
            output_size.dtype,
            'output_size',
            ['int32', 'int64'],
            'conv2d_transpose',
        )
        if len(output_size.shape) != 1 or output_size.shape[0] not in (1, 2):
            raise ValueError("output_size must contain one or two integers.")
        if output_size.shape[0] == 1:
            # A single-element tensor is used for both height and width;
            # a two-element tensor is passed through untouched.
            output_size = [output_size, output_size]
    else:
        raise ValueError(
            "output_size should be int or Tensor or list, tuple of ints or Tensor"
        )

    output_padding = (
        []
        if output_padding == 0
        else convert_to_list(output_padding, 2, 'output_padding')
    )

    # One input channel per group with a single filter each selects the
    # depthwise op, which runs with use_cudnn switched off.
    num_filters = weight.shape[1]
    op_type = 'conv2d_transpose'
    if num_channels == groups and num_channels != 1 and num_filters == 1:
        op_type = 'depthwise_conv2d_transpose'
        use_cudnn = False

    # --- dynamic / PIR execution ---
    if in_dynamic_or_pir_mode():
        op = getattr(_C_ops, op_type)
        pre_bias = op(
            x,
            weight,
            stride,
            padding,
            output_padding,
            output_size,
            padding_algorithm,
            groups,
            dilation,
            data_format,
        )
        if bias is None:
            return pre_bias
        return _add_with_axis(pre_bias, bias, axis=channel_dim)

    # --- legacy static-graph execution ---
    inputs = {'Input': [x], 'Filter': [weight]}
    attrs = {
        'output_padding': output_padding,
        'output_size': output_size,
        'strides': stride,
        'paddings': padding,
        'padding_algorithm': padding_algorithm,
        'dilations': dilation,
        'groups': groups,
        'use_cudnn': use_cudnn,
        'data_format': data_format,
    }
    check_variable_and_dtype(
        x,
        'x',
        ['float16', 'uint16', 'float32', 'float64'],
        'conv2d_transpose',
    )
    helper = LayerHelper(op_type, **locals())
    conv_out = helper.create_variable_for_type_inference(x.dtype)
    helper.append_op(
        type=op_type,
        inputs=inputs,
        outputs={"Output": [conv_out]},
        attrs=attrs,
    )
    if bias is None:
        return conv_out

    biased = helper.create_variable_for_type_inference(x.dtype)
    out_dims = list(conv_out.shape)
    bias_dims = list(bias.shape)
    if channel_dim != -1 and len(out_dims) != len(bias_dims):
        # Channel-first layout: reshape the bias so it lines up with the
        # channel axis when added with axis=-1.
        assert len(out_dims) > len(
            bias_dims
        ), 'The length of pre_bias must greater than the length of bias'
        trailing_ones = len(out_dims) - len(bias_dims) - channel_dim
        bias = reshape(
            bias, [1] * channel_dim + bias_dims + [1] * trailing_ones
        )
    helper.append_op(
        type='elementwise_add',
        inputs={'X': [conv_out], 'Y': [bias]},
        outputs={'Out': [biased]},
        attrs={'axis': -1},
    )
    return biased
def conv3d(
    x,
    weight,
    bias=None,
    stride=1,
    padding=0,
    dilation=1,
    groups=1,
    data_format="NCDHW",
    name=None,
):
    r"""
    Compute a 3-D convolution of ``x`` with ``weight``.

    The output is derived from the input, the filter and the ``stride``,
    ``padding``, ``dilation`` and ``groups`` settings. Input and output are
    laid out as NCDHW or NDHWC, where N is batch size, C is the number of
    channels, D is the depth of the feature, H is the height of the feature,
    and W is the width of the feature. Convolution3D is similar to
    Convolution2D but adds one dimension (depth). ``bias`` is forwarded to
    the shared N-D convolution helper together with the normalised
    attributes; this function takes no activation argument.

    For each input :math:`X`, the equation is:

    .. math::

        Out = W \ast X + b

    In the above equation:

    * :math:`X`: Input value, a tensor with NCDHW or NDHWC format.
    * :math:`W`: Filter value, a tensor with MCDHW format.
    * :math:`\ast`: Convolution operation.
    * :math:`b`: Bias value, a tensor with shape [M, ].
    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.

    Example:

        - Input:

          Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`

          Filter shape: :math:`(C_{out}, C_{in}, D_f, H_f, W_f)`

        - Output:

          Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`

        Where

        .. math::

            D_{out}&= \frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\
            H_{out}&= \frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\
            W_{out}&= \frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1

    Args:
        x (Tensor): The input, a 5-D Tensor with shape [N, C, D, H, W] or
            [N, D, H, W, C] (according to ``data_format``); the data
            type of input is float16 or float32 or float64.
        weight (Tensor): The convolution kernel, a 5-D Tensor with shape [M, C/g, kD, kH, kW],
            where M is the number of filters(output channels), g is the number of groups,
            kD, kH, kW are the filter's depth, height and width respectively.
        bias (Tensor, optional): The bias, a Tensor of shape [M, ]. Default: None.
        stride (int|list|tuple, optional): The stride size of the convolution. A
            list/tuple must contain three integers, (stride_depth, stride_height, stride_width).
            Otherwise, stride_depth = stride_height = stride_width = stride. Default: stride = 1.
        padding (string|int|list|tuple, optional): The padding size, i.e. the number of
            zero-paddings on both sides for each dimension. If `padding` is a string,
            it is the padding algorithm, either 'VALID' or 'SAME'. If it is a tuple or
            list, it could be in three forms: `[pad_depth, pad_height, pad_width]` or
            `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
            and when `data_format` is `"NCDHW"`, `padding` can be in the form
            `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
            When `data_format` is `"NDHWC"`, `padding` can be in the form
            `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
            Default: padding = 0.
        dilation (int|list|tuple, optional): The dilation size, i.e. the spacing between the
            kernel points. A list/tuple must contain three integers, (dilation_depth,
            dilation_height, dilation_width). Otherwise,
            dilation_depth = dilation_height = dilation_width = dilation.
            Default: dilation = 1.
        groups (int, optional): The groups number of the Conv3D Layer. According to grouped
            convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
            the first half of the filters is only connected to the first half
            of the input channels, while the second half of the filters is only
            connected to the second half of the input channels. Must be positive
            and divide both the input channels and the number of filters.
            Default: groups=1
        data_format (str, optional): Specify the data format of the input, and the data format of the output
            will be consistent with that of the input. An optional string from: `"NCDHW"`, `"NDHWC"`.
            The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
            `[batch_size, input_channels, input_depth, input_height, input_width]`.
        name(str|None, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    Returns:
        A Tensor holding the convolution result, whose data type is
        the same with input.

    Raises:
        ValueError: If ``data_format`` is unsupported, ``x`` or ``weight`` is
            not 5-D, the channel dimension of ``x`` is undefined, ``groups``
            is not positive, or ``groups`` does not divide the number of
            input channels or the number of filters.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.nn.functional as F

            >>> x_var = paddle.randn((2, 3, 8, 8, 8), dtype='float32')
            >>> w_var = paddle.randn((6, 3, 3, 3, 3), dtype='float32')

            >>> y_var = F.conv3d(x_var, w_var)

            >>> print(y_var.shape)
            [2, 6, 6, 6, 6]
    """
    # entry check
    if data_format not in ["NCDHW", "NDHWC"]:
        raise ValueError(
            "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
            f"Attr(data_format): {data_format}."
        )

    channel_last = data_format == "NDHWC"
    channel_dim = -1 if channel_last else 1
    if len(x.shape) != 5:
        raise ValueError(
            f"Input x should be 5D tensor, but received x with the shape of {x.shape}"
        )
    # Reject a malformed filter up front (same check and message as
    # conv3d_transpose) instead of reading weight.shape[0] from a tensor of
    # the wrong rank.
    if len(weight.shape) != 5:
        raise ValueError(
            f"Input weight should be 5D tensor, but received weight with the shape of {weight.shape}"
        )

    num_channels = x.shape[channel_dim]
    num_filters = weight.shape[0]
    if num_channels < 0:
        raise ValueError(
            f"The channel dimension of the input({x.shape}) should be defined. "
            f"Received: {num_channels}."
        )
    if groups <= 0:
        raise ValueError(
            f"The groups of conv3d should be greater than 0. Received groups: {groups}"
        )
    if num_channels % groups != 0:
        raise ValueError(
            "The number of input channels must be divisible by Attr(groups). "
            f"Received: number of channels({num_channels}), groups({groups})."
        )
    if num_filters % groups != 0:
        raise ValueError(
            "The number of filters must be divisible by Attr(groups). "
            f"Received: number of filters({num_filters}), groups({groups})."
        )

    cudnn_version = get_cudnn_version()
    use_cudnn = bool(is_compiled_with_cuda() and cudnn_version is not None)

    # Normalise the attributes to their 3-D forms before delegating.
    padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3)
    stride = convert_to_list(stride, 3, 'stride')
    dilation = convert_to_list(dilation, 3, 'dilation')
    op_type = "conv3d"

    return _conv_nd(
        x,
        weight,
        bias,
        stride,
        padding,
        padding_algorithm,
        dilation,
        groups,
        data_format,
        channel_dim,
        op_type,
        use_cudnn,
        name,
    )
def conv3d_transpose(
    x,
    weight,
    bias=None,
    stride=1,
    padding=0,
    output_padding=0,
    groups=1,
    dilation=1,
    output_size=None,
    data_format='NCDHW',
    name=None,
):
    r"""
    Compute a 3-D transposed convolution of ``x`` with ``weight``.

    The output is derived from the input, the filter and the ``stride``,
    ``padding`` and ``dilation`` settings. Input and output are laid out as
    NCDHW or NDHWC, where N is batch size, C is the number of channels,
    D is the depth of the feature, H is the height of the feature, and W
    is the width of the feature. ``stride``, ``padding`` and ``dilation``
    each describe three spatial dimensions (depth, height, width).
    For background on transposed convolution see the explanation and
    references `therein <https://arxiv.org/pdf/1603.07285.pdf>`_.
    When ``bias`` is given it is added to the convolution result; this
    function applies no activation.
    See more detail in :ref:`api_paddle_nn_Conv3DTranspose` .

    For each input :math:`X`, the equation is:

    .. math::

        Out = W \ast X + b

    In the above equation:

    * :math:`X`: Input value, a Tensor with NCDHW or NDHWC format.
    * :math:`W`: Filter value, a Tensor with NCDHW format.
    * :math:`\ast`: Convolution operation.
    * :math:`b`: Bias value, a Tensor with shape [M, ].
    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.

    Example:

        - Input:

          Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`

          Filter shape: :math:`(C_{in}, C_{out}, D_f, H_f, W_f)`

        - Output:

          Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`

        Where

        .. math::

           D^\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\
           H^\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\
           W^\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1 \\
           D_{out} &\in [ D^\prime_{out}, D^\prime_{out} + strides[0] ] \\
           H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[1] ] \\
           W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[2] ]

    Note:
        The conv3d_transpose can be seen as the backward of the conv3d. For conv3d,
        when stride > 1, conv3d maps multiple input shapes to the same output shape,
        so for conv3d_transpose, when stride > 1, one input shape maps to multiple
        output shapes.
        If output_size is None, :math:`D_{out} = D^\prime_{out}, H_{out} = H^\prime_{out}, W_{out} = W^\prime_{out}`;
        otherwise :math:`D_{out}` must lie between :math:`D^\prime_{out}` and
        :math:`D^\prime_{out} + strides[0]`, :math:`H_{out}` must lie
        between :math:`H^\prime_{out}` and :math:`H^\prime_{out} + strides[1]`, and
        :math:`W_{out}` must lie between :math:`W^\prime_{out}` and
        :math:`W^\prime_{out} + strides[2]`.

    Args:
        x (Tensor): The input, a 5-D Tensor with shape [N, C, D, H, W] or [N, D, H, W, C]; the data type
            of input is float32 or float64 (the static-graph dtype check
            additionally admits float16).
        weight (Tensor): The convolution kernel, a 5-D Tensor with shape [C, M/g, kD, kH, kW],
            where M is the number of filters (output channels), g is the number of groups,
            kD, kH, kW are the filter's depth, height and width respectively.
        bias (Tensor, optional): The bias, a Tensor of shape [M, ]. Default: None.
        stride (int|list|tuple, optional): The stride size of the transposed convolution.
            A list/tuple must contain three integers, (stride_depth, stride_height,
            stride_width). Otherwise, stride_depth = stride_height = stride_width = stride.
            Default: 1.
        padding (str|int|list|tuple, optional): The padding size, i.e. the number of
            zero-paddings on both sides for each dimension. If `padding` is a string,
            it is the padding algorithm, either 'VALID' or 'SAME'. If it is a tuple or
            list, it could be in three forms: `[pad_depth, pad_height, pad_width]` or
            `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
            and when `data_format` is `"NCDHW"`, `padding` can be in the form
            `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
            When `data_format` is `"NDHWC"`, `padding` can be in the form
            `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
            Default: 0.
        output_padding (int|list|tuple, optional): Additional size added to one side
            of each dimension in the output shape. Mutually exclusive with
            ``output_size``. Default: 0.
        groups (int, optional): The groups number of the Conv3D transpose layer. Inspired by
            grouped convolution in `Alex Krizhevsky's Deep CNN paper <https://papers.nips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf>`_, in which
            when groups = 2, the first half of the filters is only connected to the
            first half of the input channels, while the second half of the
            filters is only connected to the second half of the input channels.
            Must be positive and divide the number of input channels.
            Default: 1.
        dilation (int|list|tuple, optional): The dilation size, i.e. the spacing between the
            kernel points. A list/tuple must contain three integers, (dilation_depth,
            dilation_height, dilation_width). Otherwise,
            dilation_depth = dilation_height = dilation_width = dilation.
            Default: 1.
        output_size (int|list|tuple, optional): The output image size. A
            list/tuple must contain three integers, (image_depth, image_height, image_width).
            None means the size is calculated from filter_size(shape of weight),
            padding, and stride.
        data_format (str, optional): Specify the data format of the input, and the data format of the output
            will be consistent with that of the input. An optional string from: `"NCDHW"`, `"NDHWC"`.
            When it is `"NCDHW"`, the data is stored in the order of:
            `[batch_size, input_channels, input_depth, input_height, input_width]`.
            Default: `"NCDHW"`.
        name (str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set.
            Default: None.

    Returns:
        A Tensor holding the transposed convolution result, whose data
        type is the same with input and shape is (num_batches, channels, out_d, out_h,
        out_w) or (num_batches, out_d, out_h, out_w, channels).

    Raises:
        ValueError: If ``data_format`` is unsupported, ``x`` or ``weight`` is
            not 5-D, the channel dimension of ``x`` is undefined, ``groups``
            is not positive or does not divide the input channels, or
            ``output_size`` has an unsupported type or is combined with
            ``output_padding``.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.nn.functional as F

            >>> x_var = paddle.randn((2, 3, 8, 8, 8), dtype='float32')
            >>> w_var = paddle.randn((3, 6, 3, 3, 3), dtype='float32')

            >>> y_var = F.conv3d_transpose(x_var, w_var)

            >>> print(y_var.shape)
            [2, 6, 10, 10, 10]
    """
    # entry checks
    if data_format not in ["NCDHW", "NDHWC"]:
        raise ValueError(
            "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
            f"Attr(data_format): {data_format}."
        )

    channel_last = data_format == "NDHWC"
    channel_dim = -1 if channel_last else 1
    if len(x.shape) != 5:
        raise ValueError(
            f"Input x should be 5D tensor, but received x with the shape of {x.shape}"
        )
    if len(weight.shape) != 5:
        raise ValueError(
            f"Input weight should be 5D tensor, but received weight with the shape of {weight.shape}"
        )

    num_channels = x.shape[channel_dim]
    # NOTE(review): num_filters is not read again in this function, but every
    # local is forwarded to LayerHelper through **locals() below, so it is
    # kept to leave those kwargs unchanged -- confirm before removing.
    num_filters = weight.shape[1]
    if num_channels < 0:
        raise ValueError(
            f"The channel dimension of the input({x.shape}) should be defined. "
            f"Received: {num_channels}."
        )
    if groups <= 0:
        raise ValueError(
            f"The groups of conv3d_transpose should be greater than 0. Received groups: {groups}"
        )
    if num_channels % groups != 0:
        raise ValueError(
            "The number of input channels must be divisible by Attr(groups). "
            f"Received: number of channels({num_channels}), groups({groups})."
        )

    # Normalise the attributes to their 3-D forms.
    padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3)
    stride = convert_to_list(stride, 3, 'stride')
    dilation = convert_to_list(dilation, 3, 'dilation')

    if output_size is None:
        output_size = []
    else:
        if output_padding != 0:
            raise ValueError(
                'output_padding option is mutually exclusive with '
                'output_size'
            )
        if isinstance(output_size, (list, tuple, int)):
            output_size = convert_to_list(output_size, 3, 'output_size')
        else:
            raise ValueError(
                "output_size should be int, or list, tuple of ints"
            )

    if output_padding == 0:
        output_padding = []
    else:
        output_padding = convert_to_list(output_padding, 3, 'output_padding')

    cudnn_version = get_cudnn_version()

    # TODO(LielinJiang): whether to use cudnn according to the version of cudnn
    use_cudnn = (
        True
        if (is_compiled_with_cuda() and cudnn_version is not None)
        else False
    )

    op_type = 'conv3d_transpose'
    # The op is handed the 4-letter layout names even though the data is 5-D
    # (presumably what the kernel expects -- confirm against the op definition).
    data_format_ = "NHWC" if channel_last else "NCHW"

    if in_dynamic_or_pir_mode():
        pre_bias = _C_ops.conv3d_transpose(
            x,
            weight,
            stride,
            padding,
            output_padding,
            output_size,
            padding_algorithm,
            groups,
            dilation,
            data_format_,
        )
        if bias is not None:
            return _add_with_axis(pre_bias, bias, axis=channel_dim)
        else:
            return pre_bias
    else:
        inputs = {'Input': [x], 'Filter': [weight]}
        attrs = {
            'output_padding': output_padding,
            'output_size': output_size,
            'paddings': padding,
            "padding_algorithm": padding_algorithm,
            'strides': stride,
            'dilations': dilation,
            'groups': groups,
            'use_cudnn': use_cudnn,
            "data_format": data_format_,
        }
        helper = LayerHelper(op_type, **locals())
        # Report dtype problems under this op's own name; the previous
        # 'conv3d' label pointed users at the wrong function.
        check_variable_and_dtype(
            x, 'x', ['float16', 'float32', 'float64'], 'conv3d_transpose'
        )

        pre_bias = helper.create_variable_for_type_inference(x.dtype)
        outputs = {"Output": [pre_bias]}

        helper.append_op(
            type=op_type, inputs=inputs, outputs=outputs, attrs=attrs
        )
        if bias is not None:
            out = _add_with_axis(pre_bias, bias, axis=channel_dim)
        else:
            out = pre_bias

    return out
|