pooling.py 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from .. import functional as F
  15. from .layers import Layer
  16. __all__ = []
  17. class AvgPool1D(Layer):
  18. r"""
  19. This operation applies a 1D average pooling over an input signal composed
  20. of several input planes, based on the input, output_size, return_mask parameters.
  21. Input(X) and output(Out) are in NCL format, where N is batch
  22. size, C is the number of channels, L is the length of the feature.
  23. The output tensor shape will be [N, C, output_size].
  24. The output value of the layer with input size (N, C, L),
  25. output (N, C, :math:`L_{out}`) and kernel_size ksize can be precisely described as
  26. For average pool1d:
  27. .. math::
  28. Output(N_i, C_i, l) = \frac{Input[N_i, C_i, stride \times l:stride \times l+k]}{ksize}
  29. Parameters:
  30. kernel_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
  31. it must contain an integer.
  32. stride(int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list,
  33. it must contain an integer. Default None, then stride will be equal to the kernel_size.
  34. padding(str|int|list|tuple, optional): The padding size. Padding could be in one of the following forms.
  35. 1. A string in ['valid', 'same'].
  36. 2. An int, which means the feature map is zero padded by size of `padding` on every sides.
  37. 3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every sides.
  38. 4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after].
  39. 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
  40. The default value is 0.
  41. exclusive(bool, optional): Whether to exclude padding points in average pooling mode, default is `True`.
  42. ceil_mode(bool, optional): ${ceil_mode_comment}Whether to use the ceil function to calculate output height
  43. and width. If it is set to False, the floor function will be used. The default value is False.
  44. name(str, optional): For eed to detailed information, please refer to :ref:`api_guide_Name`.
  45. Usually name is no nset and None by default.
  46. Shape:
  47. - x(Tensor): The input tensor of avg pool1d operator, which is a 3-D tensor.
  48. The data type can be float32, float64.
  49. - output(Tensor): The output tensor of avg pool1d operator, which is a 3-D tensor.
  50. The data type is same as input x.
  51. Returns:
  52. A callable object of AvgPool1D.
  53. Examples:
  54. .. code-block:: python
  55. >>> import paddle
  56. >>> import paddle.nn as nn
  57. >>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
  58. >>> AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0)
  59. >>> pool_out = AvgPool1D(data)
  60. >>> print(pool_out.shape)
  61. [1, 3, 16]
  62. """
  63. def __init__(
  64. self,
  65. kernel_size,
  66. stride=None,
  67. padding=0,
  68. exclusive=True,
  69. ceil_mode=False,
  70. name=None,
  71. ):
  72. super().__init__()
  73. self.kernel_size = kernel_size
  74. self.stride = stride
  75. self.padding = padding
  76. self.ceil_mode = ceil_mode
  77. self.exclusive = exclusive
  78. self.name = name
  79. def forward(self, x):
  80. out = F.avg_pool1d(
  81. x,
  82. self.kernel_size,
  83. self.stride,
  84. self.padding,
  85. self.exclusive,
  86. self.ceil_mode,
  87. self.name,
  88. )
  89. return out
  90. def extra_repr(self):
  91. return 'kernel_size={kernel_size}, stride={stride}, padding={padding}'.format(
  92. **self.__dict__
  93. )
  94. class AvgPool2D(Layer):
  95. r"""
  96. This operation applies 2D average pooling over input features based on the input,
  97. and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
  98. in NCHW format, where N is batch size, C is the number of channels,
  99. H is the height of the feature, and W is the width of the feature.
  100. Example:
  101. Input:
  102. X shape: :math:`(N, C, :math:`H_{in}`, :math:`W_{in}`)`
  103. Attr:
  104. kernel_size: ksize
  105. Output:
  106. Out shape: :math:`(N, C, :math:`H_{out}`, :math:`W_{out}`)`
  107. .. math::
  108. Output(N_i, C_j, h, w) = \frac{\sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1}
  109. Input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)}{ksize[0] * ksize[1]}
  110. Parameters:
  111. kernel_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
  112. it must contain two integers, (pool_size_Height, pool_size_Width).
  113. Otherwise, the pool kernel size will be a square of an int.
  114. stride(int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list,
  115. it must contain two integers, (pool_stride_Height, pool_stride_Width).
  116. Otherwise, the pool stride size will be a square of an int.
  117. Default None, then stride will be equal to the kernel_size.
  118. padding(str|int|list|tuple, optional): The padding size. Padding could be in one of the following forms.
  119. 1. A string in ['valid', 'same'].
  120. 2. An int, which means the feature map is zero padded by size of `padding` on every sides.
  121. 3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_weight] whose value means the padding size of each dimension.
  122. 4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
  123. 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
  124. The default value is 0.
  125. ceil_mode(bool, optional): When True, will use `ceil` instead of `floor` to compute the output shape.
  126. exclusive(bool, optional): Whether to exclude padding points in average pooling
  127. mode, default is `true`.
  128. divisor_override(float, optional): If specified, it will be used as divisor, otherwise kernel_size will be
  129. used. Default None.
  130. data_format(str, optional): The data format of the input and output data. An optional string from: `"NCHW"`,
  131. `"NDHW"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
  132. `[batch_size, input_channels, input_height, input_width]`.
  133. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
  134. Usually name is no need to set and None by default.
  135. Shape:
  136. - x(Tensor): The input tensor of avg pool2d operator, which is a 4-D tensor.
  137. The data type can be float32, float64.
  138. - output(Tensor): The output tensor of avg pool2d operator, which is a 4-D tensor.
  139. The data type is same as input x.
  140. Returns:
  141. A callable object of AvgPool2D.
  142. Examples:
  143. .. code-block:: python
  144. >>> import paddle
  145. >>> import paddle.nn as nn
  146. >>> # max pool2d
  147. >>> input = paddle.uniform([1, 3, 32, 32], dtype="float32", min=-1, max=1)
  148. >>> AvgPool2D = nn.AvgPool2D(kernel_size=2, stride=2, padding=0)
  149. >>> output = AvgPool2D(input)
  150. >>> print(output.shape)
  151. [1, 3, 16, 16]
  152. """
  153. def __init__(
  154. self,
  155. kernel_size,
  156. stride=None,
  157. padding=0,
  158. ceil_mode=False,
  159. exclusive=True,
  160. divisor_override=None,
  161. data_format="NCHW",
  162. name=None,
  163. ):
  164. super().__init__()
  165. self.ksize = kernel_size
  166. self.stride = stride
  167. self.padding = padding
  168. self.ceil_mode = ceil_mode
  169. self.exclusive = exclusive
  170. self.divisor = divisor_override
  171. self.data_format = data_format
  172. self.name = name
  173. def forward(self, x):
  174. return F.avg_pool2d(
  175. x,
  176. kernel_size=self.ksize,
  177. stride=self.stride,
  178. padding=self.padding,
  179. ceil_mode=self.ceil_mode,
  180. exclusive=self.exclusive,
  181. divisor_override=self.divisor,
  182. data_format=self.data_format,
  183. name=self.name,
  184. )
  185. def extra_repr(self):
  186. return 'kernel_size={ksize}, stride={stride}, padding={padding}'.format(
  187. **self.__dict__
  188. )
  189. class AvgPool3D(Layer):
  190. """
  191. This operation applies 3D max pooling over input features based on the input,
  192. and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
  193. in NCDHW format, where N is batch size, C is the number of channels,
  194. H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
  195. Parameters:
  196. kernel_size(int|list|tuple): The pool kernel size. If pool kernel size
  197. is a tuple or list, it must contain three integers,
  198. (kernel_size_Depth, kernel_size_Height, kernel_size_Width).
  199. Otherwise, the pool kernel size will be the cube of an int.
  200. stride(int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list,
  201. it must contain three integers, [stride_Depth, stride_Height, stride_Width).
  202. Otherwise, the pool stride size will be a cube of an int.
  203. Default None, then stride will be equal to the kernel_size.
  204. padding(str|int|list|tuple, optional): The padding size. Padding could be in one of the following forms.
  205. 1. A string in ['valid', 'same'].
  206. 2. An int, which means the feature map is zero padded by size of `padding` on every sides.
  207. 3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_weight] whose value means the padding size of each dimension.
  208. 4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
  209. 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
  210. The default value is 0.
  211. ceil_mode(bool, optional): ${ceil_mode_comment}
  212. exclusive(bool, optional): Whether to exclude padding points in average pooling mode, default is True.
  213. divisor_override(int|float, optional): if specified, it will be used as divisor, otherwise kernel_size will
  214. be used. Default None.
  215. data_format(str, optional): The data format of the input and output data. An optional string from: `"NCDHW"`,
  216. `"NDHWC"`. The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
  217. `[batch_size, input_channels, input_depth, input_height, input_width]`.
  218. name(str, optional): For detailed information, please refer
  219. to :ref:`api_guide_Name`. Usually name is no need to set and
  220. None by default.
  221. Returns:
  222. A callable object of AvgPool3D.
  223. Shape:
  224. - x(Tensor): The input tensor of avg pool3d operator, which is a 5-D tensor.
  225. The data type can be float16, float32, float64.
  226. - output(Tensor): The output tensor of avg pool3d operator, which is a 5-D tensor.
  227. The data type is same as input x.
  228. Examples:
  229. .. code-block:: python
  230. >>> import paddle
  231. >>> import paddle.nn as nn
  232. >>> # avg pool3d
  233. >>> input = paddle.uniform([1, 2, 3, 32, 32], dtype="float32", min=-1, max=1)
  234. >>> AvgPool3D = nn.AvgPool3D(kernel_size=2, stride=2, padding=0)
  235. >>> output = AvgPool3D(input)
  236. >>> print(output.shape)
  237. [1, 2, 1, 16, 16]
  238. """
  239. def __init__(
  240. self,
  241. kernel_size,
  242. stride=None,
  243. padding=0,
  244. ceil_mode=False,
  245. exclusive=True,
  246. divisor_override=None,
  247. data_format="NCDHW",
  248. name=None,
  249. ):
  250. super().__init__()
  251. self.ksize = kernel_size
  252. self.stride = stride
  253. self.padding = padding
  254. self.ceil_mode = ceil_mode
  255. self.exclusive = exclusive
  256. self.divisor = divisor_override
  257. self.data_format = data_format
  258. self.name = name
  259. def forward(self, x):
  260. return F.avg_pool3d(
  261. x,
  262. kernel_size=self.ksize,
  263. stride=self.stride,
  264. padding=self.padding,
  265. ceil_mode=self.ceil_mode,
  266. exclusive=self.exclusive,
  267. divisor_override=self.divisor,
  268. data_format=self.data_format,
  269. name=self.name,
  270. )
  271. def extra_repr(self):
  272. return 'kernel_size={ksize}, stride={stride}, padding={padding}'.format(
  273. **self.__dict__
  274. )
  275. class MaxPool1D(Layer):
  276. """
  277. This operation applies 1D max pooling over input signal
  278. composed of several input planes based on the input,
  279. and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
  280. in NCL format, where N is batch size, C is the number of channels,
  281. L is the length of the feature.
  282. The output value of the layer with input size (N, C, L),
  283. output (N, C, L_{out}) and kernel_size k can be precisely described as
  284. For average pool1d:
  285. .. math::
  286. Output(N_i, C_i, l) = max(Input[N_i, C_i, stride \times l:stride \times l+k])
  287. Parameters:
  288. kernel_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
  289. it must contain an integer.
  290. stride(int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list,
  291. it must contain an integer. Default None, then stride will be equal to the kernel_size.
  292. padding(str|int|list|tuple, optional): The padding size. Padding could be in one of the following forms.
  293. 1. A string in ['valid', 'same'].
  294. 2. An integer, which means the feature map is zero padded by size of `padding` on every sides.
  295. 3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every sides.
  296. 4. A list[int] or tuple(int) whose length is 2, It has the form [pad_before, pad_after].
  297. 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or(0,0).
  298. The default value is 0.
  299. return_mask(bool, optional): Whether return the max indices along with the outputs. default is `False`.
  300. ceil_mode(bool, optional): Whether to use the ceil function to calculate output height and width.
  301. False is the default. If it is set to False, the floor function will be used. Default False.
  302. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
  303. Usually name is no need to set and None by default.
  304. Returns:
  305. A callable object of MaxPool1D.
  306. Shape:
  307. - x(Tensor): The input tensor of max pool1d operator, which is a 3-D tensor.
  308. The data type can be float32, float64.
  309. - output(Tensor): The output tensor of max pool1d operator, which is a 3-D tensor.
  310. The data type is same as input x.
  311. Examples:
  312. .. code-block:: python
  313. >>> import paddle
  314. >>> import paddle.nn as nn
  315. >>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
  316. >>> MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0)
  317. >>> pool_out = MaxPool1D(data)
  318. >>> print(pool_out.shape)
  319. [1, 3, 16]
  320. >>> MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0, return_mask=True)
  321. >>> pool_out, indices = MaxPool1D(data)
  322. >>> print(pool_out.shape)
  323. [1, 3, 16]
  324. >>> print(indices.shape)
  325. [1, 3, 16]
  326. """
  327. def __init__(
  328. self,
  329. kernel_size,
  330. stride=None,
  331. padding=0,
  332. return_mask=False,
  333. ceil_mode=False,
  334. name=None,
  335. ):
  336. super().__init__()
  337. self.kernel_size = kernel_size
  338. self.stride = stride
  339. self.padding = padding
  340. self.ceil_mode = ceil_mode
  341. self.return_mask = return_mask
  342. self.name = name
  343. def forward(self, input):
  344. out = F.max_pool1d(
  345. input,
  346. self.kernel_size,
  347. self.stride,
  348. self.padding,
  349. self.return_mask,
  350. self.ceil_mode,
  351. self.name,
  352. )
  353. return out
  354. def extra_repr(self):
  355. return 'kernel_size={kernel_size}, stride={stride}, padding={padding}'.format(
  356. **self.__dict__
  357. )
  358. class MaxPool2D(Layer):
  359. r"""
  360. This operation applies 2D max pooling over input feature based on the input,
  361. and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
  362. in NCHW format, where N is batch size, C is the number of channels,
  363. H is the height of the feature, and W is the width of the feature.
  364. Example:
  365. - Input:
  366. X shape: :math:`(N, C, H_{in}, W_{in})`
  367. - Attr:
  368. kernel_size: ksize
  369. - Output:
  370. Out shape: :math:`(N, C, H_{out}, W_{out})`
  371. .. math::
  372. Output(N_i, C_j, h, w) = \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1}
  373. Input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)
  374. Parameters:
  375. kernel_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
  376. it must contain two integers, (pool_size_Height, pool_size_Width).
  377. Otherwise, the pool kernel size will be a square of an int.
  378. stride(int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list,
  379. it must contain two integers, (pool_stride_Height, pool_stride_Width).
  380. Otherwise, the pool stride size will be a square of an int.
  381. Default None, then stride will be equal to the kernel_size.
  382. padding(str|int|list|tuple, optional): The padding size. Padding could be in one of the following forms.
  383. 1. A string in ['valid', 'same'].
  384. 2. An int, which means the feature map is zero padded by size of `padding` on every sides.
  385. 3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_weight] whose value means the padding size of each dimension.
  386. 4. A list[int] or tuple(int) whose length is \4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
  387. 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
  388. The default value is 0.
  389. ceil_mode(bool, optional): when True, will use `ceil` instead of `floor` to compute the output shape
  390. return_mask(bool, optional): Whether to return the max indices along with the outputs.
  391. data_format(str, optional): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`.
  392. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
  393. `[batch_size, input_channels, input_height, input_width]`.
  394. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
  395. Usually name is no need to set and None by default.
  396. Returns:
  397. A callable object of MaxPool2D.
  398. Shape:
  399. - x(Tensor): The input tensor of max pool2d operator, which is a 4-D tensor.
  400. The data type can be float32, float64.
  401. - output(Tensor): The output tensor of max pool2d operator, which is a 4-D tensor.
  402. The data type is same as input x.
  403. Examples:
  404. .. code-block:: python
  405. >>> import paddle
  406. >>> import paddle.nn as nn
  407. >>> # max pool2d
  408. >>> input = paddle.uniform([1, 3, 32, 32], dtype="float32", min=-1, max=1)
  409. >>> MaxPool2D = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
  410. >>> output = MaxPool2D(input)
  411. >>> print(output.shape)
  412. [1, 3, 16, 16]
  413. >>> # for return_mask=True
  414. >>> MaxPool2D = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, return_mask=True)
  415. >>> output, max_indices = MaxPool2D(input)
  416. >>> print(output.shape)
  417. [1, 3, 16, 16]
  418. >>> print(max_indices.shape)
  419. [1, 3, 16, 16]
  420. """
  421. def __init__(
  422. self,
  423. kernel_size,
  424. stride=None,
  425. padding=0,
  426. return_mask=False,
  427. ceil_mode=False,
  428. data_format="NCHW",
  429. name=None,
  430. ):
  431. super().__init__()
  432. self.ksize = kernel_size
  433. self.stride = stride
  434. self.padding = padding
  435. self.return_mask = return_mask
  436. self.ceil_mode = ceil_mode
  437. self.data_format = data_format
  438. self.name = name
  439. def forward(self, x):
  440. return F.max_pool2d(
  441. x,
  442. kernel_size=self.ksize,
  443. stride=self.stride,
  444. padding=self.padding,
  445. return_mask=self.return_mask,
  446. ceil_mode=self.ceil_mode,
  447. data_format=self.data_format,
  448. name=self.name,
  449. )
  450. def extra_repr(self):
  451. return 'kernel_size={ksize}, stride={stride}, padding={padding}'.format(
  452. **self.__dict__
  453. )
  454. class MaxPool3D(Layer):
  455. """
  456. This operation applies 3D max pooling over input features based on the input,
  457. and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
  458. in NCDHW format, where N is batch size, C is the number of channels,
  459. H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
  460. Parameters:
  461. kernel_size(int|list|tuple): The pool kernel size. If the kernel size
  462. is a tuple or list, it must contain three integers,
  463. (kernel_size_Depth, kernel_size_Height, kernel_size_Width).
  464. Otherwise, the pool kernel size will be the cube of an int.
  465. stride(int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list,
  466. it must contain three integers, [stride_Depth, stride_Height, stride_Width).
  467. Otherwise, the pool stride size will be a cube of an int.
  468. Default None, then stride will be equal to the kernel_size.
  469. padding(str|int|list|tuple, optional): The padding size. Padding could be in one of the following forms.
  470. 1. A string in ['valid', 'same'].
  471. 2. An int, which means the feature map is zero padded by size of `padding` on every sides.
  472. 3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_weight] whose value means the padding size of each dimension.
  473. 4. A list[int] or tuple(int) whose length is \6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
  474. 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
  475. The default value is 0.
  476. ceil_mode(bool, optional): ${ceil_mode_comment}
  477. return_mask(bool, optional): Whether to return the max indices along with the outputs.
  478. data_format(str, optional): The data format of the input and output data. An optional string from: `"NCDHW"`,
  479. `"NDHWC"`. The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
  480. `[batch_size, input_channels, input_depth, input_height, input_width]`.
  481. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
  482. Usually name is no need to set and None by default.
  483. Returns:
  484. A callable object of MaxPool3D.
  485. Shape:
  486. - x(Tensor): The input tensor of max pool3d operator, which is a 5-D tensor.
  487. The data type can be float32, float64.
  488. - output(Tensor): The output tensor of max pool3d operator, which is a 5-D tensor.
  489. The data type is same as input x.
  490. Examples:
  491. .. code-block:: python
  492. >>> import paddle
  493. >>> import paddle.nn as nn
  494. >>> # max pool3d
  495. >>> input = paddle.uniform([1, 2, 3, 32, 32], dtype="float32", min=-1, max=1)
  496. >>> MaxPool3D = nn.MaxPool3D(kernel_size=2, stride=2, padding=0)
  497. >>> output = MaxPool3D(input)
  498. >>> print(output.shape)
  499. [1, 2, 1, 16, 16]
  500. >>> # for return_mask=True
  501. >>> MaxPool3D = nn.MaxPool3D(kernel_size=2, stride=2, padding=0, return_mask=True)
  502. >>> output, max_indices = MaxPool3D(input)
  503. >>> print(output.shape)
  504. [1, 2, 1, 16, 16]
  505. >>> print(max_indices.shape)
  506. [1, 2, 1, 16, 16]
  507. """
  508. def __init__(
  509. self,
  510. kernel_size,
  511. stride=None,
  512. padding=0,
  513. return_mask=False,
  514. ceil_mode=False,
  515. data_format="NCDHW",
  516. name=None,
  517. ):
  518. super().__init__()
  519. self.ksize = kernel_size
  520. self.stride = stride
  521. self.padding = padding
  522. self.return_mask = return_mask
  523. self.ceil_mode = ceil_mode
  524. self.data_format = data_format
  525. self.name = name
  526. def forward(self, x):
  527. return F.max_pool3d(
  528. x,
  529. kernel_size=self.ksize,
  530. stride=self.stride,
  531. padding=self.padding,
  532. return_mask=self.return_mask,
  533. ceil_mode=self.ceil_mode,
  534. data_format=self.data_format,
  535. name=self.name,
  536. )
  537. def extra_repr(self):
  538. return 'kernel_size={ksize}, stride={stride}, padding={padding}'.format(
  539. **self.__dict__
  540. )
class AdaptiveAvgPool1D(Layer):
    r"""

    A 1D adaptive average pooling over an input signal composed
    of several input planes, based on :attr:`output_size`.
    Input and output are in NCL format, where N is batch
    size, C is the number of channels and L is the length of the feature.
    The shape of output will be :math:`[N, C, output\_size]`.

    The formulation for average adaptive pool1d is

    ..  math::

        lstart &= \lfloor i * L_{in} / L_{out}\rfloor,

        lend &= \lceil(i + 1) * L_{in} / L_{out}\rceil,

        Output(i) &= \frac{\sum Input[lstart:lend]}{lend - lstart}.

    Parameters:
        output_size(int): The target output size. Its data type must be int.
        name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

    Returns:
        A callable object for computing 1D adaptive average pooling.

    Examples:
        .. code-block:: python

            >>> # average adaptive pool1d
            >>> # suppose input data in shape of [N, C, L], `output_size` is m or [m],
            >>> # output shape is [N, C, m], adaptive pool divide L dimension
            >>> # of input data into m grids averagely and performs poolings in each
            >>> # grid to get output.
            >>> # adaptive avg pool performs calculations as follow:
            >>> #
            >>> #     for i in range(m):
            >>> #         lstart = floor(i * L / m)
            >>> #         lend = ceil((i + 1) * L / m)
            >>> #         output[:, :, i] = sum(input[:, :, lstart: lend])/(lend - lstart)
            >>> #
            >>> import paddle
            >>> import paddle.nn as nn

            >>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
            >>> AdaptiveAvgPool1D = nn.AdaptiveAvgPool1D(output_size=16)
            >>> pool_out = AdaptiveAvgPool1D(data)
            >>> print(pool_out.shape)
            [1, 3, 16]
    """

    def __init__(self, output_size, name=None):
        # Only stores the configuration; the pooling itself happens in forward().
        super().__init__()
        self.output_size = output_size
        self.name = name

    def forward(self, input):
        # output_size and name are passed positionally, in that order.
        return F.adaptive_avg_pool1d(input, self.output_size, self.name)

    def extra_repr(self):
        # Summary string containing only the configured output size.
        return f'output_size={self.output_size}'
class AdaptiveAvgPool2D(Layer):
    r"""

    This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions
    of the output tensor are determined by the parameter output_size.

    For avg adaptive pool2d:

    ..  math::

        hstart &= floor(i * H_{in} / H_{out})

        hend &= ceil((i + 1) * H_{in} / H_{out})

        wstart &= floor(j * W_{in} / W_{out})

        wend &= ceil((j + 1) * W_{in} / W_{out})

        Output(i ,j) &= \frac{\sum Input[hstart:hend, wstart:wend]}{(hend - hstart) * (wend - wstart)}


    Parameters:
        output_size(int|list|tuple): The target output size. If output size is a tuple or list,
            it must contain two elements, (H, W). H and W can be either an int, or None which means
            the size will be the same as that of the input.
        data_format(str, optional): The data format of the input and output data. An optional string
            from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in
            the order of: [batch_size, input_channels, input_height, input_width].
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.

    Shape:
        - x(Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of adaptive avg pool2d operator, which is a 4-D tensor.
          The data type is same as input x.

    Returns:
        A callable object of AdaptiveAvgPool2D.

    Examples:
        .. code-block:: python

            >>> # adaptive avg pool2d
            >>> # suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
            >>> # output shape is [N, C, m, n], adaptive pool divide H and W dimensions
            >>> # of input data into m * n grids averagely and performs poolings in each
            >>> # grid to get output.
            >>> # adaptive avg pool performs calculations as follow:
            >>> #
            >>> #     for i in range(m):
            >>> #         for j in range(n):
            >>> #             hstart = floor(i * H / m)
            >>> #             hend = ceil((i + 1) * H / m)
            >>> #             wstart = floor(j * W / n)
            >>> #             wend = ceil((j + 1) * W / n)
            >>> #             output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
            >>> #
            >>> import paddle

            >>> x = paddle.rand([2, 3, 32, 32])

            >>> adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D(output_size=3)
            >>> pool_out = adaptive_avg_pool(x = x)
            >>> print(pool_out.shape)
            [2, 3, 3, 3]
    """

    def __init__(self, output_size, data_format="NCHW", name=None):
        # Configuration is kept under underscore-prefixed attribute names and
        # forwarded unchanged by forward().
        super().__init__()
        self._output_size = output_size
        self._data_format = data_format
        self._name = name

    def forward(self, x):
        # Delegates to the functional API with the stored settings.
        return F.adaptive_avg_pool2d(
            x,
            output_size=self._output_size,
            data_format=self._data_format,
            name=self._name,
        )

    def extra_repr(self):
        # Only output_size is reported; data_format is not part of the summary.
        return f'output_size={self._output_size}'
class AdaptiveAvgPool3D(Layer):
    r"""

    This operation applies 3D adaptive avg pooling on input tensor. The d, h and w dimensions
    of the output tensor are determined by the parameter output_size.

    For avg adaptive pool3d:

    ..  math::

        dstart &= floor(i * D_{in} / D_{out})

        dend &= ceil((i + 1) * D_{in} / D_{out})

        hstart &= floor(j * H_{in} / H_{out})

        hend &= ceil((j + 1) * H_{in} / H_{out})

        wstart &= floor(k * W_{in} / W_{out})

        wend &= ceil((k + 1) * W_{in} / W_{out})

        Output(i ,j, k) &= \frac{\sum Input[dstart:dend, hstart:hend, wstart:wend]}
            {(dend - dstart) * (hend - hstart) * (wend - wstart)}


    Parameters:
        output_size(int|list|tuple): The target output size. If output size is a tuple or list,
            it must contain three elements, (D, H, W). D, H and W can be either an int, or None which means
            the size will be the same as that of the input.
        data_format(str, optional): The data format of the input and output data. An optional string
            from: "NCDHW", "NDHWC". The default is "NCDHW". When it is "NCDHW", the data is stored in
            the order of: [batch_size, input_channels, input_depth, input_height, input_width].
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.

    Shape:
        - x(Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of adaptive avg pool3d operator, which is a 5-D tensor.
          The data type is same as input x.

    Returns:
        A callable object of AdaptiveAvgPool3D.

    Examples:
        .. code-block:: python

            >>> # adaptive avg pool3d
            >>> # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
            >>> # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
            >>> # of input data into l * m * n grids averagely and performs poolings in each
            >>> # grid to get output.
            >>> # adaptive avg pool performs calculations as follow:
            >>> #
            >>> #     for i in range(l):
            >>> #         for j in range(m):
            >>> #             for k in range(n):
            >>> #                 dstart = floor(i * D / l)
            >>> #                 dend = ceil((i + 1) * D / l)
            >>> #                 hstart = floor(j * H / m)
            >>> #                 hend = ceil((j + 1) * H / m)
            >>> #                 wstart = floor(k * W / n)
            >>> #                 wend = ceil((k + 1) * W / n)
            >>> #                 output[:, :, i, j, k] =
            >>> #                     avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
            >>> import paddle

            >>> x = paddle.rand([2, 3, 8, 32, 32])

            >>> adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3D(output_size=3)
            >>> pool_out = adaptive_avg_pool(x = x)
            >>> print(pool_out.shape)
            [2, 3, 3, 3, 3]
    """

    def __init__(self, output_size, data_format="NCDHW", name=None):
        # Configuration is kept under underscore-prefixed attribute names and
        # forwarded unchanged by forward().
        super().__init__()
        self._output_size = output_size
        self._data_format = data_format
        self._name = name

    def forward(self, x):
        # Delegates to the functional API with the stored settings.
        return F.adaptive_avg_pool3d(
            x,
            output_size=self._output_size,
            data_format=self._data_format,
            name=self._name,
        )

    def extra_repr(self):
        # Only output_size is reported; data_format is not part of the summary.
        return f'output_size={self._output_size}'
class AdaptiveMaxPool1D(Layer):
    """

    This operation applies a 1D adaptive max pooling over an input signal composed
    of several input planes, based on the input, output_size, return_mask parameters.
    Input(X) and output(Out) are in NCL format, where N is batch
    size, C is the number of channels, L is the length of the feature.
    The output tensor shape will be [N, C, output_size].

    For max adaptive pool1d:

    ..  math::

        lstart &= floor(i * L_{in} / L_{out})

        lend &= ceil((i + 1) * L_{in} / L_{out})

        Output(i) &= max(Input[lstart:lend])

    Parameters:
        output_size(int|list|tuple): The target output size. If output size is a tuple or list,
            it must contain one int.
        return_mask(bool, optional): If true, the index of max pooling point will be returned along
            with outputs. Default False.
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.

    Returns:
        A callable object of AdaptiveMaxPool1D.

    Shape:
        - x(Tensor): The input tensor of adaptive max pool1d operator, which is a 3-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of adaptive max pool1d operator, which is a 3-D tensor.
          The data type is same as input x.

    Examples:
        .. code-block:: python

            >>> # max adaptive pool1d
            >>> # suppose input data in shape of [N, C, L], `output_size` is m or [m],
            >>> # output shape is [N, C, m], adaptive pool divide L dimension
            >>> # of input data into m grids averagely and performs poolings in each
            >>> # grid to get output.
            >>> # adaptive max pool performs calculations as follow:
            >>> #
            >>> #     for i in range(m):
            >>> #         lstart = floor(i * L / m)
            >>> #         lend = ceil((i + 1) * L / m)
            >>> #         output[:, :, i] = max(input[:, :, lstart: lend])
            >>> #
            >>> import paddle
            >>> import paddle.nn as nn

            >>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
            >>> AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16)
            >>> pool_out = AdaptiveMaxPool1D(data)
            >>> print(pool_out.shape)
            [1, 3, 16]

            >>> # for return_mask = true
            >>> AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16, return_mask=True)
            >>> pool_out, indices = AdaptiveMaxPool1D(data)
            >>> print(pool_out.shape)
            [1, 3, 16]
            >>> print(indices.shape)
            [1, 3, 16]

    """

    def __init__(self, output_size, return_mask=False, name=None):
        # Only stores the configuration; the pooling itself happens in forward().
        super().__init__()
        self.output_size = output_size
        self.return_mask = return_mask
        self.name = name

    def forward(self, input):
        # output_size, return_mask and name are passed positionally, in that order.
        return F.adaptive_max_pool1d(
            input, self.output_size, self.return_mask, self.name
        )

    def extra_repr(self):
        # Summary string with the output size and the return_mask flag.
        return f'output_size={self.output_size}, return_mask={self.return_mask}'
class AdaptiveMaxPool2D(Layer):
    """
    This operation applies 2D adaptive max pooling on input tensor. The h and w dimensions
    of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and
    pooling is that the adaptive one focuses on the output size.

    For adaptive max pool2d:

    ..  math::

        hstart &= floor(i * H_{in} / H_{out})

        hend &= ceil((i + 1) * H_{in} / H_{out})

        wstart &= floor(j * W_{in} / W_{out})

        wend &= ceil((j + 1) * W_{in} / W_{out})

        Output(i ,j) &= max(Input[hstart:hend, wstart:wend])

    Parameters:
        output_size(int|list|tuple): The target output size. If output size is a tuple or list, it must contain
            two elements, (H, W). H and W can be either an int, or None which means the size will be the same as that of
            the input.
        return_mask(bool, optional): If true, the index of max pooling point will be returned along with outputs.
            Default False.
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.

    Shape:
        - x(Tensor): The input tensor of adaptive max pool2d operator, which is a 4-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of adaptive max pool2d operator, which is a 4-D tensor.
          The data type is same as input x.

    Returns:
        A callable object of AdaptiveMaxPool2D.

    Examples:
        .. code-block:: python

            >>> # adaptive max pool2d
            >>> # suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
            >>> # output shape is [N, C, m, n], adaptive pool divide H and W dimensions
            >>> # of input data into m * n grids averagely and performs poolings in each
            >>> # grid to get output.
            >>> # adaptive max pool performs calculations as follow:
            >>> #
            >>> #     for i in range(m):
            >>> #         for j in range(n):
            >>> #             hstart = floor(i * H / m)
            >>> #             hend = ceil((i + 1) * H / m)
            >>> #             wstart = floor(j * W / n)
            >>> #             wend = ceil((j + 1) * W / n)
            >>> #             output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
            >>> #
            >>> import paddle

            >>> x = paddle.rand([2, 3, 32, 32])

            >>> adaptive_max_pool = paddle.nn.AdaptiveMaxPool2D(output_size=3, return_mask=True)
            >>> pool_out, indices = adaptive_max_pool(x = x)
            >>> print(pool_out.shape)
            [2, 3, 3, 3]
            >>> print(indices.shape)
            [2, 3, 3, 3]
    """

    def __init__(self, output_size, return_mask=False, name=None):
        # Configuration is kept under underscore-prefixed attribute names and
        # forwarded unchanged by forward().
        super().__init__()
        self._output_size = output_size
        self._return_mask = return_mask
        self._name = name

    def forward(self, x):
        # Delegates to the functional API with the stored settings.
        return F.adaptive_max_pool2d(
            x,
            output_size=self._output_size,
            return_mask=self._return_mask,
            name=self._name,
        )

    def extra_repr(self):
        # Summary string with the output size and the return_mask flag.
        return (
            f'output_size={self._output_size}, return_mask={self._return_mask}'
        )
class AdaptiveMaxPool3D(Layer):
    """
    This operation applies 3D adaptive max pooling on input tensor. The d, h and w dimensions of the output tensor are
    determined by the parameter output_size. The difference between adaptive pooling and pooling is that the adaptive
    one focuses on the output size.

    For adaptive max pool3d:

    ..  math::

        dstart &= floor(i * D_{in} / D_{out})

        dend &= ceil((i + 1) * D_{in} / D_{out})

        hstart &= floor(j * H_{in} / H_{out})

        hend &= ceil((j + 1) * H_{in} / H_{out})

        wstart &= floor(k * W_{in} / W_{out})

        wend &= ceil((k + 1) * W_{in} / W_{out})

        Output(i ,j, k) &= max(Input[dstart:dend, hstart:hend, wstart:wend])

    Parameters:
        output_size(int|list|tuple): The target output size. If output size is a tuple or list, it must contain
            three elements, (D, H, W). D, H and W can be either an int, or None which means the size will be the same as
            that of the input.
        return_mask(bool, optional): If true, the index of max pooling point will be returned along with outputs.
            Default False.
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.

    Shape:
        - x(Tensor): The input tensor of adaptive max pool3d operator, which is a 5-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of adaptive max pool3d operator, which is a 5-D tensor.
          The data type is same as input x.

    Returns:
        A callable object of AdaptiveMaxPool3D.

    Examples:
        .. code-block:: python

            >>> # adaptive max pool3d
            >>> # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
            >>> # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
            >>> # of input data into l * m * n grids averagely and performs poolings in each
            >>> # grid to get output.
            >>> # adaptive max pool performs calculations as follow:
            >>> #
            >>> #     for i in range(l):
            >>> #         for j in range(m):
            >>> #             for k in range(n):
            >>> #                 dstart = floor(i * D / l)
            >>> #                 dend = ceil((i + 1) * D / l)
            >>> #                 hstart = floor(j * H / m)
            >>> #                 hend = ceil((j + 1) * H / m)
            >>> #                 wstart = floor(k * W / n)
            >>> #                 wend = ceil((k + 1) * W / n)
            >>> #                 output[:, :, i, j, k] =
            >>> #                     max(input[:, :, dstart:dend, hstart: hend, wstart: wend])
            >>> import paddle

            >>> x = paddle.rand([2, 3, 8, 32, 32])
            >>> pool = paddle.nn.AdaptiveMaxPool3D(output_size=4)
            >>> out = pool(x)
            >>> print(out.shape)
            [2, 3, 4, 4, 4]
            >>> pool = paddle.nn.AdaptiveMaxPool3D(output_size=3, return_mask=True)
            >>> out, indices = pool(x)
            >>> print(out.shape)
            [2, 3, 3, 3, 3]
            >>> print(indices.shape)
            [2, 3, 3, 3, 3]

    """

    def __init__(self, output_size, return_mask=False, name=None):
        # Configuration is kept under underscore-prefixed attribute names and
        # forwarded unchanged by forward().
        super().__init__()
        self._output_size = output_size
        self._return_mask = return_mask
        self._name = name

    def forward(self, x):
        # Delegates to the functional API with the stored settings.
        return F.adaptive_max_pool3d(
            x,
            output_size=self._output_size,
            return_mask=self._return_mask,
            name=self._name,
        )

    def extra_repr(self):
        # Summary string with the output size and the return_mask flag.
        return (
            f'output_size={self._output_size}, return_mask={self._return_mask}'
        )
class MaxUnPool1D(Layer):
    r"""
    This API implements max unpooling 1d operation.

    `max_unpool1d` accepts the output of `max_pool1d` as input,
    including the indices of the maximum value, and calculates the partial inverse.
    All non-maximum values are set to zero.

    - Input: :math:`(N, C, L_{in})`
    - Output: :math:`(N, C, L_{out})`, where

    .. math::
        L_{out} = (L_{in} - 1) * stride - 2 * padding + kernel\_size

    or as given by :attr:`output_size`.

    Parameters:
        kernel_size (int|list|tuple): The unpool kernel size. If unpool kernel size is a tuple or list,
            it must contain an integer.
        stride (int|list|tuple, optional): The unpool stride size. If unpool stride size is a tuple or list,
            it must contain an integer. Default: None.
        padding (int | tuple, optional): Padding that was added to the input. Default: 0.
        data_format (string, optional): The data format of the input and output data.
            The default is `"NCL"`. When it is `"NCL"`, the data is stored in the order of:
            `[batch_size, input_channels, input_length]`.
        output_size(list|tuple, optional): The target output size. If output_size is not specified,
            the actual output shape will be automatically calculated by (input_shape,
            kernel_size, stride, padding). Default: None.
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    Returns:
        A callable object of MaxUnPool1D.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.nn.functional as F

            >>> data = paddle.rand(shape=[1, 3, 16])
            >>> pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
            >>> print(pool_out.shape)
            [1, 3, 8]
            >>> print(indices.shape)
            [1, 3, 8]
            >>> Unpool1D = paddle.nn.MaxUnPool1D(kernel_size=2, padding=0)
            >>> unpool_out = Unpool1D(pool_out, indices)
            >>> print(unpool_out.shape)
            [1, 3, 16]

    """

    def __init__(
        self,
        kernel_size,
        stride=None,
        padding=0,
        data_format="NCL",
        output_size=None,
        name=None,
    ):
        # Only stores the configuration; kernel_size is kept as ``ksize``.
        super().__init__()
        self.ksize = kernel_size
        self.stride = stride
        self.padding = padding
        self.data_format = data_format
        self.output_size = output_size
        self.name = name

    def forward(self, x, indices):
        # x and indices come from the caller; everything else is the stored
        # configuration, including output_size.
        return F.max_unpool1d(
            x,
            indices,
            kernel_size=self.ksize,
            stride=self.stride,
            padding=self.padding,
            data_format=self.data_format,
            output_size=self.output_size,
            name=self.name,
        )

    def extra_repr(self):
        # Only output_size is reported; kernel_size, stride and padding are not.
        return f'output_size={self.output_size}'
class MaxUnPool2D(Layer):
    r"""
    This API implements max unpooling 2d operation.

    `max_unpool2d` accepts the output of `max_pool2d` as input,
    including the indices of the maximum value, and calculates the partial inverse.
    All non-maximum values are set to zero.

    Parameters:
        kernel_size (int|list|tuple): The unpool kernel size, i.e. the size of the max unpooling window.
            If unpool kernel size is a tuple or list, it must contain an integer.
        stride (int|list|tuple, optional): The unpool stride size. If unpool stride size is a tuple or list,
            it must contain an integer. Default: None.
        padding (int | tuple, optional): Padding that was added to the input. Default: 0.
        data_format (string, optional): The data format of the input and output data.
            The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
            `[batch_size, input_channels, input_height, input_width]`.
        output_size(list|tuple, optional): The target output size. If output_size is not specified,
            the actual output shape will be automatically calculated by (input_shape,
            kernel_size, stride, padding). Default: None.
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    - Input: :math:`(N, C, H_{in}, W_{in})`
    - Output: :math:`(N, C, H_{out}, W_{out})`, where

    .. math::
        H_{out} = (H_{in} - 1) \times \text{stride[0]} - 2 \times \text{padding[0]} + \text{kernel\_size[0]}

    .. math::
        W_{out} = (W_{in} - 1) \times \text{stride[1]} - 2 \times \text{padding[1]} + \text{kernel\_size[1]}

    or as given by :attr:`output_size`.

    Returns:
        A callable object of MaxUnPool2D.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.nn.functional as F

            >>> data = paddle.rand(shape=[1, 1, 6, 6])
            >>> pool_out, indices = F.max_pool2d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
            >>> print(pool_out.shape)
            [1, 1, 3, 3]
            >>> print(indices.shape)
            [1, 1, 3, 3]
            >>> Unpool2D = paddle.nn.MaxUnPool2D(kernel_size=2, padding=0)
            >>> unpool_out = Unpool2D(pool_out, indices)
            >>> print(unpool_out.shape)
            [1, 1, 6, 6]

    """

    def __init__(
        self,
        kernel_size,
        stride=None,
        padding=0,
        data_format="NCHW",
        output_size=None,
        name=None,
    ):
        # Only stores the configuration; kernel_size is kept as ``ksize``.
        super().__init__()
        self.ksize = kernel_size
        self.stride = stride
        self.padding = padding
        self.data_format = data_format
        self.output_size = output_size
        self.name = name

    def forward(self, x, indices):
        # x and indices come from the caller; everything else is the stored
        # configuration, including output_size.
        return F.max_unpool2d(
            x,
            indices,
            kernel_size=self.ksize,
            stride=self.stride,
            padding=self.padding,
            data_format=self.data_format,
            output_size=self.output_size,
            name=self.name,
        )

    def extra_repr(self):
        # Only output_size is reported; kernel_size, stride and padding are not.
        return f'output_size={self.output_size}'
class MaxUnPool3D(Layer):
    r"""
    This API implements max unpooling 3d operation.

    `max_unpool3d` accepts the output of `max_pool3d` as input,
    including the indices of the maximum value, and calculates the partial inverse.
    All non-maximum values are set to zero.

    - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})`
    - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})`, where

    .. math::
        D_{out} = (D_{in} - 1) * stride[0] - 2 * padding[0] + kernel\_size[0]

    .. math::
        H_{out} = (H_{in} - 1) * stride[1] - 2 * padding[1] + kernel\_size[1]

    .. math::
        W_{out} = (W_{in} - 1) * stride[2] - 2 * padding[2] + kernel\_size[2]

    or as given by :attr:`output_size`.

    Parameters:
        kernel_size (int|list|tuple): The unpool kernel size. If unpool kernel size is a tuple or list,
            it must contain an integer.
        stride (int|list|tuple, optional): The unpool stride size. If unpool stride size is a tuple or list,
            it must contain an integer. Default: None.
        padding (int | tuple, optional): Padding that was added to the input. Default: 0.
        data_format (string, optional): The data format of the input and output data.
            The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
            `[batch_size, input_channels, input_depth, input_height, input_width]`.
        output_size(list|tuple, optional): The target output size. If output_size is not specified,
            the actual output shape will be automatically calculated by (input_shape,
            kernel_size, stride, padding). Default: None.
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    Returns:
        A callable object of MaxUnPool3D.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.nn.functional as F

            >>> data = paddle.rand(shape=[1, 1, 4, 4, 6])
            >>> pool_out, indices = F.max_pool3d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
            >>> print(pool_out.shape)
            [1, 1, 2, 2, 3]
            >>> print(indices.shape)
            [1, 1, 2, 2, 3]
            >>> Unpool3D = paddle.nn.MaxUnPool3D(kernel_size=2, padding=0)
            >>> unpool_out = Unpool3D(pool_out, indices)
            >>> print(unpool_out.shape)
            [1, 1, 4, 4, 6]

    """

    def __init__(
        self,
        kernel_size,
        stride=None,
        padding=0,
        data_format="NCDHW",
        output_size=None,
        name=None,
    ):
        # Only stores the configuration; kernel_size is kept as ``ksize``.
        super().__init__()
        self.ksize = kernel_size
        self.stride = stride
        self.padding = padding
        self.data_format = data_format
        self.output_size = output_size
        self.name = name

    def forward(self, x, indices):
        # x and indices come from the caller; everything else is the stored
        # configuration, including output_size.
        return F.max_unpool3d(
            x,
            indices,
            kernel_size=self.ksize,
            stride=self.stride,
            padding=self.padding,
            data_format=self.data_format,
            output_size=self.output_size,
            name=self.name,
        )

    def extra_repr(self):
        # Only output_size is reported; kernel_size, stride and padding are not.
        return f'output_size={self.output_size}'
class FractionalMaxPool2D(Layer):
    r"""
    This operation applies 2D fractional max pooling on input tensor, which is described in the paper:

    [1] Ben Graham, Fractional Max-Pooling. 2015. http://arxiv.org/abs/1412.6071

    The h and w dimensions of the output tensor are determined by the parameter output_size.

    For each dimension, the fractional max pooling:

    ..  math::

        \alpha &= size_{input} / size_{output}

        index_{start} &= ceil( \alpha * (i + u) - 1)

        index_{end} &= ceil( \alpha * (i + 1 + u) - 1)

        Output &= max(Input[index_{start}:index_{end}])

        where, u \in (0, 1), i = 0,1,2...size_{output}

    The ``u`` from the formula is the parameter ``random_u``, and subtract ``1`` for the index starts from ``0``
    instead of ``1`` where ``ceil`` works.

    For instance, giving a sequence of length ``7`` is ``[2, 4, 3, 1, 5, 2, 3]``, ``output_size`` is ``5`` and ``random_u`` is ``0.3``.
    The ``alpha = 7/5 = 1.4``, the starts of index is ``[0, 1, 3, 4, 6]``, the ends of index is ``[1, 3, 4, 6, 7]`` and makes the
    random sequence in the paper is ``index_end - index_start = [1, 2, 1, 2, 1]``. The strides and kernel_sizes are both equal to
    the random sequence, giving the final pooling output is ``[2, 4, 1, 5, 3]``.

    Parameters:
        output_size(int|list|tuple): The output size. If output size is a tuple or list, it must contain
            two elements, (H, W). H and W can be either an int, or None which means the size will be the same as that of
            the input.
        kernel_size (int|list|tuple, optional): The pool kernel size. If the kernel size
            is a tuple or list, it must contain two integers, (kernel_size_Height, kernel_size_Width).
            Otherwise, the pool kernel size will be the square of an int. Default is None, means using the non-overlapping mode.
        random_u(float, optional): A random float number in range (0, 1) for the fractional pooling.
            Default None, means randomly generated by framework which can be fixed by ``paddle.seed``.
        return_mask(bool, optional): If true, the index of max pooling point will be returned along with outputs. Default False.
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.

    Shape:
        - x(Tensor): The input tensor of fractional max pool2d operator, which is a 4-D tensor.
          The data type can be float16, bfloat16, float32, float64.
        - output(Tensor): The output tensor of fractional max pool2d operator, which is a 4-D tensor.
          The data type is same as input x.

    Returns:
        A callable object of FractionalMaxPool2D.

    Examples:
        .. code-block:: python

            >>> # fractional max pool2d
            >>> # suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
            >>> # output shape is [N, C, m, n], fractional pool divide H and W dimensions
            >>> # of input data into m * n grids and performs poolings in each
            >>> # grid to get output.

            >>> import paddle

            >>> x = paddle.rand([2, 3, 32, 32])

            >>> # disjoint: without `kernel_size`
            >>> fractional_max_pool = paddle.nn.FractionalMaxPool2D(output_size=3)
            >>> pool_out = fractional_max_pool(x=x)
            >>> print(pool_out.shape)
            [2, 3, 3, 3]

            >>> # overlapping: with `kernel_size`
            >>> fractional_max_pool = paddle.nn.FractionalMaxPool2D(kernel_size=2, output_size=3)
            >>> pool_out = fractional_max_pool(x=x)
            >>> print(pool_out.shape)
            [2, 3, 3, 3]

            >>> fractional_max_pool = paddle.nn.FractionalMaxPool2D(output_size=[2, 3], return_mask=True)
            >>> pool_out, indices = fractional_max_pool(x=x)
            >>> print(pool_out.shape)
            [2, 3, 2, 3]
            >>> print(indices.shape)
            [2, 3, 2, 3]
    """

    def __init__(
        self,
        output_size,
        kernel_size=None,
        random_u=None,
        return_mask=False,
        name=None,
    ):
        # Configuration is kept under underscore-prefixed attribute names and
        # forwarded unchanged by forward().
        super().__init__()
        self._output_size = output_size
        self._kernel_size = kernel_size
        self._random_u = random_u
        self._return_mask = return_mask
        self._name = name

    def forward(self, x):
        # Delegates to the functional API with the stored settings.
        return F.fractional_max_pool2d(
            x,
            output_size=self._output_size,
            kernel_size=self._kernel_size,
            random_u=self._random_u,
            return_mask=self._return_mask,
            name=self._name,
        )

    def extra_repr(self):
        # kernel_size and random_u are not included in the summary.
        return (
            f'output_size={self._output_size}, return_mask={self._return_mask}'
        )
  class FractionalMaxPool3D(Layer):
      r"""
      Apply 3D fractional max pooling to an input tensor, as described in the paper:

      [1] Ben Graham, Fractional Max-Pooling. 2015. http://arxiv.org/abs/1412.6071

      The d, h and w dimensions of the output tensor are determined by the parameter ``output_size``.

      For each dimension, the fractional max pooling computes:

      ..  math::

          \alpha &= size_{input} / size_{output}

          index_{start} &= ceil( \alpha * (i + u) - 1)

          index_{end} &= ceil( \alpha * (i + 1 + u) - 1)

          Output &= max(Input[index_{start}:index_{end}])

          where, u \in (0, 1), i = 0, 1, 2, ..., size_{output} - 1

      The ``u`` in the formula is the parameter ``random_u``. ``1`` is subtracted because indices start from ``0``
      instead of ``1``, which is where ``ceil`` applies.

      For instance, given the sequence ``[2, 4, 3, 1, 5, 2, 3]`` of length ``7``, with ``output_size`` ``5`` and ``random_u`` ``0.3``:
      ``alpha = 7/5 = 1.4``, the start indices are ``[0, 1, 3, 4, 6]``, the end indices are ``[1, 3, 4, 6, 7]``, and the
      random sequence from the paper is ``index_end - index_start = [1, 2, 1, 2, 1]``. The strides and kernel sizes are both equal to
      this random sequence, so the final pooling output is ``[2, 4, 1, 5, 3]``.

      Parameters:
          output_size(int|list|tuple): The output size. If output size is a tuple or list, it must contain
              three elements, (D, H, W). D, H and W can each be either an int, or None, which means the size will be
              the same as that of the input.
          kernel_size(int|list|tuple, optional): The pool kernel size. If the kernel size
              is a tuple or list, it must contain three integers, (kernel_size_Depth, kernel_size_Height, kernel_size_Width).
              Otherwise, the pool kernel size will be the cube of an int. Default is None, which means using the non-overlapping mode.
          random_u(float, optional): A random float number in range (0, 1) for the fractional pooling.
              Default None, which means it is randomly generated by the framework and can be fixed by ``paddle.seed``.
          return_mask(bool, optional): If true, the index of max pooling point will be returned along with outputs. Default False.
          name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
              Usually name does not need to be set and is None by default.

      Shape:
          - x(Tensor): The input tensor of fractional max pool3d operator, which is a 5-D tensor.
            The data type can be float16, bfloat16, float32, float64.
          - output(Tensor): The output tensor of fractional max pool3d operator, which is a 5-D tensor.
            The data type is the same as that of input x.

      Returns:
          A callable object of FractionalMaxPool3D.

      Examples:
          .. code-block:: python

              >>> # fractional max pool3d
              >>> # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
              >>> # output shape is [N, C, l, m, n], fractional pool divide D, H and W dimensions
              >>> # of input data into l * m * n grids and performs poolings in each
              >>> # grid to get output.

              >>> import paddle

              >>> x = paddle.rand([2, 3, 8, 32, 32])

              >>> # disjoint: without `kernel_size`
              >>> fractional_max_pool = paddle.nn.FractionalMaxPool3D(output_size=3)
              >>> pool_out = fractional_max_pool(x=x)
              >>> print(pool_out.shape)
              [2, 3, 3, 3, 3]

              >>> # overlapping: with `kernel_size`
              >>> fractional_max_pool = paddle.nn.FractionalMaxPool3D(kernel_size=2, output_size=3)
              >>> pool_out = fractional_max_pool(x=x)
              >>> print(pool_out.shape)
              [2, 3, 3, 3, 3]

              >>> fractional_max_pool = paddle.nn.FractionalMaxPool3D(output_size=[2, 3, 3], return_mask=True)
              >>> pool_out, indices = fractional_max_pool(x=x)
              >>> print(pool_out.shape)
              [2, 3, 2, 3, 3]
              >>> print(indices.shape)
              [2, 3, 2, 3, 3]
      """

      def __init__(
          self,
          output_size,
          kernel_size=None,
          random_u=None,
          return_mask=False,
          name=None,
      ):
          super().__init__()
          # Store the configuration untouched; ``forward`` passes it straight
          # to the functional API. Paired assignments run left to right, so
          # the attributes are set in the order output_size, kernel_size,
          # random_u, return_mask, name.
          self._output_size, self._kernel_size = output_size, kernel_size
          self._random_u, self._return_mask = random_u, return_mask
          self._name = name

      def forward(self, x):
          # Collect the constructor-time configuration as keyword arguments
          # and delegate to the functional implementation.
          pool_options = {
              'output_size': self._output_size,
              'kernel_size': self._kernel_size,
              'random_u': self._random_u,
              'return_mask': self._return_mask,
              'name': self._name,
          }
          return F.fractional_max_pool3d(x, **pool_options)

      def extra_repr(self):
          # Only output_size and return_mask are included in the summary.
          summary = f'output_size={self._output_size}, return_mask={self._return_mask}'
          return summary