vision.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # TODO: define special functions used in computer vision task
  15. from .. import functional
  16. from .layers import Layer
# An empty ``__all__`` means a wildcard import of this module exports no names.
__all__ = []
  18. class PixelShuffle(Layer):
  19. """
  20. Rearranges elements in a tensor of shape :math:`[N, C, H, W]`
  21. to a tensor of shape :math:`[N, C/upscale_factor^2, H*upscale_factor, W*upscale_factor]`,
  22. or from shape :math:`[N, H, W, C]` to :math:`[N, H*upscale_factor, W*upscale_factor, C/upscale_factor^2]`.
  23. This is useful for implementing efficient sub-pixel convolution
  24. with a stride of 1/upscale_factor.
  25. Please refer to the paper: `Real-Time Single Image and Video Super-Resolution
  26. Using an Efficient Sub-Pixel Convolutional Neural Network <https://arxiv.org/abs/1609.05158v2>`_ .
  27. by Shi et. al (2016) for more details.
  28. Parameters:
  29. upscale_factor(int): factor to increase spatial resolution.
  30. data_format (str, optional): The data format of the input and output data. An optional string from: `'NCHW'``, ``'NHWC'``. When it is ``'NCHW'``, the data is stored in the order of: [batch_size, input_channels, input_height, input_width]. Default: ``'NCHW'``.
  31. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
  32. Shape:
  33. - x: 4-D tensor with shape of :math:`(N, C, H, W)` or :math:`(N, H, W, C)`.
  34. - out: 4-D tensor with shape of :math:`(N, C/upscale_factor^2, H*upscale_factor, W*upscale_factor)` or :math:`(N, H*upscale_factor, W*upscale_factor, C/upscale_factor^2)`.
  35. Examples:
  36. .. code-block:: python
  37. >>> import paddle
  38. >>> import paddle.nn as nn
  39. >>> x = paddle.randn(shape=[2, 9, 4, 4])
  40. >>> pixel_shuffle = nn.PixelShuffle(3)
  41. >>> out = pixel_shuffle(x)
  42. >>> print(out.shape)
  43. [2, 1, 12, 12]
  44. """
  45. def __init__(self, upscale_factor, data_format="NCHW", name=None):
  46. super().__init__()
  47. if not isinstance(upscale_factor, int):
  48. raise TypeError("upscale factor must be int type")
  49. if data_format not in ["NCHW", "NHWC"]:
  50. raise ValueError(
  51. "Data format should be 'NCHW' or 'NHWC'."
  52. f"But receive data format: {data_format}"
  53. )
  54. self._upscale_factor = upscale_factor
  55. self._data_format = data_format
  56. self._name = name
  57. def forward(self, x):
  58. return functional.pixel_shuffle(
  59. x, self._upscale_factor, self._data_format, self._name
  60. )
  61. def extra_repr(self):
  62. main_str = f'upscale_factor={self._upscale_factor}'
  63. if self._data_format != 'NCHW':
  64. main_str += f', data_format={self._data_format}'
  65. if self._name is not None:
  66. main_str += f', name={self._name}'
  67. return main_str
  68. class PixelUnshuffle(Layer):
  69. """
  70. Rearranges elements in a tensor of shape :math:`[N, C, H, W]`
  71. to a tensor of shape :math:`[N, r^2C, H/r, W/r]`, or from shape
  72. :math:`[N, H, W, C]` to :math:`[N, H/r, W/r, r^2C]`, where :math:`r` is the
  73. downscale factor. This operation is the reversion of PixelShuffle operation.
  74. Please refer to the paper: `Real-Time Single Image and Video Super-Resolution
  75. Using an Efficient Sub-Pixel Convolutional Neural Network <https://arxiv.org/abs/1609.05158v2>`_ .
  76. by Shi et. al (2016) for more details.
  77. Parameters:
  78. downscale_factor (int): Factor to decrease spatial resolution.
  79. data_format (str, optional): The data format of the input and output data. An optional string of ``'NCHW'`` or ``'NHWC'``. When it is ``'NCHW'``, the data is stored in the order of [batch_size, input_channels, input_height, input_width]. Default: ``'NCHW'``.
  80. name (str, optional): Name for the operation (optional, default is None). Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`.
  81. Shape:
  82. - **x**: 4-D tensor with shape of :math:`[N, C, H, W]` or :math:`[N, C, H, W]`.
  83. - **out**: 4-D tensor with shape of :math:`[N, r^2C, H/r, W/r]` or :math:`[N, H/r, W/r, r^2C]`, where :math:`r` is :attr:`downscale_factor`.
  84. Examples:
  85. .. code-block:: python
  86. >>> import paddle
  87. >>> import paddle.nn as nn
  88. >>> x = paddle.randn([2, 1, 12, 12])
  89. >>> pixel_unshuffle = nn.PixelUnshuffle(3)
  90. >>> out = pixel_unshuffle(x)
  91. >>> print(out.shape)
  92. [2, 9, 4, 4]
  93. """
  94. def __init__(self, downscale_factor, data_format="NCHW", name=None):
  95. super().__init__()
  96. if not isinstance(downscale_factor, int):
  97. raise TypeError("Downscale factor must be int type")
  98. if downscale_factor <= 0:
  99. raise ValueError("Downscale factor must be positive")
  100. if data_format not in ["NCHW", "NHWC"]:
  101. raise ValueError(
  102. "Data format should be 'NCHW' or 'NHWC'."
  103. f"But receive data format: {data_format}"
  104. )
  105. self._downscale_factor = downscale_factor
  106. self._data_format = data_format
  107. self._name = name
  108. def forward(self, x):
  109. return functional.pixel_unshuffle(
  110. x, self._downscale_factor, self._data_format, self._name
  111. )
  112. def extra_repr(self):
  113. main_str = f'downscale_factor={self._downscale_factor}'
  114. if self._data_format != 'NCHW':
  115. main_str += f', data_format={self._data_format}'
  116. if self._name is not None:
  117. main_str += f', name={self._name}'
  118. return main_str
  119. class ChannelShuffle(Layer):
  120. """
  121. Can divide channels in a tensor of shape [N, C, H, W] or [N, H, W, C] into g groups,
  122. getting a tensor with the shape of [N, g, C/g, H, W] or [N, H, W, g, C/g], and transposes them
  123. as [N, C/g, g, H, W] or [N, H, W, g, C/g], then rearranges them to original tensor shape. This
  124. operation can improve the interaction between channels, using features efficiently. Please
  125. refer to the paper: `ShuffleNet: An Extremely Efficient
  126. Convolutional Neural Network for Mobile Devices <https://arxiv.org/abs/1707.01083>`_ .
  127. by Zhang et. al (2017) for more details.
  128. Parameters:
  129. groups (int): Number of groups to divide channels in.
  130. data_format (str, optional): The data format of the input and output data. An optional string of NCHW or NHWC. The default is NCHW. When it is NCHW, the data is stored in the order of [batch_size, input_channels, input_height, input_width].
  131. name (str, optional): Name for the operation (optional, default is None). Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`.
  132. Shape:
  133. - **x**: 4-D tensor with shape of [N, C, H, W] or [N, H, W, C].
  134. - **out**: 4-D tensor with shape and dtype same as x.
  135. Examples:
  136. .. code-block:: python
  137. >>> import paddle
  138. >>> import paddle.nn as nn
  139. >>> x = paddle.arange(0, 0.6, 0.1, 'float32')
  140. >>> x = paddle.reshape(x, [1, 6, 1, 1])
  141. >>> print(x)
  142. Tensor(shape=[1, 6, 1, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
  143. [[[[0. ]],
  144. [[0.10000000]],
  145. [[0.20000000]],
  146. [[0.30000001]],
  147. [[0.40000001]],
  148. [[0.50000000]]]])
  149. >>> channel_shuffle = nn.ChannelShuffle(3)
  150. >>> y = channel_shuffle(x)
  151. >>> print(y)
  152. Tensor(shape=[1, 6, 1, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
  153. [[[[0. ]],
  154. [[0.20000000]],
  155. [[0.40000001]],
  156. [[0.10000000]],
  157. [[0.30000001]],
  158. [[0.50000000]]]])
  159. """
  160. def __init__(self, groups, data_format="NCHW", name=None):
  161. super().__init__()
  162. if not isinstance(groups, int):
  163. raise TypeError("groups must be int type")
  164. if groups <= 0:
  165. raise ValueError("groups must be positive")
  166. if data_format not in ["NCHW", "NHWC"]:
  167. raise ValueError(
  168. "Data format should be 'NCHW' or 'NHWC'."
  169. f"But receive data format: {data_format}"
  170. )
  171. self._groups = groups
  172. self._data_format = data_format
  173. self._name = name
  174. def forward(self, x):
  175. return functional.channel_shuffle(
  176. x, self._groups, self._data_format, self._name
  177. )
  178. def extra_repr(self):
  179. main_str = f'groups={self._groups}'
  180. if self._data_format != 'NCHW':
  181. main_str += f', data_format={self._data_format}'
  182. if self._name is not None:
  183. main_str += f', name={self._name}'
  184. return main_str