csp_pan.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # The code is based on:
  15. # https://github.com/PaddlePaddle/PaddleDetection/blob/release%2F2.3/ppdet/modeling/necks/csp_pan.py
  16. import paddle
  17. import paddle.nn as nn
  18. import paddle.nn.functional as F
  19. from paddle import ParamAttr
  20. __all__ = ["CSPPAN"]
  21. class ConvBNLayer(nn.Layer):
  22. def __init__(
  23. self,
  24. in_channel=96,
  25. out_channel=96,
  26. kernel_size=3,
  27. stride=1,
  28. groups=1,
  29. act="leaky_relu",
  30. ):
  31. super(ConvBNLayer, self).__init__()
  32. initializer = nn.initializer.KaimingUniform()
  33. self.act = act
  34. assert self.act in ["leaky_relu", "hard_swish"]
  35. self.conv = nn.Conv2D(
  36. in_channels=in_channel,
  37. out_channels=out_channel,
  38. kernel_size=kernel_size,
  39. groups=groups,
  40. padding=(kernel_size - 1) // 2,
  41. stride=stride,
  42. weight_attr=ParamAttr(initializer=initializer),
  43. bias_attr=False,
  44. )
  45. self.bn = nn.BatchNorm2D(out_channel)
  46. def forward(self, x):
  47. x = self.bn(self.conv(x))
  48. if self.act == "leaky_relu":
  49. x = F.leaky_relu(x)
  50. elif self.act == "hard_swish":
  51. x = F.hardswish(x)
  52. return x
  53. class DPModule(nn.Layer):
  54. """
  55. Depth-wise and point-wise module.
  56. Args:
  57. in_channel (int): The input channels of this Module.
  58. out_channel (int): The output channels of this Module.
  59. kernel_size (int): The conv2d kernel size of this Module.
  60. stride (int): The conv2d's stride of this Module.
  61. act (str): The activation function of this Module,
  62. Now support `leaky_relu` and `hard_swish`.
  63. """
  64. def __init__(
  65. self, in_channel=96, out_channel=96, kernel_size=3, stride=1, act="leaky_relu"
  66. ):
  67. super(DPModule, self).__init__()
  68. initializer = nn.initializer.KaimingUniform()
  69. self.act = act
  70. self.dwconv = nn.Conv2D(
  71. in_channels=in_channel,
  72. out_channels=out_channel,
  73. kernel_size=kernel_size,
  74. groups=out_channel,
  75. padding=(kernel_size - 1) // 2,
  76. stride=stride,
  77. weight_attr=ParamAttr(initializer=initializer),
  78. bias_attr=False,
  79. )
  80. self.bn1 = nn.BatchNorm2D(out_channel)
  81. self.pwconv = nn.Conv2D(
  82. in_channels=out_channel,
  83. out_channels=out_channel,
  84. kernel_size=1,
  85. groups=1,
  86. padding=0,
  87. weight_attr=ParamAttr(initializer=initializer),
  88. bias_attr=False,
  89. )
  90. self.bn2 = nn.BatchNorm2D(out_channel)
  91. def act_func(self, x):
  92. if self.act == "leaky_relu":
  93. x = F.leaky_relu(x)
  94. elif self.act == "hard_swish":
  95. x = F.hardswish(x)
  96. return x
  97. def forward(self, x):
  98. x = self.act_func(self.bn1(self.dwconv(x)))
  99. x = self.act_func(self.bn2(self.pwconv(x)))
  100. return x
  101. class DarknetBottleneck(nn.Layer):
  102. """The basic bottleneck block used in Darknet.
  103. Each Block consists of two ConvModules and the input is added to the
  104. final output. Each ConvModule is composed of Conv, BN, and act.
  105. The first convLayer has filter size of 1x1 and the second one has the
  106. filter size of 3x3.
  107. Args:
  108. in_channels (int): The input channels of this Module.
  109. out_channels (int): The output channels of this Module.
  110. expansion (int): The kernel size of the convolution. Default: 0.5
  111. add_identity (bool): Whether to add identity to the out.
  112. Default: True
  113. use_depthwise (bool): Whether to use depthwise separable convolution.
  114. Default: False
  115. """
  116. def __init__(
  117. self,
  118. in_channels,
  119. out_channels,
  120. kernel_size=3,
  121. expansion=0.5,
  122. add_identity=True,
  123. use_depthwise=False,
  124. act="leaky_relu",
  125. ):
  126. super(DarknetBottleneck, self).__init__()
  127. hidden_channels = int(out_channels * expansion)
  128. conv_func = DPModule if use_depthwise else ConvBNLayer
  129. self.conv1 = ConvBNLayer(
  130. in_channel=in_channels, out_channel=hidden_channels, kernel_size=1, act=act
  131. )
  132. self.conv2 = conv_func(
  133. in_channel=hidden_channels,
  134. out_channel=out_channels,
  135. kernel_size=kernel_size,
  136. stride=1,
  137. act=act,
  138. )
  139. self.add_identity = add_identity and in_channels == out_channels
  140. def forward(self, x):
  141. identity = x
  142. out = self.conv1(x)
  143. out = self.conv2(out)
  144. if self.add_identity:
  145. return out + identity
  146. else:
  147. return out
  148. class CSPLayer(nn.Layer):
  149. """Cross Stage Partial Layer.
  150. Args:
  151. in_channels (int): The input channels of the CSP layer.
  152. out_channels (int): The output channels of the CSP layer.
  153. expand_ratio (float): Ratio to adjust the number of channels of the
  154. hidden layer. Default: 0.5
  155. num_blocks (int): Number of blocks. Default: 1
  156. add_identity (bool): Whether to add identity in blocks.
  157. Default: True
  158. use_depthwise (bool): Whether to depthwise separable convolution in
  159. blocks. Default: False
  160. """
  161. def __init__(
  162. self,
  163. in_channels,
  164. out_channels,
  165. kernel_size=3,
  166. expand_ratio=0.5,
  167. num_blocks=1,
  168. add_identity=True,
  169. use_depthwise=False,
  170. act="leaky_relu",
  171. ):
  172. super().__init__()
  173. mid_channels = int(out_channels * expand_ratio)
  174. self.main_conv = ConvBNLayer(in_channels, mid_channels, 1, act=act)
  175. self.short_conv = ConvBNLayer(in_channels, mid_channels, 1, act=act)
  176. self.final_conv = ConvBNLayer(2 * mid_channels, out_channels, 1, act=act)
  177. self.blocks = nn.Sequential(
  178. *[
  179. DarknetBottleneck(
  180. mid_channels,
  181. mid_channels,
  182. kernel_size,
  183. 1.0,
  184. add_identity,
  185. use_depthwise,
  186. act=act,
  187. )
  188. for _ in range(num_blocks)
  189. ]
  190. )
  191. def forward(self, x):
  192. x_short = self.short_conv(x)
  193. x_main = self.main_conv(x)
  194. x_main = self.blocks(x_main)
  195. x_final = paddle.concat((x_main, x_short), axis=1)
  196. return self.final_conv(x_final)
  197. class Channel_T(nn.Layer):
  198. def __init__(self, in_channels=[116, 232, 464], out_channels=96, act="leaky_relu"):
  199. super(Channel_T, self).__init__()
  200. self.convs = nn.LayerList()
  201. for i in range(len(in_channels)):
  202. self.convs.append(ConvBNLayer(in_channels[i], out_channels, 1, act=act))
  203. def forward(self, x):
  204. outs = [self.convs[i](x[i]) for i in range(len(x))]
  205. return outs
  206. class CSPPAN(nn.Layer):
  207. """Path Aggregation Network with CSP module.
  208. Args:
  209. in_channels (List[int]): Number of input channels per scale.
  210. out_channels (int): Number of output channels (used at each scale)
  211. kernel_size (int): The conv2d kernel size of this Module.
  212. num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 1
  213. use_depthwise (bool): Whether to depthwise separable convolution in
  214. blocks. Default: True
  215. """
  216. def __init__(
  217. self,
  218. in_channels,
  219. out_channels,
  220. kernel_size=5,
  221. num_csp_blocks=1,
  222. use_depthwise=True,
  223. act="hard_swish",
  224. ):
  225. super(CSPPAN, self).__init__()
  226. self.in_channels = in_channels
  227. self.out_channels = [out_channels] * len(in_channels)
  228. conv_func = DPModule if use_depthwise else ConvBNLayer
  229. self.conv_t = Channel_T(in_channels, out_channels, act=act)
  230. # build top-down blocks
  231. self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
  232. self.top_down_blocks = nn.LayerList()
  233. for idx in range(len(in_channels) - 1, 0, -1):
  234. self.top_down_blocks.append(
  235. CSPLayer(
  236. out_channels * 2,
  237. out_channels,
  238. kernel_size=kernel_size,
  239. num_blocks=num_csp_blocks,
  240. add_identity=False,
  241. use_depthwise=use_depthwise,
  242. act=act,
  243. )
  244. )
  245. # build bottom-up blocks
  246. self.downsamples = nn.LayerList()
  247. self.bottom_up_blocks = nn.LayerList()
  248. for idx in range(len(in_channels) - 1):
  249. self.downsamples.append(
  250. conv_func(
  251. out_channels,
  252. out_channels,
  253. kernel_size=kernel_size,
  254. stride=2,
  255. act=act,
  256. )
  257. )
  258. self.bottom_up_blocks.append(
  259. CSPLayer(
  260. out_channels * 2,
  261. out_channels,
  262. kernel_size=kernel_size,
  263. num_blocks=num_csp_blocks,
  264. add_identity=False,
  265. use_depthwise=use_depthwise,
  266. act=act,
  267. )
  268. )
  269. def forward(self, inputs):
  270. """
  271. Args:
  272. inputs (tuple[Tensor]): input features.
  273. Returns:
  274. tuple[Tensor]: CSPPAN features.
  275. """
  276. assert len(inputs) == len(self.in_channels)
  277. inputs = self.conv_t(inputs)
  278. # top-down path
  279. inner_outs = [inputs[-1]]
  280. for idx in range(len(self.in_channels) - 1, 0, -1):
  281. feat_heigh = inner_outs[0]
  282. feat_low = inputs[idx - 1]
  283. upsample_feat = F.upsample(
  284. feat_heigh, size=feat_low.shape[2:4], mode="nearest"
  285. )
  286. inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](
  287. paddle.concat([upsample_feat, feat_low], 1)
  288. )
  289. inner_outs.insert(0, inner_out)
  290. # bottom-up path
  291. outs = [inner_outs[0]]
  292. for idx in range(len(self.in_channels) - 1):
  293. feat_low = outs[-1]
  294. feat_height = inner_outs[idx + 1]
  295. downsample_feat = self.downsamples[idx](feat_low)
  296. out = self.bottom_up_blocks[idx](
  297. paddle.concat([downsample_feat, feat_height], 1)
  298. )
  299. outs.append(out)
  300. return tuple(outs)