fce_fpn.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. # copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is adapted from:
  16. https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.3/ppdet/modeling/necks/fpn.py
  17. """
  18. import paddle.nn as nn
  19. import paddle.nn.functional as F
  20. from paddle import ParamAttr
  21. from paddle.nn.initializer import XavierUniform
  22. from paddle.nn.initializer import Normal
  23. from paddle.regularizer import L2Decay
  24. __all__ = ["FCEFPN"]
  25. class ConvNormLayer(nn.Layer):
  26. def __init__(
  27. self,
  28. ch_in,
  29. ch_out,
  30. filter_size,
  31. stride,
  32. groups=1,
  33. norm_type="bn",
  34. norm_decay=0.0,
  35. norm_groups=32,
  36. lr_scale=1.0,
  37. freeze_norm=False,
  38. initializer=Normal(mean=0.0, std=0.01),
  39. ):
  40. super(ConvNormLayer, self).__init__()
  41. assert norm_type in ["bn", "sync_bn", "gn"]
  42. bias_attr = False
  43. self.conv = nn.Conv2D(
  44. in_channels=ch_in,
  45. out_channels=ch_out,
  46. kernel_size=filter_size,
  47. stride=stride,
  48. padding=(filter_size - 1) // 2,
  49. groups=groups,
  50. weight_attr=ParamAttr(initializer=initializer, learning_rate=1.0),
  51. bias_attr=bias_attr,
  52. )
  53. norm_lr = 0.0 if freeze_norm else 1.0
  54. param_attr = ParamAttr(
  55. learning_rate=norm_lr,
  56. regularizer=L2Decay(norm_decay) if norm_decay is not None else None,
  57. )
  58. bias_attr = ParamAttr(
  59. learning_rate=norm_lr,
  60. regularizer=L2Decay(norm_decay) if norm_decay is not None else None,
  61. )
  62. if norm_type == "bn":
  63. self.norm = nn.BatchNorm2D(
  64. ch_out, weight_attr=param_attr, bias_attr=bias_attr
  65. )
  66. elif norm_type == "sync_bn":
  67. self.norm = nn.SyncBatchNorm(
  68. ch_out, weight_attr=param_attr, bias_attr=bias_attr
  69. )
  70. elif norm_type == "gn":
  71. self.norm = nn.GroupNorm(
  72. num_groups=norm_groups,
  73. num_channels=ch_out,
  74. weight_attr=param_attr,
  75. bias_attr=bias_attr,
  76. )
  77. def forward(self, inputs):
  78. out = self.conv(inputs)
  79. out = self.norm(out)
  80. return out
  81. class FCEFPN(nn.Layer):
  82. """
  83. Feature Pyramid Network, see https://arxiv.org/abs/1612.03144
  84. Args:
  85. in_channels (list[int]): input channels of each level which can be
  86. derived from the output shape of backbone by from_config
  87. out_channels (list[int]): output channel of each level
  88. spatial_scales (list[float]): the spatial scales between input feature
  89. maps and original input image which can be derived from the output
  90. shape of backbone by from_config
  91. has_extra_convs (bool): whether to add extra conv to the last level.
  92. default False
  93. extra_stage (int): the number of extra stages added to the last level.
  94. default 1
  95. use_c5 (bool): Whether to use c5 as the input of extra stage,
  96. otherwise p5 is used. default True
  97. norm_type (string|None): The normalization type in FPN module. If
  98. norm_type is None, norm will not be used after conv and if
  99. norm_type is string, bn, gn, sync_bn are available. default None
  100. norm_decay (float): weight decay for normalization layer weights.
  101. default 0.
  102. freeze_norm (bool): whether to freeze normalization layer.
  103. default False
  104. relu_before_extra_convs (bool): whether to add relu before extra convs.
  105. default False
  106. """
  107. def __init__(
  108. self,
  109. in_channels,
  110. out_channels,
  111. spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
  112. has_extra_convs=False,
  113. extra_stage=1,
  114. use_c5=True,
  115. norm_type=None,
  116. norm_decay=0.0,
  117. freeze_norm=False,
  118. relu_before_extra_convs=True,
  119. ):
  120. super(FCEFPN, self).__init__()
  121. self.out_channels = out_channels
  122. for s in range(extra_stage):
  123. spatial_scales = spatial_scales + [spatial_scales[-1] / 2.0]
  124. self.spatial_scales = spatial_scales
  125. self.has_extra_convs = has_extra_convs
  126. self.extra_stage = extra_stage
  127. self.use_c5 = use_c5
  128. self.relu_before_extra_convs = relu_before_extra_convs
  129. self.norm_type = norm_type
  130. self.norm_decay = norm_decay
  131. self.freeze_norm = freeze_norm
  132. self.lateral_convs = []
  133. self.fpn_convs = []
  134. fan = out_channels * 3 * 3
  135. # stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
  136. # 0 <= st_stage < ed_stage <= 3
  137. st_stage = 4 - len(in_channels)
  138. ed_stage = st_stage + len(in_channels) - 1
  139. for i in range(st_stage, ed_stage + 1):
  140. if i == 3:
  141. lateral_name = "fpn_inner_res5_sum"
  142. else:
  143. lateral_name = "fpn_inner_res{}_sum_lateral".format(i + 2)
  144. in_c = in_channels[i - st_stage]
  145. if self.norm_type is not None:
  146. lateral = self.add_sublayer(
  147. lateral_name,
  148. ConvNormLayer(
  149. ch_in=in_c,
  150. ch_out=out_channels,
  151. filter_size=1,
  152. stride=1,
  153. norm_type=self.norm_type,
  154. norm_decay=self.norm_decay,
  155. freeze_norm=self.freeze_norm,
  156. initializer=XavierUniform(fan_out=in_c),
  157. ),
  158. )
  159. else:
  160. lateral = self.add_sublayer(
  161. lateral_name,
  162. nn.Conv2D(
  163. in_channels=in_c,
  164. out_channels=out_channels,
  165. kernel_size=1,
  166. weight_attr=ParamAttr(initializer=XavierUniform(fan_out=in_c)),
  167. ),
  168. )
  169. self.lateral_convs.append(lateral)
  170. for i in range(st_stage, ed_stage + 1):
  171. fpn_name = "fpn_res{}_sum".format(i + 2)
  172. if self.norm_type is not None:
  173. fpn_conv = self.add_sublayer(
  174. fpn_name,
  175. ConvNormLayer(
  176. ch_in=out_channels,
  177. ch_out=out_channels,
  178. filter_size=3,
  179. stride=1,
  180. norm_type=self.norm_type,
  181. norm_decay=self.norm_decay,
  182. freeze_norm=self.freeze_norm,
  183. initializer=XavierUniform(fan_out=fan),
  184. ),
  185. )
  186. else:
  187. fpn_conv = self.add_sublayer(
  188. fpn_name,
  189. nn.Conv2D(
  190. in_channels=out_channels,
  191. out_channels=out_channels,
  192. kernel_size=3,
  193. padding=1,
  194. weight_attr=ParamAttr(initializer=XavierUniform(fan_out=fan)),
  195. ),
  196. )
  197. self.fpn_convs.append(fpn_conv)
  198. # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
  199. if self.has_extra_convs:
  200. for i in range(self.extra_stage):
  201. lvl = ed_stage + 1 + i
  202. if i == 0 and self.use_c5:
  203. in_c = in_channels[-1]
  204. else:
  205. in_c = out_channels
  206. extra_fpn_name = "fpn_{}".format(lvl + 2)
  207. if self.norm_type is not None:
  208. extra_fpn_conv = self.add_sublayer(
  209. extra_fpn_name,
  210. ConvNormLayer(
  211. ch_in=in_c,
  212. ch_out=out_channels,
  213. filter_size=3,
  214. stride=2,
  215. norm_type=self.norm_type,
  216. norm_decay=self.norm_decay,
  217. freeze_norm=self.freeze_norm,
  218. initializer=XavierUniform(fan_out=fan),
  219. ),
  220. )
  221. else:
  222. extra_fpn_conv = self.add_sublayer(
  223. extra_fpn_name,
  224. nn.Conv2D(
  225. in_channels=in_c,
  226. out_channels=out_channels,
  227. kernel_size=3,
  228. stride=2,
  229. padding=1,
  230. weight_attr=ParamAttr(
  231. initializer=XavierUniform(fan_out=fan)
  232. ),
  233. ),
  234. )
  235. self.fpn_convs.append(extra_fpn_conv)
  236. @classmethod
  237. def from_config(cls, cfg, input_shape):
  238. return {
  239. "in_channels": [i.channels for i in input_shape],
  240. "spatial_scales": [1.0 / i.stride for i in input_shape],
  241. }
  242. def forward(self, body_feats):
  243. laterals = []
  244. num_levels = len(body_feats)
  245. for i in range(num_levels):
  246. laterals.append(self.lateral_convs[i](body_feats[i]))
  247. for i in range(1, num_levels):
  248. lvl = num_levels - i
  249. upsample = F.interpolate(
  250. laterals[lvl],
  251. scale_factor=2.0,
  252. mode="nearest",
  253. )
  254. laterals[lvl - 1] += upsample
  255. fpn_output = []
  256. for lvl in range(num_levels):
  257. fpn_output.append(self.fpn_convs[lvl](laterals[lvl]))
  258. if self.extra_stage > 0:
  259. # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
  260. if not self.has_extra_convs:
  261. assert (
  262. self.extra_stage == 1
  263. ), "extra_stage should be 1 if FPN has not extra convs"
  264. fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
  265. # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
  266. else:
  267. if self.use_c5:
  268. extra_source = body_feats[-1]
  269. else:
  270. extra_source = fpn_output[-1]
  271. fpn_output.append(self.fpn_convs[num_levels](extra_source))
  272. for i in range(1, self.extra_stage):
  273. if self.relu_before_extra_convs:
  274. fpn_output.append(
  275. self.fpn_convs[num_levels + i](F.relu(fpn_output[-1]))
  276. )
  277. else:
  278. fpn_output.append(
  279. self.fpn_convs[num_levels + i](fpn_output[-1])
  280. )
  281. return fpn_output