det_resnet_vd.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import paddle
  18. from paddle import ParamAttr
  19. import paddle.nn as nn
  20. import paddle.nn.functional as F
  21. from paddle.vision.ops import DeformConv2D
  22. from paddle.regularizer import L2Decay
  23. from paddle.nn.initializer import Normal, Constant, XavierUniform
  24. __all__ = ["ResNet_vd", "ConvBNLayer", "DeformableConvV2"]
  25. class DeformableConvV2(nn.Layer):
  26. def __init__(
  27. self,
  28. in_channels,
  29. out_channels,
  30. kernel_size,
  31. stride=1,
  32. padding=0,
  33. dilation=1,
  34. groups=1,
  35. weight_attr=None,
  36. bias_attr=None,
  37. lr_scale=1,
  38. regularizer=None,
  39. skip_quant=False,
  40. dcn_bias_regularizer=L2Decay(0.0),
  41. dcn_bias_lr_scale=2.0,
  42. ):
  43. super(DeformableConvV2, self).__init__()
  44. self.offset_channel = 2 * kernel_size**2 * groups
  45. self.mask_channel = kernel_size**2 * groups
  46. if bias_attr:
  47. # in FCOS-DCN head, specifically need learning_rate and regularizer
  48. dcn_bias_attr = ParamAttr(
  49. initializer=Constant(value=0),
  50. regularizer=dcn_bias_regularizer,
  51. learning_rate=dcn_bias_lr_scale,
  52. )
  53. else:
  54. # in ResNet backbone, do not need bias
  55. dcn_bias_attr = False
  56. self.conv_dcn = DeformConv2D(
  57. in_channels,
  58. out_channels,
  59. kernel_size,
  60. stride=stride,
  61. padding=(kernel_size - 1) // 2 * dilation,
  62. dilation=dilation,
  63. deformable_groups=groups,
  64. weight_attr=weight_attr,
  65. bias_attr=dcn_bias_attr,
  66. )
  67. if lr_scale == 1 and regularizer is None:
  68. offset_bias_attr = ParamAttr(initializer=Constant(0.0))
  69. else:
  70. offset_bias_attr = ParamAttr(
  71. initializer=Constant(0.0),
  72. learning_rate=lr_scale,
  73. regularizer=regularizer,
  74. )
  75. self.conv_offset = nn.Conv2D(
  76. in_channels,
  77. groups * 3 * kernel_size**2,
  78. kernel_size,
  79. stride=stride,
  80. padding=(kernel_size - 1) // 2,
  81. weight_attr=ParamAttr(initializer=Constant(0.0)),
  82. bias_attr=offset_bias_attr,
  83. )
  84. if skip_quant:
  85. self.conv_offset.skip_quant = True
  86. def forward(self, x):
  87. offset_mask = self.conv_offset(x)
  88. offset, mask = paddle.split(
  89. offset_mask,
  90. num_or_sections=[self.offset_channel, self.mask_channel],
  91. axis=1,
  92. )
  93. mask = F.sigmoid(mask)
  94. y = self.conv_dcn(x, offset, mask=mask)
  95. return y
  96. class ConvBNLayer(nn.Layer):
  97. def __init__(
  98. self,
  99. in_channels,
  100. out_channels,
  101. kernel_size,
  102. stride=1,
  103. groups=1,
  104. dcn_groups=1,
  105. is_vd_mode=False,
  106. act=None,
  107. is_dcn=False,
  108. ):
  109. super(ConvBNLayer, self).__init__()
  110. self.is_vd_mode = is_vd_mode
  111. self._pool2d_avg = nn.AvgPool2D(
  112. kernel_size=2, stride=2, padding=0, ceil_mode=True
  113. )
  114. if not is_dcn:
  115. self._conv = nn.Conv2D(
  116. in_channels=in_channels,
  117. out_channels=out_channels,
  118. kernel_size=kernel_size,
  119. stride=stride,
  120. padding=(kernel_size - 1) // 2,
  121. groups=groups,
  122. bias_attr=False,
  123. )
  124. else:
  125. self._conv = DeformableConvV2(
  126. in_channels=in_channels,
  127. out_channels=out_channels,
  128. kernel_size=kernel_size,
  129. stride=stride,
  130. padding=(kernel_size - 1) // 2,
  131. groups=dcn_groups, # groups,
  132. bias_attr=False,
  133. )
  134. self._batch_norm = nn.BatchNorm(out_channels, act=act)
  135. def forward(self, inputs):
  136. if self.is_vd_mode:
  137. inputs = self._pool2d_avg(inputs)
  138. y = self._conv(inputs)
  139. y = self._batch_norm(y)
  140. return y
  141. class BottleneckBlock(nn.Layer):
  142. def __init__(
  143. self,
  144. in_channels,
  145. out_channels,
  146. stride,
  147. shortcut=True,
  148. if_first=False,
  149. is_dcn=False,
  150. ):
  151. super(BottleneckBlock, self).__init__()
  152. self.conv0 = ConvBNLayer(
  153. in_channels=in_channels,
  154. out_channels=out_channels,
  155. kernel_size=1,
  156. act="relu",
  157. )
  158. self.conv1 = ConvBNLayer(
  159. in_channels=out_channels,
  160. out_channels=out_channels,
  161. kernel_size=3,
  162. stride=stride,
  163. act="relu",
  164. is_dcn=is_dcn,
  165. dcn_groups=2,
  166. )
  167. self.conv2 = ConvBNLayer(
  168. in_channels=out_channels,
  169. out_channels=out_channels * 4,
  170. kernel_size=1,
  171. act=None,
  172. )
  173. if not shortcut:
  174. self.short = ConvBNLayer(
  175. in_channels=in_channels,
  176. out_channels=out_channels * 4,
  177. kernel_size=1,
  178. stride=1,
  179. is_vd_mode=False if if_first else True,
  180. )
  181. self.shortcut = shortcut
  182. def forward(self, inputs):
  183. y = self.conv0(inputs)
  184. conv1 = self.conv1(y)
  185. conv2 = self.conv2(conv1)
  186. if self.shortcut:
  187. short = inputs
  188. else:
  189. short = self.short(inputs)
  190. y = paddle.add(x=short, y=conv2)
  191. y = F.relu(y)
  192. return y
  193. class BasicBlock(nn.Layer):
  194. def __init__(
  195. self,
  196. in_channels,
  197. out_channels,
  198. stride,
  199. shortcut=True,
  200. if_first=False,
  201. ):
  202. super(BasicBlock, self).__init__()
  203. self.stride = stride
  204. self.conv0 = ConvBNLayer(
  205. in_channels=in_channels,
  206. out_channels=out_channels,
  207. kernel_size=3,
  208. stride=stride,
  209. act="relu",
  210. )
  211. self.conv1 = ConvBNLayer(
  212. in_channels=out_channels, out_channels=out_channels, kernel_size=3, act=None
  213. )
  214. if not shortcut:
  215. self.short = ConvBNLayer(
  216. in_channels=in_channels,
  217. out_channels=out_channels,
  218. kernel_size=1,
  219. stride=1,
  220. is_vd_mode=False if if_first else True,
  221. )
  222. self.shortcut = shortcut
  223. def forward(self, inputs):
  224. y = self.conv0(inputs)
  225. conv1 = self.conv1(y)
  226. if self.shortcut:
  227. short = inputs
  228. else:
  229. short = self.short(inputs)
  230. y = paddle.add(x=short, y=conv1)
  231. y = F.relu(y)
  232. return y
  233. class ResNet_vd(nn.Layer):
  234. def __init__(
  235. self, in_channels=3, layers=50, dcn_stage=None, out_indices=None, **kwargs
  236. ):
  237. super(ResNet_vd, self).__init__()
  238. self.layers = layers
  239. supported_layers = [18, 34, 50, 101, 152, 200]
  240. assert (
  241. layers in supported_layers
  242. ), "supported layers are {} but input layer is {}".format(
  243. supported_layers, layers
  244. )
  245. if layers == 18:
  246. depth = [2, 2, 2, 2]
  247. elif layers == 34 or layers == 50:
  248. depth = [3, 4, 6, 3]
  249. elif layers == 101:
  250. depth = [3, 4, 23, 3]
  251. elif layers == 152:
  252. depth = [3, 8, 36, 3]
  253. elif layers == 200:
  254. depth = [3, 12, 48, 3]
  255. num_channels = [64, 256, 512, 1024] if layers >= 50 else [64, 64, 128, 256]
  256. num_filters = [64, 128, 256, 512]
  257. self.dcn_stage = (
  258. dcn_stage if dcn_stage is not None else [False, False, False, False]
  259. )
  260. self.out_indices = out_indices if out_indices is not None else [0, 1, 2, 3]
  261. self.conv1_1 = ConvBNLayer(
  262. in_channels=in_channels,
  263. out_channels=32,
  264. kernel_size=3,
  265. stride=2,
  266. act="relu",
  267. )
  268. self.conv1_2 = ConvBNLayer(
  269. in_channels=32, out_channels=32, kernel_size=3, stride=1, act="relu"
  270. )
  271. self.conv1_3 = ConvBNLayer(
  272. in_channels=32, out_channels=64, kernel_size=3, stride=1, act="relu"
  273. )
  274. self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
  275. self.stages = []
  276. self.out_channels = []
  277. if layers >= 50:
  278. for block in range(len(depth)):
  279. block_list = []
  280. shortcut = False
  281. is_dcn = self.dcn_stage[block]
  282. for i in range(depth[block]):
  283. bottleneck_block = self.add_sublayer(
  284. "bb_%d_%d" % (block, i),
  285. BottleneckBlock(
  286. in_channels=(
  287. num_channels[block]
  288. if i == 0
  289. else num_filters[block] * 4
  290. ),
  291. out_channels=num_filters[block],
  292. stride=2 if i == 0 and block != 0 else 1,
  293. shortcut=shortcut,
  294. if_first=block == i == 0,
  295. is_dcn=is_dcn,
  296. ),
  297. )
  298. shortcut = True
  299. block_list.append(bottleneck_block)
  300. if block in self.out_indices:
  301. self.out_channels.append(num_filters[block] * 4)
  302. self.stages.append(nn.Sequential(*block_list))
  303. else:
  304. for block in range(len(depth)):
  305. block_list = []
  306. shortcut = False
  307. for i in range(depth[block]):
  308. basic_block = self.add_sublayer(
  309. "bb_%d_%d" % (block, i),
  310. BasicBlock(
  311. in_channels=(
  312. num_channels[block] if i == 0 else num_filters[block]
  313. ),
  314. out_channels=num_filters[block],
  315. stride=2 if i == 0 and block != 0 else 1,
  316. shortcut=shortcut,
  317. if_first=block == i == 0,
  318. ),
  319. )
  320. shortcut = True
  321. block_list.append(basic_block)
  322. if block in self.out_indices:
  323. self.out_channels.append(num_filters[block])
  324. self.stages.append(nn.Sequential(*block_list))
  325. def forward(self, inputs):
  326. y = self.conv1_1(inputs)
  327. y = self.conv1_2(y)
  328. y = self.conv1_3(y)
  329. y = self.pool2d_max(y)
  330. out = []
  331. for i, block in enumerate(self.stages):
  332. y = block(y)
  333. if i in self.out_indices:
  334. out.append(y)
  335. return out