db_fpn.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. # copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import paddle
  18. from paddle import nn
  19. import paddle.nn.functional as F
  20. from paddle import ParamAttr
  21. import os
  22. import sys
  23. from ppocr.modeling.necks.intracl import IntraCLBlock
  24. __dir__ = os.path.dirname(os.path.abspath(__file__))
  25. sys.path.append(__dir__)
  26. sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "../../..")))
  27. from ppocr.modeling.backbones.det_mobilenet_v3 import SEModule
  28. class DSConv(nn.Layer):
  29. def __init__(
  30. self,
  31. in_channels,
  32. out_channels,
  33. kernel_size,
  34. padding,
  35. stride=1,
  36. groups=None,
  37. if_act=True,
  38. act="relu",
  39. **kwargs,
  40. ):
  41. super(DSConv, self).__init__()
  42. if groups == None:
  43. groups = in_channels
  44. self.if_act = if_act
  45. self.act = act
  46. self.conv1 = nn.Conv2D(
  47. in_channels=in_channels,
  48. out_channels=in_channels,
  49. kernel_size=kernel_size,
  50. stride=stride,
  51. padding=padding,
  52. groups=groups,
  53. bias_attr=False,
  54. )
  55. self.bn1 = nn.BatchNorm(num_channels=in_channels, act=None)
  56. self.conv2 = nn.Conv2D(
  57. in_channels=in_channels,
  58. out_channels=int(in_channels * 4),
  59. kernel_size=1,
  60. stride=1,
  61. bias_attr=False,
  62. )
  63. self.bn2 = nn.BatchNorm(num_channels=int(in_channels * 4), act=None)
  64. self.conv3 = nn.Conv2D(
  65. in_channels=int(in_channels * 4),
  66. out_channels=out_channels,
  67. kernel_size=1,
  68. stride=1,
  69. bias_attr=False,
  70. )
  71. self._c = [in_channels, out_channels]
  72. if in_channels != out_channels:
  73. self.conv_end = nn.Conv2D(
  74. in_channels=in_channels,
  75. out_channels=out_channels,
  76. kernel_size=1,
  77. stride=1,
  78. bias_attr=False,
  79. )
  80. def forward(self, inputs):
  81. x = self.conv1(inputs)
  82. x = self.bn1(x)
  83. x = self.conv2(x)
  84. x = self.bn2(x)
  85. if self.if_act:
  86. if self.act == "relu":
  87. x = F.relu(x)
  88. elif self.act == "hardswish":
  89. x = F.hardswish(x)
  90. else:
  91. print(
  92. "The activation function({}) is selected incorrectly.".format(
  93. self.act
  94. )
  95. )
  96. exit()
  97. x = self.conv3(x)
  98. if self._c[0] != self._c[1]:
  99. x = x + self.conv_end(inputs)
  100. return x
  101. class DBFPN(nn.Layer):
  102. def __init__(self, in_channels, out_channels, use_asf=False, **kwargs):
  103. super(DBFPN, self).__init__()
  104. self.out_channels = out_channels
  105. self.use_asf = use_asf
  106. weight_attr = paddle.nn.initializer.KaimingUniform()
  107. self.in2_conv = nn.Conv2D(
  108. in_channels=in_channels[0],
  109. out_channels=self.out_channels,
  110. kernel_size=1,
  111. weight_attr=ParamAttr(initializer=weight_attr),
  112. bias_attr=False,
  113. )
  114. self.in3_conv = nn.Conv2D(
  115. in_channels=in_channels[1],
  116. out_channels=self.out_channels,
  117. kernel_size=1,
  118. weight_attr=ParamAttr(initializer=weight_attr),
  119. bias_attr=False,
  120. )
  121. self.in4_conv = nn.Conv2D(
  122. in_channels=in_channels[2],
  123. out_channels=self.out_channels,
  124. kernel_size=1,
  125. weight_attr=ParamAttr(initializer=weight_attr),
  126. bias_attr=False,
  127. )
  128. self.in5_conv = nn.Conv2D(
  129. in_channels=in_channels[3],
  130. out_channels=self.out_channels,
  131. kernel_size=1,
  132. weight_attr=ParamAttr(initializer=weight_attr),
  133. bias_attr=False,
  134. )
  135. self.p5_conv = nn.Conv2D(
  136. in_channels=self.out_channels,
  137. out_channels=self.out_channels // 4,
  138. kernel_size=3,
  139. padding=1,
  140. weight_attr=ParamAttr(initializer=weight_attr),
  141. bias_attr=False,
  142. )
  143. self.p4_conv = nn.Conv2D(
  144. in_channels=self.out_channels,
  145. out_channels=self.out_channels // 4,
  146. kernel_size=3,
  147. padding=1,
  148. weight_attr=ParamAttr(initializer=weight_attr),
  149. bias_attr=False,
  150. )
  151. self.p3_conv = nn.Conv2D(
  152. in_channels=self.out_channels,
  153. out_channels=self.out_channels // 4,
  154. kernel_size=3,
  155. padding=1,
  156. weight_attr=ParamAttr(initializer=weight_attr),
  157. bias_attr=False,
  158. )
  159. self.p2_conv = nn.Conv2D(
  160. in_channels=self.out_channels,
  161. out_channels=self.out_channels // 4,
  162. kernel_size=3,
  163. padding=1,
  164. weight_attr=ParamAttr(initializer=weight_attr),
  165. bias_attr=False,
  166. )
  167. if self.use_asf is True:
  168. self.asf = ASFBlock(self.out_channels, self.out_channels // 4)
  169. def forward(self, x):
  170. c2, c3, c4, c5 = x
  171. in5 = self.in5_conv(c5)
  172. in4 = self.in4_conv(c4)
  173. in3 = self.in3_conv(c3)
  174. in2 = self.in2_conv(c2)
  175. out4 = in4 + F.upsample(
  176. in5, scale_factor=2, mode="nearest", align_mode=1
  177. ) # 1/16
  178. out3 = in3 + F.upsample(
  179. out4, scale_factor=2, mode="nearest", align_mode=1
  180. ) # 1/8
  181. out2 = in2 + F.upsample(
  182. out3, scale_factor=2, mode="nearest", align_mode=1
  183. ) # 1/4
  184. p5 = self.p5_conv(in5)
  185. p4 = self.p4_conv(out4)
  186. p3 = self.p3_conv(out3)
  187. p2 = self.p2_conv(out2)
  188. p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
  189. p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
  190. p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)
  191. fuse = paddle.concat([p5, p4, p3, p2], axis=1)
  192. if self.use_asf is True:
  193. fuse = self.asf(fuse, [p5, p4, p3, p2])
  194. return fuse
  195. class RSELayer(nn.Layer):
  196. def __init__(self, in_channels, out_channels, kernel_size, shortcut=True):
  197. super(RSELayer, self).__init__()
  198. weight_attr = paddle.nn.initializer.KaimingUniform()
  199. self.out_channels = out_channels
  200. self.in_conv = nn.Conv2D(
  201. in_channels=in_channels,
  202. out_channels=self.out_channels,
  203. kernel_size=kernel_size,
  204. padding=int(kernel_size // 2),
  205. weight_attr=ParamAttr(initializer=weight_attr),
  206. bias_attr=False,
  207. )
  208. self.se_block = SEModule(self.out_channels)
  209. self.shortcut = shortcut
  210. def forward(self, ins):
  211. x = self.in_conv(ins)
  212. if self.shortcut:
  213. out = x + self.se_block(x)
  214. else:
  215. out = self.se_block(x)
  216. return out
  217. class RSEFPN(nn.Layer):
  218. def __init__(self, in_channels, out_channels, shortcut=True, **kwargs):
  219. super(RSEFPN, self).__init__()
  220. self.out_channels = out_channels
  221. self.ins_conv = nn.LayerList()
  222. self.inp_conv = nn.LayerList()
  223. self.intracl = False
  224. if "intracl" in kwargs.keys() and kwargs["intracl"] is True:
  225. self.intracl = kwargs["intracl"]
  226. self.incl1 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
  227. self.incl2 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
  228. self.incl3 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
  229. self.incl4 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
  230. for i in range(len(in_channels)):
  231. self.ins_conv.append(
  232. RSELayer(in_channels[i], out_channels, kernel_size=1, shortcut=shortcut)
  233. )
  234. self.inp_conv.append(
  235. RSELayer(
  236. out_channels, out_channels // 4, kernel_size=3, shortcut=shortcut
  237. )
  238. )
  239. def forward(self, x):
  240. c2, c3, c4, c5 = x
  241. in5 = self.ins_conv[3](c5)
  242. in4 = self.ins_conv[2](c4)
  243. in3 = self.ins_conv[1](c3)
  244. in2 = self.ins_conv[0](c2)
  245. out4 = in4 + F.upsample(
  246. in5, scale_factor=2, mode="nearest", align_mode=1
  247. ) # 1/16
  248. out3 = in3 + F.upsample(
  249. out4, scale_factor=2, mode="nearest", align_mode=1
  250. ) # 1/8
  251. out2 = in2 + F.upsample(
  252. out3, scale_factor=2, mode="nearest", align_mode=1
  253. ) # 1/4
  254. p5 = self.inp_conv[3](in5)
  255. p4 = self.inp_conv[2](out4)
  256. p3 = self.inp_conv[1](out3)
  257. p2 = self.inp_conv[0](out2)
  258. if self.intracl is True:
  259. p5 = self.incl4(p5)
  260. p4 = self.incl3(p4)
  261. p3 = self.incl2(p3)
  262. p2 = self.incl1(p2)
  263. p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
  264. p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
  265. p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)
  266. fuse = paddle.concat([p5, p4, p3, p2], axis=1)
  267. return fuse
  268. class LKPAN(nn.Layer):
  269. def __init__(self, in_channels, out_channels, mode="large", **kwargs):
  270. super(LKPAN, self).__init__()
  271. self.out_channels = out_channels
  272. weight_attr = paddle.nn.initializer.KaimingUniform()
  273. self.ins_conv = nn.LayerList()
  274. self.inp_conv = nn.LayerList()
  275. # pan head
  276. self.pan_head_conv = nn.LayerList()
  277. self.pan_lat_conv = nn.LayerList()
  278. if mode.lower() == "lite":
  279. p_layer = DSConv
  280. elif mode.lower() == "large":
  281. p_layer = nn.Conv2D
  282. else:
  283. raise ValueError(
  284. "mode can only be one of ['lite', 'large'], but received {}".format(
  285. mode
  286. )
  287. )
  288. for i in range(len(in_channels)):
  289. self.ins_conv.append(
  290. nn.Conv2D(
  291. in_channels=in_channels[i],
  292. out_channels=self.out_channels,
  293. kernel_size=1,
  294. weight_attr=ParamAttr(initializer=weight_attr),
  295. bias_attr=False,
  296. )
  297. )
  298. self.inp_conv.append(
  299. p_layer(
  300. in_channels=self.out_channels,
  301. out_channels=self.out_channels // 4,
  302. kernel_size=9,
  303. padding=4,
  304. weight_attr=ParamAttr(initializer=weight_attr),
  305. bias_attr=False,
  306. )
  307. )
  308. if i > 0:
  309. self.pan_head_conv.append(
  310. nn.Conv2D(
  311. in_channels=self.out_channels // 4,
  312. out_channels=self.out_channels // 4,
  313. kernel_size=3,
  314. padding=1,
  315. stride=2,
  316. weight_attr=ParamAttr(initializer=weight_attr),
  317. bias_attr=False,
  318. )
  319. )
  320. self.pan_lat_conv.append(
  321. p_layer(
  322. in_channels=self.out_channels // 4,
  323. out_channels=self.out_channels // 4,
  324. kernel_size=9,
  325. padding=4,
  326. weight_attr=ParamAttr(initializer=weight_attr),
  327. bias_attr=False,
  328. )
  329. )
  330. self.intracl = False
  331. if "intracl" in kwargs.keys() and kwargs["intracl"] is True:
  332. self.intracl = kwargs["intracl"]
  333. self.incl1 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
  334. self.incl2 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
  335. self.incl3 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
  336. self.incl4 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
  337. def forward(self, x):
  338. c2, c3, c4, c5 = x
  339. in5 = self.ins_conv[3](c5)
  340. in4 = self.ins_conv[2](c4)
  341. in3 = self.ins_conv[1](c3)
  342. in2 = self.ins_conv[0](c2)
  343. out4 = in4 + F.upsample(
  344. in5, scale_factor=2, mode="nearest", align_mode=1
  345. ) # 1/16
  346. out3 = in3 + F.upsample(
  347. out4, scale_factor=2, mode="nearest", align_mode=1
  348. ) # 1/8
  349. out2 = in2 + F.upsample(
  350. out3, scale_factor=2, mode="nearest", align_mode=1
  351. ) # 1/4
  352. f5 = self.inp_conv[3](in5)
  353. f4 = self.inp_conv[2](out4)
  354. f3 = self.inp_conv[1](out3)
  355. f2 = self.inp_conv[0](out2)
  356. pan3 = f3 + self.pan_head_conv[0](f2)
  357. pan4 = f4 + self.pan_head_conv[1](pan3)
  358. pan5 = f5 + self.pan_head_conv[2](pan4)
  359. p2 = self.pan_lat_conv[0](f2)
  360. p3 = self.pan_lat_conv[1](pan3)
  361. p4 = self.pan_lat_conv[2](pan4)
  362. p5 = self.pan_lat_conv[3](pan5)
  363. if self.intracl is True:
  364. p5 = self.incl4(p5)
  365. p4 = self.incl3(p4)
  366. p3 = self.incl2(p3)
  367. p2 = self.incl1(p2)
  368. p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
  369. p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
  370. p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)
  371. fuse = paddle.concat([p5, p4, p3, p2], axis=1)
  372. return fuse
  373. class ASFBlock(nn.Layer):
  374. """
  375. This code is referred from:
  376. https://github.com/MhLiao/DB/blob/master/decoders/feature_attention.py
  377. """
  378. def __init__(self, in_channels, inter_channels, out_features_num=4):
  379. """
  380. Adaptive Scale Fusion (ASF) block of DBNet++
  381. Args:
  382. in_channels: the number of channels in the input data
  383. inter_channels: the number of middle channels
  384. out_features_num: the number of fused stages
  385. """
  386. super(ASFBlock, self).__init__()
  387. weight_attr = paddle.nn.initializer.KaimingUniform()
  388. self.in_channels = in_channels
  389. self.inter_channels = inter_channels
  390. self.out_features_num = out_features_num
  391. self.conv = nn.Conv2D(in_channels, inter_channels, 3, padding=1)
  392. self.spatial_scale = nn.Sequential(
  393. # Nx1xHxW
  394. nn.Conv2D(
  395. in_channels=1,
  396. out_channels=1,
  397. kernel_size=3,
  398. bias_attr=False,
  399. padding=1,
  400. weight_attr=ParamAttr(initializer=weight_attr),
  401. ),
  402. nn.ReLU(),
  403. nn.Conv2D(
  404. in_channels=1,
  405. out_channels=1,
  406. kernel_size=1,
  407. bias_attr=False,
  408. weight_attr=ParamAttr(initializer=weight_attr),
  409. ),
  410. nn.Sigmoid(),
  411. )
  412. self.channel_scale = nn.Sequential(
  413. nn.Conv2D(
  414. in_channels=inter_channels,
  415. out_channels=out_features_num,
  416. kernel_size=1,
  417. bias_attr=False,
  418. weight_attr=ParamAttr(initializer=weight_attr),
  419. ),
  420. nn.Sigmoid(),
  421. )
  422. def forward(self, fuse_features, features_list):
  423. fuse_features = self.conv(fuse_features)
  424. spatial_x = paddle.mean(fuse_features, axis=1, keepdim=True)
  425. attention_scores = self.spatial_scale(spatial_x) + fuse_features
  426. attention_scores = self.channel_scale(attention_scores)
  427. assert len(features_list) == self.out_features_num
  428. out_list = []
  429. for i in range(self.out_features_num):
  430. out_list.append(attention_scores[:, i : i + 1] * features_list[i])
  431. return paddle.concat(out_list, axis=1)