rec_micronet.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is adapted from:
  16. https://github.com/liyunsheng13/micronet/blob/main/backbone/micronet.py
  17. https://github.com/liyunsheng13/micronet/blob/main/backbone/activation.py
  18. """
  19. from __future__ import absolute_import
  20. from __future__ import division
  21. from __future__ import print_function
  22. import paddle
  23. import paddle.nn as nn
  24. from ppocr.modeling.backbones.det_mobilenet_v3 import make_divisible
  25. M0_cfgs = [
# Per-stage settings for the "M0" variant; each row is unpacked in MicroNet.__init__ as:
#   s          -> stride of the first block of the stage
#   n          -> number of blocks in the stage (extra blocks use stride 1)
#   c          -> output channels
#   ks         -> kernel size
#   c1, c2     -> ch_exp
#   g1, g2     -> ch_per_group
#   c3, g3, g4 -> groups_1x1
#   y1, y2, y3 -> activation_cfg["dy"]
#   r          -> activation_cfg["ratio"]
  26. # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
  27. [2, 1, 8, 3, 2, 2, 0, 4, 8, 2, 2, 2, 0, 1, 1],
  28. [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 2, 1, 1],
  29. [2, 1, 16, 5, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],
  30. [1, 1, 32, 5, 1, 4, 4, 4, 32, 4, 4, 2, 2, 1, 1],
  31. [2, 1, 64, 5, 1, 4, 8, 8, 64, 8, 8, 2, 2, 1, 1],
  32. [1, 1, 96, 3, 1, 4, 8, 8, 96, 8, 8, 2, 2, 1, 2],
  33. [1, 1, 384, 3, 1, 4, 12, 12, 0, 0, 0, 2, 2, 1, 2],
  34. ]
  35. M1_cfgs = [
# Per-stage settings for the "M1" variant; every row has 15 columns.
  36. # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
  37. [2, 1, 8, 3, 2, 2, 0, 6, 8, 2, 2, 2, 0, 1, 1],
  38. [2, 1, 16, 3, 2, 2, 0, 8, 16, 4, 4, 2, 2, 1, 1],
  39. [2, 1, 16, 5, 2, 2, 0, 16, 16, 4, 4, 2, 2, 1, 1],
  40. [1, 1, 32, 5, 1, 6, 4, 4, 32, 4, 4, 2, 2, 1, 1],
  41. [2, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 1],
  42. [1, 1, 96, 3, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],
  43. [1, 1, 576, 3, 1, 6, 12, 12, 0, 0, 0, 2, 2, 1, 2],
  44. ]
  45. M2_cfgs = [
# Per-stage settings for the "M2" variant; every row has 15 columns.
  46. # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
  47. [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 0, 1, 1],
  48. [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],
  49. [1, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 2, 2, 1, 1],
  50. [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 2, 2, 1, 1],
  51. [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 2, 2, 1, 2],
  52. [1, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 2],
  53. [2, 1, 96, 5, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],
  54. [1, 1, 128, 3, 1, 6, 12, 12, 128, 8, 8, 2, 2, 1, 2],
  55. [1, 1, 768, 3, 1, 6, 16, 16, 0, 0, 0, 2, 2, 1, 2],
  56. ]
  57. M3_cfgs = [
# Per-stage settings for the "M3" variant; every row has 15 columns.
  58. # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
  59. [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 0, 2, 0, 1],
  60. [2, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 0, 2, 0, 1],
  61. [1, 1, 24, 3, 2, 2, 0, 24, 24, 4, 4, 0, 2, 0, 1],
  62. [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 0, 2, 0, 1],
  63. [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 0, 2, 0, 2],
  64. [1, 1, 64, 5, 1, 6, 8, 8, 48, 8, 8, 0, 2, 0, 2],
  65. [1, 1, 80, 5, 1, 6, 8, 8, 80, 8, 8, 0, 2, 0, 2],
  66. [1, 1, 80, 5, 1, 6, 10, 10, 80, 8, 8, 0, 2, 0, 2],
  67. [1, 1, 120, 5, 1, 6, 10, 10, 120, 10, 10, 0, 2, 0, 2],
  68. [1, 1, 120, 5, 1, 6, 12, 12, 120, 10, 10, 0, 2, 0, 2],
  69. [1, 1, 144, 3, 1, 6, 12, 12, 144, 12, 12, 0, 2, 0, 2],
  70. [1, 1, 432, 3, 1, 3, 12, 12, 0, 0, 0, 0, 2, 0, 2],
  71. ]
  72. def get_micronet_config(mode):
  73. return eval(mode + "_cfgs")
  74. class MaxGroupPooling(nn.Layer):
  75. def __init__(self, channel_per_group=2):
  76. super(MaxGroupPooling, self).__init__()
  77. self.channel_per_group = channel_per_group
  78. def forward(self, x):
  79. if self.channel_per_group == 1:
  80. return x
  81. # max op
  82. b, c, h, w = x.shape
  83. # reshape
  84. y = paddle.reshape(x, [b, c // self.channel_per_group, -1, h, w])
  85. out = paddle.max(y, axis=2)
  86. return out
  87. class SpatialSepConvSF(nn.Layer):
  88. def __init__(self, inp, oups, kernel_size, stride):
  89. super(SpatialSepConvSF, self).__init__()
  90. oup1, oup2 = oups
  91. self.conv = nn.Sequential(
  92. nn.Conv2D(
  93. inp,
  94. oup1,
  95. (kernel_size, 1),
  96. (stride, 1),
  97. (kernel_size // 2, 0),
  98. bias_attr=False,
  99. groups=1,
  100. ),
  101. nn.BatchNorm2D(oup1),
  102. nn.Conv2D(
  103. oup1,
  104. oup1 * oup2,
  105. (1, kernel_size),
  106. (1, stride),
  107. (0, kernel_size // 2),
  108. bias_attr=False,
  109. groups=oup1,
  110. ),
  111. nn.BatchNorm2D(oup1 * oup2),
  112. ChannelShuffle(oup1),
  113. )
  114. def forward(self, x):
  115. out = self.conv(x)
  116. return out
  117. class ChannelShuffle(nn.Layer):
  118. def __init__(self, groups):
  119. super(ChannelShuffle, self).__init__()
  120. self.groups = groups
  121. def forward(self, x):
  122. b, c, h, w = x.shape
  123. channels_per_group = c // self.groups
  124. # reshape
  125. x = paddle.reshape(x, [b, self.groups, channels_per_group, h, w])
  126. x = paddle.transpose(x, (0, 2, 1, 3, 4))
  127. out = paddle.reshape(x, [b, -1, h, w])
  128. return out
  129. class StemLayer(nn.Layer):
  130. def __init__(self, inp, oup, stride, groups=(4, 4)):
  131. super(StemLayer, self).__init__()
  132. g1, g2 = groups
  133. self.stem = nn.Sequential(
  134. SpatialSepConvSF(inp, groups, 3, stride),
  135. MaxGroupPooling(2) if g1 * g2 == 2 * oup else nn.ReLU6(),
  136. )
  137. def forward(self, x):
  138. out = self.stem(x)
  139. return out
  140. class DepthSpatialSepConv(nn.Layer):
  141. def __init__(self, inp, expand, kernel_size, stride):
  142. super(DepthSpatialSepConv, self).__init__()
  143. exp1, exp2 = expand
  144. hidden_dim = inp * exp1
  145. oup = inp * exp1 * exp2
  146. self.conv = nn.Sequential(
  147. nn.Conv2D(
  148. inp,
  149. inp * exp1,
  150. (kernel_size, 1),
  151. (stride, 1),
  152. (kernel_size // 2, 0),
  153. bias_attr=False,
  154. groups=inp,
  155. ),
  156. nn.BatchNorm2D(inp * exp1),
  157. nn.Conv2D(
  158. hidden_dim,
  159. oup,
  160. (1, kernel_size),
  161. 1,
  162. (0, kernel_size // 2),
  163. bias_attr=False,
  164. groups=hidden_dim,
  165. ),
  166. nn.BatchNorm2D(oup),
  167. )
  168. def forward(self, x):
  169. x = self.conv(x)
  170. return x
  171. class GroupConv(nn.Layer):
  172. def __init__(self, inp, oup, groups=2):
  173. super(GroupConv, self).__init__()
  174. self.inp = inp
  175. self.oup = oup
  176. self.groups = groups
  177. self.conv = nn.Sequential(
  178. nn.Conv2D(inp, oup, 1, 1, 0, bias_attr=False, groups=self.groups[0]),
  179. nn.BatchNorm2D(oup),
  180. )
  181. def forward(self, x):
  182. x = self.conv(x)
  183. return x
  184. class DepthConv(nn.Layer):
  185. def __init__(self, inp, oup, kernel_size, stride):
  186. super(DepthConv, self).__init__()
  187. self.conv = nn.Sequential(
  188. nn.Conv2D(
  189. inp,
  190. oup,
  191. kernel_size,
  192. stride,
  193. kernel_size // 2,
  194. bias_attr=False,
  195. groups=inp,
  196. ),
  197. nn.BatchNorm2D(oup),
  198. )
  199. def forward(self, x):
  200. out = self.conv(x)
  201. return out
  202. class DYShiftMax(nn.Layer):
  203. def __init__(
  204. self,
  205. inp,
  206. oup,
  207. reduction=4,
  208. act_max=1.0,
  209. act_relu=True,
  210. init_a=[0.0, 0.0],
  211. init_b=[0.0, 0.0],
  212. relu_before_pool=False,
  213. g=None,
  214. expansion=False,
  215. ):
  216. super(DYShiftMax, self).__init__()
  217. self.oup = oup
  218. self.act_max = act_max * 2
  219. self.act_relu = act_relu
  220. self.avg_pool = nn.Sequential(
  221. nn.ReLU() if relu_before_pool == True else nn.Sequential(),
  222. nn.AdaptiveAvgPool2D(1),
  223. )
  224. self.exp = 4 if act_relu else 2
  225. self.init_a = init_a
  226. self.init_b = init_b
  227. # determine squeeze
  228. squeeze = make_divisible(inp // reduction, 4)
  229. if squeeze < 4:
  230. squeeze = 4
  231. self.fc = nn.Sequential(
  232. nn.Linear(inp, squeeze),
  233. nn.ReLU(),
  234. nn.Linear(squeeze, oup * self.exp),
  235. nn.Hardsigmoid(),
  236. )
  237. if g is None:
  238. g = 1
  239. self.g = g[1]
  240. if self.g != 1 and expansion:
  241. self.g = inp // self.g
  242. self.gc = inp // self.g
  243. index = paddle.to_tensor([range(inp)])
  244. index = paddle.reshape(index, [1, inp, 1, 1])
  245. index = paddle.reshape(index, [1, self.g, self.gc, 1, 1])
  246. indexgs = paddle.split(index, [1, self.g - 1], axis=1)
  247. indexgs = paddle.concat((indexgs[1], indexgs[0]), axis=1)
  248. indexes = paddle.split(indexgs, [1, self.gc - 1], axis=2)
  249. indexes = paddle.concat((indexes[1], indexes[0]), axis=2)
  250. self.index = paddle.reshape(indexes, [inp])
  251. self.expansion = expansion
  252. def forward(self, x):
  253. x_in = x
  254. x_out = x
  255. b, c, _, _ = x_in.shape
  256. y = self.avg_pool(x_in)
  257. y = paddle.reshape(y, [b, c])
  258. y = self.fc(y)
  259. y = paddle.reshape(y, [b, self.oup * self.exp, 1, 1])
  260. y = (y - 0.5) * self.act_max
  261. n2, c2, h2, w2 = x_out.shape
  262. x2 = paddle.to_tensor(x_out.numpy()[:, self.index.numpy(), :, :])
  263. if self.exp == 4:
  264. temp = y.shape
  265. a1, b1, a2, b2 = paddle.split(y, temp[1] // self.oup, axis=1)
  266. a1 = a1 + self.init_a[0]
  267. a2 = a2 + self.init_a[1]
  268. b1 = b1 + self.init_b[0]
  269. b2 = b2 + self.init_b[1]
  270. z1 = x_out * a1 + x2 * b1
  271. z2 = x_out * a2 + x2 * b2
  272. out = paddle.maximum(z1, z2)
  273. elif self.exp == 2:
  274. temp = y.shape
  275. a1, b1 = paddle.split(y, temp[1] // self.oup, axis=1)
  276. a1 = a1 + self.init_a[0]
  277. b1 = b1 + self.init_b[0]
  278. out = x_out * a1 + x2 * b1
  279. return out
  280. class DYMicroBlock(nn.Layer):
# One MicroNet block. __init__ assembles self.layers as one of three
# nn.Sequential layouts, selected by ch_per_group[0] and groups_1x1[2];
# forward adds the input back when stride == 1 and inp == oup.
#
# Parameters as used below:
#   inp, oup        input / output channel counts
#   kernel_size     kernel size handed to the depthwise convolutions
#   stride          stride handed to the depthwise convolutions
#   ch_exp          pair (t1[0], t1[1]); hidden width is inp * t1[0] * t1[1]
#   ch_per_group    pair gs1, indexed as gs1[0] / gs1[1]
#   groups_1x1      triple (hidden_fft, g1, g2)
#   depthsep        picks DepthSpatialSepConv over DepthConv in the third layout
#   shuffle         enables the ChannelShuffle layers
#   activation_cfg  dict with keys "dy", "ratio", "init_a", "init_b"
#
# NOTE(review): the defaults ch_per_group=4, groups_1x1=(1, 1) and
# activation_cfg=None cannot be used as-is: the body indexes gs1[0], unpacks
# groups_1x1 into three names and subscripts activation_cfg.
  281. def __init__(
  282. self,
  283. inp,
  284. oup,
  285. kernel_size=3,
  286. stride=1,
  287. ch_exp=(2, 2),
  288. ch_per_group=4,
  289. groups_1x1=(1, 1),
  290. depthsep=True,
  291. shuffle=False,
  292. activation_cfg=None,
  293. ):
  294. super(DYMicroBlock, self).__init__()
# Residual connection is only possible when the shape is unchanged.
  295. self.identity = stride == 1 and inp == oup
  296. y1, y2, y3 = activation_cfg["dy"]
  297. act_reduction = 8 * activation_cfg["ratio"]
  298. init_a = activation_cfg["init_a"]
  299. init_b = activation_cfg["init_b"]
  300. t1 = ch_exp
  301. gs1 = ch_per_group
# hidden_fft is unpacked but not used anywhere in this block.
  302. hidden_fft, g1, g2 = groups_1x1
  303. hidden_dim2 = inp * t1[0] * t1[1]
# Layout 1 (gs1[0] == 0): depthwise separable conv -> activation -> shuffles
# -> grouped 1x1 conv -> activation -> shuffles.
# The bare nn.Sequential() entries are presumably no-op placeholders that keep
# the slot positions fixed across configurations - TODO confirm.
  304. if gs1[0] == 0:
  305. self.layers = nn.Sequential(
  306. DepthSpatialSepConv(inp, t1, kernel_size, stride),
  307. (
  308. DYShiftMax(
  309. hidden_dim2,
  310. hidden_dim2,
  311. act_max=2.0,
  312. act_relu=True if y2 == 2 else False,
  313. init_a=init_a,
  314. reduction=act_reduction,
  315. init_b=init_b,
  316. g=gs1,
  317. expansion=False,
  318. )
  319. if y2 > 0
  320. else nn.ReLU6()
  321. ),
  322. ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
  323. (
  324. ChannelShuffle(hidden_dim2 // 2)
  325. if shuffle and y2 != 0
  326. else nn.Sequential()
  327. ),
  328. GroupConv(hidden_dim2, oup, (g1, g2)),
  329. (
  330. DYShiftMax(
  331. oup,
  332. oup,
  333. act_max=2.0,
  334. act_relu=False,
  335. init_a=[1.0, 0.0],
  336. reduction=act_reduction // 2,
  337. init_b=[0.0, 0.0],
  338. g=(g1, g2),
  339. expansion=False,
  340. )
  341. if y3 > 0
  342. else nn.Sequential()
  343. ),
  344. ChannelShuffle(g2) if shuffle else nn.Sequential(),
  345. (
  346. ChannelShuffle(oup // 2)
  347. if shuffle and oup % 2 == 0 and y3 != 0
  348. else nn.Sequential()
  349. ),
  350. )
# Layout 2 (g2 == 0): a single grouped 1x1 conv to hidden_dim2 channels with
# an optional DYShiftMax; oup is not used to size any layer here.
  351. elif g2 == 0:
  352. self.layers = nn.Sequential(
  353. GroupConv(inp, hidden_dim2, gs1),
  354. (
  355. DYShiftMax(
  356. hidden_dim2,
  357. hidden_dim2,
  358. act_max=2.0,
  359. act_relu=False,
  360. init_a=[1.0, 0.0],
  361. reduction=act_reduction,
  362. init_b=[0.0, 0.0],
  363. g=gs1,
  364. expansion=False,
  365. )
  366. if y3 > 0
  367. else nn.Sequential()
  368. ),
  369. )
# Layout 3 (otherwise): grouped 1x1 conv -> activation -> depthwise conv ->
# activation -> grouped 1x1 conv -> activation, with shuffles in between.
  370. else:
  371. self.layers = nn.Sequential(
  372. GroupConv(inp, hidden_dim2, gs1),
  373. (
  374. DYShiftMax(
  375. hidden_dim2,
  376. hidden_dim2,
  377. act_max=2.0,
  378. act_relu=True if y1 == 2 else False,
  379. init_a=init_a,
  380. reduction=act_reduction,
  381. init_b=init_b,
  382. g=gs1,
  383. expansion=False,
  384. )
  385. if y1 > 0
  386. else nn.ReLU6()
  387. ),
  388. ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
  389. (
  390. DepthSpatialSepConv(hidden_dim2, (1, 1), kernel_size, stride)
  391. if depthsep
  392. else DepthConv(hidden_dim2, hidden_dim2, kernel_size, stride)
  393. ),
  394. nn.Sequential(),
  395. (
  396. DYShiftMax(
  397. hidden_dim2,
  398. hidden_dim2,
  399. act_max=2.0,
  400. act_relu=True if y2 == 2 else False,
  401. init_a=init_a,
  402. reduction=act_reduction,
  403. init_b=init_b,
  404. g=gs1,
  405. expansion=True,
  406. )
  407. if y2 > 0
  408. else nn.ReLU6()
  409. ),
# NOTE(review): when the first condition is false, the fallback still inserts
# ChannelShuffle(hidden_dim2 // 2) unless y1 and y2 are both 0 - this happens
# even if shuffle is False. Confirm this is intended.
  410. (
  411. ChannelShuffle(hidden_dim2 // 4)
  412. if shuffle and y1 != 0 and y2 != 0
  413. else (
  414. nn.Sequential()
  415. if y1 == 0 and y2 == 0
  416. else ChannelShuffle(hidden_dim2 // 2)
  417. )
  418. ),
  419. GroupConv(hidden_dim2, oup, (g1, g2)),
  420. (
  421. DYShiftMax(
  422. oup,
  423. oup,
  424. act_max=2.0,
  425. act_relu=False,
  426. init_a=[1.0, 0.0],
# Halve the reduction only when this block narrows the channel count.
  427. reduction=(
  428. act_reduction // 2 if oup < hidden_dim2 else act_reduction
  429. ),
  430. init_b=[0.0, 0.0],
  431. g=(g1, g2),
  432. expansion=False,
  433. )
  434. if y3 > 0
  435. else nn.Sequential()
  436. ),
  437. ChannelShuffle(g2) if shuffle else nn.Sequential(),
  438. ChannelShuffle(oup // 2) if shuffle and y3 != 0 else nn.Sequential(),
  439. )
# Runs the assembled layers and adds the input back when self.identity is set.
  440. def forward(self, x):
  441. identity = x
  442. out = self.layers(x)
  443. if self.identity:
  444. out = out + identity
  445. return out
  446. class MicroNet(nn.Layer):
  447. """
  448. the MicroNet backbone network for recognition module.
  449. Args:
  450. mode(str): {'M0', 'M1', 'M2', 'M3'}
  451. Four models are proposed based on four different computational costs (4M, 6M, 12M, 21M MAdds)
  452. Default: 'M3'.
  453. """
  454. def __init__(self, mode="M3", **kwargs):
  455. super(MicroNet, self).__init__()
  456. self.cfgs = get_micronet_config(mode)
  457. activation_cfg = {}
  458. if mode == "M0":
  459. input_channel = 4
  460. stem_groups = 2, 2
  461. out_ch = 384
  462. activation_cfg["init_a"] = 1.0, 1.0
  463. activation_cfg["init_b"] = 0.0, 0.0
  464. elif mode == "M1":
  465. input_channel = 6
  466. stem_groups = 3, 2
  467. out_ch = 576
  468. activation_cfg["init_a"] = 1.0, 1.0
  469. activation_cfg["init_b"] = 0.0, 0.0
  470. elif mode == "M2":
  471. input_channel = 8
  472. stem_groups = 4, 2
  473. out_ch = 768
  474. activation_cfg["init_a"] = 1.0, 1.0
  475. activation_cfg["init_b"] = 0.0, 0.0
  476. elif mode == "M3":
  477. input_channel = 12
  478. stem_groups = 4, 3
  479. out_ch = 432
  480. activation_cfg["init_a"] = 1.0, 0.5
  481. activation_cfg["init_b"] = 0.0, 0.5
  482. else:
  483. raise NotImplementedError("mode[" + mode + "_model] is not implemented!")
  484. layers = [StemLayer(3, input_channel, stride=2, groups=stem_groups)]
  485. for idx, val in enumerate(self.cfgs):
  486. s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r = val
  487. t1 = (c1, c2)
  488. gs1 = (g1, g2)
  489. gs2 = (c3, g3, g4)
  490. activation_cfg["dy"] = [y1, y2, y3]
  491. activation_cfg["ratio"] = r
  492. output_channel = c
  493. layers.append(
  494. DYMicroBlock(
  495. input_channel,
  496. output_channel,
  497. kernel_size=ks,
  498. stride=s,
  499. ch_exp=t1,
  500. ch_per_group=gs1,
  501. groups_1x1=gs2,
  502. depthsep=True,
  503. shuffle=True,
  504. activation_cfg=activation_cfg,
  505. )
  506. )
  507. input_channel = output_channel
  508. for i in range(1, n):
  509. layers.append(
  510. DYMicroBlock(
  511. input_channel,
  512. output_channel,
  513. kernel_size=ks,
  514. stride=1,
  515. ch_exp=t1,
  516. ch_per_group=gs1,
  517. groups_1x1=gs2,
  518. depthsep=True,
  519. shuffle=True,
  520. activation_cfg=activation_cfg,
  521. )
  522. )
  523. input_channel = output_channel
  524. self.features = nn.Sequential(*layers)
  525. self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
  526. self.out_channels = make_divisible(out_ch)
  527. def forward(self, x):
  528. x = self.features(x)
  529. x = self.pool(x)
  530. return x