rec_resnet_31.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is adapted from:
  16. https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/layers/conv_layer.py
  17. https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/backbones/resnet31_ocr.py
  18. """
  19. from __future__ import absolute_import
  20. from __future__ import division
  21. from __future__ import print_function
  22. import paddle
  23. from paddle import ParamAttr
  24. import paddle.nn as nn
  25. import paddle.nn.functional as F
  26. import numpy as np
  27. __all__ = ["ResNet31"]
  28. def conv3x3(in_channel, out_channel, stride=1, conv_weight_attr=None):
  29. return nn.Conv2D(
  30. in_channel,
  31. out_channel,
  32. kernel_size=3,
  33. stride=stride,
  34. padding=1,
  35. weight_attr=conv_weight_attr,
  36. bias_attr=False,
  37. )
  38. class BasicBlock(nn.Layer):
  39. expansion = 1
  40. def __init__(
  41. self,
  42. in_channels,
  43. channels,
  44. stride=1,
  45. downsample=False,
  46. conv_weight_attr=None,
  47. bn_weight_attr=None,
  48. ):
  49. super().__init__()
  50. self.conv1 = conv3x3(
  51. in_channels, channels, stride, conv_weight_attr=conv_weight_attr
  52. )
  53. self.bn1 = nn.BatchNorm2D(channels, weight_attr=bn_weight_attr)
  54. self.relu = nn.ReLU()
  55. self.conv2 = conv3x3(channels, channels, conv_weight_attr=conv_weight_attr)
  56. self.bn2 = nn.BatchNorm2D(channels, weight_attr=bn_weight_attr)
  57. self.downsample = downsample
  58. if downsample:
  59. self.downsample = nn.Sequential(
  60. nn.Conv2D(
  61. in_channels,
  62. channels * self.expansion,
  63. 1,
  64. stride,
  65. weight_attr=conv_weight_attr,
  66. bias_attr=False,
  67. ),
  68. nn.BatchNorm2D(channels * self.expansion, weight_attr=bn_weight_attr),
  69. )
  70. else:
  71. self.downsample = nn.Sequential()
  72. self.stride = stride
  73. def forward(self, x):
  74. residual = x
  75. out = self.conv1(x)
  76. out = self.bn1(out)
  77. out = self.relu(out)
  78. out = self.conv2(out)
  79. out = self.bn2(out)
  80. if self.downsample:
  81. residual = self.downsample(x)
  82. out += residual
  83. out = self.relu(out)
  84. return out
  85. class ResNet31(nn.Layer):
  86. """
  87. Args:
  88. in_channels (int): Number of channels of input image tensor.
  89. layers (list[int]): List of BasicBlock number for each stage.
  90. channels (list[int]): List of out_channels of Conv2d layer.
  91. out_indices (None | Sequence[int]): Indices of output stages.
  92. last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage.
  93. init_type (None | str): the config to control the initialization.
  94. """
  95. def __init__(
  96. self,
  97. in_channels=3,
  98. layers=[1, 2, 5, 3],
  99. channels=[64, 128, 256, 256, 512, 512, 512],
  100. out_indices=None,
  101. last_stage_pool=False,
  102. init_type=None,
  103. ):
  104. super(ResNet31, self).__init__()
  105. assert isinstance(in_channels, int)
  106. assert isinstance(last_stage_pool, bool)
  107. self.out_indices = out_indices
  108. self.last_stage_pool = last_stage_pool
  109. conv_weight_attr = None
  110. bn_weight_attr = None
  111. if init_type is not None:
  112. support_dict = ["KaimingNormal"]
  113. assert init_type in support_dict, Exception(
  114. "resnet31 only support {}".format(support_dict)
  115. )
  116. conv_weight_attr = nn.initializer.KaimingNormal()
  117. bn_weight_attr = ParamAttr(
  118. initializer=nn.initializer.Uniform(), learning_rate=1
  119. )
  120. # conv 1 (Conv Conv)
  121. self.conv1_1 = nn.Conv2D(
  122. in_channels,
  123. channels[0],
  124. kernel_size=3,
  125. stride=1,
  126. padding=1,
  127. weight_attr=conv_weight_attr,
  128. )
  129. self.bn1_1 = nn.BatchNorm2D(channels[0], weight_attr=bn_weight_attr)
  130. self.relu1_1 = nn.ReLU()
  131. self.conv1_2 = nn.Conv2D(
  132. channels[0],
  133. channels[1],
  134. kernel_size=3,
  135. stride=1,
  136. padding=1,
  137. weight_attr=conv_weight_attr,
  138. )
  139. self.bn1_2 = nn.BatchNorm2D(channels[1], weight_attr=bn_weight_attr)
  140. self.relu1_2 = nn.ReLU()
  141. # conv 2 (Max-pooling, Residual block, Conv)
  142. self.pool2 = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True)
  143. self.block2 = self._make_layer(
  144. channels[1],
  145. channels[2],
  146. layers[0],
  147. conv_weight_attr=conv_weight_attr,
  148. bn_weight_attr=bn_weight_attr,
  149. )
  150. self.conv2 = nn.Conv2D(
  151. channels[2],
  152. channels[2],
  153. kernel_size=3,
  154. stride=1,
  155. padding=1,
  156. weight_attr=conv_weight_attr,
  157. )
  158. self.bn2 = nn.BatchNorm2D(channels[2], weight_attr=bn_weight_attr)
  159. self.relu2 = nn.ReLU()
  160. # conv 3 (Max-pooling, Residual block, Conv)
  161. self.pool3 = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True)
  162. self.block3 = self._make_layer(
  163. channels[2],
  164. channels[3],
  165. layers[1],
  166. conv_weight_attr=conv_weight_attr,
  167. bn_weight_attr=bn_weight_attr,
  168. )
  169. self.conv3 = nn.Conv2D(
  170. channels[3],
  171. channels[3],
  172. kernel_size=3,
  173. stride=1,
  174. padding=1,
  175. weight_attr=conv_weight_attr,
  176. )
  177. self.bn3 = nn.BatchNorm2D(channels[3], weight_attr=bn_weight_attr)
  178. self.relu3 = nn.ReLU()
  179. # conv 4 (Max-pooling, Residual block, Conv)
  180. self.pool4 = nn.MaxPool2D(
  181. kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True
  182. )
  183. self.block4 = self._make_layer(
  184. channels[3],
  185. channels[4],
  186. layers[2],
  187. conv_weight_attr=conv_weight_attr,
  188. bn_weight_attr=bn_weight_attr,
  189. )
  190. self.conv4 = nn.Conv2D(
  191. channels[4],
  192. channels[4],
  193. kernel_size=3,
  194. stride=1,
  195. padding=1,
  196. weight_attr=conv_weight_attr,
  197. )
  198. self.bn4 = nn.BatchNorm2D(channels[4], weight_attr=bn_weight_attr)
  199. self.relu4 = nn.ReLU()
  200. # conv 5 ((Max-pooling), Residual block, Conv)
  201. self.pool5 = None
  202. if self.last_stage_pool:
  203. self.pool5 = nn.MaxPool2D(
  204. kernel_size=2, stride=2, padding=0, ceil_mode=True
  205. )
  206. self.block5 = self._make_layer(
  207. channels[4],
  208. channels[5],
  209. layers[3],
  210. conv_weight_attr=conv_weight_attr,
  211. bn_weight_attr=bn_weight_attr,
  212. )
  213. self.conv5 = nn.Conv2D(
  214. channels[5],
  215. channels[5],
  216. kernel_size=3,
  217. stride=1,
  218. padding=1,
  219. weight_attr=conv_weight_attr,
  220. )
  221. self.bn5 = nn.BatchNorm2D(channels[5], weight_attr=bn_weight_attr)
  222. self.relu5 = nn.ReLU()
  223. self.out_channels = channels[-1]
  224. def _make_layer(
  225. self,
  226. input_channels,
  227. output_channels,
  228. blocks,
  229. conv_weight_attr=None,
  230. bn_weight_attr=None,
  231. ):
  232. layers = []
  233. for _ in range(blocks):
  234. downsample = None
  235. if input_channels != output_channels:
  236. downsample = nn.Sequential(
  237. nn.Conv2D(
  238. input_channels,
  239. output_channels,
  240. kernel_size=1,
  241. stride=1,
  242. weight_attr=conv_weight_attr,
  243. bias_attr=False,
  244. ),
  245. nn.BatchNorm2D(output_channels, weight_attr=bn_weight_attr),
  246. )
  247. layers.append(
  248. BasicBlock(
  249. input_channels,
  250. output_channels,
  251. downsample=downsample,
  252. conv_weight_attr=conv_weight_attr,
  253. bn_weight_attr=bn_weight_attr,
  254. )
  255. )
  256. input_channels = output_channels
  257. return nn.Sequential(*layers)
  258. def forward(self, x):
  259. x = self.conv1_1(x)
  260. x = self.bn1_1(x)
  261. x = self.relu1_1(x)
  262. x = self.conv1_2(x)
  263. x = self.bn1_2(x)
  264. x = self.relu1_2(x)
  265. outs = []
  266. for i in range(4):
  267. layer_index = i + 2
  268. pool_layer = getattr(self, f"pool{layer_index}")
  269. block_layer = getattr(self, f"block{layer_index}")
  270. conv_layer = getattr(self, f"conv{layer_index}")
  271. bn_layer = getattr(self, f"bn{layer_index}")
  272. relu_layer = getattr(self, f"relu{layer_index}")
  273. if pool_layer is not None:
  274. x = pool_layer(x)
  275. x = block_layer(x)
  276. x = conv_layer(x)
  277. x = bn_layer(x)
  278. x = relu_layer(x)
  279. outs.append(x)
  280. if self.out_indices is not None:
  281. return tuple([outs[i] for i in self.out_indices])
  282. return x