e2e_pg_head.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import math
  18. import paddle
  19. from paddle import nn
  20. import paddle.nn.functional as F
  21. from paddle import ParamAttr
  22. class ConvBNLayer(nn.Layer):
  23. def __init__(
  24. self,
  25. in_channels,
  26. out_channels,
  27. kernel_size,
  28. stride,
  29. padding,
  30. groups=1,
  31. if_act=True,
  32. act=None,
  33. name=None,
  34. ):
  35. super(ConvBNLayer, self).__init__()
  36. self.if_act = if_act
  37. self.act = act
  38. self.conv = nn.Conv2D(
  39. in_channels=in_channels,
  40. out_channels=out_channels,
  41. kernel_size=kernel_size,
  42. stride=stride,
  43. padding=padding,
  44. groups=groups,
  45. weight_attr=ParamAttr(name=name + "_weights"),
  46. bias_attr=False,
  47. )
  48. self.bn = nn.BatchNorm(
  49. num_channels=out_channels,
  50. act=act,
  51. param_attr=ParamAttr(name="bn_" + name + "_scale"),
  52. bias_attr=ParamAttr(name="bn_" + name + "_offset"),
  53. moving_mean_name="bn_" + name + "_mean",
  54. moving_variance_name="bn_" + name + "_variance",
  55. use_global_stats=False,
  56. )
  57. def forward(self, x):
  58. x = self.conv(x)
  59. x = self.bn(x)
  60. return x
  61. class PGHead(nn.Layer):
  62. """ """
  63. def __init__(
  64. self, in_channels, character_dict_path="ppocr/utils/ic15_dict.txt", **kwargs
  65. ):
  66. super(PGHead, self).__init__()
  67. # get character_length
  68. with open(character_dict_path, "rb") as fin:
  69. lines = fin.readlines()
  70. character_length = len(lines) + 1
  71. self.conv_f_score1 = ConvBNLayer(
  72. in_channels=in_channels,
  73. out_channels=64,
  74. kernel_size=1,
  75. stride=1,
  76. padding=0,
  77. act="relu",
  78. name="conv_f_score{}".format(1),
  79. )
  80. self.conv_f_score2 = ConvBNLayer(
  81. in_channels=64,
  82. out_channels=64,
  83. kernel_size=3,
  84. stride=1,
  85. padding=1,
  86. act="relu",
  87. name="conv_f_score{}".format(2),
  88. )
  89. self.conv_f_score3 = ConvBNLayer(
  90. in_channels=64,
  91. out_channels=128,
  92. kernel_size=1,
  93. stride=1,
  94. padding=0,
  95. act="relu",
  96. name="conv_f_score{}".format(3),
  97. )
  98. self.conv1 = nn.Conv2D(
  99. in_channels=128,
  100. out_channels=1,
  101. kernel_size=3,
  102. stride=1,
  103. padding=1,
  104. groups=1,
  105. weight_attr=ParamAttr(name="conv_f_score{}".format(4)),
  106. bias_attr=False,
  107. )
  108. self.conv_f_boder1 = ConvBNLayer(
  109. in_channels=in_channels,
  110. out_channels=64,
  111. kernel_size=1,
  112. stride=1,
  113. padding=0,
  114. act="relu",
  115. name="conv_f_boder{}".format(1),
  116. )
  117. self.conv_f_boder2 = ConvBNLayer(
  118. in_channels=64,
  119. out_channels=64,
  120. kernel_size=3,
  121. stride=1,
  122. padding=1,
  123. act="relu",
  124. name="conv_f_boder{}".format(2),
  125. )
  126. self.conv_f_boder3 = ConvBNLayer(
  127. in_channels=64,
  128. out_channels=128,
  129. kernel_size=1,
  130. stride=1,
  131. padding=0,
  132. act="relu",
  133. name="conv_f_boder{}".format(3),
  134. )
  135. self.conv2 = nn.Conv2D(
  136. in_channels=128,
  137. out_channels=4,
  138. kernel_size=3,
  139. stride=1,
  140. padding=1,
  141. groups=1,
  142. weight_attr=ParamAttr(name="conv_f_boder{}".format(4)),
  143. bias_attr=False,
  144. )
  145. self.conv_f_char1 = ConvBNLayer(
  146. in_channels=in_channels,
  147. out_channels=128,
  148. kernel_size=1,
  149. stride=1,
  150. padding=0,
  151. act="relu",
  152. name="conv_f_char{}".format(1),
  153. )
  154. self.conv_f_char2 = ConvBNLayer(
  155. in_channels=128,
  156. out_channels=128,
  157. kernel_size=3,
  158. stride=1,
  159. padding=1,
  160. act="relu",
  161. name="conv_f_char{}".format(2),
  162. )
  163. self.conv_f_char3 = ConvBNLayer(
  164. in_channels=128,
  165. out_channels=256,
  166. kernel_size=1,
  167. stride=1,
  168. padding=0,
  169. act="relu",
  170. name="conv_f_char{}".format(3),
  171. )
  172. self.conv_f_char4 = ConvBNLayer(
  173. in_channels=256,
  174. out_channels=256,
  175. kernel_size=3,
  176. stride=1,
  177. padding=1,
  178. act="relu",
  179. name="conv_f_char{}".format(4),
  180. )
  181. self.conv_f_char5 = ConvBNLayer(
  182. in_channels=256,
  183. out_channels=256,
  184. kernel_size=1,
  185. stride=1,
  186. padding=0,
  187. act="relu",
  188. name="conv_f_char{}".format(5),
  189. )
  190. self.conv3 = nn.Conv2D(
  191. in_channels=256,
  192. out_channels=character_length,
  193. kernel_size=3,
  194. stride=1,
  195. padding=1,
  196. groups=1,
  197. weight_attr=ParamAttr(name="conv_f_char{}".format(6)),
  198. bias_attr=False,
  199. )
  200. self.conv_f_direc1 = ConvBNLayer(
  201. in_channels=in_channels,
  202. out_channels=64,
  203. kernel_size=1,
  204. stride=1,
  205. padding=0,
  206. act="relu",
  207. name="conv_f_direc{}".format(1),
  208. )
  209. self.conv_f_direc2 = ConvBNLayer(
  210. in_channels=64,
  211. out_channels=64,
  212. kernel_size=3,
  213. stride=1,
  214. padding=1,
  215. act="relu",
  216. name="conv_f_direc{}".format(2),
  217. )
  218. self.conv_f_direc3 = ConvBNLayer(
  219. in_channels=64,
  220. out_channels=128,
  221. kernel_size=1,
  222. stride=1,
  223. padding=0,
  224. act="relu",
  225. name="conv_f_direc{}".format(3),
  226. )
  227. self.conv4 = nn.Conv2D(
  228. in_channels=128,
  229. out_channels=2,
  230. kernel_size=3,
  231. stride=1,
  232. padding=1,
  233. groups=1,
  234. weight_attr=ParamAttr(name="conv_f_direc{}".format(4)),
  235. bias_attr=False,
  236. )
  237. def forward(self, x, targets=None):
  238. f_score = self.conv_f_score1(x)
  239. f_score = self.conv_f_score2(f_score)
  240. f_score = self.conv_f_score3(f_score)
  241. f_score = self.conv1(f_score)
  242. f_score = F.sigmoid(f_score)
  243. # f_border
  244. f_border = self.conv_f_boder1(x)
  245. f_border = self.conv_f_boder2(f_border)
  246. f_border = self.conv_f_boder3(f_border)
  247. f_border = self.conv2(f_border)
  248. f_char = self.conv_f_char1(x)
  249. f_char = self.conv_f_char2(f_char)
  250. f_char = self.conv_f_char3(f_char)
  251. f_char = self.conv_f_char4(f_char)
  252. f_char = self.conv_f_char5(f_char)
  253. f_char = self.conv3(f_char)
  254. f_direction = self.conv_f_direc1(x)
  255. f_direction = self.conv_f_direc2(f_direction)
  256. f_direction = self.conv_f_direc3(f_direction)
  257. f_direction = self.conv4(f_direction)
  258. predicts = {}
  259. predicts["f_score"] = f_score
  260. predicts["f_border"] = f_border
  261. predicts["f_char"] = f_char
  262. predicts["f_direction"] = f_direction
  263. return predicts