rec_shallow_cnn.py 2.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. # copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is adapted from:
  16. https://github.com/open-mmlab/mmocr/blob/1.x/mmocr/models/textrecog/backbones/shallow_cnn.py
  17. """
  18. from __future__ import absolute_import
  19. from __future__ import division
  20. from __future__ import print_function
  21. import math
  22. import numpy as np
  23. import paddle
  24. from paddle import ParamAttr
  25. import paddle.nn as nn
  26. import paddle.nn.functional as F
  27. from paddle.nn import MaxPool2D
  28. from paddle.nn.initializer import KaimingNormal, Uniform, Constant
  class ConvBNLayer(nn.Layer):
      """Convolution, batch normalization and ReLU applied in sequence.

      The convolution is created without a bias term and its weights use
      Kaiming-normal initialization. The batch-norm scale is initialized
      from Uniform(0, 1) and the batch-norm shift starts at zero.

      Args:
          num_channels: Number of input channels of the convolution.
          filter_size: Kernel size forwarded to ``nn.Conv2D``.
          num_filters: Number of output channels of the convolution; also
              the feature count given to ``nn.BatchNorm2D``.
          stride: Stride forwarded to ``nn.Conv2D``.
          padding: Padding forwarded to ``nn.Conv2D``.
          num_groups: Value for the convolution's ``groups`` argument.
              Defaults to 1.
      """

      def __init__(
          self, num_channels, filter_size, num_filters, stride, padding, num_groups=1
      ):
          super().__init__()

          # Bias is disabled on the convolution; the batch-norm layer that
          # follows carries its own learnable shift.
          conv_weight_attr = ParamAttr(initializer=KaimingNormal())
          self.conv = nn.Conv2D(
              num_channels,
              num_filters,
              filter_size,
              stride=stride,
              padding=padding,
              groups=num_groups,
              weight_attr=conv_weight_attr,
              bias_attr=False,
          )

          bn_scale_attr = ParamAttr(initializer=Uniform(0, 1))
          bn_shift_attr = ParamAttr(initializer=Constant(0))
          self.bn = nn.BatchNorm2D(
              num_filters,
              weight_attr=bn_scale_attr,
              bias_attr=bn_shift_attr,
          )

          self.relu = nn.ReLU()

      def forward(self, inputs):
          """Return ``relu(bn(conv(inputs)))``."""
          return self.relu(self.bn(self.conv(inputs)))
  class ShallowCNN(nn.Layer):
      """Two convolutional stages, each followed by 2x2 max pooling.

      The first stage maps ``in_channels`` to ``hidden_dim // 2`` channels
      and the second maps ``hidden_dim // 2`` to ``hidden_dim`` channels.
      Both stages are ``ConvBNLayer`` blocks with a 3x3 kernel, stride 1
      and padding 1. A single ``nn.MaxPool2D`` (kernel 2, stride 2, no
      padding) is reused after each stage.

      Args:
          in_channels: Number of channels of the input. Defaults to 1.
          hidden_dim: Number of channels of the output. Defaults to 512.

      Raises:
          AssertionError: If ``in_channels`` or ``hidden_dim`` is not an
              ``int``.
      """

      def __init__(self, in_channels=1, hidden_dim=512):
          super().__init__()
          assert isinstance(in_channels, int)
          assert isinstance(hidden_dim, int)

          # Width of the intermediate feature map between the two stages.
          mid_dim = hidden_dim // 2

          self.conv1 = ConvBNLayer(
              num_channels=in_channels,
              filter_size=3,
              num_filters=mid_dim,
              stride=1,
              padding=1,
          )
          self.conv2 = ConvBNLayer(
              num_channels=mid_dim,
              filter_size=3,
              num_filters=hidden_dim,
              stride=1,
              padding=1,
          )
          self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)

          # Channel count of the tensor returned by forward().
          self.out_channels = hidden_dim

      def forward(self, x):
          """Apply conv1, pool, conv2, pool in that order and return the result."""
          for stage in (self.conv1, self.conv2):
              x = self.pool(stage(x))
          return x
  69. return x