fce_postprocess.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. # copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is adapted from:
  16. https://github.com/open-mmlab/mmocr/blob/v0.3.0/mmocr/models/textdet/postprocess/wrapper.py
  17. """
  18. import cv2
  19. import paddle
  20. import numpy as np
  21. from numpy.fft import ifft
  22. from ppocr.utils.poly_nms import poly_nms, valid_boundary
  def fill_hole(input_mask):
      """Fill the enclosed holes of a 2-D binary mask.

      The mask is embedded in a zero canvas that is one pixel larger on
      every side, the background is flood-filled starting from the canvas
      corner, and every pixel the fill could not reach is treated as a hole.

      Args:
          input_mask (ndarray): 2-D mask; the final ``|`` assumes a boolean
              (or otherwise bitwise-or compatible) array.

      Returns:
          ndarray: ``input_mask`` with the unreachable background pixels
              switched on as well.
      """
      rows, cols = input_mask.shape

      # The extra zero border keeps the outer background connected to (0, 0)
      # even when the foreground touches the edge of the mask.
      padded = np.zeros((rows + 2, cols + 2), np.uint8)
      padded[1:-1, 1:-1] = input_mask

      # cv2.floodFill wants an operation mask that is 2 pixels wider and
      # taller than the image it fills.
      flood_mask = np.zeros((rows + 4, cols + 4), np.uint8)
      cv2.floodFill(padded, flood_mask, (0, 0), 1)

      # After the fill, zeros remain only where the background was enclosed.
      reached = padded[1:-1, 1:-1].astype(np.bool_)
      holes = np.logical_not(reached)
      return holes | input_mask
  def fourier2poly(fourier_coeff, num_reconstr_points=50):
      """Reconstruct polygons from Fourier coefficients (inverse transform).

      Args:
          fourier_coeff (ndarray): Complex Fourier coefficients shaped
              (n, 2k+1), with n and k being the number of candidates and the
              Fourier degree respectively. Columns are ordered from degree
              -k to degree +k.
          num_reconstr_points (int): Number of points sampled on every
              reconstructed polygon. Must be at least 2k+1, otherwise the
              negative and positive degrees would land in the same FFT bins.

      Returns:
          ndarray: int32 array shaped (n, 2 * num_reconstr_points). Each row
              holds the interleaved coordinates x0, y0, x1, y1, ... of one
              polygon (real part -> x, imaginary part -> y), truncated
              toward zero by the integer cast.

      Raises:
          ValueError: If ``fourier_coeff`` is not 2-D, does not have an odd
              number of columns, or ``num_reconstr_points`` is smaller than
              the number of coefficients.
      """
      coeffs = np.asarray(fourier_coeff)
      if coeffs.ndim != 2:
          raise ValueError(
              "fourier_coeff must be 2-D with shape (n, 2k+1), got "
              "{} dimension(s)".format(coeffs.ndim)
          )

      num_polys, num_coeffs = coeffs.shape
      if num_coeffs % 2 == 0:
          raise ValueError(
              "fourier_coeff must have an odd number of columns (2k+1), got "
              "{}".format(num_coeffs)
          )
      if num_reconstr_points < num_coeffs:
          raise ValueError(
              "num_reconstr_points ({}) must be >= the number of Fourier "
              "coefficients ({})".format(num_reconstr_points, num_coeffs)
          )

      # No candidates: return an empty result of the documented width instead
      # of indexing into a non-existent first row.
      if num_polys == 0:
          return np.zeros((0, 2 * num_reconstr_points), dtype="int32")

      k = (num_coeffs - 1) // 2

      # Lay the coefficients out in FFT order: degrees 0..k at the front,
      # degrees -k..-1 in the last k bins, everything in between zero.
      spectrum = np.zeros((num_polys, num_reconstr_points), dtype="complex")
      spectrum[:, : k + 1] = coeffs[:, k:]
      if k > 0:
          # Guarded because ``-0:`` would address the whole row, not "no bins".
          spectrum[:, -k:] = coeffs[:, :k]

      # ifft divides by the number of points; multiply back to evaluate the
      # plain Fourier series at the sample positions.
      poly_complex = ifft(spectrum) * num_reconstr_points

      polygon = np.stack((poly_complex.real, poly_complex.imag), axis=-1)
      return polygon.astype("int32").reshape((num_polys, -1))
  class FCEPostProcess(object):
      """
      The post process for FCENet.

      Each prediction level is decoded into scored polygons independently,
      the candidates of all levels are merged with polygon NMS, and the
      surviving boundaries are rescaled with factors taken from
      ``shape_list``.
      """

      def __init__(
          self,
          scales,
          fourier_degree=5,
          num_reconstr_points=50,
          decoding_type="fcenet",
          score_thr=0.3,
          nms_thr=0.1,
          alpha=1.0,
          beta=1.0,
          box_type="poly",
          **kwargs,
      ):
          """Store the decoding configuration.

          Args:
              scales (list): One multiplier per prediction level, in the same
                  order as the levels handed to ``__call__``; decoded
                  coordinates of a level are multiplied by its entry.
              fourier_degree (int): The maximum Fourier transform degree k.
              num_reconstr_points (int): Number of points per reconstructed
                  polygon.
              decoding_type (str): Stored on the instance; not read by any
                  method of this class.
              score_thr (float): Threshold on the combined score map.
              nms_thr (float): Threshold passed to ``poly_nms``.
              alpha (float): Exponent applied to the text region score.
              beta (float): Exponent applied to the text center score.
              box_type (str): Boundary encoding type 'poly' or 'quad'.
              **kwargs: Accepted and ignored.
          """
          self.scales = scales
          self.fourier_degree = fourier_degree
          self.num_reconstr_points = num_reconstr_points
          self.decoding_type = decoding_type
          self.score_thr = score_thr
          self.nms_thr = nms_thr
          self.alpha = alpha
          self.beta = beta
          self.box_type = box_type

      def __call__(self, preds, shape_list):
          """Split every level into classification / regression maps and decode.

          Args:
              preds (dict): One 4-D head output per level (``paddle.Tensor``
                  or ndarray). Along axis 1 the first 4 channels are taken as
                  classification maps and the remaining ones as regression
                  maps. The iteration order of the dict must match
                  ``self.scales``.
              shape_list (ndarray): Forwarded to ``get_boundary``.

          Returns:
              list[dict]: See ``get_boundary``.
          """
          score_maps = []
          for value in preds.values():
              if isinstance(value, paddle.Tensor):
                  value = value.numpy()
              cls_res = value[:, :4, :, :]
              reg_res = value[:, 4:, :, :]
              score_maps.append([cls_res, reg_res])

          return self.get_boundary(score_maps, shape_list)

      def resize_boundary(self, boundaries, scale_factor):
          """Rescale boundaries via scale_factor.

          Args:
              boundaries (list[list[float]]): The boundary list. Each boundary
                  is a flat list of interleaved coordinates followed by one
                  trailing score.
              scale_factor (sequence[float]): Only the first two entries are
                  used: entry 0 multiplies the even-indexed coordinates and
                  entry 1 the odd-indexed ones.

          Returns:
              tuple: ``(boxes, scores)`` where ``boxes`` is a float32 ndarray
                  built from one (num_points, 2) array per boundary and
                  ``scores`` is the list of trailing scores.
          """
          factors = np.asarray(scale_factor[:2])
          boxes = []
          scores = []
          for b in boundaries:
              valid_boundary(b, True)
              scores.append(b[-1])
              # Drop the score, view the rest as point pairs and scale each
              # column with its own factor.
              points = np.array(b[:-1]).reshape(-1, 2) * factors
              boxes.append(points)
          return np.array(boxes, dtype=np.float32), scores

      def get_boundary(self, score_maps, shape_list):
          """Decode every level, merge the candidates and rescale them.

          Args:
              score_maps (list): One ``[cls_res, reg_res]`` pair per level;
                  must have as many entries as ``self.scales``.
              shape_list (ndarray): 2-D array; only row 0 is read. Its columns
                  from index 2 on are inverted and reversed to form the scale
                  factor (presumably resize ratios recorded by preprocessing;
                  verify against the caller).

          Returns:
              list[dict]: A single-element list holding a dict with keys
                  ``points`` and ``scores``.
          """
          assert len(score_maps) == len(self.scales)
          boundaries = []
          for score_map, scale in zip(score_maps, self.scales):
              boundaries = boundaries + self._get_boundary_single(score_map, scale)

          # nms
          boundaries = poly_nms(boundaries, self.nms_thr)
          boundaries, scores = self.resize_boundary(
              boundaries, (1 / shape_list[0, 2:]).tolist()[::-1]
          )

          boxes_batch = [dict(points=boundaries, scores=scores)]
          return boxes_batch

      def _get_boundary_single(self, score_map, scale):
          """Decode one prediction level with the instance configuration.

          Args:
              score_map (list): ``[cls_res, reg_res]``; ``reg_res`` must have
                  ``4 * fourier_degree + 2`` channels along axis 1.
              scale: Multiplier applied to the decoded coordinates.

          Returns:
              list[list[float]]: Boundaries as returned by ``fcenet_decode``.
          """
          assert len(score_map) == 2
          assert score_map[1].shape[1] == 4 * self.fourier_degree + 2

          return self.fcenet_decode(
              preds=score_map,
              fourier_degree=self.fourier_degree,
              num_reconstr_points=self.num_reconstr_points,
              scale=scale,
              alpha=self.alpha,
              beta=self.beta,
              box_type=self.box_type,
              score_thr=self.score_thr,
              nms_thr=self.nms_thr,
          )

      def fcenet_decode(
          self,
          preds,
          fourier_degree,
          num_reconstr_points,
          scale,
          alpha=1.0,
          beta=2.0,
          box_type="poly",
          score_thr=0.3,
          nms_thr=0.1,
      ):
          """Decoding predictions of FCENet to instances.

          Only the first element along the batch axis of each map is decoded.

          Args:
              preds (list): ``[cls_res, reg_res]`` head outputs of one level,
                  as ndarrays.
              fourier_degree (int): The maximum Fourier transform degree k.
              num_reconstr_points (int): The points number of the polygon
                  reconstructed from predicted Fourier coefficients.
              scale (int): The down-sample scale of the prediction.
              alpha (float) : The parameter to calculate final scores.
                  Score_{final} = (Score_{text region} ^ alpha)
                  * (Score_{text center region} ^ beta)
              beta (float) : The parameter to calculate final score.
              box_type (str): Boundary encoding type 'poly' or 'quad'.
              score_thr (float) : The threshold used to filter out the final
                  candidates.
              nms_thr (float) : The threshold of nms.

          Returns:
              boundaries (list[list[float]]): The instance boundary and
                  confidence list. Each entry is the flat coordinate list
                  followed by its score.
          """
          assert isinstance(preds, list)
          assert len(preds) == 2
          assert box_type in ["poly", "quad"]

          # Channels 0-1 form the text region pair, channels 2+ the text
          # center pair; index 1 of each pair is used as its score
          # (presumably the positive-class probability; confirm with the head).
          cls_pred = preds[0][0]
          tr_pred = cls_pred[0:2]
          tcl_pred = cls_pred[2:]

          # Move channels last so a boolean pixel mask selects one coefficient
          # vector per pixel; the first 2k+1 channels go to x, the rest to y.
          reg_pred = preds[1][0].transpose([1, 2, 0])
          x_pred = reg_pred[:, :, : 2 * fourier_degree + 1]
          y_pred = reg_pred[:, :, 2 * fourier_degree + 1 :]

          score_pred = (tr_pred[1] ** alpha) * (tcl_pred[1] ** beta)
          tr_pred_mask = score_pred > score_thr
          tr_mask = fill_hole(tr_pred_mask)

          # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x and
          # (image, contours, hierarchy) on 3.x; the contours are always the
          # second-to-last item.
          tr_contours = cv2.findContours(
              tr_mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
          )[-2]

          boundaries = []
          for cont in tr_contours:
              # Rasterise the filled contour to restrict the scores to it.
              deal_map = np.zeros(tr_mask.shape, dtype=np.int8)
              cv2.drawContours(deal_map, [cont], -1, 1, -1)

              score_map = score_pred * deal_map
              score_mask = score_map > 0
              xy_text = np.argwhere(score_mask)
              if xy_text.shape[0] == 0:
                  # Nothing to decode for this contour; skip it rather than
                  # pass an empty coefficient batch on.
                  continue
              # argwhere yields (row, col); column is x (real), row is y (imag).
              dxy = xy_text[:, 1] + xy_text[:, 0] * 1j

              x, y = x_pred[score_mask], y_pred[score_mask]
              c = x + y * 1j
              # Column ``fourier_degree`` is the degree-0 term: add the pixel
              # position to it so the polygon is placed at that pixel.
              c[:, fourier_degree] = c[:, fourier_degree] + dxy
              c *= scale

              polygons = fourier2poly(c, num_reconstr_points)
              score = score_map[score_mask].reshape(-1, 1)
              polygons = poly_nms(np.hstack((polygons, score)).tolist(), nms_thr)

              boundaries = boundaries + polygons

          boundaries = poly_nms(boundaries, nms_thr)

          if box_type == "quad":
              # Replace every polygon by the corners of its minimum-area
              # rectangle, keeping the score as the last element.
              new_boundaries = []
              for boundary in boundaries:
                  poly = np.array(boundary[:-1]).reshape(-1, 2).astype(np.float32)
                  score = boundary[-1]
                  points = cv2.boxPoints(cv2.minAreaRect(poly))
                  points = np.int64(points)
                  new_boundaries.append(points.reshape(-1).tolist() + [score])
              boundaries = new_boundaries

          return boundaries