fce_targets.py 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is adapted from:
  16. https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/textdet_targets/fcenet_targets.py
  17. """
  18. import cv2
  19. import numpy as np
  20. from numpy.fft import fft
  21. from numpy.linalg import norm
  22. import sys
  23. def vector_slope(vec):
  24. assert len(vec) == 2
  25. return abs(vec[1] / (vec[0] + 1e-8))
  26. class FCENetTargets:
  27. """Generate the ground truth targets of FCENet: Fourier Contour Embedding
  28. for Arbitrary-Shaped Text Detection.
  29. [https://arxiv.org/abs/2104.10442]
  30. Args:
  31. fourier_degree (int): The maximum Fourier transform degree k.
  32. resample_step (float): The step size for resampling the text center
  33. line (TCL). It's better not to exceed half of the minimum width.
  34. center_region_shrink_ratio (float): The shrink ratio of text center
  35. region.
  36. level_size_divisors (tuple(int)): The downsample ratio on each level.
  37. level_proportion_range (tuple(tuple(int))): The range of text sizes
  38. assigned to each level.
  39. """
  40. def __init__(
  41. self,
  42. fourier_degree=5,
  43. resample_step=4.0,
  44. center_region_shrink_ratio=0.3,
  45. level_size_divisors=(8, 16, 32),
  46. level_proportion_range=((0, 0.25), (0.2, 0.65), (0.55, 1.0)),
  47. orientation_thr=2.0,
  48. **kwargs,
  49. ):
  50. super().__init__()
  51. assert isinstance(level_size_divisors, tuple)
  52. assert isinstance(level_proportion_range, tuple)
  53. assert len(level_size_divisors) == len(level_proportion_range)
  54. self.fourier_degree = fourier_degree
  55. self.resample_step = resample_step
  56. self.center_region_shrink_ratio = center_region_shrink_ratio
  57. self.level_size_divisors = level_size_divisors
  58. self.level_proportion_range = level_proportion_range
  59. self.orientation_thr = orientation_thr
  60. def vector_angle(self, vec1, vec2):
  61. if vec1.ndim > 1:
  62. unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8).reshape((-1, 1))
  63. else:
  64. unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8)
  65. if vec2.ndim > 1:
  66. unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8).reshape((-1, 1))
  67. else:
  68. unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8)
  69. return np.arccos(np.clip(np.sum(unit_vec1 * unit_vec2, axis=-1), -1.0, 1.0))
  70. def resample_line(self, line, n):
  71. """Resample n points on a line.
  72. Args:
  73. line (ndarray): The points composing a line.
  74. n (int): The resampled points number.
  75. Returns:
  76. resampled_line (ndarray): The points composing the resampled line.
  77. """
  78. assert line.ndim == 2
  79. assert line.shape[0] >= 2
  80. assert line.shape[1] == 2
  81. assert isinstance(n, int)
  82. assert n > 0
  83. length_list = [norm(line[i + 1] - line[i]) for i in range(len(line) - 1)]
  84. total_length = sum(length_list)
  85. length_cumsum = np.cumsum([0.0] + length_list)
  86. delta_length = total_length / (float(n) + 1e-8)
  87. current_edge_ind = 0
  88. resampled_line = [line[0]]
  89. for i in range(1, n):
  90. current_line_len = i * delta_length
  91. while (
  92. current_edge_ind + 1 < len(length_cumsum)
  93. and current_line_len >= length_cumsum[current_edge_ind + 1]
  94. ):
  95. current_edge_ind += 1
  96. current_edge_end_shift = current_line_len - length_cumsum[current_edge_ind]
  97. if current_edge_ind >= len(length_list):
  98. break
  99. end_shift_ratio = current_edge_end_shift / length_list[current_edge_ind]
  100. current_point = (
  101. line[current_edge_ind]
  102. + (line[current_edge_ind + 1] - line[current_edge_ind])
  103. * end_shift_ratio
  104. )
  105. resampled_line.append(current_point)
  106. resampled_line.append(line[-1])
  107. resampled_line = np.array(resampled_line)
  108. return resampled_line
  109. def reorder_poly_edge(self, points):
  110. """Get the respective points composing head edge, tail edge, top
  111. sideline and bottom sideline.
  112. Args:
  113. points (ndarray): The points composing a text polygon.
  114. Returns:
  115. head_edge (ndarray): The two points composing the head edge of text
  116. polygon.
  117. tail_edge (ndarray): The two points composing the tail edge of text
  118. polygon.
  119. top_sideline (ndarray): The points composing top curved sideline of
  120. text polygon.
  121. bot_sideline (ndarray): The points composing bottom curved sideline
  122. of text polygon.
  123. """
  124. assert points.ndim == 2
  125. assert points.shape[0] >= 4
  126. assert points.shape[1] == 2
  127. head_inds, tail_inds = self.find_head_tail(points, self.orientation_thr)
  128. head_edge, tail_edge = points[head_inds], points[tail_inds]
  129. pad_points = np.vstack([points, points])
  130. if tail_inds[1] < 1:
  131. tail_inds[1] = len(points)
  132. sideline1 = pad_points[head_inds[1] : tail_inds[1]]
  133. sideline2 = pad_points[tail_inds[1] : (head_inds[1] + len(points))]
  134. sideline_mean_shift = np.mean(sideline1, axis=0) - np.mean(sideline2, axis=0)
  135. if sideline_mean_shift[1] > 0:
  136. top_sideline, bot_sideline = sideline2, sideline1
  137. else:
  138. top_sideline, bot_sideline = sideline1, sideline2
  139. return head_edge, tail_edge, top_sideline, bot_sideline
  140. def find_head_tail(self, points, orientation_thr):
  141. """Find the head edge and tail edge of a text polygon.
  142. Args:
  143. points (ndarray): The points composing a text polygon.
  144. orientation_thr (float): The threshold for distinguishing between
  145. head edge and tail edge among the horizontal and vertical edges
  146. of a quadrangle.
  147. Returns:
  148. head_inds (list): The indexes of two points composing head edge.
  149. tail_inds (list): The indexes of two points composing tail edge.
  150. """
  151. assert points.ndim == 2
  152. assert points.shape[0] >= 4
  153. assert points.shape[1] == 2
  154. assert isinstance(orientation_thr, float)
  155. if len(points) > 4:
  156. pad_points = np.vstack([points, points[0]])
  157. edge_vec = pad_points[1:] - pad_points[:-1]
  158. theta_sum = []
  159. adjacent_vec_theta = []
  160. for i, edge_vec1 in enumerate(edge_vec):
  161. adjacent_ind = [x % len(edge_vec) for x in [i - 1, i + 1]]
  162. adjacent_edge_vec = edge_vec[adjacent_ind]
  163. temp_theta_sum = np.sum(self.vector_angle(edge_vec1, adjacent_edge_vec))
  164. temp_adjacent_theta = self.vector_angle(
  165. adjacent_edge_vec[0], adjacent_edge_vec[1]
  166. )
  167. theta_sum.append(temp_theta_sum)
  168. adjacent_vec_theta.append(temp_adjacent_theta)
  169. theta_sum_score = np.array(theta_sum) / np.pi
  170. adjacent_theta_score = np.array(adjacent_vec_theta) / np.pi
  171. poly_center = np.mean(points, axis=0)
  172. edge_dist = np.maximum(
  173. norm(pad_points[1:] - poly_center, axis=-1),
  174. norm(pad_points[:-1] - poly_center, axis=-1),
  175. )
  176. dist_score = edge_dist / np.max(edge_dist)
  177. position_score = np.zeros(len(edge_vec))
  178. score = 0.5 * theta_sum_score + 0.15 * adjacent_theta_score
  179. score += 0.35 * dist_score
  180. if len(points) % 2 == 0:
  181. position_score[(len(score) // 2 - 1)] += 1
  182. position_score[-1] += 1
  183. score += 0.1 * position_score
  184. pad_score = np.concatenate([score, score])
  185. score_matrix = np.zeros((len(score), len(score) - 3))
  186. x = np.arange(len(score) - 3) / float(len(score) - 4)
  187. gaussian = (
  188. 1.0
  189. / (np.sqrt(2.0 * np.pi) * 0.5)
  190. * np.exp(-np.power((x - 0.5) / 0.5, 2.0) / 2)
  191. )
  192. gaussian = gaussian / np.max(gaussian)
  193. for i in range(len(score)):
  194. score_matrix[i, :] = (
  195. score[i]
  196. + pad_score[(i + 2) : (i + len(score) - 1)] * gaussian * 0.3
  197. )
  198. head_start, tail_increment = np.unravel_index(
  199. score_matrix.argmax(), score_matrix.shape
  200. )
  201. tail_start = (head_start + tail_increment + 2) % len(points)
  202. head_end = (head_start + 1) % len(points)
  203. tail_end = (tail_start + 1) % len(points)
  204. if head_end > tail_end:
  205. head_start, tail_start = tail_start, head_start
  206. head_end, tail_end = tail_end, head_end
  207. head_inds = [head_start, head_end]
  208. tail_inds = [tail_start, tail_end]
  209. else:
  210. if vector_slope(points[1] - points[0]) + vector_slope(
  211. points[3] - points[2]
  212. ) < vector_slope(points[2] - points[1]) + vector_slope(
  213. points[0] - points[3]
  214. ):
  215. horizontal_edge_inds = [[0, 1], [2, 3]]
  216. vertical_edge_inds = [[3, 0], [1, 2]]
  217. else:
  218. horizontal_edge_inds = [[3, 0], [1, 2]]
  219. vertical_edge_inds = [[0, 1], [2, 3]]
  220. vertical_len_sum = norm(
  221. points[vertical_edge_inds[0][0]] - points[vertical_edge_inds[0][1]]
  222. ) + norm(
  223. points[vertical_edge_inds[1][0]] - points[vertical_edge_inds[1][1]]
  224. )
  225. horizontal_len_sum = norm(
  226. points[horizontal_edge_inds[0][0]] - points[horizontal_edge_inds[0][1]]
  227. ) + norm(
  228. points[horizontal_edge_inds[1][0]] - points[horizontal_edge_inds[1][1]]
  229. )
  230. if vertical_len_sum > horizontal_len_sum * orientation_thr:
  231. head_inds = horizontal_edge_inds[0]
  232. tail_inds = horizontal_edge_inds[1]
  233. else:
  234. head_inds = vertical_edge_inds[0]
  235. tail_inds = vertical_edge_inds[1]
  236. return head_inds, tail_inds
  237. def resample_sidelines(self, sideline1, sideline2, resample_step):
  238. """Resample two sidelines to be of the same points number according to
  239. step size.
  240. Args:
  241. sideline1 (ndarray): The points composing a sideline of a text
  242. polygon.
  243. sideline2 (ndarray): The points composing another sideline of a
  244. text polygon.
  245. resample_step (float): The resampled step size.
  246. Returns:
  247. resampled_line1 (ndarray): The resampled line 1.
  248. resampled_line2 (ndarray): The resampled line 2.
  249. """
  250. assert sideline1.ndim == sideline2.ndim == 2
  251. assert sideline1.shape[1] == sideline2.shape[1] == 2
  252. assert sideline1.shape[0] >= 2
  253. assert sideline2.shape[0] >= 2
  254. assert isinstance(resample_step, float)
  255. length1 = sum(
  256. [norm(sideline1[i + 1] - sideline1[i]) for i in range(len(sideline1) - 1)]
  257. )
  258. length2 = sum(
  259. [norm(sideline2[i + 1] - sideline2[i]) for i in range(len(sideline2) - 1)]
  260. )
  261. total_length = (length1 + length2) / 2
  262. resample_point_num = max(int(float(total_length) / resample_step), 1)
  263. resampled_line1 = self.resample_line(sideline1, resample_point_num)
  264. resampled_line2 = self.resample_line(sideline2, resample_point_num)
  265. return resampled_line1, resampled_line2
  266. def generate_center_region_mask(self, img_size, text_polys):
  267. """Generate text center region mask.
  268. Args:
  269. img_size (tuple): The image size of (height, width).
  270. text_polys (list[list[ndarray]]): The list of text polygons.
  271. Returns:
  272. center_region_mask (ndarray): The text center region mask.
  273. """
  274. assert isinstance(img_size, tuple)
  275. # assert check_argument.is_2dlist(text_polys)
  276. h, w = img_size
  277. center_region_mask = np.zeros((h, w), np.uint8)
  278. center_region_boxes = []
  279. for poly in text_polys:
  280. # assert len(poly) == 1
  281. polygon_points = poly.reshape(-1, 2)
  282. _, _, top_line, bot_line = self.reorder_poly_edge(polygon_points)
  283. resampled_top_line, resampled_bot_line = self.resample_sidelines(
  284. top_line, bot_line, self.resample_step
  285. )
  286. resampled_bot_line = resampled_bot_line[::-1]
  287. if len(resampled_top_line) != len(resampled_bot_line):
  288. continue
  289. center_line = (resampled_top_line + resampled_bot_line) / 2
  290. line_head_shrink_len = (
  291. norm(resampled_top_line[0] - resampled_bot_line[0]) / 4.0
  292. )
  293. line_tail_shrink_len = (
  294. norm(resampled_top_line[-1] - resampled_bot_line[-1]) / 4.0
  295. )
  296. head_shrink_num = int(line_head_shrink_len // self.resample_step)
  297. tail_shrink_num = int(line_tail_shrink_len // self.resample_step)
  298. if len(center_line) > head_shrink_num + tail_shrink_num + 2:
  299. center_line = center_line[
  300. head_shrink_num : len(center_line) - tail_shrink_num
  301. ]
  302. resampled_top_line = resampled_top_line[
  303. head_shrink_num : len(resampled_top_line) - tail_shrink_num
  304. ]
  305. resampled_bot_line = resampled_bot_line[
  306. head_shrink_num : len(resampled_bot_line) - tail_shrink_num
  307. ]
  308. for i in range(0, len(center_line) - 1):
  309. tl = (
  310. center_line[i]
  311. + (resampled_top_line[i] - center_line[i])
  312. * self.center_region_shrink_ratio
  313. )
  314. tr = (
  315. center_line[i + 1]
  316. + (resampled_top_line[i + 1] - center_line[i + 1])
  317. * self.center_region_shrink_ratio
  318. )
  319. br = (
  320. center_line[i + 1]
  321. + (resampled_bot_line[i + 1] - center_line[i + 1])
  322. * self.center_region_shrink_ratio
  323. )
  324. bl = (
  325. center_line[i]
  326. + (resampled_bot_line[i] - center_line[i])
  327. * self.center_region_shrink_ratio
  328. )
  329. current_center_box = np.vstack([tl, tr, br, bl]).astype(np.int32)
  330. center_region_boxes.append(current_center_box)
  331. cv2.fillPoly(center_region_mask, center_region_boxes, 1)
  332. return center_region_mask
  333. def resample_polygon(self, polygon, n=400):
  334. """Resample one polygon with n points on its boundary.
  335. Args:
  336. polygon (list[float]): The input polygon.
  337. n (int): The number of resampled points.
  338. Returns:
  339. resampled_polygon (list[float]): The resampled polygon.
  340. """
  341. length = []
  342. for i in range(len(polygon)):
  343. p1 = polygon[i]
  344. if i == len(polygon) - 1:
  345. p2 = polygon[0]
  346. else:
  347. p2 = polygon[i + 1]
  348. length.append(((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) ** 0.5)
  349. total_length = sum(length)
  350. n_on_each_line = (np.array(length) / (total_length + 1e-8)) * n
  351. n_on_each_line = n_on_each_line.astype(np.int32)
  352. new_polygon = []
  353. for i in range(len(polygon)):
  354. num = n_on_each_line[i]
  355. p1 = polygon[i]
  356. if i == len(polygon) - 1:
  357. p2 = polygon[0]
  358. else:
  359. p2 = polygon[i + 1]
  360. if num == 0:
  361. continue
  362. dxdy = (p2 - p1) / num
  363. for j in range(num):
  364. point = p1 + dxdy * j
  365. new_polygon.append(point)
  366. return np.array(new_polygon)
  367. def normalize_polygon(self, polygon):
  368. """Normalize one polygon so that its start point is at right most.
  369. Args:
  370. polygon (list[float]): The origin polygon.
  371. Returns:
  372. new_polygon (lost[float]): The polygon with start point at right.
  373. """
  374. temp_polygon = polygon - polygon.mean(axis=0)
  375. x = np.abs(temp_polygon[:, 0])
  376. y = temp_polygon[:, 1]
  377. index_x = np.argsort(x)
  378. index_y = np.argmin(y[index_x[:8]])
  379. index = index_x[index_y]
  380. new_polygon = np.concatenate([polygon[index:], polygon[:index]])
  381. return new_polygon
  382. def poly2fourier(self, polygon, fourier_degree):
  383. """Perform Fourier transformation to generate Fourier coefficients ck
  384. from polygon.
  385. Args:
  386. polygon (ndarray): An input polygon.
  387. fourier_degree (int): The maximum Fourier degree K.
  388. Returns:
  389. c (ndarray(complex)): Fourier coefficients.
  390. """
  391. points = polygon[:, 0] + polygon[:, 1] * 1j
  392. c_fft = fft(points) / len(points)
  393. c = np.hstack((c_fft[-fourier_degree:], c_fft[: fourier_degree + 1]))
  394. return c
  395. def clockwise(self, c, fourier_degree):
  396. """Make sure the polygon reconstructed from Fourier coefficients c in
  397. the clockwise direction.
  398. Args:
  399. polygon (list[float]): The origin polygon.
  400. Returns:
  401. new_polygon (lost[float]): The polygon in clockwise point order.
  402. """
  403. if np.abs(c[fourier_degree + 1]) > np.abs(c[fourier_degree - 1]):
  404. return c
  405. elif np.abs(c[fourier_degree + 1]) < np.abs(c[fourier_degree - 1]):
  406. return c[::-1]
  407. else:
  408. if np.abs(c[fourier_degree + 2]) > np.abs(c[fourier_degree - 2]):
  409. return c
  410. else:
  411. return c[::-1]
  412. def cal_fourier_signature(self, polygon, fourier_degree):
  413. """Calculate Fourier signature from input polygon.
  414. Args:
  415. polygon (ndarray): The input polygon.
  416. fourier_degree (int): The maximum Fourier degree K.
  417. Returns:
  418. fourier_signature (ndarray): An array shaped (2k+1, 2) containing
  419. real part and image part of 2k+1 Fourier coefficients.
  420. """
  421. resampled_polygon = self.resample_polygon(polygon)
  422. resampled_polygon = self.normalize_polygon(resampled_polygon)
  423. fourier_coeff = self.poly2fourier(resampled_polygon, fourier_degree)
  424. fourier_coeff = self.clockwise(fourier_coeff, fourier_degree)
  425. real_part = np.real(fourier_coeff).reshape((-1, 1))
  426. image_part = np.imag(fourier_coeff).reshape((-1, 1))
  427. fourier_signature = np.hstack([real_part, image_part])
  428. return fourier_signature
  429. def generate_fourier_maps(self, img_size, text_polys):
  430. """Generate Fourier coefficient maps.
  431. Args:
  432. img_size (tuple): The image size of (height, width).
  433. text_polys (list[list[ndarray]]): The list of text polygons.
  434. Returns:
  435. fourier_real_map (ndarray): The Fourier coefficient real part maps.
  436. fourier_image_map (ndarray): The Fourier coefficient image part
  437. maps.
  438. """
  439. assert isinstance(img_size, tuple)
  440. h, w = img_size
  441. k = self.fourier_degree
  442. real_map = np.zeros((k * 2 + 1, h, w), dtype=np.float32)
  443. imag_map = np.zeros((k * 2 + 1, h, w), dtype=np.float32)
  444. for poly in text_polys:
  445. mask = np.zeros((h, w), dtype=np.uint8)
  446. polygon = np.array(poly).reshape((1, -1, 2))
  447. cv2.fillPoly(mask, polygon.astype(np.int32), 1)
  448. fourier_coeff = self.cal_fourier_signature(polygon[0], k)
  449. for i in range(-k, k + 1):
  450. if i != 0:
  451. real_map[i + k, :, :] = (
  452. mask * fourier_coeff[i + k, 0]
  453. + (1 - mask) * real_map[i + k, :, :]
  454. )
  455. imag_map[i + k, :, :] = (
  456. mask * fourier_coeff[i + k, 1]
  457. + (1 - mask) * imag_map[i + k, :, :]
  458. )
  459. else:
  460. yx = np.argwhere(mask > 0.5)
  461. k_ind = np.ones((len(yx)), dtype=np.int64) * k
  462. y, x = yx[:, 0], yx[:, 1]
  463. real_map[k_ind, y, x] = fourier_coeff[k, 0] - x
  464. imag_map[k_ind, y, x] = fourier_coeff[k, 1] - y
  465. return real_map, imag_map
  466. def generate_text_region_mask(self, img_size, text_polys):
  467. """Generate text center region mask and geometry attribute maps.
  468. Args:
  469. img_size (tuple): The image size (height, width).
  470. text_polys (list[list[ndarray]]): The list of text polygons.
  471. Returns:
  472. text_region_mask (ndarray): The text region mask.
  473. """
  474. assert isinstance(img_size, tuple)
  475. h, w = img_size
  476. text_region_mask = np.zeros((h, w), dtype=np.uint8)
  477. for poly in text_polys:
  478. polygon = np.array(poly, dtype=np.int32).reshape((1, -1, 2))
  479. cv2.fillPoly(text_region_mask, polygon, 1)
  480. return text_region_mask
  481. def generate_effective_mask(self, mask_size: tuple, polygons_ignore):
  482. """Generate effective mask by setting the ineffective regions to 0 and
  483. effective regions to 1.
  484. Args:
  485. mask_size (tuple): The mask size.
  486. polygons_ignore (list[[ndarray]]: The list of ignored text
  487. polygons.
  488. Returns:
  489. mask (ndarray): The effective mask of (height, width).
  490. """
  491. mask = np.ones(mask_size, dtype=np.uint8)
  492. for poly in polygons_ignore:
  493. instance = poly.reshape(-1, 2).astype(np.int32).reshape(1, -1, 2)
  494. cv2.fillPoly(mask, instance, 0)
  495. return mask
  496. def generate_level_targets(self, img_size, text_polys, ignore_polys):
  497. """Generate ground truth target on each level.
  498. Args:
  499. img_size (list[int]): Shape of input image.
  500. text_polys (list[list[ndarray]]): A list of ground truth polygons.
  501. ignore_polys (list[list[ndarray]]): A list of ignored polygons.
  502. Returns:
  503. level_maps (list(ndarray)): A list of ground target on each level.
  504. """
  505. h, w = img_size
  506. lv_size_divs = self.level_size_divisors
  507. lv_proportion_range = self.level_proportion_range
  508. lv_text_polys = [[] for i in range(len(lv_size_divs))]
  509. lv_ignore_polys = [[] for i in range(len(lv_size_divs))]
  510. level_maps = []
  511. for poly in text_polys:
  512. polygon = np.array(poly, dtype=np.int32).reshape((1, -1, 2))
  513. _, _, box_w, box_h = cv2.boundingRect(polygon)
  514. proportion = max(box_h, box_w) / (h + 1e-8)
  515. for ind, proportion_range in enumerate(lv_proportion_range):
  516. if proportion_range[0] < proportion < proportion_range[1]:
  517. lv_text_polys[ind].append(poly / lv_size_divs[ind])
  518. for ignore_poly in ignore_polys:
  519. polygon = np.array(ignore_poly, dtype=np.int32).reshape((1, -1, 2))
  520. _, _, box_w, box_h = cv2.boundingRect(polygon)
  521. proportion = max(box_h, box_w) / (h + 1e-8)
  522. for ind, proportion_range in enumerate(lv_proportion_range):
  523. if proportion_range[0] < proportion < proportion_range[1]:
  524. lv_ignore_polys[ind].append(ignore_poly / lv_size_divs[ind])
  525. for ind, size_divisor in enumerate(lv_size_divs):
  526. current_level_maps = []
  527. level_img_size = (h // size_divisor, w // size_divisor)
  528. text_region = self.generate_text_region_mask(
  529. level_img_size, lv_text_polys[ind]
  530. )[None]
  531. current_level_maps.append(text_region)
  532. center_region = self.generate_center_region_mask(
  533. level_img_size, lv_text_polys[ind]
  534. )[None]
  535. current_level_maps.append(center_region)
  536. effective_mask = self.generate_effective_mask(
  537. level_img_size, lv_ignore_polys[ind]
  538. )[None]
  539. current_level_maps.append(effective_mask)
  540. fourier_real_map, fourier_image_maps = self.generate_fourier_maps(
  541. level_img_size, lv_text_polys[ind]
  542. )
  543. current_level_maps.append(fourier_real_map)
  544. current_level_maps.append(fourier_image_maps)
  545. level_maps.append(np.concatenate(current_level_maps))
  546. return level_maps
  547. def generate_targets(self, results):
  548. """Generate the ground truth targets for FCENet.
  549. Args:
  550. results (dict): The input result dictionary.
  551. Returns:
  552. results (dict): The output result dictionary.
  553. """
  554. assert isinstance(results, dict)
  555. image = results["image"]
  556. polygons = results["polys"]
  557. ignore_tags = results["ignore_tags"]
  558. h, w, _ = image.shape
  559. polygon_masks = []
  560. polygon_masks_ignore = []
  561. for tag, polygon in zip(ignore_tags, polygons):
  562. if tag is True:
  563. polygon_masks_ignore.append(polygon)
  564. else:
  565. polygon_masks.append(polygon)
  566. level_maps = self.generate_level_targets(
  567. (h, w), polygon_masks, polygon_masks_ignore
  568. )
  569. mapping = {
  570. "p3_maps": level_maps[0],
  571. "p4_maps": level_maps[1],
  572. "p5_maps": level_maps[2],
  573. }
  574. for key, value in mapping.items():
  575. results[key] = value
  576. return results
  577. def __call__(self, results):
  578. results = self.generate_targets(results)
  579. return results