random_crop_data.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is adapted from:
  16. https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/random_crop_data.py
  17. """
  18. from __future__ import absolute_import
  19. from __future__ import division
  20. from __future__ import print_function
  21. from __future__ import unicode_literals
  22. import numpy as np
  23. import cv2
  24. import random
  25. from paddle import get_device
  26. def is_poly_in_rect(poly, x, y, w, h):
  27. poly = np.array(poly)
  28. if poly[:, 0].min() < x or poly[:, 0].max() > x + w:
  29. return False
  30. if poly[:, 1].min() < y or poly[:, 1].max() > y + h:
  31. return False
  32. return True
  33. def is_poly_outside_rect(poly, x, y, w, h):
  34. poly = np.array(poly)
  35. if poly[:, 0].max() < x or poly[:, 0].min() > x + w:
  36. return True
  37. if poly[:, 1].max() < y or poly[:, 1].min() > y + h:
  38. return True
  39. return False
  40. def split_regions(axis):
  41. regions = []
  42. min_axis = 0
  43. for i in range(1, axis.shape[0]):
  44. if axis[i] != axis[i - 1] + 1:
  45. region = axis[min_axis:i]
  46. min_axis = i
  47. regions.append(region)
  48. return regions
  49. def random_select(axis, max_size):
  50. xx = np.random.choice(axis, size=2)
  51. xmin = np.min(xx)
  52. xmax = np.max(xx)
  53. xmin = np.clip(xmin, 0, max_size - 1)
  54. xmax = np.clip(xmax, 0, max_size - 1)
  55. return xmin, xmax
  56. def region_wise_random_select(regions, max_size):
  57. selected_index = list(np.random.choice(len(regions), 2))
  58. selected_values = []
  59. for index in selected_index:
  60. axis = regions[index]
  61. xx = int(np.random.choice(axis, size=1))
  62. selected_values.append(xx)
  63. xmin = min(selected_values)
  64. xmax = max(selected_values)
  65. return xmin, xmax
  66. def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
  67. h, w, _ = im.shape
  68. h_array = np.zeros(h, dtype=np.int32)
  69. w_array = np.zeros(w, dtype=np.int32)
  70. for points in text_polys:
  71. points = np.round(points, decimals=0).astype(np.int32)
  72. minx = np.min(points[:, 0])
  73. maxx = np.max(points[:, 0])
  74. w_array[minx:maxx] = 1
  75. miny = np.min(points[:, 1])
  76. maxy = np.max(points[:, 1])
  77. h_array[miny:maxy] = 1
  78. # ensure the cropped area not across a text
  79. h_axis = np.where(h_array == 0)[0]
  80. w_axis = np.where(w_array == 0)[0]
  81. if len(h_axis) == 0 or len(w_axis) == 0:
  82. return 0, 0, w, h
  83. h_regions = split_regions(h_axis)
  84. w_regions = split_regions(w_axis)
  85. for i in range(max_tries):
  86. if len(w_regions) > 1:
  87. xmin, xmax = region_wise_random_select(w_regions, w)
  88. else:
  89. xmin, xmax = random_select(w_axis, w)
  90. if len(h_regions) > 1:
  91. ymin, ymax = region_wise_random_select(h_regions, h)
  92. else:
  93. ymin, ymax = random_select(h_axis, h)
  94. if (
  95. xmax - xmin < min_crop_side_ratio * w
  96. or ymax - ymin < min_crop_side_ratio * h
  97. ):
  98. # area too small
  99. continue
  100. num_poly_in_rect = 0
  101. for poly in text_polys:
  102. if not is_poly_outside_rect(poly, xmin, ymin, xmax - xmin, ymax - ymin):
  103. num_poly_in_rect += 1
  104. break
  105. if num_poly_in_rect > 0:
  106. return xmin, ymin, xmax - xmin, ymax - ymin
  107. return 0, 0, w, h
  108. class EastRandomCropData(object):
  109. def __init__(
  110. self,
  111. size=(640, 640),
  112. max_tries=10,
  113. min_crop_side_ratio=0.1,
  114. keep_ratio=True,
  115. **kwargs,
  116. ):
  117. self.size = size
  118. self.max_tries = max_tries
  119. self.min_crop_side_ratio = min_crop_side_ratio
  120. self.keep_ratio = keep_ratio
  121. def __call__(self, data):
  122. img = data["image"]
  123. text_polys = data["polys"]
  124. ignore_tags = data["ignore_tags"]
  125. texts = data["texts"]
  126. all_care_polys = [text_polys[i] for i, tag in enumerate(ignore_tags) if not tag]
  127. # 计算crop区域
  128. crop_x, crop_y, crop_w, crop_h = crop_area(
  129. img, all_care_polys, self.min_crop_side_ratio, self.max_tries
  130. )
  131. # crop 图片 保持比例填充
  132. scale_w = self.size[0] / crop_w
  133. scale_h = self.size[1] / crop_h
  134. scale = min(scale_w, scale_h)
  135. h = int(crop_h * scale)
  136. w = int(crop_w * scale)
  137. if self.keep_ratio:
  138. padimg = np.zeros((self.size[1], self.size[0], img.shape[2]), img.dtype)
  139. padimg[:h, :w] = cv2.resize(
  140. img[crop_y : crop_y + crop_h, crop_x : crop_x + crop_w], (w, h)
  141. )
  142. img = padimg
  143. else:
  144. img = cv2.resize(
  145. img[crop_y : crop_y + crop_h, crop_x : crop_x + crop_w],
  146. tuple(self.size),
  147. )
  148. # crop 文本框
  149. text_polys_crop = []
  150. ignore_tags_crop = []
  151. texts_crop = []
  152. for poly, text, tag in zip(text_polys, texts, ignore_tags):
  153. poly = ((poly - (crop_x, crop_y)) * scale).tolist()
  154. if not is_poly_outside_rect(poly, 0, 0, w, h):
  155. text_polys_crop.append(poly)
  156. ignore_tags_crop.append(tag)
  157. texts_crop.append(text)
  158. data["image"] = img
  159. data["polys"] = np.array(text_polys_crop)
  160. if "iluvatar_gpu" in get_device():
  161. data["polys"] = np.array(text_polys_crop).astype(np.float32)
  162. data["ignore_tags"] = ignore_tags_crop
  163. data["texts"] = texts_crop
  164. return data
  165. class RandomCropImgMask(object):
  166. def __init__(self, size, main_key, crop_keys, p=3 / 8, **kwargs):
  167. self.size = size
  168. self.main_key = main_key
  169. self.crop_keys = crop_keys
  170. self.p = p
  171. def __call__(self, data):
  172. image = data["image"]
  173. h, w = image.shape[0:2]
  174. th, tw = self.size
  175. if w == tw and h == th:
  176. return data
  177. mask = data[self.main_key]
  178. if np.max(mask) > 0 and random.random() > self.p:
  179. # make sure to crop the text region
  180. tl = np.min(np.where(mask > 0), axis=1) - (th, tw)
  181. tl[tl < 0] = 0
  182. br = np.max(np.where(mask > 0), axis=1) - (th, tw)
  183. br[br < 0] = 0
  184. br[0] = min(br[0], h - th)
  185. br[1] = min(br[1], w - tw)
  186. i = random.randint(tl[0], br[0]) if tl[0] < br[0] else 0
  187. j = random.randint(tl[1], br[1]) if tl[1] < br[1] else 0
  188. else:
  189. i = random.randint(0, h - th) if h - th > 0 else 0
  190. j = random.randint(0, w - tw) if w - tw > 0 else 0
  191. # return i, j, th, tw
  192. for k in data:
  193. if k in self.crop_keys:
  194. if len(data[k].shape) == 3:
  195. if np.argmin(data[k].shape) == 0:
  196. img = data[k][:, i : i + th, j : j + tw]
  197. if img.shape[1] != img.shape[2]:
  198. a = 1
  199. elif np.argmin(data[k].shape) == 2:
  200. img = data[k][i : i + th, j : j + tw, :]
  201. if img.shape[1] != img.shape[0]:
  202. a = 1
  203. else:
  204. img = data[k]
  205. else:
  206. img = data[k][i : i + th, j : j + tw]
  207. if img.shape[0] != img.shape[1]:
  208. a = 1
  209. data[k] = img
  210. return data