fce_aug.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is adapted from:
  16. https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/transforms.py
  17. """
  18. import numpy as np
  19. from PIL import Image, ImageDraw
  20. import cv2
  21. from shapely.geometry import Polygon
  22. import math
  23. from ppocr.utils.poly_nms import poly_intersection
  24. class RandomScaling:
  25. def __init__(self, size=800, scale=(3.0 / 4, 5.0 / 2), **kwargs):
  26. """Random scale the image while keeping aspect.
  27. Args:
  28. size (int) : Base size before scaling.
  29. scale (tuple(float)) : The range of scaling.
  30. """
  31. assert isinstance(size, int)
  32. assert isinstance(scale, float) or isinstance(scale, tuple)
  33. self.size = size
  34. self.scale = scale if isinstance(scale, tuple) else (1 - scale, 1 + scale)
  35. def __call__(self, data):
  36. image = data["image"]
  37. text_polys = data["polys"]
  38. h, w, _ = image.shape
  39. aspect_ratio = np.random.uniform(min(self.scale), max(self.scale))
  40. scales = self.size * 1.0 / max(h, w) * aspect_ratio
  41. scales = np.array([scales, scales])
  42. out_size = (int(h * scales[1]), int(w * scales[0]))
  43. image = cv2.resize(image, out_size[::-1])
  44. data["image"] = image
  45. text_polys[:, :, 0::2] = text_polys[:, :, 0::2] * scales[1]
  46. text_polys[:, :, 1::2] = text_polys[:, :, 1::2] * scales[0]
  47. data["polys"] = text_polys
  48. return data
  49. class RandomCropFlip:
  50. def __init__(
  51. self, pad_ratio=0.1, crop_ratio=0.5, iter_num=1, min_area_ratio=0.2, **kwargs
  52. ):
  53. """Random crop and flip a patch of the image.
  54. Args:
  55. crop_ratio (float): The ratio of cropping.
  56. iter_num (int): Number of operations.
  57. min_area_ratio (float): Minimal area ratio between cropped patch
  58. and original image.
  59. """
  60. assert isinstance(crop_ratio, float)
  61. assert isinstance(iter_num, int)
  62. assert isinstance(min_area_ratio, float)
  63. self.pad_ratio = pad_ratio
  64. self.epsilon = 1e-2
  65. self.crop_ratio = crop_ratio
  66. self.iter_num = iter_num
  67. self.min_area_ratio = min_area_ratio
  68. def __call__(self, results):
  69. for i in range(self.iter_num):
  70. results = self.random_crop_flip(results)
  71. return results
  72. def random_crop_flip(self, results):
  73. image = results["image"]
  74. polygons = results["polys"]
  75. ignore_tags = results["ignore_tags"]
  76. if len(polygons) == 0:
  77. return results
  78. if np.random.random() >= self.crop_ratio:
  79. return results
  80. h, w, _ = image.shape
  81. area = h * w
  82. pad_h = int(h * self.pad_ratio)
  83. pad_w = int(w * self.pad_ratio)
  84. h_axis, w_axis = self.generate_crop_target(image, polygons, pad_h, pad_w)
  85. if len(h_axis) == 0 or len(w_axis) == 0:
  86. return results
  87. attempt = 0
  88. while attempt < 50:
  89. attempt += 1
  90. polys_keep = []
  91. polys_new = []
  92. ignore_tags_keep = []
  93. ignore_tags_new = []
  94. xx = np.random.choice(w_axis, size=2)
  95. xmin = np.min(xx) - pad_w
  96. xmax = np.max(xx) - pad_w
  97. xmin = np.clip(xmin, 0, w - 1)
  98. xmax = np.clip(xmax, 0, w - 1)
  99. yy = np.random.choice(h_axis, size=2)
  100. ymin = np.min(yy) - pad_h
  101. ymax = np.max(yy) - pad_h
  102. ymin = np.clip(ymin, 0, h - 1)
  103. ymax = np.clip(ymax, 0, h - 1)
  104. if (xmax - xmin) * (ymax - ymin) < area * self.min_area_ratio:
  105. # area too small
  106. continue
  107. pts = np.stack(
  108. [[xmin, xmax, xmax, xmin], [ymin, ymin, ymax, ymax]]
  109. ).T.astype(np.int32)
  110. pp = Polygon(pts)
  111. fail_flag = False
  112. for polygon, ignore_tag in zip(polygons, ignore_tags):
  113. ppi = Polygon(polygon.reshape(-1, 2))
  114. ppiou, _ = poly_intersection(ppi, pp, buffer=0)
  115. if (
  116. np.abs(ppiou - float(ppi.area)) > self.epsilon
  117. and np.abs(ppiou) > self.epsilon
  118. ):
  119. fail_flag = True
  120. break
  121. elif np.abs(ppiou - float(ppi.area)) < self.epsilon:
  122. polys_new.append(polygon)
  123. ignore_tags_new.append(ignore_tag)
  124. else:
  125. polys_keep.append(polygon)
  126. ignore_tags_keep.append(ignore_tag)
  127. if fail_flag:
  128. continue
  129. else:
  130. break
  131. cropped = image[ymin:ymax, xmin:xmax, :]
  132. select_type = np.random.randint(3)
  133. if select_type == 0:
  134. img = np.ascontiguousarray(cropped[:, ::-1])
  135. elif select_type == 1:
  136. img = np.ascontiguousarray(cropped[::-1, :])
  137. else:
  138. img = np.ascontiguousarray(cropped[::-1, ::-1])
  139. image[ymin:ymax, xmin:xmax, :] = img
  140. results["img"] = image
  141. if len(polys_new) != 0:
  142. height, width, _ = cropped.shape
  143. if select_type == 0:
  144. for idx, polygon in enumerate(polys_new):
  145. poly = polygon.reshape(-1, 2)
  146. poly[:, 0] = width - poly[:, 0] + 2 * xmin
  147. polys_new[idx] = poly
  148. elif select_type == 1:
  149. for idx, polygon in enumerate(polys_new):
  150. poly = polygon.reshape(-1, 2)
  151. poly[:, 1] = height - poly[:, 1] + 2 * ymin
  152. polys_new[idx] = poly
  153. else:
  154. for idx, polygon in enumerate(polys_new):
  155. poly = polygon.reshape(-1, 2)
  156. poly[:, 0] = width - poly[:, 0] + 2 * xmin
  157. poly[:, 1] = height - poly[:, 1] + 2 * ymin
  158. polys_new[idx] = poly
  159. polygons = polys_keep + polys_new
  160. ignore_tags = ignore_tags_keep + ignore_tags_new
  161. results["polys"] = np.array(polygons)
  162. results["ignore_tags"] = ignore_tags
  163. return results
  164. def generate_crop_target(self, image, all_polys, pad_h, pad_w):
  165. """Generate crop target and make sure not to crop the polygon
  166. instances.
  167. Args:
  168. image (ndarray): The image waited to be crop.
  169. all_polys (list[list[ndarray]]): All polygons including ground
  170. truth polygons and ground truth ignored polygons.
  171. pad_h (int): Padding length of height.
  172. pad_w (int): Padding length of width.
  173. Returns:
  174. h_axis (ndarray): Vertical cropping range.
  175. w_axis (ndarray): Horizontal cropping range.
  176. """
  177. h, w, _ = image.shape
  178. h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
  179. w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
  180. text_polys = []
  181. for polygon in all_polys:
  182. rect = cv2.minAreaRect(polygon.astype(np.int32).reshape(-1, 2))
  183. box = cv2.boxPoints(rect)
  184. box = np.int64(box)
  185. text_polys.append([box[0], box[1], box[2], box[3]])
  186. polys = np.array(text_polys, dtype=np.int32)
  187. for poly in polys:
  188. poly = np.round(poly, decimals=0).astype(np.int32)
  189. minx = np.min(poly[:, 0])
  190. maxx = np.max(poly[:, 0])
  191. w_array[minx + pad_w : maxx + pad_w] = 1
  192. miny = np.min(poly[:, 1])
  193. maxy = np.max(poly[:, 1])
  194. h_array[miny + pad_h : maxy + pad_h] = 1
  195. h_axis = np.where(h_array == 0)[0]
  196. w_axis = np.where(w_array == 0)[0]
  197. return h_axis, w_axis
  198. class RandomCropPolyInstances:
  199. """Randomly crop images and make sure to contain at least one intact
  200. instance."""
  201. def __init__(self, crop_ratio=5.0 / 8.0, min_side_ratio=0.4, **kwargs):
  202. super().__init__()
  203. self.crop_ratio = crop_ratio
  204. self.min_side_ratio = min_side_ratio
  205. def sample_valid_start_end(self, valid_array, min_len, max_start, min_end):
  206. assert isinstance(min_len, int)
  207. assert len(valid_array) > min_len
  208. start_array = valid_array.copy()
  209. max_start = min(len(start_array) - min_len, max_start)
  210. start_array[max_start:] = 0
  211. start_array[0] = 1
  212. diff_array = np.hstack([0, start_array]) - np.hstack([start_array, 0])
  213. region_starts = np.where(diff_array < 0)[0]
  214. region_ends = np.where(diff_array > 0)[0]
  215. region_ind = np.random.randint(0, len(region_starts))
  216. start = np.random.randint(region_starts[region_ind], region_ends[region_ind])
  217. end_array = valid_array.copy()
  218. min_end = max(start + min_len, min_end)
  219. end_array[:min_end] = 0
  220. end_array[-1] = 1
  221. diff_array = np.hstack([0, end_array]) - np.hstack([end_array, 0])
  222. region_starts = np.where(diff_array < 0)[0]
  223. region_ends = np.where(diff_array > 0)[0]
  224. region_ind = np.random.randint(0, len(region_starts))
  225. end = np.random.randint(region_starts[region_ind], region_ends[region_ind])
  226. return start, end
  227. def sample_crop_box(self, img_size, results):
  228. """Generate crop box and make sure not to crop the polygon instances.
  229. Args:
  230. img_size (tuple(int)): The image size (h, w).
  231. results (dict): The results dict.
  232. """
  233. assert isinstance(img_size, tuple)
  234. h, w = img_size[:2]
  235. key_masks = results["polys"]
  236. x_valid_array = np.ones(w, dtype=np.int32)
  237. y_valid_array = np.ones(h, dtype=np.int32)
  238. selected_mask = key_masks[np.random.randint(0, len(key_masks))]
  239. selected_mask = selected_mask.reshape((-1, 2)).astype(np.int32)
  240. max_x_start = max(np.min(selected_mask[:, 0]) - 2, 0)
  241. min_x_end = min(np.max(selected_mask[:, 0]) + 3, w - 1)
  242. max_y_start = max(np.min(selected_mask[:, 1]) - 2, 0)
  243. min_y_end = min(np.max(selected_mask[:, 1]) + 3, h - 1)
  244. for mask in key_masks:
  245. mask = mask.reshape((-1, 2)).astype(np.int32)
  246. clip_x = np.clip(mask[:, 0], 0, w - 1)
  247. clip_y = np.clip(mask[:, 1], 0, h - 1)
  248. min_x, max_x = np.min(clip_x), np.max(clip_x)
  249. min_y, max_y = np.min(clip_y), np.max(clip_y)
  250. x_valid_array[min_x - 2 : max_x + 3] = 0
  251. y_valid_array[min_y - 2 : max_y + 3] = 0
  252. min_w = int(w * self.min_side_ratio)
  253. min_h = int(h * self.min_side_ratio)
  254. x1, x2 = self.sample_valid_start_end(
  255. x_valid_array, min_w, max_x_start, min_x_end
  256. )
  257. y1, y2 = self.sample_valid_start_end(
  258. y_valid_array, min_h, max_y_start, min_y_end
  259. )
  260. return np.array([x1, y1, x2, y2])
  261. def crop_img(self, img, bbox):
  262. assert img.ndim == 3
  263. h, w, _ = img.shape
  264. assert 0 <= bbox[1] < bbox[3] <= h
  265. assert 0 <= bbox[0] < bbox[2] <= w
  266. return img[bbox[1] : bbox[3], bbox[0] : bbox[2]]
  267. def __call__(self, results):
  268. image = results["image"]
  269. polygons = results["polys"]
  270. ignore_tags = results["ignore_tags"]
  271. if len(polygons) < 1:
  272. return results
  273. if np.random.random_sample() < self.crop_ratio:
  274. crop_box = self.sample_crop_box(image.shape, results)
  275. img = self.crop_img(image, crop_box)
  276. results["image"] = img
  277. # crop and filter masks
  278. x1, y1, x2, y2 = crop_box
  279. w = max(x2 - x1, 1)
  280. h = max(y2 - y1, 1)
  281. polygons[:, :, 0::2] = polygons[:, :, 0::2] - x1
  282. polygons[:, :, 1::2] = polygons[:, :, 1::2] - y1
  283. valid_masks_list = []
  284. valid_tags_list = []
  285. for ind, polygon in enumerate(polygons):
  286. if (
  287. (polygon[:, ::2] > -4).all()
  288. and (polygon[:, ::2] < w + 4).all()
  289. and (polygon[:, 1::2] > -4).all()
  290. and (polygon[:, 1::2] < h + 4).all()
  291. ):
  292. polygon[:, ::2] = np.clip(polygon[:, ::2], 0, w)
  293. polygon[:, 1::2] = np.clip(polygon[:, 1::2], 0, h)
  294. valid_masks_list.append(polygon)
  295. valid_tags_list.append(ignore_tags[ind])
  296. results["polys"] = np.array(valid_masks_list)
  297. results["ignore_tags"] = valid_tags_list
  298. return results
  299. def __repr__(self):
  300. repr_str = self.__class__.__name__
  301. return repr_str
  302. class RandomRotatePolyInstances:
  303. def __init__(
  304. self,
  305. rotate_ratio=0.5,
  306. max_angle=10,
  307. pad_with_fixed_color=False,
  308. pad_value=(0, 0, 0),
  309. **kwargs,
  310. ):
  311. """Randomly rotate images and polygon masks.
  312. Args:
  313. rotate_ratio (float): The ratio of samples to operate rotation.
  314. max_angle (int): The maximum rotation angle.
  315. pad_with_fixed_color (bool): The flag for whether to pad rotated
  316. image with fixed value. If set to False, the rotated image will
  317. be padded onto cropped image.
  318. pad_value (tuple(int)): The color value for padding rotated image.
  319. """
  320. self.rotate_ratio = rotate_ratio
  321. self.max_angle = max_angle
  322. self.pad_with_fixed_color = pad_with_fixed_color
  323. self.pad_value = pad_value
  324. def rotate(self, center, points, theta, center_shift=(0, 0)):
  325. # rotate points.
  326. (center_x, center_y) = center
  327. center_y = -center_y
  328. x, y = points[:, ::2], points[:, 1::2]
  329. y = -y
  330. theta = theta / 180 * math.pi
  331. cos = math.cos(theta)
  332. sin = math.sin(theta)
  333. x = x - center_x
  334. y = y - center_y
  335. _x = center_x + x * cos - y * sin + center_shift[0]
  336. _y = -(center_y + x * sin + y * cos) + center_shift[1]
  337. points[:, ::2], points[:, 1::2] = _x, _y
  338. return points
  339. def cal_canvas_size(self, ori_size, degree):
  340. assert isinstance(ori_size, tuple)
  341. angle = degree * math.pi / 180.0
  342. h, w = ori_size[:2]
  343. cos = math.cos(angle)
  344. sin = math.sin(angle)
  345. canvas_h = int(w * math.fabs(sin) + h * math.fabs(cos))
  346. canvas_w = int(w * math.fabs(cos) + h * math.fabs(sin))
  347. canvas_size = (canvas_h, canvas_w)
  348. return canvas_size
  349. def sample_angle(self, max_angle):
  350. angle = np.random.random_sample() * 2 * max_angle - max_angle
  351. return angle
  352. def rotate_img(self, img, angle, canvas_size):
  353. h, w = img.shape[:2]
  354. rotation_matrix = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
  355. rotation_matrix[0, 2] += int((canvas_size[1] - w) / 2)
  356. rotation_matrix[1, 2] += int((canvas_size[0] - h) / 2)
  357. if self.pad_with_fixed_color:
  358. target_img = cv2.warpAffine(
  359. img,
  360. rotation_matrix,
  361. (canvas_size[1], canvas_size[0]),
  362. flags=cv2.INTER_NEAREST,
  363. borderValue=self.pad_value,
  364. )
  365. else:
  366. mask = np.zeros_like(img)
  367. (h_ind, w_ind) = (
  368. np.random.randint(0, h * 7 // 8),
  369. np.random.randint(0, w * 7 // 8),
  370. )
  371. img_cut = img[h_ind : (h_ind + h // 9), w_ind : (w_ind + w // 9)]
  372. img_cut = cv2.resize(img_cut, (canvas_size[1], canvas_size[0]))
  373. mask = cv2.warpAffine(
  374. mask,
  375. rotation_matrix,
  376. (canvas_size[1], canvas_size[0]),
  377. borderValue=[1, 1, 1],
  378. )
  379. target_img = cv2.warpAffine(
  380. img,
  381. rotation_matrix,
  382. (canvas_size[1], canvas_size[0]),
  383. borderValue=[0, 0, 0],
  384. )
  385. target_img = target_img + img_cut * mask
  386. return target_img
  387. def __call__(self, results):
  388. if np.random.random_sample() < self.rotate_ratio:
  389. image = results["image"]
  390. polygons = results["polys"]
  391. h, w = image.shape[:2]
  392. angle = self.sample_angle(self.max_angle)
  393. canvas_size = self.cal_canvas_size((h, w), angle)
  394. center_shift = (
  395. int((canvas_size[1] - w) / 2),
  396. int((canvas_size[0] - h) / 2),
  397. )
  398. image = self.rotate_img(image, angle, canvas_size)
  399. results["image"] = image
  400. # rotate polygons
  401. rotated_masks = []
  402. for mask in polygons:
  403. rotated_mask = self.rotate((w / 2, h / 2), mask, angle, center_shift)
  404. rotated_masks.append(rotated_mask)
  405. results["polys"] = np.array(rotated_masks)
  406. return results
  407. def __repr__(self):
  408. repr_str = self.__class__.__name__
  409. return repr_str
  410. class SquareResizePad:
  411. def __init__(
  412. self,
  413. target_size,
  414. pad_ratio=0.6,
  415. pad_with_fixed_color=False,
  416. pad_value=(0, 0, 0),
  417. **kwargs,
  418. ):
  419. """Resize or pad images to be square shape.
  420. Args:
  421. target_size (int): The target size of square shaped image.
  422. pad_with_fixed_color (bool): The flag for whether to pad rotated
  423. image with fixed value. If set to False, the rescales image will
  424. be padded onto cropped image.
  425. pad_value (tuple(int)): The color value for padding rotated image.
  426. """
  427. assert isinstance(target_size, int)
  428. assert isinstance(pad_ratio, float)
  429. assert isinstance(pad_with_fixed_color, bool)
  430. assert isinstance(pad_value, tuple)
  431. self.target_size = target_size
  432. self.pad_ratio = pad_ratio
  433. self.pad_with_fixed_color = pad_with_fixed_color
  434. self.pad_value = pad_value
  435. def resize_img(self, img, keep_ratio=True):
  436. h, w, _ = img.shape
  437. if keep_ratio:
  438. t_h = self.target_size if h >= w else int(h * self.target_size / w)
  439. t_w = self.target_size if h <= w else int(w * self.target_size / h)
  440. else:
  441. t_h = t_w = self.target_size
  442. img = cv2.resize(img, (t_w, t_h))
  443. return img, (t_h, t_w)
  444. def square_pad(self, img):
  445. h, w = img.shape[:2]
  446. if h == w:
  447. return img, (0, 0)
  448. pad_size = max(h, w)
  449. if self.pad_with_fixed_color:
  450. expand_img = np.ones((pad_size, pad_size, 3), dtype=np.uint8)
  451. expand_img[:] = self.pad_value
  452. else:
  453. (h_ind, w_ind) = (
  454. np.random.randint(0, h * 7 // 8),
  455. np.random.randint(0, w * 7 // 8),
  456. )
  457. img_cut = img[h_ind : (h_ind + h // 9), w_ind : (w_ind + w // 9)]
  458. expand_img = cv2.resize(img_cut, (pad_size, pad_size))
  459. if h > w:
  460. y0, x0 = 0, (h - w) // 2
  461. else:
  462. y0, x0 = (w - h) // 2, 0
  463. expand_img[y0 : y0 + h, x0 : x0 + w] = img
  464. offset = (x0, y0)
  465. return expand_img, offset
  466. def square_pad_mask(self, points, offset):
  467. x0, y0 = offset
  468. pad_points = points.copy()
  469. pad_points[::2] = pad_points[::2] + x0
  470. pad_points[1::2] = pad_points[1::2] + y0
  471. return pad_points
  472. def __call__(self, results):
  473. image = results["image"]
  474. polygons = results["polys"]
  475. h, w = image.shape[:2]
  476. if np.random.random_sample() < self.pad_ratio:
  477. image, out_size = self.resize_img(image, keep_ratio=True)
  478. image, offset = self.square_pad(image)
  479. else:
  480. image, out_size = self.resize_img(image, keep_ratio=False)
  481. offset = (0, 0)
  482. results["image"] = image
  483. try:
  484. polygons[:, :, 0::2] = polygons[:, :, 0::2] * out_size[1] / w + offset[0]
  485. polygons[:, :, 1::2] = polygons[:, :, 1::2] * out_size[0] / h + offset[1]
  486. except:
  487. pass
  488. results["polys"] = polygons
  489. return results
  490. def __repr__(self):
  491. repr_str = self.__class__.__name__
  492. return repr_str