augment.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. # -*- coding: utf-8 -*-
  2. # @Time : 2019/8/23 21:52
  3. # @Author : zhoujun
  4. import math
  5. import numbers
  6. import random
  7. import cv2
  8. import numpy as np
  9. from skimage.util import random_noise
  10. class RandomNoise:
  11. def __init__(self, random_rate):
  12. self.random_rate = random_rate
  13. def __call__(self, data: dict):
  14. """
  15. 对图片加噪声
  16. :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
  17. :return:
  18. """
  19. if random.random() > self.random_rate:
  20. return data
  21. data["img"] = (
  22. random_noise(data["img"], mode="gaussian", clip=True) * 255
  23. ).astype(data["img"].dtype)
  24. return data
  25. class RandomScale:
  26. def __init__(self, scales, random_rate):
  27. """
  28. :param scales: 尺度
  29. :param random_rate: 随机系数
  30. :return:
  31. """
  32. self.random_rate = random_rate
  33. self.scales = scales
  34. def __call__(self, data: dict) -> dict:
  35. """
  36. 从scales中随机选择一个尺度,对图片和文本框进行缩放
  37. :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
  38. :return:
  39. """
  40. if random.random() > self.random_rate:
  41. return data
  42. im = data["img"]
  43. text_polys = data["text_polys"]
  44. tmp_text_polys = text_polys.copy()
  45. rd_scale = float(np.random.choice(self.scales))
  46. im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
  47. tmp_text_polys *= rd_scale
  48. data["img"] = im
  49. data["text_polys"] = tmp_text_polys
  50. return data
  51. class RandomRotateImgBox:
  52. def __init__(self, degrees, random_rate, same_size=False):
  53. """
  54. :param degrees: 角度,可以是一个数值或者list
  55. :param random_rate: 随机系数
  56. :param same_size: 是否保持和原图一样大
  57. :return:
  58. """
  59. if isinstance(degrees, numbers.Number):
  60. if degrees < 0:
  61. raise ValueError("If degrees is a single number, it must be positive.")
  62. degrees = (-degrees, degrees)
  63. elif (
  64. isinstance(degrees, list)
  65. or isinstance(degrees, tuple)
  66. or isinstance(degrees, np.ndarray)
  67. ):
  68. if len(degrees) != 2:
  69. raise ValueError("If degrees is a sequence, it must be of len 2.")
  70. degrees = degrees
  71. else:
  72. raise Exception("degrees must in Number or list or tuple or np.ndarray")
  73. self.degrees = degrees
  74. self.same_size = same_size
  75. self.random_rate = random_rate
  76. def __call__(self, data: dict) -> dict:
  77. """
  78. 从scales中随机选择一个尺度,对图片和文本框进行缩放
  79. :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
  80. :return:
  81. """
  82. if random.random() > self.random_rate:
  83. return data
  84. im = data["img"]
  85. text_polys = data["text_polys"]
  86. # ---------------------- 旋转图像 ----------------------
  87. w = im.shape[1]
  88. h = im.shape[0]
  89. angle = np.random.uniform(self.degrees[0], self.degrees[1])
  90. if self.same_size:
  91. nw = w
  92. nh = h
  93. else:
  94. # 角度变弧度
  95. rangle = np.deg2rad(angle)
  96. # 计算旋转之后图像的w, h
  97. nw = abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)
  98. nh = abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)
  99. # 构造仿射矩阵
  100. rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, 1)
  101. # 计算原图中心点到新图中心点的偏移量
  102. rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
  103. # 更新仿射矩阵
  104. rot_mat[0, 2] += rot_move[0]
  105. rot_mat[1, 2] += rot_move[1]
  106. # 仿射变换
  107. rot_img = cv2.warpAffine(
  108. im,
  109. rot_mat,
  110. (int(math.ceil(nw)), int(math.ceil(nh))),
  111. flags=cv2.INTER_LANCZOS4,
  112. )
  113. # ---------------------- 矫正bbox坐标 ----------------------
  114. # rot_mat是最终的旋转矩阵
  115. # 获取原始bbox的四个中点,然后将这四个点转换到旋转后的坐标系下
  116. rot_text_polys = list()
  117. for bbox in text_polys:
  118. point1 = np.dot(rot_mat, np.array([bbox[0, 0], bbox[0, 1], 1]))
  119. point2 = np.dot(rot_mat, np.array([bbox[1, 0], bbox[1, 1], 1]))
  120. point3 = np.dot(rot_mat, np.array([bbox[2, 0], bbox[2, 1], 1]))
  121. point4 = np.dot(rot_mat, np.array([bbox[3, 0], bbox[3, 1], 1]))
  122. rot_text_polys.append([point1, point2, point3, point4])
  123. data["img"] = rot_img
  124. data["text_polys"] = np.array(rot_text_polys)
  125. return data
  126. class RandomResize:
  127. def __init__(self, size, random_rate, keep_ratio=False):
  128. """
  129. :param input_size: resize尺寸,数字或者list的形式,如果为list形式,就是[w,h]
  130. :param random_rate: 随机系数
  131. :param keep_ratio: 是否保持长宽比
  132. :return:
  133. """
  134. if isinstance(size, numbers.Number):
  135. if size < 0:
  136. raise ValueError(
  137. "If input_size is a single number, it must be positive."
  138. )
  139. size = (size, size)
  140. elif (
  141. isinstance(size, list)
  142. or isinstance(size, tuple)
  143. or isinstance(size, np.ndarray)
  144. ):
  145. if len(size) != 2:
  146. raise ValueError("If input_size is a sequence, it must be of len 2.")
  147. size = (size[0], size[1])
  148. else:
  149. raise Exception("input_size must in Number or list or tuple or np.ndarray")
  150. self.size = size
  151. self.keep_ratio = keep_ratio
  152. self.random_rate = random_rate
  153. def __call__(self, data: dict) -> dict:
  154. """
  155. 从scales中随机选择一个尺度,对图片和文本框进行缩放
  156. :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
  157. :return:
  158. """
  159. if random.random() > self.random_rate:
  160. return data
  161. im = data["img"]
  162. text_polys = data["text_polys"]
  163. if self.keep_ratio:
  164. # 将图片短边pad到和长边一样
  165. h, w, c = im.shape
  166. max_h = max(h, self.size[0])
  167. max_w = max(w, self.size[1])
  168. im_padded = np.zeros((max_h, max_w, c), dtype=np.uint8)
  169. im_padded[:h, :w] = im.copy()
  170. im = im_padded
  171. text_polys = text_polys.astype(np.float32)
  172. h, w, _ = im.shape
  173. im = cv2.resize(im, self.size)
  174. w_scale = self.size[0] / float(w)
  175. h_scale = self.size[1] / float(h)
  176. text_polys[:, :, 0] *= w_scale
  177. text_polys[:, :, 1] *= h_scale
  178. data["img"] = im
  179. data["text_polys"] = text_polys
  180. return data
  181. def resize_image(img, short_size):
  182. height, width, _ = img.shape
  183. if height < width:
  184. new_height = short_size
  185. new_width = new_height / height * width
  186. else:
  187. new_width = short_size
  188. new_height = new_width / width * height
  189. new_height = int(round(new_height / 32) * 32)
  190. new_width = int(round(new_width / 32) * 32)
  191. resized_img = cv2.resize(img, (new_width, new_height))
  192. return resized_img, (new_width / width, new_height / height)
  193. class ResizeShortSize:
  194. def __init__(self, short_size, resize_text_polys=True):
  195. """
  196. :param size: resize尺寸,数字或者list的形式,如果为list形式,就是[w,h]
  197. :return:
  198. """
  199. self.short_size = short_size
  200. self.resize_text_polys = resize_text_polys
  201. def __call__(self, data: dict) -> dict:
  202. """
  203. 对图片和文本框进行缩放
  204. :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
  205. :return:
  206. """
  207. im = data["img"]
  208. text_polys = data["text_polys"]
  209. h, w, _ = im.shape
  210. short_edge = min(h, w)
  211. if short_edge < self.short_size:
  212. # 保证短边 >= short_size
  213. scale = self.short_size / short_edge
  214. im = cv2.resize(im, dsize=None, fx=scale, fy=scale)
  215. scale = (scale, scale)
  216. # im, scale = resize_image(im, self.short_size)
  217. if self.resize_text_polys:
  218. # text_polys *= scale
  219. text_polys[:, 0] *= scale[0]
  220. text_polys[:, 1] *= scale[1]
  221. data["img"] = im
  222. data["text_polys"] = text_polys
  223. return data
  224. class HorizontalFlip:
  225. def __init__(self, random_rate):
  226. """
  227. :param random_rate: 随机系数
  228. """
  229. self.random_rate = random_rate
  230. def __call__(self, data: dict) -> dict:
  231. """
  232. 从scales中随机选择一个尺度,对图片和文本框进行缩放
  233. :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
  234. :return:
  235. """
  236. if random.random() > self.random_rate:
  237. return data
  238. im = data["img"]
  239. text_polys = data["text_polys"]
  240. flip_text_polys = text_polys.copy()
  241. flip_im = cv2.flip(im, 1)
  242. h, w, _ = flip_im.shape
  243. flip_text_polys[:, :, 0] = w - flip_text_polys[:, :, 0]
  244. data["img"] = flip_im
  245. data["text_polys"] = flip_text_polys
  246. return data
  247. class VerticalFlip:
  248. def __init__(self, random_rate):
  249. """
  250. :param random_rate: 随机系数
  251. """
  252. self.random_rate = random_rate
  253. def __call__(self, data: dict) -> dict:
  254. """
  255. 从scales中随机选择一个尺度,对图片和文本框进行缩放
  256. :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
  257. :return:
  258. """
  259. if random.random() > self.random_rate:
  260. return data
  261. im = data["img"]
  262. text_polys = data["text_polys"]
  263. flip_text_polys = text_polys.copy()
  264. flip_im = cv2.flip(im, 0)
  265. h, w, _ = flip_im.shape
  266. flip_text_polys[:, :, 1] = h - flip_text_polys[:, :, 1]
  267. data["img"] = flip_im
  268. data["text_polys"] = flip_text_polys
  269. return data