image.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This file contains some common interfaces for image preprocess.
  16. Many users are confused about the image layout. We introduce
  17. the image layout as follows.
  18. - CHW Layout
  19. - The abbreviations: C=channel, H=Height, W=Width
  20. - The default layout of image opened by cv2 or PIL is HWC.
  21. PaddlePaddle only supports the CHW layout. CHW is simply
  22. a transpose of HWC, so the input image must be transposed.
  23. - Color format: RGB or BGR
  24. OpenCV uses the BGR color format; PIL uses the RGB color format. Both
  25. formats can be used for training. Note that the format must
  26. be kept consistent between training and inference.
  27. """
  28. import os
  29. import tarfile
  30. import numpy as np
  31. try:
  32. import cv2
  33. except ImportError:
  34. cv2 = None
  35. import pickle
  36. __all__ = []
  37. def _check_cv2():
  38. if cv2 is None:
  39. import sys
  40. sys.stderr.write(
  41. '''Warning with paddle image module: opencv-python should be imported,
  42. or paddle image module could NOT work; please install opencv-python first.'''
  43. )
  44. return False
  45. else:
  46. return True
  47. def batch_images_from_tar(
  48. data_file, dataset_name, img2label, num_per_batch=1024
  49. ):
  50. """
  51. Read images from tar file and batch them into batch file.
  52. :param data_file: path of image tar file
  53. :type data_file: string
  54. :param dataset_name: 'train','test' or 'valid'
  55. :type dataset_name: string
  56. :param img2label: a dict with image file name as key
  57. and image's label as value
  58. :type img2label: dict
  59. :param num_per_batch: image number per batch file
  60. :type num_per_batch: int
  61. :return: path of list file containing paths of batch file
  62. :rtype: string
  63. """
  64. batch_dir = data_file + "_batch"
  65. out_path = f"{batch_dir}/{dataset_name}_{os.getpid()}"
  66. meta_file = f"{batch_dir}/{dataset_name}_{os.getpid()}.txt"
  67. if os.path.exists(out_path):
  68. return meta_file
  69. else:
  70. os.makedirs(out_path)
  71. tf = tarfile.open(data_file)
  72. mems = tf.getmembers()
  73. data = []
  74. labels = []
  75. file_id = 0
  76. for mem in mems:
  77. if mem.name in img2label:
  78. data.append(tf.extractfile(mem).read())
  79. labels.append(img2label[mem.name])
  80. if len(data) == num_per_batch:
  81. output = {'label': labels, 'data': data}
  82. pickle.dump(
  83. output,
  84. open('%s/batch_%d' % (out_path, file_id), 'wb'),
  85. protocol=2,
  86. )
  87. file_id += 1
  88. data = []
  89. labels = []
  90. if len(data) > 0:
  91. output = {'label': labels, 'data': data}
  92. pickle.dump(
  93. output, open('%s/batch_%d' % (out_path, file_id), 'wb'), protocol=2
  94. )
  95. with open(meta_file, mode='a') as meta:
  96. for file in os.listdir(out_path):
  97. meta.write(os.path.abspath(f"{out_path}/{file}") + "\n")
  98. return meta_file
  99. def load_image_bytes(bytes, is_color=True):
  100. """
  101. Load an color or gray image from bytes array.
  102. Example usage:
  103. .. code-block:: python
  104. >>> with open('cat.jpg') as f:
  105. ... im = load_image_bytes(f.read())
  106. ...
  107. :param bytes: the input image bytes array.
  108. :type bytes: str
  109. :param is_color: If set is_color True, it will load and
  110. return a color image. Otherwise, it will
  111. load and return a gray image.
  112. :type is_color: bool
  113. """
  114. assert _check_cv2() is True
  115. flag = 1 if is_color else 0
  116. file_bytes = np.asarray(bytearray(bytes), dtype=np.uint8)
  117. img = cv2.imdecode(file_bytes, flag)
  118. return img
  119. def load_image(file, is_color=True):
  120. """
  121. Load an color or gray image from the file path.
  122. Example usage:
  123. .. code-block:: python
  124. >>> im = load_image('cat.jpg')
  125. :param file: the input image path.
  126. :type file: string
  127. :param is_color: If set is_color True, it will load and
  128. return a color image. Otherwise, it will
  129. load and return a gray image.
  130. :type is_color: bool
  131. """
  132. assert _check_cv2() is True
  133. # cv2.IMAGE_COLOR for OpenCV3
  134. # cv2.CV_LOAD_IMAGE_COLOR for older OpenCV Version
  135. # cv2.IMAGE_GRAYSCALE for OpenCV3
  136. # cv2.CV_LOAD_IMAGE_GRAYSCALE for older OpenCV Version
  137. # Here, use constant 1 and 0
  138. # 1: COLOR, 0: GRAYSCALE
  139. flag = 1 if is_color else 0
  140. im = cv2.imread(file.encode('utf-8').decode('utf-8'), flag)
  141. return im
  142. def resize_short(im, size):
  143. """
  144. Resize an image so that the length of shorter edge is size.
  145. Example usage:
  146. .. code-block:: python
  147. >>> im = load_image('cat.jpg')
  148. >>> im = resize_short(im, 256)
  149. :param im: the input image with HWC layout.
  150. :type im: ndarray
  151. :param size: the shorter edge size of image after resizing.
  152. :type size: int
  153. """
  154. assert _check_cv2() is True
  155. h, w = im.shape[:2]
  156. h_new, w_new = size, size
  157. if h > w:
  158. h_new = size * h // w
  159. else:
  160. w_new = size * w // h
  161. im = cv2.resize(im, (w_new, h_new), interpolation=cv2.INTER_CUBIC)
  162. return im
  163. def to_chw(im, order=(2, 0, 1)):
  164. """
  165. Transpose the input image order. The image layout is HWC format
  166. opened by cv2 or PIL. Transpose the input image to CHW layout
  167. according the order (2,0,1).
  168. Example usage:
  169. .. code-block:: python
  170. >>> im = load_image('cat.jpg')
  171. >>> im = resize_short(im, 256)
  172. >>> im = to_chw(im)
  173. :param im: the input image with HWC layout.
  174. :type im: ndarray
  175. :param order: the transposed order.
  176. :type order: tuple|list
  177. """
  178. assert len(im.shape) == len(order)
  179. im = im.transpose(order)
  180. return im
  181. def center_crop(im, size, is_color=True):
  182. """
  183. Crop the center of image with size.
  184. Example usage:
  185. .. code-block:: python
  186. >>> im = load_image('cat.jpg')
  187. >>> im = center_crop(im, 224)
  188. :param im: the input image with HWC layout.
  189. :type im: ndarray
  190. :param size: the cropping size.
  191. :type size: int
  192. :param is_color: whether the image is color or not.
  193. :type is_color: bool
  194. """
  195. h, w = im.shape[:2]
  196. h_start = (h - size) // 2
  197. w_start = (w - size) // 2
  198. h_end, w_end = h_start + size, w_start + size
  199. if is_color:
  200. im = im[h_start:h_end, w_start:w_end, :]
  201. else:
  202. im = im[h_start:h_end, w_start:w_end]
  203. return im
  204. def random_crop(im, size, is_color=True):
  205. """
  206. Randomly crop input image with size.
  207. Example usage:
  208. .. code-block:: python
  209. >>> im = load_image('cat.jpg')
  210. >>> im = random_crop(im, 224)
  211. :param im: the input image with HWC layout.
  212. :type im: ndarray
  213. :param size: the cropping size.
  214. :type size: int
  215. :param is_color: whether the image is color or not.
  216. :type is_color: bool
  217. """
  218. h, w = im.shape[:2]
  219. h_start = np.random.randint(0, h - size + 1)
  220. w_start = np.random.randint(0, w - size + 1)
  221. h_end, w_end = h_start + size, w_start + size
  222. if is_color:
  223. im = im[h_start:h_end, w_start:w_end, :]
  224. else:
  225. im = im[h_start:h_end, w_start:w_end]
  226. return im
  227. def left_right_flip(im, is_color=True):
  228. """
  229. Flip an image along the horizontal direction.
  230. Return the flipped image.
  231. Example usage:
  232. .. code-block:: python
  233. >>> im = load_image('cat.jpg')
  234. >>> im = left_right_flip(im)
  235. :param im: input image with HWC layout or HW layout for gray image
  236. :type im: ndarray
  237. :param is_color: whether input image is color or not
  238. :type is_color: bool
  239. """
  240. if len(im.shape) == 3 and is_color:
  241. return im[:, ::-1, :]
  242. else:
  243. return im[:, ::-1]
  244. def simple_transform(
  245. im, resize_size, crop_size, is_train, is_color=True, mean=None
  246. ):
  247. """
  248. Simply data argumentation for training. These operations include
  249. resizing, cropping and flipping.
  250. Example usage:
  251. .. code-block:: python
  252. >>> im = load_image('cat.jpg')
  253. >>> im = simple_transform(im, 256, 224, True)
  254. :param im: The input image with HWC layout.
  255. :type im: ndarray
  256. :param resize_size: The shorter edge length of the resized image.
  257. :type resize_size: int
  258. :param crop_size: The cropping size.
  259. :type crop_size: int
  260. :param is_train: Whether it is training or not.
  261. :type is_train: bool
  262. :param is_color: whether the image is color or not.
  263. :type is_color: bool
  264. :param mean: the mean values, which can be element-wise mean values or
  265. mean values per channel.
  266. :type mean: numpy array | list
  267. """
  268. im = resize_short(im, resize_size)
  269. if is_train:
  270. im = random_crop(im, crop_size, is_color=is_color)
  271. if np.random.randint(2) == 0:
  272. im = left_right_flip(im, is_color)
  273. else:
  274. im = center_crop(im, crop_size, is_color=is_color)
  275. if len(im.shape) == 3:
  276. im = to_chw(im)
  277. im = im.astype('float32')
  278. if mean is not None:
  279. mean = np.array(mean, dtype=np.float32)
  280. # mean value, may be one value per channel
  281. if mean.ndim == 1 and is_color:
  282. mean = mean[:, np.newaxis, np.newaxis]
  283. elif mean.ndim == 1:
  284. mean = mean
  285. else:
  286. # elementwise mean
  287. assert len(mean.shape) == len(im)
  288. im -= mean
  289. return im
  290. def load_and_transform(
  291. filename, resize_size, crop_size, is_train, is_color=True, mean=None
  292. ):
  293. """
  294. Load image from the input file `filename` and transform image for
  295. data argumentation. Please refer to the `simple_transform` interface
  296. for the transform operations.
  297. Example usage:
  298. .. code-block:: python
  299. >>> im = load_and_transform('cat.jpg', 256, 224, True)
  300. :param filename: The file name of input image.
  301. :type filename: string
  302. :param resize_size: The shorter edge length of the resized image.
  303. :type resize_size: int
  304. :param crop_size: The cropping size.
  305. :type crop_size: int
  306. :param is_train: Whether it is training or not.
  307. :type is_train: bool
  308. :param is_color: whether the image is color or not.
  309. :type is_color: bool
  310. :param mean: the mean values, which can be element-wise mean values or
  311. mean values per channel.
  312. :type mean: numpy array | list
  313. """
  314. im = load_image(filename, is_color)
  315. im = simple_transform(im, resize_size, crop_size, is_train, is_color, mean)
  316. return im