| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390 |
- # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """
- This file contains some common interfaces for image preprocess.
- Many users are confused about the image layout. We introduce
- the image layout as follows.
- - CHW Layout
- - The abbreviations: C=channel, H=Height, W=Width
- - The default layout of image opened by cv2 or PIL is HWC.
- PaddlePaddle only supports the CHW layout, and CHW is simply
- a transpose of HWC, so the input image must be transposed first.
- - Color format: RGB or BGR
- OpenCV uses the BGR color format. PIL uses the RGB color format. Both
- formats can be used for training. Note that the format must
- be kept consistent between the training and inference periods.
- """
- import os
- import tarfile
- import numpy as np
- try:
- import cv2
- except ImportError:
- cv2 = None
- import pickle
- __all__ = []
def _check_cv2():
    """
    Report whether the optional OpenCV binding is usable.

    The module-level name ``cv2`` is ``None`` when the import at the top
    of this file failed. In that case a warning is written to
    ``sys.stderr``.

    :return: True if ``cv2`` is available, False otherwise.
    :rtype: bool
    """
    if cv2 is not None:
        return True

    # sys is only needed on the failure path, so import it lazily.
    import sys

    warning = (
        "Warning with paddle image module: opencv-python should be imported,\n"
        "or paddle image module could NOT work; please install opencv-python first."
    )
    sys.stderr.write(warning)
    return False
def batch_images_from_tar(
    data_file, dataset_name, img2label, num_per_batch=1024
):
    """
    Read images from tar file and batch them into batch files.

    Batch files are written to ``<data_file>_batch/<dataset_name>_<pid>/``
    as ``batch_0``, ``batch_1``, ... Each one is a pickle (protocol 2) of
    ``{'label': [...], 'data': [...]}`` where ``data`` holds the raw bytes
    of the tar members and ``label`` the matching values from
    ``img2label``. Tar members whose name is not a key of ``img2label``
    are ignored.

    If the output directory already exists nothing is read or written and
    the path of the list file is returned as is.

    :param data_file: path of image tar file
    :type data_file: string
    :param dataset_name: 'train','test' or 'valid'
    :type dataset_name: string
    :param img2label: a dict with image file name as key
                      and image's label as value
    :type img2label: dict
    :param num_per_batch: image number per batch file
    :type num_per_batch: int
    :return: path of list file containing paths of batch file
    :rtype: string
    """
    batch_dir = data_file + "_batch"
    out_path = f"{batch_dir}/{dataset_name}_{os.getpid()}"
    meta_file = f"{batch_dir}/{dataset_name}_{os.getpid()}.txt"

    if os.path.exists(out_path):
        return meta_file

    batch_paths = []
    data = []
    labels = []
    # The context manager closes the archive even if reading fails.
    with tarfile.open(data_file) as tf:
        # Create the output directory only once the archive has opened, so
        # a bad tar path does not leave behind an empty directory that
        # would make later calls return early.
        os.makedirs(out_path)
        for mem in tf.getmembers():
            if mem.name not in img2label:
                continue
            data.append(tf.extractfile(mem).read())
            labels.append(img2label[mem.name])
            if len(data) == num_per_batch:
                batch_paths.append(
                    _dump_image_batch(out_path, len(batch_paths), data, labels)
                )
                data = []
                labels = []

    # Flush the final, partially filled batch.
    if data:
        batch_paths.append(
            _dump_image_batch(out_path, len(batch_paths), data, labels)
        )

    # Append mode is kept from the original implementation. Paths are
    # written in creation order so the list file is deterministic.
    with open(meta_file, mode='a') as meta:
        meta.writelines(
            os.path.abspath(batch_path) + "\n" for batch_path in batch_paths
        )
    return meta_file


def _dump_image_batch(out_path, file_id, data, labels):
    """Pickle one batch to ``<out_path>/batch_<file_id>`` and return its path."""
    batch_path = f"{out_path}/batch_{file_id}"
    with open(batch_path, 'wb') as batch_file:
        pickle.dump({'label': labels, 'data': data}, batch_file, protocol=2)
    return batch_path
def load_image_bytes(bytes, is_color=True):
    """
    Decode a color or gray image from an in-memory bytes array.

    Example usage:

    .. code-block:: python

        >>> with open('cat.jpg', 'rb') as f:
        ...     im = load_image_bytes(f.read())
        ...

    :param bytes: the input image bytes array.
    :type bytes: bytes
    :param is_color: If set is_color True, it will load and
                     return a color image. Otherwise, it will
                     load and return a gray image.
    :type is_color: bool
    :return: the result of ``cv2.imdecode`` on the given bytes.
    """
    assert _check_cv2() is True

    # Decode flag: 1 requests a color image, 0 a grayscale one.
    decode_flag = 0
    if is_color:
        decode_flag = 1

    # imdecode is handed the encoded bytes as a flat uint8 array.
    raw = bytearray(bytes)
    encoded = np.asarray(raw, dtype=np.uint8)
    return cv2.imdecode(encoded, decode_flag)
def load_image(file, is_color=True):
    """
    Read a color or gray image from a file path.

    Example usage:

    .. code-block:: python

        >>> im = load_image('cat.jpg')

    :param file: the input image path.
    :type file: string
    :param is_color: If set is_color True, it will load and
                     return a color image. Otherwise, it will
                     load and return a gray image.
    :type is_color: bool
    :return: the result of ``cv2.imread`` for the given path.
    """
    assert _check_cv2() is True

    # The named read-mode constants differ between OpenCV versions, so the
    # raw values are used instead: 1 means COLOR, 0 means GRAYSCALE.
    read_flag = int(bool(is_color))
    path = file.encode('utf-8').decode('utf-8')
    return cv2.imread(path, read_flag)
def resize_short(im, size):
    """
    Resize an image so that its shorter edge has length ``size``.

    The longer edge is scaled by the same ratio using integer (floor)
    division. Resizing uses cubic interpolation.

    Example usage:

    .. code-block:: python

        >>> im = load_image('cat.jpg')
        >>> im = resize_short(im, 256)

    :param im: the input image with HWC layout.
    :type im: ndarray
    :param size: the shorter edge size of image after resizing.
    :type size: int
    :return: the resized image.
    """
    assert _check_cv2() is True

    height, width = im.shape[:2]
    if height > width:
        # Width is the shorter edge; scale the height to match.
        target_w = size
        target_h = size * height // width
    else:
        # Height is the shorter (or equal) edge; scale the width to match.
        target_h = size
        target_w = size * width // height

    # The target size is passed as (width, height).
    return cv2.resize(
        im, (target_w, target_h), interpolation=cv2.INTER_CUBIC
    )
def to_chw(im, order=(2, 0, 1)):
    """
    Reorder the axes of the input image.

    Images opened by cv2 or PIL use the HWC layout; the default order
    (2, 0, 1) turns such an image into the CHW layout.

    Example usage:

    .. code-block:: python

        >>> im = load_image('cat.jpg')
        >>> im = resize_short(im, 256)
        >>> im = to_chw(im)

    :param im: the input image with HWC layout.
    :type im: ndarray
    :param order: the transposed order.
    :type order: tuple|list
    :return: the image with its axes permuted according to ``order``.
    """
    rank = len(im.shape)
    # The permutation must name every axis of the image.
    assert rank == len(order)
    return im.transpose(order)
def center_crop(im, size, is_color=True):
    """
    Cut a ``size`` x ``size`` window out of the center of the image.

    Example usage:

    .. code-block:: python

        >>> im = load_image('cat.jpg')
        >>> im = center_crop(im, 224)

    :param im: the input image with HWC layout.
    :type im: ndarray
    :param size: the cropping size.
    :type size: int
    :param is_color: whether the image is color or not.
    :type is_color: bool
    :return: the cropped image.
    """
    height, width = im.shape[:2]
    top = (height - size) // 2
    left = (width - size) // 2
    rows = slice(top, top + size)
    cols = slice(left, left + size)

    if is_color:
        # Color images keep the full channel axis.
        return im[rows, cols, :]
    return im[rows, cols]
def random_crop(im, size, is_color=True):
    """
    Cut a ``size`` x ``size`` window out of the image at a random position.

    The offsets are drawn from ``np.random``, so results depend on the
    global NumPy random state.

    Example usage:

    .. code-block:: python

        >>> im = load_image('cat.jpg')
        >>> im = random_crop(im, 224)

    :param im: the input image with HWC layout.
    :type im: ndarray
    :param size: the cropping size.
    :type size: int
    :param is_color: whether the image is color or not.
    :type is_color: bool
    :return: the cropped image.
    """
    height, width = im.shape[:2]
    # Draw the row offset before the column offset.
    top = np.random.randint(0, height - size + 1)
    left = np.random.randint(0, width - size + 1)

    window = (slice(top, top + size), slice(left, left + size))
    if is_color:
        # Color images keep the full channel axis.
        window = window + (slice(None),)
    return im[window]
def left_right_flip(im, is_color=True):
    """
    Mirror an image along the horizontal direction.

    The second axis (width) is reversed and the flipped image is returned.

    Example usage:

    .. code-block:: python

        >>> im = load_image('cat.jpg')
        >>> im = left_right_flip(im)

    :param im: input image with HWC layout or HW layout for gray image
    :type im: ndarray
    :param is_color: whether input image is color or not
    :type is_color: bool
    :return: the horizontally flipped image.
    """
    treat_as_color = len(im.shape) == 3 and is_color
    if not treat_as_color:
        return im[:, ::-1]
    return im[:, ::-1, :]
def simple_transform(
    im, resize_size, crop_size, is_train, is_color=True, mean=None
):
    """
    Simple data augmentation for training. These operations include
    resizing, cropping and flipping.

    The image is resized so its shorter edge equals ``resize_size``. When
    training it is then randomly cropped and flipped horizontally with
    probability 1/2; otherwise it is center-cropped. A 3-D result is
    transposed to CHW, converted to float32, and ``mean`` (if given) is
    subtracted.

    Example usage:

    .. code-block:: python

        >>> im = load_image('cat.jpg')
        >>> im = simple_transform(im, 256, 224, True)

    :param im: The input image with HWC layout.
    :type im: ndarray
    :param resize_size: The shorter edge length of the resized image.
    :type resize_size: int
    :param crop_size: The cropping size.
    :type crop_size: int
    :param is_train: Whether it is training or not.
    :type is_train: bool
    :param is_color: whether the image is color or not.
    :type is_color: bool
    :param mean: the mean values, which can be element-wise mean values or
                 mean values per channel.
    :type mean: numpy array | list
    :return: the transformed float32 image (CHW layout when the cropped
             image has three dimensions).
    :rtype: ndarray
    """
    im = resize_short(im, resize_size)
    if is_train:
        im = random_crop(im, crop_size, is_color=is_color)
        if np.random.randint(2) == 0:
            im = left_right_flip(im, is_color)
    else:
        im = center_crop(im, crop_size, is_color=is_color)
    if len(im.shape) == 3:
        im = to_chw(im)

    im = im.astype('float32')
    if mean is not None:
        mean = np.array(mean, dtype=np.float32)
        if mean.ndim == 1:
            # One value per channel: add two trailing axes so it broadcasts
            # over H and W of a CHW color image. For a gray image the 1-D
            # mean is used unchanged.
            if is_color:
                mean = mean[:, np.newaxis, np.newaxis]
        else:
            # Element-wise mean: it must have the same rank as the image.
            # The previous check compared against len(im), which is the
            # size of the first axis, and so rejected valid (H, W) means
            # for gray images.
            assert mean.ndim == im.ndim, (
                "element-wise mean must have the same number of "
                "dimensions as the transformed image"
            )
        im -= mean

    return im
def load_and_transform(
    filename, resize_size, crop_size, is_train, is_color=True, mean=None
):
    """
    Read the image stored at `filename` and run it through
    `simple_transform` for data augmentation. Please refer to the
    `simple_transform` interface for the transform operations.

    Example usage:

    .. code-block:: python

        >>> im = load_and_transform('cat.jpg', 256, 224, True)

    :param filename: The file name of input image.
    :type filename: string
    :param resize_size: The shorter edge length of the resized image.
    :type resize_size: int
    :param crop_size: The cropping size.
    :type crop_size: int
    :param is_train: Whether it is training or not.
    :type is_train: bool
    :param is_color: whether the image is color or not.
    :type is_color: bool
    :param mean: the mean values, which can be element-wise mean values or
                 mean values per channel.
    :type mean: numpy array | list
    :return: the value returned by `simple_transform` for the loaded image.
    """
    image = load_image(filename, is_color)
    return simple_transform(
        image, resize_size, crop_size, is_train, is_color, mean
    )
|