| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232 |
- # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from io import StringIO
- import numpy as np
- from PIL import Image
# The public-name list is empty, so ``from <this module> import *`` binds
# nothing; callers must import the helpers below by name.
__all__ = []
def resize_image(img, target_size):
    """
    Resize an image so that the shorter edge has length target_size.

    Both edges are scaled by the same factor (aspect ratio is kept) and the
    resulting edge lengths are rounded to the nearest integer.

    img: the input image to be resized. It is used through its ``size``
         attribute and ``resize`` method (presumably a PIL image).
    target_size: the desired length of the shorter edge.

    Returns the object produced by ``img.resize``.
    """
    percent = target_size / float(min(img.size[0], img.size[1]))
    resized_size = (
        int(round(img.size[0] * percent)),
        int(round(img.size[1] * percent)),
    )
    # Image.ANTIALIAS was removed in Pillow 10. LANCZOS is the same filter
    # under its current name; fall back to ANTIALIAS only on very old PIL
    # builds that do not define LANCZOS.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:
        resample = Image.ANTIALIAS
    return img.resize(resized_size, resample)
def flip(im):
    """
    Return the horizontally mirrored image.

    A 3-D input has its third axis reversed; any other input has its
    second axis reversed.
    im: input image, a (K x H x W) ndarray or a 2-D (H x W) ndarray.
    """
    is_three_dimensional = len(im.shape) == 3
    if not is_three_dimensional:
        return im[:, ::-1]
    return im[:, :, ::-1]
def crop_img(im, inner_size, color=True, test=True):
    """
    Return cropped image.

    The image is first placed in the center of a zero-filled buffer that is
    at least inner_size in both spatial dimensions, then an
    inner_size * inner_size window is cut out of that buffer.

    im: (K x H x W) ndarray when color is True, (H x W) ndarray otherwise.
    inner_size: the cropped image size.
    color: whether it is color image. The color buffer is allocated with
           3 channels.
    test: whether in test mode.
          If False, does random cropping and flipping.
          If True, crop the center of images.

    Returns a view into the padded buffer (default ``np.zeros`` dtype) of
    shape (3, inner_size, inner_size) for color images and
    (inner_size, inner_size) otherwise.
    """
    if color:
        src_h, src_w = im.shape[1], im.shape[2]
    else:
        im = im.astype('float32')
        src_h, src_w = im.shape[0], im.shape[1]

    height, width = max(inner_size, src_h), max(inner_size, src_w)
    padded_im = np.zeros((3, height, width) if color else (height, width))

    # Floor division is required: "/" yields floats on Python 3 and float
    # slice bounds make the indexing below raise TypeError.
    startY = (height - src_h) // 2
    startX = (width - src_w) // 2
    padded_im[..., startY:startY + src_h, startX:startX + src_w] = im

    if test:
        startY = (height - inner_size) // 2
        startX = (width - inner_size) // 2
    else:
        startY = np.random.randint(0, height - inner_size + 1)
        startX = np.random.randint(0, width - inner_size + 1)
    endY, endX = startY + inner_size, startX + inner_size

    pic = padded_im[..., startY:endY, startX:endX]
    # In training mode mirror the crop for one of the two random outcomes.
    if (not test) and (np.random.randint(2) == 0):
        pic = flip(pic)
    return pic
def decode_jpeg(jpeg_string):
    """
    Decode an in-memory encoded image into an ndarray.

    jpeg_string: the encoded image content as a bytes-like object.

    Returns the decoded pixels. A 3-D result is transposed so that its
    last axis becomes the first (H x W x K -> K x H x W); results of any
    other rank are returned unchanged.
    """
    # Encoded image data is binary, so it must be wrapped in BytesIO;
    # StringIO only accepts text on Python 3. Imported locally because the
    # module header only imports StringIO.
    from io import BytesIO

    np_array = np.array(Image.open(BytesIO(jpeg_string)))
    if len(np_array.shape) == 3:
        np_array = np.transpose(np_array, (2, 0, 1))
    return np_array
def preprocess_img(im, img_mean, crop_size, is_train, color=True):
    """
    Crop an image, subtract the mean and return it flattened.

    When is_train is true the crop position is random and the crop may be
    mirrored; when it is false the center region is cropped.

    im: (K x H x W) ndarrays
    img_mean: value subtracted in place from the cropped image.
    crop_size: edge length of the square crop.
    is_train: whether training-time augmentation is applied.
    color: whether it is color image.
    """
    cropped = crop_img(im.astype('float32'), crop_size, color, not is_train)
    cropped -= img_mean
    return cropped.flatten()
def load_meta(meta_path, mean_img_size, crop_size, color=True):
    """
    Return the loaded meta file.

    Load the meta image, which is the mean of the images in the dataset.
    The mean image is subtracted from every input image so that the expected
    mean of each input image is zero.

    meta_path: path passed to ``np.load``; the loaded object must provide a
               flat 'data_mean' entry.
    mean_img_size: edge length of the (square) stored mean image.
    crop_size: edge length of the centered window cut out of the mean image.
    color: whether the mean image has 3 channels.

    Returns a float32 ndarray of shape (3, crop_size, crop_size) for color
    images and (crop_size, crop_size) otherwise.
    """
    meta = np.load(meta_path)
    try:
        mean = meta['data_mean']
    finally:
        # .npz archives keep a file handle open until closed; plain arrays
        # have no close(), hence the getattr.
        close = getattr(meta, 'close', None)
        if close is not None:
            close()

    # Floor division is required: "/" yields a float on Python 3 and float
    # slice bounds raise TypeError.
    border = (mean_img_size - crop_size) // 2
    window = slice(border, border + crop_size)
    if color:
        assert mean_img_size * mean_img_size * 3 == mean.shape[0]
        mean = mean.reshape(3, mean_img_size, mean_img_size)
        mean = mean[:, window, window].astype('float32')
    else:
        assert mean_img_size * mean_img_size == mean.shape[0]
        mean = mean.reshape(mean_img_size, mean_img_size)
        mean = mean[window, window].astype('float32')
    return mean
def load_image(img_path, is_color=True):
    """
    Open an image file, force its pixel data to be read, and return it.

    img_path: image path.
    is_color: is color image or not. Accepted but not used by this function.
    """
    image = Image.open(img_path)
    # Image.open alone does not read the pixel data; load() does.
    image.load()
    return image
def oversample(img, crop_dims):
    """
    Build ten crops per input image.

    img: sequence of (H x W x K) ndarrays; the shape of the first one is
         used for every image.
    crop_dims: (height, width) tuple for the crops.

    For each image the result holds the four corner patches and the center
    patch, followed by the horizontal reflections of those same five
    patches. The returned float32 array has shape
    (10 * len(img), crop_dims[0], crop_dims[1], K).
    """
    im_shape = np.array(img[0].shape)
    crop_dims = np.array(crop_dims)
    crop_h, crop_w = crop_dims[0], crop_dims[1]
    im_center = im_shape[:2] / 2.0

    # Rows are (top, left, bottom, right): four corners, then the center.
    boxes = np.empty((5, 4), dtype=int)
    corner_boxes = [
        (top, left, top + crop_h, left + crop_w)
        for top in (0, im_shape[0] - crop_h)
        for left in (0, im_shape[1] - crop_w)
    ]
    for row, box in enumerate(corner_boxes):
        boxes[row] = box
    half_extent = np.concatenate([-crop_dims / 2.0, crop_dims / 2.0])
    boxes[4] = np.tile(im_center, (1, 2)) + half_extent
    # Repeat the five boxes; the second copy is mirrored after extraction.
    boxes = np.tile(boxes, (2, 1))

    crops = np.empty(
        (10 * len(img), crop_h, crop_w, im_shape[-1]),
        dtype=np.float32,
    )
    for image_no, im in enumerate(img):
        base = image_no * 10
        for offset, (top, left, bottom, right) in enumerate(boxes):
            crops[base + offset] = im[top:bottom, left:right, :]
        mirrored = slice(base + 5, base + 10)
        crops[mirrored] = crops[mirrored, :, ::-1, :]  # flip for mirrors
    return crops
class ImageTransformer:
    """
    Apply an optional axis transpose, channel reorder and mean subtraction
    to an image array, in that order.
    """

    def __init__(
        self, transpose=None, channel_swap=None, mean=None, is_color=True
    ):
        # is_color must be stored first: the setters below consult it.
        self.is_color = is_color
        self.set_transpose(transpose)
        self.set_channel_swap(channel_swap)
        self.set_mean(mean)

    def set_transpose(self, order):
        """Store the axis order; color images require exactly 3 entries."""
        if order is not None and self.is_color:
            assert 3 == len(order)
        self.transpose = order

    def set_channel_swap(self, order):
        """Store the channel order; color images require exactly 3 entries."""
        if order is not None and self.is_color:
            assert 3 == len(order)
        self.channel_swap = order

    def set_mean(self, mean):
        """
        Store the mean to subtract.

        A 1-D mean (one value per channel) is reshaped to (K, 1, 1) so it
        broadcasts over the spatial axes; any other mean is treated as
        elementwise and, for color images, must be 3-D.
        """
        if mean is not None:
            if mean.ndim == 1:
                mean = mean[:, np.newaxis, np.newaxis]
            elif self.is_color:
                assert len(mean.shape) == 3
        self.mean = mean

    def transformer(self, data):
        """
        Run the configured steps on data and return the result.

        The mean is subtracted with an in-place operation, so when no
        earlier step produced a new array the caller's array is modified.
        """
        result = data
        if self.transpose is not None:
            result = result.transpose(self.transpose)
        if self.channel_swap is not None:
            result = result[self.channel_swap, :, :]
        if self.mean is not None:
            result -= self.mean
        return result
|