| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328 |
- # Copyright (c) Alibaba, Inc. and its affiliates.
- import time
- from typing import Dict, List, Optional, Tuple, Union
- import cv2
- import numpy as np
- import torch
- import torch.nn.functional as F
- from einops import rearrange
# Names exported by a star-import of this module.
# NOTE(review): ``pad_to_size`` and ``unpad_from_size`` are defined further
# down in this file but are not listed here - confirm that is intentional.
__all__ = [
    'gen_diffuse_mask', 'get_crop_bbox', 'get_roi_without_padding',
    'patch_aggregation_overlap', 'patch_partition_overlap', 'preprocess_roi',
    'resize_on_long_side', 'roi_to_tensor', 'smooth_border_mg', 'whiten_img'
]
def resize_on_long_side(img, long_side=800):
    """Resize ``img`` so that its longer side becomes ``long_side`` pixels.

    The aspect ratio is kept; the shorter side is scaled by the same factor
    and truncated to an integer.

    Args:
        img: image array whose first two dimensions are (height, width).
        long_side: target length of the longer side, in pixels.

    Returns:
        A tuple ``(resized_image, scale)`` where ``scale`` is the factor that
        was applied to both sides.
    """
    height, width = img.shape[0], img.shape[1]

    # cv2.resize takes the destination size as (width, height).
    if height > width:
        scale = long_side * 1.0 / height
        dst_size = (int(width * scale), long_side)
    else:
        scale = long_side * 1.0 / width
        dst_size = (long_side, int(height * scale))

    resized = cv2.resize(img, dst_size, interpolation=cv2.INTER_LINEAR)
    return resized, scale
def get_crop_bbox(detecting_results):
    """Turn detections into enlarged square crop boxes.

    Args:
        detecting_results: iterable of dicts with a ``'score'`` entry and a
            ``'bbox'`` sequence whose first four items are read as
            x1, y1, x2, y2. Iteration stops at the first entry whose score
            equals -1.

    Returns:
        A list of dicts with integer ``'x1'``, ``'x2'``, ``'y1'``, ``'y2'``
        describing a square centred on each detection whose side is 1.5x the
        longer side of the detection box. Coordinates are not clipped and may
        fall outside the image.
    """
    enlarge_ratio = 1.5
    square_bboxes = []

    for detection in detecting_results:
        if detection['score'] == -1:
            break

        x1, y1, x2, y2 = (detection['bbox'][k] for k in range(4))
        center_x = (x1 + x2) / 2
        center_y = (y1 + y2) / 2

        # Square side: the longer box side, enlarged and truncated to int.
        side = int(enlarge_ratio * max(abs(x2 - x1), abs(y2 - y1)))
        half_side = side / 2

        square_bboxes.append({
            'x1': int(center_x - half_side),
            'x2': int(center_x + half_side),
            'y1': int(center_y - half_side),
            'y2': int(center_y + half_side),
        })

    return square_bboxes
def get_roi_without_padding(img, bbox):
    """Crop ``bbox`` out of ``img``, clipping the box to the image bounds.

    Args:
        img: array indexed as ``img[row, col, ...]``.
        bbox: dict with ``'x1'``, ``'y1'``, ``'x2'``, ``'y2'`` entries.

    Returns:
        A tuple ``(roi, 0, [top, bottom, left, right])``: the cropped view,
        a constant 0, and the clipped crop coordinates that were used.
    """
    top, bottom = max(bbox['y1'], 0), min(bbox['y2'], img.shape[0])
    left, right = max(bbox['x1'], 0), min(bbox['x2'], img.shape[1])
    return img[top:bottom, left:right], 0, [top, bottom, left, right]
def roi_to_tensor(img):
    """Convert a 3-D numpy image into a 4-D torch tensor.

    The axes are reordered from (0, 1, 2) to (2, 0, 1) and a leading axis of
    size 1 is added, e.g. (H, W, C) becomes (1, C, H, W). The dtype is kept.
    """
    channels_first = img.transpose((2, 0, 1))
    return torch.from_numpy(channels_first).unsqueeze(0)
def preprocess_roi(img):
    """Map a tensor from the 0~255 range to float values in -1~1.

    The input is cast to float, divided by 255, shifted by -0.5 and doubled.
    """
    unit_range = img.float() / 255.0
    return (unit_range - 0.5) * 2
def patch_partition_overlap(image, p1, p2, padding=32):
    """Split a batch of images into overlapping patches.

    The image is zero-padded by ``padding`` pixels on every spatial side and
    cut into ``(H // p1) * (W // p2)`` patches; each patch covers a
    ``p1 x p2`` cell plus a ``padding``-wide border shared with its
    neighbours. Rows/columns beyond the last full cell are dropped.

    Args:
        image: 4-D tensor of shape (B, C, H, W).
        p1: cell height.
        p2: cell width.
        padding: overlap border added around every cell.

    Returns:
        Tensor of shape ``(B * h * w, C, p1 + 2 * padding, p2 + 2 * padding)``
        with the first axis ordered as ``(b h w)``: all patches of the first
        image (row-major), then all patches of the second image, and so on.
        This is the ordering ``patch_aggregation_overlap`` decodes.
    """
    _, _, H, W = image.size()
    h, w = H // p1, W // p2
    image = F.pad(
        image,
        pad=(padding, padding, padding, padding, 0, 0),
        mode='constant',
        value=0)

    patch_list = [
        image[:, :, p1 * i:p1 * (i + 1) + padding * 2,
              p2 * j:p2 * (j + 1) + padding * 2]
        for i in range(h)
        for j in range(w)
    ]

    # Stacking on a new axis 1 gives (b, h*w, c, ph, pw); flattening the first
    # two axes yields the documented (b h w) order. Concatenating on axis 0
    # would interleave the batch as (h w b) and scramble batches larger than 1.
    output = torch.stack(
        patch_list, dim=1).flatten(0, 1)  # (b h w) c (p1 + 2 * padding) (p2 + 2 * padding)
    return output
def patch_aggregation_overlap(image, h, w, padding=32):
    """Reassemble patches into full images, dropping the overlap border.

    Args:
        image: tensor of shape ``(B * h * w, C, P1, P2)`` whose first axis is
            ordered as ``(b h w)``.
        h: number of patch rows per image.
        w: number of patch columns per image.
        padding: border width to strip from every side of each patch before
            stitching. ``0`` keeps the patches as they are.

    Returns:
        Tensor of shape ``(B, C, h * p1, w * p2)`` where ``p1``/``p2`` are the
        patch sizes after the border has been removed.
    """
    # ``padding:-padding`` degenerates to ``0:0`` (an empty slice) when
    # padding is 0, so only crop when there is a border to remove.
    if padding:
        image = image[:, :, padding:-padding, padding:-padding]
    output = rearrange(image, '(b h w) c p1 p2 -> b c (h p1) (w p2)', h=h, w=w)
    return output
def smooth_border_mg(diffuse_mask, mg):
    """Blend ``mg`` towards the neutral value 0.5 according to a mask.

    ``diffuse_mask`` is bilinearly resized to the first two dimensions of
    ``mg``; its first batch entry is then moved to channels-last and used as
    a per-pixel weight: weight 1 keeps ``mg`` unchanged, weight 0 yields 0.5.

    Args:
        diffuse_mask: 4-D tensor accepted by ``F.interpolate``
            (batch, channels, height, width).
        mg: tensor whose first two dimensions give the target spatial size.

    Returns:
        ``(mg - 0.5) * weight + 0.5``.
    """
    resized = F.interpolate(diffuse_mask, mg.shape[:2], mode='bilinear')
    weight = resized[0].permute(1, 2, 0)
    return (mg - 0.5) * weight + 0.5
def whiten_img(image, skin_mask, whitening_degree, flag_bigKernal=False):
    """Brighten ``image`` inside a smoothed skin mask.

    Args:
        image: rgb image. It is used as a torch tensor (``.device``,
            ``.half()``) and its first two dimensions are taken as the output
            spatial size, so it is presumably laid out as (H, W, C) with
            values in 0~255 - TODO confirm against the caller.
        skin_mask: array handed to OpenCV; when it is 3-D only its last
            channel is used. It is divided by 255 after blurring, so it
            presumably holds values in 0~255 - TODO confirm.
        whitening_degree: multiplier applied to the mask strength.
        flag_bigKernal: when truthy, an 80-pixel instead of a 30-pixel
            elliptical kernel is used for the dilate/erode step.

    Returns:
        The processed image as a numpy array obtained from a ``byte()``
        tensor moved to the CPU.
    """
    dilate_kernalsize = 30
    if flag_bigKernal:
        dilate_kernalsize = 80
    # Both kernels are built with identical arguments.
    new_kernel1 = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE, (dilate_kernalsize, dilate_kernalsize))
    new_kernel2 = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE, (dilate_kernalsize, dilate_kernalsize))
    if len(skin_mask.shape) == 3:
        skin_mask = skin_mask[:, :, -1]
    # Dilate then erode with the same kernel, followed by a 20x20 box blur.
    # NOTE(review): in the documented Python signature of cv2.dilate/erode
    # the third positional parameter is ``dst``, not ``iterations`` - confirm
    # what the trailing ``1`` is meant to do.
    skin_mask = cv2.dilate(skin_mask, new_kernel1, 1)
    skin_mask = cv2.erode(skin_mask, new_kernel2, 1)
    skin_mask = cv2.blur(skin_mask, (20, 20)) / 255.0
    skin_mask = skin_mask.squeeze()
    skin_mask = torch.from_numpy(skin_mask).to(image.device)
    # Replicate the mask into three channels and add a batch axis.
    skin_mask = torch.stack([skin_mask, skin_mask, skin_mask], dim=0)[None,
                                                                      ...]
    # Channels 1 and 2 get 75% of the strength applied to channel 0.
    skin_mask[:, 1:, :, :] *= 0.75
    # 0.5 is the neutral value of the curve below; the mask pushes it upwards.
    whiten_mg = skin_mask * 0.2 * whitening_degree + 0.5
    assert len(whiten_mg.shape) == 4
    # Resize to the image's first two dimensions and move channels last.
    whiten_mg = F.interpolate(
        whiten_mg, image.shape[:2], mode='bilinear')[0].permute(1, 2,
                                                                0).half()
    output_pred = image.half()
    output_pred = output_pred / 255.0
    # Quadratic curve (1 - 2m) * x^2 + 2m * x: identity when m == 0.5, and
    # for m > 0.5 it raises every x strictly between 0 and 1.
    output_pred = (
        -2 * whiten_mg + 1
    ) * output_pred * output_pred + 2 * whiten_mg * output_pred  # value: 0~1
    output_pred = output_pred * 255.0
    output_pred = output_pred.byte()
    output_pred = output_pred.cpu().numpy()
    return output_pred
def gen_diffuse_mask(out_channels=3, mask_size=500, diffuse_width=20):
    """Build a square mask that is 1 in the centre and fades to 0 at the edges.

    Along each axis the weight at index ``i`` is
    ``min(min(i, mask_size - i) / diffuse_width, 1)``; the value of a pixel is
    the minimum of its row weight and its column weight. With the defaults
    this reproduces the original 500x500 mask with a 20-pixel linear ramp.

    Args:
        out_channels: number of identical channels stacked on the last axis.
        mask_size: side length of the square mask, in pixels.
        diffuse_width: width of the linear ramp at each border, in pixels.

    Returns:
        ``np.float32`` array of shape ``(mask_size, mask_size, out_channels)``.

    Raises:
        ValueError: if ``diffuse_width`` is not positive.
    """
    if diffuse_width <= 0:
        raise ValueError(
            f'diffuse_width must be positive, got {diffuse_width}')

    # 1-D ramp shared by rows and columns. The far edge is measured as
    # ``mask_size - i``, so the last index keeps a small non-zero weight.
    idx = np.arange(mask_size)
    edge_distance = np.minimum(idx, mask_size - idx)
    ramp = np.minimum(edge_distance / diffuse_width, 1.0).astype(np.float32)

    # Broadcasting replaces the two nested per-pixel Python loops.
    mask = np.minimum(ramp[:, None], ramp[None, :])
    return np.dstack([mask] * out_channels)
def pad_to_size(
    target_size: Tuple[int, int],
    image: np.ndarray,
    bboxes: Optional[np.ndarray] = None,
    keypoints: Optional[np.ndarray] = None,
) -> Dict[str, Union[np.ndarray, Tuple[int, int, int, int]]]:
    """Pads the image on the sides to the target_size

    The padding is split evenly between the two sides of each axis; when the
    amount is odd the extra pixel goes to the max (bottom/right) side.
    ``bboxes`` and ``keypoints`` are shifted on copies, so the arrays passed
    in by the caller are left untouched.

    Args:
        target_size: (target_height, target_width)
        image: array whose first two dimensions are (height, width)
        bboxes: np.array with shape (num_boxes, 4). Each row: [x_min, y_min, x_max, y_max]
        keypoints: np.array with shape (num_keypoints, 2), each row: [x, y]

    Returns:
        {
            "image": padded_image,
            "pads": (x_min_pad, y_min_pad, x_max_pad, y_max_pad),
            "bboxes": shifted_boxes,
            "keypoints": shifted_keypoints
        }
        "bboxes" / "keypoints" are present only when the matching argument
        was given.

    Raises:
        ValueError: if the image is larger than ``target_size`` on any axis.
    """
    target_height, target_width = target_size
    image_height, image_width = image.shape[:2]

    if target_width < image_width:
        raise ValueError(
            'Target width should not be smaller than image width. '
            f'We got {target_width} {image_width}')
    if target_height < image_height:
        raise ValueError(
            'Target height should not be smaller than image height. '
            f'We got {target_height} {image_height}')

    # A zero difference naturally yields (0, 0) pads.
    y_pad = target_height - image_height
    y_min_pad = y_pad // 2
    y_max_pad = y_pad - y_min_pad

    x_pad = target_width - image_width
    x_min_pad = x_pad // 2
    x_max_pad = x_pad - x_min_pad

    result = {
        'pads': (x_min_pad, y_min_pad, x_max_pad, y_max_pad),
        'image':
        cv2.copyMakeBorder(image, y_min_pad, y_max_pad, x_min_pad, x_max_pad,
                           cv2.BORDER_CONSTANT),
    }

    if bboxes is not None:
        # Work on a copy so the caller's array is not modified in place.
        bboxes = bboxes.copy()
        bboxes[:, 0] += x_min_pad
        bboxes[:, 1] += y_min_pad
        bboxes[:, 2] += x_min_pad
        bboxes[:, 3] += y_min_pad
        result['bboxes'] = bboxes

    if keypoints is not None:
        keypoints = keypoints.copy()
        keypoints[:, 0] += x_min_pad
        keypoints[:, 1] += y_min_pad
        result['keypoints'] = keypoints

    return result
def unpad_from_size(
    pads: Tuple[int, int, int, int],
    image: Optional[np.ndarray] = None,
    bboxes: Optional[np.ndarray] = None,
    keypoints: Optional[np.ndarray] = None,
) -> Dict[str, np.ndarray]:
    """Crops patch from the center so that sides are equal to pads.

    This undoes a padding described by ``pads``: the padded border is cut off
    the image and the coordinates are shifted back by the min-side pads.
    ``bboxes`` and ``keypoints`` are shifted on copies, so the arrays passed
    in by the caller are left untouched.

    Args:
        pads: (x_min_pad, y_min_pad, x_max_pad, y_max_pad)
        image: array whose first two dimensions are (height, width)
        bboxes: np.array with shape (num_boxes, 4). Each row: [x_min, y_min, x_max, y_max]
        keypoints: np.array with shape (num_keypoints, 2), each row: [x, y]

    Returns: cropped image
        {
            "image": cropped_image,
            "bboxes": shifted_boxes,
            "keypoints": shifted_keypoints
        }
        Each key is present only when the matching argument was given.
    """
    x_min_pad, y_min_pad, x_max_pad, y_max_pad = pads

    result = {}

    if image is not None:
        height, width = image.shape[:2]
        # Explicit end indices keep the slice valid when a max pad is 0.
        result['image'] = image[y_min_pad:height - y_max_pad,
                                x_min_pad:width - x_max_pad]

    if bboxes is not None:
        # Work on a copy so the caller's array is not modified in place.
        bboxes = bboxes.copy()
        bboxes[:, 0] -= x_min_pad
        bboxes[:, 1] -= y_min_pad
        bboxes[:, 2] -= x_min_pad
        bboxes[:, 3] -= y_min_pad
        result['bboxes'] = bboxes

    if keypoints is not None:
        keypoints = keypoints.copy()
        keypoints[:, 0] -= x_min_pad
        keypoints[:, 1] -= y_min_pad
        result['keypoints'] = keypoints

    return result
|