| 123456789101112131415161718192021222324252627282930313233343536373839404142434445 |
- # Part of the implementation is borrowed and modified from DUTCode,
- # publicly available at https://github.com/Annbless/DUTCode
- import cv2
- import numpy as np
- import torch
- import torch.nn as nn
- from modelscope.preprocessors.cv import VideoReader
def stabilization_preprocessor(input, cfg):
    """Decode a video and build the inputs for the stabilization model.

    Every frame yielded by ``VideoReader(input)`` is converted into three
    representations:

    * a grayscale frame scaled by 1/255 and resized to the model resolution,
    * a colour frame resized to the model resolution,
    * the colour frame at its decoded resolution.

    Args:
        input: Video source handed unchanged to ``VideoReader``.
            (The name shadows the builtin but is part of the public
            signature, so it is kept.)
        cfg: Configuration object exposing ``cfg.MODEL.WIDTH`` and
            ``cfg.MODEL.HEIGHT``, the resolution the model works at.

    Returns:
        dict with the keys:

        * ``'ori_images'``: list with one array per frame, each the
          channels-first frame with a leading axis of size 1.
        * ``'x'``: float32 tensor of shape ``(1, 1, N, HEIGHT, WIDTH)``
          holding the scaled grayscale frames (``N`` = frame count).
        * ``'x_rgb'``: float32 tensor of shape
          ``(1, N, C, HEIGHT, WIDTH)`` holding the resized colour frames.
        * ``'fps'``, ``'width'``, ``'height'``: values reported by the
          video reader.

    Raises:
        ValueError: If the reader yields no frames at all.
    """
    model_w = cfg.MODEL.WIDTH
    model_h = cfg.MODEL.HEIGHT
    # cv2.resize expects the target size as (width, height).
    model_size = (model_w, model_h)

    video_reader = VideoReader(input)

    gray_frames = []
    rgb_images = []
    ori_images = []
    for raw_frame in video_reader:
        # The channel axis is reversed by np.flip and swapped again by
        # RGB2BGR, so for 3-channel frames `frame` ends up in the channel
        # order delivered by the reader. Both steps are kept to preserve the
        # original pipeline exactly.
        # NOTE(review): BGR2GRAY below presumes the reader yields BGR frames
        # -- confirm against VideoReader.
        frame = cv2.cvtColor(np.flip(raw_frame, axis=2), cv2.COLOR_RGB2BGR)

        # Scale before resizing so interpolation runs on floating-point data,
        # exactly as the original pipeline did.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) * (1. / 255.)
        gray = cv2.resize(gray, model_size)
        gray_frames.append(gray.reshape(1, 1, model_h, model_w))

        # HWC -> CHW, plus a leading axis used for concatenation below.
        resized = cv2.resize(frame, model_size)
        rgb_images.append(np.expand_dims(np.transpose(resized, (2, 0, 1)), 0))
        ori_images.append(np.expand_dims(np.transpose(frame, (2, 0, 1)), 0))

    if not ori_images:
        # np.concatenate would otherwise fail with an opaque
        # "need at least one array to concatenate" message.
        raise ValueError(
            'No frames could be read from video input: {!r}'.format(input))

    # Grayscale frames are stacked along axis 1, colour frames along axis 0;
    # unsqueeze(0) then adds one more leading axis to each tensor.
    x = torch.from_numpy(
        np.concatenate(gray_frames, 1).astype(np.float32)).unsqueeze(0)
    x_rgb = torch.from_numpy(
        np.concatenate(rgb_images, 0).astype(np.float32)).unsqueeze(0)

    return {
        'ori_images': ori_images,
        'x': x,
        'x_rgb': x_rgb,
        'fps': video_reader.fps,
        'width': video_reader.width,
        'height': video_reader.height
    }
|