utils.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. # Copyright 2022-2023 The Alibaba Fundamental Vision Team Authors. All rights reserved.
  2. import copy
  3. import decord
  4. import numpy as np
  5. from decord import VideoReader, cpu
  6. from decord._ffi.base import DECORDError
  7. from tqdm import tqdm
  8. def decode_video(video_path, target_fps=5):
  9. """
  10. Decode video from 'video_path' and return the sampled frames based on target_fps.
  11. The default value of target_fps is 5.
  12. Args:
  13. video_path: the absolute path of video.
  14. target_fps: the number of sampled video frames per second.
  15. Returns:
  16. [imgs, duration]
  17. """
  18. decord.bridge.set_bridge('torch')
  19. vr = VideoReader(video_path, ctx=cpu(0))
  20. cur_fps = vr.get_avg_fps()
  21. if cur_fps > target_fps:
  22. interval = float(cur_fps) / float(target_fps)
  23. start = float(interval) / 2.
  24. else:
  25. interval = 1.0
  26. start = 0.0
  27. vid_length = len(vr)
  28. duration = vid_length / cur_fps
  29. sampled_idxs = np.clip(
  30. np.round(np.arange(start, float(vid_length), step=interval)), 0,
  31. vid_length - 1).astype(np.int32)
  32. imgs = list()
  33. for i in tqdm(sampled_idxs):
  34. bias = 0
  35. # avoid broken frames
  36. while bias <= 10:
  37. try:
  38. img = vr[i - bias]
  39. break
  40. except DECORDError:
  41. bias += 1
  42. if bias > 10:
  43. img = copy.deepcopy(imgs[-1])
  44. imgs.append(img)
  45. else:
  46. img = img / 255.
  47. img = img.permute(2, 0, 1)
  48. imgs.append(img)
  49. return imgs, duration