pyav.py 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244
  1. """Read/Write Videos (and images) using PyAV.
  2. .. note::
  3. To use this plugin you need to have `PyAV <https://pyav.org/docs/stable/>`_
  4. installed::
  5. pip install av
  6. This plugin wraps pyAV, a pythonic binding for the FFMPEG library. It is similar
  7. to our FFMPEG plugin, has improved performance, features a robust interface, and
  8. aims to supersede the FFMPEG plugin in the future.
  9. Methods
  10. -------
  11. .. note::
  12. Check the respective function for a list of supported kwargs and detailed
  13. documentation.
  14. .. autosummary::
  15. :toctree:
  16. PyAVPlugin.read
  17. PyAVPlugin.iter
  18. PyAVPlugin.write
  19. PyAVPlugin.properties
  20. PyAVPlugin.metadata
  21. Additional methods available inside the :func:`imopen <imageio.v3.imopen>`
  22. context:
  23. .. autosummary::
  24. :toctree:
  25. PyAVPlugin.init_video_stream
  26. PyAVPlugin.write_frame
  27. PyAVPlugin.set_video_filter
  28. PyAVPlugin.container_metadata
  29. PyAVPlugin.video_stream_metadata
  30. Advanced API
  31. ------------
  32. In addition to the default ImageIO v3 API this plugin exposes custom functions
  33. that are specific to reading/writing video and its metadata. These are available
  34. inside the :func:`imopen <imageio.v3.imopen>` context and allow fine-grained
  35. control over how the video is processed. The functions are documented above and
  36. below you can find a usage example::
  37. import imageio.v3 as iio
  38. with iio.imopen("test.mp4", "w", plugin="pyav") as file:
  39. file.init_video_stream("libx264")
  40. file.container_metadata["comment"] = "This video was created using ImageIO."
  41. for _ in range(5):
  42. for frame in iio.imiter("imageio:newtonscradle.gif"):
  43. file.write_frame(frame)
  44. meta = iio.immeta("test.mp4", plugin="pyav")
  45. assert meta["comment"] == "This video was created using ImageIO."
  46. Pixel Formats (Colorspaces)
  47. ---------------------------
  48. By default, this plugin converts the video into 8-bit RGB (called ``rgb24`` in
  49. ffmpeg). This is a useful behavior for many use-cases, but sometimes you may
  50. want to use the video's native colorspace or you may wish to convert the video
  51. into an entirely different colorspace. This is controlled using the ``format``
  52. kwarg. You can use ``format=None`` to leave the image in its native colorspace
  53. or specify any colorspace supported by FFMPEG as long as it is stridable, i.e.,
  54. as long as it can be represented by a single numpy array. Some useful choices
  55. include:
  56. - rgb24 (default; 8-bit RGB)
  57. - rgb48le (16-bit little-endian RGB)
  58. - bgr24 (8-bit BGR; openCVs default colorspace)
  59. - gray (8-bit grayscale)
  60. - yuv444p (8-bit channel-first YUV)
  61. Further, FFMPEG maintains a list of available formats, albeit not as part of the
  62. narrative docs. It can be `found here
  63. <https://ffmpeg.org/doxygen/trunk/pixfmt_8h_source.html>`_ (warning: C source
  64. code).
  65. Filters
  66. -------
  67. On top of providing basic read/write functionality, this plugin allows you to
  68. use the full collection of `video filters available in FFMPEG
  69. <https://ffmpeg.org/ffmpeg-filters.html#Video-Filters>`_. This means that you
  70. can apply extensive preprocessing to your video before retrieving it as a numpy
  71. array or apply extensive post-processing before you encode your data.
  72. Filters come in two forms: sequences or graphs. Filter sequences are, as the
  73. name suggests, sequences of filters that are applied one after the other. They
  74. are specified using the ``filter_sequence`` kwarg. Filter graphs, on the other
  75. hand, come in the form of a directed graph and are specified using the
  76. ``filter_graph`` kwarg.
  77. .. note::
  78. All filters are either sequences or graphs. If all you want is to apply a
  79. single filter, you can do this by specifying a filter sequence with a single
  80. entry.
  81. A ``filter_sequence`` is a list of filters, each defined through a 2-element
  82. tuple of the form ``(filter_name, filter_parameters)``. The first element of the
  83. tuple is the name of the filter. The second element are the filter parameters,
  84. which can be given either as a string or a dict. The string matches the same
  85. format that you would use when specifying the filter using the ffmpeg
  86. command-line tool and the dict has entries of the form ``parameter:value``. For
  87. example::
  88. import imageio.v3 as iio
  89. # using a filter_parameters str
  90. img1 = iio.imread(
  91. "imageio:cockatoo.mp4",
  92. plugin="pyav",
  93. filter_sequence=[
  94. ("rotate", "45*PI/180")
  95. ]
  96. )
  97. # using a filter_parameters dict
  98. img2 = iio.imread(
  99. "imageio:cockatoo.mp4",
  100. plugin="pyav",
  101. filter_sequence=[
  102. ("rotate", {"angle":"45*PI/180", "fillcolor":"AliceBlue"})
  103. ]
  104. )
  105. A ``filter_graph``, on the other hand, is specified using a ``(nodes, edges)``
  106. tuple. It is best explained using an example::
  107. img = iio.imread(
  108. "imageio:cockatoo.mp4",
  109. plugin="pyav",
  110. filter_graph=(
  111. {
  112. "split": ("split", ""),
  113. "scale_overlay":("scale", "512:-1"),
  114. "overlay":("overlay", "x=25:y=25:enable='between(t,1,8)'"),
  115. },
  116. [
  117. ("video_in", "split", 0, 0),
  118. ("split", "overlay", 0, 0),
  119. ("split", "scale_overlay", 1, 0),
  120. ("scale_overlay", "overlay", 0, 1),
  121. ("overlay", "video_out", 0, 0),
  122. ]
  123. )
  124. )
  125. The above transforms the video to have picture-in-picture of itself in the top
  126. left corner. As you can see, nodes are specified using a dict which has names as
  127. its keys and filter tuples as values; the same tuples as the ones used when
  128. defining a filter sequence. Edges are a list of 4-tuples of the form
  129. ``(node_out, node_in, output_idx, input_idx)`` and specify which two filters are
  130. connected and which inputs/outputs should be used for this.
  131. Further, there are two special nodes in a filter graph: ``video_in`` and
  132. ``video_out``, which represent the graph's input and output respectively. These
  133. names can not be chosen for other nodes (those nodes would simply be
  134. overwritten), and for a graph to be valid there must be a path from the input to
  135. the output and all nodes in the graph must be connected.
  136. While most graphs are quite simple, they can become very complex and we
  137. recommend that you read through the `FFMPEG documentation
  138. <https://ffmpeg.org/ffmpeg-filters.html#Filtergraph-description>`_ and their
  139. examples to better understand how to use them.
  140. """
  141. from fractions import Fraction
  142. from math import ceil
  143. from typing import Any, Dict, Generator, List, Optional, Tuple, Union
  144. import av
  145. import av.filter
  146. import numpy as np
  147. from av.codec.context import Flags
  148. from numpy.lib.stride_tricks import as_strided
  149. from ..core import Request
  150. from ..core.request import URI_BYTES, InitializationError, IOMode
  151. from ..core.v3_plugin_api import ImageProperties, PluginV3
  152. def _format_to_dtype(format: av.VideoFormat) -> np.dtype:
  153. """Convert a pyAV video format into a numpy dtype"""
  154. if len(format.components) == 0:
  155. # fake format
  156. raise ValueError(
  157. f"Can't determine dtype from format `{format.name}`. It has no channels."
  158. )
  159. endian = ">" if format.is_big_endian else "<"
  160. dtype = "f" if "f32" in format.name else "u"
  161. bits_per_channel = [x.bits for x in format.components]
  162. n_bytes = str(int(ceil(bits_per_channel[0] / 8)))
  163. return np.dtype(endian + dtype + n_bytes)
  164. def _get_frame_shape(frame: av.VideoFrame) -> Tuple[int, ...]:
  165. """Compute the frame's array shape
  166. Parameters
  167. ----------
  168. frame : av.VideoFrame
  169. A frame for which the resulting shape should be computed.
  170. Returns
  171. -------
  172. shape : Tuple[int, ...]
  173. A tuple describing the shape of the image data in the frame.
  174. """
  175. widths = [component.width for component in frame.format.components]
  176. heights = [component.height for component in frame.format.components]
  177. bits = np.array([component.bits for component in frame.format.components])
  178. line_sizes = [plane.line_size for plane in frame.planes]
  179. subsampled_width = widths[:-1] != widths[1:]
  180. subsampled_height = heights[:-1] != heights[1:]
  181. unaligned_components = np.any(bits % 8 != 0) or (line_sizes[:-1] != line_sizes[1:])
  182. if subsampled_width or subsampled_height or unaligned_components:
  183. raise IOError(
  184. f"{frame.format.name} can't be expressed as a strided array."
  185. "Use `format=` to select a format to convert into."
  186. )
  187. shape = [frame.height, frame.width]
  188. # ffmpeg doesn't have a notion of channel-first or channel-last formats
  189. # instead it stores frames in one or more planes which contain individual
  190. # components of a pixel depending on the pixel format. For channel-first
  191. # formats each component lives on a separate plane (n_planes) and for
  192. # channel-last formats all components are packed on a single plane
  193. # (n_channels)
  194. n_planes = max([component.plane for component in frame.format.components]) + 1
  195. if n_planes > 1:
  196. shape = [n_planes] + shape
  197. channels_per_plane = [0] * n_planes
  198. for component in frame.format.components:
  199. channels_per_plane[component.plane] += 1
  200. n_channels = max(channels_per_plane)
  201. if n_channels > 1:
  202. shape = shape + [n_channels]
  203. return tuple(shape)
  204. def _get_frame_type(picture_type: int) -> str:
  205. """Return a human-readable name for provided picture type
  206. Parameters
  207. ----------
  208. picture_type : int
  209. The picture type extracted from Frame.pict_type
  210. Returns
  211. -------
  212. picture_name : str
  213. A human readable name of the picture type
  214. """
  215. if not isinstance(picture_type, int):
  216. # old pyAV versions send an enum, not an int
  217. return picture_type.name
  218. picture_types = [
  219. "NONE",
  220. "I",
  221. "P",
  222. "B",
  223. "S",
  224. "SI",
  225. "SP",
  226. "BI",
  227. ]
  228. return picture_types[picture_type]
  229. class PyAVPlugin(PluginV3):
  230. """Support for pyAV as backend.
  231. Parameters
  232. ----------
  233. request : iio.Request
  234. A request object that represents the user's intent. It provides a
  235. standard interface to access the various ImageResources and
  236. serves them to the plugin as a file object (or file). Check the docs for
  237. details.
  238. container : str
  239. Only used during `iio_mode="w"`! If not None, overwrite the default container
  240. format chosen by pyav.
  241. kwargs : Any
  242. Additional kwargs are forwarded to PyAV's constructor.
  243. """
  244. def __init__(self, request: Request, *, container: str = None, **kwargs) -> None:
  245. """Initialize a new Plugin Instance.
  246. See Plugin's docstring for detailed documentation.
  247. Notes
  248. -----
  249. The implementation here stores the request as a local variable that is
  250. exposed using a @property below. If you inherit from PluginV3, remember
  251. to call ``super().__init__(request)``.
  252. """
  253. super().__init__(request)
  254. self._container = None
  255. self._video_stream = None
  256. self._video_filter = None
  257. if request.mode.io_mode == IOMode.read:
  258. self._next_idx = 0
  259. try:
  260. if request._uri_type == 5: # 5 is the value of URI_HTTP
  261. # pyav should read from HTTP by itself. This enables reading
  262. # HTTP-based streams like DASH. Note that solving streams
  263. # like this is temporary until the new request object gets
  264. # implemented.
  265. self._container = av.open(request.raw_uri, **kwargs)
  266. else:
  267. self._container = av.open(request.get_file(), **kwargs)
  268. self._video_stream = self._container.streams.video[0]
  269. self._decoder = self._container.decode(video=0)
  270. except av.FFmpegError:
  271. if isinstance(request.raw_uri, bytes):
  272. msg = "PyAV does not support these `<bytes>`"
  273. else:
  274. msg = f"PyAV does not support `{request.raw_uri}`"
  275. raise InitializationError(msg) from None
  276. else:
  277. self.frames_written = 0
  278. file_handle = self.request.get_file()
  279. filename = getattr(file_handle, "name", None)
  280. extension = self.request.extension or self.request.format_hint
  281. if extension is None:
  282. raise InitializationError("Can't determine output container to use.")
  283. # hacky, but beats running our own format selection logic
  284. # (since av_guess_format is not exposed)
  285. try:
  286. setattr(file_handle, "name", filename or "tmp" + extension)
  287. except AttributeError:
  288. pass # read-only, nothing we can do
  289. try:
  290. self._container = av.open(
  291. file_handle, mode="w", format=container, **kwargs
  292. )
  293. except ValueError:
  294. raise InitializationError(
  295. f"PyAV can not write to `{self.request.raw_uri}`"
  296. )
  297. # ---------------------
  298. # Standard V3 Interface
  299. # ---------------------
  300. def read(
  301. self,
  302. *,
  303. index: int = ...,
  304. format: str = "rgb24",
  305. filter_sequence: List[Tuple[str, Union[str, dict]]] = None,
  306. filter_graph: Tuple[dict, List] = None,
  307. constant_framerate: bool = None,
  308. thread_count: int = 0,
  309. thread_type: str = None,
  310. ) -> np.ndarray:
  311. """Read frames from the video.
  312. If ``index`` is an integer, this function reads the index-th frame from
  313. the file. If ``index`` is ... (Ellipsis), this function reads all frames
  314. from the video, stacks them along the first dimension, and returns a
  315. batch of frames.
  316. Parameters
  317. ----------
  318. index : int
  319. The index of the frame to read, e.g. ``index=5`` reads the 5th
  320. frame. If ``...``, read all the frames in the video and stack them
  321. along a new, prepended, batch dimension.
  322. format : str
  323. Set the returned colorspace. If not None (default: rgb24), convert
  324. the data into the given format before returning it. If ``None``
  325. return the data in the encoded format if it can be expressed as a
  326. strided array; otherwise raise an Exception.
  327. filter_sequence : List[str, str, dict]
  328. If not None, apply the given sequence of FFmpeg filters to each
  329. ndimage. Check the (module-level) plugin docs for details and
  330. examples.
  331. filter_graph : (dict, List)
  332. If not None, apply the given graph of FFmpeg filters to each
  333. ndimage. The graph is given as a tuple of two dicts. The first dict
  334. contains a (named) set of nodes, and the second dict contains a set
  335. of edges between nodes of the previous dict. Check the (module-level)
  336. plugin docs for details and examples.
  337. constant_framerate : bool
  338. If True assume the video's framerate is constant. This allows for
  339. faster seeking inside the file. If False, the video is reset before
  340. each read and searched from the beginning. If None (default), this
  341. value will be read from the container format.
  342. thread_count : int
  343. How many threads to use when decoding a frame. The default is 0,
  344. which will set the number using ffmpeg's default, which is based on
  345. the codec, number of available cores, threadding model, and other
  346. considerations.
  347. thread_type : str
  348. The threading model to be used. One of
  349. - `"SLICE"`: threads assemble parts of the current frame
  350. - `"FRAME"`: threads may assemble future frames
  351. - None (default): Uses ``"FRAME"`` if ``index=...`` and ffmpeg's
  352. default otherwise.
  353. Returns
  354. -------
  355. frame : np.ndarray
  356. A numpy array containing loaded frame data.
  357. Notes
  358. -----
  359. Accessing random frames repeatedly is costly (O(k), where k is the
  360. average distance between two keyframes). You should do so only sparingly
  361. if possible. In some cases, it can be faster to bulk-read the video (if
  362. it fits into memory) and to then access the returned ndarray randomly.
  363. The current implementation may cause problems for b-frames, i.e.,
  364. bidirectionaly predicted pictures. I lack test videos to write unit
  365. tests for this case.
  366. Reading from an index other than ``...``, i.e. reading a single frame,
  367. currently doesn't support filters that introduce delays.
  368. """
  369. if index is ...:
  370. props = self.properties(format=format)
  371. uses_filter = (
  372. self._video_filter is not None
  373. or filter_graph is not None
  374. or filter_sequence is not None
  375. )
  376. self._container.seek(0)
  377. if not uses_filter and props.shape[0] != 0:
  378. frames = np.empty(props.shape, dtype=props.dtype)
  379. for idx, frame in enumerate(
  380. self.iter(
  381. format=format,
  382. filter_sequence=filter_sequence,
  383. filter_graph=filter_graph,
  384. thread_count=thread_count,
  385. thread_type=thread_type or "FRAME",
  386. )
  387. ):
  388. frames[idx] = frame
  389. else:
  390. frames = np.stack(
  391. [
  392. x
  393. for x in self.iter(
  394. format=format,
  395. filter_sequence=filter_sequence,
  396. filter_graph=filter_graph,
  397. thread_count=thread_count,
  398. thread_type=thread_type or "FRAME",
  399. )
  400. ]
  401. )
  402. # reset stream container, because threading model can't change after
  403. # first access
  404. self._video_stream = self._container.streams.video[0]
  405. return frames
  406. if thread_type is not None and not (
  407. self._video_stream.thread_type == thread_type
  408. or self._video_stream.thread_type.name == thread_type
  409. ):
  410. self._video_stream.thread_type = thread_type
  411. if (
  412. thread_count != 0
  413. and thread_count != self._video_stream.codec_context.thread_count
  414. ):
  415. # in FFMPEG thread_count == 0 means use the default count, which we
  416. # change to mean don't change the thread count.
  417. self._video_stream.codec_context.thread_count = thread_count
  418. if constant_framerate is None:
  419. # "variable_fps" is now a flag (handle got removed). Full list at
  420. # https://pyav.org/docs/stable/api/container.html#module-av.format
  421. variable_fps = bool(self._container.format.flags & 0x400)
  422. constant_framerate = not variable_fps
  423. # note: cheap for contigous incremental reads
  424. self._seek(index, constant_framerate=constant_framerate)
  425. desired_frame = next(self._decoder)
  426. self._next_idx += 1
  427. self.set_video_filter(filter_sequence, filter_graph)
  428. if self._video_filter is not None:
  429. desired_frame = self._video_filter.send(desired_frame)
  430. return self._unpack_frame(desired_frame, format=format)
  431. def iter(
  432. self,
  433. *,
  434. format: str = "rgb24",
  435. filter_sequence: List[Tuple[str, Union[str, dict]]] = None,
  436. filter_graph: Tuple[dict, List] = None,
  437. thread_count: int = 0,
  438. thread_type: str = None,
  439. ) -> np.ndarray:
  440. """Yield frames from the video.
  441. Parameters
  442. ----------
  443. frame : np.ndarray
  444. A numpy array containing loaded frame data.
  445. format : str
  446. Convert the data into the given format before returning it. If None,
  447. return the data in the encoded format if it can be expressed as a
  448. strided array; otherwise raise an Exception.
  449. filter_sequence : List[str, str, dict]
  450. Set the returned colorspace. If not None (default: rgb24), convert
  451. the data into the given format before returning it. If ``None``
  452. return the data in the encoded format if it can be expressed as a
  453. strided array; otherwise raise an Exception.
  454. filter_graph : (dict, List)
  455. If not None, apply the given graph of FFmpeg filters to each
  456. ndimage. The graph is given as a tuple of two dicts. The first dict
  457. contains a (named) set of nodes, and the second dict contains a set
  458. of edges between nodes of the previous dict. Check the (module-level)
  459. plugin docs for details and examples.
  460. thread_count : int
  461. How many threads to use when decoding a frame. The default is 0,
  462. which will set the number using ffmpeg's default, which is based on
  463. the codec, number of available cores, threadding model, and other
  464. considerations.
  465. thread_type : str
  466. The threading model to be used. One of
  467. - `"SLICE"` (default): threads assemble parts of the current frame
  468. - `"FRAME"`: threads may assemble future frames (faster for bulk reading)
  469. Yields
  470. ------
  471. frame : np.ndarray
  472. A (decoded) video frame.
  473. """
  474. self._video_stream.thread_type = thread_type or "SLICE"
  475. self._video_stream.codec_context.thread_count = thread_count
  476. self.set_video_filter(filter_sequence, filter_graph)
  477. for frame in self._decoder:
  478. self._next_idx += 1
  479. if self._video_filter is not None:
  480. try:
  481. frame = self._video_filter.send(frame)
  482. except StopIteration:
  483. break
  484. if frame is None:
  485. continue
  486. yield self._unpack_frame(frame, format=format)
  487. if self._video_filter is not None:
  488. for frame in self._video_filter:
  489. yield self._unpack_frame(frame, format=format)
  490. def write(
  491. self,
  492. ndimage: Union[np.ndarray, List[np.ndarray]],
  493. *,
  494. codec: str = None,
  495. is_batch: bool = True,
  496. fps: int = 24,
  497. in_pixel_format: str = "rgb24",
  498. out_pixel_format: str = None,
  499. filter_sequence: List[Tuple[str, Union[str, dict]]] = None,
  500. filter_graph: Tuple[dict, List] = None,
  501. ) -> Optional[bytes]:
  502. """Save a ndimage as a video.
  503. Given a batch of frames (stacked along the first axis) or a list of
  504. frames, encode them and add the result to the ImageResource.
  505. Parameters
  506. ----------
  507. ndimage : ArrayLike, List[ArrayLike]
  508. The ndimage to encode and write to the ImageResource.
  509. codec : str
  510. The codec to use when encoding frames. Only needed on first write
  511. and ignored on subsequent writes.
  512. is_batch : bool
  513. If True (default), the ndimage is a batch of images, otherwise it is
  514. a single image. This parameter has no effect on lists of ndimages.
  515. fps : str
  516. The resulting videos frames per second.
  517. in_pixel_format : str
  518. The pixel format of the incoming ndarray. Defaults to "rgb24" and can
  519. be any stridable pix_fmt supported by FFmpeg.
  520. out_pixel_format : str
  521. The pixel format to use while encoding frames. If None (default)
  522. use the codec's default.
  523. filter_sequence : List[str, str, dict]
  524. If not None, apply the given sequence of FFmpeg filters to each
  525. ndimage. Check the (module-level) plugin docs for details and
  526. examples.
  527. filter_graph : (dict, List)
  528. If not None, apply the given graph of FFmpeg filters to each
  529. ndimage. The graph is given as a tuple of two dicts. The first dict
  530. contains a (named) set of nodes, and the second dict contains a set
  531. of edges between nodes of the previous dict. Check the (module-level)
  532. plugin docs for details and examples.
  533. Returns
  534. -------
  535. encoded_image : bytes or None
  536. If the chosen ImageResource is the special target ``"<bytes>"`` then
  537. write will return a byte string containing the encoded image data.
  538. Otherwise, it returns None.
  539. Notes
  540. -----
  541. When writing ``<bytes>``, the video is finalized immediately after the
  542. first write call and calling write multiple times to append frames is
  543. not possible.
  544. """
  545. if isinstance(ndimage, list):
  546. # frames shapes must agree for video
  547. if any(f.shape != ndimage[0].shape for f in ndimage):
  548. raise ValueError("All frames should have the same shape")
  549. elif not is_batch:
  550. ndimage = np.asarray(ndimage)[None, ...]
  551. else:
  552. ndimage = np.asarray(ndimage)
  553. if self._video_stream is None:
  554. self.init_video_stream(codec, fps=fps, pixel_format=out_pixel_format)
  555. self.set_video_filter(filter_sequence, filter_graph)
  556. for img in ndimage:
  557. self.write_frame(img, pixel_format=in_pixel_format)
  558. if self.request._uri_type == URI_BYTES:
  559. # bytes are immutuable, so we have to flush immediately
  560. # and can't support appending
  561. self._flush_writer()
  562. self._container.close()
  563. return self.request.get_file().getvalue()
  564. def properties(self, index: int = ..., *, format: str = "rgb24") -> ImageProperties:
  565. """Standardized ndimage metadata.
  566. Parameters
  567. ----------
  568. index : int
  569. The index of the ndimage for which to return properties. If ``...``
  570. (Ellipsis, default), return the properties for the resulting batch
  571. of frames.
  572. format : str
  573. If not None (default: rgb24), convert the data into the given format
  574. before returning it. If None return the data in the encoded format
  575. if that can be expressed as a strided array; otherwise raise an
  576. Exception.
  577. Returns
  578. -------
  579. properties : ImageProperties
  580. A dataclass filled with standardized image metadata.
  581. Notes
  582. -----
  583. This function is efficient and won't process any pixel data.
  584. The provided metadata does not include modifications by any filters
  585. (through ``filter_sequence`` or ``filter_graph``).
  586. """
  587. video_width = self._video_stream.codec_context.width
  588. video_height = self._video_stream.codec_context.height
  589. pix_format = format or self._video_stream.codec_context.pix_fmt
  590. frame_template = av.VideoFrame(video_width, video_height, pix_format)
  591. shape = _get_frame_shape(frame_template)
  592. if index is ...:
  593. n_frames = self._video_stream.frames
  594. shape = (n_frames,) + shape
  595. return ImageProperties(
  596. shape=tuple(shape),
  597. dtype=_format_to_dtype(frame_template.format),
  598. n_images=shape[0] if index is ... else None,
  599. is_batch=index is ...,
  600. )
  def metadata(
      self,
      index: int = ...,
      exclude_applied: bool = True,
      constant_framerate: bool = None,
  ) -> Dict[str, Any]:
      """Format-specific metadata.

      Collects metadata that is stored in the container, in the video
      stream, or in the side-data of a single frame.

      Parameters
      ----------
      index : int
          If ... (Ellipsis, default) return global metadata, i.e., the
          metadata stored in the container and in the video stream.
          Otherwise, seek to the frame at the given index, decode it, and
          return its flags and side data.
      exclude_applied : bool
          Currently, this parameter has no effect. It exists for compliance
          with the ImageIO v3 API.
      constant_framerate : bool
          Passed on to the seek routine when a frame index is given. If True
          assume the video's framerate is constant, which allows for faster
          seeking inside the file. If None (default), the value is derived
          from the flags of the container format.

      Returns
      -------
      metadata : dict
          A dictionary filled with format-specific metadata fields and their
          values.

      """

      if index is ...:
          stream = self._video_stream

          # useful flags defined on the container and/or video stream
          global_metadata = {
              "video_format": stream.codec_context.pix_fmt,
              "codec": stream.codec.name,
              "long_codec": stream.codec.long_name,
              "profile": stream.profile,
              "fps": float(stream.guessed_rate),
          }
          if stream.duration is not None:
              # duration is stored in units of the stream's time base
              global_metadata["duration"] = float(stream.duration * stream.time_base)

          # entries stored in the file are applied last and therefore win
          # over the computed fields above if their names collide
          global_metadata.update(self.container_metadata)
          global_metadata.update(self.video_stream_metadata)
          return global_metadata

      if constant_framerate is None:
          # "variable_fps" is now a flag (handle got removed). Full list at
          # https://pyav.org/docs/stable/api/container.html#module-av.format
          constant_framerate = not bool(self._container.format.flags & 0x400)

      self._seek(index, constant_framerate=constant_framerate)
      frame = next(self._decoder)
      self._next_idx += 1

      # useful flags defined on the frame
      frame_metadata = {
          "key_frame": bool(frame.key_frame),
          "time": frame.time,
          "interlaced_frame": bool(frame.interlaced_frame),
          "frame_type": _get_frame_type(frame.pict_type),
      }

      # side data, stored as raw bytes under the name of its type
      for item in frame.side_data:
          frame_metadata[item.type.name] = bytes(item)

      return frame_metadata
  def close(self) -> None:
      """Close the Video.

      If the plugin was opened for writing and a video stream exists, the
      filter and encoder are flushed first. Afterwards the reference to the
      video stream is dropped, the container (if any) is closed, and the
      request is finished.

      The container is closed and the request is finished even if one of
      the earlier steps raises; the exception still propagates to the
      caller.

      """

      is_write = self.request.mode.io_mode == IOMode.write

      try:
          try:
              if is_write and self._video_stream is not None:
                  self._flush_writer()
          finally:
              # release the container even if flushing the writer failed
              self._video_stream = None
              if self._container is not None:
                  self._container.close()
      finally:
          # always hand the resource back to the request
          self.request.finish()
  def __enter__(self) -> "PyAVPlugin":
      """Enter the context manager by delegating to the parent class."""
      entered = super().__enter__()
      return entered
  684. # ------------------------------
  685. # Add-on Interface inside imopen
  686. # ------------------------------
  def init_video_stream(
      self,
      codec: str,
      *,
      fps: float = 24,
      pixel_format: str = None,
      max_keyframe_interval: int = None,
      force_keyframes: bool = None,
  ) -> None:
      """Initialize a new video stream.

      This function adds a new video stream to the ImageResource using the
      selected encoder (codec), framerate, and colorspace.

      Parameters
      ----------
      codec : str
          The codec to use, e.g. ``"h264"`` or ``"vp9"``.
      fps : float
          The desired framerate of the video stream (frames per second).
          Any value accepted by ``fractions.Fraction`` works (int, float,
          ``Fraction``, ``Decimal``, or a string such as ``"30000/1001"``).
          The rate is approximated by a fraction whose denominator does not
          exceed 65535.
      pixel_format : str
          The pixel format to use while encoding frames. If None (default) use
          the codec's default.
      max_keyframe_interval : int
          The maximum distance between two intra frames (I-frames). Also known
          as GOP size. If unspecified use the codec's default. Note that not
          every I-frame is a keyframe; see the notes for details.
      force_keyframes : bool
          If True, limit inter frames dependency to frames within the current
          keyframe interval (GOP), i.e., force every I-frame to be a keyframe.
          If unspecified, use the codec's default.

      Raises
      ------
      ValueError
          If ``fps`` is not a positive rate (after approximation). The check
          happens before any stream is added to the container.

      Notes
      -----
      You can usually leave ``max_keyframe_interval`` and ``force_keyframes``
      at their default values, unless you try to generate seek-optimized video
      or have a similar specialist use-case. In this case, ``force_keyframes``
      controls the ability to seek to _every_ I-frame, and
      ``max_keyframe_interval`` controls how close to a random frame you can
      seek. Low values allow more fine-grained seek at the expense of
      file-size (and thus I/O performance).

      """

      # Passing an unrestricted float rate may introduce `OverflowError`,
      # which is a legacy issue of `pyav`:
      # https://github.com/PyAV-Org/PyAV/issues/242
      # Hence the rate (and the time base) are limited to 16-bit denominators.
      max_denominator = 2**16 - 1

      rate = Fraction(fps).limit_denominator(max_denominator)
      if rate <= 0:
          raise ValueError(f"`fps` must be a positive framerate, got {fps!r}.")

      stream = self._container.add_stream(codec, rate)
      # one tick of the time base corresponds to the duration of one frame
      stream.time_base = (1 / rate).limit_denominator(max_denominator)

      if pixel_format is not None:
          stream.pix_fmt = pixel_format
      if max_keyframe_interval is not None:
          stream.gop_size = max_keyframe_interval
      if force_keyframes is not None:
          if force_keyframes:
              stream.codec_context.flags |= Flags.closed_gop
          else:
              stream.codec_context.flags &= ~Flags.closed_gop

      self._video_stream = stream
  def write_frame(self, frame: np.ndarray, *, pixel_format: str = "rgb24") -> None:
      """Add a frame to the video stream.

      Appends a new frame to the video. The stream has to be initialized
      beforehand, i.e., ``init_video_stream`` has to be called before
      calling this function for the write to succeed.

      Parameters
      ----------
      frame : np.ndarray
          The image to be appended/written to the video stream. For planar
          pixel formats the plane is indexed along the first axis and
          height/width are read from axes 1 and 2; otherwise height/width
          are read from axes 0 and 1.
      pixel_format : str
          The colorspace (pixel format) of the incoming frame.

      Notes
      -----
      Frames may be held in a buffer, e.g., by the filter pipeline used during
      writing or by FFMPEG to batch them prior to encoding. Make sure to
      ``.close()`` the plugin or to use a context manager to ensure that all
      frames are written to the ImageResource.

      """

      # manual packing of ndarray into frame
      # (this should live in pyAV, but it doesn't support all the formats we
      # want and PRs there are slow)
      video_format = av.VideoFormat(pixel_format)
      dtype = _format_to_dtype(video_format)
      is_planar = video_format.is_planar

      if is_planar:
          width = frame.shape[2]
          height = frame.shape[1]
      else:
          width = frame.shape[1]
          height = frame.shape[0]

      av_frame = av.VideoFrame(width, height, video_format.name)

      if is_planar:
          # every plane is filled from the matching slice of the first axis
          for plane_idx, plane in enumerate(av_frame.planes):
              target = as_strided(
                  np.frombuffer(plane, dtype=dtype),
                  shape=(plane.height, plane.width),
                  strides=(plane.line_size, dtype.itemsize),
              )
              target[...] = frame[plane_idx]
      else:
          if video_format.name.startswith("bayer_"):
              # ffmpeg doesn't describe bayer formats correctly
              # see https://github.com/imageio/imageio/issues/761#issuecomment-1059318851
              # and following for details.
              n_channels = 1
          else:
              n_channels = len(video_format.components)

          plane = av_frame.planes[0]
          pixel_stride = n_channels * dtype.itemsize
          if n_channels > 1:
              target_shape = (plane.height, plane.width, n_channels)
              target_strides = (plane.line_size, pixel_stride, dtype.itemsize)
          else:
              target_shape = (plane.height, plane.width)
              target_strides = (plane.line_size, pixel_stride)

          target = as_strided(
              np.frombuffer(plane, dtype=dtype),
              shape=target_shape,
              strides=target_strides,
          )
          target[...] = frame

      stream = self._video_stream

      codec_time_base = stream.codec_context.time_base
      if codec_time_base:
          av_frame.time_base = codec_time_base

      # the presentation timestamp is the running count of written frames
      av_frame.pts = self.frames_written
      self.frames_written += 1

      if self._video_filter is not None:
          av_frame = self._video_filter.send(av_frame)
          if av_frame is None:
              # the filter needs more input before it produces a frame
              return

      if stream.frames == 0:
          # take the stream size from the first frame that gets encoded
          stream.width = av_frame.width
          stream.height = av_frame.height

      for packet in stream.encode(av_frame):
          self._container.mux(packet)
  def set_video_filter(
      self,
      filter_sequence: List[Tuple[str, Union[str, dict]]] = None,
      filter_graph: Tuple[dict, List] = None,
  ) -> None:
      """Set the filter(s) to use.

      This function creates a new FFMPEG filter graph to use when reading or
      writing video. In the case of reading, frames are passed through the
      filter graph before being returned and, in case of writing, frames are
      passed through the filter before being written to the video.

      If both arguments are None, any previously set filter is removed.

      Parameters
      ----------
      filter_sequence : List[str, str, dict]
          If not None, apply the given sequence of FFmpeg filters to each
          ndimage. Each entry is a filter name together with its arguments,
          given either as a string or as a dict of keyword arguments. Check
          the (module-level) plugin docs for details and examples.
      filter_graph : (dict, List)
          If not None, apply the given graph of FFmpeg filters to each
          ndimage. The graph is given as a tuple of two elements. The first
          is a dict containing a (named) set of nodes, and the second is a
          list of edges ``(from_node, to_node, out_idx, in_idx)`` between
          those nodes. The names ``"video_in"`` (end of the filter sequence)
          and ``"video_out"`` (the sink) are always available. Check the
          (module-level) plugin docs for details and examples.

      Notes
      -----
      Changing a filter graph with lag during reading or writing will
      currently cause frames in the filter queue to be lost.

      """

      if filter_sequence is None and filter_graph is None:
          self._video_filter = None
          return

      sequence = list() if filter_sequence is None else filter_sequence

      node_descriptors: Dict[str, Tuple[str, Union[str, Dict]]]
      edges: List[Tuple[str, str, int, int]]
      if filter_graph is None:
          # without an explicit graph the sequence feeds the sink directly
          node_descriptors = dict()
          edges = [("video_in", "video_out", 0, 0)]
      else:
          node_descriptors, edges = filter_graph

      graph = av.filter.Graph()

      def add_filter(filter_name, argument):
          # string arguments are passed positionally, dicts as keywords
          if isinstance(argument, str):
              return graph.add(filter_name, argument)
          return graph.add(filter_name, **argument)

      # chain the sequence of filters behind the input buffer
      tail = graph.add_buffer(template=self._video_stream)
      for filter_name, argument in sequence:
          node = add_filter(filter_name, argument)
          tail.link_to(node)
          tail = node

      nodes = {
          "video_in": tail,
          "video_out": graph.add("buffersink"),
      }
      for name, (filter_name, arguments) in node_descriptors.items():
          nodes[name] = add_filter(filter_name, arguments)

      for source, target, out_idx, in_idx in edges:
          nodes[source].link_to(nodes[target], out_idx, in_idx)

      graph.configure()

      def video_filter():
          # this starts a co-routine
          # frames are handed in via the generator's ``send()``
          incoming = yield None

          # send and receive frames in "parallel"
          while incoming is not None:
              graph.push(incoming)
              try:
                  incoming = yield graph.pull()
              except av.error.BlockingIOError:
                  # filter has lag and needs more frames
                  incoming = yield None
              except av.error.EOFError:
                  break

          try:
              # send EOF in av>=9.0
              graph.push(None)
          except ValueError:  # pragma: no cover
              # handle av<9.0
              pass

          # all frames have been sent, empty the filter
          while True:
              try:
                  yield graph.pull()
              except av.error.EOFError:
                  break  # EOF
              except av.error.BlockingIOError:  # pragma: no cover
                  # handle av<9.0
                  break

      coroutine = video_filter()
      self._video_filter = coroutine
      # advance the co-routine to its first ``yield`` so it accepts frames
      coroutine.send(None)
  @property
  def container_metadata(self):
      """Container-specific metadata.

      The metadata object stored at the container level, returned as-is
      (no copy is made).

      """
      container = self._container
      return container.metadata
  @property
  def video_stream_metadata(self):
      """Stream-specific metadata.

      The metadata object stored at the level of the video stream, returned
      as-is (no copy is made).

      """
      stream = self._video_stream
      return stream.metadata
  911. # -------------------------------
  912. # Internals and private functions
  913. # -------------------------------
  def _unpack_frame(self, frame: av.VideoFrame, *, format: str = None) -> np.ndarray:
      """Convert a av.VideoFrame into a ndarray

      Parameters
      ----------
      frame : av.VideoFrame
          The frame to unpack.
      format : str
          If not None, convert the frame to the given format before unpacking.

      Returns
      -------
      out : np.ndarray
          For frames with a single plane, a strided view onto the plane's
          buffer. For frames with several planes, a new array holding the
          concatenated planes, reshaped to the frame's shape.

      """

      if format is not None:
          frame = frame.reformat(format=format)

      dtype = _format_to_dtype(frame.format)
      shape = _get_frame_shape(frame)
      itemsize = dtype.itemsize
      bits_per_item = itemsize * 8

      planes = list()
      for plane_idx, av_plane in enumerate(frame.planes):
          # number of dtype-sized items each pixel occupies in this plane
          n_channels = sum(
              component.bits // bits_per_item
              for component in frame.format.components
              if component.plane == plane_idx
          )

          pixel_stride = n_channels * itemsize
          if n_channels > 1:
              plane_shape = (av_plane.height, av_plane.width, n_channels)
              plane_strides = (av_plane.line_size, pixel_stride, itemsize)
          else:
              plane_shape = (av_plane.height, av_plane.width)
              plane_strides = (av_plane.line_size, pixel_stride)

          # rows are addressed via line_size rather than via width * itemsize
          planes.append(
              as_strided(
                  np.frombuffer(av_plane, dtype=dtype),
                  shape=plane_shape,
                  strides=plane_strides,
              )
          )

      if len(planes) > 1:
          # Note: the planes *should* exist inside a contiguous memory block
          # somewhere inside av.Frame however pyAV does not appear to expose this,
          # so we are forced to copy the planes individually instead of wrapping
          # them :(
          return np.concatenate(planes).reshape(shape)

      return planes[0]
  957. def _seek(self, index, *, constant_framerate: bool = True) -> Generator:
  958. """Seeks to the frame at the given index."""
  959. if index == self._next_idx:
  960. return # fast path :)
  961. # we must decode at least once before we seek otherwise the
  962. # returned frames become corrupt.
  963. if self._next_idx == 0:
  964. next(self._decoder)
  965. self._next_idx += 1
  966. if index == self._next_idx:
  967. return # fast path :)
  968. # remove this branch until I find a way to efficiently find the next
  969. # keyframe. keeping this as a reminder
  970. # if self._next_idx < index and index < self._next_keyframe_idx:
  971. # frames_to_yield = index - self._next_idx
  972. if not constant_framerate and index > self._next_idx:
  973. frames_to_yield = index - self._next_idx
  974. elif not constant_framerate:
  975. # seek backwards and can't link idx and pts
  976. self._container.seek(0)
  977. self._decoder = self._container.decode(video=0)
  978. self._next_idx = 0
  979. frames_to_yield = index
  980. else:
  981. # we know that the time between consecutive frames is constant
  982. # hence we can link index and pts
  983. # how many pts lie between two frames
  984. sec_delta = 1 / self._video_stream.guessed_rate
  985. pts_delta = sec_delta / self._video_stream.time_base
  986. index_pts = int(index * pts_delta)
  987. # this only seeks to the closed (preceeding) keyframe
  988. self._container.seek(index_pts, stream=self._video_stream)
  989. self._decoder = self._container.decode(video=0)
  990. # this may be made faster if we could get the keyframe's time without
  991. # decoding it
  992. keyframe = next(self._decoder)
  993. keyframe_time = keyframe.pts * keyframe.time_base
  994. keyframe_pts = int(keyframe_time / self._video_stream.time_base)
  995. keyframe_index = keyframe_pts // pts_delta
  996. self._container.seek(index_pts, stream=self._video_stream)
  997. self._next_idx = keyframe_index
  998. frames_to_yield = index - keyframe_index
  999. for _ in range(frames_to_yield):
  1000. next(self._decoder)
  1001. self._next_idx += 1
  1002. def _flush_writer(self):
  1003. """Flush the filter and encoder
  1004. This will reset the filter to `None` and send EoF to the encoder,
  1005. i.e., after calling, no more frames may be written.
  1006. """
  1007. stream = self._video_stream
  1008. if self._video_filter is not None:
  1009. # flush encoder
  1010. for av_frame in self._video_filter:
  1011. if stream.frames == 0:
  1012. stream.width = av_frame.width
  1013. stream.height = av_frame.height
  1014. for packet in stream.encode(av_frame):
  1015. self._container.mux(packet)
  1016. self._video_filter = None
  1017. # flush stream
  1018. for packet in stream.encode():
  1019. self._container.mux(packet)
  1020. self._video_stream = None